In [None]:
import sys
# Print the filesystem path of the Python interpreter that is running this kernel.
print(sys.executable)

In [3]:
import calendar
import re
from collections import deque
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def is_new_message(line):
    """Return True when ``line`` is the header line of a new chat message.

    A header has the shape ``DD/MM/YYYY HH:MM - <rest>``. The `` - ``
    separator is required in addition to the timestamp because
    ``parse_line`` splits on it: a continuation line that merely begins
    with date-like text but has no separator would otherwise be treated as
    a header and make ``parse_line`` raise ``ValueError``.

    Args:
        line: One line of the exported chat (str).

    Returns:
        bool: True if the line starts with a timestamp followed by `` - ``.
    """
    # re.match anchors at the start of the string, so no explicit '^' is needed.
    header_pattern = r'\d{2}/\d{2}/\d{4} \d{2}:\d{2} - '
    return re.match(header_pattern, line) is not None

def parse_line(line):
    """Split a message header line into its timestamp and the remaining text.

    The line is cut at the first ' - ' only, so the remaining text may itself
    contain ' - '.

    Args:
        line: Header line of the form ``DD/MM/YYYY HH:MM - <rest>``.

    Returns:
        tuple: ``(datetime, str)`` - the parsed timestamp and the text after
        the separator.

    Raises:
        ValueError: If the separator is missing or the timestamp does not
            match ``%d/%m/%Y %H:%M``.
    """
    timestamp_text, remainder = line.split(' - ', 1)
    return datetime.strptime(timestamp_text, '%d/%m/%Y %H:%M'), remainder

def parse_message(message):
    """Separate the author from the content of a message.

    The text is cut at the first ': ' only, so the content may itself
    contain ': '.

    Args:
        message: Text of the form ``<author>: <content>``.

    Returns:
        tuple: ``(author, content)``, or ``(None, None)`` when the text has
        no ': ' separator.
    """
    author, separator, content = message.partition(': ')
    if not separator:
        # No ': ' present, so there is no author to extract.
        return None, None
    return author, content

def store_message(messages, date, author, content):
    """Record ``(date, content)`` under ``author`` in ``messages``.

    The author's list is created the first time the author is seen. A
    ``TypeError`` raised by the lookup (for example an unhashable author) is
    ignored and nothing is stored.

    Note: ``None`` is hashable, so an entry whose author is ``None`` is
    stored under the key ``None`` rather than being skipped.

    Args:
        messages: Mapping of author -> list of ``(date, content)`` tuples;
            mutated in place.
        date: Timestamp of the message.
        author: Key under which the message is stored.
        content: Message text.
    """
    entry = (date, content)
    try:
        bucket = messages[author]
    except KeyError:
        # First message from this author.
        messages[author] = [entry]
    except TypeError:
        return
    else:
        bucket.append(entry)


In [None]:
# Parse the exported chat file into per-author message lists.
# Uses is_new_message, parse_line, parse_message and store_message.

# TODO: machine-specific absolute path; point this at the chat export to analyse.
CHAT_EXPORT_PATH = r'C:\Users\guiga\Documents\ZapRecap\test\cvTeste.txt'

dates = []                # timestamp of every message header, in file order
author_and_messages = {}  # author -> [(date, content), ...]
date = None               # timestamp of the message currently being accumulated
current_author = None
current_message = None

with open(CHAT_EXPORT_PATH, 'r', encoding='utf-8') as file:
    # Skip first line (default WhatsApp messages); the default value avoids
    # StopIteration when the file is empty.
    next(file, None)

    for line in file:
        # Strip every line, including the first message, so stored content
        # never carries a trailing newline.
        line = line.strip()
        if is_new_message(line):
            # A new header closes the message accumulated so far (if any).
            if date is not None:
                store_message(author_and_messages, date, current_author, current_message)
            date, message = parse_line(line)
            dates.append(date)
            current_author, current_message = parse_message(message)
        elif current_message is not None:
            # Continuation of a multi-line message.
            current_message += ' ' + line
        # Otherwise the line continues an entry without author content
        # (parse_message returned None) or precedes the first header;
        # there is nothing to append it to, so it is ignored.

    # Store last message
    if date is not None:
        store_message(author_and_messages, date, current_author, current_message)
        

In [None]:
# Heatmap of message counts: one row per weekday, one column per
# (month_year, week) pair.
# NOTE(review): `df` is not created in the cells visible here; this cell needs
# it to provide the columns 'datetime', 'weekday', 'month_year' and 'week'.
# `np` and `calendar` must be imported in the imports cell.

# Create pivot table for heatmap
pivot = pd.pivot_table(
    df,
    values='datetime',
    index='weekday',
    columns=['month_year', 'week'],
    aggfunc='count',
    fill_value=0
)

# Keep the (month_year, week) pairs before flattening so the month labels
# below do not have to be recovered by splitting the flattened string
# (which breaks if a month label itself contains '_').
column_keys = list(pivot.columns)

# Flatten multi-index columns
pivot.columns = [f"{m}_{w}" for m, w in column_keys]

# Colour limits from the 5th/95th percentile of the non-zero counts, so a few
# extreme cells do not wash out the rest of the colour scale.
positive_counts = pivot.values[pivot.values > 0]
if positive_counts.size:
    vmin, vmax = np.percentile(positive_counts, [5, 95])
else:
    # No non-zero cell: let matplotlib pick the limits instead of taking
    # percentiles of an empty array.
    vmin = vmax = None

# Create heatmap with green colormap
fig, ax = plt.subplots(figsize=(20, 5))
image = ax.imshow(pivot, cmap='Greens', aspect='auto', vmin=vmin, vmax=vmax)
fig.colorbar(image, ax=ax, label='Message Count')

# Add labels
# NOTE(review): assumes 'weekday' is 0..6 with Monday=0 and that all seven
# weekdays occur in the data - otherwise rows and labels do not line up. Confirm.
ax.set_yticks(range(7))
ax.set_yticklabels(list(calendar.day_name))

# Create x-axis labels (show month only at transitions)
month_labels = []
prev_month = None
for month_key, _week in column_keys:
    month = str(month_key)
    if month != prev_month:
        month_labels.append(month)
        prev_month = month
    else:
        month_labels.append('')

ax.set_xticks(range(len(month_labels)))
ax.set_xticklabels(month_labels, rotation=45)
ax.set_xlabel('Month/Year')
ax.set_ylabel('Day of Week')

# Remove grid and border
ax.grid(False)
ax.set_frame_on(False)

fig.tight_layout()
plt.show()