In [3]:
import os
import pandas as pd
import datetime
import re

# Function to list all chat files in the directory structure
def list_chat_files(date_directory):
    chat_files = []
    for date_folder in os.listdir(date_directory):
        date_path = os.path.join(date_directory, date_folder)
        if os.path.isdir(date_path):
            for team_folder in os.listdir(date_path):
                team_path = os.path.join(date_path, team_folder)
                if os.path.isdir(team_path):
                    for person_folder in os.listdir(team_path):
                        person_path = os.path.join(team_path, person_folder)
                        if os.path.isdir(person_path):
                            for file in os.listdir(person_path):
                                if file.endswith('.txt'):
                                    chat_files.append(os.path.join(person_path, file))
    return chat_files

def parse_chat_file(file_path, expected_date):
    chat_data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            message_match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*?): (.*)', line)
            system_match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*)', line)
            if message_match:
                date_time_str, sender, message = message_match.groups()
            elif system_match:
                date_time_str, info = system_match.groups()
                sender = None
            else:
                continue

            date_time = pd.to_datetime(date_time_str, format='%d/%m/%y, %I:%M %p')

            if date_time.date() != expected_date:
                continue

            # Adjusted logic to check for numeric sender (phone number)
            is_person = re.match(r'^[+\d\s-]+$', sender) is None  # True if sender is NOT purely numeric

            # Debug: Print sender and is_person flag
            print(f"Sender: {sender}, Is Person: {is_person}")

            chat_data.append((date_time, sender, is_person))
    return chat_data

# Function to create a template dataframe
def create_template_dataframe():
    times = [datetime.datetime(2000, 1, 1, 0, 0) + datetime.timedelta(minutes=1 * i) for i in range(1440)]
    intervals = [time.strftime('%I:%M %p') for time in times]
    df = pd.DataFrame(index=intervals)
    return df

def populate_dataframe(df, parsed_data, start_column_index):
    person_column_index = start_column_index
    for entry in parsed_data:
        date_time, sender, is_person = entry
        interval_index = min((date_time.hour * 60 + date_time.minute) // 1, 1439)
        interval = df.index[interval_index]

        if person_column_index not in df.columns:
            df[person_column_index] = 0
        if person_column_index + 1 not in df.columns:
            df[person_column_index + 1] = 0

        if is_person:
            df.at[interval, person_column_index] = 1
        else:
            df.at[interval, person_column_index + 1] = 1

        person_column_index += 2

    return df, person_column_index


def process_person_chats(chat_files):
    dataframes = {}
    for file in chat_files:
        parts = file.split(os.sep)
        date_folder, person = parts[-4], parts[-2]
        file_name = os.path.basename(file)

        # Extracting group name from file name
        group_name_match = re.match(r'WhatsApp Chat with (.+).txt', file_name)
        if group_name_match:
            group_name = group_name_match.group(1)
        else:
            group_name = "Unknown Group"

        try:
            expected_date = pd.to_datetime(date_folder).date()
        except ValueError:
            print(f"Skipping file due to incorrect date format in folder name: {file}")
            continue

        key = f"{expected_date.strftime('%Y-%m-%d')}_{person}"

        if key not in dataframes:
            dataframes[key] = create_template_dataframe()
            start_column_index = 0
        else:
            if not dataframes[key].columns.empty:
                start_column_index = max(dataframes[key].columns) + 1
            else:
                start_column_index = 0

        parsed_data = parse_chat_file(file, expected_date)
        dataframes[key], next_column_index = populate_dataframe(dataframes[key], parsed_data, start_column_index)

        # Create a new DataFrame with the group name and concatenate it
        group_df = pd.DataFrame([[group_name] + [''] * (dataframes[key].shape[1] - 1)], columns=dataframes[key].columns)
        dataframes[key] = pd.concat([group_df, dataframes[key]], ignore_index=True)

    return dataframes


# Main script
date_directory = "C:\\Users\\mauriceyeng\\Python\\Daily-Reports\\Test\\filtered_chats"
chat_files = list_chat_files(date_directory)
person_dataframes = process_person_chats(chat_files)

# Save each dataframe as a CSV file in the current working directory for testing purpose only, will be omiited in real application

for key, df in person_dataframes.items():
    csv_file_path = f"matrix_csv/{key}.csv"
    df.to_csv(csv_file_path)
    print(f"Saved DataFrame to {csv_file_path}")

Sender: +263 77 430 1947, Is Person: False
Sender: +263 77 430 1947, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: F

  df[person_column_index] = 0
  df[person_column_index + 1] = 0


Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 126 2077, Is Person: False
Sender: +263 78 126 2077, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 126 2077, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 126 2077, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 126 2077, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 126 2077, Is Person: False
Sender: +263 78 126 2077, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 126 2077, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 126 2077, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sende

Sender: Aditi Edoofa, Is Person: True
Sender: +263 77 528 4875, Is Person: False
Sender: +263 77 528 4875, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 77 528 4875, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 77 528 4875, Is Person: False
Sender: +263 77 528 4875, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 77 528 4875, Is Person: False
Sender: +263 77 528 4875, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: +263 77 528 4875, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 77 528 4875, Is Person: False
Sender: +263 77 528 4875, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 050 5370, Is Person: False
Sender: Aditi Edoofa, Is Person: True
Sender: Aditi Edoofa, Is Person: True
Sender: +263 78 050 5370, Is Pers

  df[person_column_index] = 0
  df[person_column_index + 1] = 0


Sender: Jasmine Edoofa, Is Person: True
Sender: Jasmine Edoofa, Is Person: True
Sender: Jasmine Edoofa, Is Person: True
Sender: +263 77 434 3833, Is Person: False
Sender: Jasmine Edoofa, Is Person: True
Sender: Jasmine Edoofa, Is Person: True
Sender: +263 77 434 3833, Is Person: False
Sender: Jasmine Edoofa, Is Person: True
Sender: Jasmine Edoofa, Is Person: True
Sender: +263 77 434 3833, Is Person: False
Sender: Jasmine Edoofa, Is Person: True
Sender: +263 77 434 3833, Is Person: False
Sender: Jasmine Edoofa, Is Person: True
Sender: +263 77 434 3833, Is Person: False
Sender: +263 77 434 3833, Is Person: False
Sender: Jasmine Edoofa, Is Person: True
Sender: Jasmine Edoofa, Is Person: True
Sender: Jasmine Edoofa, Is Person: True
Sender: Jasmine Edoofa, Is Person: True
Sender: Jasmine Edoofa, Is Person: True
Sender: +263 77 434 3833, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa,

  df[person_column_index] = 0
  df[person_column_index + 1] = 0


Sender: +263 71 326 9377, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 326 9377, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 326 9377, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 326 9377, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 326 9377, Is Person: False
Sender: +263 71 326 9377, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 326 9377, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 326 9377, Is Person: False
Sender: +263 71 326 9377, Is Person: False
Sender: +263 71 326 9377, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 326 9377, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 326 9377, Is Person: False
Sender: Saloni E

Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 543 7831, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 543 7831, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: +263 71 432 6077, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: +263 

Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: +263 77 918 0886, Is Person: False
Sender: +263 77 918 0886, Is Person: False
Sender: +263 77 918 0886, Is Person: False
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Saloni Edoofa, Is Person: True
Sender: Salon

  df[person_column_index] = 0
  df[person_column_index + 1] = 0


Sender: Sharda Edoofa, Is Person: True
Sender: +263 77 889 2517, Is Person: False
Sender: Sharda Edoofa, Is Person: True
Sender: +263 77 889 2517, Is Person: False
Sender: Sharda Edoofa, Is Person: True
Sender: Sharda Edoofa, Is Person: True
Sender: Sharda Edoofa, Is Person: True
Sender: Sharda Edoofa, Is Person: True
Sender: Sharda Edoofa, Is Person: True
Sender: Sharda Edoofa, Is Person: True
Sender: +263 71 564 0809, Is Person: False
Sender: Sharda Edoofa, Is Person: True
Sender: +263 71 564 0809, Is Person: False
Sender: Sharda Edoofa, Is Person: True
Sender: +263 71 564 0809, Is Person: False
Sender: Sharda Edoofa, Is Person: True
Sender: +263 77 346 7191, Is Person: False
Sender: +263 78 653 0616, Is Person: False
Sender: +263 78 653 0616, Is Person: False
Sender: Sharda Edoofa, Is Person: True
Sender: Sharda Edoofa, Is Person: True
Sender: +263 78 653 0616, Is Person: False
Sender: Sharda Edoofa, Is Person: True
Sender: Sharda Edoofa, Is Person: True
Sender: Sharda Edoofa, Is Pe

Sender: +263 77 448 2658, Is Person: False
Sender: Ashi Edoofa, Is Person: True
Sender: +263 77 448 2658, Is Person: False
Sender: +263 77 448 2658, Is Person: False
Sender: Ashi Edoofa, Is Person: True
Sender: +263 77 448 2658, Is Person: False
Sender: +265 885 25 22 54, Is Person: False
Sender: +265 885 25 22 54, Is Person: False
Sender: +265 992 34 99 23, Is Person: False
Sender: +265 992 34 99 23, Is Person: False
Sender: +263 77 503 3000, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: +263 77 503 3000, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +263 77 503 3000, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: +263 77 503 3000, Is Person: False
Sender: +263 77 503 3000, Is Person: False
Sender: +263 77 503 3000, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: +263 77 503 3000, Is Person: False
Sender: Kir

  df[person_column_index] = 0
  df[person_column_index + 1] = 0


Sender: Kirti Edoofa, Is Person: True
Sender: +263 77 453 7791, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +263 77 453 7791, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: +263 78 501 9780, Is Person: False
Sender: +263 71 931 0305, Is Person: False
Sender: +263 71 931 0305, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +263 71 931 0305, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +263 71 931 0305, Is Person: False
Sender: +263 71 931 0305, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +263 71 931 0305, Is Person: False
Sender: Kirti Edoofa, Is Person: True


Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +263 71 397 1947, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: +263 78 284 6426, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: +260 97 9935858, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: +260 97 9935858, Is Person: False
Sender: +260 97 9935858, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +260 97 9935858, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: Kirti Edoofa, Is Person: True
Sender: +260 97 9935858, Is Person: False
Sender: Kirti Edoofa, Is Person: True
Sender: +260 97 9935

  df[person_column_index] = 0
  df[person_column_index + 1] = 0


Sender: Milan Edoofa, Is Person: True
Sender: +263 71 355 9768, Is Person: False
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Sender: +263 71 355 9768, Is Person: False
Sender: Milan Edoofa, Is Person: True
Sender: Milan Edoofa, Is Person: True
Se

Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 77 671 4402, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 77 671 4402, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 77 671 4402, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 77 671 4402, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 77 671 4402, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 77 671 4402, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 77 671 4402, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 77 671 4402, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True


  df[person_column_index] = 0
  df[person_column_index + 1] = 0


Sender: +263 71 380 7287, Is Person: False
Sender: +263 71 380 7287, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 71 380 7287, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 71 380 7287, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 71 380 7287, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 71 380 7287, Is Person: False
Sender: +263 71 380 7287, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 78 706 9627, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 78 706 9627, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 78 706 9627, Is Person: False
Sender: Shivjeet Edoofa, Is Person: True
Sender: +263 78 706 9627, Is Person: False
Sender: Shivjeet Edoofa, Is Person:

In [4]:
# Example: After processing the chats
for key, df in person_dataframes.items():
    print(f"DataFrame for {key}:")
    print(df.head())  # Print the first few rows


DataFrame for 2023-11-22_Aditi_Edoofa:
                      0    1    2    3    4    5    6    7    8    9     ...  \
0        Shyne EWYL21E0758                                               ...   
1     Shyne EWYL21E0758(1)                                               ...   
2     SHEUNESU EWYL23E0160                                               ...   
3  SHEUNESU EWYL23E0160(1)                                               ...   
4    Sharmaine EWYL23E0867                                               ...   

  1010 1011 1012 1013 1014 1015 1016 1017 1018 1019  
0                                                    
1                                  0    0    0    0  
2              0    0    0    0    0    0    0    0  
3    0    0    0    0    0    0    0    0    0    0  
4    0    0    0    0    0    0    0    0    0    0  

[5 rows x 1020 columns]
DataFrame for 2023-11-22_Jasmine_Edoofa:
                     0   1   2   3   4   5   6   7   8   9    ... 274 275 276  \
0      Sh