In [1]:
import os
import pandas as pd
import datetime
import re
import logging

# Configure the root logger once for the notebook: every record from DEBUG
# upwards is emitted as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

def list_chat_files(date_directory, team="KAM"):
    """Collect the ``.txt`` chat exports stored below ``date_directory``.

    The directory tree is walked as
    ``<date_directory>/<date folder>/<team>/<person folder>/<file>.txt``;
    anything that is not a directory at the folder levels is ignored.

    Args:
        date_directory: Root folder whose immediate children are the
            per-date folders.
        team: Exact name of the team folder to descend into. Defaults to
            ``"KAM"``, the only team scanned previously.

    Returns:
        list[str]: Full paths of the matching files. Every directory level
        is listed in sorted order so the result (and anything derived from
        it, such as column order) is reproducible across runs and platforms.
    """
    chat_files = []
    for date_folder in sorted(os.listdir(date_directory)):
        date_path = os.path.join(date_directory, date_folder)
        if not os.path.isdir(date_path):
            continue
        # Exact-name comparison against the listing (not a path probe) so the
        # match stays case-sensitive even on case-insensitive filesystems.
        if team not in os.listdir(date_path):
            continue

        team_path = os.path.join(date_path, team)
        if not os.path.isdir(team_path):
            continue

        for person_folder in sorted(os.listdir(team_path)):
            person_path = os.path.join(team_path, person_folder)
            if not os.path.isdir(person_path):
                continue
            for file_name in sorted(os.listdir(person_path)):
                if file_name.endswith('.txt'):
                    chat_files.append(os.path.join(person_path, file_name))

    logging.debug(f"Chat files listed: {chat_files}")
    return chat_files

def parse_chat_file(file_path, expected_date_minus_one, delay_threshold_seconds=900):
    """Parse one WhatsApp text export, keeping only lines from a single day.

    A line is recognised when it starts with a ``dd/mm/yy, h:mm am|pm - ``
    stamp. If the remainder contains ``<sender>: <text>`` it is treated as a
    message from ``sender``; otherwise it is a sender-less system line. Lines
    without a stamp (for example the continuation of a multi-line message)
    are ignored.

    Args:
        file_path: Path of the exported chat file.
        expected_date_minus_one: ``datetime.date`` to keep; lines stamped
            with any other date are dropped.
        delay_threshold_seconds: Gap, in seconds, above which a message from
            a different "person" sender is flagged as delayed. Defaults to
            900 (15 minutes), the value that used to be hard-coded.

    Returns:
        tuple: ``(chat_data, group_name)`` where ``chat_data`` is a list of
        ``(date_time, sender, is_person, delay)`` tuples in file order and
        ``group_name`` is ``extract_group_name(file_path)``.
    """
    message_pattern = re.compile(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*?): (.*)')
    system_pattern = re.compile(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*)')
    # Senders made up solely of '+', digits, whitespace and '-' are not
    # counted as a "person" (presumably unsaved phone numbers - confirm).
    number_only_pattern = re.compile(r'^[+\d\s-]+$')

    chat_data = []
    last_person_time = None
    last_sender = None
    delay_count = 0

    # 'utf-8-sig' reads plain UTF-8 unchanged but also strips a leading BOM;
    # with plain 'utf-8' a BOM made the first line fail the stamp regex and
    # silently disappear.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            message_match = message_pattern.match(line)
            if message_match:
                date_time_str, sender = message_match.group(1, 2)
            else:
                # Only try the looser system pattern when the message
                # pattern did not match.
                system_match = system_pattern.match(line)
                if system_match is None:
                    continue
                date_time_str, sender = system_match.group(1), None

            try:
                date_time = pd.to_datetime(date_time_str, format='%d/%m/%y, %I:%M %p')
            except ValueError:
                # A stamp-shaped but invalid value (e.g. month 13) should not
                # abort the whole file.
                logging.warning(f"Skipping line with unparseable timestamp {date_time_str!r} in {file_path}")
                continue
            if date_time.date() != expected_date_minus_one:
                continue

            is_person = sender is not None and number_only_pattern.match(sender) is None
            delay = False
            if is_person:
                is_delayed = (
                    last_person_time is not None
                    and sender != last_sender
                    and (date_time - last_person_time).total_seconds() > delay_threshold_seconds
                )
                if is_delayed:
                    delay = True
                    delay_count += 1
                else:
                    # NOTE(review): the reference time/sender are refreshed
                    # only when no delay was flagged, so every later message
                    # from a sender other than `last_sender` keeps being
                    # flagged until `last_sender` writes again. Behaviour
                    # kept as-is - confirm this is intended.
                    last_person_time = date_time
                    last_sender = sender

            chat_data.append((date_time, sender, is_person, delay))

    logging.debug(f"File parsed: {file_path}. Delays detected: {delay_count}")
    return chat_data, extract_group_name(file_path)

def create_template_dataframe():
    """Build an empty, column-less DataFrame with one row per minute of a day.

    Returns:
        pandas.DataFrame: 1440 rows indexed by ``'%I:%M %p'`` labels running
        from ``'12:00 AM'`` to ``'11:59 PM'`` in chronological order.
    """
    minutes_per_day = 24 * 60
    midnight = datetime.datetime(2000, 1, 1, 0, 0)  # the date part is irrelevant
    # Format each minute exactly once. The labels are unique by construction,
    # so there is no need to re-parse them with pd.to_datetime (which had to
    # guess the format) or to de-duplicate them afterwards.
    labels = [
        (midnight + datetime.timedelta(minutes=offset)).strftime('%I:%M %p')
        for offset in range(minutes_per_day)
    ]
    return pd.DataFrame(index=pd.Index(labels))

def populate_dataframe(df, parsed_data, group_name):
    """Mark the minutes in which a chat group was active.

    Three 0/1 columns are maintained per group - ``<group>_person``,
    ``<group>_others`` and ``<group>_delay`` - plus one shared
    ``active_chat`` column. Existing columns are reused, so calling this
    repeatedly on the same frame accumulates flags.

    Args:
        df: Frame with one row per minute of the day (row ``i`` is minute
            ``i`` after midnight), as produced by
            ``create_template_dataframe``. It is modified in place.
        parsed_data: Iterable of ``(date_time, sender, is_person, delay)``
            tuples; ``date_time`` must expose ``hour`` and ``minute``.
        group_name: Prefix used for the three per-group column names.

    Returns:
        pandas.DataFrame: The same ``df`` object, for call chaining.
    """
    person_col = f"{group_name}_person"
    others_col = f"{group_name}_others"
    delay_col = f"{group_name}_delay"

    # Create the per-group columns on first use only, so a second file that
    # maps to the same group adds to the existing flags instead of resetting.
    for column in (person_col, others_col, delay_col):
        if column not in df.columns:
            df[column] = 0

    for date_time, sender, is_person, delay in parsed_data:
        # Minute of the day doubles as the positional row number.
        minute_of_day = min(date_time.hour * 60 + date_time.minute, 1439)
        interval = df.index[minute_of_day]

        if is_person:
            df.at[interval, person_col] = 1
        else:
            df.at[interval, others_col] = 1

        if delay:
            df.at[interval, delay_col] = 1

        logging.debug(f"Updated DataFrame at {interval} for {group_name}: Person={is_person}, Delay={delay}")

    if 'active_chat' not in df.columns:
        df['active_chat'] = 0

    # OR this group's activity into the shared flag. Assigning the group's
    # activity directly (as before) wiped out the minutes contributed by
    # every group populated earlier into the same frame.
    group_activity = df[[person_col, others_col]].any(axis=1)
    df['active_chat'] = (df['active_chat'].astype(bool) | group_activity).astype(int)

    return df

def extract_group_name(file_path):
    """Derive the chat/group name from a WhatsApp export file path.

    The directory part and the file extension are dropped, the
    ``'WhatsApp Chat with '`` text is removed, and a trailing duplicate-copy
    marker such as ``(1)`` (with any whitespace before it) is stripped, so
    ``'.../WhatsApp Chat with Aaron 2K23OCT1666R(1).txt'`` becomes
    ``'Aaron 2K23OCT1666R'``.

    Args:
        file_path: Path (or bare file name) of the export.

    Returns:
        str: The cleaned group name.
    """
    # splitext removes only the final extension; splitting on the first '.'
    # truncated any name that itself contains a dot (e.g. "Dr. Jane").
    stem, _extension = os.path.splitext(os.path.basename(file_path))
    group_name = stem.replace('WhatsApp Chat with ', '')
    # Drop "(n)" copy suffixes together with the whitespace before them so
    # "Bob (2)" and "Bob" resolve to the same group.
    return re.sub(r'\s*\(\d+\)$', '', group_name)

# Build one minute-resolution DataFrame per (folder date, person).
# Paths returned by list_chat_files end in <date>/<team>/<person>/<file>.txt.
date_directory = "C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231201T052455Z-001"
chat_files = list_chat_files(date_directory)
dataframes = {}

for file in chat_files:
    # Counting from the end: [-1] file, [-2] person, [-3] team, [-4] date.
    parts = file.split(os.sep)
    date_folder, person = parts[-4], parts[-2]

    try:
        folder_date = pd.to_datetime(date_folder, format='%Y-%m-%d').date()
    except ValueError:
        # Not a YYYY-MM-DD folder: skip it, but leave a trace in the log
        # instead of dropping the file silently.
        logging.warning(f"Skipping {file}: folder {date_folder!r} is not a YYYY-MM-DD date")
        continue

    # Only messages stamped with the day before the folder date are analysed.
    expected_date_minus_one = folder_date - datetime.timedelta(days=1)
    key = f"{folder_date.strftime('%Y-%m-%d')}_{person}"

    if key not in dataframes:
        dataframes[key] = create_template_dataframe()
    parsed_data, group_name = parse_chat_file(file, expected_date_minus_one)
    dataframes[key] = populate_dataframe(dataframes[key], parsed_data, group_name)
    logging.debug(f"Dataframe created for key: {key}")

# Example to show a dataframe. `next(..., None)` avoids a StopIteration when
# no chat file was found or every folder was skipped.
example_key = next(iter(dataframes), None)  # Just for demonstration
if example_key is None:
    logging.warning(f"No chat files were processed under {date_directory}; no dataframes were built")
else:
    logging.debug(f"Example dataframe for key {example_key}: \n{dataframes[example_key]}")


2023-12-01 17:57:03,336 - DEBUG - Chat files listed: ['C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231201T052455Z-001\\2023-12-01\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Aaron 2K23OCT1666R(1).txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231201T052455Z-001\\2023-12-01\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Aaron 2K23OCT1666R.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231201T052455Z-001\\2023-12-01\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Agutha 2K23FEB1751(1).txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231201T052455Z-001\\2023-12-01\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Agutha 2K23FEB1751.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231201T052455Z-001\\2023-12-01\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Anesu 2K22JUN1424(1).txt', 'C:\\Users\\m

2023-12-01 17:57:03,479 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Aaron 2K23OCT1666R(1).txt. Delays detected: 0
2023-12-01 17:57:03,502 - DEBUG - Data populated for group: Aaron 2K23OCT1666R_person
2023-12-01 17:57:03,503 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:03,548 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Aaron 2K23OCT1666R.txt. Delays detected: 0
2023-12-01 17:57:03,554 - DEBUG - Data populated for group: Aaron 2K23OCT1666R_person
2023-12-01 17:57:03,555 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:03,598 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\Wh

2023-12-01 17:57:04,606 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Bervely 2K23FEB2310(1).txt. Delays detected: 0
2023-12-01 17:57:04,613 - DEBUG - Data populated for group: Bervely 2K23FEB2310_person
2023-12-01 17:57:04,613 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:04,636 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Bervely 2K23FEB2310.txt. Delays detected: 0
2023-12-01 17:57:04,655 - DEBUG - Data populated for group: Bervely 2K23FEB2310_person
2023-12-01 17:57:04,655 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:04,691 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoof

2023-12-01 17:57:05,717 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Joyline 2k21OCT0992.txt. Delays detected: 0
2023-12-01 17:57:05,717 - DEBUG - Data populated for group: Joyline 2k21OCT0992_person
2023-12-01 17:57:05,717 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:05,770 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Kelvin 2K23NOV1626R.txt. Delays detected: 0
2023-12-01 17:57:05,779 - DEBUG - Data populated for group: Kelvin 2K23NOV1626R_person
2023-12-01 17:57:05,780 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:05,820 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\W

2023-12-01 17:57:06,789 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Nyasha 2K23FEB1457.txt. Delays detected: 0
2023-12-01 17:57:06,796 - DEBUG - Data populated for group: Nyasha 2K23FEB1457_person
2023-12-01 17:57:06,797 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:06,842 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Paidamoyo 2K23OCT2188.txt. Delays detected: 0
2023-12-01 17:57:06,851 - DEBUG - Data populated for group: Paidamoyo 2K23OCT2188_person
2023-12-01 17:57:06,852 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:06,893 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa

2023-12-01 17:57:08,009 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Tawanda 2K23NOV1068R.txt. Delays detected: 0
2023-12-01 17:57:08,019 - DEBUG - Data populated for group: Tawanda 2K23NOV1068R_person
2023-12-01 17:57:08,019 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:08,078 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa\WhatsApp Chat with Tawanda 2K23OCT0439.txt. Delays detected: 0
2023-12-01 17:57:08,086 - DEBUG - Data populated for group: Tawanda 2K23OCT0439_person
2023-12-01 17:57:08,086 - DEBUG - Dataframe created for key: 2023-12-01_Ashi_Edoofa
2023-12-01 17:57:08,125 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Ashi_Edoofa

2023-12-01 17:57:09,137 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Kirti Edoofa\WhatsApp Chat with Godfrey 2K23MAY2671.txt. Delays detected: 0
2023-12-01 17:57:09,137 - DEBUG - Data populated for group: Godfrey 2K23MAY2671_person
2023-12-01 17:57:09,137 - DEBUG - Dataframe created for key: 2023-12-01_Kirti Edoofa
2023-12-01 17:57:09,239 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Kirti Edoofa\WhatsApp Chat with Gombe 2K23OCT0971R.txt. Delays detected: 0
2023-12-01 17:57:09,245 - DEBUG - Data populated for group: Gombe 2K23OCT0971R_person
2023-12-01 17:57:09,246 - DEBUG - Dataframe created for key: 2023-12-01_Kirti Edoofa
2023-12-01 17:57:09,307 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Kirti Edoof

2023-12-01 17:57:10,353 - DEBUG - Dataframe created for key: 2023-12-01_Kirti Edoofa
2023-12-01 17:57:10,407 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Kirti Edoofa\WhatsApp Chat with Shane 2K23OCT2588.txt. Delays detected: 0
2023-12-01 17:57:10,414 - DEBUG - Data populated for group: Shane 2K23OCT2588_person
2023-12-01 17:57:10,416 - DEBUG - Dataframe created for key: 2023-12-01_Kirti Edoofa
2023-12-01 17:57:10,468 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Kirti Edoofa\WhatsApp Chat with Staylodge 2K23OCT2788.txt. Delays detected: 0
2023-12-01 17:57:10,476 - DEBUG - Data populated for group: Staylodge 2K23OCT2788_person
2023-12-01 17:57:10,477 - DEBUG - Dataframe created for key: 2023-12-01_Kirti Edoofa
2023-12-01 17:57:10,506 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V

2023-12-01 17:57:11,652 - DEBUG - Dataframe created for key: 2023-12-01_Milan_Edoofa
2023-12-01 17:57:11,716 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Milan_Edoofa\WhatsApp Chat with Fidelis 2K23AUG2558.txt. Delays detected: 0
2023-12-01 17:57:11,735 - DEBUG - Data populated for group: Fidelis 2K23AUG2558_person
2023-12-01 17:57:11,737 - DEBUG - Dataframe created for key: 2023-12-01_Milan_Edoofa
2023-12-01 17:57:11,773 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Milan_Edoofa\WhatsApp Chat with Gracious 2K23MAY0959.txt. Delays detected: 0
2023-12-01 17:57:11,778 - DEBUG - Data populated for group: Gracious 2K23MAY0959_person
2023-12-01 17:57:11,778 - DEBUG - Dataframe created for key: 2023-12-01_Milan_Edoofa
2023-12-01 17:57:11,787 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer

2023-12-01 17:57:12,885 - DEBUG - Dataframe created for key: 2023-12-01_Milan_Edoofa
2023-12-01 17:57:12,916 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Milan_Edoofa\WhatsApp Chat with Purity 2K22JUL0742.txt. Delays detected: 0
2023-12-01 17:57:12,916 - DEBUG - Data populated for group: Purity 2K22JUL0742_person
2023-12-01 17:57:12,931 - DEBUG - Dataframe created for key: 2023-12-01_Milan_Edoofa
2023-12-01 17:57:12,977 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Milan_Edoofa\WhatsApp Chat with Recall 2K22MAY3019.txt. Delays detected: 0
2023-12-01 17:57:12,984 - DEBUG - Data populated for group: Recall 2K22MAY3019_person
2023-12-01 17:57:12,985 - DEBUG - Dataframe created for key: 2023-12-01_Milan_Edoofa
2023-12-01 17:57:13,025 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Ch

2023-12-01 17:57:14,100 - DEBUG - Data populated for group: Abraham 2K22NOV0105_person
2023-12-01 17:57:14,100 - DEBUG - Dataframe created for key: 2023-12-01_Shivjeet Edoofa
2023-12-01 17:57:14,166 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Shivjeet Edoofa\WhatsApp Chat with Aisha 2K22SEP0974.txt. Delays detected: 0
2023-12-01 17:57:14,166 - DEBUG - Data populated for group: Aisha 2K22SEP0974_person
2023-12-01 17:57:14,166 - DEBUG - Dataframe created for key: 2023-12-01_Shivjeet Edoofa
2023-12-01 17:57:14,188 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Shivjeet Edoofa\WhatsApp Chat with Allen 2K23NOV0995.txt. Delays detected: 0
2023-12-01 17:57:14,199 - DEBUG - Data populated for group: Allen 2K23NOV0995_person
2023-12-01 17:57:14,200 - DEBUG - Dataframe created for key: 2023-12-01_Shivjeet Ed

2023-12-01 17:57:15,091 - DEBUG - Data populated for group: Kuvimbanashe 2K23AUG2570_person
2023-12-01 17:57:15,092 - DEBUG - Dataframe created for key: 2023-12-01_Shivjeet Edoofa
2023-12-01 17:57:15,145 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Shivjeet Edoofa\WhatsApp Chat with Leonora 2K23APR0552.txt. Delays detected: 0
2023-12-01 17:57:15,153 - DEBUG - Data populated for group: Leonora 2K23APR0552_person
2023-12-01 17:57:15,154 - DEBUG - Dataframe created for key: 2023-12-01_Shivjeet Edoofa
2023-12-01 17:57:15,202 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Shivjeet Edoofa\WhatsApp Chat with Leroy 2K22DEC0817.txt. Delays detected: 0
2023-12-01 17:57:15,208 - DEBUG - Data populated for group: Leroy 2K22DEC0817_person
2023-12-01 17:57:15,209 - DEBUG - Dataframe created for key: 2023-12-01_Sh

2023-12-01 17:57:16,119 - DEBUG - Data populated for group: Shyleen 2K23OCT3022R_person
2023-12-01 17:57:16,120 - DEBUG - Dataframe created for key: 2023-12-01_Shivjeet Edoofa
2023-12-01 17:57:16,161 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Shivjeet Edoofa\WhatsApp Chat with Simbarashe 2K22DEC2390.txt. Delays detected: 0
2023-12-01 17:57:16,168 - DEBUG - Data populated for group: Simbarashe 2K22DEC2390_person
2023-12-01 17:57:16,168 - DEBUG - Dataframe created for key: 2023-12-01_Shivjeet Edoofa
2023-12-01 17:57:16,183 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231201T052455Z-001\2023-12-01\KAM\Shivjeet Edoofa\WhatsApp Chat with Simbarashe 2K23NOV1506.txt. Delays detected: 0
2023-12-01 17:57:16,186 - DEBUG - Data populated for group: Simbarashe 2K23NOV1506_person
2023-12-01 17:57:16,186 - DEBUG - Dataframe created for key: 2

# NOTE(review): repeats, with the same value, the `date_directory` assignment
# made before the chat files are listed; it looks redundant - confirm and remove.
date_directory = "C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231201T052455Z-001"


In [None]:
# Print the first rows of every DataFrame held in the `dataframes` dict
for key in dataframes:
    df = dataframes[key]
    print(f"DataFrame for {key}:\n{df.head()}")


In [None]:
# Target folder for the CSV exports (created on demand)
csv_save_directory = "C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat CSVs"
os.makedirs(csv_save_directory, exist_ok=True)

# Write one "<key>.csv" file per DataFrame, index included
for key in dataframes:
    df = dataframes[key]
    csv_file_path = os.path.join(csv_save_directory, f"{key}.csv")
    df.to_csv(csv_file_path)