In [1]:
import os
import pandas as pd
import datetime
import re
import logging

# Setup basic logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

def list_chat_files(date_directory):
    chat_files = []
    for date_folder in os.listdir(date_directory):
        date_path = os.path.join(date_directory, date_folder)
        if os.path.isdir(date_path):
            for team_folder in os.listdir(date_path):
                if team_folder != "KAM":
                    continue
                
                team_path = os.path.join(date_path, team_folder)
                if os.path.isdir(team_path):
                    for person_folder in os.listdir(team_path):
                        person_path = os.path.join(team_path, person_folder)
                        if os.path.isdir(person_path):
                            for file in os.listdir(person_path):
                                if file.endswith('.txt'):
                                    chat_files.append(os.path.join(person_path, file))
    logging.debug(f"Chat files listed: {chat_files}")
    return chat_files

def parse_chat_file(file_path, expected_date_minus_one):
    chat_data = []
    last_non_person_time = None  # Tracks the time of the last non-person message
    delay_count = 0  # To count the number of delays

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            message_match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*?): (.*)', line)
            system_match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*)', line)
            if message_match:
                date_time_str, sender, message = message_match.groups()
            elif system_match:
                date_time_str, info = system_match.groups()
                sender = None
            else:
                continue

            date_time = pd.to_datetime(date_time_str, format='%d/%m/%y, %I:%M %p')

            if date_time.date() != expected_date_minus_one:
                continue

            is_person = sender is not None and re.match(r'^[+\d\s-]+$', sender) is None

            # Calculate delay
            delay = False
            if is_person and last_non_person_time:
                diff = date_time - last_non_person_time
                delay = diff.total_seconds() > 900  # 15 minutes in seconds
                if delay:
                    delay_count += 1

            chat_data.append((date_time, sender, is_person, delay))

            # Update last_non_person_time for non-person messages
            if not is_person:
                last_non_person_time = date_time

    logging.debug(f"File parsed: {file_path}. Delays detected: {delay_count}")
    return chat_data


def create_template_dataframe():
    times = [datetime.datetime(2000, 1, 1, 0, 0) + datetime.timedelta(minutes=1 * i) for i in range(1440)]
    intervals = [time.strftime('%I:%M %p') for time in times]
    df = pd.DataFrame(index=pd.to_datetime(intervals).strftime('%I:%M %p').unique())  # Ensure unique intervals
    return df

def populate_dataframe(df, parsed_data, group_name):
    # Define new column names
    person_col = f"{group_name}_person"
    others_col = f"{group_name}_others"
    delay_col = f"{group_name}_delay"

    # Initialize new columns
    if person_col not in df.columns:
        df[person_col] = 0
    if others_col not in df.columns:
        df[others_col] = 0
    if delay_col not in df.columns:
        df[delay_col] = 0

    # Populate the new columns with parsed data
    for date_time, sender, is_person, delay in parsed_data:
        interval_index = min((date_time.hour * 60 + date_time.minute) // 1, 1439)
        interval = df.index[interval_index]

        if is_person:
            df.at[interval, person_col] = 1
        else:
            df.at[interval, others_col] = 1

        if delay:
            df.at[interval, delay_col] = 1

        logging.debug(f"Updated DataFrame at {interval} for {group_name}: Person={is_person}, Delay={delay}")

    # Update active_chat column
    if 'active_chat' not in df.columns:
        df['active_chat'] = 0

    relevant_columns = [person_col, others_col]
    df['active_chat'] = df[relevant_columns].any(axis=1).astype(int)

    return df

def extract_group_name(file_path):
    group_name = os.path.basename(file_path).replace('WhatsApp Chat with ', '').split('.')[0]
    group_name = re.sub(r'\(\d+\)$', '', group_name)  # Remove any numbers in parentheses at the end
    return group_name  # Removed the extra "_person" suffix

date_directory = "C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001"
chat_files = list_chat_files(date_directory)
dataframes = {}

for file in chat_files:
    parts = file.split(os.sep)
    date_folder, person = parts[-4], parts[-2]

    try:
        folder_date = pd.to_datetime(date_folder, format='%Y-%m-%d').date()
    except ValueError:
        continue

    expected_date_minus_one = folder_date - datetime.timedelta(days=1)
    key = f"{folder_date.strftime('%Y-%m-%d')}_{person}"

    # Extract group_name using the dedicated function
    group_name = extract_group_name(file)

    if key not in dataframes:
        dataframes[key] = create_template_dataframe()
    
    # Get parsed_data without expecting group_name in return
    parsed_data = parse_chat_file(file, expected_date_minus_one)
    
    dataframes[key] = populate_dataframe(dataframes[key], parsed_data, group_name)
    logging.debug(f"Dataframe created for key: {key}")

# Example to show a dataframe
example_key = next(iter(dataframes))  # Just for demonstration
logging.debug(f"Example dataframe for key {example_key}: \n{dataframes[example_key]}")


2023-12-05 17:44:57,460 - DEBUG - Chat files listed: ['C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Agape 2K23NOV2370R.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Angeline 2K23FEB2804.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Auxillia 2K23JUL1004.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Chiedza 2K22FEB1213.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Courage 2K22JUL1682.txt', 'C:\\Users\\mau

2023-12-05 17:44:57,676 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Agape 2K23NOV2370R.txt. Delays detected: 1
2023-12-05 17:44:57,680 - DEBUG - Updated DataFrame at 01:14 AM for Agape 2K23NOV2370R: Person=False, Delay=False
2023-12-05 17:44:57,681 - DEBUG - Updated DataFrame at 07:08 AM for Agape 2K23NOV2370R: Person=False, Delay=False
2023-12-05 17:44:57,682 - DEBUG - Updated DataFrame at 07:08 AM for Agape 2K23NOV2370R: Person=False, Delay=False
2023-12-05 17:44:57,683 - DEBUG - Updated DataFrame at 07:08 AM for Agape 2K23NOV2370R: Person=False, Delay=False
2023-12-05 17:44:57,684 - DEBUG - Updated DataFrame at 07:08 AM for Agape 2K23NOV2370R: Person=False, Delay=False
2023-12-05 17:44:57,685 - DEBUG - Updated DataFrame at 07:08 AM for Agape 2K23NOV2370R: Person=False, Delay=False
2023-12-05 17:44:57,686 - DEBUG - Updated DataFrame at 07:46 AM for Agape 2K2

2023-12-05 17:44:58,239 - DEBUG - Updated DataFrame at 08:34 AM for Chiedza 2K22FEB1213: Person=False, Delay=False
2023-12-05 17:44:58,243 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-05 17:44:58,333 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Courage 2K22JUL1682.txt. Delays detected: 0
2023-12-05 17:44:58,333 - DEBUG - Updated DataFrame at 08:35 AM for Courage 2K22JUL1682: Person=False, Delay=False
2023-12-05 17:44:58,333 - DEBUG - Updated DataFrame at 12:51 PM for Courage 2K22JUL1682: Person=False, Delay=False
2023-12-05 17:44:58,333 - DEBUG - Updated DataFrame at 12:56 PM for Courage 2K22JUL1682: Person=False, Delay=False
2023-12-05 17:44:58,349 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-05 17:44:58,431 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-202

2023-12-05 17:44:58,613 - DEBUG - Updated DataFrame at 08:44 AM for Danai 2K23SEP1134R: Person=True, Delay=False
2023-12-05 17:44:58,615 - DEBUG - Updated DataFrame at 08:54 AM for Danai 2K23SEP1134R: Person=False, Delay=False
2023-12-05 17:44:58,617 - DEBUG - Updated DataFrame at 08:58 AM for Danai 2K23SEP1134R: Person=True, Delay=False
2023-12-05 17:44:58,619 - DEBUG - Updated DataFrame at 08:58 AM for Danai 2K23SEP1134R: Person=True, Delay=False
2023-12-05 17:44:58,620 - DEBUG - Updated DataFrame at 08:58 AM for Danai 2K23SEP1134R: Person=False, Delay=False
2023-12-05 17:44:58,622 - DEBUG - Updated DataFrame at 08:59 AM for Danai 2K23SEP1134R: Person=False, Delay=False
2023-12-05 17:44:58,623 - DEBUG - Updated DataFrame at 09:01 AM for Danai 2K23SEP1134R: Person=True, Delay=False
2023-12-05 17:44:58,628 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-05 17:44:58,738 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-do

2023-12-05 17:44:59,650 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-05 17:44:59,755 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Kupakwashe 2K23NOV1024.txt. Delays detected: 0
2023-12-05 17:44:59,762 - DEBUG - Updated DataFrame at 07:41 AM for Kupakwashe 2K23NOV1024: Person=True, Delay=False
2023-12-05 17:44:59,763 - DEBUG - Updated DataFrame at 09:30 AM for Kupakwashe 2K23NOV1024: Person=False, Delay=False
2023-12-05 17:44:59,765 - DEBUG - Updated DataFrame at 09:33 AM for Kupakwashe 2K23NOV1024: Person=True, Delay=False
2023-12-05 17:44:59,767 - DEBUG - Updated DataFrame at 09:41 AM for Kupakwashe 2K23NOV1024: Person=False, Delay=False
2023-12-05 17:44:59,769 - DEBUG - Updated DataFrame at 09:41 AM for Kupakwashe 2K23NOV1024: Person=False, Delay=False
2023-12-05 17:44:59,770 - DEBUG - Updated DataFrame at 09:42 AM for Kupakwashe 2K23NO

2023-12-05 17:45:00,037 - DEBUG - Updated DataFrame at 10:24 AM for Makaita 2K23NOV2346: Person=True, Delay=False
2023-12-05 17:45:00,038 - DEBUG - Updated DataFrame at 10:25 AM for Makaita 2K23NOV2346: Person=False, Delay=False
2023-12-05 17:45:00,038 - DEBUG - Updated DataFrame at 10:25 AM for Makaita 2K23NOV2346: Person=True, Delay=False
2023-12-05 17:45:00,038 - DEBUG - Updated DataFrame at 10:26 AM for Makaita 2K23NOV2346: Person=True, Delay=False
2023-12-05 17:45:00,038 - DEBUG - Updated DataFrame at 10:26 AM for Makaita 2K23NOV2346: Person=True, Delay=False
2023-12-05 17:45:00,049 - DEBUG - Updated DataFrame at 10:26 AM for Makaita 2K23NOV2346: Person=True, Delay=False
2023-12-05 17:45:00,053 - DEBUG - Updated DataFrame at 10:26 AM for Makaita 2K23NOV2346: Person=False, Delay=False
2023-12-05 17:45:00,055 - DEBUG - Updated DataFrame at 10:27 AM for Makaita 2K23NOV2346: Person=True, Delay=False
2023-12-05 17:45:00,056 - DEBUG - Updated DataFrame at 10:27 AM for Makaita 2K23NOV234

2023-12-05 17:45:00,409 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-05 17:45:00,499 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Moreblessing 2K22AUG1328.txt. Delays detected: 0
2023-12-05 17:45:00,506 - DEBUG - Updated DataFrame at 08:35 AM for Moreblessing 2K22AUG1328: Person=False, Delay=False
2023-12-05 17:45:00,507 - DEBUG - Updated DataFrame at 09:22 AM for Moreblessing 2K22AUG1328: Person=False, Delay=False
2023-12-05 17:45:00,509 - DEBUG - Updated DataFrame at 09:23 AM for Moreblessing 2K22AUG1328: Person=False, Delay=False
2023-12-05 17:45:00,518 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-05 17:45:00,642 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Mwango 2K23NOV2222R.txt. Delays

2023-12-05 17:45:02,687 - DEBUG - Updated DataFrame at 02:45 AM for Tadiwanashe 2K23OCT2045_: Person=False, Delay=False
2023-12-05 17:45:02,690 - DEBUG - Updated DataFrame at 07:47 AM for Tadiwanashe 2K23OCT2045_: Person=True, Delay=True
2023-12-05 17:45:02,693 - DEBUG - Updated DataFrame at 07:49 AM for Tadiwanashe 2K23OCT2045_: Person=True, Delay=True
2023-12-05 17:45:02,695 - DEBUG - Updated DataFrame at 09:47 AM for Tadiwanashe 2K23OCT2045_: Person=False, Delay=False
2023-12-05 17:45:02,698 - DEBUG - Updated DataFrame at 10:00 AM for Tadiwanashe 2K23OCT2045_: Person=True, Delay=False
2023-12-05 17:45:02,703 - DEBUG - Updated DataFrame at 11:34 AM for Tadiwanashe 2K23OCT2045_: Person=False, Delay=False
2023-12-05 17:45:02,706 - DEBUG - Updated DataFrame at 01:57 PM for Tadiwanashe 2K23OCT2045_: Person=True, Delay=True
2023-12-05 17:45:02,708 - DEBUG - Updated DataFrame at 02:01 PM for Tadiwanashe 2K23OCT2045_: Person=False, Delay=False
2023-12-05 17:45:02,710 - DEBUG - Updated DataF

2023-12-05 17:45:03,199 - DEBUG - Updated DataFrame at 11:58 AM for Tariro 2K23NOV2372R: Person=False, Delay=False
2023-12-05 17:45:03,201 - DEBUG - Updated DataFrame at 11:59 AM for Tariro 2K23NOV2372R: Person=True, Delay=False
2023-12-05 17:45:03,209 - DEBUG - Updated DataFrame at 12:05 PM for Tariro 2K23NOV2372R: Person=False, Delay=False
2023-12-05 17:45:03,211 - DEBUG - Updated DataFrame at 12:07 PM for Tariro 2K23NOV2372R: Person=True, Delay=False
2023-12-05 17:45:03,213 - DEBUG - Updated DataFrame at 12:08 PM for Tariro 2K23NOV2372R: Person=True, Delay=False
2023-12-05 17:45:03,215 - DEBUG - Updated DataFrame at 12:09 PM for Tariro 2K23NOV2372R: Person=False, Delay=False
2023-12-05 17:45:03,219 - DEBUG - Updated DataFrame at 12:11 PM for Tariro 2K23NOV2372R: Person=True, Delay=False
2023-12-05 17:45:03,224 - DEBUG - Updated DataFrame at 12:12 PM for Tariro 2K23NOV2372R: Person=False, Delay=False
2023-12-05 17:45:03,227 - DEBUG - Updated DataFrame at 12:12 PM for Tariro 2K23NOV23

2023-12-05 17:45:03,979 - DEBUG - Updated DataFrame at 12:03 PM for Violet 2K23NOV1095: Person=True, Delay=False
2023-12-05 17:45:03,980 - DEBUG - Updated DataFrame at 12:04 PM for Violet 2K23NOV1095: Person=False, Delay=False
2023-12-05 17:45:03,982 - DEBUG - Updated DataFrame at 12:07 PM for Violet 2K23NOV1095: Person=True, Delay=False
2023-12-05 17:45:03,984 - DEBUG - Updated DataFrame at 12:07 PM for Violet 2K23NOV1095: Person=True, Delay=False
2023-12-05 17:45:03,984 - DEBUG - Updated DataFrame at 12:07 PM for Violet 2K23NOV1095: Person=True, Delay=False
2023-12-05 17:45:03,984 - DEBUG - Updated DataFrame at 12:09 PM for Violet 2K23NOV1095: Person=False, Delay=False
2023-12-05 17:45:03,984 - DEBUG - Updated DataFrame at 12:11 PM for Violet 2K23NOV1095: Person=True, Delay=False
2023-12-05 17:45:03,984 - DEBUG - Updated DataFrame at 12:12 PM for Violet 2K23NOV1095: Person=False, Delay=False
2023-12-05 17:45:03,984 - DEBUG - Updated DataFrame at 12:15 PM for Violet 2K23NOV1095: Perso

2023-12-05 17:45:04,292 - DEBUG - Updated DataFrame at 08:46 AM for Whatfor 2K23NOV0371: Person=True, Delay=False
2023-12-05 17:45:04,293 - DEBUG - Updated DataFrame at 08:48 AM for Whatfor 2K23NOV0371: Person=False, Delay=False
2023-12-05 17:45:04,295 - DEBUG - Updated DataFrame at 08:49 AM for Whatfor 2K23NOV0371: Person=True, Delay=False
2023-12-05 17:45:04,301 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-05 17:45:04,999 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Kirti Edoofa\WhatsApp Chat with Alson 2K22SEP0243R.txt. Delays detected: 0
2023-12-05 17:45:04,999 - DEBUG - Updated DataFrame at 11:12 AM for Alson 2K22SEP0243R: Person=False, Delay=False
2023-12-05 17:45:05,014 - DEBUG - Updated DataFrame at 11:15 AM for Alson 2K22SEP0243R: Person=True, Delay=False
2023-12-05 17:45:05,014 - DEBUG - Updated DataFrame at 11:18 AM for Alson 2K22SEP0243R: Person=False, Dela

2023-12-05 17:45:06,873 - DEBUG - Updated DataFrame at 08:40 AM for Godfrey 2K23MAY2671: Person=True, Delay=False
2023-12-05 17:45:06,874 - DEBUG - Updated DataFrame at 08:40 AM for Godfrey 2K23MAY2671: Person=True, Delay=False
2023-12-05 17:45:06,875 - DEBUG - Updated DataFrame at 08:40 AM for Godfrey 2K23MAY2671: Person=True, Delay=False
2023-12-05 17:45:06,884 - DEBUG - Dataframe created for key: 2023-12-03_Kirti Edoofa
2023-12-05 17:45:07,073 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Kirti Edoofa\WhatsApp Chat with Gombe 2K23OCT0971R.txt. Delays detected: 0
2023-12-05 17:45:07,078 - DEBUG - Updated DataFrame at 08:34 AM for Gombe 2K23OCT0971R: Person=True, Delay=False
2023-12-05 17:45:07,080 - DEBUG - Updated DataFrame at 05:46 PM for Gombe 2K23OCT0971R: Person=False, Delay=False
2023-12-05 17:45:07,082 - DEBUG - Updated DataFrame at 05:46 PM for Gombe 2K23OCT0971R: Person=False, Dela

2023-12-05 17:45:08,385 - DEBUG - Updated DataFrame at 01:23 PM for Mwenje 2K23SEP1192R: Person=False, Delay=False
2023-12-05 17:45:08,393 - DEBUG - Dataframe created for key: 2023-12-03_Kirti Edoofa
2023-12-05 17:45:08,478 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Kirti Edoofa\WhatsApp Chat with Nyasha 2K23JUN2334.txt. Delays detected: 0
2023-12-05 17:45:08,482 - DEBUG - Updated DataFrame at 08:33 AM for Nyasha 2K23JUN2334: Person=True, Delay=False
2023-12-05 17:45:08,484 - DEBUG - Updated DataFrame at 11:54 PM for Nyasha 2K23JUN2334: Person=False, Delay=False
2023-12-05 17:45:08,490 - DEBUG - Dataframe created for key: 2023-12-03_Kirti Edoofa
2023-12-05 17:45:08,585 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Kirti Edoofa\WhatsApp Chat with Rangarirai 2K23JUL1024.txt. Delays detected: 0
2023

2023-12-05 17:45:09,667 - DEBUG - Updated DataFrame at 10:52 AM for Tinotenda 2K23AUG1863: Person=False, Delay=False
2023-12-05 17:45:09,669 - DEBUG - Updated DataFrame at 10:53 AM for Tinotenda 2K23AUG1863: Person=True, Delay=False
2023-12-05 17:45:09,670 - DEBUG - Updated DataFrame at 10:55 AM for Tinotenda 2K23AUG1863: Person=False, Delay=False
2023-12-05 17:45:09,671 - DEBUG - Updated DataFrame at 10:56 AM for Tinotenda 2K23AUG1863: Person=True, Delay=False
2023-12-05 17:45:09,672 - DEBUG - Updated DataFrame at 10:57 AM for Tinotenda 2K23AUG1863: Person=False, Delay=False
2023-12-05 17:45:09,672 - DEBUG - Updated DataFrame at 10:57 AM for Tinotenda 2K23AUG1863: Person=False, Delay=False
2023-12-05 17:45:09,672 - DEBUG - Updated DataFrame at 10:58 AM for Tinotenda 2K23AUG1863: Person=True, Delay=False
2023-12-05 17:45:09,672 - DEBUG - Updated DataFrame at 10:59 AM for Tinotenda 2K23AUG1863: Person=False, Delay=False
2023-12-05 17:45:09,686 - DEBUG - Dataframe created for key: 2023-1

2023-12-05 17:45:11,944 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Shivjeet Edoofa\WhatsApp Chat with Blessing 2K23MAR3173.txt. Delays detected: 0
2023-12-05 17:45:11,952 - DEBUG - Updated DataFrame at 03:01 PM for Blessing 2K23MAR3173: Person=False, Delay=False
2023-12-05 17:45:11,959 - DEBUG - Dataframe created for key: 2023-12-03_Shivjeet Edoofa
2023-12-05 17:45:12,180 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Shivjeet Edoofa\WhatsApp Chat with Bridget 2K23SEP2291.txt. Delays detected: 4
2023-12-05 17:45:12,188 - DEBUG - Updated DataFrame at 08:34 AM for Bridget 2K23SEP2291: Person=True, Delay=False
2023-12-05 17:45:12,188 - DEBUG - Updated DataFrame at 12:18 PM for Bridget 2K23SEP2291: Person=False, Delay=False
2023-12-05 17:45:12,188 - DEBUG - Updated DataFrame at 01:14 PM for Bridget 2K

2023-12-05 17:45:13,122 - DEBUG - Updated DataFrame at 12:44 PM for Malcom 2K23NOV2028: Person=False, Delay=False
2023-12-05 17:45:13,125 - DEBUG - Updated DataFrame at 12:44 PM for Malcom 2K23NOV2028: Person=False, Delay=False
2023-12-05 17:45:13,129 - DEBUG - Updated DataFrame at 12:44 PM for Malcom 2K23NOV2028: Person=False, Delay=False
2023-12-05 17:45:13,133 - DEBUG - Updated DataFrame at 12:44 PM for Malcom 2K23NOV2028: Person=False, Delay=False
2023-12-05 17:45:13,135 - DEBUG - Updated DataFrame at 12:45 PM for Malcom 2K23NOV2028: Person=False, Delay=False
2023-12-05 17:45:13,138 - DEBUG - Updated DataFrame at 12:45 PM for Malcom 2K23NOV2028: Person=False, Delay=False
2023-12-05 17:45:13,142 - DEBUG - Updated DataFrame at 12:49 PM for Malcom 2K23NOV2028: Person=False, Delay=False
2023-12-05 17:45:13,145 - DEBUG - Updated DataFrame at 12:49 PM for Malcom 2K23NOV2028: Person=False, Delay=False
2023-12-05 17:45:13,147 - DEBUG - Updated DataFrame at 12:53 PM for Malcom 2K23NOV2028: 

2023-12-05 17:45:14,116 - DEBUG - Updated DataFrame at 09:00 AM for Senior 2K23NOV1790: Person=True, Delay=False
2023-12-05 17:45:14,116 - DEBUG - Updated DataFrame at 09:00 AM for Senior 2K23NOV1790: Person=True, Delay=False
2023-12-05 17:45:14,132 - DEBUG - Updated DataFrame at 09:02 AM for Senior 2K23NOV1790: Person=False, Delay=False
2023-12-05 17:45:14,133 - DEBUG - Updated DataFrame at 09:03 AM for Senior 2K23NOV1790: Person=True, Delay=False
2023-12-05 17:45:14,135 - DEBUG - Updated DataFrame at 03:59 PM for Senior 2K23NOV1790: Person=False, Delay=False
2023-12-05 17:45:14,137 - DEBUG - Updated DataFrame at 07:12 PM for Senior 2K23NOV1790: Person=False, Delay=False
2023-12-05 17:45:14,139 - DEBUG - Updated DataFrame at 07:12 PM for Senior 2K23NOV1790: Person=False, Delay=False
2023-12-05 17:45:14,140 - DEBUG - Updated DataFrame at 07:12 PM for Senior 2K23NOV1790: Person=False, Delay=False
2023-12-05 17:45:14,142 - DEBUG - Updated DataFrame at 07:12 PM for Senior 2K23NOV1790: Per

2023-12-05 17:45:15,239 - DEBUG - Updated DataFrame at 10:43 AM for Tinotenda 2K23FEB2181R: Person=False, Delay=False
2023-12-05 17:45:15,241 - DEBUG - Updated DataFrame at 10:47 AM for Tinotenda 2K23FEB2181R: Person=True, Delay=False
2023-12-05 17:45:15,242 - DEBUG - Updated DataFrame at 12:19 PM for Tinotenda 2K23FEB2181R: Person=False, Delay=False
2023-12-05 17:45:15,243 - DEBUG - Updated DataFrame at 01:16 PM for Tinotenda 2K23FEB2181R: Person=True, Delay=True
2023-12-05 17:45:15,245 - DEBUG - Updated DataFrame at 02:13 PM for Tinotenda 2K23FEB2181R: Person=False, Delay=False
2023-12-05 17:45:15,254 - DEBUG - Dataframe created for key: 2023-12-03_Shivjeet Edoofa
2023-12-05 17:45:15,282 - DEBUG - Example dataframe for key 2023-12-03_Ashi_Edoofa: 
          Agape 2K23NOV2370R_person  Agape 2K23NOV2370R_others  \
12:00 AM                          0                          0   
12:01 AM                          0                          0   
12:02 AM                          0       

In [None]:
# Directory to save CSV files
csv_save_directory = "C:\\Users\\mauriceyeng\\Python\\Daily-Reports\\Chat CSVs"
os.makedirs(csv_save_directory, exist_ok=True)

# Saving each DataFrame as a CSV
for key, df in dataframes.items():
    csv_file_path = os.path.join(csv_save_directory, f"{key}.csv")
    df.to_csv(csv_file_path)
    print(f"file saved as {key}")

# Fixing bugs, hehe, still good news

In [2]:
import os
import pandas as pd
import datetime
import re
import logging

# Setup basic logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

def list_chat_files(date_directory):
    # Lists all chat files in a given directory structure
    chat_files = []
    for date_folder in os.listdir(date_directory):
        date_path = os.path.join(date_directory, date_folder)
        if os.path.isdir(date_path):
            for team_folder in os.listdir(date_path):
                if team_folder != "KAM":
                    continue
                team_path = os.path.join(date_path, team_folder)
                if os.path.isdir(team_path):
                    for person_folder in os.listdir(team_path):
                        person_path = os.path.join(team_path, person_folder)
                        if os.path.isdir(person_path):
                            for file in os.listdir(person_path):
                                if file.endswith('.txt'):
                                    chat_files.append(os.path.join(person_path, file))
    logging.debug(f"Chat files listed: {chat_files}")
    return chat_files

def parse_chat_file(file_path, expected_date_minus_one):
    # Parses a chat file to extract date, sender, message, and delay information
    chat_data = []
    last_non_person_time = None  # Tracks the time of the last non-person message
    delay_count = 0  # To count the number of delays
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            message_match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*?): (.*)', line)
            system_match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*)', line)
            if message_match:
                date_time_str, sender, message = message_match.groups()
            elif system_match:
                date_time_str, info = system_match.groups()
                sender = None
            else:
                continue
            date_time = pd.to_datetime(date_time_str, format='%d/%m/%y, %I:%M %p')
            if date_time.date() != expected_date_minus_one:
                continue
            is_person = sender is not None and re.match(r'^[+\d\s-]+$', sender) is None
            delay = False
            if is_person and last_non_person_time:
                diff = date_time - last_non_person_time
                delay = diff.total_seconds() > 900  # 15 minutes in seconds
                if delay:
                    delay_count += 1
            chat_data.append((date_time, sender, is_person, delay))
            if not is_person:
                last_non_person_time = date_time
    logging.debug(f"File parsed: {file_path}. Delays detected: {delay_count}")
    return chat_data

def create_template_dataframe():
    # Creates a template DataFrame with a unique time interval index for each minute of the day
    times = [datetime.datetime(2000, 1, 1, 0, 0) + datetime.timedelta(minutes=1 * i) for i in range(1440)]
    intervals = [time.strftime('%I:%M %p') for time in times]
    df = pd.DataFrame(index=pd.to_datetime(intervals).strftime('%I:%M %p').unique())  # Ensure unique intervals
    return df

def populate_dataframe(df, parsed_data, group_name):
    # Populates a DataFrame with parsed chat data
    person_col = f"{group_name}_person"
    others_col = f"{group_name}_others"
    delay_col = f"{group_name}_delay"
    if person_col not in df.columns:
        df[person_col] = 0
    if others_col not in df.columns:
        df[others_col] = 0
    if delay_col not in df.columns:
        df[delay_col] = 0
    for date_time, sender, is_person, delay in parsed_data:
        interval_index = min((date_time.hour * 60 + date_time.minute) // 1, 1439)
        interval = df.index[interval_index]
        if is_person:
            df.at[interval, person_col] = 1
        else:
            df.at[interval, others_col] = 1
        if delay:
            df.at[interval, delay_col] = 1
    logging.debug(f"Updated DataFrame at {interval} for {group_name}: Person={is_person}, Delay={delay}")
    return df

def calculate_active_chats_at_timestamp(df):
    active_chats = []
    num_columns = len(df.columns)
    for i in range(0, num_columns, 3):  # Iterate in steps of 3 to skip delay columns
        # Check if the next column index is within the DataFrame
        if i + 1 < num_columns:
            chat_active = (df.iloc[:, i] | df.iloc[:, i + 1]).astype(int)
            active_chats.append(chat_active)
        else:
            # If not, just add the current column's data
            chat_active = df.iloc[:, i].astype(int)
            active_chats.append(chat_active)

    df['active_chats_at_timestamp'] = pd.DataFrame(active_chats).sum().values
    return df


def extract_group_name(file_path):
    # Extracts and cleans the group name from the file path
    group_name = os.path.basename(file_path).replace('WhatsApp Chat with ', '').split('.')[0]
    group_name = re.sub(r'\(\d+\)$', '', group_name)
    return group_name

date_directory = "C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001"
chat_files = list_chat_files(date_directory)
dataframes = {}

for file in chat_files:
    # Main processing loop for each chat file
    parts = file.split(os.sep)
    date_folder, person = parts[-4], parts[-2]
    try:
        folder_date = pd.to_datetime(date_folder, format='%Y-%m-%d').date()
    except ValueError:
        continue
    expected_date_minus_one = folder_date - datetime.timedelta(days=1)
    key = f"{folder_date.strftime('%Y-%m-%d')}_{person}"
    group_name = extract_group_name(file)
    if key not in dataframes:
        dataframes[key] = create_template_dataframe()
    parsed_data = parse_chat_file(file, expected_date_minus_one)
    dataframes[key] = populate_dataframe(dataframes[key], parsed_data, group_name)
    dataframes[key] = calculate_active_chats_at_timestamp(dataframes[key])
    logging.debug(f"Dataframe created for key: {key}")

# Example to show a dataframe
example_key = next(iter(dataframes))  # Just for demonstration
logging.debug(f"Example dataframe for key {example_key}: \n{dataframes[example_key]}")


2023-12-04 16:25:54,471 - DEBUG - Chat files listed: ['C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Agape 2K23NOV2370R.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Angeline 2K23FEB2804.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Auxillia 2K23JUL1004.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Chiedza 2K22FEB1213.txt', 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001\\2023-12-03\\KAM\\Ashi_Edoofa\\WhatsApp Chat with Courage 2K22JUL1682.txt', 'C:\\Users\\mau

2023-12-04 16:25:54,553 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Agape 2K23NOV2370R.txt. Delays detected: 1
2023-12-04 16:25:54,564 - DEBUG - Updated DataFrame at 01:24 PM for Agape 2K23NOV2370R: Person=True, Delay=False
2023-12-04 16:25:54,609 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-04 16:25:54,667 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Angeline 2K23FEB2804.txt. Delays detected: 0
2023-12-04 16:25:54,669 - DEBUG - Updated DataFrame at 08:39 AM for Angeline 2K23FEB2804: Person=False, Delay=False
2023-12-04 16:25:54,711 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-04 16:25:54,764 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-dow

2023-12-04 16:25:56,176 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-04 16:25:56,219 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Nyasha 2K23OCT1144R.txt. Delays detected: 0
2023-12-04 16:25:56,222 - DEBUG - Updated DataFrame at 07:57 AM for Nyasha 2K23OCT1144R: Person=True, Delay=False
2023-12-04 16:25:56,275 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-04 16:25:56,309 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Ashi_Edoofa\WhatsApp Chat with Panashe 2K22MAR1786.txt. Delays detected: 0
2023-12-04 16:25:56,311 - DEBUG - Updated DataFrame at 10:47 AM for Panashe 2K22MAR1786: Person=False, Delay=False
2023-12-04 16:25:56,372 - DEBUG - Dataframe created for key: 2023-12-03_Ashi_Edoofa
2023-12-04 16:25:56,438 - DEBUG - File 

2023-12-04 16:25:58,146 - DEBUG - Updated DataFrame at 01:06 PM for Carol 2K23JUN1515: Person=False, Delay=False
2023-12-04 16:25:58,197 - DEBUG - Dataframe created for key: 2023-12-03_Kirti Edoofa
2023-12-04 16:25:58,245 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Kirti Edoofa\WhatsApp Chat with Chanice 2K23JUL0590.txt. Delays detected: 0
2023-12-04 16:25:58,247 - DEBUG - Updated DataFrame at 08:28 AM for Chanice 2K23JUL0590: Person=True, Delay=False
2023-12-04 16:25:58,297 - DEBUG - Dataframe created for key: 2023-12-03_Kirti Edoofa
2023-12-04 16:25:58,332 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Kirti Edoofa\WhatsApp Chat with CHIKONDI 2K23JUN2048.txt. Delays detected: 0
2023-12-04 16:25:58,333 - DEBUG - Updated DataFrame at 10:55 AM for CHIKONDI 2K23JUN2048: Person=True, Delay=False
2023-

2023-12-04 16:25:59,993 - DEBUG - Updated DataFrame at 08:22 AM for Ruth 2K23APR2291: Person=True, Delay=False
2023-12-04 16:26:00,050 - DEBUG - Dataframe created for key: 2023-12-03_Kirti Edoofa
2023-12-04 16:26:00,056 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Kirti Edoofa\WhatsApp Chat with Samantha 2K22AUG2939.txt. Delays detected: 0
2023-12-04 16:26:00,056 - DEBUG - Updated DataFrame at 09:24 AM for Samantha 2K22AUG2939: Person=True, Delay=False
2023-12-04 16:26:00,125 - DEBUG - Dataframe created for key: 2023-12-03_Kirti Edoofa
2023-12-04 16:26:00,160 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Kirti Edoofa\WhatsApp Chat with Scott 2K22JUL2093.txt. Delays detected: 0
2023-12-04 16:26:00,162 - DEBUG - Updated DataFrame at 12:12 AM for Scott 2K22JUL2093: Person=False, Delay=False
2023-12-04

2023-12-04 16:26:01,930 - DEBUG - Updated DataFrame at 08:34 AM for Shantel 2K23JAN0755: Person=False, Delay=False
2023-12-04 16:26:01,980 - DEBUG - Dataframe created for key: 2023-12-03_Milan_Edoofa
2023-12-04 16:26:02,082 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Shivjeet Edoofa\WhatsApp Chat with Abraham 2K22NOV0105.txt. Delays detected: 1
2023-12-04 16:26:02,083 - DEBUG - Updated DataFrame at 08:45 PM for Abraham 2K22NOV0105: Person=False, Delay=False
2023-12-04 16:26:02,132 - DEBUG - Dataframe created for key: 2023-12-03_Shivjeet Edoofa
2023-12-04 16:26:02,144 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Shivjeet Edoofa\WhatsApp Chat with Adonis 2K23NOV2324R.txt. Delays detected: 0
2023-12-04 16:26:02,145 - DEBUG - Updated DataFrame at 08:08 PM for Adonis 2K23NOV2324R: Person=False, Delay=

2023-12-04 16:26:03,472 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Shivjeet Edoofa\WhatsApp Chat with Shepherd 2K23AUG1035.txt. Delays detected: 0
2023-12-04 16:26:03,472 - DEBUG - Updated DataFrame at 06:29 PM for Shepherd 2K23AUG1035: Person=False, Delay=False
2023-12-04 16:26:03,535 - DEBUG - Dataframe created for key: 2023-12-03_Shivjeet Edoofa
2023-12-04 16:26:03,542 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder from Drive\drive-download-20231204T064112Z-001\2023-12-03\KAM\Shivjeet Edoofa\WhatsApp Chat with Solomon 2K23NOV2295R.txt. Delays detected: 0
2023-12-04 16:26:03,542 - DEBUG - Updated DataFrame at 02:28 PM for Solomon 2K23NOV2295R: Person=False, Delay=False
2023-12-04 16:26:03,588 - DEBUG - Dataframe created for key: 2023-12-03_Shivjeet Edoofa
2023-12-04 16:26:03,631 - DEBUG - File parsed: C:\Users\maurice\Documents\Chat-Analyzer-V2\Chat Folder

In [3]:
# Directory to save CSV files
csv_save_directory = "C:\\Users\\mauriceyeng\\Python\\Daily-Reports\\Chat CSVs"
os.makedirs(csv_save_directory, exist_ok=True)

# Saving each DataFrame as a CSV
for key, df in dataframes.items():
    csv_file_path = os.path.join(csv_save_directory, f"{key}_v2.csv")
    df.to_csv(csv_file_path)
    print(f"file saved as {key}_v2")

PermissionError: [WinError 5] Access is denied: 'C:\\Users\\mauriceyeng'

In [10]:
import os
import pandas as pd
import datetime
import re
import logging

# Setup basic logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

def list_chat_files(date_directory):
    chat_files = []
    for date_folder in os.listdir(date_directory):
        date_path = os.path.join(date_directory, date_folder)
        if os.path.isdir(date_path):
            for team_folder in os.listdir(date_path):
                if team_folder != "KAM":
                    continue
                
                team_path = os.path.join(date_path, team_folder)
                if os.path.isdir(team_path):
                    for person_folder in os.listdir(team_path):
                        person_path = os.path.join(team_path, person_folder)
                        if os.path.isdir(person_path):
                            for file in os.listdir(person_path):
                                if file.endswith('.txt'):
                                    chat_files.append(os.path.join(person_path, file))
    logging.debug(f"Chat files listed: {chat_files}")
    return chat_files

def parse_chat_file(file_path, expected_date_minus_one):
    chat_data = []
    last_non_person_time = None  # Tracks the time of the last non-person message
    delay_count = 0  # To count the number of delays

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            message_match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*?): (.*)', line)
            system_match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) - (.*)', line)
            if message_match:
                date_time_str, sender, message = message_match.groups()
            elif system_match:
                date_time_str, info = system_match.groups()
                sender = None
            else:
                continue

            date_time = pd.to_datetime(date_time_str, format='%d/%m/%y, %I:%M %p')

            if date_time.date() != expected_date_minus_one:
                continue

            is_person = sender is not None and re.match(r'^[+\d\s-]+$', sender) is None

            # Calculate delay
            delay = False
            if is_person and last_non_person_time:
                diff = date_time - last_non_person_time
                delay = diff.total_seconds() > 900  # 15 minutes in seconds
                if delay:
                    delay_count += 1

            chat_data.append((date_time, sender, is_person, delay))

            # Update last_non_person_time for non-person messages
            if not is_person:
                last_non_person_time = date_time

    logging.debug(f"File parsed: {file_path}. Delays detected: {delay_count}")
    return chat_data


def create_template_dataframe():
    times = [datetime.datetime(2000, 1, 1, 0, 0) + datetime.timedelta(minutes=1 * i) for i in range(1440)]
    intervals = [time.strftime('%I:%M %p') for time in times]
    df = pd.DataFrame(index=pd.to_datetime(intervals).strftime('%I:%M %p').unique())  # Ensure unique intervals
    return df

def populate_dataframe(df, parsed_data, group_name):
    # Define new column names
    person_col = f"{group_name}_person"
    others_col = f"{group_name}_others"
    delay_col = f"{group_name}_delay"

    # Initialize new columns
    if person_col not in df.columns:
        df[person_col] = 0
    if others_col not in df.columns:
        df[others_col] = 0
    if delay_col not in df.columns:
        df[delay_col] = 0

    # Populate the new columns with parsed data
    for date_time, sender, is_person, delay in parsed_data:
        interval_index = min((date_time.hour * 60 + date_time.minute) // 1, 1439)
        interval = df.index[interval_index]

        if is_person:
            df.at[interval, person_col] = 1
        else:
            df.at[interval, others_col] = 1

        if delay:
            df.at[interval, delay_col] = 1

        logging.debug(f"Updated DataFrame at {interval} for {group_name}: Person={is_person}, Delay={delay}")

    # Update active_chat column
    if 'active_chat' not in df.columns:
        df['active_chat'] = 0

    relevant_columns = [person_col, others_col]
    df['active_chat'] = df[relevant_columns].any(axis=1).astype(int)

    return df

def extract_group_name(file_path):
    group_name = os.path.basename(file_path).replace('WhatsApp Chat with ', '').split('.')[0]
    group_name = re.sub(r'\(\d+\)$', '', group_name)  # Remove any numbers in parentheses at the end
    return group_name  # Removed the extra "_person" suffix

date_directory = "C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001"
chat_files = list_chat_files(date_directory)
dataframes = {}

for file in chat_files:
    parts = file.split(os.sep)
    date_folder, person = parts[-4], parts[-2]

    try:
        folder_date = pd.to_datetime(date_folder, format='%Y-%m-%d').date()
    except ValueError:
        continue

    expected_date_minus_one = folder_date - datetime.timedelta(days=1)
    key = f"{folder_date.strftime('%Y-%m-%d')}_{person}"

    # Extract group_name using the dedicated function
    group_name = extract_group_name(file)

    if key not in dataframes:
        dataframes[key] = create_template_dataframe()
    
    # Get parsed_data without expecting group_name in return
    parsed_data = parse_chat_file(file, expected_date_minus_one)
    
    dataframes[key] = populate_dataframe(dataframes[key], parsed_data, group_name)
    logging.debug(f"Dataframe created for key: {key}")

# Example to show a dataframe
example_key = next(iter(dataframes))  # Just for demonstration
logging.debug(f"Example dataframe for key {example_key}: \n{dataframes[example_key]}")


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Chat Folder from Drive\\drive-download-20231204T064112Z-001'