In [10]:
import os
import pandas as pd
import datetime
from datetime import datetime, timedelta
import re

# Your provided function for listing chat files
def list_chat_files(base_directory):
    """Collect the ``.txt`` chat exports stored under each date's SALES team.

    The directory tree is walked as
    ``<base_directory>/<date>/SALES/<person>/<file>.txt``; progress is
    printed as folders and files are discovered.

    Args:
        base_directory: Root folder whose sub-directories are date folders.

    Returns:
        A dict mapping ``(date_folder, person_folder)`` to the list of full
        paths of the ``.txt`` files found in that person's folder.
    """
    found = {}
    for date_name in os.listdir(base_directory):
        date_dir = os.path.join(base_directory, date_name)
        if not os.path.isdir(date_dir):
            continue
        print(f"Processing date folder: {date_name}")

        for team_name in os.listdir(date_dir):
            # Only the SALES team is analysed; every other entry is ignored.
            if team_name != 'SALES':
                continue
            print(f"  Found team folder: {team_name}")
            team_dir = os.path.join(date_dir, team_name)
            if not os.path.isdir(team_dir):
                continue

            for person_name in os.listdir(team_dir):
                person_dir = os.path.join(team_dir, person_name)
                if not os.path.isdir(person_dir):
                    continue

                for entry in os.listdir(person_dir):
                    if not entry.endswith('.txt'):
                        continue
                    print(f"      Found chat file: {entry}")
                    key = (date_name, person_name)
                    found.setdefault(key, []).append(os.path.join(person_dir, entry))
    return found

def parse_chat_file(file_path, expected_date, person_name):
    """Find the first chat session of ``person_name`` on ``expected_date``.

    Message lines are expected to look like
    ``dd/mm/yy, h:mm am - Sender: text``. Only lines sent by ``person_name``
    on ``expected_date`` are considered. The session starts at the first
    such message and ends at the last one seen before a gap of two hours or
    more (or at the final matching message when no such gap occurs).

    Args:
        file_path: Path of the UTF-8 encoded chat export to read.
        expected_date: ``datetime.date`` the messages must fall on.
        person_name: Sender name; compared for exact equality.

    Returns:
        A ``(start_time, end_time)`` tuple of ``datetime`` objects, or
        ``(None, None)`` when the file holds no matching message.
    """
    start_time = None
    last_message_time = None

    # The meridiem is captured separately from the date/time so the two can
    # be re-joined with exactly one space: the pattern tolerates a missing
    # space ("9:30am"), but strptime's format string requires whitespace
    # there and would otherwise reject the line.
    message_pattern = re.compile(
        r'^(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2})\s?([ap]m) - (.*?): '
    )

    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            match = message_pattern.match(line)
            if not match:
                continue  # continuation line or system notice

            stamp, meridiem, sender = match.groups()
            try:
                date_time = datetime.strptime(
                    f'{stamp} {meridiem}', '%d/%m/%y, %I:%M %p'
                )
            except ValueError:
                # Shaped like a timestamp but not a real one (e.g. month 13).
                continue

            if date_time.date() != expected_date or sender != person_name:
                continue

            if start_time is None:
                start_time = date_time
            elif (date_time - last_message_time).total_seconds() / 3600 >= 2:
                # A gap of two hours or more closes the session at the
                # previous message.
                break

            last_message_time = date_time

    # Whether the loop ended on a gap or at end of file, the session ends at
    # the last message accepted into it.
    return start_time, last_message_time



def process_chats(base_directory):
    """Build one session row per (date folder, person) from the chat exports.

    For every group of files returned by ``list_chat_files`` the folder name
    is parsed as a ``YYYY-MM-DD`` date, and each file is scanned for messages
    dated the day *before* that folder date. The earliest start and latest
    end across the person's files form the session.

    Args:
        base_directory: Root folder passed on to ``list_chat_files``.

    Returns:
        A list of ``[folder_date, person, start_time, end_time]`` rows.
        Groups without any matching message are reported with a printed
        warning and omitted, as are folders whose name is not a valid date.
    """
    chat_files = list_chat_files(base_directory)
    session_data = []

    for (date_folder, person), files in chat_files.items():
        try:
            folder_date = datetime.strptime(date_folder, '%Y-%m-%d').date()
        except ValueError:
            # One stray folder must not abort the whole run.
            print(f"Warning: Skipping folder with non-date name: {date_folder}")
            continue

        # Files in a date folder are scanned for the previous day's messages.
        expected_date = folder_date - timedelta(days=1)
        start_time = None
        end_time = None

        for chat_file in files:
            file_start_time, file_end_time = parse_chat_file(chat_file, expected_date, person)
            if not file_start_time:
                continue
            if not start_time or file_start_time < start_time:
                start_time = file_start_time
            if file_end_time and (not end_time or file_end_time > end_time):
                end_time = file_end_time

        if start_time and end_time:
            session_data.append([folder_date, person, start_time, end_time])
        else:
            print(f"Warning: No valid chat session found for {person} on {date_folder}")

    return session_data



# Save the data to a CSV file
def save_to_csv(session_data, output_file):
    """Write the session rows to ``output_file`` as CSV, without an index column.

    Args:
        session_data: Iterable of 4-item rows in the order
            date, person, start_time, end_time.
        output_file: Destination path handed to ``DataFrame.to_csv``.
    """
    column_names = ['date', 'person', 'start_time', 'end_time']
    pd.DataFrame(session_data, columns=column_names).to_csv(output_file, index=False)

# Script driver: these statements run as soon as the cell/module executes.
# base_directory is an absolute Windows path on drive F:, so it must be
# edited before running on another machine; output_file is a relative path.
base_directory = 'F:\\Github-mauriceyeng\\Chat-Analyzer-V2\\date-wise'
output_file = 'chat_sessions.csv'

# Scan every chat export under base_directory, then write one CSV row per
# (date folder, person) session that was found.
session_data = process_chats(base_directory)
save_to_csv(session_data, output_file)

print('Chat analysis complete and saved to', output_file)


Processing date folder: 2023-12-01
  Found team folder: SALES
      Found chat file: WhatsApp Chat with Abdul_Edoofa(28_11)AA.txt
      Found chat file: WhatsApp Chat with Aleta_Edoofa(20_11)AA-IE.txt
      Found chat file: WhatsApp Chat with Alistair_Edoofa(07_11)AA.txt
      Found chat file: WhatsApp Chat with Anenishe_Edoofa(24_11)AA.txt
      Found chat file: WhatsApp Chat with Angel_Edoofa(16_11)AA.txt
      Found chat file: WhatsApp Chat with Ashleigh_Edoofa(25_10)AA.txt
      Found chat file: WhatsApp Chat with Ashley_Edoofa(21_11)AA.txt
      Found chat file: WhatsApp Chat with Believe_Edoofa(16_11)AA.txt
      Found chat file: WhatsApp Chat with Bopoto_Edoofa(24_11)AA.txt
      Found chat file: WhatsApp Chat with Caleb_Edoofa(23_11)AA.txt
      Found chat file: WhatsApp Chat with Calvin_Edoofa(29_11)AA.txt
      Found chat file: WhatsApp Chat with Chido_Edoofa(28_11)AA-IE.txt
      Found chat file: WhatsApp Chat with Chipise_Edoofa(22_11)AA.txt
      Found chat file: WhatsApp 

Chat analysis complete and saved to chat_sessions.csv
