In [6]:
import os
import pandas as pd
import datetime
import re

# Constants
DELAY_THRESHOLD = 15  # minutes
LAST_DAYS = 7  # number of days to filter

# Function to check if a string is numeric
def is_numeric(s):
    return s.replace('+', '', 1).isdigit()

# Function to filter messages from the last 7 days
def filter_last_7_days_messages(file_path):
    print(f"Filtering messages from the last 7 days in file: {file_path}")  # Debug
    recent_messages = []
    current_date = datetime.datetime.now()
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            match = re.match(r'(\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2} [ap]m) -', line)
            if match:
                timestamp = datetime.datetime.strptime(match.group(1), '%d/%m/%y, %I:%M %p')
                if (current_date - timestamp).days < LAST_DAYS:
                    recent_messages.append(line.strip())
    print(f"Found {len(recent_messages)} messages from the last 7 days.")  # Debug
    return recent_messages

# Function to calculate delays and identify the person
def calculate_delays_and_identify_person(messages):
    print("Calculating delays and identifying the person...")  # Debug
    delays = []
    last_person_message_time = None
    for message in messages:
        timestamp_str, sender_and_message = message.split(' - ', 1)
        timestamp = datetime.datetime.strptime(timestamp_str, '%d/%m/%y, %I:%M %p')
        sender = sender_and_message.split(':', 1)[0]
        if not is_numeric(sender):  # Person's message
            if last_person_message_time and (timestamp - last_person_message_time).total_seconds() / 60 > DELAY_THRESHOLD:
                delays.append(last_person_message_time.strftime('%d/%m/%y, %I:%M %p'))
            last_person_message_time = timestamp
    print(f"Delays identified: {delays}")  # Debug
    return delays

# Main analysis process
def main_analysis(root_directory):
    print(f"Starting delay analysis in root directory: {root_directory}")  # Debug
    analysis_table = pd.DataFrame(columns=['Date', 'Chat Group Name', 'Person', 'Time of Delay', 'Last 7 Messages'])
    
    # Iterate over the directory structure
    for date_folder in os.listdir(root_directory):
        date_path = os.path.join(root_directory, date_folder)
        print(f"Processing date folder: {date_folder}")  # Debug
        for team_folder in os.listdir(date_path):
            team_path = os.path.join(date_path, team_folder)
            print(f"Processing team folder: {team_folder}")  # Debug
            for person_folder in os.listdir(team_path):
                person_path = os.path.join(team_path, person_folder)
                print(f"Processing person folder: {person_folder}")  # Debug
                for file in os.listdir(person_path):
                    if file.endswith('.txt'):
                        chat_file_path = os.path.join(person_path, file)
                        messages = filter_last_7_days_messages(chat_file_path)
                        delays = calculate_delays_and_identify_person(messages)
                        last_7_messages = ' | '.join(messages[-7:])
                        for delay in delays:
                            analysis_table = analysis_table.append({
                                'Date': date_folder, 
                                'Chat Group Name': team_folder, 
                                'Person': person_folder, 
                                'Time of Delay': delay, 
                                'Last 7 Messages': last_7_messages
                            }, ignore_index=True)
                            print(f"Added delay entry for {person_folder} on {date_folder}")  # Debug

    print("Analysis complete. Returning DataFrame.")  # Debug
    return analysis_table

# Run the analysis
root_directory = 'C:\\Users\\mauriceyeng\\Python\\Daily-Reports\\Test\\V1_maurice\\TestingData'  # Replace with the actual path
delay_analysis_table = main_analysis(root_directory)
print(delay_analysis_table.head())  # Displaying the first few rows of the analysis table


Starting delay analysis in root directory: C:\Users\mauriceyeng\Python\Daily-Reports\Test\V1_maurice\TestingData
Processing date folder: 2023-11-18
Processing team folder: EWYL TEAM
Processing person folder: Ananya_Edoofa
Filtering messages from the last 7 days in file: C:\Users\mauriceyeng\Python\Daily-Reports\Test\V1_maurice\TestingData\2023-11-18\EWYL TEAM\Ananya_Edoofa\WhatsApp Chat with Twazanga EWYL22E1057.txt
Found 0 messages from the last 7 days.
Calculating delays and identifying the person...
Delays identified: []
Processing person folder: Jasmine_Edoofa
Filtering messages from the last 7 days in file: C:\Users\mauriceyeng\Python\Daily-Reports\Test\V1_maurice\TestingData\2023-11-18\EWYL TEAM\Jasmine_Edoofa\WhatsApp Chat with Silence EWYL22E1150.txt
Found 0 messages from the last 7 days.
Calculating delays and identifying the person...
Delays identified: []
Processing person folder: Sharda_Edoofa
Filtering messages from the last 7 days in file: C:\Users\mauriceyeng\Python\Dail

In [4]:
import os
import pandas as pd
import datetime
import re

DELAY_THRESHOLD = 15  # minutes

# Function to check if a string is numeric
def is_numeric(s):
    return s.replace('+', '', 1).isdigit()


# Adjusted regex to account for any whitespace character between time and am/pm
regex_pattern = r'^\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2}\s[ap]m -'

# Function to read chat files and extract only the last 7 messages
def read_chat_file(file_path):
    print(f"Reading file: {file_path}")  # Debug
    messages = []
    regex_pattern = r'^\d{2}/\d{2}/\d{2}, \d{1,2}:\d{2}\s[ap]m -'  # Updated regex pattern
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if re.match(regex_pattern, line):
                messages.append(line.strip())
    last_7_messages = messages[-7:]  # Extract only the last 7 messages
    for msg in last_7_messages:  # Debug line to print last 7 messages
        print(f"Extracted message: {msg}")
    print(f"Total messages extracted: {len(last_7_messages)}")  # Debug
    return last_7_messages

# Helper function to extract timestamp and sender from a message
def extract_timestamp_and_sender(message):
    timestamp_str, sender_and_message = message.split(' - ', 1)
    sender = sender_and_message.split(':', 1)[0]
    timestamp = pd.to_datetime(timestamp_str, format='%d/%m/%y, %I:%M %p', errors='coerce')
    return timestamp, sender

# Helper function to calculate time difference in minutes
def calculate_time_difference(start_time, end_time):
    return (end_time - start_time).total_seconds() / 60

# Function to check for delay and calculate delay time
def check_for_delay_and_calculate_time(messages):
    last_7_messages = messages[-7:]
    delay_detected = False
    delay_time = None

    for i in range(len(last_7_messages) - 1):
        current_msg = last_7_messages[i]
        next_msg = last_7_messages[i + 1]
        print(f"Current message: {current_msg}")
        print(f"Next message: {next_msg}")

        current_timestamp, current_sender = extract_timestamp_and_sender(current_msg)
        next_timestamp, next_sender = extract_timestamp_and_sender(next_msg)

        if not is_numeric(current_sender) and is_numeric(next_sender):
            time_diff = calculate_time_difference(current_timestamp, next_timestamp)
            if time_diff > DELAY_THRESHOLD:
                print(f"Time difference: {time_diff} minutes")
                delay_detected = True
                delay_time = time_diff
                break
    
    
    

    return delay_detected, delay_time, ' | '.join(last_7_messages)

# Main analysis process
def main_analysis(root_directory):
    print(f"Starting delay analysis in root directory: {root_directory}")  # Debug
    analysis_table = pd.DataFrame(columns=['Date', 'Chat Group Name', 'Person', 'Delay Detected', 'Delay Time', 'Last 7 Messages'])
    analysis_data = []  # List to store analysis data

    for date_folder in os.listdir(root_directory):
        date_path = os.path.join(root_directory, date_folder)
        print(f"Processing date folder: {date_folder}")  # Debug
        for team_folder in os.listdir(date_path):
            team_path = os.path.join(date_path, team_folder)
            print(f"Processing team folder: {team_folder}")  # Debug
            for person_folder in os.listdir(team_path):
                person_path = os.path.join(team_path, person_folder)
                print(f"Processing person folder: {person_folder}")  # Debug
                for file in os.listdir(person_path):
                    if file.endswith('.txt'):
                        chat_file_path = os.path.join(person_path, file)
                        messages = read_chat_file(chat_file_path)
                        delay_detected, delay_time, last_7_messages = check_for_delay_and_calculate_time(messages)
                        
                        analysis_data.append({
                            'Date': date_folder, 
                            'Chat Group Name': team_folder, 
                            'Person': person_folder, 
                            'Delay Detected': delay_detected, 
                            'Delay Time': delay_time, 
                            'Last 7 Messages': last_7_messages
                        })
                        print(f"Processed file: {file}")  # Debug

    print("Analysis complete. Returning DataFrame.")  # Debug
    return analysis_table

# Run the analysis
root_directory = 'C:\\Users\\maurice\\Documents\\Chat-Analyzer-V2\\Test\\V1_maurice\\TestingData'  # Replace with the actual path
delay_analysis_table = main_analysis(root_directory)
print(delay_analysis_table.head())  # Displaying the first few rows of the analysis table


Starting delay analysis in root directory: C:\Users\maurice\Documents\Chat-Analyzer-V2\Test\V1_maurice\TestingData
Processing date folder: 2023-11-18
Processing team folder: EWYL TEAM
Processing person folder: Ananya_Edoofa
Reading file: C:\Users\maurice\Documents\Chat-Analyzer-V2\Test\V1_maurice\TestingData\2023-11-18\EWYL TEAM\Ananya_Edoofa\WhatsApp Chat with Twazanga EWYL22E1057.txt
Extracted message: 24/02/23, 1:07 pm - You were added
Extracted message: 01/08/23, 9:38 am - Ananya Edoofa: Good morning, My Proud Edoofian!
Extracted message: 01/08/23, 9:43 am - +260 95 7604595: I am a proud edoofian
Extracted message: 01/08/23, 9:51 am - +260 95 7604595: I am a proud edoofian
Extracted message: 01/08/23, 9:59 am - Ananya Edoofa: On your EWYL line, Twazanga!
Extracted message: 01/08/23, 10:00 am - +260 95 7604595: Oh, sorry ma’am
Extracted message: 02/08/23, 1:45 pm - Ananya Edoofa: Seems like you have missed your EWYL ritual today, it's a reminder for you. Do it now!
Extracted message

Reading file: C:\Users\maurice\Documents\Chat-Analyzer-V2\Test\V1_maurice\TestingData\2023-11-18\KAM TEAM\Shivjeet_Edoofa\WhatsApp Chat with Moreblessing 2K23APR1124.txt
Extracted message: 19/04/23, 7:36 am - You were added
Extracted message: 20/08/23, 10:25 am - Shivjeet Edoofa joined using this group's invite link
Extracted message: 08/09/23, 2:53 pm - +91 85956 45071 left
Extracted message: 09/09/23, 9:38 am - Shivjeet Edoofa: Good Morning, dear parents and Moreblessing,
Extracted message: 09/09/23, 11:09 am - +263 77 222 3447: Good morning
Extracted message: 09/09/23, 11:18 am - +263 77 222 3447: On behalf of the family we are all fine. The unfortunate part is More blessings lost her cell phone as a result has not been communicating  l an proceeding there to replace the phone. She will communicate as soon as she is back on line. Most probably Tuesday 12 September 2023. We are really grateful to be part of Edoofa
Extracted message: 21/09/23, 2:43 pm - Shivjeet Edoofa: Good Afternoon

Extracted message: 03/10/23, 2:37 pm - +263 71 133 8660: Noted
Extracted message: 03/10/23, 2:37 pm - +263 71 133 8660: Understood
Extracted message: 03/10/23, 2:43 pm - Ananya Edoofa: Now, it’s been an interesting conversation with you so far. However, I have some cool things left to share and thus, I think we’re ready to move forward. I’m sharing an audio file below which has an easy yet detailed explanation of the last project. So, let’s follow the same process- you hear it more than once, and then recall it quickly for me!
Extracted message: 03/10/23, 2:43 pm - Ananya Edoofa: <Media omitted>
Extracted message: 03/10/23, 2:45 pm - +263 71 133 8660: Okay
Extracted message: 03/10/23, 2:46 pm - Ananya Edoofa: After you go through this, I would appreciate it if you can briefly summarize your understanding of it.
Extracted message: 03/10/23, 2:47 pm - +263 71 133 8660: Okay
Extracted message: 03/10/23, 2:55 pm - +263 71 133 8660: African Schools Education Reform Mission. This project l w

Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: Offline Application Form
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: 31 August 2023
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: Offline Application Form
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: Offline Application Form
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: Date of application :31/08/2024
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: Student name: Kenneth Rangarirayi Themba Chiwenga
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: Offline Application Form
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: Offline Application Form
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: 01-09-23
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: Offline Application Form
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: 1 /09/23
Extracted message: 01/09/23, 12:42 pm - +91 70879 73863: 1/09/23
Extracted message: 01/09/23, 12:42 pm - +91

Extracted message: 31/07/23, 9:40 am - +263 77 724 9633: Waiting for this message
Extracted message: 31/07/23, 9:41 am - +263 77 724 9633: Waiting for this message
Extracted message: 31/07/23, 9:41 am - +263 77 724 9633: Waiting for this message
Extracted message: 31/07/23, 9:42 am - +263 77 724 9633: Waiting for this message
Extracted message: 31/07/23, 9:53 am - Sharda Edoofa: Waiting for this message
Extracted message: 01/08/23, 8:54 am - Sharda Edoofa: Thank you so much. Even I have informed the Admissions team from my end. They will help you regarding this.
Extracted message: 01/08/23, 8:56 am - +263 77 724 9633: thank you Miss Sharda
Extracted message: 01/08/23, 8:57 am - +263 77 724 9633: on the PhD candidate group they posted this
Extracted message: 01/08/23, 8:58 am - +263 77 724 9633: <Media omitted>
Extracted message: 01/08/23, 9:07 am - Sharda Edoofa: Okay. Thank you so much for this. I will try informing them from my end.
Extracted message: 01/08/23, 9:07 am - Sharda Edoof

Extracted message: 18/11/23, 12:28 pm - +263 78 126 2077: Afternoon Sir
Extracted message: 18/11/23, 12:33 pm - Ashi Edoofa: Good Afternoon Parents and Denzel,
Extracted message: 18/11/23, 12:33 pm - Ashi Edoofa: Now that you have the letter in your possession, I wanted to take this opportunity to reach out and inquire if you've had the chance to go through the offer letter thoroughly?
Extracted message: 18/11/23, 12:35 pm - +263 78 126 2077: Yes Sir , we have gone through the latter
Extracted message: 18/11/23, 12:40 pm - Ashi Edoofa: I am glad to hear that. As we proceed with today's discussion, we will get into the details of this specific offer letter. I will guide you through the admission process, explain how it was carried out, and provide information about the steps you need to take to secure the scholarship offer.
Extracted message: 18/11/23, 12:42 pm - +263 78 126 2077: Yes Sir
Extracted message: 18/11/23, 12:43 pm - Ashi Edoofa: Amazing. Now that you are all set to discuss t

Extracted message: 18/11/23, 1:36 pm - Kirti Edoofa: After we share the application with the universities, it is on the admissions team of the university to accept or reject an application, and the decision to offer a scholarship depends on them. If you have any university on your mind, you can let me know and we will surely look into your inclinations as well.
Extracted message: 18/11/23, 1:36 pm - Kirti Edoofa: As of now, your application has been only accepted by the Galgotias and since we have shared your application with multiple universities, we are also hoping to receive updates from multiple universities. Once we get the same, we will surely inform you, but each university takes its own time to consider the application and so you have to be patient while the universities are evaluating the application.
Extracted message: 18/11/23, 3:09 pm - Kirti Edoofa: I was hoping to get a response from you, however, you did not respond since today is Saturday and our available hours were il

Extracted message: 18/11/23, 10:42 am - Kirti Edoofa: You are most welcome. Once you make the payment to Mr. Rajan, he will issue a payment receipt that includes all relevant details such as the student's name, depositor's name, payment amount, payment purpose, and more. This receipt can be shared with us for payment confirmation and serves as proof of payment.
Extracted message: 18/11/23, 10:42 am - Kirti Edoofa: Alternatively, you can revert to the payment method you previously used to make a cash deposit through Mr. Shorwi FBC Bank for your enrollment fee.
Extracted message: 18/11/23, 10:45 am - +263 77 369 1078: I still have his account details if I consider that option. But at the moment I will pursue Mr Raji option
Extracted message: 18/11/23, 11:02 am - Kirti Edoofa: Let me share the updated account details, if you need them for reference, please.
Extracted message: 20/11/23, 4:35 pm - +263 71 453 9094 joined using this group's invite link
Extracted message: 20/11/23, 5:59 pm - 

Extracted message: 13/10/23, 1:51 pm - Milan Edoofa: Good Afternoon Parents and Zibusiso!
Extracted message: 13/10/23, 1:53 pm - Milan Edoofa: As per the discussion that we had here earlier, you had mentioned that you are going to make the payment of your registration fee today, hence, can you please share an update about the same with me here.
Extracted message: 16/10/23, 11:29 am - Milan Edoofa: Good Morning Parents and Zibusiso!
Extracted message: 16/10/23, 11:29 am - Milan Edoofa: I was expecting a response from your side earlier, when we were connected with each other here so that we could have moved ahead with our discussion and planned the next steps in your higher education journey. Hence, can you please let me know by when can we connect with each other here for the same?
Extracted message: 16/10/23, 5:50 pm - +263 77 572 2942: Good evening sir, sorry was just stressed because our tenants haven't paid rent they said they lost a relative hence the delay in payment but they have

Reading file: C:\Users\maurice\Documents\Chat-Analyzer-V2\Test\V1_maurice\TestingData\2023-11-20\Sales\Arshita\WhatsApp Chat with Madungwe_Edoofa(21_11)AA.txt
Extracted message: 20/11/23, 4:48 pm - Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them. Tap to learn more.
Extracted message: 20/11/23, 4:48 pm - Arshita created group "Madungwe_Edoofa(21/11)AA"
Extracted message: 20/11/23, 4:48 pm - Arshita added you
Extracted message: 20/11/23, 4:56 pm - Arshita: Offline Application Form
Extracted message: 20/11/23, 4:56 pm - Arshita: Ruvimbo EWYL23E0875-AE
Extracted message: 20/11/23, 6:38 pm - Arshita: Add child
Extracted message: 20/11/23, 6:48 pm - Arshita added +263 77 264 1810 and +263 77 609 9990
Extracted message: 20/11/23, 6:48 pm - Arshita: This message was deleted
Extracted message: 20/11/23, 6:50 pm - Arshita: <Media omitted>
Extracted message: 20/11/23, 6:50 pm - Arshita: <Media omitted>
Extracted message: 20/1

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

