In [1]:
import re
import pandas as pd
 
# Convert a WhatsApp chat export (plain text) into an Excel workbook with one
# row per message and Excel-friendly Date / Time columns.

# Timestamp prefix shared by every entry of the export, e.g.
# "01/02/2024, 9:05 in the morning - ".
timestamp_prefix = (
    r'^(\d{2}/\d{2}/\d{4}), '
    r'(\d{1,2}:\d{2} (?:in the morning|in the afternoon|at night|in the evening)) - '
)

# A regular chat message: "<timestamp> - <sender>: <text>"
pattern = re.compile(timestamp_prefix + r'(.*?): (.*)$')

# A service notice (encryption banner, group events, ...): same timestamp
# prefix but no "<sender>: " part.
system_pattern = re.compile(timestamp_prefix + r'(.*)$')

# Day-period labels that can follow the clock time in the export.
DAY_PERIODS = ('in the morning', 'in the afternoon', 'in the evening', 'at night')


def parse_chat_lines(lines):
    """Group raw export lines into (date, time, sender, message) tuples.

    A line carrying the timestamp prefix starts a new record; every other
    line is a continuation of the record in progress and is joined onto its
    text with a single space.

    Args:
        lines: Iterable of raw text lines (trailing newlines are allowed).

    Returns:
        A list of ``(date, time, sender, message)`` string tuples in file
        order. ``sender`` is ``''`` for service notices that have no
        "Sender: " part. Messages with an empty body are kept.

    Lines that appear before the first timestamped entry cannot be attributed
    to any message and are skipped.
    """
    records = []
    current = None  # (date, time, sender, [text parts]) of the open record

    def close(record):
        date, time, sender, parts = record
        return (date, time, sender, ' '.join(parts).strip())

    for line in lines:
        header = None
        match = pattern.match(line)
        if match:
            header = match.groups()
        else:
            match = system_pattern.match(line)
            if match:
                date, time, text = match.groups()
                header = (date, time, '', text)

        if header is not None:
            # Use an explicit "record open" state rather than the truthiness
            # of the text, so that empty-bodied messages are not lost.
            if current is not None:
                records.append(close(current))
            current = (header[0], header[1], header[2], [header[3]])
        elif current is not None:
            current[3].append(line.strip())

    if current is not None:
        records.append(close(current))
    return records


def to_24_hour(time_text):
    """Convert an export time such as '9:05 at night' to 'HH:MM:SS'.

    "in the morning" maps to AM, "in the afternoon" / "in the evening" map to
    PM. "at night" spans midnight: hours 6-11 are treated as PM, while 12 and
    1-5 fall after midnight and are treated as AM (so '12:30 at night' is
    '00:30:00', not noon).

    Args:
        time_text: Clock time in 12-hour form followed by a day-period label.

    Returns:
        The time as a zero-padded 24-hour 'HH:MM:SS' string.

    Raises:
        ValueError: If the label is unknown or the clock value is not a valid
            12-hour time.
    """
    clock, _, period = time_text.partition(' ')
    if period not in DAY_PERIODS:
        raise ValueError(f"Unrecognised day period in time: {time_text!r}")

    hour_text, _, minute_text = clock.partition(':')
    hour, minute = int(hour_text), int(minute_text)
    if not (1 <= hour <= 12 and 0 <= minute <= 59):
        raise ValueError(f"Invalid 12-hour clock value in time: {time_text!r}")

    if period == 'at night':
        after_noon = 6 <= hour <= 11
    else:
        after_noon = period != 'in the morning'

    # 12 o'clock is hour 0 of its half-day (12 AM -> 00, 12 PM -> 12).
    hour = hour % 12 + (12 if after_noon else 0)
    return f'{hour:02d}:{minute:02d}:00'


# Jupyter executes cells with __name__ == '__main__', so the conversion still
# runs in the notebook; the guard only keeps the file I/O from firing if the
# helpers above are imported from elsewhere.
if __name__ == '__main__':
    # Load the file directly from the notebook directory. 'utf-8-sig' drops a
    # leading byte-order mark, which would otherwise stop the first line from
    # matching the timestamp pattern.
    file_name = 'prabha.txt'
    with open(file_name, 'r', encoding='utf-8-sig') as file:
        lines = file.readlines()

    records = parse_chat_lines(lines)
    dates = [record[0] for record in records]
    times = [record[1] for record in records]
    senders = [record[2] for record in records]
    messages = [record[3] for record in records]

    # Create a DataFrame
    df = pd.DataFrame({
        'Date': dates,
        'Time': times,
        'Sender': senders,
        'Message': messages
    })

    # Format Date and Time columns to be Excel-friendly
    df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y').dt.strftime('%Y-%m-%d')
    df['Time'] = df['Time'].map(to_24_hour)

    # Save DataFrame to Excel directly
    output_file_name = 'PrabhaWhatsApp_Chat_Formatted.xlsx'
    df.to_excel(output_file_name, index=False, engine='openpyxl')

    print(f"Excel file saved as: {output_file_name}")

Excel file saved as: PrabhaWhatsApp_Chat_Formatted.xlsx
