## Replace Numbers With Names

In [11]:
import json
import re

# Lookup table used for the substitution below, parsed from JSON
# (presumably phone number -> contact name; verify against numbers.json)
with open("numbers.json") as file:
    numbers_dict = json.loads(file.read())

# Pull the text file in as a list of lines; each entry keeps its line ending.
# NOTE(review): absolute, machine-specific path and platform-default encoding —
# consider making both configurable.
with open(
    "/Users/georgevengrovski/Documents/PersonalProjects/imessage_analysis/Righties gc (gyatt chat) - 389.txt"
) as file:
    content = list(file)

# Function to replace numbers with names in a line of text
def replace_numbers_with_names(line, numbers_dict):
    """Replace every mapped ``+<digits>`` token in ``line`` with its name.

    Args:
        line: Text to process.
        numbers_dict: Mapping from a number string (including the leading
            ``+``) to the replacement text.

    Returns:
        ``line`` with each complete ``+<digits>`` run that is a key of
        ``numbers_dict`` replaced by its value. Tokens that are not keys are
        left untouched.
    """
    def _lookup(match):
        number = match.group(0)
        if number in numbers_dict:
            return numbers_dict[number]
        return number

    # Substitute in a single pass over whole matches. A plain str.replace()
    # would also rewrite a mapped number where it merely prefixes a longer,
    # different number (e.g. "+123" inside "+1234"), corrupting that token.
    return re.sub(r'\+\d+', _lookup, line)

# Run each input line through the substitution helper; the result stays a list of lines
updated_content = [
    replace_numbers_with_names(text_line, numbers_dict)
    for text_line in content
]

# Persist the rewritten lines as one string (each line still carries its own line ending)
with open("output.txt", mode="w") as file:
    file.write("".join(updated_content))

## Generate Pandas DataFrame

In [51]:
import pandas as pd
import re

def _parse_reactions(reaction_lines):
    """Group reaction lines of the form ``"<reaction type> by <name>"`` by type.

    Lines that do not split into exactly two parts on ``' by '`` are ignored.

    Returns:
        dict mapping reaction type -> list of names, in order of appearance.
    """
    reactions = {}
    for reaction in reaction_lines:
        parts = reaction.split(' by ')
        if len(parts) != 2:
            # Separator missing or ambiguous: not a "<type> by <name>" line.
            continue
        reaction_type, name = parts
        reactions.setdefault(reaction_type, []).append(name)
    return reactions


def parse_text_file(file_path):
    """Parse a chat text file into a DataFrame with one row per block.

    Blocks are separated by blank lines. Within a block:

    * line 1 is the header, ``"<date-time> (Read by you after <latency>)"``;
    * line 2 is the sender;
    * the remaining lines are message content, up to an optional line that
      starts with ``"Reactions:"``; every line after it is a reaction of the
      form ``"<reaction type> by <name>"``.

    Args:
        file_path: Path of the text file to parse (read as UTF-8).

    Returns:
        pandas.DataFrame with the columns ``date-time``, ``read_latency``,
        ``sender``, ``message_content`` and ``reactions``. ``date-time`` and
        ``read_latency`` are ``''`` when the header does not match the
        expected pattern; ``message_content`` is the message lines joined by
        single spaces; ``reactions`` is a dict of reaction type -> names.
        An empty file yields an empty DataFrame with the same columns.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Regex to capture the date-time and read latency from a block's first line
    datetime_latency_regex = re.compile(r"^(.*?) \(Read by you after (.*)\)$")

    # Column data, one entry per parsed block
    date_times = []
    read_latencies = []
    senders = []
    message_contents = []
    all_reactions = []

    for raw_block in content.strip().split('\n\n'):
        # Runs of 3+ newlines leave stray newlines on a block (or an empty
        # block). Trim them so the header is always the first line, and skip
        # blocks that contain nothing (this also covers an empty file).
        block = raw_block.strip('\n')
        if not block:
            continue
        lines = block.split('\n')

        # Extract date-time and read latency
        header_match = datetime_latency_regex.match(lines[0])
        if header_match:
            date_times.append(header_match.group(1))
            read_latencies.append(header_match.group(2))
        else:
            date_times.append('')
            read_latencies.append('')

        # Extract sender
        senders.append(lines[1] if len(lines) > 1 else '')

        # Split the rest into message lines and (optional) reaction lines
        body_lines = lines[2:]
        message_lines = body_lines
        reaction_dict = {}
        for index, line in enumerate(body_lines):
            if line.startswith("Reactions:"):
                message_lines = body_lines[:index]
                reaction_dict = _parse_reactions(body_lines[index + 1:])
                break

        message_contents.append(' '.join(message_lines))
        all_reactions.append(reaction_dict)

    # Create DataFrame
    return pd.DataFrame({
        'date-time': date_times,
        'read_latency': read_latencies,
        'sender': senders,
        'message_content': message_contents,
        'reactions': all_reactions
    })

# Parse the name-substituted chat text produced by the previous cell
file_path = 'output.txt'
df = parse_text_file(file_path)

# Export the parsed messages as CSV, leaving out the DataFrame's index column
df.to_csv(path_or_buf='formatted_output.csv', index=False)

Sticker from Mackey Guenther: 8edbaf335ded4627560252aba3aaa437-sticker.png
Sticker from Mackey Guenther: c6a19210874260dfb7444e3c76a52d3e-sticker.png
Sticker from Jacob Guenther: b3cff50d7610dbc430ac14b5cdbd86ce-sticker.png
Sticker from Jacob Guenther: b3cff50d7610dbc430ac14b5cdbd86ce-sticker.png
Sticker from Mackey Guenther: 61fe3a7941e52c8cb1578791e351e5af-sticker.png
Sticker from Jacob Guenther: 5326c9920d7c23ce3c6402aa04674f2c-sticker.png
Sticker from Jacob Guenther: b3cff50d7610dbc430ac14b5cdbd86ce-sticker.png
Sticker from Jacob Guenther: 0eeda8fbc5856f9eae6ebe32ac2e0f11-sticker.png
Sticker from Lamek Mekonen: 59269266bfb3659df4ee0b8c02609c04-sticker.png
Sticker from Jacob Guenther: aa65b71ffa40cbf5d9bb66b06227c85a-sticker.png
Sticker from Jacob Guenther: 48c581e22775467ca907a6a96aef2ba5-sticker.png
Sticker from Lamek Mekonen: 8e58f1e071bfaafa498c2457f6fce877-sticker.png
Sticker from Lamek Mekonen: b7485285f1f58f6a3d5a777f5feb4172-sticker.png
Sticker from Jacob Guenther: a5c2bd8a6