In [42]:
import json

# Path of the JSON log to load, relative to the notebook's working directory.
file_path = "./log_longer.json"

# Decode explicitly as UTF-8 instead of relying on the platform's locale
# encoding, so non-ASCII text in the log is read the same way everywhere.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [43]:
import unidecode
import re

def normalize_text(text):
    """Reduce ``text`` to lowercase ASCII letters, digits and single spaces.

    Steps, in order: transliterate to ASCII with ``unidecode``, lowercase,
    delete every character that is not ``a-z``, ``0-9`` or whitespace, then
    collapse each whitespace run to one space and trim both ends.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string; may be empty if nothing survives the filter.
    """
    # Transliterate BEFORE lowercasing: unidecode can produce capitalised
    # ASCII (e.g. romanised CJK text), and any uppercase letter left at this
    # point would be deleted by the a-z filter below instead of being kept.
    text = unidecode.unidecode(text).lower()

    # Keep only lowercase letters, digits and whitespace.
    text = re.sub(r'[^a-z0-9\s]', '', text)

    # Collapse whitespace runs and strip leading/trailing spaces.
    text = re.sub(r'\s+', ' ', text).strip()

    return text

def clamp(value, min_value, max_value):
    """Limit ``value`` to the range bounded by ``min_value`` and ``max_value``.

    The upper bound is applied first, then the lower bound, so the lower
    bound wins if the two bounds are given in the wrong order.
    """
    # Cap from above.
    capped = max_value if max_value < value else value
    # Then raise to the floor if needed.
    return capped if capped > min_value else min_value

In [44]:
class Message:
    """A single log message together with its sender and read receipts."""

    def __init__(self, id: int, login: str, date_time: str, msg: str, viewed_by: list[int]):
        """Store the given fields unchanged on the instance."""
        self.id, self.login = id, login
        self.date_time = date_time
        self.msg = msg
        self.viewed_by = viewed_by

    def __repr__(self):
        """Return a ``Message(...)`` string listing every field in single quotes."""
        fields = (
            f"id='{self.id}'",
            f"login='{self.login}'",
            f"date='{self.date_time}'",
            f"msg='{self.msg}'",
            f"viewed_by='{self.viewed_by}'",
        )
        return "Message(" + ", ".join(fields) + ")"
    
class Match:
    """Pairs one message with the list of messages that surround it."""

    def __init__(self, message: Message, context: list[Message]):
        """Store the matched message and its context list as given."""
        self.message, self.context = message, context
        
def get_context(messages: list[Message], index: int, window: int = 10):
    """Return the messages around position ``index``.

    The result covers positions ``index - window`` (inclusive) up to
    ``index + window`` (exclusive), with both bounds clamped to
    ``[0, len(messages)]`` so indices near either end never go out of range.

    Args:
        messages: The full, ordered sequence of messages.
        index: Position of the message whose surroundings are wanted.
        window: How far the range extends on each side of ``index``.
            Defaults to 10.

    Returns:
        A new list with the selected messages, in their original order.
        Empty when the clamped range contains nothing.
    """
    size = len(messages)
    bottom = clamp(index - window, 0, size)
    # NOTE(review): the upper bound is exclusive, so the result holds up to
    # `window` messages before `index` but only `window - 1` after it.
    # Kept as-is to preserve existing behaviour - confirm this is intended.
    top = clamp(index + window, 0, size)

    return list(messages[bottom:top])

In [45]:
# Turn the raw JSON entries into Message objects, skipping every entry whose
# "msg" payload has no "content" field.
messages_json = data["msgs"]

messages = []

for message in messages_json:
    if "content" not in message["msg"]:
        continue

    # Named `sender_id` rather than `id`: a top-level assignment to `id`
    # would shadow the builtin id() for the rest of the kernel session.
    sender_id = message["sender"]["id"]
    login = message["sender"]["login"]
    date_time = message["dt"]
    # Only the message body is normalized; the login is stored as-is.
    msg = normalize_text(message["msg"]["content"])
    viewed_by = message["visReceipt"]
    message_user = Message(sender_id, login, date_time, msg, viewed_by)

    messages.append(message_user)
    

In [65]:

# Write one record per message: "- <login>", the normalized text, then a
# separator line. The encoding is set explicitly because logins are written
# un-normalized; a non-ASCII login could otherwise fail to encode under a
# non-UTF-8 locale default.
with open("output.txt", "w", encoding="utf-8") as file:
    for message in messages:
        file.write(f"- {message.login}\n")
        file.write(f"{message.msg}\n")
        file.write("-----------------------\n")