In [1]:
import re
import pandas as pd

# Canonical field order shared by every parsed entry. Seeding each entry from
# this tuple keeps the key order identical for both line types, so the column
# order of a DataFrame built from the entries does not depend on which kind of
# line happens to be parsed first.
_LOG_ENTRY_FIELDS = (
    "timestamp",
    "event_type",
    "client",
    "clock_rate",
    "logical_time",
    "physical_time",
    "sender",
    "recipient",
    "message_queue_length",
)


def _search_group(pattern, line, convert=None):
    """Return capture group 1 of the first match of `pattern` in `line`.

    Returns None when the pattern does not match. When `convert` is given,
    it is applied to the captured text before returning.
    """
    match = re.search(pattern, line)
    if match is None:
        return None
    captured = match.group(1)
    return convert(captured) if convert else captured


def parse_log_line(line):
    """Parse a single log line and extract details.

    Two kinds of lines are recognised, checked in this order:

    * lines containing ``"Clock rate"`` -- ``event_type`` is set to the fixed
      label ``"Clock Rate"`` and ``client`` / ``clock_rate`` are extracted;
    * lines containing ``"EVENT:"`` -- ``event_type``, ``logical_time``,
      ``physical_time``, ``message_queue_length``, ``sender`` and
      ``recipient`` are extracted.

    Args:
        line: The log line text. The timestamp pattern is anchored at the
            start of the string, so leading whitespace should already be
            removed by the caller.

    Returns:
        A dict holding every key in ``_LOG_ENTRY_FIELDS`` (always in that
        order); fields that do not apply to the line type, or whose pattern
        is not found, are None. Returns None when the line is neither a
        clock-rate line nor an event line.
    """
    is_clock_rate = "Clock rate" in line
    if not is_clock_rate and "EVENT:" not in line:
        # Unrecognised format: signal "skip this line" to the caller.
        return None

    # Start from an all-None record so both branches share one schema.
    entry = dict.fromkeys(_LOG_ENTRY_FIELDS)

    # The timestamp is the first two whitespace-separated tokens of the line.
    entry["timestamp"] = _search_group(r"^(\S+\s+\S+)", line)

    if is_clock_rate:
        entry["event_type"] = "Clock Rate"
        entry["client"] = _search_group(r"client (\d+)", line, int)
        entry["clock_rate"] = _search_group(r": (\d+) events per second", line, int)
    else:
        # The event name runs up to the next "|" separator; trim padding.
        entry["event_type"] = _search_group(r"EVENT:\s+([^|]+)", line, str.strip)
        entry["logical_time"] = _search_group(r"LOGICAL TIME:\s*(\d+)", line, int)
        entry["physical_time"] = _search_group(
            r"PHYSICAL TIME:\s*([\d-]+\s[\d:,]+)", line
        )
        entry["message_queue_length"] = _search_group(
            r"MESSAGE QUEUE LENGTH:\s*(\d+)", line, int
        )
        # Sender / recipient are optional and stay None when absent.
        entry["sender"] = _search_group(r"SENDER:\s*(\d+)", line, int)
        entry["recipient"] = _search_group(r"RECIPIENT:\s*(\d+)", line, int)

    return entry

def parse_log_file(file_path):
    """Read a log file and collect its parsed lines into a pandas DataFrame.

    Every line is stripped of surrounding whitespace; blank lines are skipped,
    and so are lines for which ``parse_log_line`` returns a falsy result.

    Args:
        file_path: Path of the log file to open in text mode.

    Returns:
        A DataFrame built from the list of parsed entries, one row per
        retained line.
    """
    with open(file_path, "r") as handle:
        raw_lines = handle.readlines()

    # Lazily trim each line, drop the blank ones, then parse what is left.
    trimmed = (raw.strip() for raw in raw_lines)
    candidates = (parse_log_line(text) for text in trimmed if text)
    records = [record for record in candidates if record]

    return pd.DataFrame(records)

In [None]:
client_1_file = "logs/client_1_events.log"
df1 = parse_log_file(client_1_file)

# Display the parsed DataFrame. It is bound to `df1`; printing `df` raised a
# NameError on a fresh kernel because no such name is defined in this notebook.
print(df1)