In [1]:
import re
import pandas as pd
from math import sqrt
import os

In [2]:
def parse_ghost_game_data(log_file_path):
    """
    Parse GHOST game data from a log file.

    Args:
        log_file_path (str): Path to the log file.

    Returns:
        pd.DataFrame: DataFrame containing parsed GHOST game data.
    """
    # Initialize an empty structure for current info and results
    current_info = {
        "Boom Diff": None,
        "Stick Diff": None,
        "Bucket Diff": None,
        "Swing Diff": None,
    }
    data = []

    # Patterns for matching data
    boom_pattern = r"Boom: Diff=(?P<boom_diff>[\d.]+)"
    stick_pattern = r"Stick: Diff=(?P<stick_diff>[\d.]+)"
    bucket_pattern = r"Bucket: Diff=(?P<bucket_diff>[\d.]+)"
    swing_pattern = r"Swing: Diff=(?P<swing_diff>[\d.]+)"
    finished_pattern = r"(?P<time>[\d/: ]+ [APM]+): GHOST ROUND (?P<round>\d+) (?P<controller>JOYSTICK|MINIATURE) FINISHED IN (?P<completion_time>[\d.]+) SEC.*"

    # Read the log file
    with open(log_file_path, 'r') as file:
        log_lines = file.readlines()

    # Process each line
    for line in log_lines:
        # Update Boom Diff
        boom_match = re.search(boom_pattern, line)
        if boom_match:
            current_info["Boom Diff"] = float(boom_match.group("boom_diff"))

        # Update Stick Diff
        stick_match = re.search(stick_pattern, line)
        if stick_match:
            current_info["Stick Diff"] = float(stick_match.group("stick_diff"))

        # Update Bucket Diff
        bucket_match = re.search(bucket_pattern, line)
        if bucket_match:
            current_info["Bucket Diff"] = float(bucket_match.group("bucket_diff"))

        # Update Swing Diff
        swing_match = re.search(swing_pattern, line)
        if swing_match:
            current_info["Swing Diff"] = float(swing_match.group("swing_diff"))

        # Match "FINISHED" line
        finished_match = re.match(finished_pattern, line)
        if finished_match:
            # Check round number and skip if it's round 0
            round_number = int(finished_match.group("round"))
            if round_number == 0:
                continue

            # Record data when a round is finished
            task_data = {
                "Task type": "GHOST",
                "Controller": finished_match.group("controller"),
                "Completion Time (s)": float(finished_match.group("completion_time")),
                **current_info,  # Include the current info structure
            }
            data.append(task_data)  # Append to the results

            # Reset the current info structure
            current_info = {
                "Boom Diff": None,
                "Stick Diff": None,
                "Bucket Diff": None,
                "Swing Diff": None,
            }

    # Create a DataFrame
    df = pd.DataFrame(data)
    return df


In [3]:
def parse_bucket_game_data(log_file_path):
    """
    Parse BUCKET game data from a log file.

    Args:
        log_file_path (str): Path to the log file.

    Returns:
        pd.DataFrame: DataFrame containing parsed BUCKET game data.
    """
    # Initialize an empty structure for results
    data = []
    current_task = None

    # Patterns for matching data
    drop_position_pattern = r"dropSandPoint position: \((?P<drop_x>[-\d.]+), (?P<drop_y>[-\d.]+), [-\d.]+\)"
    target_position_pattern = r"targetLocation position: \((?P<target_x>[-\d.]+), (?P<target_y>[-\d.]+), [-\d.]+\)"
    finished_pattern = r"(?P<time>[\d/: ]+ [APM]+): BUCKET ROUND (?P<round>\d+) (?P<controller>JOYSTICK|MINIATURE) FINISHED IN (?P<completion_time>[\d.]+) SEC with (?P<errors>\d+) failed attempts.*"

    # Read the log file
    with open(log_file_path, 'r') as file:
        log_lines = file.readlines()

    # Process each line
    for line in log_lines:
        # Match "FINISHED" line
        finished_match = re.match(finished_pattern, line)
        if finished_match:
            # Check round number and skip if it's round 0
            round_number = int(finished_match.group("round"))
            if round_number == 0:
                continue

            # Save the current task data
            current_task = {
                "Task type": "BUCKET",
                "Controller": finished_match.group("controller"),
                "Completion Time (s)": float(finished_match.group("completion_time")),
                "Errors": int(finished_match.group("errors")),
                "dropSandPoint": None,
                "targetLocation": None,
                "Distance": None,
            }

        # Match "dropSandPoint position"
        if current_task:
            drop_match = re.search(drop_position_pattern, line)
            if drop_match:
                drop_x = float(drop_match.group("drop_x"))
                drop_y = float(drop_match.group("drop_y"))
                current_task["dropSandPoint"] = (drop_x, drop_y)

        # Match "targetLocation position"
        if current_task:
            target_match = re.search(target_position_pattern, line)
            if target_match:
                target_x = float(target_match.group("target_x"))
                target_y = float(target_match.group("target_y"))
                current_task["targetLocation"] = (target_x, target_y)

                # Calculate the distance if both positions are available
                if current_task["dropSandPoint"] and current_task["targetLocation"]:
                    drop_x, drop_y = current_task["dropSandPoint"]
                    target_x, target_y = current_task["targetLocation"]
                    current_task["Distance"] = sqrt((drop_x - target_x) ** 2 + (drop_y - target_y) ** 2)

                # Append the completed task and reset current_task
                data.append(current_task)
                current_task = None

    # Create a DataFrame
    df = pd.DataFrame(data)

    return df


In [6]:
# Define available user IDs
user_ids = ['CE1J', 'A9AB', '1IM9', '1I1I', 'F5S2', '2JW5', 'WSSL', 'RMYD', '50O9']

# Directory containing the log files
log_dir = "../logs/"

# Define the required columns
required_columns = ['User ID', 'Task type', 'Controller', 'Completion Time (s)',
                    'Swing Diff', 'Boom Diff', 'Stick Diff', 'Bucket Diff',
                    'Errors', 'Distance']

In [8]:
# Initialize a list to hold all data
all_data = []

# Iterate over all files in the directory
for filename in os.listdir(log_dir):
    if filename.endswith(".txt"):
        log_file_path = os.path.join(log_dir, filename)
        
        # Determine User ID from filename
        user_id = None
        for uid in user_ids:
            if uid in filename:
                user_id = uid
                break
        if user_id is None:
            continue  # Skip files that don't match any user ID
        
        # Parse the data
        ghost_data = parse_ghost_game_data(log_file_path)
        bucket_data = parse_bucket_game_data(log_file_path)
        
        # Add User ID column
        if not ghost_data.empty:
            ghost_data['User ID'] = user_id
        if not bucket_data.empty:
            bucket_data['User ID'] = user_id
        
        # Append data to the list
        all_data.append(ghost_data)
        all_data.append(bucket_data)

# Combine all data into a single DataFrame
if all_data:
    result_df = pd.concat(all_data, ignore_index=True)
    
    # Ensure all required columns are present
    for col in required_columns:
        if col not in result_df.columns:
            result_df[col] = None
    
    # Reorder columns
    result_df = result_df[required_columns]
    
    # Write to CSV
    result_df.to_csv('log-files-results.csv', index=False)
    
    # Print the data
    print(result_df)
else:
    print("No data found.")


   User ID Task type Controller  Completion Time (s)  Swing Diff  Boom Diff  \
0     WSSL     GHOST   JOYSTICK            97.085105    1.408867   1.402743   
1     WSSL     GHOST   JOYSTICK           122.688612    1.325277   1.429275   
2     WSSL     GHOST  MINIATURE            94.481276    0.209357   0.243893   
3     WSSL     GHOST  MINIATURE            81.869956    0.487787   1.153504   
4     WSSL    BUCKET   JOYSTICK            62.025889         NaN        NaN   
..     ...       ...        ...                  ...         ...        ...   
67    1I1I     GHOST  MINIATURE            48.125688    0.000000   0.276953   
68    1I1I    BUCKET   JOYSTICK           194.232837         NaN        NaN   
69    1I1I    BUCKET   JOYSTICK            55.231274         NaN        NaN   
70    1I1I    BUCKET  MINIATURE           132.634657         NaN        NaN   
71    1I1I    BUCKET  MINIATURE            65.924959         NaN        NaN   

    Stick Diff  Bucket Diff  Errors  Distance  
0  