# Transform the log data into DataFrames.


In [12]:
import os
import json
import pandas as pd

# Resolve the folder holding the JSON log files, relative to the directory
# this code is run from.
CURRENT_DIR = os.getcwd()
LOGS_DIR = os.path.join(CURRENT_DIR, "dataset/logs")

# Accumulators filled while reading the logs: one dict per game, and one dict
# per entry of a game's nested "choices" list.
games_data, choices_data = [], []

# Iterate over all files in the logs folder. os.listdir() returns entries in
# arbitrary order, so sort them to keep the resulting row order reproducible.
for filename in sorted(os.listdir(LOGS_DIR)):
    # Process only JSON files.
    if not filename.endswith('.json'):
        continue

    file_path = os.path.join(LOGS_DIR, filename)
    # Decode as UTF-8 explicitly instead of relying on the platform's default
    # text encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Parse game-level data.
    game_id = data.get("game_id")
    completion_time = data.get("completion_time")

    # Some files may have a null (or missing) final_choice. Substituting an
    # empty dict makes every final-choice field below fall back to None.
    final_choice = data.get("final_choice") or {}

    games_data.append({
        "game_id": game_id,
        "start_time": pd.to_datetime(data.get("start_time"), utc=True),
        # A missing/empty completion time is stored as None, not parsed.
        "completion_time": (
            pd.to_datetime(completion_time, utc=True) if completion_time else None
        ),
        "total_duration": data.get("total_duration"),
        "success": data.get("success"),
        "chosen_quadrant": final_choice.get("chosen_quadrant"),
        "correct": final_choice.get("correct"),
        "score": final_choice.get("score"),
        "biased_quadrant": final_choice.get("biased_quadrant"),
        "source_file": filename,  # Optional: track the source file of the log.
    })

    # Parse the nested choices list. `or []` also covers an explicit null
    # value, which a .get() default alone would let through as None.
    for choice in data.get("choices") or []:
        choices_data.append({
            "game_id": game_id,
            "round": choice.get("round"),
            "quadrant": choice.get("quadrant"),
            "cue_name": choice.get("cue_name"),
            "color": choice.get("color"),
            # Convert the string timestamp to a timezone-aware (UTC) datetime.
            "timestamp": pd.to_datetime(choice.get("timestamp"), utc=True),
            "choice_number": choice.get("choice_number"),
            "source_file": filename,  # Optional: track the source file.
        })

# Build one DataFrame per accumulated list for further analysis.
games_df, choices_df = pd.DataFrame(games_data), pd.DataFrame(choices_data)

# Print a labelled preview (first rows) of each DataFrame as a sanity check.
for heading, frame in (
    ("Games DataFrame:", games_df),
    ("\nChoices DataFrame:", choices_df),
):
    print(heading)
    print(frame.head())


Games DataFrame:
                                game_id                       start_time  \
0  153f891f-9273-4bc4-a0d6-f78e8f6981df 2025-02-13 13:26:29.375831+00:00   
1  f32696f2-9de2-4678-8493-0afe7b692f10 2025-02-13 13:42:52.313630+00:00   
2  275e3189-faa6-42ac-b10a-15622a58bf2e 2025-02-13 13:23:34.191279+00:00   
3  13160f51-3302-4e6b-b8bd-9bd9de611e7d 2025-02-13 13:27:28.247465+00:00   
4  3e46841f-6365-484e-b5a0-232d6aed517c 2025-02-13 13:25:04.329227+00:00   

                   completion_time  total_duration success  chosen_quadrant  \
0 2025-02-13 13:26:36.372084+00:00        6.996253    True              0.0   
1 2025-02-13 13:43:19.499449+00:00       27.185819   False              0.0   
2 2025-02-13 13:24:09.800099+00:00       35.608820    True              0.0   
3 2025-02-13 13:27:38.204301+00:00        9.956836    True              2.0   
4 2025-02-13 13:25:20.198303+00:00       15.869076   False              2.0   

  correct  score  biased_quadrant  \
0    True  100