# The Game: Data Analysis - JSON to CSV and Analysis

This notebook performs the following:

1.  **Loads game data from a JSON file.**
2.  **Converts the JSON data into a pandas DataFrame.**  This eliminates the need for an intermediate CSV file.
3.  **Performs a comprehensive analysis of the game results**, including:
    *   Overall win rate.
    *   Strategy-specific win rates, average turns (for wins and losses), and average deck sizes.
    *   Distribution of turns for losses.
    *   Average hand size at the end of lost games.
    *   Head-to-head comparisons of all strategies, focusing on games won by at least one of the strategies.
4.  **(Optional) Saves the cleaned DataFrame to a CSV file.**

In [None]:
import json
import pandas as pd
import numpy as np
import re

## JSON to DataFrame Conversion

This function loads the JSON data and converts it directly into a pandas DataFrame.  This avoids the need for an intermediate CSV file, streamlining the process.

In [None]:
def json_to_dataframe(json_file):
    """Load game results from a JSON file into a pandas DataFrame.

    The file must contain a JSON object with a ``game_results`` key holding a
    list of per-game records. For every record:

    * ``final_playing_rows`` is flattened to a string such as
      ``"[1, 5], [1], [100, 90], [100]"`` (a non-list value goes through
      ``str``);
    * ``final_hand`` is flattened to a string such as ``"[2, 3]"``;
    * ``win`` is converted to ``int``.

    A field that is missing or ``null`` in a record is left untouched, so it
    shows up as a missing value in the DataFrame instead of aborting the load.

    Args:
        json_file: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        A DataFrame with one row per game, or ``None`` (after printing an
        error message) when the file cannot be read, is not valid JSON, or
        does not contain a ``game_results`` key.
    """
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError, TypeError) as e:
        # OSError: missing/unreadable file; ValueError: invalid JSON or bad
        # encoding (JSONDecodeError and UnicodeDecodeError are subclasses);
        # TypeError: json_file is not a usable path.
        print(f"Error loading JSON: {e}")
        return None

    # A top-level list/number/string cannot carry the expected key.
    if not isinstance(data, dict) or 'game_results' not in data:
        print("Error: JSON data does not contain a 'game_results' key.")
        return None

    records = []
    for game in data['game_results']:
        record = dict(game)  # shallow copy: leave the loaded JSON untouched

        rows = record.get('final_playing_rows')
        if isinstance(rows, list):
            row_strings = [
                ", ".join(str(x) for x in row) if isinstance(row, list) else str(row)
                for row in rows
            ]
            record['final_playing_rows'] = "[" + "], [".join(row_strings) + "]"
        elif rows is not None:
            record['final_playing_rows'] = str(rows)

        hand = record.get('final_hand')
        if isinstance(hand, list):
            record['final_hand'] = "[" + ", ".join(str(x) for x in hand) + "]"
        elif hand is not None:
            record['final_hand'] = str(hand)

        if record.get('win') is not None:
            record['win'] = int(record['win'])

        records.append(record)

    return pd.DataFrame(records)

## Data Analysis Functions

These functions parse the string representations of playing rows and hands, and perform the core analysis.

In [None]:
def parse_rows(row_str):
    """Parse a playing-rows string into a list of lists of integers.

    The expected input looks like ``"[1, 5], [1], [100, 90], [100]"``: one
    bracketed, comma-separated group per row. Spacing around the commas is
    optional and an extra pair of outer brackets is tolerated.

    The result is padded to at least four rows. The first two rows are made
    to start with 1 and every later row with 100: that base card is inserted
    at the front when it is missing, and an empty row becomes just the base
    card. Elements that are not integers are reported with ``print`` and
    skipped.

    Args:
        row_str: The string to parse. A missing value (NaN/None) or the
            literal ``"[]"`` yields four empty rows. A value that is already
            a list is returned unchanged.

    Returns:
        A list of rows, each row being a list of ints.
    """
    if isinstance(row_str, list):
        # Already parsed (e.g. the column was processed before).
        return row_str
    if pd.isna(row_str):
        return [[] for _ in range(4)]

    text = str(row_str).strip()
    if text == "[]":
        return [[] for _ in range(4)]

    # Split on the "], [" separator between rows and strip the brackets from
    # each piece afterwards. Stripping brackets from the whole string first
    # would swallow empty "[]" rows at either end and shift the split.
    pieces = re.split(r"\]\s*,\s*\[", text)
    pieces.extend([""] * (4 - len(pieces)))

    parsed_rows = []
    for index, piece in enumerate(pieces):
        base_card = 1 if index < 2 else 100
        cards = []
        for token in piece.strip().strip("[]").split(","):
            token = token.strip()
            if not token:
                continue
            try:
                cards.append(int(token))
            except ValueError:
                print(f"Error parsing element: {token} in row: {text}")
        if not cards or cards[0] != base_card:
            cards.insert(0, base_card)
        parsed_rows.append(cards)
    return parsed_rows

In [None]:
def parse_hand(hand_str):
    """Parse a hand string such as ``"[2, 3, 41]"`` into a list of integers.

    Whitespace around the brackets and around the commas is ignored, so
    ``"[2,3]"`` and ``" [2, 3] "`` parse the same way.

    Args:
        hand_str: The string to parse. A missing value (NaN/None), an empty
            string or ``"[]"`` yields an empty list. A value that is already
            a list is returned unchanged.

    Returns:
        The cards as a list of ints. If any element is not an integer, an
        error message is printed and an empty list is returned.
    """
    if isinstance(hand_str, list):
        # Already parsed (e.g. the column was processed before).
        return hand_str
    if pd.isna(hand_str):
        return []

    inner = str(hand_str).strip().strip("[]")
    tokens = (token.strip() for token in inner.split(","))
    try:
        # Blank tokens come from empty input or stray commas; skip them.
        return [int(token) for token in tokens if token]
    except ValueError as e:
        print(f"Error parsing hand string: {hand_str}. Error: {e}")
        return []

In [None]:
def analyze_strategy_comparison(df, strategy1, strategy2):
    """Print a head-to-head comparison of two strategies.

    Only shuffles won by at least one of the two strategies are considered.
    Each distinct ``shuffle_id`` is counted once and classified as won by
    ``strategy1`` only, by ``strategy2`` only, or by both. A strategy with no
    wins at all is still reported (with zero counts) rather than skipped.

    Args:
        df: DataFrame with ``shuffle_id``, ``strategy_name`` and ``win``
            columns, where ``win == 1`` marks a won game.
        strategy1: Name of the first strategy.
        strategy2: Name of the second strategy.

    Returns:
        None. The counts are printed.
    """
    won_games = df[df['win'] == 1]
    won_by_1 = set(
        won_games.loc[won_games['strategy_name'] == strategy1, 'shuffle_id'].dropna()
    )
    won_by_2 = set(
        won_games.loc[won_games['strategy_name'] == strategy2, 'shuffle_id'].dropna()
    )

    only_1 = len(won_by_1 - won_by_2)
    only_2 = len(won_by_2 - won_by_1)
    both = len(won_by_1 & won_by_2)
    # Sanity check: every considered shuffle was won by at least one of the
    # two strategies, so nothing should be left over.
    neither = len(won_by_1 | won_by_2) - only_1 - only_2 - both

    print(f"Comparison of Strategies (considering only shuffles won by at least one): {strategy1} vs. {strategy2}")
    print(f"Wins by {strategy1} only: {only_1}")
    print(f"Wins by {strategy2} only: {only_2}")
    print(f"Wins by both {strategy1} and {strategy2}: {both}")
    print(f"Wins by neither (should be 0): {neither}")

In [None]:
def analyze_the_game_results(df):
    """Print a full analysis of The Game results held in a DataFrame.

    The report covers: total games and overall win rate; per-strategy game
    count, win rate and its standard deviation; mean/std of turns and deck
    size for wins and for losses; the distribution of turns among lost games
    (counts and row percentages); mean/std of the final hand size for lost
    games; the number of distinct shuffle ids; and a head-to-head comparison
    of every pair of strategies.

    Args:
        df: DataFrame containing the columns ``shuffle_id``,
            ``strategy_name``, ``win``, ``turns``, ``deck_size``,
            ``final_playing_rows`` and ``final_hand``. It is modified in
            place: ``final_playing_rows`` and ``final_hand`` are replaced by
            their parsed list form and a ``hand_size`` column is added.

    Returns:
        None. If a required column is missing an error is printed and
        nothing else happens.
    """
    # Check for required columns (optional, since we create the DataFrame)
    required_columns = ['shuffle_id', 'strategy_name', 'win', 'turns', 'deck_size', 'final_playing_rows', 'final_hand']
    if not all(col in df.columns for col in required_columns):
        print(f"Error: DataFrame must contain the following columns: {required_columns}")
        return

    # Parse only the values that are not lists yet, so that running the
    # analysis again on the same DataFrame (e.g. re-executing the notebook
    # cell) works instead of failing on already-parsed data.
    df['final_playing_rows'] = df['final_playing_rows'].apply(
        lambda value: value if isinstance(value, list) else parse_rows(value)
    )
    df['final_hand'] = df['final_hand'].apply(
        lambda value: value if isinstance(value, list) else parse_hand(value)
    )
    df['hand_size'] = df['final_hand'].apply(len)

    won = df[df['win'] == 1]
    lost = df[df['win'] == 0]

    def mean_std_by_strategy(games, column):
        # Mean and standard deviation of `column` per strategy; std is NaN
        # for single-game groups, which is shown as 0.
        return games.groupby('strategy_name')[column].agg(['mean', 'std']).fillna(0)

    print("Overall Statistics:\n")
    print(f"Total Games Played: {len(df)}")
    print(f"Overall Win Rate: {df['win'].mean() * 100:.2f}%")

    print("\nStrategy-wise Statistics:\n")
    strategy_stats = df.groupby('strategy_name')['win'].agg(['count', 'mean', 'std'])
    strategy_stats.rename(columns={'count': 'Games Played', 'mean': 'Win Rate', 'std': 'Win Rate Std Dev'}, inplace=True)
    print(strategy_stats.fillna(0))

    print("\nAverage Turns for wins:\n")
    print(mean_std_by_strategy(won, 'turns'))
    print("\nAverage Turns for losses:\n")
    print(mean_std_by_strategy(lost, 'turns'))
    print("\nAverage deck size for wins:\n")
    print(mean_std_by_strategy(won, 'deck_size'))
    print("\nAverage deck size for losses:\n")
    print(mean_std_by_strategy(lost, 'deck_size'))

    print("\nTurns Distribution (Lost):")
    turns_dist_losts = pd.crosstab(lost['strategy_name'], lost['turns'])
    print(turns_dist_losts)
    print("\nPercentage of Lost Games by Turns:")
    # Normalize each strategy's row so its percentages sum to 100.
    turns_percentage_losts = turns_dist_losts.div(turns_dist_losts.sum(axis=1), axis=0) * 100
    print(turns_percentage_losts)

    print("\nAverage Hand Size at End of Game (by Strategy, for Losses):")
    print(mean_std_by_strategy(lost, 'hand_size'))

    print("\nChecking consistency of shuffle_id. Number of distinct shuffle_id:")
    print(df['shuffle_id'].nunique())

    print("\n--- Strategy Comparison ---")
    strategies = df['strategy_name'].unique()
    # Every unordered pair of strategies exactly once. The comparison only
    # reads from df, so no per-pair copy is needed.
    for i, first in enumerate(strategies):
        for second in strategies[i + 1:]:
            analyze_strategy_comparison(df, first, second)

## Main Execution

This section performs the following:

1.  **Specifies the input JSON file.**  (You'll need to change this!)
2.  **Loads the JSON data into a DataFrame** using the `json_to_dataframe` function.
3.  **Performs the analysis** using the `analyze_the_game_results` function, if the DataFrame was created successfully.
4.  **(Optional) Saves the cleaned DataFrame to a CSV file.**

In [None]:
# --- Main Execution ---

# Path of the JSON results file to analyze; change it to point at your own file.
json_input_file = 'game_results.json'

# Load JSON data into a DataFrame (json_to_dataframe returns None on failure)
df = json_to_dataframe(json_input_file)

# Perform analysis if DataFrame was successfully created
if df is not None:
    analyze_the_game_results(df)
    # Optionally, save the processed DataFrame to CSV
    # df.to_csv('processed_game_results.csv', index=False)