## Player Types:

- `zs`: An AI agent initialized with a zero-shot prompt: just the game description.
- `spp`: Solo Performance Prompting; an AI agent initialized with the SPP prompt.
- `cot`: Chain-of-Thought; an AI agent initialized with the COT prompt.
- `srep`: Single-Round-Equilibrium-Player; a player who strictly follows the Single Round Equilibrium Strategy (a specific probability distribution over the available moves).
- `pp`: Pattern Player; follows a cyclic pattern of moves and always plays moves from this pattern.
- `ap`: Adaptive Player; finds the most frequent move their opponent plays and counters it.
- `tft`: Tit-for-Tat Player; counters opponent's last played move.

## SC experiments:

- We compare an AI agent in a SC environment vs all other agents. If the opponent is also an AI agent, then the opponent is **NOT** a SC player.
- SC: Each time the AI agent has to play, it generates 5 different answers. We then pick the most frequent result and keep one answer that gave that result. We continue with this history for the rest of the game (however many rounds remain). Conflicts are resolved at random and/or by choosing the first answer that gave the result we picked.

In [1]:
# Models under evaluation. Each entry carries the model identifier, the display
# name (also used as a directory name when reading logs) and whether the run
# uses "thinking". The same identifier appears twice when a model is listed
# both with and without thinking.
models = [
    {"id": model_id, "name": display_name, "thinking": uses_thinking}
    for model_id, display_name, uses_thinking in (
        ("anthropic.claude-3-5-sonnet-20241022-v2:0", "Claude 3.5 Sonnet v2", False),
        ("us.anthropic.claude-3-7-sonnet-20250219-v1:0", "Claude 3.7 Sonnet", False),
        ("us.anthropic.claude-3-7-sonnet-20250219-v1:0", "Claude 3.7 Sonnet (Thinking)", True),
        ("us.anthropic.claude-sonnet-4-20250514-v1:0", "Claude Sonnet 4", False),
        ("us.anthropic.claude-sonnet-4-20250514-v1:0", "Claude Sonnet 4 (Thinking)", True),
        ("us.meta.llama3-3-70b-instruct-v1:0", "Llama 3.3 70B Instruct", False),
        ("mistral.mistral-large-2407-v1:0", "Mistral Large (24.07)", False),
        ("us.deepseek.r1-v1:0", "DeepSeek-R1", False),
    )
]

# Names of the game-settings variants; each is the last directory component
# under which the corresponding game logs are stored.
game_settings_types = ["pd", "pd-alt", "sh", "sh-alt"]

# Prompt styles of the AI agents (zero-shot, SPP, chain-of-thought).
prompt_types = ["zs", "spp", "cot"]

In [2]:
import pandas as pd
import os
import json
from collections import defaultdict

def get_total_points_dataframe(
    log_dir: str,
    model_names: list[str],
    prompt_types: list[str],
    game_type: str,
    game_settings_type: str,
    iteration_cnt: int,
    tot: bool,
) -> pd.DataFrame:
    """Collect player 0's total points per (model, prompt) row and opponent column.

    Logs are read from
    ``<log_dir>/iteration_<i>/<model>/<game_type>/<game_settings_type>/<game>/game.json``
    for every ``i`` in ``range(iteration_cnt)``. A game is counted only when
    ``player_0_player_type`` is one of ``prompt_types``; the opponent type is
    ``player_1_player_type``. Missing directories or files are skipped.

    Args:
        log_dir: Root directory of the logs.
        model_names: Model directory names; also the first index level.
        prompt_types: Prompt types to report; also the second index level.
        game_type: Directory component below the model directory.
        game_settings_type: Directory component below ``game_type``.
        iteration_cnt: Number of ``iteration_<i>`` directories to scan.
        tot: When true, the row labels ``zs``/``spp``/``cot`` are rewritten to
            ``sc-zs``/``sc-spp``/``sc-cot``.

    Returns:
        A DataFrame indexed by a ``(model, prompt)`` MultiIndex with one row
        per model/prompt combination, in the order given. Columns are the
        opponent types found in the logs, restricted to and ordered as
        ``zs, spp, cot, srep, pp, mf, tft``. Each cell is the list of total
        points (one entry per game found), or the placeholder ``-1000`` when
        no game was found for that combination.

    Raises:
        ValueError: If a counted game lacks ``player_0_total_points`` or if no
            game was collected at all.
    """
    # Row-label rewrites, applied with str.replace to every prompt label.
    y_replacements = {"zs": "sc-zs", "spp": "sc-spp", "cot": "sc-cot"} if tot else {}

    # {(model, prompt) -> {opponent_type -> [total_points, ...]}}
    heatmap_data = defaultdict(lambda: defaultdict(list))
    opponent_set = set()

    # The log directory does not depend on the prompt, so each game.json is
    # parsed once and routed by the prompt type recorded for player 0.
    for model in model_names:
        for itr in range(iteration_cnt):
            directory = os.path.join(log_dir, f"iteration_{itr}", model, game_type, game_settings_type)
            if not os.path.isdir(directory):
                continue

            for game_dir in sorted(os.listdir(directory)):
                info_path = os.path.join(directory, game_dir, 'game.json')
                if not os.path.isfile(info_path):
                    continue

                # JSON is UTF-8 by specification; do not rely on the locale default.
                with open(info_path, encoding="utf-8") as f:
                    info = json.load(f)

                prompt = info.get("player_0_player_type")
                if prompt not in prompt_types:
                    continue

                opponent_type = info.get("player_1_player_type")
                opponent_set.add(opponent_type)

                total_points = info.get("player_0_total_points")
                if total_points is None:
                    raise ValueError(f"Missing total_points for {info_path}")

                heatmap_data[(model, prompt)][opponent_type].append(total_points)

    if not heatmap_data:
        raise ValueError("No data collected — check log paths and model+prompt naming conventions.")

    # Fixed column order; opponent types outside this list are not reported.
    opponent_types_aux = ["zs", "spp", "cot", "srep", "pp", "mf", "tft"]
    sorted_opponents = [opp for opp in opponent_types_aux if opp in opponent_set]

    rows = []
    index_tuples = []
    for model in model_names:
        for prompt in prompt_types:
            new_prompt = prompt
            for old, new in y_replacements.items():
                new_prompt = new_prompt.replace(old, new)
            index_tuples.append((model, new_prompt))

            per_opponent = heatmap_data.get((model, prompt), {})
            # -1000 marks "no game found"; downstream code treats non-list cells as empty.
            rows.append([per_opponent.get(opp, -1000) for opp in sorted_opponents])

    index = pd.MultiIndex.from_tuples(index_tuples, names=["model", "prompt"])
    return pd.DataFrame(rows, index=index, columns=list(sorted_opponents))


In [3]:
# Total-points tables for the plain (non-SC) runs: one DataFrame per
# game-settings type, read from 5 iterations of logs.
dfs_nonsc_tp = [
    get_total_points_dataframe(
        game_settings_type=settings_name,
        game_type="pd",
        log_dir="../logs_pd/logs_3/data",
        iteration_cnt=5,
        tot=False,
        prompt_types=prompt_types,
        model_names=[entry["name"] for entry in models],
    )
    for settings_name in game_settings_types
]

# Same tables for the SC runs (tot=True relabels the rows with an "sc-"
# prefix), read from 2 iterations of logs.
dfs_sc_tp = [
    get_total_points_dataframe(
        game_settings_type=settings_name,
        game_type="pd",
        log_dir="../logs_pd/logs_3/data_tot",
        iteration_cnt=2,
        tot=True,
        prompt_types=prompt_types,
        model_names=[entry["name"] for entry in models],
    )
    for settings_name in game_settings_types
]


In [4]:
# Row order inside each model: plain prompts first, then their SC counterparts.
prompt_order = ['zs', 'cot', 'spp', 'sc-zs', 'sc-cot', 'sc-spp']
prompt_order_map = {prompt: i for i, prompt in enumerate(prompt_order)}

dfs_merged_tp = []

for df_nonsc, df_sc in zip(dfs_nonsc_tp, dfs_sc_tp):
    # The SC frames were built with tot=True, so their prompt labels already
    # carry the "sc-" prefix and the two frames can be stacked directly.
    merged_df = (
        pd.concat([df_nonsc, df_sc])
        .reset_index()
        # Temporary sort key; prompts missing from prompt_order map to NaN.
        .assign(prompt_order=lambda frame: frame['prompt'].map(prompt_order_map))
        # Models in ascending name order, prompts in the custom order above.
        .sort_values(['model', 'prompt_order'])
        .drop(columns=['prompt_order'])
        .set_index(['model', 'prompt'])
    )

    dfs_merged_tp.append(merged_df)


In [5]:
import numpy as np

# Short model labels used as row headers in the LaTeX tables.
rename_map = {
    "Claude 3.5 Sonnet v2": "C3.5Sv2",
    "Claude 3.7 Sonnet": "C3.7S",
    "Claude 3.7 Sonnet (Thinking)": "C3.7S(T)",
    "Claude Sonnet 4": "C4S",
    "Claude Sonnet 4 (Thinking)": "C4S(T)",
    "DeepSeek-R1": "DS-R1",
    "Llama 3.3 70B Instruct": "L3.3-70B",
    "Mistral Large (24.07)": "M-L(24.07)"
}

for df, game_settings_type in zip(dfs_merged_tp, game_settings_types):
    # rename() returns a new frame, so the merged frame itself is left untouched.
    df = df.rename(index=rename_map)

    # Mean of each cell's list of samples; non-list cells become NaN.
    mean_df = df.map(lambda x: np.mean(x) if isinstance(x, list) else np.nan)

    # Identify maxima based on means
    row_max_mask = mean_df.eq(mean_df.max(axis=1), axis=0)
    col_max_mask = mean_df.eq(mean_df.max(axis=0), axis=1)

    # Make values strings, bold if max in row or column
    styled_df = df.astype(str)
    for row in df.index:
        for col in df.columns:
            vals = df.loc[row, col]

            if not isinstance(vals, list) or len(vals) == 0:
                # Placeholder or empty cell: render as blank.
                formatted = ""
            else:
                mean_val = np.mean(vals)
                std_val = np.std(vals, ddof=1)  # sample std (ddof=1); NaN for a single sample

                # Check if it's a max cell (by mean)
                is_max = row_max_mask.loc[row, col] or col_max_mask.loc[row, col]

                formatted = f"{mean_val:.1f} $\\pm$ {std_val:.1f}"
                if is_max:
                    formatted = f"\\textbf{{{formatted}}}"

            styled_df.loc[row, col] = formatted

    # Add game_settings_type as MultiIndex header
    styled_df.columns = pd.MultiIndex.from_product(
        [[game_settings_type], styled_df.columns],
    )

    # Output LaTeX
    latex_code = styled_df.to_latex(
        index=True,
        multirow=True,
        multicolumn=True,
        multicolumn_format='c',
        escape=False,  # Allow \textbf
        caption=f"Total Points Averaged Over All Iterations ({game_settings_type})",
        label=f"tab:pd_total_points_avg_heatmap_{game_settings_type}",
    )
    print(latex_code)
    print("\n\n")


\begin{table}
\caption{Total Points Averaged Over All Iterations (pd)}
\label{tab:pd_total_points_avg_heatmap_pd}
\begin{tabular}{lllllllll}
\toprule
 &  & \multicolumn{7}{c}{pd} \\
 &  & zs & spp & cot & srep & pp & mf & tft \\
model & prompt &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{6}{*}{C3.5Sv2} & zs & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & 29.8 $\pm$ 0.4 & 54.2 $\pm$ 4.5 & 30.2 $\pm$ 1.1 & 30.6 $\pm$ 0.9 \\
 & cot & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & 30.0 $\pm$ 0.0 & 53.6 $\pm$ 1.5 & 31.0 $\pm$ 1.0 & 30.4 $\pm$ 0.9 \\
 & spp & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & 30.4 $\pm$ 0.5 & 58.4 $\pm$ 4.3 & 30.6 $\pm$ 0.9 & 32.0 $\pm$ 0.0 \\
 & sc-zs & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & 30.5 $\pm$ 0.7 & 52.0 $\pm$ 0.0 & 32.0 $\pm$ 0.0 & 30.0 $\pm$ 1.4 \\
 & sc-cot & \textbf{64.0 $\pm$ 0.0} & \textbf{64.0 $\pm$ 0.0} & \textbf

In [6]:
import pandas as pd
import os
import json
from collections import defaultdict

def get_efficiency_dataframe(
    log_dir: str,
    model_names: list[str],
    prompt_types: list[str],
    game_type: str,
    game_settings_type: str,
    iteration_cnt: int,
    tot: bool,
) -> pd.DataFrame:
    """Collect player 0's points per 1000 tokens per (model, prompt) and opponent.

    Logs are read from
    ``<log_dir>/iteration_<i>/<model>/<game_type>/<game_settings_type>/<game>/game.json``
    for every ``i`` in ``range(iteration_cnt)``. A game is counted only when
    ``player_0_player_type`` is one of ``prompt_types``; the opponent type is
    ``player_1_player_type``. The per-game value is
    ``player_0_total_points / max(player_0_tokens) * 1000``.

    Args:
        log_dir: Root directory of the logs.
        model_names: Model directory names; also the first index level.
        prompt_types: Prompt types to report; also the second index level.
        game_type: Directory component below the model directory.
        game_settings_type: Directory component below ``game_type``.
        iteration_cnt: Number of ``iteration_<i>`` directories to scan.
        tot: When true, the row labels ``zs``/``spp``/``cot`` are rewritten to
            ``sc-zs``/``sc-spp``/``sc-cot``.

    Returns:
        A DataFrame indexed by a ``(model, prompt)`` MultiIndex with one row
        per model/prompt combination, in the order given. Columns are the
        opponent types found in the logs (restricted to and ordered as
        ``zs, spp, cot, srep, pp, mf, tft``) followed by ``avg``. Each
        opponent cell is the list of per-game efficiencies; the ``avg`` cell
        is the flat concatenation of that row's opponent lists in column
        order. Cells without any game hold the placeholder ``-1000``.

    Raises:
        ValueError: If a counted game lacks ``player_0_tokens`` or
            ``player_0_total_points``, or if no game was collected at all.
    """
    # Row-label rewrites, applied with str.replace to every prompt label.
    y_replacements = {"zs": "sc-zs", "spp": "sc-spp", "cot": "sc-cot"} if tot else {}

    # {(model, prompt) -> {opponent_type -> [points per 1000 tokens, ...]}}
    efficiency_data = defaultdict(lambda: defaultdict(list))
    opponent_set = set()

    # The log directory does not depend on the prompt, so each game.json is
    # parsed once and routed by the prompt type recorded for player 0.
    for model in model_names:
        for itr in range(iteration_cnt):
            directory = os.path.join(log_dir, f"iteration_{itr}", model, game_type, game_settings_type)
            if not os.path.isdir(directory):
                continue

            for game_dir in sorted(os.listdir(directory)):
                info_path = os.path.join(directory, game_dir, 'game.json')
                if not os.path.isfile(info_path):
                    continue

                # JSON is UTF-8 by specification; do not rely on the locale default.
                with open(info_path, encoding="utf-8") as f:
                    info = json.load(f)

                prompt = info.get("player_0_player_type")
                if prompt not in prompt_types:
                    continue

                opponent_type = info.get("player_1_player_type")
                opponent_set.add(opponent_type)

                tokens = info.get("player_0_tokens")
                total_points = info.get("player_0_total_points")

                if tokens is None or total_points is None:
                    print(f"Model {model}, Prompt {prompt}, Iteration {itr}, Game directory {game_dir} - Missing data in {info_path}")
                    raise ValueError(f"Missing tokens or total_points for {info_path}")

                # NOTE(review): assumes tokens is a non-empty sequence of numbers
                # with a positive maximum - confirm against the log writer.
                efficiency_data[(model, prompt)][opponent_type].append(
                    total_points / max(tokens) * 1000  # Scale to per 1000 tokens
                )

    if not efficiency_data:
        raise ValueError("No data collected — check log paths and model+prompt naming conventions.")

    # Fixed column order; opponent types outside this list are not reported.
    opponent_types_aux = ["zs", "spp", "cot", "srep", "pp", "mf", "tft"]
    sorted_opponents = [opp for opp in opponent_types_aux if opp in opponent_set]

    # Pool every opponent's samples into one flat list per row. extend() keeps
    # the result one-dimensional even when opponents have different sample
    # counts, and .get() avoids creating empty entries for missing opponents.
    for per_opponent in efficiency_data.values():
        pooled = []
        for opp in sorted_opponents:
            pooled.extend(per_opponent.get(opp, []))
        per_opponent["avg"] = pooled

    sorted_opponents.append("avg")  # Add average to the end of the list

    rows = []
    index_tuples = []
    for model in model_names:
        for prompt in prompt_types:
            new_prompt = prompt
            for old, new in y_replacements.items():
                new_prompt = new_prompt.replace(old, new)
            index_tuples.append((model, new_prompt))

            per_opponent = efficiency_data.get((model, prompt), {})
            # -1000 marks "no game found"; downstream code treats non-list cells as empty.
            rows.append([per_opponent.get(opp, -1000) for opp in sorted_opponents])

    index = pd.MultiIndex.from_tuples(index_tuples, names=["model", "prompt"])
    return pd.DataFrame(rows, index=index, columns=list(sorted_opponents))


In [7]:
# Efficiency tables for the plain (non-SC) runs: one DataFrame per
# game-settings type, read from 5 iterations of logs.
dfs_nonsc_ef = [
    get_efficiency_dataframe(
        game_settings_type=settings_name,
        game_type="pd",
        log_dir="../logs_pd/logs_3/data",
        iteration_cnt=5,
        tot=False,
        prompt_types=prompt_types,
        model_names=[entry["name"] for entry in models],
    )
    for settings_name in game_settings_types
]

# Same tables for the SC runs (tot=True relabels the rows with an "sc-"
# prefix), read from 2 iterations of logs.
dfs_sc_ef = [
    get_efficiency_dataframe(
        game_settings_type=settings_name,
        game_type="pd",
        log_dir="../logs_pd/logs_3/data_tot",
        iteration_cnt=2,
        tot=True,
        prompt_types=prompt_types,
        model_names=[entry["name"] for entry in models],
    )
    for settings_name in game_settings_types
]



In [8]:
# Row order inside each model: plain prompts first, then their SC counterparts.
prompt_order = ['zs', 'cot', 'spp', 'sc-zs', 'sc-cot', 'sc-spp']
prompt_order_map = {prompt: i for i, prompt in enumerate(prompt_order)}

dfs_merged_ef = []

for df_nonsc, df_sc in zip(dfs_nonsc_ef, dfs_sc_ef):
    # The SC frames were built with tot=True, so their prompt labels already
    # carry the "sc-" prefix and the two frames can be stacked directly.
    merged_df = (
        pd.concat([df_nonsc, df_sc])
        .reset_index()
        # Temporary sort key; prompts missing from prompt_order map to NaN.
        .assign(prompt_order=lambda frame: frame['prompt'].map(prompt_order_map))
        # Models in ascending name order, prompts in the custom order above.
        .sort_values(['model', 'prompt_order'])
        .drop(columns=['prompt_order'])
        .set_index(['model', 'prompt'])
    )

    dfs_merged_ef.append(merged_df)


In [9]:
for df, game_settings_type in zip(dfs_merged_ef, game_settings_types):
    df = df.copy()

    # Mean of each cell's list of samples; non-list cells become NaN.
    mean_df = df.map(lambda x: np.mean(x) if isinstance(x, list) else np.nan)

    # Identify maxima based on means
    row_max_mask = mean_df.eq(mean_df.max(axis=1), axis=0)
    col_max_mask = mean_df.eq(mean_df.max(axis=0), axis=1)

    # Make values strings, bold if max in row or column
    styled_df = df.astype(str)
    for row in df.index:
        for col in df.columns:
            vals = df.loc[row, col]

            if not isinstance(vals, list) or len(vals) == 0:
                # Placeholder or empty cell: render as blank.
                formatted = ""
            else:
                mean_val = np.mean(vals)
                std_val = np.std(vals, ddof=1)  # sample std (ddof=1); NaN for a single sample

                # Check if it's a max cell (by mean)
                is_max = row_max_mask.loc[row, col] or col_max_mask.loc[row, col]

                formatted = f"{mean_val:.1f} $\\pm$ {std_val:.1f}"
                if is_max:
                    formatted = f"\\textbf{{{formatted}}}"

            styled_df.loc[row, col] = formatted

    # Add game_settings_type as MultiIndex header
    styled_df.columns = pd.MultiIndex.from_product(
        [[game_settings_type], styled_df.columns],
    )

    # Output LaTeX
    latex_code = styled_df.to_latex(
        index=True,
        multirow=True,
        multicolumn=True,
        multicolumn_format='c',
        escape=False,  # Allow \textbf
        caption=f"Average Efficiency (Points per kilo-token) ({game_settings_type})",
        label=f"tab:pd_efficiency_avg_heatmap_{game_settings_type}",
    )
    print(latex_code)
    print("\n\n")


\begin{table}
\caption{Average Efficiency (Points per kilo-token) (pd)}
\label{tab:pd_efficiency_avg_heatmap_pd}
\begin{tabular}{llllllllll}
\toprule
 &  & \multicolumn{8}{c}{pd} \\
 &  & zs & spp & cot & srep & pp & mf & tft & avg \\
model & prompt &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{6}{*}{Claude 3.5 Sonnet v2} & zs & 26.4 $\pm$ 8.2 & \textbf{29.5 $\pm$ 5.7} & 27.7 $\pm$ 5.7 & 10.0 $\pm$ 1.4 & 17.5 $\pm$ 8.2 & 10.1 $\pm$ 4.3 & 7.3 $\pm$ 1.3 & 18.3 $\pm$ 10.2 \\
 & cot & 12.6 $\pm$ 0.9 & 13.1 $\pm$ 1.9 & \textbf{13.7 $\pm$ 3.2} & 4.7 $\pm$ 0.1 & 8.3 $\pm$ 0.4 & 5.7 $\pm$ 0.6 & 4.9 $\pm$ 0.3 & 9.0 $\pm$ 4.0 \\
 & spp & \textbf{10.6 $\pm$ 0.9} & 10.4 $\pm$ 0.8 & 10.5 $\pm$ 1.2 & 3.9 $\pm$ 0.4 & 8.1 $\pm$ 0.7 & 4.5 $\pm$ 0.4 & 4.7 $\pm$ 0.4 & 7.5 $\pm$ 3.0 \\
 & sc-zs & 8.7 $\pm$ 3.4 & 8.7 $\pm$ 3.3 & \textbf{10.7 $\pm$ 0.4} & 2.9 $\pm$ 0.1 & 6.1 $\pm$ 3.7 & 2.7 $\pm$ 0.3 & 4.9 $\pm$ 0.0 & 6.4 $\pm$ 3.4 \\
 & sc-cot & 3.9 $\pm$ 0.2 & \textbf{5.3 $\pm$ 1.8} & 3.8 $\pm$ 0.3 & 1

In [10]:
def optimal_move(move1, move2, game_settings):
    """Tell whether ``move1`` does at least as well against ``move2`` as mirroring it.

    ``game_settings["a"]`` and ``game_settings["b"]`` name the two moves; the
    two-letter keys ``"aa"``, ``"ab"``, ``"ba"``, ``"bb"`` hold the value for
    playing the first letter against the second (presumably the payoff to the
    player of the first letter - confirm against the game definition). Any
    ``move1`` other than move ``a`` is treated as move ``b``.

    Returns:
        ``True`` when the value of ``move1`` against ``move2`` is greater than
        or equal to the value of playing ``move2`` against itself, ``False``
        otherwise, and ``None`` when ``move2`` is neither of the two moves.
    """
    first_move = game_settings["a"]

    if move2 == first_move:
        own_key = "aa" if move1 == first_move else "ba"
        return game_settings[own_key] >= game_settings["aa"]

    if move2 == game_settings["b"]:
        own_key = "ab" if move1 == first_move else "bb"
        return game_settings[own_key] >= game_settings["bb"]

    return None

In [11]:
import pandas as pd
import os
import json
from collections import defaultdict

def get_round_of_understood_opponent(
    log_dir: str,
    model_names: list[str],
    prompt_types: list[str],
    game_type: str,
    game_settings_type: str,
    iter_cnt: int,
    tot: bool,
    target_percentage: float = 0.9,
) -> pd.DataFrame:
    """Find, per game, the first round from which player 0 mostly plays "optimally".

    Logs are read from
    ``<log_dir>/iteration_<i>/<model>/<game_type>/<game_settings_type>/<game>/game.json``
    for every ``i`` in ``range(iter_cnt)``. A game is counted only when
    ``player_0_player_type`` is one of ``prompt_types``; the opponent type is
    ``player_1_player_type``.

    For each game the two move lists are paired from the last round backwards
    and every pair is judged with ``optimal_move(move, op_move,
    info["game_settings"])``. The recorded value is the first 1-indexed
    position ``r`` for which the share of truthy verdicts from ``r`` to the
    end is at least ``target_percentage``; when no position qualifies it is
    the number of paired rounds plus one.

    Args:
        log_dir: Root directory of the logs.
        model_names: Model directory names; also the first index level.
        prompt_types: Prompt types to report; also the second index level.
        game_type: Directory component below the model directory.
        game_settings_type: Directory component below ``game_type``.
        iter_cnt: Number of ``iteration_<i>`` directories to scan.
        tot: When true, the row labels ``zs``/``spp``/``cot`` are rewritten to
            ``sc-zs``/``sc-spp``/``sc-cot``.
        target_percentage: Minimum share of truthy verdicts required from a
            round to the end of the game. Defaults to 0.9.

    Returns:
        A DataFrame indexed by a ``(model, prompt)`` MultiIndex with one row
        per model/prompt combination, in the order given. Columns are the
        opponent types found in the logs, restricted to and ordered as
        ``zs, spp, cot, srep, pp, mf, tft``. Each cell is the list of
        per-game round numbers, or the placeholder ``-1000`` when no game was
        found for that combination.

    Raises:
        ValueError: If a counted game lacks ``player_0_tokens``,
            ``player_0_moves`` or ``player_1_moves``, or if no game was
            collected at all.
    """
    # Row-label rewrites, applied with str.replace to every prompt label.
    y_replacements = {"zs": "sc-zs", "spp": "sc-spp", "cot": "sc-cot"} if tot else {}

    # {(model, prompt) -> {opponent_type -> [round_of_understood, ...]}}
    round_data = defaultdict(lambda: defaultdict(list))
    opponent_set = set()

    # The log directory does not depend on the prompt, so each game.json is
    # parsed once and routed by the prompt type recorded for player 0.
    for model in model_names:
        for itr in range(iter_cnt):
            directory = os.path.join(log_dir, f"iteration_{itr}", model, game_type, game_settings_type)
            if not os.path.isdir(directory):
                continue

            for game_dir in sorted(os.listdir(directory)):
                info_path = os.path.join(directory, game_dir, 'game.json')
                if not os.path.isfile(info_path):
                    continue

                # JSON is UTF-8 by specification; do not rely on the locale default.
                with open(info_path, encoding="utf-8") as f:
                    info = json.load(f)

                prompt = info.get("player_0_player_type")
                if prompt not in prompt_types:
                    continue

                opponent_type = info.get("player_1_player_type")
                opponent_set.add(opponent_type)

                # Tokens are not used in the computation below; their presence
                # is still required so incomplete logs keep being rejected.
                tokens = info.get("player_0_tokens")
                moves = info.get("player_0_moves")
                op_moves = info.get("player_1_moves")

                if tokens is None or moves is None or op_moves is None:
                    print(f"Model {model}, Prompt {prompt}, Iteration {itr}, Game directory {game_dir} - Missing data in {info_path}")
                    raise ValueError(f"Missing tokens or moves for {info_path}")

                # Running share of truthy verdicts, accumulated from the last
                # round backwards, then flipped into chronological order.
                suffix_rates = []
                wins = 0
                paired_from_end = zip(reversed(moves), reversed(op_moves))
                for played, (move, op_move) in enumerate(paired_from_end, start=1):
                    if optimal_move(move, op_move, info["game_settings"]):
                        wins += 1
                    suffix_rates.append(wins / played)
                suffix_rates.reverse()

                round_of_understood = next(
                    (i + 1 for i, rate in enumerate(suffix_rates) if rate >= target_percentage),
                    len(suffix_rates) + 1,
                )

                round_data[(model, prompt)][opponent_type].append(round_of_understood)

    if not round_data:
        raise ValueError("No data collected — check log paths and model+prompt naming conventions.")

    # Fixed column order; opponent types outside this list are not reported.
    opponent_types_aux = ["zs", "spp", "cot", "srep", "pp", "mf", "tft"]
    sorted_opponents = [opp for opp in opponent_types_aux if opp in opponent_set]

    rows = []
    index_tuples = []
    for model in model_names:
        for prompt in prompt_types:
            new_prompt = prompt
            for old, new in y_replacements.items():
                new_prompt = new_prompt.replace(old, new)
            index_tuples.append((model, new_prompt))

            per_opponent = round_data.get((model, prompt), {})
            # -1000 marks "no game found"; downstream code treats non-list cells as empty.
            rows.append([per_opponent.get(opp, -1000) for opp in sorted_opponents])

    index = pd.MultiIndex.from_tuples(index_tuples, names=["model", "prompt"])
    return pd.DataFrame(rows, index=index, columns=list(sorted_opponents))


In [12]:
# "Round of understanding" tables for the plain (non-SC) runs: one DataFrame
# per game-settings type, read from 5 iterations of logs.
dfs_nonsc_r = [
    get_round_of_understood_opponent(
        game_settings_type=settings_name,
        game_type="pd",
        log_dir="../logs_pd/logs_3/data",
        iter_cnt=5,
        tot=False,
        prompt_types=prompt_types,
        model_names=[entry["name"] for entry in models],
    )
    for settings_name in game_settings_types
]

# Same tables for the SC runs (tot=True relabels the rows with an "sc-"
# prefix), read from 2 iterations of logs.
dfs_sc_r = [
    get_round_of_understood_opponent(
        game_settings_type=settings_name,
        game_type="pd",
        log_dir="../logs_pd/logs_3/data_tot",
        iter_cnt=2,
        tot=True,
        prompt_types=prompt_types,
        model_names=[entry["name"] for entry in models],
    )
    for settings_name in game_settings_types
]



In [13]:
# Row order inside each model: plain prompts first, then their SC counterparts.
prompt_order = ['zs', 'cot', 'spp', 'sc-zs', 'sc-cot', 'sc-spp']
prompt_order_map = {prompt: i for i, prompt in enumerate(prompt_order)}

dfs_merged_r = []

for df_nonsc, df_sc in zip(dfs_nonsc_r, dfs_sc_r):
    # The SC frames were built with tot=True, so their prompt labels already
    # carry the "sc-" prefix and the two frames can be stacked directly.
    merged_df = (
        pd.concat([df_nonsc, df_sc])
        .reset_index()
        # Temporary sort key; prompts missing from prompt_order map to NaN.
        .assign(prompt_order=lambda frame: frame['prompt'].map(prompt_order_map))
        # Models in ascending name order, prompts in the custom order above.
        .sort_values(['model', 'prompt_order'])
        .drop(columns=['prompt_order'])
        .set_index(['model', 'prompt'])
    )

    dfs_merged_r.append(merged_df)


In [14]:
import numpy as np

# Short model labels used as row headers in the LaTeX tables.
rename_map = {
    "Claude 3.5 Sonnet v2": "C3.5Sv2",
    "Claude 3.7 Sonnet": "C3.7S",
    "Claude 3.7 Sonnet (Thinking)": "C3.7S(T)",
    "Claude Sonnet 4": "C4S",
    "Claude Sonnet 4 (Thinking)": "C4S(T)",
    "DeepSeek-R1": "DS-R1",
    "Llama 3.3 70B Instruct": "L3.3-70B",
    "Mistral Large (24.07)": "M-L(24.07)"
}

for df, game_settings_type in zip(dfs_merged_r, game_settings_types):
    # rename() returns a new frame, so the merged frame itself is left untouched.
    df = df.rename(index=rename_map)

    # Mean of each cell's list of samples; non-list cells become NaN.
    mean_df = df.map(lambda x: np.mean(x) if isinstance(x, list) else np.nan)

    # Identify minima based on means (a lower round number is better here)
    row_min_mask = mean_df.eq(mean_df.min(axis=1), axis=0)
    col_min_mask = mean_df.eq(mean_df.min(axis=0), axis=1)

    # Make values strings, bold if min in row or column
    styled_df = df.astype(str)
    for row in df.index:
        for col in df.columns:
            vals = df.loc[row, col]

            if not isinstance(vals, list) or len(vals) == 0:
                # Placeholder or empty cell: render as blank.
                formatted = ""
            else:
                mean_val = np.mean(vals)
                std_val = np.std(vals, ddof=1)  # sample std (ddof=1); NaN for a single sample

                # Check if it's a min cell (by mean)
                is_min = row_min_mask.loc[row, col] or col_min_mask.loc[row, col]

                formatted = f"{mean_val:.1f} $\\pm$ {std_val:.1f}"
                if is_min:
                    formatted = f"\\textbf{{{formatted}}}"

            styled_df.loc[row, col] = formatted

    # Add game_settings_type as MultiIndex header
    styled_df.columns = pd.MultiIndex.from_product(
        [[game_settings_type], styled_df.columns],
    )

    # Output LaTeX
    latex_code = styled_df.to_latex(
        index=True,
        multirow=True,
        multicolumn=True,
        multicolumn_format='c',
        escape=False,  # Allow \textbf
        caption=f"Round \\# where the Agent understood the opponent's Strategy ({game_settings_type})",
        label=f"tab:pd_round_heatmap_{game_settings_type}",
    )
    print(latex_code)
    print("\n\n")


\begin{table}
\caption{Round \# where the Agent understood the opponent's Strategy (pd)}
\label{tab:pd_round_heatmap_pd}
\begin{tabular}{lllllllll}
\toprule
 &  & \multicolumn{7}{c}{pd} \\
 &  & zs & spp & cot & srep & pp & mf & tft \\
model & prompt &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{6}{*}{C3.5Sv2} & zs & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & 2.2 $\pm$ 0.4 & 8.4 $\pm$ 7.0 & 5.6 $\pm$ 6.4 & 2.8 $\pm$ 1.3 \\
 & cot & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & 2.0 $\pm$ 0.0 & 14.0 $\pm$ 2.0 & 3.0 $\pm$ 1.2 & 2.2 $\pm$ 0.4 \\
 & spp & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & 1.6 $\pm$ 0.5 & 7.6 $\pm$ 6.1 & 2.0 $\pm$ 0.7 & 3.0 $\pm$ 0.0 \\
 & sc-zs & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & 1.5 $\pm$ 0.7 & 12.0 $\pm$ 0.0 & 3.0 $\pm$ 0.0 & 4.5 $\pm$ 0.7 \\
 & sc-cot & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & \textbf{1.0 $\pm$ 0.0} & 2.0

In [15]:
# Column to extract from every merged efficiency table. "avg" is the pooled
# column that get_efficiency_dataframe appends after the opponent columns.
target_opponent = "avg"

# Inputs used below:
# - dfs_merged_ef: list of merged efficiency DataFrames (one per game_settings_type)
# - game_settings_types: list of the corresponding names, in the same order

# Collect the target column from each DataFrame
slices = []

for df, setting in zip(dfs_merged_ef, game_settings_types):
    df = df.copy()
    if target_opponent not in df.columns:
        raise ValueError(f"Opponent '{target_opponent}' not found in columns of {setting}.")
    
    # Series with (model, prompt) index holding the target column's cells,
    # renamed so it becomes the column for this game-settings type
    s = df[target_opponent].rename(setting)
    slices.append(s)

# Align all Series on the (model, prompt) index: one column per game-settings type
result_df = pd.concat(slices, axis=1)


In [16]:
def print_latex_heatmap(
    df,
    caption="Average Efficiency (Points per kilo-token)",
    label="tab:pd_efficiency_avg_heatmap",
):
    """Print ``df`` as a LaTeX table of ``mean ± std`` cells.

    Every cell of ``df`` that is a non-empty list is rendered as its mean and
    sample standard deviation (``ddof=1``), each with one decimal; any other
    cell is rendered as an empty string. A cell is wrapped in ``\\textbf``
    when its mean is the maximum of its row or of its column.

    Args:
        df: DataFrame whose cells are lists of numbers.
        caption: Caption passed to ``DataFrame.to_latex``.
        label: Label passed to ``DataFrame.to_latex``.

    Returns:
        None. The LaTeX code is printed, followed by blank lines.
    """
    # Mean of each cell's list of samples; non-list cells become NaN.
    mean_df = df.map(lambda x: np.mean(x) if isinstance(x, list) else np.nan)

    # Identify maxima based on means
    row_max_mask = mean_df.eq(mean_df.max(axis=1), axis=0)
    col_max_mask = mean_df.eq(mean_df.max(axis=0), axis=1)

    # Make values strings, bold if max in row or column
    styled_df = df.astype(str)
    for row in df.index:
        for col in df.columns:
            vals = df.loc[row, col]

            if not isinstance(vals, list) or len(vals) == 0:
                # Placeholder or empty cell: render as blank.
                formatted = ""
            else:
                mean_val = np.mean(vals)
                std_val = np.std(vals, ddof=1)  # sample std (ddof=1); NaN for a single sample

                # Check if it's a max cell (by mean)
                is_max = row_max_mask.loc[row, col] or col_max_mask.loc[row, col]

                formatted = f"{mean_val:.1f} $\\pm$ {std_val:.1f}"
                if is_max:
                    formatted = f"\\textbf{{{formatted}}}"

            styled_df.loc[row, col] = formatted

    # Output LaTeX
    latex_code = styled_df.to_latex(
        index=True,
        multirow=True,
        multicolumn=True,
        multicolumn_format='c',
        escape=False,  # Allow \textbf
        caption=caption,
        label=label,
    )
    print(latex_code)
    print("\n\n")

print_latex_heatmap(result_df)

\begin{table}
\caption{Average Efficiency (Points per kilo-token)}
\label{tab:pd_efficiency_avg_heatmap}
\begin{tabular}{llllll}
\toprule
 &  & pd & pd-alt & sh & sh-alt \\
model & prompt &  &  &  &  \\
\midrule
\multirow[t]{6}{*}{Claude 3.5 Sonnet v2} & zs & 18.3 $\pm$ 10.2 & \textbf{28.0 $\pm$ 12.1} & 21.8 $\pm$ 12.2 & 15.6 $\pm$ 9.7 \\
 & cot & 9.0 $\pm$ 4.0 & \textbf{15.9 $\pm$ 8.8} & 11.2 $\pm$ 5.4 & 7.9 $\pm$ 2.9 \\
 & spp & 7.5 $\pm$ 3.0 & \textbf{12.0 $\pm$ 4.7} & 9.6 $\pm$ 4.1 & 5.9 $\pm$ 2.4 \\
 & sc-zs & 6.4 $\pm$ 3.4 & \textbf{9.9 $\pm$ 4.6} & 7.6 $\pm$ 3.6 & 4.6 $\pm$ 2.7 \\
 & sc-cot & 3.0 $\pm$ 1.5 & \textbf{6.1 $\pm$ 4.0} & 5.5 $\pm$ 5.3 & 2.6 $\pm$ 1.0 \\
 & sc-spp & 2.5 $\pm$ 1.1 & 3.8 $\pm$ 1.6 & \textbf{4.7 $\pm$ 4.3} & 2.0 $\pm$ 0.8 \\
\cline{1-6}
\multirow[t]{6}{*}{Claude 3.7 Sonnet} & zs & 14.0 $\pm$ 7.9 & \textbf{24.4 $\pm$ 12.6} & 24.0 $\pm$ 12.1 & 13.2 $\pm$ 7.7 \\
 & cot & 6.4 $\pm$ 2.6 & 10.1 $\pm$ 4.4 & \textbf{10.7 $\pm$ 4.7} & 6.2 $\pm$ 2.7 \\
 & spp & 5.

In [17]:
import pandas as pd
import os
import json
from collections import defaultdict

def get_valid_rate(
    log_dir: str,
    model_names: list[str],
    prompt_types: list[str],
    game_type: str,
    game_settings_type: str,
    iter_cnt: int,
    tot: bool,
) -> pd.DataFrame:
    """Collect per-game valid-outcome percentages for each (model, prompt) pair.

    Walks ``<log_dir>/iteration_<i>/<model>/<game_type>/<game_settings_type>/``
    for ``i`` in ``range(iter_cnt)`` and reads every ``<game>/game.json`` found
    there. A game is counted only when player 0's type
    (``player_0_player_type``) is one of ``prompt_types``; player 1's type
    (``player_1_player_type``) is then treated as the opponent.

    Args:
        log_dir: Root directory that contains the ``iteration_<i>`` folders.
        model_names: Model directory names; also used as the first index level.
        prompt_types: Player-0 types to report, in the desired row order.
        game_type: Sub-directory name of the game.
        game_settings_type: Sub-directory name of the game settings.
        iter_cnt: Number of ``iteration_<i>`` folders to scan (missing ones
            are skipped).
        tot: When true, the prompt labels ``zs``/``spp``/``cot`` are rewritten
            to ``sc-zs``/``sc-spp``/``sc-cot`` in the returned index.

    Returns:
        A DataFrame indexed by a ``(model, prompt)`` MultiIndex with one column
        per opponent type that was seen. Each cell is the list of per-game
        valid-outcome percentages (0-100), or the int ``-1000`` when no game
        was found for that combination. Consumers in this file skip such
        cells through ``isinstance(..., list)`` checks.

    Raises:
        ValueError: If a counted ``game.json`` has no ``valid_outcomes`` entry,
            or if no data at all was collected.
    """
    # Row-label renames, applied as substring replacements on the prompt name.
    if tot:
        y_replacements = {"zs": "sc-zs", "spp": "sc-spp", "cot": "sc-cot"}
    else:
        y_replacements = {"zs": "zs"}
    # Column-label renames; currently an identity mapping (no label changes).
    x_replacements = {"zs": "zs"}

    # {(model, prompt) -> {opponent_type -> [valid-outcome % of each game]}}
    valid_data = defaultdict(lambda: defaultdict(list))
    opponent_set = set()
    wanted_prompts = set(prompt_types)

    # Every game.json is read exactly once and routed to its prompt bucket,
    # rather than re-scanning the whole directory tree for each prompt type.
    for model in model_names:
        for itr in range(iter_cnt):
            directory = os.path.join(
                log_dir, f"iteration_{itr}", model, game_type, game_settings_type
            )
            if not os.path.isdir(directory):
                continue

            for game_dir in sorted(os.listdir(directory)):
                info_path = os.path.join(directory, game_dir, "game.json")
                if not os.path.isfile(info_path):
                    continue

                with open(info_path, encoding="utf-8") as f:
                    info = json.load(f)

                # Only games where the evaluated prompt sits in seat 0 count.
                prompt = info.get("player_0_player_type")
                if prompt not in wanted_prompts:
                    continue

                opponent_type = info.get("player_1_player_type")
                opponent_set.add(opponent_type)

                valid_outcomes = info.get("valid_outcomes")
                if valid_outcomes is None:
                    raise ValueError(
                        f"Missing 'valid_outcomes' in {info_path} "
                        f"(model {model}, prompt {prompt}, iteration {itr}, "
                        f"game directory {game_dir})"
                    )

                total_outcomes = len(valid_outcomes)
                if total_outcomes == 0:
                    # Nothing to rate for this game; report it and move on.
                    print(f"Model {model}, Prompt {prompt}, Iteration {itr}, Game directory {game_dir} - No valid outcomes in {info_path}")
                    continue

                # Truthy entries count as valid; store the rate as a percentage.
                valid_count = sum(1 for outcome in valid_outcomes if outcome)
                valid_rate = valid_count / total_outcomes
                valid_data[(model, prompt)][opponent_type].append(valid_rate * 100)

    if not valid_data:
        raise ValueError("No data collected — check log paths and model+prompt naming conventions.")

    # Fixed column order. Opponent types that are not listed here are dropped
    # from the table.
    # NOTE(review): confirm this list matches the labels written to the logs
    # (e.g. whether the adaptive player is logged as "mf").
    opponent_types_aux = ["zs", "spp", "cot", "srep", "pp", "mf", "tft"]
    sorted_opponents = [opp for opp in opponent_types_aux if opp in opponent_set]

    # Apply x label replacements
    x_labels = sorted_opponents.copy()
    for old, new in x_replacements.items():
        x_labels = [label.replace(old, new) for label in x_labels]

    rows = []
    index_tuples = []
    for model in model_names:
        for prompt in prompt_types:
            new_prompt = prompt
            for old, new in y_replacements.items():
                new_prompt = new_prompt.replace(old, new)
            index_tuples.append((model, new_prompt))

            # .get() keeps the defaultdicts from growing entries for pairs
            # that have no data; -1000 marks such cells.
            per_opponent = valid_data.get((model, prompt), {})
            rows.append([per_opponent.get(opp, -1000) for opp in sorted_opponents])

    index = pd.MultiIndex.from_tuples(index_tuples, names=["model", "prompt"])
    df = pd.DataFrame(rows, index=index, columns=x_labels)
    return df


In [18]:
def _valid_rate_tables(log_dir, iter_cnt, tot):
    """Return one ``get_valid_rate`` table per entry of ``game_settings_types``."""
    names = [entry["name"] for entry in models]
    return [
        get_valid_rate(
            log_dir=log_dir,
            model_names=names,
            prompt_types=prompt_types,
            game_type="pd",
            game_settings_type=settings,
            iter_cnt=iter_cnt,
            tot=tot,
        )
        for settings in game_settings_types
    ]


# Non-SC runs: 5 iterations read from the "data" log tree.
dfs_nonsc_v = _valid_rate_tables("../logs_pd/logs_3/data", iter_cnt=5, tot=False)

# SC runs (tot=True relabels prompts with the "sc-" prefix): 2 iterations
# read from the "data_tot" log tree.
dfs_sc_v = _valid_rate_tables("../logs_pd/logs_3/data_tot", iter_cnt=2, tot=True)



In [19]:
# Row order of the prompt labels inside each model group.
prompt_order = ['zs', 'cot', 'spp', 'sc-zs', 'sc-cot', 'sc-spp']
prompt_order_map = {prompt: i for i, prompt in enumerate(prompt_order)}

# One merged (non-SC + SC) valid-rate table per game-settings type.
dfs_merged_v = []

for df_nonsc, df_sc in zip(dfs_nonsc_v, dfs_sc_v):
    # The SC tables are built with tot=True, so their prompt labels already
    # carry the "sc-" prefix and no index rewrite is needed before stacking.
    # pd.concat returns a new frame, so the inputs are left untouched.
    merged_df = (
        pd.concat([df_nonsc, df_sc])
        .reset_index()
        # Temporary sort key; prompts missing from prompt_order map to NaN
        # and therefore sort last within their model.
        .assign(prompt_order=lambda frame: frame['prompt'].map(prompt_order_map))
        .sort_values(['model', 'prompt_order'])
        .drop(columns=['prompt_order'])
        .set_index(['model', 'prompt'])
    )

    dfs_merged_v.append(merged_df)


In [24]:
def _flatten_lists(cells):
    """Concatenate every list found in *cells*; non-list entries are skipped."""
    return [item for sublist in cells if isinstance(sublist, list) for item in sublist]


# {game setting -> Series mapping each model to its pooled list of values}
model_aggs = {}

for df, game_setting in zip(dfs_merged_v, game_settings_types):
    # df is indexed by (model, prompt); its columns are opponent types.

    # Pool each row's per-opponent lists into a single list.
    df_agg_opponents = df.aggregate(_flatten_lists, axis=1)

    # Pool the rows that share a model (index level 0) across all prompts.
    df_agg_model = df_agg_opponents.groupby(level=0).aggregate(_flatten_lists)

    model_aggs[game_setting] = df_agg_model

# One column per game setting, one row per model.
df_model_agg = pd.DataFrame(model_aggs)

# "avg" holds, per model, the values of all game settings pooled into one
# list; the mean itself is computed later when the table is printed.
df_model_agg["avg"] = df_model_agg.aggregate(_flatten_lists, axis=1)

# Keep only the pooled column.
df_model_agg = df_model_agg[["avg"]]


In [25]:
def print_valid_rates(df):
    df = df.copy()

    # Make values strings, bold if min in row or column
    styled_df = df.copy().astype(str)
    for row in df.index:
        for col in df.columns:
            vals = df.loc[row, col]

            if not isinstance(vals, list) or len(vals) == 0:
                formatted = ""
            else:
                mean_val = np.mean(vals)
                std_val = np.std(vals, ddof=1)  # population std

                formatted = f"{mean_val:.1f} $\\pm$ {std_val:.1f}"

            styled_df.loc[row, col] = formatted

    # Output LaTeX
    latex_code = styled_df.to_latex(
        index=True,
        multirow=True,
        multicolumn=True,
        multicolumn_format='c',
        escape=False,  # Allow \textbf
        caption="Average Valid Rate (\\% of Valid Outcomes)",
        label="tab:pd_valid_rates",
    )
    print(latex_code)
    print("\n\n")

# Print the per-model valid-rate table (values pooled over all game settings).
print_valid_rates(df_model_agg)

\begin{table}
\caption{Average Valid Rate (\% of Valid Outcomes)}
\label{tab:pd_valid_rates}
\begin{tabular}{ll}
\toprule
 & avg \\
model &  \\
\midrule
Claude 3.5 Sonnet v2 & 100.0 $\pm$ 0.0 \\
Claude 3.7 Sonnet & 100.0 $\pm$ 0.0 \\
Claude 3.7 Sonnet (Thinking) & 100.0 $\pm$ 0.0 \\
Claude Sonnet 4 & 100.0 $\pm$ 0.0 \\
Claude Sonnet 4 (Thinking) & 100.0 $\pm$ 0.0 \\
DeepSeek-R1 & 99.1 $\pm$ 6.3 \\
Llama 3.3 70B Instruct & 100.0 $\pm$ 0.0 \\
Mistral Large (24.07) & 99.4 $\pm$ 6.5 \\
\bottomrule
\end{tabular}
\end{table}




