In [1]:
from pathlib import Path
import pprint
import json
import numpy as np
from collections import defaultdict
import pandas as pd
from IPython.display import display, HTML
from ipywidgets import interact
from scipy.stats import t
from scipy.stats import ttest_ind, ttest_1samp

In [2]:
folders = list(sorted(Path(f"data/agents_eval_results/").glob("*"), reverse=False))
folders 

[PosixPath('data/agents_eval_results/experiment-0'),
 PosixPath('data/agents_eval_results/experiment-1'),
 PosixPath('data/agents_eval_results/experiment-2'),
 PosixPath('data/agents_eval_results/experiment-3'),
 PosixPath('data/agents_eval_results/experiment-3-1'),
 PosixPath('data/agents_eval_results/experiment-4'),
 PosixPath('data/agents_eval_results/experiment-4-1'),
 PosixPath('data/agents_eval_results/experiment-4-more-exploration'),
 PosixPath('data/agents_eval_results/oos'),
 PosixPath('data/agents_eval_results/oos-eval'),
 PosixPath('data/agents_eval_results/tmp')]

# Experiment wise tables

In [3]:
@interact(folder=folders)
def f(folder):
    print(40*"-")
    print(folder, "(p1: mu<= 50%, p2: mu = 50%)")
    print(40*"-")
    relevant_data = defaultdict(lambda: {})
    files = folder.glob("tmp/*.json")

    for file in files:
        with open(str(file), "r") as f:
            data = json.load(f)
        
        name =  Path(file).name.replace(".json", "")
        opponent = name.split("-vs-")[0].replace("-", " ")
        agent = name.split("-vs-")[1].replace("-", " ")

        key = [x for x in data.keys() if not x.__contains__("DMCTS") and not x.__contains__("DPolicy")
               and not x.__contains__("random") and not x.__contains__("PMCTS") and not x.__contains__("mean")
               and not x.__contains__("Policy-B") and not x.__contains__("Policy-RAW") and not x.__contains__("MCTS-100-3")][0]

        relevant_data[agent][opponent] = data[key]
            
    data = {k: {x: np.array(y) for x, y in v.items()} for k,v in relevant_data.items()}
    data = {k: {x: y[:4*(y.shape[0]//4)].reshape(-1, 4) for x, y in v.items()} for k,v in data.items()}
    data = {k: {x: y 
                for x, y in sorted(v.items(), key=lambda item: item[0])}
            for k, v in sorted(data.items(), key=lambda item: item[0], reverse=True)} 
    
    print_data = {}
    
    for agent in data:
        print_data[agent] = {}
        for x, y in data[agent].items():
            mu =y.mean(axis=1).mean()
            ste = y.mean(axis=1).std() / np.sqrt(y.shape[0])
            p1 = ttest_1samp(y.reshape(-1), 0.5, nan_policy='omit', alternative='greater').pvalue
            p2 = ttest_1samp(y.reshape(-1), 0.5, nan_policy='omit', alternative='two-sided').pvalue
            
            print_data[agent][x] = f"{mu.round(5)}+-{ste.round(5)} {y.shape}, p1={p1.round(5)}, p2={p2.round(5)}"
    
    df = pd.DataFrame(print_data).T
    
    display(HTML(df.to_html()))
    
    print(40*"-")
    print("Compare distributions of row agents:")
    print(40*"-")
    
    @interact(agent1=df.index, agent2=df.index)
    def f(agent1, agent2):
        
        for opponent in df.columns:
            if opponent in data[agent1] and opponent in data[agent2]:
                length = min(data[agent1][opponent].reshape(-1).shape[0], data[agent2][opponent].reshape(-1).shape[0])
                print(opponent, length)
                print(ttest_ind(data[agent1][opponent].reshape(-1)[:length],
                                data[agent2][opponent].reshape(-1)[:length], nan_policy='omit'))

        

interactive(children=(Dropdown(description='folder', options=(PosixPath('data/agents_eval_results/experiment-0…

# All data

In [4]:
print(40*"-")
print("(p1: mu<= 50%, p2: mu = 50%)")
print(40*"-")


all_data = {}

for folder in folders:
    relevant_data = defaultdict(lambda: {})
    files = folder.glob("tmp/*.json")

    for file in files:
        with open(str(file), "r") as f:
            data = json.load(f)
        
        name =  Path(file).name.replace(".json", "")
        opponent = name.split("-vs-")[0].replace("-", " ")
        agent = name.split("-vs-")[1].replace("-", " ")

        key = [x for x in data.keys() if not x.__contains__("DMCTS") and not x.__contains__("DPolicy")
               and not x.__contains__("random") and not x.__contains__("PMCTS") and not x.__contains__("mean")
               and not x.__contains__("Policy-B") and not x.__contains__("Policy-RAW") and not x.__contains__("MCTS-100-3")][0]

        relevant_data[f"{folder}/{agent}"][opponent] = data[key]
            
    data = {k: {x: np.array(y) for x, y in v.items()} for k,v in relevant_data.items()}
    data = {k: {x: y[:4*(y.shape[0]//4)].reshape(-1, 4) for x, y in v.items()} for k,v in data.items()}
    data = {k: {x: y 
                for x, y in sorted(v.items(), key=lambda item: item[0])}
            for k, v in sorted(data.items(), key=lambda item: item[0], reverse=True)} 
    
    all_data.update(**data)
    
 
    
print_data = {}   
for agent in all_data:
    print_data[agent] = {}
    for x, y in all_data[agent].items():
        mu =y.mean(axis=1).mean()
        ste = y.mean(axis=1).std() / np.sqrt(y.shape[0])
        p1 = ttest_1samp(y.reshape(-1), 0.5, nan_policy='omit', alternative='greater').pvalue
        p2 = ttest_1samp(y.reshape(-1), 0.5, nan_policy='omit', alternative='two-sided').pvalue

        print_data[agent][x] = f"{mu.round(5)}+-{ste.round(5)} {y.shape}, p1={p1.round(5)}, p2={p2.round(5)}"

df = pd.DataFrame(print_data).T

display(HTML(df.to_html()))

print(40*"-")
print("Compare distributions of row agents:")
print(40*"-")

@interact(agent1=df.index, agent2=df.index)
def f(agent1, agent2):

    for opponent in df.columns:
        if opponent in all_data[agent1] and opponent in all_data[agent2]:
            print(opponent)
            print(ttest_ind(all_data[agent1][opponent].reshape(-1)[:1000], 
                            all_data[agent2][opponent].reshape(-1)[:1000], nan_policy='omit'))


----------------------------------------
(p1: mu<= 50%, p2: mu = 50%)
----------------------------------------


Unnamed: 0,DMCTS,PMCTS,Policy B,F I DMCTS,I DMCTS,Policy RAW,MCTS 50,MCTS 100 3
data/agents_eval_results/experiment-0/Policy,"0.51501+-0.00778 (250, 4), p1=0.03279, p2=0.06558","0.43878+-0.00914 (250, 4), p1=1.0, p2=0.0","0.49568+-0.00791 (251, 4), p1=0.69656, p2=0.60688",,,,,
data/agents_eval_results/experiment-0/MCTS 100,"0.55418+-0.00881 (250, 4), p1=0.0, p2=0.0","0.48306+-0.00964 (250, 4), p1=0.96278, p2=0.07444","0.52394+-0.00909 (250, 4), p1=0.00359, p2=0.00718",,,,,
data/agents_eval_results/experiment-1/Policy,"0.45386+-0.00801 (250, 4), p1=1.0, p2=0.0",,"0.43575+-0.00804 (250, 4), p1=1.0, p2=0.0",,,,,
data/agents_eval_results/experiment-1/MCTS 100,"0.45639+-0.0079 (250, 4), p1=1.0, p2=0.0",,"0.45157+-0.00743 (250, 4), p1=1.0, p2=0.0",,,,,
data/agents_eval_results/experiment-2/Policy,"0.40376+-0.00752 (250, 4), p1=1.0, p2=0.0",,"0.41148+-0.00684 (250, 4), p1=1.0, p2=0.0",,,,,
data/agents_eval_results/experiment-2/MCTS 100,"0.44741+-0.00721 (250, 4), p1=1.0, p2=0.0",,"0.42783+-0.00693 (250, 4), p1=1.0, p2=0.0",,,,,
data/agents_eval_results/experiment-3/Policy,"0.53057+-0.00788 (250, 4), p1=4e-05, p2=9e-05",,"0.50541+-0.00866 (250, 4), p1=0.26356, p2=0.52712","0.51203+-0.00253 (2500, 4), p1=0.0, p2=0.0","0.49653+-0.00268 (2412, 4), p1=0.90513, p2=0.18975",,,
data/agents_eval_results/experiment-3/MCTS 50,,,,,,"0.49461+-0.00847 (250, 4), p1=0.73154, p2=0.53692",,
data/agents_eval_results/experiment-3/MCTS 400,,,,,,"0.48187+-0.00879 (250, 4), p1=0.9837, p2=0.0326",,
data/agents_eval_results/experiment-3/MCTS 20,,,,,,"0.51182+-0.00892 (250, 4), p1=0.08359, p2=0.16719",,


----------------------------------------
Compare distributions of row agents:
----------------------------------------


interactive(children=(Dropdown(description='agent1', options=('data/agents_eval_results/experiment-0/Policy', …