In [103]:
import glob
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import pingouin as pg

from plot_data import ATARI_100K_GAMES
from utils import get_game_rankings

In [58]:
def iqm(values):
    """Return the interquartile mean of ``values``.

    The values are sorted, the lowest and the highest ``len(values) // 4``
    entries are discarded, and the mean of what remains is returned.

    Args:
        values: One-dimensional sequence or array of numbers.

    Returns:
        The mean of the central portion, as produced by ``np.mean``
        (``nan`` for empty input).
    """
    n = len(values)
    trim = n // 4
    # Use an explicit upper bound. ``-n//4`` parses as ``(-n)//4``, which
    # floors towards -inf and therefore removes one extra value from the top
    # whenever n is not a multiple of 4 (and removes everything when n == 1).
    return np.mean(np.sort(values)[trim:n - trim])

In [134]:
# Load the pickled results stored under final_perf/CNN_widths.pickle.
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# point this at files that come from a trusted source.
with open('data/40M_experiments/final_perf/CNN_widths.pickle', mode='rb') as f:
    data = pickle.load(f)

In [135]:
# Show the top-level keys of the loaded pickle.
data.keys()

dict_keys(['DER_CNN_widths', 'DrQ_eps_CNN_widths'])

In [137]:
# Pull the two top-level entries of the pickle into their own names.
der_data = data['DER_CNN_widths']
drq_data = data['DrQ_eps_CNN_widths']

In [138]:
def bootstrapped_ranking(scores, reps=100, sample_size=8, rng=None):
    """Estimate the mean rank of each hyperparameter via bootstrapping.

    For every hyperparameter, ``reps`` bootstrap resamples (with replacement)
    are drawn from ``scores[hp][0]`` and summarised with ``iqm``. Within each
    replicate the hyperparameters are ranked by that summary (highest value
    gets rank 1), and the ranks are averaged over replicates.

    Args:
        scores: Mapping from hyperparameter label to an indexable whose first
            element (``sc[0]``) is the 1-D collection of scores to resample.
        reps: Number of bootstrap replicates.
        sample_size: Number of values drawn per resample. Defaults to 8, the
            value that used to be hard-coded.
        rng: Optional object exposing ``choice(a, size=..., replace=...)``
            (e.g. ``np.random.default_rng(seed)``) for reproducible draws.
            When ``None`` the global ``np.random`` state is used, as before.

    Returns:
        Dict mapping each hyperparameter label to its mean 1-based rank.
    """
    sampler = np.random if rng is None else rng
    hps = list(scores)

    # One array of `reps` bootstrapped IQMs per hyperparameter.
    bootstrap_scores = {
        hp: np.array([
            iqm(sampler.choice(sc[0], size=sample_size, replace=True))
            for _ in range(reps)
        ])
        for hp, sc in scores.items()
    }

    sample_rankings = {hp: [] for hp in hps}
    for idx in range(reps):
        performances = np.array([bootstrap_scores[hp][idx] for hp in hps])
        # Double argsort turns values into 0-based ranks; negating first makes
        # the best (largest) performance rank 0.
        ranks = np.argsort(np.argsort(-performances))
        for hp, rank in zip(hps, ranks):
            sample_rankings[hp].append(rank)

    # Shift to 1-based ranks when averaging.
    return {hp: 1 + np.mean(ranks) for hp, ranks in sample_rankings.items()}


In [139]:
def aggregate(x):
    """Trimmed mean along axis 0: drop one maximum and one minimum, average the rest.

    Args:
        x: Array with at least three entries along axis 0.

    Returns:
        Array with axis 0 reduced away, holding the mean of the remaining
        ``x.shape[0] - 2`` values in each column.

    Raises:
        ValueError: If ``x`` has fewer than three entries along axis 0, in
            which case nothing is left after removing the max and the min.
    """
    n_rows = x.shape[0]
    if n_rows < 3:
        raise ValueError(
            f"aggregate needs at least 3 entries along axis 0, got {n_rows}"
        )
    trimmed_sum = x.sum(axis=0) - x.max(axis=0) - x.min(axis=0)
    # Two values per column were removed, so only n_rows - 2 terms remain;
    # dividing by n_rows would systematically shrink the result.
    return trimmed_sum / (n_rows - 2)

In [140]:
def transpose_data(data):
    """Regroup per-hyperparameter arrays into a per-game mapping.

    Args:
        data: Mapping from hyperparameter name to a 2-D array whose second
            axis is indexed by position in ``ATARI_100K_GAMES``
            (first axis presumably holds the individual runs - TODO confirm).

    Returns:
        ``{game: {label: row}}`` where ``label`` is the text after the last
        underscore of the hyperparameter name and ``row`` is that game's
        column reshaped to shape ``(1, -1)``.
    """
    per_game = {}
    for column, game in enumerate(ATARI_100K_GAMES):
        per_game[game] = {
            name.rsplit('_', 1)[-1]: values[:, column].reshape(1, -1)
            for name, values in data.items()
        }
    return per_game

In [141]:
# Regroup the DER results per game. Use the transpose_data helper rather than
# a hand-copied version of its body so the two cannot drift apart.
transposed_der_data = transpose_data(der_data)

In [142]:
# Per-game view of the DER results: {game: {hyperparameter label: (1, n) array}}.
transposed_der_data = transpose_data(der_data)

In [143]:
# Bootstrapped mean rank of every hyperparameter, computed game by game
# with 10 bootstrap replicates each.
bootstrapped_rankings = {
    game: bootstrapped_ranking(scores, reps=10)
    for game, scores in transposed_der_data.items()
}

In [144]:
# Per-game rankings from the project helper; the displayed value maps each
# game to a list of (hyperparameter label, rank) tuples.
rankings = get_game_rankings(der_data)
rankings

{'Alien': [('0.25', 2.5),
  ('1.0', 2.5),
  ('0.5', 2.5),
  ('2.0', 3.0),
  ('4.0', 4.5)],
 'Amidar': [('2.0', 3.0),
  ('1.0', 3.0),
  ('4.0', 3.0),
  ('0.5', 3.0),
  ('0.25', 3.0)],
 'Assault': [('0.25', 1.0),
  ('0.5', 2.0),
  ('1.0', 3.0),
  ('2.0', 4.5),
  ('4.0', 4.5)],
 'Asterix': [('0.25', 3.0),
  ('4.0', 3.0),
  ('0.5', 3.0),
  ('2.0', 3.0),
  ('1.0', 3.0)],
 'BankHeist': [('0.5', 3.0),
  ('0.25', 3.0),
  ('2.0', 3.0),
  ('4.0', 3.0),
  ('1.0', 3.0)],
 'BattleZone': [('0.25', 1.0),
  ('0.5', 2.0),
  ('1.0', 3.5),
  ('2.0', 4.0),
  ('4.0', 4.5)],
 'Boxing': [('0.25', 3.0),
  ('4.0', 3.0),
  ('0.5', 2.5),
  ('1.0', 3.0),
  ('2.0', 3.0)],
 'Breakout': [('1.0', 3.0),
  ('2.0', 3.0),
  ('4.0', 3.0),
  ('0.5', 3.0),
  ('0.25', 3.0)],
 'ChopperCommand': [('0.25', 3.0),
  ('4.0', 2.5),
  ('1.0', 3.0),
  ('0.5', 3.0),
  ('2.0', 3.0)],
 'CrazyClimber': [('0.25', 1.0),
  ('0.5', 2.0),
  ('1.0', 3.0),
  ('2.0', 4.0),
  ('4.0', 5.0)],
 'DemonAttack': [('0.25', 1.0),
  ('4.0', 3.0),
  ('0.5'

In [145]:
# Turn each game's list of (hyperparameter label, rank) pairs into a dict.
# dict() accepts a list of pairs as well as an existing dict, so re-running
# this cell leaves `rankings` unchanged instead of silently mangling it
# (indexing tup[0]/tup[1] on dict keys would split the label strings).
rankings = {game: dict(pairs) for game, pairs in rankings.items()}

In [146]:
# Tabulate the rankings: one column per game, one row per hyperparameter label.
pd.DataFrame(rankings)

Unnamed: 0,Alien,Amidar,Assault,Asterix,BankHeist,BattleZone,Boxing,Breakout,ChopperCommand,CrazyClimber,...,Kangaroo,Krull,KungFuMaster,MsPacman,Pong,PrivateEye,Qbert,RoadRunner,Seaquest,UpNDown
0.25,2.5,3.0,1.0,3.0,3.0,1.0,3.0,3.0,3.0,1.0,...,1.5,3.5,3.0,3.0,3.0,2.0,3.0,1.0,1.0,1.0
1.0,2.5,3.0,3.0,3.0,3.0,3.5,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.5,4.0,3.0
0.5,2.5,3.0,2.0,3.0,3.0,2.0,2.5,3.0,3.0,2.0,...,1.5,3.5,3.0,3.0,3.0,3.0,3.0,3.5,2.0,2.0
2.0,3.0,3.0,4.5,3.0,3.0,4.0,3.0,3.0,3.0,4.0,...,4.5,3.5,3.0,3.0,3.0,3.5,3.0,3.5,4.0,4.5
4.0,4.5,3.0,4.5,3.0,3.0,4.5,3.0,3.0,2.5,5.0,...,4.5,1.5,3.0,3.0,3.0,3.5,3.0,3.5,4.0,4.5


In [147]:
# Raw 2-D array of the rankings table (rows: hyperparameter labels, columns: games).
temp = pd.DataFrame(data=rankings).to_numpy()

In [148]:
# Sanity check: a row-major flatten must start with the first row, so this
# difference should be all zeros. The row length comes from the array itself
# instead of a hard-coded 26.
temp.reshape(-1)[:temp.shape[1]] - temp[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [149]:
# Flatten the rankings row-major with rows ordered like der_data's keys and
# columns ordered like ATARI_100K_GAMES, i.e. the order used for the label
# columns built from those two sequences. pd.DataFrame(rankings) orders its
# rows by first appearance in the per-game dicts (0.25, 1.0, 0.5, ...), which
# differs from der_data's key order (..._4.0 first); flattening it as-is
# would pair ratings with the wrong hyperparameter labels.
hp_order = [hp.split('_')[-1] for hp in der_data.keys()]
long_form = (
    pd.DataFrame(rankings)
    .loc[hp_order, list(ATARI_100K_GAMES)]
    .values
    .reshape(-1)
)

In [150]:
# Game label for every long-form entry: the full game list repeated once per
# hyperparameter. The strided slice shows the first label of each repetition.
games_column = np.tile(ATARI_100K_GAMES, len(der_data))
games_column[0:-1:len(ATARI_100K_GAMES)]

array(['Alien', 'Alien', 'Alien', 'Alien', 'Alien'], dtype='<U14')

In [151]:
# Hyperparameter label for every long-form entry: each der_data key repeated
# once per game. Display the first block of labels as a check.
hp_column = np.repeat(list(der_data), len(ATARI_100K_GAMES))
hp_column[:len(ATARI_100K_GAMES)]

array(['DER_CNN_widths_4.0', 'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0',
       'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0',
       'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0',
       'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0',
       'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0',
       'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0',
       'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0',
       'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0',
       'DER_CNN_widths_4.0', 'DER_CNN_widths_4.0'], dtype='<U19')

In [152]:
# Long-form table with one row per (hyperparameter, game) pair. Building it
# from a dict of columns keeps `ratings` numeric; stacking the arrays as rows
# and transposing would coerce every column to object dtype.
df = pd.DataFrame(
    {"hyperparameter": hp_column, "game": games_column, "ratings": long_form}
)

In [153]:
# Standard deviation of the ratings within each hyperparameter group,
# averaged over the groups.
# NOTE(review): the result depends on how pandas aggregates the `ratings`
# column's dtype; confirm it is numeric before relying on this number.
df.drop(columns=["game"]).groupby(by="hyperparameter").agg("std")["ratings"].mean()

0.6312910537184679

In [121]:
# Show the rows belonging to a single hyperparameter label.
# NOTE(review): this label does not look like one of the DER_CNN_widths_* keys
# produced from the CNN_widths pickle loaded at the top of the notebook, so on
# a fresh run the filter presumably selects no rows - confirm or update the key.
df[df["hyperparameter"] == "DER_layer_funct_conv_relu"]

Unnamed: 0,hyperparameter,game,ratings
0,DER_layer_funct_conv_relu,Alien,3.5
1,DER_layer_funct_conv_relu,Amidar,3.5
2,DER_layer_funct_conv_relu,Assault,3.5
3,DER_layer_funct_conv_relu,Asterix,3.5
4,DER_layer_funct_conv_relu,BankHeist,3.5
5,DER_layer_funct_conv_relu,BattleZone,3.5
6,DER_layer_funct_conv_relu,Boxing,3.5
7,DER_layer_funct_conv_relu,Breakout,3.0
8,DER_layer_funct_conv_relu,ChopperCommand,3.5
9,DER_layer_funct_conv_relu,CrazyClimber,3.5


In [123]:
# Intraclass correlation of the ratings, with hyperparameters as the targets
# being rated and games as the raters.
results = pg.intraclass_corr(
    data=df,
    targets='hyperparameter',
    raters='game',
    ratings='ratings',
)
results

Unnamed: 0,Type,Description,ICC,F,df1,df2,pval,CI95%
0,ICC1,Single raters absolute,0.068935,2.925,5,150,0.015028,"[0.0, 0.39]"
1,ICC2,Single random raters,0.06367,2.52809,5,125,0.032351,"[-0.0, 0.39]"
2,ICC3,Single fixed raters,0.05551,2.52809,5,125,0.032351,"[-0.0, 0.36]"
3,ICC1k,Average raters absolute,0.65812,2.925,5,150,0.015028,"[0.09, 0.94]"
4,ICC2k,Average random raters,0.638728,2.52809,5,125,0.032351,"[-0.06, 0.94]"
5,ICC3k,Average fixed raters,0.604444,2.52809,5,125,0.032351,"[-0.06, 0.93]"


In [29]:
# Stack the per-hyperparameter aggregates into one 2-D array, one row per
# der_data entry in key order.
iqms = np.vstack(list(map(aggregate, der_data.values())))

In [41]:
# Sanity check: a row-major flatten must start with the first row (difference
# should be all zeros); also show the array shape. The row length is taken
# from the array rather than a hard-coded 26.
iqms.reshape(-1)[:iqms.shape[1]] - iqms[0], iqms.shape

(array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32),
 (6, 26))

array(['Alien', 'Alien', 'Alien', 'Alien', 'Alien', 'Alien'], dtype='<U14')

array(['DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu',
       'DER_layer_funct_conv_relu', 'DER_layer_funct_conv_relu'],
      dtype='<U30')

In [54]:
# Long-form table of the aggregated scores, one row per (hyperparameter, game)
# pair. Building it from a dict of columns keeps `results` numeric; stacking
# the arrays as rows and transposing would coerce every column to object dtype.
df = pd.DataFrame(
    {"hyperparameter": hp_column, "game": games_column, "results": iqms.reshape(-1)}
)

In [56]:
# Intraclass correlation of the aggregated scores, with hyperparameters as the
# targets being rated and games as the raters.
results = pg.intraclass_corr(
    data=df,
    targets='hyperparameter',
    raters='game',
    ratings='results',
)

In [57]:
# Display the ICC table computed in the previous cell.
results

Unnamed: 0,Type,Description,ICC,F,df1,df2,pval,CI95%
0,ICC1,Single raters absolute,-0.031663,0.202036,5,150,0.961198,"[-0.04, 0.01]"
1,ICC2,Single random raters,0.003518,1.832581,5,125,0.111172,"[-0.0, 0.04]"
2,ICC3,Single fixed raters,0.031029,1.832581,5,125,0.111172,"[-0.01, 0.28]"
3,ICC1k,Average raters absolute,-3.94961,0.202036,5,150,0.961198,"[-12.13, 0.18]"
4,ICC2k,Average random raters,0.084072,1.832581,5,125,0.111172,"[-0.04, 0.53]"
5,ICC3k,Average fixed raters,0.454321,1.832581,5,125,0.111172,"[-0.46, 0.91]"
