In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts
from matplotlib import rcParams,font_manager
import os
from matplotlib.ticker import FormatStrFormatter
import pandas as pd
import pickle
from fourinarowfunctions import *

%load_ext autoreload
%autoreload 2

In [2]:
# Plot styling, applied in two passes: the first sets every independent value,
# the second copies x-axis / border values so the y-axis ticks always match.
rcParams.update({
    # figure and text
    'figure.figsize': (6,6),        # figure size in inches
    'font.family': "sans-serif",
    'font.weight': "roman",
    'font.style': "normal",         # upright text (as opposed to italic/oblique)
    'font.size': 32,                # default text size in points
    'pdf.fonttype': 42,             # store fonts as TrueType (Type 42) in PDF output
    # axes
    'axes.linewidth': 2,            # thickness of the border
    'axes.spines.right': False,     # hides right border
    'axes.spines.top': False,       # hides top border
    'axes.labelsize': 32,
    # legend
    'legend.frameon': False,        # hides box around the legend
    'legend.fontsize': 18,          # font size in pt
    # ticks, lines and markers
    'xtick.labelsize': 24,
    'xtick.major.size': 5,
    'lines.linewidth': 3,
    'lines.markersize': 16,
})
rcParams.update({
    # tick line widths follow the border thickness
    'xtick.major.width': rcParams['axes.linewidth'],
    'ytick.major.width': rcParams['axes.linewidth'],
    # y ticks mirror the x ticks
    'ytick.labelsize': rcParams['xtick.labelsize'],
    'ytick.major.size': rcParams['xtick.major.size'],
})

In [33]:
# Data locations. The defaults are the original author's local folders; set the
# FOURINAROW_ANALYSIS_DIR / FOURINAROW_OSF_DIR environment variables to run the
# notebook on another machine without editing this cell.
# os.path.join(path, '') appends a trailing separator only when one is missing,
# which matters because the notebook builds file names by plain string
# concatenation (e.g. direc + 'Params/...').
direc = os.path.join(
    os.environ.get('FOURINAROW_ANALYSIS_DIR', 'C:/Users/svo/Google Drive/Bas Games/Analysis/'), '')
osf_direc = os.path.join(
    os.environ.get('FOURINAROW_OSF_DIR', 'C:/Users/svo/Documents/FourinarowData/OSF/'), '')

In [34]:
# Read the pickled log-likelihoods from the analysis folder.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open files that were produced by a trusted source.
loglik_path = direc + 'loglik_all_pickled.txt'
with open(loglik_path, 'rb') as pickled_file:
    loglik_all = pickle.load(pickled_file)

In [35]:
# Stack the loaded pieces horizontally into one array; create_df later reads
# row i of it as loglik_all[i,:].
# NOTE(review): this rebinds loglik_all to its own stacked version, so running
# the cell a second time without reloading the pickle stacks an already-stacked
# array — reload first if you need to re-run.
loglik_all = np.hstack(loglik_all)

In [36]:
# Name of the main model, and the short experiment names that create_df uses
# to build the parameter file names.
model = 'final'
expt_names = 'hvh gen eye learn1 learn2 learn3 tai1 tai2'.split()

In [37]:
# Column names of the main model's parameters, in file-column order
# (the trailing comment is the list index used by the slices further down).
param_names = [
    'pruning threshold',              # 0
    'stopping probability',           # 1
    'feature drop rate',              # 2
    'lapse rate',                     # 3
    'active scaling constant',        # 4
    'center weight',                  # 5
    'connected 2-in-a-row weight',    # 6
    'unconnected 2-in-a-row weight',  # 7
    '3-in-a-row weight',              # 8
    '4-in-a-row weight',              # 9
]

In [38]:
# (file-name identifier, display name) of every fitted model. Keeping each pair
# on one line keeps the two derived lists aligned position by position.
model_table = [
    ('final',              'Main model'),
    ('final_nonoise',      'No value noise'),
    ('final_nodelta',      'No feature drop'),
    ('final_noprune',      'No pruning'),
    ('final_notree',       'No tree'),
    ('final_noact',        'No active scaling'),
    ('final_no3',          'No 3-in-a-row'),
    ('final_no2conn',      'No connected 2-in-a-row'),
    ('final_nocenter',     'No center'),
    ('final_no4',          'No 4-in-a-row'),
    ('final_no2unc',       'No unconnected 2-in-a-row'),
    ('final_optweights',   'Optimal weights'),
    ('final_mcts_myopic',  'MCTS'),
    ('final_drop_tile',    'Tile dropping'),
    ('final_fixed_iters',  'Fixed iterations'),
    ('final_fixed_depth',  'Fixed depth'),
    ('final_fixed_branch', 'Fixed branching'),
    ('final_weight_hvd',   'Orientation-dependent weights'),
    ('final_drop_hvd',     'Orientation-dependent dropping'),
    ('final_triangle',     'Triangle'),
    ('final_drop_type',    'Type-dependent dropping'),
    ('final_opp',          'Opponent scaling'),
]
models = [identifier for identifier, label in model_table]
model_names = [label for identifier, label in model_table]


In [39]:
# One list of parameter column names per entry of `models`, in the same order:
# create_df(i) pairs param_names_models[i] with models[i]. Each entry is built
# from param_names by slicing out and/or inserting names; the trailing comments
# give the `models` entry found at the same position.
param_names_models = [param_names,param_names,param_names[:2] + param_names[3:],param_names[1:],  # final, final_nonoise, final_nodelta, final_noprune
                     param_names[2:],param_names[:4] + param_names[5:],param_names[:8] + param_names[9:],  # final_notree, final_noact, final_no3
                      param_names[:6] + param_names[7:],param_names[:5] + param_names[6:],param_names[:9],  # final_no2conn, final_nocenter, final_no4
                      param_names[:7] + param_names[8:],param_names[:5],  # final_no2unc, final_optweights
                      param_names[1:4] + ['exploration constant'] + param_names[4:],  # final_mcts_myopic
                      param_names[:2] + ['tile drop rate'] + param_names[3:],  # final_drop_tile
                      param_names[:1] + ['inverse iteration number'] + param_names[2:],  # final_fixed_iters
                      param_names[:1] + ['depth'] + param_names[2:],  # final_fixed_depth
                      ['branching factor'] + param_names[1:],       # final_fixed_branch
                      param_names[:4] + ['horizontal-vertical scaling','horizontal-diagonal scaling'] + param_names[4:],  # final_weight_hvd
                      param_names[:2] + param_names[3:] + ['horizontal feature drop rate','vertical feature drop rate',
                                                           'diagonal feature drop rate'],  # final_drop_hvd
                      param_names + ['triangle weight'],  # final_triangle
                      param_names[:2] + param_names[3:] + ['connected 2-in-a-row drop rate','unconnected 2-in-a-row drop rate',
                                                           '3-in-a-row drop rate','4-in-a-row drop rate'],  # final_drop_type
                      param_names[:4] + ['opponent scaling constant'] + param_names[4:],  # final_opp
                     ]

In [74]:
# Spearman rank correlation between the row means of `d` and the fitted
# 'stopping probability' values of the human-vs-human rows of `df`.
# NOTE(review): `d` is not defined in any cell shown in this notebook, and `df`
# must currently hold a create_df table (it needs the 'stopping probability'
# column) — this cell relies on hidden kernel state; confirm before re-running.
sts.spearmanr(np.mean(d,axis=1),df[df['experiment']=='human-vs-human']['stopping probability'].values)

SpearmanrResult(correlation=-0.8693972047332351, pvalue=1.5520092157724957e-62)

In [221]:
# Spearman rank correlation between the row means of `d` and column 0 of the
# parameters in params_learn_final_short.txt.
# NOTE(review): `d` is not defined in any cell shown in this notebook, so this
# cell relies on hidden kernel state. Column 0 is presumably the pruning
# threshold (first entry of param_names) — confirm against the file.
sts.spearmanr(np.mean(d,axis=1),np.loadtxt(direc + 'Params/params_learn_final_short.txt')[:,0])

SpearmanrResult(correlation=-0.21286234972954218, pvalue=3.9243057715479705e-09)

In [79]:
# Sanity check: dimensions of the human-vs-human parameter table for the main model.
np.loadtxt(direc + 'Params/params_hvh_final_short.txt').shape

(200, 10)

In [207]:
# Fitted main-model parameters for the human-vs-human experiment, one fit per row.
params = np.loadtxt(direc + 'Params/params_hvh_final_short.txt')
# Apply get_heuristic_quality(expand_params(row)) to every row of the table.
# NOTE(review): both helpers are not defined in this notebook; presumably they
# come from the `fourinarowfunctions` star-import — verify.
heuristic_quality = np.apply_along_axis(lambda p: get_heuristic_quality(expand_params(p)),1,params)

In [220]:
# Spearman rank correlation between the ratio of parameter columns 7 and 5 and
# the heuristic quality.
# NOTE(review): if the file columns follow param_names, this ratio is
# 'unconnected 2-in-a-row weight' / 'center weight' — confirm.
sts.spearmanr(params[:,7]/params[:,5],heuristic_quality)

SpearmanrResult(correlation=0.27360879689174117, pvalue=8.846488647226968e-05)

In [213]:
from sklearn import linear_model

# Z-score every parameter column and the heuristic quality so the regression
# coefficients are on a common scale.
params_z = (params - np.mean(params, axis=0)) / np.std(params, axis=0)[None, :]
quality_z = (heuristic_quality - np.mean(heuristic_quality)) / np.std(heuristic_quality)

# L1-penalised (Lasso) regression of quality on the parameters; the cell
# displays the fitted coefficients.
reg = linear_model.Lasso(alpha=0.1)
reg.fit(params_z, quality_z).coef_

array([-0.0631296 ,  0.        , -0.04629518,  0.        , -0.33270018,
       -0.        ,  0.00443478,  0.00055009,  0.3378023 ,  0.        ])

In [214]:
from statsmodels.api import OLS

# Z-score the heuristic quality (response) and every parameter column (predictors).
quality_z = (heuristic_quality - np.mean(heuristic_quality)) / np.std(heuristic_quality)
params_z = (params - np.mean(params, axis=0)) / np.std(params, axis=0)[None, :]

# Ordinary least squares of quality on the parameters. No constant column is
# passed to OLS; the cell displays the regression summary.
OLS(quality_z, params_z,).fit().summary()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.521
Model:,OLS,Adj. R-squared (uncentered):,0.496
Method:,Least Squares,F-statistic:,20.68
Date:,"Tue, 31 Aug 2021",Prob (F-statistic):,1.19e-25
Time:,14:31:11,Log-Likelihood:,-210.14
No. Observations:,200,AIC:,440.3
Df Residuals:,190,BIC:,473.3
Df Model:,10,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,-0.1414,0.053,-2.682,0.008,-0.245,-0.037
x2,0.0232,0.051,0.453,0.651,-0.078,0.124
x3,-0.1483,0.056,-2.626,0.009,-0.260,-0.037
x4,0.0957,0.051,1.883,0.061,-0.005,0.196
x5,-0.3515,0.061,-5.770,0.000,-0.472,-0.231
x6,-0.0370,0.059,-0.626,0.532,-0.153,0.079
x7,0.1237,0.053,2.330,0.021,0.019,0.228
x8,0.1059,0.056,1.895,0.060,-0.004,0.216
x9,0.4211,0.054,7.763,0.000,0.314,0.528

0,1,2,3
Omnibus:,36.406,Durbin-Watson:,1.42
Prob(Omnibus):,0.0,Jarque-Bera (JB):,61.702
Skew:,-0.951,Prob(JB):,4e-14
Kurtosis:,4.945,Cond. No.,2.05


In [124]:
# Look up which parameter name sits at index 5 of param_names.
param_names[5]

'center weight'

In [40]:
def create_df(i):
    """Assemble the table of fitted parameters for the i-th model.

    Reads the module-level `models`, `model_names`, `param_names_models`,
    `loglik_all`, `expt_names` and `direc`. For every experiment in
    `expt_names` the file 'Params/params_<experiment>_<model>_short.txt' is
    loaded and the results are stacked row-wise.

    The bookkeeping columns assume a fixed layout of 1650 rows:
    200 human-vs-human, 200 generalization, 50 eye tracking, 750 learning and
    450 time pressure rows, with cross-validation groups 1-5 cycling fastest.

    Prints the model identifier, its display name and its column names.

    Returns a DataFrame with the parameter columns plus 'log-likelihood',
    'model', 'experiment', 'participant', 'cross-validation group',
    'session number' (learning rows only) and 'time limit' (time pressure
    rows only); the last two are NaN elsewhere.
    """
    model_id, label, columns = models[i], model_names[i], param_names_models[i]
    loglik_row = loglik_all[i, :]

    # One parameter table per experiment, stacked in expt_names order.
    per_experiment = []
    for expt in expt_names:
        per_experiment.append(np.loadtxt(direc + 'Params/params_' + expt + '_' + model_id + '_short.txt'))
    fits = pd.DataFrame(np.vstack(per_experiment), columns=columns)

    fits['log-likelihood'] = loglik_row
    print(model_id, label)
    print(columns)
    fits['model'] = label

    # Experiment label for each contiguous block of rows.
    experiment_labels = []
    for experiment, n_rows in (('human-vs-human', 200), ('generalization', 200), ('eye tracking', 50),
                               ('learning', 750), ('time pressure', 450)):
        experiment_labels.extend([experiment] * n_rows)
    fits['experiment'] = experiment_labels

    # (number of participants, rows per participant) for each block; ids are 1-based.
    participant_layout = [(40, 5), (40, 5), (10, 5), (30, 25), (30, 15)]
    fits['participant'] = np.hstack([np.repeat(range(n), rows) for n, rows in participant_layout]) + 1
    fits['cross-validation group'] = np.tile(range(1, 6), 330)

    # Session / time limit only exist for two of the experiments.
    fits['session number'] = np.nan
    fits['time limit'] = np.nan
    is_learning = fits['experiment'] == 'learning'
    fits.loc[is_learning, 'session number'] = np.tile(np.repeat(range(1, 6), 5), 30).astype(int)
    is_time_pressure = fits['experiment'] == 'time pressure'
    fits.loc[is_time_pressure, 'time limit'] = np.tile(np.repeat([5, 10, 20], 5), 30).astype(int)
    return fits


In [47]:
# Write one CSV of fits per model; the file name is the model's display name,
# lower-cased with spaces replaced by underscores.
# Note: `df` is rebound on every iteration, so after the loop it holds the
# table of the last model.
for i, name in enumerate(model_names):
    csv_name = 'model_fits_' + name.lower().replace(' ', '_') + '.csv'
    df = create_df(i)
    df.to_csv(osf_direc + csv_name)

final Main model
['pruning threshold', 'stopping probability', 'feature drop rate', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']
final_nonoise No value noise
['pruning threshold', 'stopping probability', 'feature drop rate', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']
final_nodelta No feature drop
['pruning threshold', 'stopping probability', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']
final_noprune No pruning
['stopping probability', 'feature drop rate', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']
final_notree No t

In [49]:
# Build the table for model index 0 (the first entry of `models`) for inspection.
# Note: this rebinds `df`, replacing whatever table it held before.
df = create_df(0)

final Main model
['pruning threshold', 'stopping probability', 'feature drop rate', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']


In [51]:
# Inspect the fitted 'stopping probability' column of the current `df`.
df['stopping probability']

0       0.010000
1       0.014489
2       0.010000
3       0.020000
4       0.003085
          ...   
1645    0.002989
1646    0.007000
1647    0.001000
1648    0.003672
1649    0.001000
Name: stopping probability, Length: 1650, dtype: float64

In [44]:
def parse_binstring(x):
    """Return integer x in binary, left-padded with zeros to at least 36 digits."""
    return format(x, '036b')

def parse_line(line):
    """Turn one trial row of the C++ data file into a list of Python values.

    The text 'alltrials', all braces and every 'ULL' are removed, commas are
    turned into whitespace, and the remaining tokens are converted in place:

      [0], [1]  integer literals (base taken from the literal's prefix)
                -> zero-padded binary strings via parse_binstring
      [2]       integer literal -> its base-2 logarithm truncated to an integer
                (for a value with a single bit set, the index of that bit)
      [3]       capitalised string
      [4]       number divided by 1000 (presumably milliseconds to seconds — confirm)
      [5]       integer plus 1
      [6]       integer

    Tokens beyond the seventh, if any, are returned unchanged.
    """
    cleaned = line
    for old, new in (('alltrials', ''), ('{', ''), ('}', ''), (',', ' '), ('ULL', '')):
        cleaned = cleaned.replace(old, new)
    fields = cleaned.strip().split()

    fields[0] = parse_binstring(int(fields[0], 0))
    fields[1] = parse_binstring(int(fields[1], 0))
    move_mask = int(fields[2], 0)
    fields[2] = np.log2(move_mask).astype(int)
    fields[3] = fields[3].capitalize()
    fields[4] = float(fields[4]) / 1000
    fields[5] = int(fields[5]) + 1
    fields[6] = int(fields[6])
    return fields

def load_data(expt_name,name):
    """Read the trial table embedded in '../Data/data_<expt_name>.cpp' into a DataFrame.

    The table is taken to start at the first line containing 'alltrials{{{'
    and to end at the first line, at or after that start, containing '}}'.
    Every line in that range is converted with parse_line.

    Parameters
    ----------
    expt_name : str
        Short experiment name used in the file name. Participant ids from files
        whose name ends in '2' are shifted by 50 and those ending in '3' by 100.
    name : str
        Value stored in the 'experiment' column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns: black_pieces, white_pieces, move, color, response_time,
        participant, cross-validation group, experiment.

    Raises
    ------
    ValueError
        If the start or end marker of the table cannot be found.
    """
    path = direc + '../Data/data_' + expt_name + '.cpp'
    # Only the text is needed, so the file is closed before any parsing starts.
    with open(path) as f:
        lines = f.read().splitlines()

    start_line = next((i for i, line in enumerate(lines) if 'alltrials{{{' in line), None)
    if start_line is None:
        raise ValueError("no 'alltrials{{{' line found in " + path)
    # Search for the closing braces from the start of the table onwards, so a
    # '}}' that happens to appear earlier in the file cannot be mistaken for the end.
    end_line = next((i for i in range(start_line, len(lines)) if '}}' in lines[i]), None)
    if end_line is None:
        raise ValueError("no closing '}}' found after the start of the table in " + path)

    rows = [parse_line(line) for line in lines[start_line:end_line+1]]
    df = pd.DataFrame(rows,columns=['black_pieces','white_pieces','move','color','response_time','participant','cross-validation group'])
    # Make participant ids unique across the numbered files of one experiment.
    if expt_name.endswith('2'):
        df['participant']+=50
    if expt_name.endswith('3'):
        df['participant']+=100
    df['experiment'] = name
    return df


In [45]:
# Load every data file and label its rows with the experiment it belongs to.
data_sources = [('hvh', 'human-vs-human'), ('gen', 'generalization'), ('eye', 'eye tracking'),
                ('learn1', 'learning'), ('learn2', 'learning'), ('learn3', 'learning'),
                ('tai1', 'time pressure'), ('tai2', 'time pressure')]
df = pd.concat([load_data(expt_name, name) for expt_name, name in data_sources],
               ignore_index=True)

is_time_pressure = df['experiment'] == 'time pressure'
is_learning = df['experiment'] == 'learning'

# In these two experiments the stored participant id also encodes the condition:
# consecutive ids cycle through the 3 time limits / 5 sessions. Decode the
# condition first, because the ids are collapsed to real participants below.
time_limit_by_remainder = {0: 5, 1: 10, 2: 20}
df.loc[is_time_pressure, 'time limit'] = df.loc[is_time_pressure, 'participant'].map(
    lambda p: time_limit_by_remainder[(p-1) % 3])
df.loc[is_learning, 'session number'] = df.loc[is_learning, 'participant'].map(
    lambda p: (p-1) % 5 + 1)

# Collapse the condition-specific ids to 1-based participant numbers.
df.loc[is_time_pressure, 'participant'] = (df.loc[is_time_pressure, 'participant'] - 1)//3 + 1
df.loc[is_learning, 'participant'] = (df.loc[is_learning, 'participant'] - 1)//5 + 1


In [53]:
# `df` is rebound by several cells: the pd.concat cell stores the raw data in
# it, but the create_df cells store model-fit tables under the same name.
# Refuse to export unless `df` really is the raw-data table, so a model-fit
# table can never be written out as raw_data.csv by running cells out of order.
if 'black_pieces' not in df.columns:
    raise RuntimeError("`df` does not hold the raw data (no 'black_pieces' column); "
                       "re-run the cell that builds it with pd.concat before exporting.")
df.to_csv(osf_direc + 'raw_data.csv')

In [46]:
# Display the current `df` (the rendering is truncated to the first and last rows).
df

Unnamed: 0,black_pieces,white_pieces,move,color,response_time,participant,cross-validation group,experiment,time limit,session number
0,000000000000000000000000000000000000,000000000000000000000000000000000000,24,Black,7.223,1,5,human-vs-human,,
1,000000000001000000000000000000000000,000000000000000000000000000000000000,11,White,5.141,2,5,human-vs-human,,
2,000000000001000000000000000000000000,000000000000000000000000100000000000,14,Black,3.154,1,5,human-vs-human,,
3,000000000001000000000100000000000000,000000000000000000000000100000000000,4,White,5.715,2,4,human-vs-human,,
4,000000000001000000000100000000000000,000000000000000000000000100000010000,21,Black,6.837,1,3,human-vs-human,,
...,...,...,...,...,...,...,...,...,...,...
67326,000000000000000000000000000000000000,000000000000000000000000000000000000,23,Black,0.835,30,1,time pressure,10.0,
67327,000000000000100000000000000000000000,000000000000000000000000000001000000,14,Black,0.919,30,1,time pressure,10.0,
67328,000000000000100000000100000000000000,000000000000000000001000000001000000,24,Black,0.816,30,5,time pressure,10.0,
67329,000000000001100000000100000000000000,000000000010000000001000000001000000,5,Black,1.465,30,2,time pressure,10.0,
