In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as sts
from matplotlib import rcParams,font_manager
import os
from matplotlib.ticker import FormatStrFormatter
import pandas as pd
import pickle
from fourinarowfunctions import *

%load_ext autoreload
%autoreload 2

In [2]:
# Global matplotlib styling for every figure in this notebook.
# Keys are applied in the order listed; tick widths/sizes are written out explicitly so that
# they match the axes border and each other.
rcParams.update({
    'figure.figsize': (6,6),       # figure size in inches
    'font.family': "sans-serif",
    'font.weight': "roman",
    'font.style': "normal",        # upright (non-italic) text
    'font.size': 32,               # default text size in pt
    'pdf.fonttype': 42,            # embed TrueType (Type 42) fonts when saving PDFs
    'axes.linewidth': 2,           # thickness of the border
    'xtick.major.width': 2,        # same as axes.linewidth, so line widths are consistent
    'ytick.major.width': 2,        # same as axes.linewidth
    'axes.spines.right': False,    # hides right border
    'axes.spines.top': False,      # hides top border
    'legend.frameon': False,       # hides box around the legend
    'legend.fontsize': 18,         # font size in pt
    'axes.labelsize': 32,
    'xtick.labelsize': 24,
    'ytick.labelsize': 24,         # same as xtick.labelsize
    'lines.linewidth': 3,
    'xtick.major.size': 5,
    'lines.markersize': 16,
    'ytick.major.size': 5,         # same as xtick.major.size
})

In [3]:
# Root folders used by the rest of the notebook. Paths are built by plain string
# concatenation (e.g. direc + 'Params/...'), so both values must end with a path separator;
# os.path.join(path, '') appends one only when it is missing.
# Set FOURINAROW_ANALYSIS_DIR / FOURINAROW_OSF_DIR to run on another machine; the defaults
# are the original author's local folders.
direc = os.path.join(
    os.environ.get('FOURINAROW_ANALYSIS_DIR', 'C:/Users/svo/Google Drive/Bas Games/Analysis/'), '')
osf_direc = os.path.join(
    os.environ.get('FOURINAROW_OSF_DIR', 'C:/Users/svo/Documents/FourinarowData/OSF/'), '')

In [37]:
# Read the pickled log-likelihoods from the analysis directory.
# NOTE: pickle.load can execute arbitrary code while unpickling; only open files you trust.
with open(direc + 'loglik_all_pickled.txt','rb') as f:
    loglik_all = pickle.load(f)

In [38]:
# Join the unpickled pieces horizontally; create_df later reads one row per model via loglik_all[i,:].
# NOTE(review): this overwrites its own input, so re-running this cell without re-running the
# load cell applies hstack a second time — confirm the shape before re-executing.
loglik_all = np.hstack(loglik_all)

In [18]:
# Default model code.
model = 'final'
# Data-set codes used in parameter file names: three single-file experiments,
# then the numbered 'learn' and 'tai' files.
expt_names = (['hvh','gen','eye']
              + ['learn%d' % k for k in (1,2,3)]
              + ['tai%d' % k for k in (1,2)])

In [19]:
# Column names for the main model's parameters.
# The order matters: param_names_models derives the other models' columns by slicing this list by position.
param_names = [
    # indices 0-4: non-weight parameters
    'pruning threshold',
    'stopping probability',
    'feature drop rate',
    'lapse rate',
    'active scaling constant',
    # indices 5-9: feature weights
    'center weight',
    'connected 2-in-a-row weight',
    'unconnected 2-in-a-row weight',
    '3-in-a-row weight',
    '4-in-a-row weight',
]

In [20]:
# Each row pairs a model code (used in file names) with its display name.
# The rows are unzipped into two parallel lists that share the same index.
models, model_names = map(list, zip(*[
    ('final',              'Main model'),
    ('final_nonoise',      'No value noise'),
    ('final_nodelta',      'No feature drop'),
    ('final_noprune',      'No pruning'),
    ('final_notree',       'No tree'),
    ('final_noact',        'No active scaling'),
    ('final_no3',          'No 3-in-a-row'),
    ('final_no2conn',      'No connected 2-in-a-row'),
    ('final_nocenter',     'No center'),
    ('final_no4',          'No 4-in-a-row'),
    ('final_no2unc',       'No unconnected 2-in-a-row'),
    ('final_optweights',   'Optimal weights'),
    ('final_mcts_myopic',  'MCTS'),
    ('final_drop_tile',    'Tile dropping'),
    ('final_fixed_iters',  'Fixed iterations'),
    ('final_fixed_depth',  'Fixed depth'),
    ('final_fixed_branch', 'Fixed branching'),
    ('final_weight_hvd',   'Orientation-dependent weights'),
    ('final_drop_hvd',     'Orientation-dependent dropping'),
    ('final_triangle',     'Triangle'),
    ('final_drop_type',    'Type-dependent dropping'),
    ('final_opp',          'Opponent scaling'),
]))


In [21]:
# Parameter column names for each model, in the same order as `models` / `model_names`
# (create_df looks all three lists up with the same index). Most entries are built by
# slicing `param_names` to drop or insert names at a given position.
# Entries on the next line: final, final_nonoise, final_nodelta, final_noprune
param_names_models = [param_names,param_names,param_names[:2] + param_names[3:],param_names[1:],
                     # final_notree, final_noact, final_no3
                     param_names[2:],param_names[:4] + param_names[5:],param_names[:8] + param_names[9:],
                      # final_no2conn, final_nocenter, final_no4
                      param_names[:6] + param_names[7:],param_names[:5] + param_names[6:],param_names[:9],
                      # final_no2unc, final_optweights
                      param_names[:7] + param_names[8:],param_names[:5],
                      # final_mcts_myopic
                      param_names[1:4] + ['exploration constant'] + param_names[4:],
                      # final_drop_tile
                      param_names[:2] + ['tile drop rate'] + param_names[3:],
                      # final_fixed_iters
                      param_names[:1] + ['inverse iteration number'] + param_names[2:],
                      # final_fixed_depth
                      param_names[:1] + ['depth'] + param_names[2:],
                      # final_fixed_branch
                      ['branching factor'] + param_names[1:],       
                      # final_weight_hvd
                      param_names[:4] + ['horizontal-vertical scaling','horizontal-diagonal scaling'] + param_names[4:],
                      # final_drop_hvd
                      param_names[:2] + param_names[3:] + ['horizontal feature drop rate','vertical feature drop rate',
                                                           'diagonal feature drop rate'],
                      # final_triangle
                      param_names + ['triangle weight'],
                      # final_drop_type
                      param_names[:2] + param_names[3:] + ['connected 2-in-a-row drop rate','unconnected 2-in-a-row drop rate',
                                                           '3-in-a-row drop rate','4-in-a-row drop rate'],
                      # final_opp
                      param_names[:4] + ['opponent scaling constant'] + param_names[4:],
                     ]

In [45]:
def create_df(i):
    """Build the table of fitted parameters and log-likelihoods for model number ``i``.

    Looks up ``models[i]``, ``model_names[i]`` and ``param_names_models[i]``, loads one
    ``Params/params_<experiment>_<model>_short.txt`` file per entry of ``expt_names`` from
    ``direc``, stacks them vertically, and attaches row ``i`` of ``loglik_all`` plus the
    bookkeeping columns. The model code, display name and parameter names are printed.

    The bookkeeping columns are hard-coded for 1650 rows laid out as
    200 + 200 + 50 + 750 + 450 (human-vs-human, generalization, eye tracking, learning,
    time pressure), with cross-validation groups 1-5 cycling fastest.
    NOTE(review): presumably this matches the row order of the parameter files — confirm.

    Parameters
    ----------
    i : int
        Position of the model in ``models``.

    Returns
    -------
    pandas.DataFrame
        Parameter columns followed by 'log-likelihood', 'model', 'experiment',
        'participant', 'cross-validation group', 'session number' and 'time limit'.
    """
    code = models[i]
    label = model_names[i]
    columns = param_names_models[i]
    loglik = loglik_all[i,:]

    # One parameter file per experiment, stacked in the order of expt_names.
    per_experiment = []
    for expt in expt_names:
        per_experiment.append(np.loadtxt(direc + 'Params/params_' + expt + '_' + code + '_short.txt'))
    table = pd.DataFrame(np.vstack(per_experiment),columns=columns)

    table['log-likelihood'] = loglik
    print(code,label)
    print(columns)
    table['model'] = label

    # Experiment label for each consecutive block of rows.
    block_sizes = [('human-vs-human',200),('generalization',200),('eye tracking',50),
                   ('learning',750),('time pressure',450)]
    table['experiment'] = [expt_label for expt_label,count in block_sizes for _ in range(count)]

    # (number of participants, rows per participant) for each block; ids are shifted to start at 1.
    id_layout = [(40,5),(40,5),(10,5),(30,25),(30,15)]
    table['participant'] = np.hstack([np.repeat(range(n),reps) for n,reps in id_layout])+1

    table['cross-validation group']=np.tile(range(1,6),330)

    # Only filled in for the experiments they apply to; NaN elsewhere.
    table['session number'] = np.nan
    table['time limit'] = np.nan
    is_learning = table['experiment']=='learning'
    table.loc[is_learning,'session number'] = np.tile(np.repeat(range(1,6),5),30).astype(int)
    is_time_pressure = table['experiment']=='time pressure'
    table.loc[is_time_pressure,'time limit'] = np.tile(np.repeat([5,10,20],5),30).astype(int)
    return table


In [46]:
# Write one CSV per model to osf_direc, named after the model's display name
# (lower-cased, spaces replaced by underscores), e.g. 'model_fits_main_model.csv'.
for i in range(len(models)):
    df = create_df(i)
    # Use this iteration's display name. The previous code read a variable `name` that this
    # loop never assigns, so it either failed or reused a stale value and every model was
    # written to the same file.
    df.to_csv(osf_direc + 'model_fits_' + model_names[i].lower().replace(' ','_') + '.csv')

final Main model
['pruning threshold', 'stopping probability', 'feature drop rate', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']
final_nonoise No value noise
['pruning threshold', 'stopping probability', 'feature drop rate', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']
final_nodelta No feature drop
['pruning threshold', 'stopping probability', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']
final_noprune No pruning
['stopping probability', 'feature drop rate', 'lapse rate', 'active scaling constant', 'center weight', 'connected 2-in-a-row weight', 'unconnected 2-in-a-row weight', '3-in-a-row weight', '4-in-a-row weight']
final_notree No t

In [47]:
# Show the column names of df, i.e. the last table assigned by the export loop.
df.columns

Index(['pruning threshold', 'stopping probability', 'feature drop rate',
       'lapse rate', 'opponent scaling constant', 'active scaling constant',
       'center weight', 'connected 2-in-a-row weight',
       'unconnected 2-in-a-row weight', '3-in-a-row weight',
       '4-in-a-row weight', 'log-likelihood', 'model', 'experiment',
       'participant', 'cross-validation group', 'session number',
       'time limit'],
      dtype='object')

In [51]:
def parse_binstring(x):
    """Return ``x`` written in binary, left-padded with zeros to at least 36 characters."""
    return format(x, '036b')

def parse_line(line):
    """Turn one trial line of a data file into a list of typed fields.

    The text 'alltrials', braces and 'ULL' suffixes are removed, commas become spaces, and the
    rest is split on whitespace. The first seven tokens are then converted in place:

    0, 1 : integer literal (base inferred from its prefix) -> 36-character binary string
    2    : integer literal -> its base-2 logarithm as an integer
           (presumably a single set bit marking the move — TODO confirm)
    3    : capitalized string
    4    : number divided by 1000 (presumably milliseconds to seconds — TODO confirm)
    5    : integer plus one
    6    : integer

    load_data labels these seven fields black_pieces, white_pieces, move, color,
    response_time, participant and cross-validation group. Any further tokens are returned
    unchanged as strings.
    """
    cleaned = line
    for unwanted,replacement in (('alltrials',''),('{',''),('}',''),(',',' '),('ULL','')):
        cleaned = cleaned.replace(unwanted,replacement)
    fields = cleaned.strip().split()

    # One converter per position, applied in order.
    converters = (
        lambda tok: parse_binstring(int(tok,0)),
        lambda tok: parse_binstring(int(tok,0)),
        lambda tok: np.log2(int(tok,0)).astype(int),
        lambda tok: tok.capitalize(),
        lambda tok: float(tok)/1000,
        lambda tok: int(tok)+1,
        lambda tok: int(tok),
    )
    for position,convert in enumerate(converters):
        fields[position] = convert(fields[position])
    return fields

def load_data(expt_name,name):
    """Load the trials of one experiment file into a DataFrame.

    Reads ``direc + '../Data/data_<expt_name>.cpp'``, takes the lines from the one containing
    ``alltrials{{{`` up to and including the first later line containing ``}}``, and parses
    each of them with ``parse_line``.

    Parameters
    ----------
    expt_name : str
        File code, e.g. 'hvh' or 'learn2'. Codes ending in '2' / '3' get their participant
        numbers shifted by 50 / 100.
    name : str
        Value stored in the 'experiment' column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns 'black_pieces', 'white_pieces', 'move', 'color', 'response_time',
        'participant', 'cross-validation group' and 'experiment'.

    Raises
    ------
    ValueError
        If the file has no ``alltrials{{{`` line, or no ``}}`` line at or after it.
    """
    # Read everything first so the file is closed before parsing starts.
    with open(direc + '../Data/data_' + expt_name + '.cpp') as f:
        lines = f.read().splitlines()

    start_line = next((i for i,line in enumerate(lines) if 'alltrials{{{' in line),None)
    if start_line is None:
        raise ValueError("no 'alltrials{{{' line found in data file for " + repr(expt_name))

    # Look for the end marker from the start line onwards, so that a '}}' earlier in the file
    # cannot be mistaken for the end of the trial list.
    end_line = next((i for i in range(start_line,len(lines)) if '}}' in lines[i]),None)
    if end_line is None:
        raise ValueError("no closing '}}' line found after 'alltrials{{{' in data file for " + repr(expt_name))

    rows = [parse_line(line) for line in lines[start_line:end_line+1]]
    df = pd.DataFrame(rows,columns=['black_pieces','white_pieces','move','color','response_time','participant','cross-validation group'])
    # Later files of a multi-file experiment continue the participant numbering of the earlier ones.
    if expt_name.endswith('2'):
        df['participant']+=50
    if expt_name.endswith('3'):
        df['participant']+=100
    df['experiment'] = name
    return df


In [52]:
# Load every data file and stack the trials into one table with a fresh 0..N-1 index.
df = pd.concat(
    [load_data(expt_name,name) for expt_name,name in zip(
        ['hvh','gen','eye','learn1','learn2','learn3','tai1','tai2'],
        ['human-vs-human','generalization','eye tracking'] + ['learning'] *3 + ['time pressure']*2)],
    ignore_index=True)

# Row selectors for the two experiments whose 'participant' field encodes more than the person.
is_time_pressure = df['experiment']=='time pressure'
is_learning = df['experiment']=='learning'

# Decode the condition from the original participant number first ...
df.loc[is_time_pressure,'time limit'] = df.loc[is_time_pressure,'participant'].map(
    lambda p: {0:5,1:10,2:20}[(p-1)%3])
df.loc[is_learning,'session number'] = df.loc[is_learning,'participant'].map(
    lambda p: (p-1)%5+1)
# ... and only then collapse the number to one id per person (3 time limits / 5 sessions each).
df.loc[is_time_pressure,'participant'] = (df.loc[is_time_pressure,'participant']-1)//3+1
df.loc[is_learning,'participant'] = (df.loc[is_learning,'participant']-1)//5+1


In [53]:
# Save the combined trial table as raw_data.csv in osf_direc
# (with to_csv's defaults the DataFrame index is written as the first column).
df.to_csv(osf_direc + 'raw_data.csv')