In [88]:
import numpy as np
import pingouin as pg
import pandas as pd
import seaborn as sns
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
import statsmodels.api as sm
import statsmodels.formula.api as sm


# Functions  

In [107]:
def read_csv(name, info=True):
    df = pd.read_csv(f'../data/stats/{name}.csv')
    # pd.set_option('display.max_columns', None)
    # pd.set_option('max_columns', None)
    if info:
        print_section('DATASET INFO')
        print('N rows=', df.size)
        print('N cols=', len(df.columns))
        print(df.head())
    return df

def print_section(title):
    print( '*' + '-' * 150 + '*')
    print(title)
    print( '*' + '-' * 150 + '*')

# Fig 2A

In [109]:
filename = 'Fig2A'
df = read_csv(filename)
pd.options.display.float_format = '{:,.5f}'.format

# test for performance above chance
print_section("test for performance above chance")
table = []
for exp_num in (1, 2, 3, 4):
    for cond in (1, 2, 3, 4):
        if exp_num == 4 and cond not in (1, 4):
            continue
        row = pg.ttest(df['score'][df.exp_num==exp_num][df.cond==cond], 0.5)
        row.insert(loc=0, column='exp_num', value=exp_num)
        row.insert(loc=1, column='cond', value=cond)
        table.append(row)

df2 = pd.concat(table)
df2

# test performance within exp and across cond with regression
print_section('regress performance within exp and across cond with regression')

x = df[['cond', 'exp_num']]
y = df[['exp_num', 'score']]

table = []
for exp_num in (1, 2, 3, 4):
    row = pg.linear_regression(x[x.exp_num==exp_num].cond, y[y.exp_num==exp_num].score, add_intercept=True, as_dataframe=False)

    row['dof_model'] = [row['df_model'], ] * len(row['coef'])
    row['dof_resid'] = [row['df_resid'], ] * len(row['coef'])

    del row['residuals']; del row['df_model'];
    del row['df_resid']; del row['X']; del row['y']; del row['pred']
    row = pd.DataFrame(row)
    
    row.insert(loc=0, column='exp_num', value=exp_num)
    table.append(
        row
    )

df3 = pd.concat(table)
df3



*------------------------------------------------------------------------------------------------------------------------------------------------------*
DATASET INFO
*------------------------------------------------------------------------------------------------------------------------------------------------------*
N rows= 7672
N cols= 7
   subject  exp_num  complete  block  less_cues   score  cond
0        1        1         0      0          0 0.16667     4
1        2        1         0      0          0 0.86667     4
2        3        1         0      0          0 0.46667     4
3        4        1         0      0          0 0.40000     4
4        5        1         0      0          0 0.73333     4
*------------------------------------------------------------------------------------------------------------------------------------------------------*
test for performance above chance
*--------------------------------------------------------------------------------------------------

Unnamed: 0,exp_num,cond,T,dof,tail,p-val,CI95%,cohen-d,BF10,power
T-test,1,1,10.9838,75,two-sided,0.0,"[0.71, 0.8]",1.25993,210200000000000.0,1.0
T-test,1,2,6.99011,75,two-sided,0.0,"[0.63, 0.73]",0.80182,10930000.0,1.0
T-test,1,3,7.00086,75,two-sided,0.0,"[0.62, 0.72]",0.80305,11430000.0,1.0
T-test,1,4,1.50661,75,two-sided,0.13611,"[0.49, 0.6]",0.17282,0.372,0.31852
T-test,2,1,19.43878,70,two-sided,0.0,"[0.8, 0.87]",2.30696,4.8199999999999996e+26,1.0
T-test,2,2,11.65526,70,two-sided,0.0,"[0.71, 0.8]",1.38323,1206000000000000.0,1.0
T-test,2,3,7.72376,70,two-sided,0.0,"[0.63, 0.72]",0.91664,162000000.0,1.0
T-test,2,4,3.62985,70,two-sided,0.00054,"[0.54, 0.64]",0.43078,44.471,0.94735
T-test,3,1,27.05025,82,two-sided,0.0,"[0.88, 0.94]",2.96915,1.5490000000000002e+39,1.0
T-test,3,2,19.91188,82,two-sided,0.0,"[0.82, 0.9]",2.18561,7.365e+29,1.0


*------------------------------------------------------------------------------------------------------------------------------------------------------*
regress performance within exp and across cond with regression
*------------------------------------------------------------------------------------------------------------------------------------------------------*


Unnamed: 0,exp_num,names,coef,se,T,pval,r2,adj_r2,CI[2.5%],CI[97.5%],dof_model,dof_resid
0,1,Intercept,0.82193,0.03102,26.49265,0.0,0.09662,0.09363,0.76088,0.88298,1,302
1,1,cond,-0.06439,0.01133,-5.68346,0.0,0.09662,0.09363,-0.08668,-0.04209,1,302
0,2,Intercept,0.92347,0.02662,34.69104,0.0,0.20638,0.20357,0.87108,0.97587,1,282
1,2,cond,-0.08324,0.00972,-8.56352,0.0,0.20638,0.20357,-0.10237,-0.06411,1,282
0,3,Intercept,0.96486,0.0254,37.98767,0.0,0.09879,0.09606,0.91489,1.01482,1,330
1,3,cond,-0.05578,0.00927,-6.01467,0.0,0.09879,0.09606,-0.07403,-0.03754,1,330
0,4,Intercept,0.96521,0.03172,30.42501,0.0,0.18856,0.1839,0.9026,1.02783,1,174
1,4,cond,-0.06919,0.01088,-6.35875,0.0,0.18856,0.1839,-0.09067,-0.04772,1,174


# Fig 2B

# Fig 2D

In [91]:
filename = 'Fig2D'
df = read_csv(filename)
pd.options.display.float_format = '{:,.5f}'.format
pg.pairwise_ttests(dv='slope', subject='subject', between='exp_num', within='modality', effsize='cohen', data=df, padjust='bonf', parametric=True, within_first=True)

N rows= 3816
N cols= 4
   subject  exp_num   slope modality
0        1        1 0.42728       LE
1        2        1 0.90420       LE
2        3        1 0.43349       LE
3        4        1 0.65978       LE
4        5        1 0.84207       LE


Unnamed: 0,Contrast,modality,A,B,Paired,Parametric,T,dof,Tail,p-unc,p-corr,p-adjust,BF10,cohen
0,modality,-,LE,ES,True,True,21.46513,317.0,two-sided,0.0,0.0,bonf,1.54e+60,1.61196
1,modality,-,LE,SP,True,True,14.85496,317.0,two-sided,0.0,0.0,bonf,8.099e+34,1.08016
2,modality,-,ES,SP,True,True,-7.36087,317.0,two-sided,0.0,0.0,bonf,3713000000.0,-0.33217
3,exp_num,-,1,2,False,True,0.71682,134.6091,two-sided,0.47473,1.0,bonf,0.225,0.11693
4,exp_num,-,1,3,False,True,-3.14624,154.33538,two-sided,0.00198,0.01191,bonf,15.065,-0.50046
5,exp_num,-,1,4,False,True,-4.5975,161.2677,two-sided,1e-05,5e-05,bonf,1945.049,-0.71571
6,exp_num,-,2,3,False,True,-4.41133,148.51098,two-sided,2e-05,0.00012,bonf,931.038,-0.69631
7,exp_num,-,2,4,False,True,-5.97435,150.28953,two-sided,0.0,0.0,bonf,702100.0,-0.91179
8,exp_num,-,3,4,False,True,-1.65671,168.3119,two-sided,0.09944,0.59664,bonf,0.588,-0.25258
9,modality * exp_num,LE,1,2,False,True,-2.1526,144.84472,two-sided,0.03301,0.59411,bonf,1.459,-0.35486


# Review, fig1, test accuracy bad/good

In [92]:
filename = 'corr_tables'
df = read_csv(filename)
pg.pairwise_ttests(dv='slope', subject='subject', between='exp_num', within='modality', 
            data=df, padjust='bonf', within_first=False, parametric=True)

# pg.print_table(res, floatfmt='.6f')




N rows= 592
N cols= 4
   subject  exp_num    slope modality
0        1        1  0.22351       ES
1        2        1  0.44697       ES
2        3        1 -0.06116       ES
3        4        1  0.52977       ES
4        5        1  0.23695       ES


ValueError: Columns must have at least two unique values.

# Fig 2, test slope bad good

In [None]:
filename = 'review_perf_fig2'
df = read_csv(filename)

pg.pairwise_ttests(dv='value', between=['exp_num', 'split'], subject='id',
            data=df, padjust='bonf', parametric=True)





In [None]:
filename = 'review_accuracy'
df = read_csv(filename)

res = pg.pairwise_ttests(dv='value', between=['modality', 'split'], subject='id',
            data=df, padjust='bonf', parametric=True)

pg.print_table(res, floatfmt='.6f')


