In [1]:
import numpy as np
import pandas as pd
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

import EEG_Decoder.eeg_decoder as eeg_decoder
from utils import clean_behavior, setup_hyperplane_conds

from scipy.stats import ttest_1samp, ttest_rel

%load_ext autoreload
%autoreload 2

  return warn(
  return warn(


In [2]:
# Fix NumPy's global RNG state so any stochastic step below is repeatable.
SEED = 2023
np.random.seed(SEED)

In [3]:
# Build the experiment handle for the preprocessed data set, addressed
# relative to this notebook's working directory.
experiment_name = 'DATA/Preprocessed'
data_dir = f'../../{experiment_name}'
exp = eeg_decoder.Experiment(experiment_name, data_dir, dev=False)

# Collect every CSV in the data directory in sorted (deterministic) order;
# the files are later looked up by position.
exp.behavior_files = sorted(exp.data_dir.glob('*.csv'))

In [4]:
# Per-subject cleaned behavior tables, keyed by subject index.
behavior_dict = {}

for isub in range(exp.nsub):
    # Read this subject's artifact index and raw behavior file, then run both
    # project helpers (clean_behavior, setup_hyperplane_conds) on the table.
    artifact_idx = exp.load_artifact_idx(isub)
    raw_behavior = pd.read_csv(exp.behavior_files[isub])
    behavior = setup_hyperplane_conds(
        clean_behavior(raw_behavior, artifact_idx, isub),
        separate_no_overlap=True,
    )

    # Keep only trials whose port code is below 240
    # (original note: drops interruption trials for subs 4,5).
    keep_mask = (behavior.port_codes < 240).values
    behavior = behavior[keep_mask].reset_index(drop=True)

    # Keep only trials with at least 24 dots in total
    # (original note: drops set size 1 trials with < 24 dots).
    # NOTE(review): the filter itself is not restricted to set size 1 - confirm
    # that only set size 1 trials can fall below 24 dots.
    behavior = behavior.query('total_nDots >= 24').reset_index(drop=True)

    behavior_dict[isub] = behavior



In [5]:
# Stack every subject's cleaned trials into one long frame, tagging each row
# with its subject index in a 'sid' column so later cells can group by subject.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# calling it inside a loop re-copies the accumulated frame on every iteration.
# Collect the per-subject frames and concatenate them once instead.
subject_frames = []
for isub, behavior in behavior_dict.items():
    behavior['sid'] = isub  # also tags the frame stored in behavior_dict, as before
    subject_frames.append(behavior)

# pd.concat raises ValueError on an empty list; fall back to the empty frame
# that the append-based loop produced when there were no subjects.
full_behavior_df = (
    pd.concat(subject_frames, ignore_index=True) if subject_frames else pd.DataFrame()
)

In [6]:
# Mean accuracy for each participant, followed by the mean and standard
# deviation of those participant-level means.
participant_accs = full_behavior_df.groupby('sid')['ACC'].mean()
participant_accs.agg(['mean', 'std'])

mean    0.919328
std     0.065620
Name: ACC, dtype: float64

# SS1 vs SS2

In [7]:
# One row per (subject, set size) holding that subject's mean accuracy.
set_size_df = (
    full_behavior_df
    .groupby(['sid', 'SetSize'])['ACC']
    .mean()
    .reset_index()
)
set_size_df.head()

Unnamed: 0,sid,SetSize,ACC
0,0,1,0.936819
1,0,2,0.756098
2,1,1,0.997951
3,1,2,0.98008
4,2,1,0.982684


In [8]:
# Mean and standard deviation of the subject-level accuracies at each set size.
set_size_df.groupby('SetSize')['ACC'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
SetSize,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.96746,0.05342
2,0.886801,0.08144


In [9]:
# Scratch cell: evaluates the scientific-notation literal so it is shown in
# plain decimal form (17280.0).
# NOTE(review): presumably the BF10 of the set-size t-test - confirm, or delete this cell.
1.728e+04

17280.0

In [10]:
# Within-subject comparison of accuracy between the two set sizes.
pg.pairwise_ttests(
    data=set_size_df,
    dv='ACC',
    within='SetSize',
    subject='sid',
    effsize='cohen',
    return_desc=True,
)


Unnamed: 0,Contrast,A,B,mean(A),std(A),mean(B),std(B),Paired,Parametric,T,dof,alternative,p-unc,BF10,cohen
0,SetSize,1,2,0.96746,0.05342,0.886801,0.08144,True,True,6.673954,22.0,two-sided,1e-06,17280.0,1.171182


## Repeat, excluding subjects listed as having informative eye data

In [11]:
# Subject ids to exclude: the index column of the exclusion file.
bad_eyes = pd.read_csv('informative_eye_subs.csv', index_col=0).index.values

# Drop those subjects. This rebinds set_size_df, so the cells that follow see
# the filtered table.
keep_rows = ~set_size_df['sid'].isin(bad_eyes)
set_size_df = set_size_df[keep_rows]

In [12]:
# Same set-size comparison, run on whatever set_size_df currently holds
# (the excluded subjects are removed once the preceding cell has run).
pg.pairwise_ttests(
    data=set_size_df,
    dv='ACC',
    within='SetSize',
    subject='sid',
    effsize='cohen',
    return_desc=True,
)


Unnamed: 0,Contrast,A,B,mean(A),std(A),mean(B),std(B),Paired,Parametric,T,dof,alternative,p-unc,BF10,cohen
0,SetSize,1,2,0.964074,0.05817,0.876631,0.085549,True,True,6.26605,18.0,two-sided,7e-06,3229.998,1.195373


## Set Size 2 - Overlap vs Non-Overlap

In [13]:
# Flag trials whose hyperplane condition is one of the overlap conditions.
overlap_conds = [2, 3, 4, 5]  # narrow, broad, superset, partial
full_behavior_df['Overlap'] = full_behavior_df['hyperplane_conditions'].isin(overlap_conds)

# Per-subject mean accuracy on set size 2 trials, split by the overlap flag.
overlap_df = (
    full_behavior_df
    .query('SetSize==2')
    .groupby(['sid', 'Overlap'])['ACC']
    .mean()
    .reset_index()
)
overlap_df.head()

Unnamed: 0,sid,Overlap,ACC
0,0,False,0.765376
1,0,True,0.74031
2,1,False,0.981013
3,1,True,0.978495
4,2,False,0.954128


In [14]:
# Mean and standard deviation of the subject-level accuracies, by overlap flag.
overlap_df.groupby('Overlap')['ACC'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
Overlap,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0.90186,0.081446
True,0.861347,0.086559


In [15]:
# Within-subject comparison of accuracy: non-overlap vs. overlap trials.
pg.pairwise_ttests(
    data=overlap_df,
    dv='ACC',
    within='Overlap',
    subject='sid',
    effsize='cohen',
    return_desc=True,
)


Unnamed: 0,Contrast,A,B,mean(A),std(A),mean(B),std(B),Paired,Parametric,T,dof,alternative,p-unc,BF10,cohen
0,Overlap,False,True,0.90186,0.081446,0.861347,0.086559,True,True,5.290454,22.0,two-sided,2.6e-05,911.6,0.482063


In [16]:
# Remove the subjects listed in bad_eyes (this rebinds overlap_df), then
# repeat the overlap comparison on the remaining subjects.
keep_rows = ~overlap_df['sid'].isin(bad_eyes)
overlap_df = overlap_df[keep_rows]

pg.pairwise_ttests(
    data=overlap_df,
    dv='ACC',
    within='Overlap',
    subject='sid',
    effsize='cohen',
    return_desc=True,
)


Unnamed: 0,Contrast,A,B,mean(A),std(A),mean(B),std(B),Paired,Parametric,T,dof,alternative,p-unc,BF10,cohen
0,Overlap,False,True,0.893738,0.086921,0.847763,0.088321,True,True,5.418212,18.0,two-sided,3.8e-05,665.943,0.524691


# Based on the hyperplane results, compare the training conditions against the held-out conditions that weren't significantly different

In [17]:
# Display label for each hyperplane condition code.
hyperplane_conds = {
    0: 'SS1: Narrow',
    1: 'SS1: Broad',
    2: 'SS2: Overlap, Narrow',
    3: 'SS2: Overlap, Broad',
    4: 'SS2: Superset Overlap',
    5: 'SS2: Partial Overlap',
    6: 'SS2: No Overlap, Mixed',
    7: 'SS2: No Overlap, Narrow',
    8: 'SS2: No Overlap, Broad'
}

# mapping the conditions to loads (codes 0-1 -> 0, codes 2-8 -> 1)
cond_load_map = {0: 0, 1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1}

# Colour name assigned to each condition code.
colormap = {
    0: 'royalblue',
    1: 'forestgreen',
    2: 'orchid',
    3: 'coral',
    4: 'orange',
    5: 'red',
    6: 'darkred',
    7: 'yellow',
    8: 'dodgerblue'
}

# Label -> colour lookup. Iterating the label dict itself (instead of a
# hard-coded range(9)) keeps the palette in step with hyperplane_conds if a
# condition is ever added or removed.
palette = {label: colormap[code] for code, label in hyperplane_conds.items()}
palette

{'SS1: Narrow': 'royalblue',
 'SS1: Broad': 'forestgreen',
 'SS2: Overlap, Narrow': 'orchid',
 'SS2: Overlap, Broad': 'coral',
 'SS2: Superset Overlap': 'orange',
 'SS2: Partial Overlap': 'red',
 'SS2: No Overlap, Mixed': 'darkred',
 'SS2: No Overlap, Narrow': 'yellow',
 'SS2: No Overlap, Broad': 'dodgerblue'}

In [18]:
# Per-subject mean accuracy for each hyperplane condition on set size 1 trials.
ss1_breadth_accs = (
    full_behavior_df
    .query('SetSize==1')
    .groupby(['sid', 'hyperplane_conditions'])['ACC']
    .mean()
    .reset_index()
)

# Within-subject comparison between the set size 1 conditions.
pg.pairwise_ttests(
    data=ss1_breadth_accs,
    dv='ACC',
    within='hyperplane_conditions',
    subject='sid',
    effsize='cohen',
    return_desc=True,
)


Unnamed: 0,Contrast,A,B,mean(A),std(A),mean(B),std(B),Paired,Parametric,T,dof,alternative,p-unc,BF10,cohen
0,hyperplane_conditions,0,1,0.97008,0.051317,0.964671,0.057076,True,True,1.360456,22.0,two-sided,0.187463,0.493,0.099649


In [19]:
# Remove the subjects listed in bad_eyes (this rebinds ss1_breadth_accs), then
# repeat the set size 1 comparison on the remaining subjects.
keep_rows = ~ss1_breadth_accs['sid'].isin(bad_eyes)
ss1_breadth_accs = ss1_breadth_accs[keep_rows]

pg.pairwise_ttests(
    data=ss1_breadth_accs,
    dv='ACC',
    within='hyperplane_conditions',
    subject='sid',
    effsize='cohen',
    return_desc=True,
)

Unnamed: 0,Contrast,A,B,mean(A),std(A),mean(B),std(B),Paired,Parametric,T,dof,alternative,p-unc,BF10,cohen
0,hyperplane_conditions,0,1,0.967728,0.055844,0.960183,0.061946,True,True,1.695895,18.0,two-sided,0.107133,0.793,0.127941


In [20]:
# Per-subject mean accuracy for each hyperplane condition on set size 2 trials.
ss2_cond_accs = (
    full_behavior_df
    .query('SetSize==2')
    .groupby(['sid', 'hyperplane_conditions'])['ACC']
    .mean()
    .reset_index()
)

In [24]:
# Rebuild the per-subject set size 2 accuracies from the full table so this
# cell does not depend on any filtering applied to ss2_cond_accs elsewhere.
ss2_cond_accs = (
    full_behavior_df
    .query('SetSize==2')
    .groupby(['sid', 'hyperplane_conditions'])['ACC']
    .mean()
    .reset_index()
)

# Compare condition 4 against each of conditions 2, 3 and 5 in turn.
for hyperplane_cond in [2, 3, 5]:
    print(f'SS2 Superset - {hyperplane_conds[hyperplane_cond]}: ')
    pair_accs = ss2_cond_accs.query(f'hyperplane_conditions in [{hyperplane_cond}, 4]')
    pair_stats = pg.pairwise_ttests(
        data=pair_accs,
        dv='ACC',
        within='hyperplane_conditions',
        subject='sid',
        effsize='cohen',
        return_desc=True,
    )
    print(pair_stats)
    print('')

SS2 Superset - SS2: Overlap, Narrow: 
                Contrast  A  B   mean(A)    std(A)   mean(B)    std(B)  \
0  hyperplane_conditions  2  4  0.918557  0.113062  0.861927  0.095155   

   Paired  Parametric         T   dof alternative     p-unc  BF10     cohen  
0    True        True  2.523939  22.0   two-sided  0.019328  2.84  0.541942  

SS2 Superset - SS2: Overlap, Broad: 
                Contrast  A  B   mean(A)    std(A)   mean(B)    std(B)  \
0  hyperplane_conditions  3  4  0.900659  0.123855  0.861927  0.095155   

   Paired  Parametric         T   dof alternative     p-unc   BF10   cohen  
0    True        True  2.302338  22.0   two-sided  0.031154  1.926  0.3507  

SS2 Superset - SS2: Partial Overlap: 
                Contrast  A  B   mean(A)    std(A)   mean(B)    std(B)  \
0  hyperplane_conditions  4  5  0.861927  0.095155  0.836281  0.090802   

   Paired  Parametric         T   dof alternative     p-unc   BF10     cohen  
0    True        True  1.889639  22.0   two-sided

In [22]:
# Remove the subjects listed in bad_eyes (this rebinds ss2_cond_accs), then
# repeat the comparisons of condition 4 against conditions 2, 3 and 5.
keep_rows = ~ss2_cond_accs['sid'].isin(bad_eyes)
ss2_cond_accs = ss2_cond_accs[keep_rows]

for hyperplane_cond in [2, 3, 5]:
    print(f'SS2 Superset - {hyperplane_conds[hyperplane_cond]}: ')
    pair_accs = ss2_cond_accs.query(f'hyperplane_conditions in [{hyperplane_cond}, 4]')
    pair_stats = pg.pairwise_ttests(
        data=pair_accs,
        dv='ACC',
        within='hyperplane_conditions',
        subject='sid',
        effsize='cohen',
        return_desc=True,
    )
    print(pair_stats)

SS2 Superset - SS2: Overlap, Narrow: 
                Contrast  A  B  mean(A)   std(A)   mean(B)   std(B)  Paired  \
0  hyperplane_conditions  2  4  0.90918  0.12129  0.846399  0.09682    True   

   Parametric         T   dof alternative     p-unc   BF10     cohen  
0        True  2.379636  18.0   two-sided  0.028596  2.203  0.572091  
SS2 Superset - SS2: Overlap, Broad: 
                Contrast  A  B   mean(A)    std(A)   mean(B)   std(B)  Paired  \
0  hyperplane_conditions  3  4  0.882377  0.128904  0.846399  0.09682    True   

   Parametric         T   dof alternative     p-unc   BF10     cohen  
0        True  1.794362  18.0   two-sided  0.089569  0.906  0.315604  
SS2 Superset - SS2: Partial Overlap: 
                Contrast  A  B   mean(A)   std(A)   mean(B)    std(B)  Paired  \
0  hyperplane_conditions  4  5  0.846399  0.09682  0.826023  0.092759    True   

   Parametric       T   dof alternative     p-unc   BF10     cohen  
0        True  1.3221  18.0   two-sided  0.202691

# Make Table for Paper

In [23]:
# Stage 1: each subject's mean accuracy within every hyperplane condition.
per_subject_acc = (
    full_behavior_df
    .groupby(['sid', 'hyperplane_conditions'])
    .aggregate({'ACC': 'mean'})
    .reset_index()
)

# Stage 2: mean and standard deviation of those subject means per condition.
summary_table = (
    per_subject_acc
    .groupby('hyperplane_conditions')
    .aggregate({'ACC': ['mean', 'std']})
    .reset_index()
)

# Replace the numeric condition codes with their display labels.
summary_table.hyperplane_conditions = summary_table.hyperplane_conditions.map(hyperplane_conds)
summary_table

Unnamed: 0_level_0,hyperplane_conditions,ACC,ACC
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
0,SS1: Narrow,0.97008,0.051317
1,SS1: Broad,0.964671,0.057076
2,"SS2: Overlap, Narrow",0.918557,0.113062
3,"SS2: Overlap, Broad",0.900659,0.123855
4,SS2: Superset Overlap,0.861927,0.095155
5,SS2: Partial Overlap,0.836281,0.090802
6,"SS2: No Overlap, Mixed",0.893953,0.08589
7,"SS2: No Overlap, Narrow",0.920336,0.078061
8,"SS2: No Overlap, Broad",0.885173,0.086741
