In [1]:
import os, re
import pandas as pd
import numpy as np
import json
from scipy.stats import ttest_rel,ttest_ind
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_context('talk')
%matplotlib inline
plt.rcParams['font.size'] = 22

%load_ext autoreload
%reload_ext autoreload
%autoreload 2

In [2]:
from humanUtils import *
## this file contains most of the logic for loading and processing the database
from cswHumanDatabase import load_final_df

In [None]:
## load one thresholded dataset per condition
## (thresh is forwarded unchanged as load_final_df's `threshold` argument)
thresh = 0.9
dfD = dict()
for cond in ALL_CONDITIONS:
  # keyed by the same entry of ALL_CONDITIONS that was used to load it
  dfD.update({cond: load_final_df(cond, threshold=thresh)})


-loading code version: RT01B1000cl
N = 55 subjects

-loading code version: csw1000block01.04.25.19
N = 56 subjects

-loading code version: RT40B1000cl
N = 50 subjects

-loading code version: csw1000block40.04.07.19
N = 63 subjects


# figures

In [None]:

## helpers for operating on pandas dataframes
def compute_mean(df_):
  """Mean of `correct_response` for each value of `story`."""
  return df_.groupby(['story']).correct_response.mean()


def compute_stderr(df_):
  """Per-`story` standard deviation of `correct_response`, scaled by sqrt(N).

  N is the number of distinct `subjnum` values found in the index of the
  whole frame (not counted separately per story).
  """
  n_subjects = len(df_.index.get_level_values('subjnum').unique())
  return df_.groupby(['story']).correct_response.std() / np.sqrt(n_subjects)


def plot_acc_with_background(df,ax=None,cL=('blue','green','blue','green','red')):
  """Plot mean accuracy per story with an error band and coloured background.

  Parameters
  ----------
  df : DataFrame accepted by `compute_mean` / `compute_stderr`
    (a `correct_response` column, groupable by `story`, with a `subjnum`
    index level).
  ax : axes to draw on; when None the current pyplot axes are used.
  cL : sequence of at least five colours, one per background segment.
    The default is a tuple so it cannot be mutated between calls.

  Returns
  -------
  None; everything is drawn onto `ax`.
  """
  ## compute mean and stderr
  mean_acc = compute_mean(df)
  stderr_acc = compute_stderr(df)
  if ax is None:
    ax = plt.gca()
  ## plot with error shading
  ax.plot(mean_acc)
  # The line above is drawn against the Series index (the story values), so
  # the band must use the same x values; a positional 0..n-1 range would
  # shift the band whenever the story index is not exactly 0..n-1.
  ax.fill_between(mean_acc.index,mean_acc-stderr_acc,mean_acc+stderr_acc,alpha=0.3)
  ## make nice background: five segments, each spanning 40 stories
  n_segments,segment_len = 5,40
  for idx in range(n_segments):
    start = segment_len*idx
    ax.fill_between(np.arange(start,start+segment_len+1),0,1.05,
                    color=cL[idx],alpha=0.2)

In [None]:
## one panel per loaded condition, stacked vertically on a shared x-axis
fg,axar = plt.subplots(len(dfD),1,figsize=(10,40),sharex=True);
## with a single condition plt.subplots returns a bare Axes instead of an
## array; wrap it so axar[idx] below works for any number of conditions
## (a no-op when axar is already a 1-D array)
axar = np.atleast_1d(axar)
## helper for pulling the rows of one timestep (`depth` index level);
## only needed by the optional per-timestep overlay at the bottom of the loop
get_tstep_df = lambda tstep,df: df[df.index.get_level_values('depth') == tstep]
## background colours per condition, one entry per segment drawn by
## plot_acc_with_background
cLD = {
  'blocked': ['blue','green','blue','green','red'],
  'blocked_rep': ['blue','green','blue','green','red'],
  'interleaved': ['purple','purple','purple','purple','red'],
  'interleaved_rep': ['purple','purple','purple','purple','red'],
  'inserted_early': ['blue','green','purple','purple','red'],
  'inserted_early_rep': ['blue','green','purple','purple','red'],
  'inserted_middle': ['purple','blue','green','purple','red'],
  'inserted_middle_rep': ['purple','blue','green','purple','red'],
  'inserted_late': ['purple','purple','blue','green','red'],
  'inserted_late_rep': ['purple','purple','blue','green','red'],
  'explicit_interleaved': ['purple','purple','purple','purple','red'],
}
for idx,(c_str,c_df) in enumerate(dfD.items()):
  ax = axar[idx]
  ax.set_title(c_str)
  ## dashed reference line at 0.5
  ax.axhline(0.5,c='k',ls='--',lw=.3)
  cL = cLD[c_str]
  ## mean (c_df is the same frame as dfD[c_str])
  plot_acc_with_background(c_df,cL=cL,ax=ax)
  ## individual tsteps
#   plot_acc_with_background(get_tstep_df(1,c_df),cL=cL,ax=ax)
#   plot_acc_with_background(get_tstep_df(2,c_df),cL=cL,ax=ax)

# stats

## between conditions

In [None]:
## helper that keeps only the rows whose `block` index level equals 4
## (the block this notebook treats as the test block)
def get_test_df(df):
  in_test_block = df.index.get_level_values('block') == 4
  return df[in_test_block]

## compute ttest
def tstat_betweencond_testacc(df1,df2):
  """ independent-samples t-test between two conditions

  Each frame is cut down to its test block, `correct_response` is averaged
  within every `subjnum`, and the two sets of per-subject means are handed
  to `scipy.stats.ttest_ind`, whose result is returned as-is.
  """
  per_subject_testacc = [
    get_test_df(cond_df).groupby(['subjnum']).correct_response.mean()
    for cond_df in (df1,df2)
  ]
  return ttest_ind(*per_subject_testacc)

In [None]:
import itertools
## run the between-condition test-accuracy t-test for every ordered pair of
## conditions, print each result and collect them into one text report
## NOTE(review): itertools.product also pairs every condition with itself and
## yields both orderings of each pair; itertools.combinations(dfD.items(),2)
## would report each unordered pair once -- confirm which report is wanted
big_str = ""
for (c1,df1),(c2,df2) in itertools.product(dfD.items(),dfD.items()):
  t,p = tstat_betweencond_testacc(df1,df2)
  smal_str = "%s vs %s\n ttest_ind=%s, p=%s"%(c1,c2,t,p)
  print(smal_str)
  big_str += "\n%s"%smal_str

## make sure the report directory exists, otherwise open() raises only after
## all the statistics above have already been computed
report_path = 'stats_reports/between_condition_test_acc.txt'
os.makedirs(os.path.dirname(report_path),exist_ok=True)
with open(report_path, 'w') as f:
    f.write(big_str)

## within condition between timesteps

In [None]:
## helper for pulling the rows of one timestep (`depth` index level)
def get_tstep_df(tstep,df):
  return df[df.index.get_level_values('depth') == tstep]

# compute stat
def tstat_wicond_betweenstep_testacc(df):
  """ related-samples t-test
  test acc step 1 vs step 2

  The frame is reduced to its test block (via `get_test_df`), then
  `correct_response` is averaged per `subjnum` separately for depth 1 and
  depth 2. The two per-subject series are matched on `subjnum` before being
  passed to `scipy.stats.ttest_rel`, whose result is returned as-is.
  Subjects that have data for only one of the two steps cannot form a pair
  and are left out of the test.
  """
  test_df = get_test_df(df)
  # step 1
  sub_testacc_step1 = get_tstep_df(1,test_df
                    ).groupby(['subjnum']).correct_response.mean()
  # step 2
  sub_testacc_step2 = get_tstep_df(2,test_df
                    ).groupby(['subjnum']).correct_response.mean()
  # ttest_rel pairs observations by position, not by index label; align on
  # subjnum first so each subject is compared only with itself even when
  # the two steps do not cover exactly the same subjects
  sub_testacc_step1,sub_testacc_step2 = sub_testacc_step1.align(
    sub_testacc_step2,join='inner')
  return ttest_rel(sub_testacc_step1,sub_testacc_step2)

In [None]:
## run the step1-vs-step2 paired test within every condition, print each
## result and collect them into one text report
big_str2 = ""
for cond_,df_ in dfD.items():
  t,p = tstat_wicond_betweenstep_testacc(df_)
  smal_str2 = "%s step1-vs-step2 \n ttest_rel=%s, p=%s"%(cond_,t,p)
  print(smal_str2)
  big_str2 += "\n%s"%smal_str2

## make sure the report directory exists, otherwise open() raises only after
## all the statistics above have already been computed
report_path2 = 'stats_reports/within_condition_between_tstep_testacc.txt'
os.makedirs(os.path.dirname(report_path2),exist_ok=True)
with open(report_path2, 'w') as f:
    f.write(big_str2)