# CSW DATASET
* Click [here](https://drive.google.com/drive/u/0/folders/1qYP5zcY7i9ALt0d_SldA06ouiPHp4wpF) to download the dataset.

In [None]:
import os, re
import pandas as pd
import numpy as np
import json

from matplotlib import pyplot as plt
import seaborn as sns
# Plot styling for the whole notebook: seaborn's 'talk' context plus an
# explicit matplotlib base font size.
sns.set_context('talk')
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
plt.rcParams['font.size'] = 22

# IPython autoreload: mode 2 re-imports all modules before each cell runs, so
# edits to the local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

In [None]:
from humanUtils import *
## this file contains most of the logic for loading and processing database
from cswHumanDatabase import load_final_df, load_dfs


# load thresholded dataset

In [None]:
## load a thresholded dataset
# `cond` picks the condition to load and `thresh` is forwarded as the
# `threshold` keyword of load_final_df.
cond, thresh = 'blocked', 0.9
df = load_final_df(cond, threshold=thresh)

## group level plots

In [None]:
## helpers operating on pandas dataframes
def compute_mean(df_):
    """Return the mean of `correct_response` for each `story`.

    Args:
        df_: DataFrame with a `correct_response` column that can be grouped
            by `story`.

    Returns:
        Series of mean `correct_response` values, indexed by `story`.
    """
    return df_.groupby(['story']).correct_response.mean()


def compute_stderr(df_):
    """Return a per-`story` standard error of `correct_response`.

    The per-story standard deviation is divided by the square root of the
    number of distinct `subjnum` values found in the index of `df_`. That
    subject count is taken over the whole frame, not per story.

    Args:
        df_: DataFrame with a `correct_response` column, groupable by
            `story`, whose index has a `subjnum` level.

    Returns:
        Series of standard errors, indexed by `story`.
    """
    n_subjects = len(df_.index.get_level_values('subjnum').unique())
    per_story_std = df_.groupby(['story']).correct_response.std()
    return per_story_std / np.sqrt(n_subjects)


In [None]:
## per-story group mean with a +/- one standard-error band
M = compute_mean(df)
S = compute_stderr(df)
plt.plot(M)
# plt.plot(M) draws the Series against its index, so the band must use the
# same x-coordinates; np.arange(len(M)) only lines up when the story index
# happens to be 0..len(M)-1.
plt.fill_between(M.index, M - S, M + S, alpha=0.3)

### separate steps 2 and 3

In [None]:
# Split the trials on the 'depth' index level (values 1 and 2), then overlay
# the per-story mean accuracy of each subset on the current axes.
df2, df3 = (
    df[df.index.get_level_values('depth') == depth_value]
    for depth_value in (1, 2)
)

plt.plot(compute_mean(df2))
plt.plot(compute_mean(df3))

### colored background

In [None]:
## load a thresholded dataset
# Same settings as the first load cell; re-running this resets `df` before
# the colored-background plot.
cond, thresh = 'blocked', 0.9
df = load_final_df(cond, threshold=thresh)

In [None]:
## compute mean and stderr
M = compute_mean(df)
S = compute_stderr(df)
## plot with error shading
ax = plt.gca()
ax.plot(M)
# ax.plot(M) draws the Series against its index, so the band uses M.index too;
# np.arange(len(M)) only lines up when the story index is 0..len(M)-1.
ax.fill_between(M.index, M - S, M + S, alpha=0.3)
## make nice background
# One translucent band per entry of cL, each covering x = 40*idx .. 40*idx+40
# and y = 0 .. 1.05.
# NOTE(review): presumably each band is one 40-story block, with the final
# (red) band being the test block -- confirm against the experiment design.
cL = ['blue', 'green', 'blue', 'green', 'red']
for idx, color in enumerate(cL):
    ax.fill_between(np.arange(40 * idx, 41 + 40 * idx), 0, 1.05,
                    color=color, alpha=0.2)

## subject level plots

### iterate over subjects, compute sliding window accuracy

In [None]:
# one subject at a time
# Group by the scalar key rather than a one-element list: pandas >= 2.0 yields
# 1-tuples as keys when iterating a list-keyed groupby, which would make
# `sidx` a tuple instead of a subject number.
for sidx, sub_df in df.groupby('subjnum'):
    # Moving-average accuracy over a 10-trial window for this subject.
    # NOTE: sub_acc is overwritten on every pass, so only the last subject's
    # trace is left after the loop; the next cell computes all subjects at once.
    sub_acc = mov_avg(sub_df.correct_response.values, window=10)

In [None]:
# for all subjects at once
# Apply mov_avg to each subject's correct_response values; the window size is
# passed through groupby.apply as a positional argument.
mov_avg_window = 10
sub_acc_series = (
    df.groupby(['subjnum'])['correct_response']
    .apply(mov_avg, mov_avg_window)
)
# Cell output: the result type and the number of subjects.
type(sub_acc_series), 'N=', len(sub_acc_series)

In [None]:
# One panel per subject, five panels per row. The grid keeps at least the
# original 12 rows and grows when there are more than 60 subjects, where the
# fixed 12x5 grid raised IndexError.
n_cols = 5
n_rows = max(12, -(-len(sub_acc_series) // n_cols))  # ceiling division
fg, axar = plt.subplots(n_rows, n_cols, figsize=(40, 40), sharey=True, sharex=True)
axar = axar.reshape(-1)

# zip stops at the last subject, so any surplus axes are left empty.
for ax, (subjnum, sub_acc) in zip(axar, sub_acc_series.items()):
    ax.set_title('S%i' % subjnum)
    ax.plot(sub_acc)
    # Thin reference line at 0.5.
    ax.axhline(0.5, c='k', lw=.3)


# stats

In [None]:
from scipy.stats import ttest_rel,ttest_ind

### test performance difference between conditions

In [None]:
# The two conditions to compare; each is loaded without an explicit
# threshold argument.
cond1, cond2 = 'interleaved', 'interleaved_rep'
df1, df2 = (load_final_df(cond_name) for cond_name in (cond1, cond2))

In [None]:
def get_test_df(df, test_block=4):
    """Return the rows of `df` whose `block` index level equals `test_block`.

    Args:
        df: DataFrame whose index has a `block` level.
        test_block: Value of the `block` level to keep. Defaults to 4, the
            value this notebook has always used (presumably the test block --
            confirm against the experiment design).

    Returns:
        The matching subset of `df`, with the index left unchanged.
    """
    return df[df.index.get_level_values('block') == test_block]

In [None]:
# Mean accuracy per subject on the rows kept by get_test_df, computed the
# same way for each condition's dataframe.
sub_testacc_df1, sub_testacc_df2 = (
    get_test_df(cond_df).groupby(['subjnum']).correct_response.mean()
    for cond_df in (df1, df2)
)
# Independent-samples t-test on the two sets of per-subject accuracies.
ttest_ind(sub_testacc_df1, sub_testacc_df2)

### interleaved timestep 2 vs 3 

In [None]:
# Load the 'interleaved' condition (no explicit threshold argument) into df1
# for the within-condition comparison between depths below.
df1 = load_final_df('interleaved')

In [None]:
def get_tstep_df(df, tstep):
    """Return the rows of `df` whose `depth` index level equals `tstep`.

    Args:
        df: DataFrame whose index has a `depth` level.
        tstep: Value of the `depth` level to keep.

    Returns:
        The matching subset of `df`, with the index left unchanged. It is
        empty when no row has that depth.
    """
    return df[df.index.get_level_values('depth') == tstep]

In [None]:
# Per-subject mean accuracy on the get_test_df rows, separately for depth 1
# and depth 2.
# NOTE(review): the section heading calls these timesteps 2 and 3 while the
# variable names say step1/step2 -- confirm which numbering is intended.
sub_testacc_step1, sub_testacc_step2 = (
    get_tstep_df(get_test_df(df1), depth_value)
    .groupby(['subjnum'])
    .correct_response.mean()
    for depth_value in (1, 2)
)


In [None]:
# Paired t-test across subjects between the two depths. ttest_rel pairs the
# values by position, so this assumes both Series cover the same subjects in
# the same order -- TODO confirm no subject is missing one of the depths.
ttest_rel(sub_testacc_step1,sub_testacc_step2)