In [1]:
# Set the inline matplotlib backend's figure format to 'retina' (high-DPI output).
%config InlineBackend.figure_format = 'retina'

In [2]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
from functools import partial

# Use the fully-qualified option key: on recent pandas releases the bare
# pattern 'precision' also matches 'styler.format.precision', and set_option
# raises OptionError when a pattern matches more than one key.
pd.set_option('display.precision', 0)
# Render floats with exactly two decimals in DataFrame/Series output.
pd.set_option('display.float_format', lambda x: '%.2f' % x)

In [4]:
def load_data_cb(path, dropc=None):
    """
    Load a `cb.Data` result set and return the truncated per-trial responses.

    path: passed straight to `cb.Data`.
    dropc: optional column label (or list of labels) removed from the raw
        frame before it is reshaped.

    Returns a new DataFrame restricted to trials 1..128 and to rows whose
    `n_categories` is at most 5; `n_categories` is then incremented by one on
    rows where `correct == '10'`.

    NOTE(review): `cb` is not imported by the import cells visible in this
    notebook; it has to be brought into scope elsewhere before calling this.
    """
    rawdf = pd.DataFrame(cb.Data(path).data).rename(columns={'_mseed': 'seed'})

    if dropc:
        rawdf = rawdf.drop(columns=dropc)

    # simulation & model parameter columns: every '_'-prefixed column plus 'seed'
    cidx = [c for c in rawdf.columns if c.startswith('_') or c == 'seed']
    # explode the remaining columns so that list-like cells become one row per element
    df = rawdf.set_index(cidx).apply(pd.Series.explode).reset_index()

    # keep trials 1..128 and rows logged with at most 5 categories
    keep = (df.trial >= 1) & (df.trial <= 128) & (df.n_categories <= 5)
    # explicit copy: the .loc assignment below would otherwise write into a
    # filtered slice and trigger pandas' SettingWithCopyWarning
    df = df[keep].copy()
    df.loc[df.correct == '10', 'n_categories'] += 1

    return df

In [1]:
def load_data(path, add_cols=None):
    """
    Load the csv files produced by the experiment and return truncated responses.

    path: directory that is searched (non-recursively) for `*.csv` files.
    add_cols: optional dict {column name: value}; each entry is added as a
        column with that fixed value (e.g., SP dim).

    Returns a new DataFrame holding the concatenation of all files (the first
    csv column is used as the index and is kept as-is), truncated to trials
    1..128 and to rows whose `n_categories` is at most 5. `n_categories` is
    then incremented by one on rows where `correct == '10'`.

    Raises ValueError when no csv file is found and AssertionError when the
    files contain no rows.
    """
    # sorted() makes the row order independent of the file-system listing order
    files = sorted(glob.glob(os.path.join(path, '*.csv')))
    if not files:
        # pd.concat would raise an opaque "No objects to concatenate" here
        raise ValueError('No csv files found in {!r}'.format(path))

    df = pd.concat((pd.read_csv(f, header=0, index_col=0) for f in files), ignore_index=False)
    assert len(df) > 0, 'csv files in {!r} contain no rows'.format(path)

    # truncate the dataset to 128 trials or when 6 categories have been completed
    keep = (df.trial >= 1) & (df.trial <= 128) & (df.n_categories <= 5)
    # explicit copy: the assignments below would otherwise write into a
    # filtered slice and trigger pandas' SettingWithCopyWarning
    df = df[keep].copy()
    df.loc[df.correct == '10', 'n_categories'] += 1

    # add additional columns with parameter values
    if add_cols:
        for c_name, c_val in add_cols.items():
            df[c_name] = c_val
    return df

In [None]:
def discard_seeds(df, index, criterion='above', th=1, debug=True):
    """Drop every seed whose per-seed maximum of column `index` crosses `th`.

    Use this function only for a single experiment (i.e., all seeds in df are unique)
    and not aggregate of multiple ones.

    df: DataFrame with a `seed` column and a column named by `index`.
    index: name of the column whose per-seed maximum is tested.
    criterion: 'above' discards seeds whose maximum is > th; any other value
        discards seeds whose maximum is < th.
    th: threshold the per-seed maximum is compared with.
    debug: when true, print how many seeds will be discarded.

    Returns the rows of `df` that belong to the seeds that are kept.
    """
    op = np.greater if criterion == 'above' else np.less
    # Group with 'seed' as the index so the labels of the flagged entries are
    # seed values. With as_index=False the result carries a positional
    # RangeIndex, and matching df.seed against it discards the wrong seeds.
    per_seed_max = df.groupby('seed')[index].max()
    flagged = per_seed_max[op(per_seed_max, th)]
    if debug:
        print('Will discard {} seeds'.format(len(flagged)))
    return df[~df.seed.isin(flagged.index)]

In [34]:
def get_stats_SE(df):
    """Return the standard error of each row, SD / sqrt(N).

    df: DataFrame with the columns `SD` and `N`.
    Returns a Series aligned with `df.index`.
    """
    # column arithmetic instead of a Python-level lambda applied row by row
    return df['SD'] / np.sqrt(df['N'])

def get_ctimesSE(df, z=1.96):
    """Return `z` times the standard error of each row.

    df: DataFrame with the column `SE`.
    z: multiplier applied to `SE`; defaults to 1.96.
    Returns a Series aligned with `df.index`.
    """
    # column arithmetic instead of a Python-level lambda applied row by row
    return z * df['SE']

def get_stats_CIs(df, z=1.96):
    """Return the bounds and the width of the interval Mean +/- z*SE per row.

    df: DataFrame with the columns `Mean` and `SE`.
    z: multiplier applied to `SE`; defaults to 1.96.
    Returns a tuple (lower bound, upper bound, upper - lower) of Series, each
    aligned with `df.index`.
    """
    # column arithmetic instead of a Python-level lambda applied row by row
    half_width = z * df['SE']
    cil = df['Mean'] - half_width
    cih = df['Mean'] + half_width
    cidiff = cih - cil
    return cil, cih, cidiff

In [35]:
def get_human_stats():
    """
    Build the table of human reference statistics.

    Data from Table 1 (younger group, N=25) from Ashendorf et al (2008):
    EXPLORING AGE-RELATED DECLINE ON THE WISCONSIN CARD SORTING TEST,
    The Clinical Neuropsychologist

    Returns a DataFrame with one row per measure and the columns
    N, Mean, SD, SE, cSE, CIl, CIh and CIdiff.
    """
    n = 25

    # (measure, mean, SD) records, in the order the rows appear in the result
    # NOTE(review): 'Conceptual category' is entered as mean 0 / SD 0 —
    # confirm whether this is a placeholder for a value not available.
    published = [
        ('Total trials', 88.24, 17.52),
        ('Trials correct', 69.40, 6.65),
        ('Errors', 18.84, 12.40),
        ('Perseverative responses', 9.08, 6.51),
        ('Perseverative errors', 9.00, 6.59),
        ('Categories', 5.80, 0.58),
        ('Trials to 1st category', 17.20, 9.63),
        ('Failure to maintain set', 0.28, 0.54),
        ('Conceptual category', 0, 0),
        ('Learning to learn', 1.75, 3.19),
    ]
    by_measure = {measure: [n, mean, sd] for measure, mean, sd in published}
    # the dict is keyed by measure, so transpose to get one row per measure
    stats = pd.DataFrame(by_measure, index=['N', 'Mean', 'SD']).T

    # derived columns; cSE and the interval bounds read the SE column
    stats['SE'] = get_stats_SE(stats)
    stats['cSE'] = get_ctimesSE(stats)
    ci_low, ci_high, ci_width = get_stats_CIs(stats)
    stats['CIl'] = ci_low
    stats['CIh'] = ci_high
    stats['CIdiff'] = ci_width

    return stats

In [36]:
def get_stats(df):
    """
    Summarise per-seed performance measures of `df`.

    df: DataFrame with the columns seed, trial, correct, error, p_response,
        p_error, n_categories and fail_shift.

    Each measure is first computed per seed; the result reports, for every
    measure, the number of distinct seeds (N) and the mean and standard
    deviation across seeds, plus the derived SE, cSE, CIl, CIh and CIdiff
    columns. Rows are the measures.
    """

    def count_correct(trials):
        # number of rows whose `correct` value is not 'X'
        return sum(trials.correct!='X')

    def first_category_trial(trials):
        # smallest trial number among the rows with n_categories == 1
        return trials.query('n_categories==1').trial.min()

    def get_concept_category(trials):
        # percentage of rows whose `correct` value is none of '1', '2', 'X'
        values, counts = np.unique(trials.correct, return_counts=True)
        kept = sum(cnt for val, cnt in zip(values, counts) if val not in ['1', '2', 'X'])
        return 100*kept/sum(counts)

    def get_learning_to_learn(trials):
        # mean difference between consecutive n_categories groups of the
        # scaled per-group error sums
        # NOTE(review): the error sums are divided by len(by_category), i.e.
        # the number of n_categories groups, not by the number of trials in
        # each group — confirm that this normalisation is intended.
        by_category = trials.groupby('n_categories')
        scaled_errors = by_category.error.sum()/len(by_category)
        return np.diff(scaled_errors).mean()

    n_seeds = df.seed.unique().size
    by_seed = df.groupby('seed')

    # one per-seed Series for every measure, in the order of the result rows
    measures = {
        'Total trials': by_seed.trial.max(),
        'Trials correct': by_seed.apply(count_correct),
        'Errors': by_seed.error.sum(),
        'Perseverative responses': by_seed.p_response.sum(),
        'Perseverative errors': by_seed.p_error.sum(),
        'Categories': by_seed.n_categories.max(),
        'Trials to 1st category': by_seed.apply(first_category_trial),
        'Failure to maintain set': by_seed.fail_shift.sum(),
        'Conceptual category': by_seed.apply(get_concept_category),
        'Learning to learn': by_seed.apply(get_learning_to_learn),
    }
    summary = {
        label: [n_seeds, per_seed.mean(), per_seed.std()]
        for label, per_seed in measures.items()
    }
    # the dict is keyed by measure, so transpose to get one row per measure
    stats = pd.DataFrame(summary, index=['N', 'Mean', 'SD']).T

    # derived columns; cSE and the interval bounds read the SE column
    stats['SE'] = get_stats_SE(stats)
    stats['cSE'] = get_ctimesSE(stats)
    ci_low, ci_high, ci_width = get_stats_CIs(stats)
    stats['CIl'] = ci_low
    stats['CIh'] = ci_high
    stats['CIdiff'] = ci_width

    return stats