# Code for analyzing the behavioral data from the associative boosting task

In [1]:
import os
import numpy as np
import glob
import csv
import matplotlib
import glob
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
from patsy import dmatrices
from sklearn.linear_model import LogisticRegression
import scipy
import patsy
import ast
%matplotlib inline



In [2]:
# NOTE: machine-specific absolute path; point home_dir at your own copy of the project.
home_dir = '/Users/ianballard/Dropbox/two_step_analysis/' #where the data live
# Strip the trailing slash before appending so the result has no '//' in it.
# data_dir must keep its own trailing '/' because later cells build glob
# patterns with plain string concatenation (data_dir + '*txt').
data_dir = home_dir.rstrip('/') + '/data/associative_boost/'

In [3]:
# List the raw data files (one per subject), minus any file names in ignore_files.
ignore_files = []
# glob returns files in arbitrary, filesystem-dependent order; sort for reproducibility.
data_files = sorted(glob.glob(data_dir + '*txt'))
data_files = [d for d in data_files if d.split('/')[-1] not in ignore_files]
# Subject id = file name up to the first '.', the same rule the loading cell uses
# for the 'sub' column. (Splitting on '.csv' left the extension attached, because
# the files matched above end in 'txt'.) A list, not a lazy map, so it can be reused.
sub_ids = [f.split('/')[-1].split('.')[0] for f in data_files]
print(len(data_files))

26


In [4]:
def fixup(sub_data,d):
    """Fill column ``d`` with its first non-missing value on every row.

    Parameters
    ----------
    sub_data : pandas.DataFrame
        Frame to update. It is modified in place and also returned.
    d : str
        Column name. If the column is absent, or present but entirely
        missing, the frame is returned unchanged.

    Returns
    -------
    pandas.DataFrame
        The same ``sub_data`` object.
    """
    if d in sub_data:
        col = sub_data[d].dropna()
        # An all-missing column has no value to broadcast; indexing [0] on the
        # empty result would raise IndexError, so leave the column as it is.
        if len(col) > 0:
            sub_data[d] = col.values[0]
    return sub_data

In [5]:
# Load every subject file into its own DataFrame (one row per logged line).
# Each non-blank line of a file is a Python-literal dict; every value is kept as
# its str() form, so later cells re-parse nested fields with ast.literal_eval.
all_data = []

#clean up demographic info: these columns are filled with their first
#non-missing value on every row of the subject's frame (see fixup)
demographic = ['age','cig','drg','eth','his','oth','numPieces','number','text']

for f in data_files:
    records = []
    with open(f, 'r') as myfile:
        for raw_line in myfile:
            raw_line = raw_line.strip()
            # Skip blank lines instead of assuming exactly one trailing newline;
            # a file without one no longer loses its final record.
            if not raw_line:
                continue
            record = ast.literal_eval(raw_line)
            # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
            records.append({key: str(val) for key, val in record.items()})

    # Build the frame once from the list of dicts; keys missing from a record
    # become NaN, and the rows get a fresh 0..n-1 index.
    sub_data = pd.DataFrame(records)
    sub_data['sub'] = f.split('/')[-1].split('.')[0]

    for d in demographic:
        sub_data = fixup(sub_data,d)

    all_data.append(sub_data)

In [6]:
# Stack the per-subject frames into one long table. `all_data` is rebound from a
# list of frames to a single DataFrame here, so only concatenate while it is
# still a list; re-running this cell is then a no-op rather than a TypeError.
if isinstance(all_data, list):
    all_data = pd.concat(all_data)

In [7]:
# Unique subject ids, sorted so that per-subject loops and their printed output
# come out in the same order on every run (set iteration order is arbitrary).
subs = sorted(set(all_data['sub']))

In [18]:
# Exclude subjects whose data look incomplete, then refresh the subject list.
bad_subs = []
for s in subs:
    sub_df = all_data[all_data['sub'] == s]
    # Number of distinct loopNumber values, with the missing-value marker counted
    # as one entry.
    # NOTE(review): this relies on all missing entries collapsing to a single set
    # element -- confirm the expected count of 4 still holds if loading changes.
    wrong_loop_count = len(set(sub_df['loopNumber'].values)) != 4 #3 loops plus NaN
    too_few_twostep = sub_df[sub_df['name'] == 'twoStep'].shape[0] < 145
    # Single combined test so a subject failing both checks is listed only once
    if wrong_loop_count or too_few_twostep:
        bad_subs.append(s)

# Drop all excluded subjects in one pass
all_data = all_data[~all_data['sub'].isin(bad_subs)]
# Sorted for a reproducible subject order in the cells that loop over `subs`
subs = sorted(set(all_data['sub']))

In [123]:
def get_prop_targ(pre_post,block_df):
    """Proportion of target trials on which the target was chosen, for one phase.

    Rows of ``block_df`` whose 'pre_post' column equals ``pre_post`` are kept.
    The result is the count of rows with ``choice == 'tgt'`` divided by the sum
    of the 'target_indicator' column over those rows.
    """
    phase_trials = block_df.loc[block_df['pre_post'] == pre_post]
    n_target_trials = np.sum(phase_trials['target_indicator'])
    chose_target = phase_trials['choice'] == 'tgt'
    n_target_chosen = np.sum(chose_target)
    # multiply by 1.0 first so the division is done in floating point
    return n_target_chosen * 1.0 / n_target_trials

In [203]:
# For each subject, parse the 'neutral' trials and record, per block and per
# pre/post phase, the proportion returned by get_prop_targ.

# Lookup tables are identical for every subject, so they are built once.
# Consecutive runs of neutral trials pair up into blocks: even run = 'pre', odd = 'post'.
block_map = {0:0,1:0,2:1,3:1,4:2,5:2}
prepost_map = {0:'pre',1:'post',2:'pre',3:'post',4:'pre',5:'post'}
choice_map = {'left':0,'right':1}

neutral_choice = {'sub':[],'block':[],'pre_post':[],'prop':[]}
for s in subs:
    sub_df = all_data[all_data['sub'] == s]
    neutral = sub_df[sub_df['name'] == 'neutral'].copy()

    ##parse out the block and the run numbers from the neutral trial numbers [0 to 9]
    # A new run starts at every row whose trialNum is '0'; a running count of
    # those starts, minus one, gives each row's 0-based run number.
    is_run_start = (neutral['trialNum'] == '0').values
    neutral_run = (np.cumsum(is_run_start) - 1).tolist()
    if neutral_run and neutral_run[0] != 0:
        raise ValueError("neutral trials for subject %s do not begin with trialNum '0'" % s)
    # A concrete list (not a lazy map) because it is assigned as a column and
    # then iterated twice more for the block and pre/post lookups below.
    neutral['run'] = neutral_run

    neutral['block'] = [block_map[x] for x in neutral_run]
    neutral['pre_post'] = [prepost_map[x] for x in neutral_run]

    #parse the choices
    # Each response is the string form of a dict; key '1' holds 'left' or 'right'
    choices = [ast.literal_eval(x)['1'] for x in neutral['response'].values]
    choices = [choice_map[x] for x in choices]
    neutral['response'] = choices

    #parse the stimuli
    # The 0/1 response code indexes into the parsed stimuli to get the chosen item
    stims = [ast.literal_eval(x) for x in neutral['stimuli'].values]
    which_choice = [stim[choice] for stim,choice in zip(stims,choices)]
    target_indicator = ['tgt' in x for x in stims]
    neutral['target_indicator'] = target_indicator
    neutral['choice'] = which_choice

    #reaction time
    respT = np.array([ast.literal_eval(x)['1'] for x in neutral['responseTime'].values])
    onsetT = np.array([ast.literal_eval(x)['1'] for x in neutral['stageOnsetTime'].values])
    neutral['RT'] = respT - onsetT

    # neutral
    for block in range(3):
        block_df = neutral[neutral['block'] == block]
        for pp in ['pre','post']:
            prop_targ = get_prop_targ(pp,block_df)
            neutral_choice['sub'].append(s)
            neutral_choice['prop'].append(prop_targ)
            neutral_choice['pre_post'].append(pp)
            neutral_choice['block'].append(block)
neutral_choice = pd.DataFrame(neutral_choice)

In [204]:
# Post-minus-pre change in 'prop' for every (sub, block).
# The difference is taken on a table keyed by (sub, block), so each value stays
# with its own subject and block. Assigning a loop-ordered array positionally
# onto key-sorted groupby output mislabels rows whenever `subs` is not sorted.
# The guard keeps the cell re-runnable once neutral_choice has been collapsed.
if 'pre_post' in neutral_choice.columns:
    prop_wide = neutral_choice.set_index(['sub','block','pre_post'])['prop'].unstack('pre_post')
    neutral_choice = (prop_wide['post'] - prop_wide['pre']).reset_index(name='prop')
# Kept under its original name; rows are ordered by sub, then block.
prop_diff = neutral_choice['prop'].values
neutral_choice.groupby('sub').mean()


Unnamed: 0_level_0,block,prop
sub,Unnamed: 1_level_1,Unnamed: 2_level_1
A1Q7XYZUIKHUS4,1,0.416667
A1ZZ7YO3YASNIP,1,0.0
A2BHHEWR9PRDX8,1,0.027778
A2COVA7FSAIH3D,1,0.083333
A2Q3KN46PUMMX2,1,-0.25
A36470UBRH28GO,1,-0.25
A3EJ44J2ZNRMDA,1,-0.083333
A3T7N805PVNEXN,1,-0.166667
ABMX8XUNPR3LP,1,-0.166667
AHDBHMH3AY1V2,1,-0.166667


In [82]:
# Sanity check: longest reaction time on neutral trials, one line per subject.
for s in subs:
    sub_df = all_data[all_data['sub'] ==s]
    neutral = sub_df[sub_df['name'] == 'neutral'].copy()
    # Both fields are string forms of dicts; key '1' holds the time that is used
    respT = np.array([ast.literal_eval(x)['1'] for x in neutral['responseTime'].values])
    onsetT = np.array([ast.literal_eval(x)['1'] for x in neutral['stageOnsetTime'].values])
    neutral['RT'] = respT - onsetT

    # Dividing by 1000.0 presumably converts ms to s -- TODO confirm the logged units.
    # The subject id is printed alongside so an extreme value can be traced back;
    # print(...) with one argument runs on both Python 2 and Python 3.
    print('%s: %s' % (s, np.max(neutral['RT'])/1000.0))

19.427
4.74
18.595
1.318
1.531
129.776
26.342
1.39
51.963
2.263
