# Behavior

Examine all trials for variance in responses, which serves as a proxy for the subject's understanding of the task.

In [40]:
import os,glob,re
import numpy as np
import pylab as plt
import seaborn as sns
from pandas import read_csv, DataFrame, get_dummies
from scipy import stats
from statsmodels.api import Logit
from matplotlib.colors import SymLogNorm
from matplotlib.mlab import griddata

# Plot appearance shared by every figure produced below.
sns.set_style('white')
sns.set_context('paper', font_scale=2.5)
plt.rcParams['figure.figsize'] = (18,12)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Load and prepare behavior.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Locate the data.
task = 'ARC'
root = '/autofs/space/karima_003/users/New_ARC/'
# Keep only the entries under root whose basename contains a subject ID:
# two lowercase letters followed by three digits.
subject_dirs = [
    sdir for sdir in glob.glob(root + '*')
    if re.search('[a-z]{2}[0-9]{3}', os.path.basename(sdir))
]

# When True, files that were already processed/plotted are processed again.
recalculate = False

def logistic(arr):
    """Return the logistic (sigmoid) transform 1 / (1 + exp(-arr)) of ``arr``.

    ``arr`` must support unary minus and ``np.exp`` (scalar or array-like
    numeric object); the result has the same shape as the input.
    """
    denominator = 1 + np.exp(-arr)
    return 1. / denominator

def get_xyz(d):
    """Average the response time of every distinct (Reward, Risk) pair in ``d``.

    Parameters
    ----------
    d : DataFrame
        Must provide the columns 'Reward', 'Risk' and 'ResponseTime'.
        Response times only need to be convertible with ``float()``.

    Returns
    -------
    rewards, risks, mean_rts : list, list, list
        Three aligned lists with one entry per distinct (Reward, Risk) pair:
        the reward, the risk and the mean response time of that pair.
        All three are real lists, so they can be handed directly to
        array-based consumers; a dict view (what ``dict.values()`` returns on
        Python 3) is not converted to a numeric array by NumPy.
    """
    totals = {}
    counts = {}
    # Walk the three columns in lockstep instead of doing a label lookup per
    # cell; this also copes with a non-unique index.
    for reward, risk, rt in zip(d['Reward'], d['Risk'], d['ResponseTime']):
        key = (reward, risk)
        if key in totals:
            totals[key] += float(rt)
            counts[key] += 1
        else:
            totals[key] = float(rt)
            counts[key] = 1
    # Derive all three outputs from one key list so they are guaranteed to
    # line up element by element.
    keys = list(totals)
    rewards = [reward for reward, _ in keys]
    risks = [risk for _, risk in keys]
    mean_rts = [totals[key] / counts[key] for key in keys]
    return rewards, risks, mean_rts

# For every subject / modality / day behavior file:
#   1. (processing) label the response pattern, fit a per-block logistic model
#      of ResponseType on Reward and Risk ("Theta"), fit a gamma distribution to
#      the response times, and derive the DDB measure and its tertile;
#   2. (plotting) save three summary figures next to the CSV.
# Each stage is skipped when its output already exists, unless `recalculate`.
for subject_dir in subject_dirs:
    subject = re.search('[a-z]{2}[0-9]{3}',os.path.basename(subject_dir)).group()
    print(subject)
    for modality in ['eeg','mri']:
        for day in range(1,4):
            # This single session is excluded explicitly.
            if subject == 'hc009' and modality == 'mri' and day == 1:
                continue
            fname = '%s/behavior/%s_%s_%s_%i.csv' %(subject_dir,subject,task,modality,day)
            # Path up to the first '.'; every derived file name is built from it.
            stem = fname.split('.')[0]
            backup_fname = stem + '_backup.csv'
            if os.path.isfile(fname) and (not os.path.isfile(backup_fname) or recalculate):
                if recalculate and os.path.isfile(backup_fname):
                    # Recalculate from the untouched original, not the annotated file.
                    df = read_csv(backup_fname)
                else:
                    df = read_csv(fname)
                    # Written without the index so that re-reading the backup does
                    # not introduce an extra 'Unnamed: 0' column.
                    df.to_csv(backup_fname, index=False)
                ## Set 99s to NaNs.
                df.loc[df.ResponseType==99,'ResponseType'] = np.nan
                # '[]' marks a trial without a recorded response time.
                df['ResponseTime'] = [float(rt) if rt != '[]' else np.nan for rt in df.ResponseTime]
                # Throw out non-choice trials and trials that were not timed correctly for analysis.
                d = df.copy().dropna()
                #
                typelist = []
                # ANOVA over time blocks: if responses differ between blocks, mark the file as time variant.
                F, p = stats.f_oneway(d['ResponseType'][(d['TimeType']==1)],d['ResponseType'][(d['TimeType']==2)],
                                      d['ResponseType'][(d['TimeType']==3)])
                if p < 0.05:
                    typelist.append('time variant')
                # Compare adjacent levels (0 vs 1, 1 vs 2) of RiskType and RewardType;
                # one significant difference is enough to count as responsive.
                good = False
                for i,j in zip(range(2),range(1,3)):
                    F, p = stats.ttest_ind(d['ResponseType'][(d['RiskType']==i)],d['ResponseType'][(d['RiskType']==j)])
                    if p < 0.05:
                        good = True
                    F, p = stats.ttest_ind(d['ResponseType'][(d['RewardType']==i)],d['ResponseType'][(d['RewardType']==j)])
                    if p < 0.05:
                        good = True
                if good:
                    typelist.append('normal risk reward variance')
                else:
                    typelist.append('risk reward invariant')
                F, p = stats.ttest_ind(d['ResponseType'][(d['LeftKey']==0)],d['ResponseType'][(d['LeftKey']==1)])
                if p < 0.05: #subjects did respond differentially based on decision side
                    typelist.append('bias model')
                # Every row carries the same list of labels.
                df['Type'] = [typelist for i in range(len(df))]
                #
                df['Intercept'] = 1
                for block in np.unique(df['TimeType']):
                    print('block %i' %(block))
                    in_block = df.TimeType==block
                    # Logistic regression of the response on reward and risk, restricted
                    # to trials of this block that have a response onset.
                    theta_model = Logit.from_formula('ResponseType ~ Reward + Risk', data=df.loc[in_block],
                                   subset=~df.loc[in_block,'ResponseOnset'].isnull())
                    theta_result = theta_model.fit()
                    theta_betas = theta_result.params
                    np.savez_compressed(stem + 'block_%i_theta_betas.npz' %(block),betas=theta_betas)
                    df.loc[in_block,'Theta'] = logistic( np.dot(df.loc[in_block,['Intercept','Reward','Risk']], theta_betas) )
                    #
                    block_rt = df.loc[in_block,'ResponseTime']
                    # Fit on observed response times only: the NaNs introduced above for
                    # missing responses are non-finite data that the fit cannot handle.
                    alpha,loc,beta = stats.gamma.fit(block_rt.dropna())
                    np.savez_compressed(stem + 'block_%i_rt_params.npz' %(block),alpha=alpha,loc=loc,beta=beta)
                    # DDB = |2*(Theta - 0.5)| scaled by the gamma survival probability of the
                    # response time; trials without a response time stay NaN.
                    df.loc[in_block,'DDB'] = \
                        abs((df.loc[in_block,'Theta']-0.5)*2*(1-stats.gamma.cdf(block_rt,alpha,loc,beta)))
                # Bin DDB into thirds of [0, 1]. A value on a shared boundary ends up in
                # the higher bin (later iterations overwrite); anything outside [0, 1],
                # including NaN, keeps the -1 marker.
                for index in df.index:
                    ddb = df.loc[index,'DDB']
                    tertile = -1
                    for i in range(3):
                        if (1.0/3*i <= ddb and ddb <= 1.0/3*(i+1)):
                            tertile = i + 1
                    df.loc[index,'DDB Tertile'] = tertile
                for tertile in np.unique(df['DDB Tertile']):
                    print('Tertile %i: %i' %(tertile,len(df[df['DDB Tertile']==tertile])))
                df.to_csv(fname, index=False)
            if os.path.isfile(fname) and (not os.path.isfile(stem + 'behavior_plot.jpg') or recalculate):
                d = read_csv(fname)
                d = d.dropna()
                ## Figure 1: mean response per risk level, reward level and time block.
                fig,(ax0,ax1,ax2) = plt.subplots(1,3)
                sns.barplot(x = 'RiskType', y = 'ResponseType', data=d, ax=ax0)
                ax0.set_title('Risk Taking Responses Over Risk Probilities')
                sns.barplot(x = 'RewardType', y = 'ResponseType', data=d, ax=ax1)
                ax1.set_title('Risk Taking Responses Over Reward Probabilities')
                sns.barplot(x = 'TimeType', y = 'ResponseType', data=d, ax=ax2)
                ax2.set_title('Risk Taking Responses Over Time Block')   
                fig.savefig(stem + 'behavior_plot.jpg')
                # The figure only needs to exist on disk; close it so that figures do
                # not pile up in memory across the subject/modality/day loops.
                plt.close(fig)
                ## Figure 2: one row per block with Theta, mean RT and DDB over the reward/risk plane.
                fig,axs = plt.subplots(3,3)
                res = 100
                rewards = np.linspace(5,15,res)
                risks = np.linspace(0,1,res)
                X, Y = np.meshgrid(rewards, risks)
                #compute thetas for a enough values to give a decent picture of the landscape
                for block in np.unique(d['TimeType']):
                    block = int(block)
                    # Load the parameters saved by the processing stage; the context
                    # managers release the .npz file handles right away.
                    with np.load(stem + 'block_%i_theta_betas.npz' %(block)) as f:
                        betas = f['betas']
                    with np.load(stem + 'block_%i_rt_params.npz' %(block)) as f:
                        alpha,loc,beta = f['alpha'].item(),f['loc'].item(),f['beta'].item()
                    thetamat = np.zeros((res,res))
                    x,y,z = get_xyz(d[d.TimeType==block])
                    # Interpolate the per-condition mean RTs onto the reward/risk grid.
                    # NOTE(review): matplotlib.mlab.griddata is not available in recent
                    # matplotlib releases - confirm the installed version provides it.
                    Z = griddata(x, y, z, rewards, risks)
                    Z2 = np.array(Z)
                    DDBmat = np.zeros((res,res))
                    #
                    for i,reward in enumerate(rewards):
                        for j,risk in enumerate(risks):
                            theta = logistic( np.dot([1,reward,risk],betas) )
                            # Rows are filled bottom-up: risk index 0 goes to the last row.
                            thetamat[-j-1,i] = theta
                            DDBmat[-j-1,i] = abs((theta-0.5)*2*(1-stats.gamma.cdf(Z2[j,i],alpha,loc,beta)))
                    sns.heatmap(thetamat,ax=axs[block-1,0],vmin=0,vmax=1)
                    # NOTE(review): this overlays the trials of all blocks on every
                    # block's heatmap - confirm that is intended.
                    axs[block-1,0].scatter((d['Reward']-5)/10*res,d['Risk']*res)
                    axs[block-1,0].set_xlabel('Reward')
                    axs[block-1,0].set_ylabel('Risk')
                    axs[block-1,0].set_xticks(np.linspace(0,100,5))
                    axs[block-1,0].set_yticks(np.linspace(0,100,5))
                    axs[block-1,0].set_xticklabels(np.round(np.linspace(5,15,5),2))
                    axs[block-1,0].set_yticklabels(np.linspace(100,0,5,dtype='uint8'))
                    axs[block-1,0].set_title('Theta (Likelihood of Take) Plot Block %i' %(block),fontsize=14)
                    #
                    cf = axs[block-1,1].contourf(X,Y,Z,vmin=0,vmax=2)#,norm=SymLogNorm(0.1,vmin=-0.5,vmax=0.5))
                    fig.colorbar(cf, ax=axs[block-1,1])
                    axs[block-1,1].set_xlabel('Reward')
                    axs[block-1,1].set_ylabel('Risk')
                    axs[block-1,1].set_title('RT Contour Plot Block %i' %(block),fontsize=14)
                    #
                    sns.heatmap(DDBmat,ax=axs[block-1,2],vmin=0,vmax=1)#,norm=SymLogNorm(0.1,vmin=-0.5,vmax=0.5))
                    axs[block-1,2].set_xlabel('Reward')
                    axs[block-1,2].set_ylabel('Risk')
                    axs[block-1,2].set_xticks(np.linspace(0,100,5))
                    axs[block-1,2].set_yticks(np.linspace(0,100,5))
                    axs[block-1,2].set_xticklabels(np.round(np.linspace(5,15,5),2))
                    axs[block-1,2].set_yticklabels(np.linspace(100,0,5,dtype='uint8'))
                    axs[block-1,2].set_title('Distance to Decision Boundary Plot Block %i' %(block),fontsize=14)
                # Save once, after all block rows are drawn, instead of re-writing the
                # same file on every block iteration.
                fig.savefig(stem + 'behavior3.jpg')
                plt.close(fig)
                ## Figure 3: 3x3 grid of descriptive plots.
                fig, axs = plt.subplots(3,3)
                ax0,ax1,ax2,ax3,ax4,ax5,ax6,ax7,ax8 = axs.flatten()
                sns.distplot(d.ResponseTime.dropna(),ax=ax0)
                ax0.set_title('Distribution of Response Times',fontsize=12)
                #
                sns.barplot(x = 'LeftKey', y = 'ResponseType', data = d, ax=ax1)
                ax1.set_title('Relationship Between Direction and Response',fontsize=12)
                #
                sns.regplot(x = 'Risk', y = 'ResponseTime', data = d, ax=ax2)
                ax2.set_title('Decision Time for Risk Probabilities',fontsize=12)
                # Keyword form, consistent with the other regplot calls in this figure.
                sns.regplot(x = 'Trial', y = 'ResponseTime', data = d, ax = ax3)
                ax3.set_title('Response Time over Time',fontsize=12)
                #
                sns.regplot(x = 'Reward', y = 'ResponseTime', data = d, ax=ax4)
                ax4.set_title('Decision Time for Reward Probabilities',fontsize=12)
                #
                sns.distplot(d['Reward'], ax = ax5)
                sns.distplot(d['Risk'], bins = np.linspace(0.05,0.85,9),ax=ax5)
                ax5.set_title('Distribution of Risk/Reward Probabilities',fontsize=12)
                # Legend labels are matched to artists in drawing order: Reward was
                # drawn first, Risk second.
                ax5.legend(['Reward','Risk'])
                #
                sns.barplot(x = 'RiskType', y = 'Shock', data = d, ax = ax6)
                ax6.set_title('Distribution of Shocks for Risk Probabilities',fontsize=12)
                #
                sns.pointplot(x = 'RewardType', y = 'Risk', data = d, ax = ax7)
                ax7.set_title('Distribution of Risks for Reward Probabilities',fontsize=12)
                #
                sns.distplot(d[d['Shock']==1]['Reward'], ax = ax8)
                sns.distplot(d[d['Shock']==0]['Reward'], ax=ax8)
                ax8.set_title('Distribution of Shocks for Reward Probabilities',fontsize=12)
                ax8.legend(['Shock','No shock'])
                #
                fig.savefig(stem + 'behavior2.jpg')
                plt.close(fig)


hc007
hc008
hc013
hc014
hc009
hc011
hc004
hc012
block 1
Optimization terminated successfully.
         Current function value: 0.336848
         Iterations 8
block 2
Optimization terminated successfully.
         Current function value: 0.194170
         Iterations 9
block 3
Optimization terminated successfully.
         Current function value: 0.203077
         Iterations 9
Tertile 1: 97
Tertile 2: 102
Tertile 3: 44
hc005
hc003
hc010
hc001
hc015
block 1
Optimization terminated successfully.
         Current function value: 0.167940
         Iterations 10
block 2
Optimization terminated successfully.
         Current function value: 0.102013
         Iterations 11
block 3
Optimization terminated successfully.
         Current function value: 0.146230
         Iterations 10
Tertile 1: 91
Tertile 2: 74
Tertile 3: 78
block 1
Optimization terminated successfully.
         Current function value: 0.162178
         Iterations 10
block 2
Optimization terminated successfully.
         Current 

In [39]:
import shutil

task = 'ARC'
root = '/autofs/space/karima_003/users/New_ARC/'
subject_dirs = glob.glob(root + '*')
subject_dirs = [sdir for sdir in subject_dirs if re.search('[a-z]{2}[0-9]{3}',os.path.basename(sdir))]

# Gather every subject's day-1 'behavior3' figure (EEG and MRI sessions) in a
# single folder under the current working directory.
summary_dir = os.path.join(os.getcwd(), '0_DDB_Summary')
# Create the folder up front; copying into a missing directory raises.
if not os.path.isdir(summary_dir):
    os.makedirs(summary_dir)

for subject_dir in subject_dirs:
    subject = re.search('[a-z]{2}[0-9]{3}',os.path.basename(subject_dir)).group()
    for modality in ['eeg','mri']:
        fname = subject_dir + '/behavior/%s_%s_%s_1behavior3.jpg' %(subject,task,modality)
        # Sessions without a figure are skipped silently.
        if os.path.isfile(fname):
            shutil.copyfile(fname, os.path.join(summary_dir, os.path.basename(fname)))