In [1]:
import pandas as pd
import csv
import numpy as np
import math
import matplotlib.pyplot as plt

In [2]:
# input single value
# convert ages 12 to 18
# to ages -3 to 3
def age(v):
    """Centre an age on 15 by subtracting 15 from it.

    With the 12-18 input range mentioned in the notebook comments this
    yields values from -3 to 3.
    """
    centred = v - 15
    return centred

# input single value
# convert 1 (female) to 0
# convert 2 (male) to 1
def gender(v):
    """Shift the 1/2 gender coding down by one (1 -> 0, 2 -> 1)."""
    shifted = v - 1
    return shifted

# input single value
# convert 1 (depression) to 1
# convert 2 (healthy) to 0
def diagnosis(v):
    """Return 1 when v equals 1, and 0 for every other value."""
    return 1 if v == 1 else 0

def bool2int(b):
    """Map a truthy value to 1 and a falsy value to 0."""
    if b:
        return 1
    return 0


def ems(rewards, gamma):
    """Discounted running sum of the rewards seen *before* each position.

    The result has the same length as `rewards`; entry 0 is 0 and
    entry k is gamma * entry[k-1] + rewards[k-1].

    rewards -- object supporting len() and positional `.iloc[...]` access
    gamma   -- discount factor applied to the running sum at every step
    """
    n = len(rewards)
    history = np.zeros(n)
    for k in range(1, n):
        # Read the previous value back from the float64 array on every step.
        history[k] = gamma * history[k - 1] + rewards.iloc[k - 1]
    return history

# input a single value
# return the utility
# as sigmoid function centered at 0 instead 0.5
def utility(v):
    """Return the logistic sigmoid of v shifted down by 0.5.

    The result lies in [-0.5, 0.5] and is 0 at v == 0.

    Computed as 0.5 * tanh(v / 2), which is algebraically the same as
    exp(v) / (exp(v) + 1) - 0.5 but does not overflow: the exp-based form
    raises OverflowError once v exceeds roughly 709.
    """
    u = 0.5 * math.tanh(v / 2.0)
    return u

## All Files at Once

In [3]:
# Raw input files (without ".csv") and the tag used in each file's x/y export names.
datacsvs = ["0data_for_jess", "9data_for_jess_3block", "10data_for_jess_random"]
suffices = ["_1block", "_3block", "_random"]

# When standardize is True the monetary columns are divided by the standard
# deviation (np.std) of column `cn`, computed over the whole file.
standardize = True
cn = 'Actual'
# cn = 'CertainAmount'

# Discount factors passed to ems() for the history features.
gammas = [0.0, 0.3, 0.5, 0.7, 1.0]

# Monetary trial columns that get standardized and mapped through utility().
money_cols = ['CertainAmount', 'HigherOutcome', 'LowerOutcome', 'Actual']

# Columns for which a discounted history (EMS) feature is computed per subject.
ems_cols = ['Actual', 'CertainReward', 'GamblingReward', 'PredictionError',
            'ActualUtility', 'CertainRewardUtility', 'GamblingRewardUtility', 'PredictionErrorUtility']

# Column labels written to the exported x CSVs (same order as `xl` below).
namesl = ['subject_id', 'age', 'gender', 'diagnosis', 'mood',
          'current expected reward', 'current gambling range', 'current indicator',
          'past rewards', 'past reward prediction error']


def strip3(df):
    """Return a copy of df without its first three rows."""
    return df.iloc[3:, :].copy()


for datacsv, suffix in zip(datacsvs, suffices):
    data = pd.read_csv(datacsv + ".csv", sep=',', header='infer')
    nrows = data.shape[0]
    outcomes = data[["Outcome1Amount", "Outcome2Amount"]]
    data['HigherOutcome'] = outcomes.max(axis=1)
    data['LowerOutcome'] = outcomes.min(axis=1)
    data['Mood'] = data['Happiness']
    data['diagnosis'] = data['ptype']

    # update subject features
    data['age'] = data['age'].map(age)
    data['gender'] = data['gender'].map(gender)
    data['diagnosis'] = data['diagnosis'].map(diagnosis)
    # 1 when the certain amount is below the lower gamble outcome, else 0
    data['Indicator'] = (data['CertainAmount'] < data['LowerOutcome']).map(bool2int)

    # standardize raw trial parameters
    if standardize:
        # sd is taken before any column is rescaled, so all four columns share it
        sd = np.std(data.loc[:, cn].copy())
        print ("for ", datacsv, " sd of ", cn, " : ", str(sd))
        for money_col in money_cols:
            data[money_col] = data[money_col] / sd

    # calculate utility over trial parameters
    for money_col in money_cols:
        data[money_col + 'Utility'] = data[money_col].map(utility)

    # compute 2nd layer values
    data['CurrentExpectedReward'] = (data['CertainAmount'] + data['HigherOutcome'] + data['LowerOutcome']) / 3
    data['ExpectedGambleOutcome'] = (data['LowerOutcome'] + data['HigherOutcome']) / 2
    data['diff'] = data['ExpectedGambleOutcome'] - data['CertainAmount']
    data['PredictionError'] = data['Gamble'] * (data['Actual'] - data['ExpectedGambleOutcome'])
    data['CertainReward'] = (1 - data['Gamble']) * data['Actual']
    data['GamblingReward'] = data['Gamble'] * data['Actual']
    data['GamblingRange'] = data['HigherOutcome'] - data['LowerOutcome']

    # compute 2nd layer utilities
    data['ExpectedGambleOutcomeUtility'] = (data['LowerOutcomeUtility'] + data['HigherOutcomeUtility']) / 2
    data['diffUtility'] = data['ExpectedGambleOutcomeUtility'] - data['CertainAmountUtility']
    data['PredictionErrorUtility'] = data['Gamble'] * (data['ActualUtility'] - data['ExpectedGambleOutcomeUtility'])
    data['CertainRewardUtility'] = (1 - data['Gamble']) * data['ActualUtility']
    data['GamblingRewardUtility'] = data['Gamble'] * data['ActualUtility']
    data['GamblingRangeUtility'] = data['HigherOutcomeUtility'] - data['LowerOutcomeUtility']
    data['CurrentExpectedRewardUtility'] = (data['CertainAmountUtility'] + data['HigherOutcomeUtility'] + data['LowerOutcomeUtility']) / 3

    # final column names
    colnames = data.columns

    # update parameters that summarize the past (mood, cumulative reward, past not gamble reward, past gamble reward)
    ## record the row positions of each subject; the first column holds the subject key.
    ## Appending in place keeps this linear and avoids shadowing the builtin `id`.
    ID = {}
    for row_pos, subject in enumerate(data.iloc[:, 0]):
        ID.setdefault(int(subject), []).append(row_pos)
    nsubj = len(ID)

    ## Create a dictionary holding an independent copy of each subject's rows
    subjs = ID.keys()
    subjDF = {subj: data.iloc[ID[subj], :].copy() for subj in subjs}

    # compute timeseries values
    for subj in subjs:
        subj_df = subjDF[subj]
        # fill mood with most recently reported mood, otherwise pad with 0's
        # (.ffill() replaces the deprecated fillna(method='ffill'))
        subj_df['Mood'] = subj_df['Mood'].ffill().fillna(0)
        # Calculate EMS features, one column per (feature, gamma) pair
        for col in ems_cols:
            rewards = subj_df[col].copy()
            for gamma in gammas:
                subj_df[col + 'EMS' + str(gamma)] = ems(rewards, gamma)

    # remove the first three trials of all subjects
    stripsubjDF = {subj: strip3(df) for subj, df in subjDF.items()}

    # put subject data into a new dataframe
    datanew = pd.concat(stripsubjDF.values())

    # file-name tag shared by every export of this run
    tag = "_normed_standardized_" + cn if standardize else "_normed"

    # save all the columns into a csv
    datanew.to_csv(datacsv + tag + ".csv", index=False) # don't include the indices

    # modelEV: one x/y pair per discount factor
    for gamma in gammas:
        xl = ['subject_id', 'age', 'gender', 'diagnosis', 'Mood',
              'CurrentExpectedReward', 'GamblingRange', 'Indicator',
              'ActualEMS'+str(gamma), 'PredictionErrorEMS'+str(gamma)]
        yl = ['subject_id', 'Gamble']
        x = datanew[xl].copy()
        x = x.rename(index=str, columns=dict(zip(xl, namesl))).copy()
        y = datanew[yl].copy()
        x.to_csv("x" + suffix + tag + "_EV" + str(gamma) + ".csv", index=False) # don't include the indices
        y.to_csv("y" + suffix + tag + "_EV" + str(gamma) + ".csv", index=False) # don't include the indices

    # NOTE: the utility-based (EU) export was commented out in the original cell
    # and is not produced here; only the EV files above are written.

for  0data_for_jess  sd of  Actual  :  1.6234073151919128
for  9data_for_jess_3block  sd of  Actual  :  6.808132523442242
for  10data_for_jess_random  sd of  Actual  :  0.8812333806155501


## Create Old Features csv

In [None]:
datacsvs = ["10data_for_jess_random", "0data_for_jess", "9data_for_jess_3block"]
suffices = ["_random", "_1block", "_3block"]
cols = ["random", "1block", "3block"]  # not used by this cell

# When standardize is True the monetary columns are divided by the standard
# deviation (np.std) of column `cn`, and the x/y CSVs are written.
standardize = True
cn = 'Actual'

# The original cell set subplot titles on `axes` here, but no figure has been
# created at this point on a fresh kernel (NameError); this cell only writes
# CSVs, so that loop was removed.

# Discount factors passed to ems() for the history features.
gammas = [0.5, 0.7]

# Column labels written to the exported x CSVs (same order as `xl` below).
namesl = ['subject_id', 'age', 'gender', 'diagnosis', 'mood',
          'current certain reward', 'current gambling range',
          'past not gamble reward', 'past gamble reward']


def strip3(df):
    """Return a copy of df without its first three rows."""
    return df.iloc[3:, :].copy()


for datacsv, suffix in zip(datacsvs, suffices):
    data = pd.read_csv(datacsv + ".csv", sep=',', header='infer')
    nrows = data.shape[0]
    print (data.columns)
    outcomes = data[["Outcome1Amount", "Outcome2Amount"]]
    data['HigherOutcome'] = outcomes.max(axis=1)
    data['LowerOutcome'] = outcomes.min(axis=1)
    data['Mood'] = data['Happiness']
    data['diagnosis'] = data['ptype']

    # update subject features
    data['age'] = data['age'].map(age)
    data['gender'] = data['gender'].map(gender)
    data['diagnosis'] = data['diagnosis'].map(diagnosis)
    data['Indicator'] = data['CertainAmount'] < data['LowerOutcome']

    # standardize raw trial parameters
    if standardize:
        # sd is taken before any column is rescaled, so all four columns share it
        sd = np.std(data.loc[:, cn].copy())
        print ("for ", datacsv, " sd of ", cn, " : ", str(sd))
        for money_col in ['CertainAmount', 'HigherOutcome', 'LowerOutcome', 'Actual']:
            data[money_col] = data[money_col] / sd

    # compute 2nd layer values
    data['CurrentExpectedReward'] = (data['CertainAmount'] + data['HigherOutcome'] + data['LowerOutcome']) / 3
    data['ExpectedGambleOutcome'] = (data['LowerOutcome'] + data['HigherOutcome']) / 2
    data['diff'] = data['ExpectedGambleOutcome'] - data['CertainAmount']
    data['PredictionError'] = data['Gamble'] * (data['Actual'] - data['ExpectedGambleOutcome'])
    data['CertainReward'] = (1 - data['Gamble']) * data['Actual']
    data['GamblingReward'] = data['Gamble'] * data['Actual']
    data['GamblingRange'] = data['HigherOutcome'] - data['LowerOutcome']

    # update parameters that summarize the past (mood, cumulative reward, past not gamble reward, past gamble reward)
    ## record the row positions of each subject; the first column holds the subject key.
    ## Appending in place keeps this linear and avoids shadowing the builtin `id`.
    ID = {}
    for row_pos, subject in enumerate(data.iloc[:, 0]):
        ID.setdefault(int(subject), []).append(row_pos)
    nsubj = len(ID)

    ## Create a dictionary holding an independent copy of each subject's rows
    subjs = ID.keys()
    subjDF = {subj: data.iloc[ID[subj], :].copy() for subj in subjs}

    # compute timeseries values
    for subj in subjs:
        subj_df = subjDF[subj]
        # fill mood with most recently reported mood, otherwise pad with 0's
        # (.ffill() replaces the deprecated fillna(method='ffill'))
        subj_df['Mood'] = subj_df['Mood'].ffill().fillna(0)
        # Calculate EMS features, one column per (feature, gamma) pair
        for col in ['Actual', 'CertainReward', 'GamblingReward', 'PredictionError']:
            rewards = subj_df[col].copy()
            for gamma in gammas:
                subj_df[col + 'EMS' + str(gamma)] = ems(rewards, gamma)

    # remove the first three trials of all subjects
    stripsubjDF = {subj: strip3(df) for subj, df in subjDF.items()}

    # put subject data into a new dataframe
    datanew = pd.concat(stripsubjDF.values())

    ## old features
    for gamma in gammas:
        xl = ['subject_id', 'age', 'gender', 'diagnosis', 'Mood',
              'CertainAmount', 'GamblingRange',
              'CertainRewardEMS'+str(gamma), 'GamblingRewardEMS'+str(gamma)]
        yl = ['subject_id', 'Gamble']
        x = datanew[xl].copy()
        x = x.rename(index=str, columns=dict(zip(xl, namesl))).copy()
        y = datanew[yl].copy()
        if standardize:
            # NOTE(review): these names follow the same "<x|y><suffix>_normed_standardized_<cn>_EV<gamma>.csv"
            # pattern that the "All Files at Once" cell writes for its EV model, so for a
            # shared gamma this overwrites those files -- confirm that is intended.
            x.to_csv("x" + suffix + "_normed_standardized_" + cn + "_EV" + str(gamma) + ".csv", index=False) # don't include the indices
            y.to_csv("y" + suffix + "_normed_standardized_" + cn + "_EV" + str(gamma) + ".csv", index=False) # don't include the indices

        # list the feature labels (subject_id at position 0 is skipped)
        print ("old features : ") 
        for i in range(1, len(namesl)):
            print (i, namesl[i])



## Evaluate Collinearity

In [None]:
datacsvs = ["10data_for_jess_random", "0data_for_jess", "9data_for_jess_3block"]
suffices = ["_random", "_1block", "_3block"]
cols = ["random", "1block", "3block"]

# Top row: correlation matrix of the old feature set; bottom row: new feature set.
fig, axes = plt.subplots(nrows=2, ncols=3, sharex=True, sharey=True, figsize=(19,15))
# st = fig.suptitle("Co", fontsize="x-large")

for ax, col in zip(axes[0], cols):
    ax.set_title(col)

# Discount factors passed to ems(); the feature lists below use the last one.
# (The original read `gamma` after the inner loop had finished, relying on the
# leaked loop variable; it is now selected explicitly.)
gammas = [0.5]
feature_gamma = gammas[-1]

# Old feature set. 'CertainAmount' is labelled 'current certain reward', matching
# the "Create Old Features csv" cell (it was labelled 'current expected reward' here).
old_namesl = ['subject_id', 'age', 'gender', 'diagnosis', 'mood',
              'current certain reward', 'current gambling range',
              'past not gamble reward', 'past gamble reward']
old_xl = ['subject_id', 'age', 'gender', 'diagnosis', 'Mood',
          'CertainAmount', 'GamblingRange',
          'CertainRewardEMS'+str(feature_gamma), 'GamblingRewardEMS'+str(feature_gamma)]

# New feature set.
new_namesl = ['subject_id', 'age', 'gender', 'diagnosis', 'mood',
              'current expected reward', 'current diff', 'current gambling range', 'current indicator',
              'past rewards', 'past reward prediction error']
new_xl = ['subject_id', 'age', 'gender', 'diagnosis', 'Mood',
          'CurrentExpectedReward', 'diff', 'GamblingRange', 'Indicator',
          'ActualEMS'+str(feature_gamma), 'PredictionErrorEMS'+str(feature_gamma)]


def strip3(df):
    """Return a copy of df without its first three rows."""
    return df.iloc[3:, :].copy()


for j, (datacsv, suffix) in enumerate(zip(datacsvs, suffices)):
    data = pd.read_csv(datacsv + ".csv", sep=',', header='infer')
    nrows = data.shape[0]
    print (data.columns)
    outcomes = data[["Outcome1Amount", "Outcome2Amount"]]
    data['HigherOutcome'] = outcomes.max(axis=1)
    data['LowerOutcome'] = outcomes.min(axis=1)
    data['Mood'] = data['Happiness']
    data['diagnosis'] = data['ptype']

    # update subject features
    data['age'] = data['age'].map(age)
    data['gender'] = data['gender'].map(gender)
    data['diagnosis'] = data['diagnosis'].map(diagnosis)
    data['Indicator'] = data['CertainAmount'] < data['LowerOutcome']

    # compute 2nd layer values
    data['CurrentExpectedReward'] = (data['CertainAmount'] + data['HigherOutcome'] + data['LowerOutcome']) / 3
    data['ExpectedGambleOutcome'] = (data['LowerOutcome'] + data['HigherOutcome']) / 2
    data['diff'] = data['ExpectedGambleOutcome'] - data['CertainAmount']
    data['PredictionError'] = data['Gamble'] * (data['Actual'] - data['ExpectedGambleOutcome'])
    data['CertainReward'] = (1 - data['Gamble']) * data['Actual']
    data['GamblingReward'] = data['Gamble'] * data['Actual']
    data['GamblingRange'] = data['HigherOutcome'] - data['LowerOutcome']

    # update parameters that summarize the past (mood, cumulative reward, past not gamble reward, past gamble reward)
    ## record the row positions of each subject; the first column holds the subject key.
    ## Appending in place keeps this linear and avoids shadowing the builtin `id`.
    ID = {}
    for row_pos, subject in enumerate(data.iloc[:, 0]):
        ID.setdefault(int(subject), []).append(row_pos)
    nsubj = len(ID)

    ## Create a dictionary holding an independent copy of each subject's rows
    subjs = ID.keys()
    subjDF = {subj: data.iloc[ID[subj], :].copy() for subj in subjs}

    # compute timeseries values
    for subj in subjs:
        subj_df = subjDF[subj]
        # fill mood with most recently reported mood, otherwise pad with 0's
        # (.ffill() replaces the deprecated fillna(method='ffill'))
        subj_df['Mood'] = subj_df['Mood'].ffill().fillna(0)
        # Calculate EMS features, one column per (feature, gamma) pair
        for col in ['Actual', 'CertainReward', 'GamblingReward', 'PredictionError']:
            rewards = subj_df[col].copy()
            for gamma in gammas:
                subj_df[col + 'EMS' + str(gamma)] = ems(rewards, gamma)

    # remove the first three trials of all subjects
    stripsubjDF = {subj: strip3(df) for subj, df in subjDF.items()}

    # put subject data into a new dataframe
    datanew = pd.concat(stripsubjDF.values())

    ## old features (subject_id, the first column, is left out of the correlation)
    x = datanew[old_xl].copy()
    x = x.rename(index=str, columns=dict(zip(old_xl, old_namesl))).copy()
    im = axes[0,j].imshow(x.iloc[:,1:].copy().corr(), vmin=-1.0, vmax=1.0)

    print ("old features : ") 
    for i in range(1, len(old_namesl)):
        print (i, old_namesl[i])

    ## new features (subject_id, the first column, is left out of the correlation)
    x = datanew[new_xl].copy()
    x = x.rename(index=str, columns=dict(zip(new_xl, new_namesl))).copy()

    print (suffix)
    im = axes[1,j].imshow(x.iloc[:,1:].copy().corr(), vmin=-1.0, vmax=1.0)

    print ("new features : ")
    for i in range(1, len(new_namesl)):
        print (i, new_namesl[i])

# One shared colour bar on the right; every panel uses the same -1..1 scale.
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(im, cax=cbar_ax)
plt.savefig("corr.png")
plt.show()

## Visualize Standardization

In [None]:
datacsvs = ["10data_for_jess_random", "0data_for_jess", "9data_for_jess_3block"]
suffices = ["_random", "_1block", "_3block"]
cols = ["random", "1block", "3block"]

standardize = True
cn = 'Actual' # 'CertainAmount'

rows = ['Before', 'After']
# Row 0 shows the raw trial values, row 1 the values after the optional rescaling.
fig, axes = plt.subplots(nrows=2, ncols=3, sharex=True, sharey=True, figsize=(15,8))
st = fig.suptitle("Trial Values Before (Top) and After (Bottom) Standardizing with " + cn, fontsize="x-large")

for title_ax, title in zip(axes[0], cols):
    title_ax.set_title(title)

# for ax, row in zip(axes[:,0], rows):
#     ax.set_ylabel(row, rotation=0, size='large')
#     ax.tick_params(axis='both', which='major', pad=10)


def _plot_trial_hists(target_ax, frame):
    """Draw the three trial-value histograms of `frame` on `target_ax`."""
    for column, label in (('HigherOutcome', 'Higher Outcome'),
                          ('LowerOutcome', 'Lower Outcome'),
                          ('CertainAmount', 'Certain Amount')):
        target_ax.hist(frame[column], label = label)
    target_ax.tick_params(axis='both', which='major', pad=10)


for j, (datacsv, suffix) in enumerate(zip(datacsvs, suffices)):
    # https://stackoverflow.com/questions/31726643/how-do-i-get-multiple-subplots-in-matplotlib

    data = pd.read_csv(datacsv + ".csv", sep=',', header='infer')
    nrows = data.shape[0]
    outcome_pair = data[["Outcome1Amount", "Outcome2Amount"]]
    data['HigherOutcome'] = outcome_pair.max(axis=1)
    data['LowerOutcome'] = outcome_pair.min(axis=1)
    data['Mood'] = data['Happiness']
    data['diagnosis'] = data['ptype']

    print (suffix, str(j))

    # top row: values as read from the file
    _plot_trial_hists(axes[0,j], data)

    # update subject features
    for feature, recode in (('age', age), ('gender', gender), ('diagnosis', diagnosis)):
        data[feature] = data[feature].map(recode)

    # standardize raw trial parameters
    if standardize:
        v = data.loc[:,cn].copy()
        sd = np.std(v)
        print ("for ", datacsv, " sd of ", cn, " : ", str(sd))
        for money_col in ('CertainAmount', 'HigherOutcome', 'LowerOutcome', 'Actual'):
            data[money_col] = data[money_col] / sd

    # bottom row: values after the (optional) rescaling
    _plot_trial_hists(axes[1,j], data)

# https://stackoverflow.com/questions/39164828/global-legend-for-all-subplots
legend_ax = axes.flatten()[-2]
legend_ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.12), ncol=3)
plt.savefig('standardize_'+cn+'.png')
plt.show()


## Test for one file at a time

In [None]:
# Build the per-trial feature columns for a single input file.
datacsv = "0data_for_jess"
data = pd.read_csv(datacsv + ".csv", sep=',', header='infer')
nrows = data.shape[0]
outcome_pair = data[["Outcome1Amount", "Outcome2Amount"]]
data['HigherOutcome'] = outcome_pair.max(axis=1)
data['LowerOutcome'] = outcome_pair.min(axis=1)
data['Mood'] = data['Happiness']
data['diagnosis'] = data['ptype']

# update subject features
for feature, recode in (('age', age), ('gender', gender), ('diagnosis', diagnosis)):
    data[feature] = data[feature].map(recode)

# standardize raw trial parameters by the standard deviation of 'Actual'
# (taken before any column is rescaled, so all four columns share it)
ac = data.loc[:,'Actual'].copy()
sd = np.std(ac)
money_cols = ('CertainAmount', 'HigherOutcome', 'LowerOutcome', 'Actual')
for money_col in money_cols:
    data[money_col] = data[money_col] / sd

# calculate utility over trial parameters
for money_col in money_cols:
    data[money_col + 'Utility'] = data[money_col].map(utility)

# compute 2nd layer values
gamble = data['Gamble']
data['ExpectedGambleOutcome'] = (data['LowerOutcome'] + data['HigherOutcome']) / 2
data['diff'] = data['ExpectedGambleOutcome'] - data['CertainAmount']
data['PredictionError'] = gamble * (data['Actual'] - data['ExpectedGambleOutcome'])
data['CertainReward'] = (1 - gamble) * data['Actual']
data['GamblingReward'] = gamble * data['Actual']
data['GamblingRange'] = data['HigherOutcome'] - data['LowerOutcome']

# compute 2nd layer utilities
data['ExpectedGambleOutcomeUtility'] = (data['LowerOutcomeUtility'] + data['HigherOutcomeUtility']) / 2
data['diffUtility'] = data['ExpectedGambleOutcomeUtility'] - data['CertainAmountUtility']
data['PredictionErrorUtility'] = gamble * (data['ActualUtility'] - data['ExpectedGambleOutcomeUtility'])
data['CertainRewardUtility'] = (1 - gamble) * data['ActualUtility']
data['GamblingRewardUtility'] = gamble * data['ActualUtility']
data['GamblingRangeUtility'] = data['HigherOutcomeUtility'] - data['LowerOutcomeUtility']

# print final column names
colnames = data.columns
print (colnames)



In [None]:
# update parameters that summarize the past (mood, cumulative reward, past not gamble reward, past gamble reward)
## record the row positions of each subject; the first column holds the subject key.
## Appending in place keeps this linear and avoids shadowing the builtin `id`.
ID = {}
for row_pos, subject in enumerate(data.iloc[:, 0]):
    ID.setdefault(int(subject), []).append(row_pos)
nsubj = len(ID)

## Create a dictionary holding an independent copy of each subject's rows
subjs = ID.keys()
subjDF = {subj: data.iloc[ID[subj], :].copy() for subj in subjs}

# Discount factors passed to ems() for the history features.
gammas = [0.3, 0.5, 0.7]
for subj in subjs:
    subj_df = subjDF[subj]
    # fill mood with most recently reported mood, otherwise pad with 0's
    # (.ffill() replaces the deprecated fillna(method='ffill'))
    subj_df['Mood'] = subj_df['Mood'].ffill().fillna(0)
    # Calculate EMS features, one column per (feature, gamma) pair
    for col in ['Actual', 'CertainReward', 'GamblingReward', 'PredictionError',
                'ActualUtility', 'CertainRewardUtility', 'GamblingRewardUtility', 'PredictionErrorUtility']:
        rewards = subj_df[col].copy()
        for gamma in gammas:
            subj_df[col + 'EMS' + str(gamma)] = ems(rewards, gamma)

# remove the first three trials of all subjects
def strip3(df):
    """Return a copy of df without its first three rows."""
    return df.iloc[3:, :].copy()
stripsubjDF = {subj: strip3(df) for subj, df in subjDF.items()}

# put subject data into a new dataframe
datanew = pd.concat(stripsubjDF.values())

# relabel the x csv
# NOTE(review): 'current expected reward' is applied to the 'CertainAmount'
# (or 'CertainAmountUtility') column below -- confirm the label is intended.
namesl = ['subject_id', 'age', 'gender', 'diagnosis', 'mood', 
          'current expected reward', 'current diff', 'current gambling range', 
          'past rewards', 'past reward prediction error']

# modelEV uses the raw columns, modelEU the same columns with the 'Utility' suffix
for model_tag, col_suffix in (('EV', ''), ('EU', 'Utility')):
    for gamma in gammas:
        # lookup the x variables inside the dataframe
        xl = ['subject_id', 'age', 'gender', 'diagnosis', 'Mood',
              'CertainAmount' + col_suffix, 'diff' + col_suffix, 'GamblingRange' + col_suffix,
              'Actual' + col_suffix + 'EMS' + str(gamma),
              'PredictionError' + col_suffix + 'EMS' + str(gamma)]
        yl = ['subject_id', 'Gamble']
        x = datanew[xl].copy()
        x = x.rename(index=str, columns=dict(zip(xl, namesl))).copy()
        y = datanew[yl].copy()
        x.to_csv("x_normed_" + model_tag + str(gamma) + ".csv", index=False) # don't include the indices
        y.to_csv("y_normed_" + model_tag + str(gamma) + ".csv", index=False) # don't include the indices


In [None]:
# Subject key to inspect; a later cell uses it to index subjDF.
subj = 22686


In [None]:
# Inspect one subject's frame: its columns, its 'Actual' series and the
# discounted-history columns derived from 'Actual'.
subj = 22686
subj_frame = subjDF[subj]

print (subj_frame.columns)

print (subj_frame.loc[:,'Actual'])
# The history columns are created as 'ActualEMS' + str(gamma) (one per discount
# factor), so a bare 'ActualEMS' column does not exist and raised KeyError.
# Show every column whose name starts with 'ActualEMS' instead.
print (subj_frame.filter(regex='^ActualEMS'))




In [None]:
# Rebuild `datanew` from every per-subject frame in subjDF, then show its
# column index and its shape.
datanew = pd.concat(list(subjDF.values()))
for summary in (datanew.columns, datanew.shape):
    print (summary)

In [None]:
# Dump every per-subject frame held in subjDF.
all_subject_frames = subjDF.values()
print (all_subject_frames)

In [None]:

# Show the column index, the shape and the first 61 rows of `datanew`.
first_rows = datanew.iloc[:61].copy()
for summary in (datanew.columns, datanew.shape, first_rows):
    print (summary)

In [None]:
# Scratch check: pairing two equal-length lists with zip() and turning the
# pairs into a dict maps each element of `a` to the element of `b` at the
# same position.
a = [1, 2, 3, 4]
b = [4, 5, 6, 7]

l = zip(a, b)
d = {key: value for key, value in l}
print (d)

## Look at Each Subject

In [None]:
# Load one raw file and report how many rows each subject contributes.
datacsv = "9data_for_jess_3block"
data = pd.read_csv(datacsv + ".csv", sep=',', header='infer')
nrows = data.shape[0]
print (nrows)

## record the row positions of each subject; the first column holds the subject key.
## Appending in place keeps this linear (the original rebuilt the list for every
## row) and avoids shadowing the builtin `id` in the notebook namespace.
ID = {}
for row_pos, subject in enumerate(data.iloc[:, 0]):
    ID.setdefault(int(subject), []).append(row_pos)
nsubj = len(ID)

## Create a dictionary holding an independent copy of each subject's rows
subjs = ID.keys()
subjDF = {subj: data.iloc[ID[subj], :].copy() for subj in subjs}

# Print each subject key with its row count; the full frame is dumped for
# the one hard-coded subject key 23798.
for key, value in subjDF.items():
    print (str(key), len(value))
    if key == 23798:
        print (value)

In [None]:
# Show the row at position 1800 of `data`.
row_1800 = data.iloc[1800]
print (row_1800)