In [1]:
import pandas as pd
from scipy.io import savemat
import os
import os.path as osp
import numpy as np
import glob
import scipy.io

### This section walks through the preprocessing line by line for a single subject; skip ahead to the `preproc` function below for the batch version.

In [2]:
# Show the current working directory; the relative data paths used in the cells below are resolved against it.
os.getcwd()

'/Users/jackiebeltran/Documents/RL/scripts'

In [None]:
# Subject whose raw task file is walked through step by step in the cells below.
subID = 506

# Only empty cells are treated as missing. pandas' default NA list (in recent versions)
# also contains the text 'None', which this task uses as a real response code
# ("no button press") and which the recoding cells below look up as the string 'None'.
raw_df = pd.read_csv('../N_55_raw/506_RPlearning_2020_Jan_08_1346.csv',
                     keep_default_na=False, na_values=[''])


In [None]:
# Display the raw table loaded from the CSV as this cell's output.
raw_df

In [None]:
# Extract relevant columns into df.
# .copy() makes df an independent frame, so the columns added in later cells are
# written to df itself rather than to a slice of raw_df (avoids SettingWithCopyWarning).
df = raw_df[["stim2bottom","stim1position","stim1top","condition", 'key_resp_2.keys', 'key_resp_2.corr',
             'feedback', 'corrResp']].copy()
df

In [None]:
# Correct cues: create 'cueTop' & 'cueBottom' columns; reassigning stimuli to values of 1 & 2

# Stimulus label = the part of the stimulus file name before the first '.'
df['cueTop'] = df['stim1top'].str.split('.').str[0]
df['cueBottom'] = df['stim2bottom'].str.split('.').str[0]

# 2 x n_trials array of the stimulus labels, taken before they are recoded below
cue = df[['cueTop', 'cueBottom']].T.to_numpy()

# Stimuli 1, 3, 5 -> 1 and stimuli 2, 4, 6 -> 2; any other label is left unchanged.
# The result is assigned back to the column: a chained `df.col.replace(..., inplace=True)`
# is not guaranteed to modify df (it has no effect under pandas Copy-on-Write).
stim_codes = {"1": 1, "3": 1, "5": 1, "2": 2, "4": 2, "6": 2}
df['cueTop'] = df['cueTop'].map(lambda label: stim_codes.get(label, label))
df['cueBottom'] = df['cueBottom'].map(lambda label: stim_codes.get(label, label))
df


In [None]:
# recode key response into a "choice" column
# in the original data, 2 is a button press (top stimulus) and 'None' is choosing the bottom stimulus
# we'll recode 1 = button press, 2 = no action

# Build the recoded column and assign it in one step; values other than '2' / 'None'
# are left unchanged. (A chained `df.choice.replace(..., inplace=True)` is not
# guaranteed to write back to df.)
key_codes = {'2': 1, 'None': 2}
df['choice'] = df['key_resp_2.keys'].map(lambda key: key_codes.get(key, key))
choice = df['choice'].to_numpy()
df

In [None]:
# determine what stimulus the choice corresponds to and add that stimulus choice to an actions column
# choice 1 -> the top cue, choice 2 -> the bottom cue, anything else is left as ''

# Single pass over the three columns instead of an iterrows()/df.at loop.
# dtype=object lets the column hold the recoded cues and '' side by side.
df['actions'] = pd.Series(
    [top if pick == 1 else bottom if pick == 2 else ''
     for pick, top, bottom in zip(df['choice'], df['cueTop'], df['cueBottom'])],
    index=df.index, dtype=object)

df

In [None]:
# create rewards column with feedback recoded

# win -> 1, loss -> -1, nil / nothing -> 0; any other feedback value is left unchanged.
# The recoded values are assigned straight to the new column; a chained
# `df.rewards.replace(..., inplace=True)` is not guaranteed to write back to df.
reward_codes = {"win" : 1,
                "loss" : -1,
                "nil" : 0,
                "nothing" : 0}
df['rewards'] = df['feedback'].map(lambda outcome: reward_codes.get(outcome, outcome))
rewards = df['rewards'].to_numpy()
df

In [None]:
# recode conditions

# gain -> 1, loss -> 2, neutral -> 3 in a new 'conditions' column; the original
# 'condition' column keeps its text labels. Any other value is left unchanged.
# Assigned back explicitly because a chained inplace replace is not guaranteed to modify df.
condition_codes = {"gain" : 1, "loss" : 2, "neutral" : 3}
df['conditions'] = df['condition'].map(lambda label: condition_codes.get(label, label))
conditions = df['conditions'].to_numpy()
df

In [None]:
# recode corrResp to a "better_action" column, then determine which stimulus was the best
# choice on each trial. This has to be based on the stimulus position that corresponds to
# better_action: if better_action is 1, take the value of the stimulus that was on top.

# we'll recode 1 = button press, 2 = no action; other values are left unchanged.
# Assigned back explicitly because a chained inplace replace is not guaranteed to modify df.
correct_codes = {"2" : 1, "None" : 2}
df['better_action'] = df['corrResp'].map(lambda key: correct_codes.get(key, key))
better_action = df['better_action'].to_numpy()


# determine what stimulus the better_action corresponds to and add it to a best_action column:
# 1 -> the top cue, 2 -> the bottom cue, anything else is left as ''.
# Single pass over the columns instead of an iterrows()/df.at loop; dtype=object lets the
# column hold the recoded cues and '' side by side.
df['best_action'] = pd.Series(
    [top if target == 1 else bottom if target == 2 else ''
     for target, top, bottom in zip(df['better_action'], df['cueTop'], df['cueBottom'])],
    index=df.index, dtype=object)

df

In [None]:
# took_best_action is a direct copy of "key_resp_2.corr" (1 = True, 0 = False)

accuracy_column = 'key_resp_2.corr'
df['took_best_action'] = df[accuracy_column]

# keep a plain NumPy copy of the new column as well
took_best_action = df['took_best_action'].to_numpy()
df


In [None]:
# add a participant id column, using the subID set when the raw file was loaded
# (instead of repeating the literal 506)
df['sub_ID'] = subID
df


In [None]:
# Show only the rows whose original 'condition' label is "gain".
df.loc[df["condition"] == "gain"]

In [None]:
# Reduce df to the columns that go into the per-subject behavior file.

dfout = df.loc[:, ['sub_ID', 'cueTop', 'cueBottom', 'actions', 'rewards',
                   'conditions', 'best_action', 'took_best_action']]

dfout

In [None]:
# Check the working directory before saving; the output path in the next cell is relative to it.
os.getcwd()

In [None]:
# Save this subject's recoded table; the file name is built from subID instead of a
# hard-coded 506 (the original f-string had no placeholder).
dfout.to_csv(f'../data/{subID}_behavior.csv')


### Streamlined process: batch preprocessing of all subjects

In [2]:
# Show the working directory; the relative glob pattern and output paths below are resolved against it.
os.getcwd()

'/Users/jackiebeltran/Documents/GitHub/RP_Learning/1_scripts/behavior'

In [None]:
# Raw per-subject CSV files of the group being processed. To process the other group,
# swap which of the two patterns is active (preproc's output folder has to match):
# flist = sorted(glob.glob('../../RL_2/N_55/raw_HC_data/*.csv'))
flist = sorted(glob.glob('../../RL_2/N_55/raw_MDD_data/*.csv'))  # sorted: glob returns files in arbitrary order
print(f"Number of subjects: {len(flist)}")
flist

In [10]:
def preproc(filename, out_dir='../../RL_2/N_55/mdd'):
    """Recode one subject's raw task CSV and save the reduced table.

    The subject ID is the part of the file name before the first underscore.
    Each row of the raw file is recoded as follows:

    * cues: the stimulus label is the stimulus file name up to the first '.';
      labels 1/3/5 become 1 and labels 2/4/6 become 2;
    * choice: key response 2 (button press) selects the top stimulus, 'None'
      (no press) selects the bottom stimulus;
    * ``actions``: the recoded cue that was chosen, or '' when the response
      is neither of the two codes above;
    * ``rewards``: feedback win -> 1, loss -> -1, nil / nothing -> 0;
    * ``condition``: gain -> 1, loss -> 2, neutral -> 3.

    Cue, feedback and condition values that are not in their recoding table
    are passed through unchanged.

    Parameters
    ----------
    filename : str
        Path to the raw CSV. It must contain the columns 'stim1top',
        'stim2bottom', 'key_resp_2.keys', 'feedback' and 'condition'.
    out_dir : str, optional
        Existing folder that receives ``<subID>.csv``. Defaults to the MDD
        folder; pass '../../RL_2/N_55/healthy' for the healthy group.

    Returns
    -------
    pandas.DataFrame
        The saved table, with columns 'sub_ID', 'actions', 'rewards' and
        'condition'.

    Raises
    ------
    ValueError
        If the file name contains no underscore.
    KeyError
        If a required column is missing from the file.
    """
    # Subject ID = file name up to the first underscore. basename() is used so
    # the platform's own path separator is handled, not only '/'.
    base_name = osp.basename(filename)
    subID = base_name[:base_name.index('_')]

    # Only empty cells count as missing. pandas' default NA list (in recent
    # versions) also contains the text 'None', which is a real response code
    # here; letting it become NaN would make the key recoding below miss it.
    raw_df = pd.read_csv(filename, keep_default_na=False, na_values=[''])

    stim_codes = {"1": 1, "3": 1, "5": 1, "2": 2, "4": 2, "6": 2}
    # '2.0' covers a key column that was parsed as float (e.g. 2 mixed with blanks).
    key_codes = {'2': 1, '2.0': 1, 'None': 2}
    reward_codes = {"win": 1, "loss": -1, "nil": 0, "nothing": 0}
    condition_codes = {"gain": 1, "loss": 2, "neutral": 3}

    def recode(values, table):
        # Like Series.replace with a dict: values absent from `table` pass through.
        return values.map(lambda value: table.get(value, value))

    # Recoded cue shown at the top / bottom position on each trial.
    cue_top = recode(raw_df['stim1top'].str.split('.').str[0], stim_codes)
    cue_bottom = recode(raw_df['stim2bottom'].str.split('.').str[0], stim_codes)

    # 1 = button press (top stimulus), 2 = no press (bottom stimulus), NaN otherwise.
    # Comparing on the text form makes the recode independent of whether pandas
    # parsed the key column as strings or as numbers.
    choice = raw_df['key_resp_2.keys'].astype(str).str.strip().map(key_codes)

    # The chosen cue per trial; '' when the response was neither code.
    actions = [top if pick == 1 else bottom if pick == 2 else ''
               for pick, top, bottom in zip(choice, cue_top, cue_bottom)]

    # keep data of interest
    dfout = pd.DataFrame({
        'sub_ID': subID,
        # object dtype so recoded cues and '' can share the column
        'actions': pd.Series(actions, index=raw_df.index, dtype=object),
        'rewards': recode(raw_df['feedback'], reward_codes),
        'condition': recode(raw_df['condition'], condition_codes),
    }, index=raw_df.index)

    dfout.to_csv(osp.join(out_dir, f'{subID}.csv'))
    return dfout
    

In [None]:
# Preprocess every raw file in flist, echoing each path as it is handled.
for raw_path in flist:
    print(raw_path)
    preproc(raw_path)