# Social Reward Modeling

Computational modeling of the social reward task from the DSCN Lab Social Connection (SCONN) project. These analyses will propose computational models that aim to explain participants' behavior during a task in which they learn about a similar peer, a dissimilar peer, and a computer. Similarity is defined by shared interests, based on a survey that asked participants whether they enjoy certain things (e.g., reading, hiking).

## Set Up

In [1]:
import pandas as pd
import numpy as np
import os
import glob
import seaborn as sns
import matplotlib.pyplot as plt


import importlib
import rl_functions
importlib.reload(rl_functions)

<module 'rl_functions' from '/Users/hpopal/Library/CloudStorage/GoogleDrive-hpopal@umd.edu/My Drive/dscn_lab/projects/scon_social_reward/code/rl_functions.py'>

### Set Paths

In [2]:
# Choose the project root depending on whether the '/Users/hpopal' home
# directory exists on this machine; otherwise use the alternate location.
proj_dir = (
    '/Users/hpopal/Google Drive/My Drive/dscn_lab/projects/scon_social_reward/'
    if os.path.isdir('/Users/hpopal')
    else '/Users/haroonpopal/hpopal@umd.edu - Google Drive/My Drive/dscn_lab/projects/scon_social_reward/'
)

# Input (task data) and output (modeling results) folders under derivatives/
outp_dir = os.path.join(proj_dir, 'derivatives', 'rl_modeling')
data_dir = os.path.join(proj_dir, 'derivatives', 'task_socialreward', 'data')

# Work from the project root for the rest of the notebook
os.chdir(proj_dir)

In [3]:
# Load the tab-separated participant table from the project root
subj_df = pd.read_csv(proj_dir + 'participants.tsv', sep='\t')

# Re-key the IDs into the data-folder naming scheme by inserting an
# underscore after character 7 and dropping the first four characters
# (e.g. sub-SCN101 -> SCN_101)
subj_df['participant_id'] = [
    f'{pid[4:7]}_{pid[7:]}' for pid in subj_df['participant_id']
]

# Unique folder-style IDs
subj_list = subj_df['participant_id'].unique()

# Put the table's IDs back into the 'sub-' form with underscores stripped
subj_df['participant_id'] = 'sub-' + subj_df['participant_id'].str.replace('_', '')

print(f'Found {len(subj_list)} participants')

Found 150 participants


## Model Fitting
Now we will try to determine the parameters of interest from an individual participant's data. We will attempt to get a value for their learning rate (alpha) and their reaction time parameter (beta). **Note:** Right now, we are going to do this for Models 2 and 3, since they are essentially null models and I just want to check that I am doing these modeling steps correctly before testing the actual hypothesis models.

In [4]:
# Collect every per-subject model-fit results file, in sorted path order
model_fit_files = glob.glob(outp_dir + '/model_fit_results/sub-*/*_fit_results.csv')
model_fit_files.sort()

print(len(model_fit_files))

# Read all files first and concatenate once. Calling pd.concat inside the
# loop re-copies the accumulated frame on every iteration, which becomes
# slow with thousands of files, and it also concatenates onto an empty
# placeholder frame.
temp_frames = [pd.read_csv(temp_file) for temp_file in model_fit_files]

# pd.concat raises on an empty list, so fall back to an empty frame when no
# files were found. The per-file row index is intentionally kept.
model_fit_results = pd.concat(temp_frames) if temp_frames else pd.DataFrame()

model_fit_results.head(10)

9261


Unnamed: 0,model,participant_id,alpha,beta,intercept,mse,BIC
0,Model 1: Rescorla-Wagner + Condition Different...,sub-SCN101,0.05,0.05,0.05,0.080719,13.854482
1,Model 2: Rescorla-Wagner + Reaction Time Value,sub-SCN101,0.05,0.05,0.05,0.080719,13.854482
2,Model 3: Rescorla-Wagner + Reaction Time Item ...,sub-SCN101,0.05,0.05,0.05,0.080719,13.854482
3,Model 4: Rescorla-Wagner + Social Preference,sub-SCN101,0.05,0.05,0.05,0.080068,13.853181
4,Model 5: Rescorla-Wagner + Reaction Time Surprise,sub-SCN101,0.05,0.05,0.05,0.077821,13.848686
0,Model 1: Rescorla-Wagner + Condition Different...,sub-SCN101,0.05,0.05,0.1,0.080719,13.854482
1,Model 2: Rescorla-Wagner + Reaction Time Value,sub-SCN101,0.05,0.05,0.1,0.080719,13.854482
2,Model 3: Rescorla-Wagner + Reaction Time Item ...,sub-SCN101,0.05,0.05,0.1,0.080719,13.854482
3,Model 4: Rescorla-Wagner + Social Preference,sub-SCN101,0.05,0.05,0.1,0.080068,13.853181
4,Model 5: Rescorla-Wagner + Reaction Time Surprise,sub-SCN101,0.05,0.05,0.1,0.077815,13.848675


In [7]:
# Find the best parameters for each participant: for every participant/model
# pair, keep the row of model_fit_results with the lowest MSE.
fit_cols = ['participant_id', 'model', 'alpha', 'beta', 'intercept',
            'mse', 'BIC']
fit_num_cols = ['alpha', 'beta', 'intercept', 'mse', 'BIC']

# Gather one single-row frame per participant/model and build the table once.
# Filling an empty frame cell-by-cell with .loc leaves every column as object
# dtype (mse and BIC were never converted) and is slow for many rows.
best_fit_rows = []

for subj in model_fit_results['participant_id'].unique():
    temp_subj_data = model_fit_results[model_fit_results['participant_id'] == subj]

    for temp_model in temp_subj_data['model'].unique():
        temp_model_data = temp_subj_data[temp_subj_data['model'] == temp_model]
        # All rows tied for the minimum MSE; the first one in row order wins
        temp_params = temp_model_data[temp_model_data['mse'] == temp_model_data['mse'].min()]
        best_fit_rows.append(temp_params.iloc[[0]][fit_cols])

if best_fit_rows:
    subj_fit_params = pd.concat(best_fit_rows, ignore_index=True)
else:
    # Nothing to select from: keep the expected columns so the merge works
    subj_fit_params = pd.DataFrame(columns=fit_cols)

# Attach participant-level columns; this is an inner join, so participants
# missing from subj_df are dropped
subj_fit_params = subj_fit_params.merge(subj_df, on='participant_id')
subj_fit_params[fit_num_cols] = subj_fit_params[fit_num_cols].apply(pd.to_numeric)

subj_fit_params.to_csv(outp_dir+'/model_fit_results/subject_best_fits.csv', index=False)

subj_fit_params.head(10)

Unnamed: 0,participant_id,model,alpha,beta,intercept,mse,BIC,age,gender,group
0,sub-SCN101,Model 1: Rescorla-Wagner + Condition Different...,0.05,0.1,0.8,0.080719,13.854482,13.29,2.0,1.0
1,sub-SCN101,Model 2: Rescorla-Wagner + Reaction Time Value,0.05,0.05,0.55,0.080719,13.854482,13.29,2.0,1.0
2,sub-SCN101,Model 3: Rescorla-Wagner + Reaction Time Item ...,0.8,0.05,0.2,0.080601,13.854246,13.29,2.0,1.0
3,sub-SCN101,Model 4: Rescorla-Wagner + Social Preference,0.15,0.7,0.65,0.080068,13.853181,13.29,2.0,1.0
4,sub-SCN101,Model 5: Rescorla-Wagner + Reaction Time Surprise,0.5,0.55,0.75,0.077815,13.848675,13.29,2.0,1.0


## Parameter Recovery

In [9]:
# Collect every per-subject parameter-recovery results file, in sorted path order
param_rcv_files = glob.glob(outp_dir + '/parameter_recovery/sub-*/*_rcv_results.csv')
param_rcv_files.sort()

print(len(param_rcv_files))

# Read all files first and concatenate once. Calling pd.concat inside the
# loop re-copies the accumulated frame on every iteration and concatenates
# onto an empty placeholder frame.
temp_frames = [pd.read_csv(temp_file) for temp_file in param_rcv_files]

# pd.concat raises on an empty list, so fall back to an empty frame when no
# files were found. The per-file row index is intentionally kept.
param_rcv_results = pd.concat(temp_frames) if temp_frames else pd.DataFrame()

param_rcv_results.head(10)

441


Unnamed: 0,model,participant_id_sim,alpha_sim,beta_sim,inter_sim,alpha_guess,beta_guess,inter_guess,alpha_fit,beta_fit,inter_fit,mse
0,Model 1: Rescorla-Wagner + Condition Different...,sub-SCN101,0.05,0.1,0.1,0.05,0.05,0.05,0.044322,0.105815,0.061594,3.58824e-07
1,Model 2: Rescorla-Wagner + Reaction Time Value,sub-SCN101,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.0
2,Model 3: Rescorla-Wagner + Reaction Time Item ...,sub-SCN101,0.8,0.05,0.05,0.05,0.05,0.05,0.799993,0.050006,0.050002,2.901022e-11
3,Model 4: Rescorla-Wagner + Social Preference,sub-SCN101,0.15,0.7,0.7,0.05,0.05,0.05,0.150017,0.699911,0.699947,3.01691e-10
4,Model 5: Rescorla-Wagner + Reaction Time Surprise,sub-SCN101,0.5,0.55,0.55,0.05,0.05,0.05,0.499998,0.550017,0.525013,0.000908787
0,Model 1: Rescorla-Wagner + Condition Different...,sub-SCN101,0.05,0.1,0.1,0.05,0.1,0.05,0.043104,0.107362,0.05244,4.345731e-07
1,Model 2: Rescorla-Wagner + Reaction Time Value,sub-SCN101,0.05,0.05,0.05,0.05,0.1,0.05,0.026764,0.078437,0.064135,1.750063e-07
2,Model 3: Rescorla-Wagner + Reaction Time Item ...,sub-SCN101,0.8,0.05,0.05,0.05,0.1,0.05,0.800014,0.05,0.05,3.347451e-13
3,Model 4: Rescorla-Wagner + Social Preference,sub-SCN101,0.15,0.7,0.7,0.05,0.1,0.05,0.15001,0.69999,0.699995,7.556847e-12
4,Model 5: Rescorla-Wagner + Reaction Time Surprise,sub-SCN101,0.5,0.55,0.55,0.05,0.1,0.05,0.499999,0.550009,0.547462,0.0001973334


In [16]:
# Find the best parameters for each participant: for every simulated
# participant/model pair, keep the recovery run with the lowest MSE.
rcv_value_cols = ['alpha_sim', 'beta_sim', 'inter_sim',
                  'alpha_guess', 'beta_guess', 'inter_guess',
                  'alpha_fit', 'beta_fit', 'inter_fit',
                  'mse']
rcv_cols = ['participant_id_sim', 'model'] + rcv_value_cols

# Gather one single-row frame per participant/model and build the table once.
# Filling an empty frame cell-by-cell with .loc leaves every column as object
# dtype, and the numeric conversion that used to follow was commented out and
# named columns that do not exist in this table.
best_rcv_rows = []

for subj in param_rcv_results['participant_id_sim'].unique():
    temp_subj_data = param_rcv_results[param_rcv_results['participant_id_sim'] == subj]

    for temp_model in temp_subj_data['model'].unique():
        temp_model_data = temp_subj_data[temp_subj_data['model'] == temp_model]
        # All rows tied for the minimum MSE; the first one in row order wins
        temp_params = temp_model_data[temp_model_data['mse'] == temp_model_data['mse'].min()]
        best_rcv_rows.append(temp_params.iloc[[0]][rcv_cols])

if best_rcv_rows:
    subj_rcv_params = pd.concat(best_rcv_rows, ignore_index=True)
else:
    # Nothing to select from: keep the expected columns so the merge works
    subj_rcv_params = pd.DataFrame(columns=rcv_cols)

# The simulated-participant ID becomes the merge key used by subj_df
subj_rcv_params = subj_rcv_params.rename(columns={'participant_id_sim': 'participant_id'})

# Attach participant-level columns; this is an inner join, so participants
# missing from subj_df are dropped
subj_rcv_params = subj_rcv_params.merge(subj_df, on='participant_id')
subj_rcv_params[rcv_value_cols] = subj_rcv_params[rcv_value_cols].apply(pd.to_numeric)

subj_rcv_params.to_csv(outp_dir+'/parameter_recovery/subject_best_rcv.csv', index=False)

subj_rcv_params.head(10)

Unnamed: 0,participant_id,model,alpha_sim,beta_sim,inter_sim,alpha_guess,beta_guess,inter_guess,alpha_fit,beta_fit,inter_fit,mse,age,gender,group
0,sub-SCN101,Model 1: Rescorla-Wagner + Condition Different...,0.05,0.1,0.1,0.4,0.25,0.05,0.049999,0.100001,0.099995,0.0,13.29,2.0,1.0
1,sub-SCN101,Model 2: Rescorla-Wagner + Reaction Time Value,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.05,0.0,13.29,2.0,1.0
2,sub-SCN101,Model 3: Rescorla-Wagner + Reaction Time Item ...,0.8,0.05,0.05,0.8,0.05,0.05,0.8,0.05,0.05,0.0,13.29,2.0,1.0
3,sub-SCN101,Model 4: Rescorla-Wagner + Social Preference,0.15,0.7,0.7,0.75,0.0,0.05,0.15,0.7,0.7,0.0,13.29,2.0,1.0
4,sub-SCN101,Model 5: Rescorla-Wagner + Reaction Time Surprise,0.5,0.55,0.55,0.1,0.3,0.05,0.5,0.550001,0.549789,1e-06,13.29,2.0,1.0
