In [64]:
import glob
import os
import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import scipy
from scipy import optimize
import read_files
from copy import deepcopy
from sklearn.metrics import mean_squared_error
import warnings
warnings.simplefilter(action='ignore', category=RuntimeWarning)

In [2]:
# Load the stimulus tables shipped with the experiment.
choice_stimuli = pd.read_csv('../../stimuli/stimuli_trinary.csv')
binary_stimuli = pd.read_csv('../../stimuli/stimuli_binary.csv')
# `trinary_stimuli` is an alias of (not a copy of) `choice_stimuli`.
# NOTE(review): the binary table read here is replaced in the next cell by one
# derived from the trinary table.
trinary_stimuli = choice_stimuli

### Fit computational models: Adaptive Gain

#### min-max scaling of amount and probabilities

In [3]:
# Binary table: the trinary table without option C, plus the per-trial mean of the
# two remaining options' amounts / probabilities. Must be built before
# `trinary_stimuli` is reassigned below because it needs `binary_id`.
binary_stimuli = (
    trinary_stimuli
    .drop(columns=['catch', 'decoyDirection', 'amountC', 'probC', 'trinary_id'])
    .assign(
        avg_amounts=lambda d: d[['amountA', 'amountB']].mean(axis=1),
        avg_probs=lambda d: d[['probA', 'probB']].mean(axis=1),
    )
    .loc[lambda d: d.binary_id < 28]  # remove catch trials
)

# Trinary table: per-trial means are taken over all three options.
trinary_stimuli = (
    trinary_stimuli
    .drop(columns=['catch', 'decoyDirection', 'binary_id'])
    .assign(
        avg_amounts=lambda d: d[['amountA', 'amountB', 'amountC']].mean(axis=1),
        avg_probs=lambda d: d[['probA', 'probB', 'probC']].mean(axis=1),
    )
    .loc[lambda d: d.trinary_id < 28]  # remove catch trials
)

In [4]:
# Fixed ranges used for min-max scaling of amounts and probabilities.
min_amount = 4
max_amount = 79
min_prob = 0
max_prob = 100
# Expected values are scaled by the range observed over all three options of the
# (catch-trial-free) trinary stimuli.
all_EVs = np.concatenate([trinary_stimuli.EVA.unique(), trinary_stimuli.EVB.unique(), trinary_stimuli.EVC.unique()])
min_EV = np.min(all_EVs)
max_EV = np.max(all_EVs)
to_scale_cols = ['amountA', 'probA', 'amountB', 'probB', 'amountC', 'probC', 'EVA', 'EVB', 'EVC', 'avg_amounts', 'avg_probs']


def _rescale(values, lower, upper):
    """Min-max scale `values` so that `lower` maps to 0 and `upper` maps to 1."""
    return (values - lower) / (upper - lower)


def _normalize_stimuli(stimuli, options):
    """Return a copy of `stimuli` with amount/prob/EV columns min-max scaled.

    Parameters
    ----------
    stimuli : pandas.DataFrame
        Must contain `amount<X>`, `prob<X>` and `EV<X>` for every label X in `options`.
    options : iterable of str
        Option labels to scale, e.g. 'ABC' for trinary or 'AB' for binary trials.
        Columns of other options are left as they are.

    Returns
    -------
    pandas.DataFrame
        Scaled copy; `avg_amounts` / `avg_probs` are recomputed from the scaled
        columns of the listed options.
    """
    scaled = stimuli.copy()
    for opt in options:
        # Whole-column assignment replaces the column (and its dtype) outright,
        # instead of writing floats into a possibly-integer column through
        # `.loc[:, col] = ...`, which pandas deprecates as a silent upcast.
        scaled['amount' + opt] = _rescale(stimuli['amount' + opt], min_amount, max_amount)
        scaled['prob' + opt] = _rescale(stimuli['prob' + opt], min_prob, max_prob)
        scaled['EV' + opt] = _rescale(stimuli['EV' + opt], min_EV, max_EV)
    scaled['avg_amounts'] = scaled[['amount' + opt for opt in options]].mean(axis=1)
    scaled['avg_probs'] = scaled[['prob' + opt for opt in options]].mean(axis=1)
    return scaled


trinary_stimuli_norm = _normalize_stimuli(trinary_stimuli, 'ABC')

binary_stimuli_norm = _normalize_stimuli(binary_stimuli, 'AB')

#### read choice file per set, instead of per subject


In [5]:
# Read every per-set choice file and stack them in one go. Sorting makes the row
# order independent of the order in which the OS happens to list the files.
raw_files = sorted(glob.glob('../../data/behavioral_experiment/raw_choices_per_set/*.csv'))
if not raw_files:
    raise FileNotFoundError(
        "no choice files found under '../../data/behavioral_experiment/raw_choices_per_set/'"
    )
# A single concat avoids re-copying the accumulated frame on every iteration and
# avoids concatenating onto an empty DataFrame.
raw_choices = pd.concat([pd.read_csv(path) for path in raw_files], axis=0)
# remove catch trials
raw_choices = raw_choices[raw_choices.binary_id < 28]
# keep only subject ids that contain a '-'; rows with a missing id are dropped
# instead of breaking the boolean mask
raw_choices = raw_choices[raw_choices.subject_id.str.contains('-', na=False)]

In [6]:
# Attach the (normalised) lottery attributes to each recorded choice.
# Split the choices by experimental group first ...
in_binary_group = raw_choices.trinary_group == 0
in_trinary_group = raw_choices.trinary_group == 1
binary_raw_choices = raw_choices[in_binary_group]
trinary_raw_choices = raw_choices[in_trinary_group]
# ... then join each group with its own stimulus table on the matching id.
# The join is an outer join, so stimuli without any recorded choice are kept too.
trinary_raw_choices_stimuli = pd.merge(
    trinary_raw_choices, trinary_stimuli_norm, on='trinary_id', how='outer'
)
binary_raw_choices_stimuli = pd.merge(
    binary_raw_choices, binary_stimuli_norm, on='binary_id', how='outer'
)

In [7]:
# Stack binary-group and trinary-group choices (row-wise) into one table.
# NOTE(review): columns present in only one of the two frames (presumably the
# option-C attributes) come out as NaN for the other group's rows.
raw_choices_stimuli = pd.concat([binary_raw_choices_stimuli, trinary_raw_choices_stimuli])

In [32]:
def run_model(params, df, return_p=0):
    """Adaptive-gain choice model for the A-vs-B choice.

    Each attribute (amount, probability) of an option is passed through a logistic
    function centred on the trial average of that attribute (`avg_amounts` /
    `avg_probs`) shifted by `c_amount` / `c_prob`. The two attribute utilities are
    mixed with weight `w` and options A and B are compared with a softmax of
    inverse temperature `tau`.

    The returned probabilities are relative to A and B only, so a third option C
    influences the result solely through the `avg_amounts` / `avg_probs` columns
    of `df`; the option-C columns themselves are not needed here.

    Parameters
    ----------
    params : sequence of 5 floats
        (c_amount, c_prob, tau, slope, w).
    df : pandas.DataFrame
        Subjects' raw choices, or a table of stimuli without choices. Must contain
        amountA, amountB, probA, probB, avg_amounts and avg_probs; `target_choice`
        (1 = chose A, 0 = chose B) is required for the likelihood.
    return_p : bool or int, default 0
        If truthy, return the per-row probability of choosing A (for inference).

    Returns
    -------
    numpy.ndarray or float
        Per-row P(choose A) when `return_p` is truthy, otherwise the negative
        log-likelihood of `df.target_choice` (for optimisation).

    Raises
    ------
    ValueError
        If `return_p` is falsy and `df` has no `target_choice` column.
    """
    # Local import keeps the block self-contained; expit is an overflow-safe logistic.
    from scipy.special import expit

    c_amount, c_prob, tau, slope, w = params

    pair_amounts = df[['amountA', 'amountB']].to_numpy(dtype=float)
    pair_probs = df[['probA', 'probB']].to_numpy(dtype=float)
    # Column vectors so the per-trial averages broadcast over both options.
    avg_amounts = df['avg_amounts'].to_numpy(dtype=float)[:, np.newaxis]
    avg_probs = df['avg_probs'].to_numpy(dtype=float)[:, np.newaxis]

    # adaptive gain
    u_amounts = expit((pair_amounts - avg_amounts - c_amount) / slope)
    u_probs = expit((pair_probs - avg_probs - c_prob) / slope)
    u_all = w * u_amounts + (1 - w) * u_probs

    # exp(tau*u_A) / (exp(tau*u_A) + exp(tau*u_B)) written as a logistic of the
    # utility difference: same value, but no inf/inf = NaN when tau is large.
    logit_A = tau * (u_all[:, 0] - u_all[:, 1])
    p_A = expit(logit_A)
    p_B = expit(-logit_A)
    # Probabilities that underflow to exactly 0 are floored so log() stays finite.
    p_A[p_A == 0] = 1e-5
    p_B[p_B == 0] = 1e-5
    if return_p:
        # return probability to choose A, for inference
        return p_A
    if 'target_choice' not in df.columns:
        raise ValueError("df has no 'target_choice' column; pass return_p=1 to get choice probabilities")
    # return negative log likelihood, for optimization
    choose_A = df['target_choice'].to_numpy()
    neg_log_like = -1 * np.sum(choose_A * np.log(p_A) + (1 - choose_A) * np.log(p_B))
    return neg_log_like

### out of sample predictions with AG (leave-one-set-out)

In [None]:
# NOTE: chdir changes the working directory for the whole kernel, so every
# relative path used after this cell is resolved from '../mri'.
os.chdir('../mri')
from utils import load_params  # presumably only importable from the new working directory

behavior_results = load_params.load_behavior_results()
set_objs = load_params.load_sets(behavior_results)

# Build leave-one-set-out folds: for each held-out set, every set that
# `overlapping_with` reports as overlapping goes to the test side, the rest to
# the training side. Identical (train, test) index splits are stored only once.
folds = []
for set_out in set_objs:
    test_sets, test_ind = [], []
    train_sets, train_ind = [], []
    for j, set_obj in enumerate(set_objs):
        if set_obj.overlapping_with(set_out):
            sets_bucket, ind_bucket = test_sets, test_ind
        else:
            sets_bucket, ind_bucket = train_sets, train_ind
        sets_bucket.append(set_obj)
        ind_bucket.append(j)
    fold = (train_ind, test_ind)
    if fold not in folds:
        folds.append(fold)

In [None]:
# Per-fold scores of the leave-one-set-out cross-validation.
cv_rmse = np.zeros(len(folds))
cv_corr = np.zeros(len(folds))
train_rmse = np.zeros(len(folds))
c_amount_init = 0
c_prob_init = 0
tau_init = 0.15
slope_init = 0.1
w_init = 0.3
# Assemble the optimiser's starting point in this cell so the loop does not
# depend on a variable that is only created by a later cell.
AG_init_params = np.array([c_amount_init, c_prob_init, tau_init, slope_init, w_init])

# NOTE(review): `y` (observed decoy effects, indexed by set position) is not
# defined in this cell; it must already exist in the kernel before running it.


def _predict_decoy_effects(model_params, set_inds):
    """Predicted decoy effect (trinary minus binary P(A)) for the given set ids.

    NaN predictions are replaced by 0 so that the error metrics can be computed.
    """
    trinary_subset = trinary_stimuli_norm.loc[trinary_stimuli_norm.trinary_id.isin(set_inds)]
    binary_subset = binary_stimuli_norm.loc[binary_stimuli_norm.binary_id.isin(set_inds)]
    trinary_ratio_pred = run_model(model_params, trinary_subset, return_p=1)
    binary_ratio_pred = run_model(model_params, binary_subset, return_p=1)
    # predicted decoy effect is predicted trinary ratio minus predicted binary ratio
    return np.nan_to_num(trinary_ratio_pred - binary_ratio_pred)


for fold_i, (train_ind, test_ind) in enumerate(tqdm.tqdm(folds)):
    # fit model to training sets
    y_train, y_test = y[train_ind], y[test_ind]
    # fold indices are 0-based positions; set ids in the tables are 1-based
    train_set_inds = np.array(train_ind) + 1
    test_set_inds = np.array(test_ind) + 1
    train_raw_choices = raw_choices_stimuli[raw_choices_stimuli.trinary_id.isin(train_set_inds)]
    test_raw_choices = raw_choices_stimuli[raw_choices_stimuli.trinary_id.isin(test_set_inds)]
    AG_group_params = optimize.fmin(run_model, AG_init_params, args=(train_raw_choices, 0), maxiter=10000, disp=True)

    # out-of-sample error and rank correlation on the held-out sets
    decoy_pred = _predict_decoy_effects(AG_group_params, test_set_inds)
    cv_rmse[fold_i] = np.sqrt(mean_squared_error(y_test, decoy_pred))
    corr = scipy.stats.spearmanr(decoy_pred, y_test)[0]
    # an undefined correlation (e.g. constant predictions) is scored as 0
    cv_corr[fold_i] = corr if not np.isnan(corr) else 0

    # in-sample error on the training sets
    train_decoy_pred = _predict_decoy_effects(AG_group_params, train_set_inds)
    train_rmse[fold_i] = np.sqrt(mean_squared_error(y_train, train_decoy_pred))

  4%|▍         | 1/25 [00:01<00:42,  1.79s/it]

Optimization terminated successfully.
         Current function value: 4994.638847
         Iterations: 494
         Function evaluations: 788


  8%|▊         | 2/25 [00:04<00:51,  2.22s/it]

Optimization terminated successfully.
         Current function value: 4932.161411
         Iterations: 676
         Function evaluations: 1103


 12%|█▏        | 3/25 [00:07<00:57,  2.63s/it]

Optimization terminated successfully.
         Current function value: 4699.147492
         Iterations: 863
         Function evaluations: 1382


 16%|█▌        | 4/25 [00:09<00:49,  2.33s/it]

Optimization terminated successfully.
         Current function value: 4693.421686
         Iterations: 537
         Function evaluations: 855


 20%|██        | 5/25 [00:11<00:42,  2.11s/it]

Optimization terminated successfully.
         Current function value: 4809.875084
         Iterations: 475
         Function evaluations: 773


 24%|██▍       | 6/25 [00:13<00:41,  2.19s/it]

Optimization terminated successfully.
         Current function value: 5411.537233
         Iterations: 614
         Function evaluations: 1000


 28%|██▊       | 7/25 [00:14<00:35,  1.99s/it]

Optimization terminated successfully.
         Current function value: 5012.209351
         Iterations: 444
         Function evaluations: 709


 32%|███▏      | 8/25 [00:17<00:34,  2.04s/it]

Optimization terminated successfully.
         Current function value: 5834.963057
         Iterations: 568
         Function evaluations: 917


 36%|███▌      | 9/25 [00:18<00:31,  1.94s/it]

Optimization terminated successfully.
         Current function value: 5262.705480
         Iterations: 480
         Function evaluations: 761


 40%|████      | 10/25 [00:21<00:31,  2.08s/it]

Optimization terminated successfully.
         Current function value: 4941.596022
         Iterations: 659
         Function evaluations: 1074


 44%|████▍     | 11/25 [00:22<00:26,  1.88s/it]

Optimization terminated successfully.
         Current function value: 4927.100801
         Iterations: 368
         Function evaluations: 622


 48%|████▊     | 12/25 [00:26<00:32,  2.47s/it]

Optimization terminated successfully.
         Current function value: 5090.076712
         Iterations: 994
         Function evaluations: 1643


 52%|█████▏    | 13/25 [00:28<00:28,  2.41s/it]

Optimization terminated successfully.
         Current function value: 4812.363828
         Iterations: 637
         Function evaluations: 1003


 56%|█████▌    | 14/25 [00:30<00:24,  2.20s/it]

Optimization terminated successfully.
         Current function value: 4769.018360
         Iterations: 485
         Function evaluations: 775


 60%|██████    | 15/25 [00:31<00:20,  2.00s/it]

Optimization terminated successfully.
         Current function value: 5054.119459
         Iterations: 401
         Function evaluations: 636


 64%|██████▍   | 16/25 [00:34<00:18,  2.02s/it]

Optimization terminated successfully.
         Current function value: 4572.141469
         Iterations: 582
         Function evaluations: 943


 68%|██████▊   | 17/25 [00:35<00:13,  1.73s/it]

Optimization terminated successfully.
         Current function value: 4947.659993
         Iterations: 264
         Function evaluations: 450


 72%|███████▏  | 18/25 [00:37<00:12,  1.85s/it]

Optimization terminated successfully.
         Current function value: 4661.264897
         Iterations: 625
         Function evaluations: 981


 76%|███████▌  | 19/25 [00:40<00:14,  2.35s/it]

Optimization terminated successfully.
         Current function value: 4289.047116
         Iterations: 968
         Function evaluations: 1574


 80%|████████  | 20/25 [00:43<00:11,  2.33s/it]

Optimization terminated successfully.
         Current function value: 4925.119664
         Iterations: 617
         Function evaluations: 1001


 84%|████████▍ | 21/25 [00:44<00:08,  2.18s/it]

Optimization terminated successfully.
         Current function value: 4510.816397
         Iterations: 509
         Function evaluations: 829


 88%|████████▊ | 22/25 [00:47<00:06,  2.26s/it]

Optimization terminated successfully.
         Current function value: 4844.304055
         Iterations: 668
         Function evaluations: 1083


 92%|█████████▏| 23/25 [00:50<00:04,  2.43s/it]

Optimization terminated successfully.
         Current function value: 4663.337854
         Iterations: 810
         Function evaluations: 1273


 96%|█████████▌| 24/25 [00:51<00:02,  2.15s/it]

Optimization terminated successfully.
         Current function value: 5212.572638
         Iterations: 386
         Function evaluations: 638


100%|██████████| 25/25 [00:53<00:00,  2.14s/it]

Optimization terminated successfully.
         Current function value: 4894.390753
         Iterations: 531
         Function evaluations: 849





In [None]:
# average out-of-sample RMSE across folds
cv_rmse.mean()

0.11180057258869

In [None]:
# average out-of-sample Spearman correlation across folds
cv_corr.mean()

0.2751301537428308

### fitting with AG

In [56]:
# Starting point for the Nelder-Mead search, in the order run_model unpacks it.
c_amount_init, c_prob_init = 0, 0
tau_init, slope_init, w_init = 0.15, 0.1, 0.3
AG_init_params = np.array([c_amount_init, c_prob_init, tau_init, slope_init, w_init])
# Fit one parameter set to all choices by minimising the negative log-likelihood.
AG_group_params = optimize.fmin(
    run_model,
    AG_init_params,
    args=(raw_choices_stimuli, 0),
    maxiter=1e4,
)

Optimization terminated successfully.
         Current function value: 6512.408070
         Iterations: 482
         Function evaluations: 771


In [57]:
# fitted group-level parameters, in the order run_model unpacks them:
# (c_amount, c_prob, tau, slope, w)
AG_group_params

array([-0.21432596,  0.03485916,  2.60392595,  0.03045105,  0.73974457])

In [58]:
# Observed decoy effects; `y` is the decoy effect on option A as a plain array.
real_decoy_effects = pd.read_csv('../../results/decoy_table.csv')
y = real_decoy_effects['decoy_effect_A'].values

In [59]:
# Model-predicted P(choose A) for every trinary set and its binary counterpart.
trinary_P_a = run_model(AG_group_params, trinary_stimuli_norm, return_p=1)
binary_P_a = run_model(AG_group_params, binary_stimuli_norm, return_p=1)
# The predicted decoy effect is the change in P(A) when the decoy is present.
decoys = trinary_P_a - binary_P_a
AG_decoys = pd.DataFrame({
    'trinary_id': trinary_stimuli.trinary_id,
    'P_a_trinary': trinary_P_a,
    'P_a_binary': binary_P_a,
    'decoy_AG': decoys,
})
# Put the observed effect next to the prediction and order sets by observed effect.
AG_decoys = pd.merge(
    AG_decoys,
    real_decoy_effects[['trinary_id', 'decoy_effect_A']],
    on='trinary_id',
).sort_values('decoy_effect_A')

In [63]:
# correlation between predicted and observed decoy effects across sets
AG_decoys[['decoy_AG', 'decoy_effect_A']].corr().loc['decoy_AG', 'decoy_effect_A']

0.7384623403900014