In [1]:
import numpy as np
import pandas as pd
import sys
sys.path.append('/home/ubuntu/repos/learning-habits-analysis')
from utils.data import Subject, load_participant_list

In [2]:
# Root directory of the learning-habits dataset; passed to the project loaders below.
base_dir = '/home/ubuntu/data/learning-habits'
# Participant list for this data directory (whatever load_participant_list returns);
# each entry is used as a subject ID when building the Subject objects.
sub_ids = load_participant_list(base_dir)

In [3]:
# One Subject per participant ID, requesting the modeling data but not the imaging data.
subjects = [
    Subject(base_dir, participant_id, include_modeling=True, include_imaging=False)
    for participant_id in sub_ids
]



In [4]:
# Stack every subject's extended_trials into one long table, with a leading
# 'sub_id' column identifying the subject each row came from.
bbt = pd.concat(
    [
        pd.concat(
            [
                # constant sub_id column, one entry per trial of this subject
                pd.DataFrame({'sub_id': [subject.sub_id] * len(subject.extended_trials)}),
                # positional index so both pieces line up row by row
                subject.extended_trials.reset_index(drop=True),
            ],
            axis=1,
        )
        for subject in subjects
    ],
    ignore_index=True,
)

# Handle trials with missing chosen/unchosen stim
A few trials were excluded from the modeling data because their RT was < 50 ms;  
as a result, the `stim_chosen` and `stim_unchosen` columns are missing (NaN) for those trials.

In [5]:
# Snapshot of bbt taken before the fix, so the corrected frame can be diffed against it afterwards.
old_bbt = bbt.copy()

In [6]:
# Trials with a recorded response, i.e. rows whose 'action' is not NaN.
bbt_resp = bbt.loc[bbt['action'].notna()]

In [7]:
# Columns shown when inspecting chosen/unchosen stimulus mismatches.
cols2display = [
    'left_stim',
    'right_stim',
    'action',
    'chosen_stim',
    'stim_chosen',
    'stim_unchosen',
]

In [8]:
# action == 1 picks the left stimulus; any other response picks the right one.
picked_left = bbt_resp['action'] == 1
chosen_stim = bbt_resp['left_stim'].where(picked_left, bbt_resp['right_stim']).astype(float)
unchosen_stim = bbt_resp['right_stim'].where(picked_left, bbt_resp['left_stim']).astype(float)

In [9]:
# The recomputed stimuli should disagree with the existing stim_chosen / stim_unchosen
# columns on exactly 3 responded trials.
n_chosen_diff = (chosen_stim != bbt_resp['stim_chosen']).sum()
assert n_chosen_diff == 3
n_unchosen_diff = (unchosen_stim != bbt_resp['stim_unchosen']).sum()
assert n_unchosen_diff == 3

In [10]:
# Overwrite stim_chosen / stim_unchosen on the responded trials with the values
# recomputed from left_stim / right_stim and action. Only the rows labelled by
# bbt_resp.index are written; trials without a response keep their original values.
bbt.loc[bbt_resp.index, 'stim_chosen'] = chosen_stim
bbt.loc[bbt_resp.index, 'stim_unchosen'] = unchosen_stim

# refresh bbt_resp so it reflects the changes in bbt
bbt_resp = bbt.loc[bbt['action'].notna()]

In [11]:
# Sanity check: no responded trial should still disagree, so this should display no rows.
remaining_mismatch = bbt_resp['chosen_stim'] != bbt_resp['stim_chosen']
bbt_resp.loc[remaining_mismatch, cols2display]

Unnamed: 0,left_stim,right_stim,action,chosen_stim,stim_chosen,stim_unchosen


In [12]:
# Diff against the pre-fix snapshot once: exactly 3 rows should have changed.
fix_diff = bbt.compare(old_bbt)
assert len(fix_diff) == 3
fix_diff

Unnamed: 0_level_0,stim_chosen,stim_chosen,stim_unchosen,stim_unchosen
Unnamed: 0_level_1,self,other,self,other
3946,8.0,,7.0,
16020,6.0,,1.0,
19753,8.0,,6.0,


In [13]:
def _get_stim_value(row, stim_col, value_kind):
    """Look up the value stored for the stimulus named in ``row[stim_col]``.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row as a Series)
        Must support ``row[...]`` and ``row.get(...)``.
    stim_col : str
        Name of the entry holding the stimulus number.
    value_kind : str
        Suffix of the value entry to read (this notebook passes 'rl' and 'ck').

    Returns
    -------
    The entry ``stim<k>_value_<value_kind>`` of ``row``, where
    ``k = int(row[stim_col])``. NaN when the stimulus number is missing or
    when ``row`` has no such entry.
    """
    stim_number = row[stim_col]
    if not pd.isna(stim_number):
        # The stimulus number may be stored as a float (e.g. 8.0); int() yields the 'stim8' key.
        value_col = f"stim{int(stim_number)}_value_{value_kind}"
        return row.get(value_col, np.nan)
    return np.nan

# Add one column per (chosen / unchosen stimulus) x (rl / ck value kind) combination,
# each filled row by row through _get_stim_value. Columns are created in the order
# chosen_value_rl, chosen_value_ck, unchosen_value_rl, unchosen_value_ck.
for stim_role in ('chosen', 'unchosen'):
    for value_kind in ('rl', 'ck'):
        bbt[f'{stim_role}_value_{value_kind}'] = bbt.apply(
            _get_stim_value, axis=1, args=(f'stim_{stim_role}', value_kind)
        )

In [14]:
#bbt.to_csv('/home/ubuntu/data/learning-habits/bbt.csv', index=False)

In [15]:
# Index the trial table by (sub_id, block) so a single run can be selected with .loc.
# Reassign rather than use inplace=True, and skip the step when the index is already
# in place: re-running this cell would otherwise raise KeyError, because the
# 'sub_id' and 'block' columns no longer exist after the first run.
if list(bbt.index.names) != ['sub_id', 'block']:
    bbt = bbt.set_index(['sub_id', 'block'])

In [16]:
# Display the trial table, now indexed by (sub_id, block).
bbt

Unnamed: 0_level_0,Unnamed: 1_level_0,left_stim,right_stim,left_value,right_value,shift,action,rt,chosen_stim,reward,correct,...,reward1C,stim_diff_prop_trainingS,stim_diff_prop_training_chosenS,diff_valS,diff_val_chosenS,score_EHI,chosen_value_rl,chosen_value_ck,unchosen_value_rl,unchosen_value_ck
sub_id,block,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
sub-01,learning1,5,7,3,4,0,1.0,0.934432,5.0,3.0,0.0,...,,,,,,,1.291667,0.000000,1.830729,0.000000
sub-01,learning1,4,6,3,4,1,1.0,0.645392,4.0,3.0,0.0,...,,,,,,,0.517113,0.000000,2.191592,0.000000
sub-01,learning1,3,1,2,1,0,,,,,,...,,,,,,,,,,
sub-01,learning1,6,8,4,5,0,1.0,0.368762,6.0,4.0,0.0,...,,,,,,,2.560893,0.000000,0.714286,0.000000
sub-01,learning1,2,4,2,3,0,1.0,0.537730,2.0,2.0,0.0,...,,,,,,,0.194196,0.000000,1.024152,0.006794
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sub-73,test,8,3,5,2,0,1.0,0.311992,8.0,5.0,1.0,...,1.997063,0.308900,-0.087760,1.778391,1.522241,26.0,5.000000,0.303980,2.000000,0.196392
sub-73,test,7,3,4,2,1,1.0,0.368056,7.0,4.0,1.0,...,0.997063,-0.254991,-0.699711,1.185594,0.672531,26.0,4.000000,0.257360,2.000000,0.195369
sub-73,test,2,3,2,2,1,2.0,0.377980,3.0,2.0,,...,-1.002937,-1.023934,0.691087,0.000000,-1.026889,26.0,2.000000,0.194352,2.000000,0.126914
sub-73,test,5,7,3,4,1,2.0,0.464238,7.0,4.0,1.0,...,-0.002937,-1.023934,0.691087,-0.592797,-0.177179,26.0,4.000000,0.261228,3.000000,0.104130


In [20]:
# Chosen-minus-unchosen RL value on every trial of sub-23's learning2 run.
# The tuple selects on both index levels (sub_id, block); it is a row key, not (row, column).
sub23_learning2 = bbt.loc[('sub-23', 'learning2')]
sub23_learning2['chosen_value_rl'] - sub23_learning2['unchosen_value_rl']

sub_id  block    
sub-23  learning2         NaN
        learning2    0.999998
        learning2    0.999999
        learning2    1.000000
        learning2    1.000000
                       ...   
        learning2    1.000000
        learning2    1.000000
        learning2    1.000000
        learning2    1.000000
        learning2    1.000000
Length: 96, dtype: float64

In [18]:
# RL value of the unchosen stimulus on every trial of sub-71's learning2 run.
sub71_learning2 = bbt.loc[('sub-71', 'learning2')]
sub71_learning2['unchosen_value_rl']

sub_id  block    
sub-71  learning2    3.0
        learning2    4.0
        learning2    3.0
        learning2    2.0
        learning2    4.0
                    ... 
        learning2    4.0
        learning2    3.0
        learning2    1.0
        learning2    3.0
        learning2    4.0
Name: unchosen_value_rl, Length: 96, dtype: float64

In [21]:
# Accuracy summary per subject and run, computed from the 'correct' column
# (grouping uses the sub_id / block index levels).
acc_per_run = (
    bbt.groupby(['sub_id', 'block'])
    .agg(
        accuracy=('correct', 'mean'),   # proportion correct (NaNs ignored)
        n_trials=('correct', 'count'),  # number of trials with a non-null 'correct'
        n_correct=('correct', 'sum'),   # number of correct trials (NaNs ignored)
    )
    .reset_index()
    # sort for readability
    .sort_values(['sub_id', 'block'])
    .reset_index(drop=True)
)

acc_per_run

Unnamed: 0,sub_id,block,accuracy,n_trials,n_correct
0,sub-01,learning1,0.838710,93,78.0
1,sub-01,learning2,0.978723,94,92.0
2,sub-01,test,0.990000,100,99.0
3,sub-02,learning1,0.916667,96,88.0
4,sub-02,learning2,0.968750,96,93.0
...,...,...,...,...,...
181,sub-72,learning2,0.937500,96,90.0
182,sub-72,test,1.000000,100,100.0
183,sub-73,learning1,0.784946,93,73.0
184,sub-73,learning2,0.843750,96,81.0


In [23]:
# The 20 runs with the highest accuracy, best first.
(
    acc_per_run
    .sort_values(by='accuracy', ascending=False)
    .head(20)
)

Unnamed: 0,sub_id,block,accuracy,n_trials,n_correct
61,sub-23,learning2,1.0,94,94.0
182,sub-72,test,1.0,100,100.0
178,sub-71,learning2,1.0,96,96.0
32,sub-12,test,1.0,100,100.0
59,sub-22,test,1.0,100,100.0
62,sub-23,test,1.0,98,98.0
137,sub-55,test,1.0,100,100.0
52,sub-20,learning2,1.0,95,95.0
113,sub-47,test,1.0,98,98.0
112,sub-47,learning2,1.0,96,96.0
