In [42]:
import numpy as np
import pandas as pd
import sys
sys.path.append('/home/ubuntu/repos/learning-habits-analysis')
from utils.data import Subject, load_participant_list

In [43]:
# Root directory of the learning-habits data (absolute, machine-specific path).
base_dir = '/home/ubuntu/data/learning-habits'
# Identifiers later passed one by one to Subject(...); presumably the participant IDs
# found under base_dir — verify against utils.data.load_participant_list.
sub_ids = load_participant_list(base_dir)

In [44]:
# One Subject per participant, built with modeling enabled and imaging disabled.
subject_options = dict(include_modeling=True, include_imaging=False)
subjects = [Subject(base_dir, participant, **subject_options) for participant in sub_ids]



In [45]:
# Stack every subject's extended trials into one long table, with a leading
# 'sub_id' column identifying which subject each row came from.
per_subject_frames = []
for sub in subjects:
    trials = sub.extended_trials.reset_index(drop=True)
    id_column = pd.DataFrame({'sub_id': [sub.sub_id] * len(trials)})
    per_subject_frames.append(pd.concat([id_column, trials], axis=1))

bbt = pd.concat(per_subject_frames, ignore_index=True)

# Handle trials with missing chosen/unchosen stim
A few trials were excluded from the modeling data because their RT was < 50 ms;  
as a result, `stim_chosen` and `stim_unchosen` are missing for those trials.

In [46]:
# Snapshot of bbt taken before the fix, so the changes can be diffed afterwards
# with bbt.compare(old_bbt).
old_bbt = bbt.copy()

In [47]:
# Keep only trials with a recorded action (i.e. the participant responded).
bbt_resp = bbt.loc[bbt['action'].notna()]

In [48]:
# Columns shown when inspecting rows where 'chosen_stim' and 'stim_chosen' disagree.
cols2display = ['left_stim', 'right_stim', 'action', 'chosen_stim', 'stim_chosen', 'stim_unchosen']

In [49]:
# Recompute chosen/unchosen stimulus from the raw columns:
# action == 1 selects the left stimulus, any other action selects the right one.
picked_left = bbt_resp['action'] == 1
chosen_stim = bbt_resp['left_stim'].where(picked_left, bbt_resp['right_stim']).astype(float)
unchosen_stim = bbt_resp['right_stim'].where(picked_left, bbt_resp['left_stim']).astype(float)

In [50]:
# The recomputed values must agree with the stored ones everywhere except
# on exactly 3 trials (NaN never compares equal, so missing values count as mismatches).
expected_mismatches = 3
for recomputed, stored_col in ((chosen_stim, 'stim_chosen'), (unchosen_stim, 'stim_unchosen')):
    n_matching = (recomputed == bbt_resp[stored_col]).sum()
    assert (len(recomputed) - n_matching) == expected_mismatches

In [51]:
# Overwrite the stored chosen/unchosen stimulus on all responded trials
# with the values recomputed from left_stim/right_stim/action.
responded_rows = bbt_resp.index
for target_col, recomputed in (('stim_chosen', chosen_stim), ('stim_unchosen', unchosen_stim)):
    bbt.loc[responded_rows, target_col] = recomputed

# Re-derive bbt_resp from the updated bbt so it carries the filled-in values.
bbt_resp = bbt[bbt['action'].notna()]

In [52]:
# After the fix no responded trial should disagree, so this table should have no rows.
still_mismatched = bbt_resp['chosen_stim'] != bbt_resp['stim_chosen']
bbt_resp.loc[still_mismatched, cols2display]

Unnamed: 0,left_stim,right_stim,action,chosen_stim,stim_chosen,stim_unchosen


In [53]:
# Diff against the pre-fix snapshot: exactly 3 rows should have changed.
changed_rows = bbt.compare(old_bbt)
assert len(changed_rows) == 3
changed_rows

Unnamed: 0_level_0,stim_chosen,stim_chosen,stim_unchosen,stim_unchosen
Unnamed: 0_level_1,self,other,self,other
3946,8.0,,7.0,
16020,6.0,,1.0,
19753,8.0,,6.0,


# Create Chosen/Unchosen columns

In [55]:
def _get_stim_value(row, stim_col, value_kind):
    """Return the value stored for the stimulus that ``row[stim_col]`` points to.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must support ``row[key]`` and ``row.get(key, default)``.
    stim_col : str
        Name of the field holding the stimulus number.
    value_kind : str
        Suffix of the value field to read, giving the field name
        ``stim<N>_value_<value_kind>``.

    Returns
    -------
    The content of ``stim<N>_value_<value_kind>`` where ``N = int(row[stim_col])``,
    or NaN when the stimulus number is missing or that field does not exist.
    """
    stim_id = row[stim_col]
    if not pd.isna(stim_id):
        value_field = "stim{}_value_{}".format(int(stim_id), value_kind)
        return row.get(value_field, np.nan)
    return np.nan

# For each of the chosen/unchosen stimuli and each value kind (rl, ck), look up
# the per-row value via _get_stim_value. Columns are created in the order
# chosen_rl, chosen_ck, unchosen_rl, unchosen_ck.
for choice in ('chosen', 'unchosen'):
    for kind in ('rl', 'ck'):
        bbt[f'{choice}_value_{kind}'] = bbt.apply(
            _get_stim_value, axis=1, args=(f'stim_{choice}', kind)
        )

# Handle normalization of pmod columns

In [56]:
# Columns that get a per-subject '<name>_zscore' companion column.
columns_to_normalize = [
    'reward',
    # first/second stimulus columns
    'first_stim_value_rl',
    'second_stim_value_rl',
    'first_stim_value_ck',
    'second_stim_value_ck',
    'first_stim_choice_val',
    'second_stim_choice_val',
    # chosen/unchosen value columns
    'chosen_value_rl',
    'chosen_value_ck',
    'unchosen_value_rl',
    'unchosen_value_ck',
]

In [57]:
def _zscore_within_group(values):
    """Centre a group's values on their mean and scale by their standard deviation."""
    return (values - values.mean()) / values.std()


# Add a '<col>_zscore' column for every listed column, standardised within each subject.
for col in columns_to_normalize:
    per_subject = bbt.groupby('sub_id')[col]
    bbt[col + '_zscore'] = per_subject.transform(_zscore_within_group)

In [58]:
# Sanity check: within each subject, every z-scored column should have mean 0 and std 1.
skipped_subjects = ('sub-04', 'sub-45')  # skipped because alpha_ck is 0
for sub_id, group in bbt.groupby('sub_id'):
    if sub_id not in skipped_subjects:
        for col_z in (name + '_zscore' for name in columns_to_normalize):
            mean = np.nanmean(group[col_z])
            std = group[col_z].std()
            assert np.isclose(mean, 0, atol=1e-6), f"Mean for {col_z} in subject {sub_id} is not zero: {mean}"
            assert np.isclose(std, 1, atol=1e-6), f"Std for {col_z} in subject {sub_id} is not one: {std}"

In [59]:
# Per-subject, per-block mean of one z-scored column. The z-scoring above was done per
# subject across all blocks, so individual block means are not expected to be 0.
bbt.groupby(['sub_id','block'])['first_stim_value_ck_zscore'].mean()

sub_id  block    
sub-01  learning1   -0.888752
        learning2   -0.147115
        test         0.731201
sub-02  learning1   -0.880984
        learning2   -0.155151
                       ...   
sub-72  learning2   -0.185266
        test         0.801487
sub-73  learning1   -0.826204
        learning2   -0.240919
        test         0.753264
Name: first_stim_value_ck_zscore, Length: 186, dtype: float64

# Save to csv

In [60]:
# Persist the trial table inside the data root. The path is built from base_dir so the
# output location follows the configured root instead of duplicating the absolute path.
# Guard: with index=False, any column that has been moved into the index (e.g. by running
# a set_index cell before this one) would be silently dropped from the file.
assert {'sub_id', 'block'}.issubset(bbt.columns), \
    "sub_id/block must be regular columns before saving with index=False"
bbt.to_csv(f'{base_dir}/bbt.csv', index=False)

In [34]:
# Move sub_id/block into a MultiIndex so a run can be selected with bbt.loc[sub_id, block].
# The column check makes this cell safe to re-run: once the columns are in the index,
# a second set_index call would raise KeyError. Rebinding replaces inplace=True.
index_cols = ['sub_id', 'block']
if set(index_cols).issubset(bbt.columns):
    bbt = bbt.set_index(index_cols)

In [35]:
# Display the (sub_id, block)-indexed table; pandas truncates the rendering of long frames.
bbt

Unnamed: 0_level_0,Unnamed: 1_level_0,left_stim,right_stim,left_value,right_value,shift,action,rt,chosen_stim,reward,correct,...,first_stim_value_rl_zscore,second_stim_value_rl_zscore,first_stim_value_ck_zscore,second_stim_value_ck_zscore,first_stim_choice_val_zscore,second_stim_choice_val_zscore,chosen_value_rl_zscore,chosen_value_ck_zscore,unchosen_value_rl_zscore,unchosen_value_ck_zscore
sub_id,block,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
sub-01,learning1,5,7,3,4,0,1.0,0.934432,5.0,3.0,0.0,...,-0.875325,-1.360409,-1.274261,-1.333968,-1.016531,-1.461812,-1.853054,-1.457170,-0.550617,-1.310773
sub-01,learning1,4,6,3,4,1,1.0,0.645392,4.0,3.0,0.0,...,-1.965438,-0.597760,-1.274261,-1.333968,-1.966110,-0.790334,-2.534799,-1.457170,-0.199857,-1.310773
sub-01,learning1,3,1,2,1,0,,,,,,...,-1.062259,-1.174870,-1.274261,-1.333968,-1.179366,-1.298453,,,,
sub-01,learning1,6,8,4,5,0,1.0,0.368762,6.0,4.0,0.0,...,-1.801813,-0.284793,-1.274261,-1.333968,-1.823579,-0.514781,-0.735907,-1.457170,-1.635802,-1.310773
sub-01,learning1,2,4,2,3,0,1.0,0.537730,2.0,2.0,0.0,...,-1.544669,-2.290468,-1.197214,-1.333968,-1.584224,-2.280687,-2.819024,-1.457170,-1.334611,-1.209257
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sub-73,test,8,3,5,2,0,1.0,0.311992,8.0,5.0,1.0,...,-0.846042,1.612940,1.422875,2.942291,-0.130809,2.317590,1.399111,2.449099,-0.495918,2.091624
sub-73,test,7,3,4,2,1,1.0,0.368056,7.0,4.0,1.0,...,0.903094,-0.743114,2.235844,1.440927,1.581889,-0.117920,0.532633,1.847502,-0.495918,2.074938
sub-73,test,2,3,2,2,1,2.0,0.377980,3.0,2.0,,...,-0.846042,-0.743114,0.496439,1.426864,-0.486675,-0.122697,-1.200324,1.034409,-0.495918,0.958041
sub-73,test,5,7,3,4,1,2.0,0.464238,7.0,4.0,1.0,...,0.028526,0.827589,0.192641,2.351314,0.096837,1.475007,0.532633,1.897407,0.405234,0.586315


In [None]:
# Chosen-minus-unchosen RL value on every trial of sub-23's learning2 block
# (requires bbt to be indexed by sub_id, block).
sub23_learning2 = bbt.loc['sub-23', 'learning2']
sub23_learning2['chosen_value_rl'] - sub23_learning2['unchosen_value_rl']

In [None]:
# Unchosen RL value on every trial of sub-71's learning2 block
# (requires bbt to be indexed by sub_id, block).
sub71_learning2 = bbt.loc['sub-71', 'learning2']
sub71_learning2['unchosen_value_rl']

In [None]:
# Accuracy summary per subject and block, built as a single chain:
# aggregate 'correct', flatten the group keys back into columns, then order the rows.
acc_per_run = (
    bbt.groupby(['sub_id', 'block'])
       .agg(
           accuracy=('correct', 'mean'),    # mean of 0/1 (NaNs ignored)
           n_trials=('correct', 'count'),   # number of trials with a non-null 'correct'
           n_correct=('correct', 'sum'),    # sum of corrects (NaNs ignored)
       )
       .reset_index()
       .sort_values(['sub_id', 'block'])    # sort for readability
       .reset_index(drop=True)
)

acc_per_run

In [None]:
# The 20 subject/block rows with the highest accuracy, best first.
acc_per_run.sort_values('accuracy', ascending=False).head(20)