In [1]:
import numpy as np
import pandas as pd
import sys
sys.path.append('/home/ubuntu/repos/learning-habits-analysis')
from utils.data import Subject, load_participant_list

In [2]:
base_dir = '/home/ubuntu/data/learning-habits'
sub_ids = load_participant_list(base_dir)

In [3]:
#modeling_dir = 'modeling_data/2024-09-27'
modeling_dir = 'modeling_data/2025-12-18'

In [4]:
subjects = [Subject(base_dir, sub_id, include_modeling=True, include_imaging=False, modeling_dir=modeling_dir) for sub_id in sub_ids]



In [5]:
bbt = pd.concat(
    [pd.concat([pd.DataFrame({'sub_id': [sub.sub_id] * len(sub.extended_trials)}), sub.extended_trials.reset_index(drop=True)], axis=1) for sub in subjects],
    ignore_index=True
)

In [19]:
bbt.groupby('sub_id')['alpha_ck5'].mean().pipe(lambda s: s[s == 0].index.tolist())

['sub-02',
 'sub-03',
 'sub-04',
 'sub-07',
 'sub-14',
 'sub-18',
 'sub-21',
 'sub-22',
 'sub-27',
 'sub-36',
 'sub-40',
 'sub-45',
 'sub-50',
 'sub-54',
 'sub-56',
 'sub-60',
 'sub-61',
 'sub-64',
 'sub-67',
 'sub-72']

# Handle trials with missing chosen/unchosen stim
a few trials, due to RT < 50ms were excluded from the modeling data,  
this resulted in missing 'chosen_stim' and 'unchosen_stim' data.

In [6]:
# to check before fixing
old_bbt = bbt.copy()

In [7]:
bbt_resp = bbt[~bbt['action'].isna()]

In [8]:
cols2display = ['left_stim', 'right_stim', 'action', 'chosen_stim', 'stim_chosen', 'stim_unchosen']

In [9]:
chosen_stim = bbt_resp.left_stim.where(bbt_resp.action == 1, bbt_resp.right_stim).astype(float)
unchosen_stim = bbt_resp.right_stim.where(bbt_resp.action == 1, bbt_resp.left_stim).astype(float)

In [10]:
# Check that the mismatch corresponds to 3 trials
assert (len(chosen_stim) - (chosen_stim == bbt_resp['stim_chosen']).sum()) == 3
assert (len(unchosen_stim) - (unchosen_stim == bbt_resp['stim_unchosen']).sum()) == 3

In [11]:
bbt.loc[bbt_resp.index, 'stim_chosen'] = chosen_stim
bbt.loc[bbt_resp.index, 'stim_unchosen'] = unchosen_stim

# refresh bbt_resp so it reflects the changes in bbt
bbt_resp = bbt.loc[bbt['action'].notna()]

In [12]:
# this should now be empty
bbt_resp.loc[bbt_resp['chosen_stim'] != bbt_resp['stim_chosen'], cols2display]

Unnamed: 0,left_stim,right_stim,action,chosen_stim,stim_chosen,stim_unchosen


In [13]:
assert len(bbt.compare(old_bbt)) == 3
bbt.compare(old_bbt)

Unnamed: 0_level_0,stim_chosen,stim_chosen,stim_unchosen,stim_unchosen
Unnamed: 0_level_1,self,other,self,other
3946,8.0,,7.0,
16020,6.0,,1.0,
19753,8.0,,6.0,


# Create Chosen/Unchosen columns

In [14]:
def _get_stim_value(row, stim_col, value_kind):
    stim = row[stim_col]
    if pd.isna(stim):
        return np.nan
    return row.get(f"stim{int(stim)}_value_{value_kind}", np.nan)

bbt['chosen_value_rl'] = bbt.apply(lambda r: _get_stim_value(r, 'stim_chosen', 'rl'), axis=1)
bbt['chosen_value_ck'] = bbt.apply(lambda r: _get_stim_value(r, 'stim_chosen', 'ck'), axis=1)
bbt['unchosen_value_rl'] = bbt.apply(lambda r: _get_stim_value(r, 'stim_unchosen', 'rl'), axis=1)
bbt['unchosen_value_ck'] = bbt.apply(lambda r: _get_stim_value(r, 'stim_unchosen', 'ck'), axis=1)

# Handle normalization of pmod columns

In [15]:
columns_to_normalize = ['reward',
                        'first_stim_value_rl', 'second_stim_value_rl',
                        'first_stim_value_ck', 'second_stim_value_ck', 
                        'first_stim_choice_val', 'second_stim_choice_val',
                        'chosen_value_rl', 'chosen_value_ck',
                        'unchosen_value_rl', 'unchosen_value_ck']

In [16]:
for col in columns_to_normalize:
    bbt[col+'_zscore'] = (
        bbt.groupby('sub_id')[col]
           .transform(lambda x: (x - x.mean()) / x.std())
    )

In [22]:
# Check that the z-scoring worked correctly
for sub_id, group in bbt.groupby('sub_id'):
    if sub_id in ['sub-02', 'sub-04', 'sub-45']:
        continue  # skip subject because alpha_ck is 0
    for col in columns_to_normalize:
        col_z = col + '_zscore'
        mean = np.nanmean(group[col_z])
        std = group[col_z].std()
        assert np.isclose(mean, 0, atol=1e-6), f"Mean for {col_z} in subject {sub_id} is not zero: {mean}"
        assert np.isclose(std, 1, atol=1e-6), f"Std for {col_z} in subject {sub_id} is not one: {std}"

  mean = np.nanmean(group[col_z])


AssertionError: Mean for first_stim_value_ck_zscore in subject sub-03 is not zero: nan

In [None]:
bbt.groupby(['sub_id','block'])['first_stim_value_ck_zscore'].mean()

# Save to csv

In [None]:
bbt.to_csv('/home/ubuntu/data/learning-habits/bbt.csv', index=False)

In [None]:
bbt.set_index(['sub_id', 'block'], inplace=True)

In [None]:
bbt

In [None]:
bbt.loc['sub-23','learning2']['chosen_value_rl']-bbt.loc['sub-23', 'learning2']['unchosen_value_rl']

In [None]:
bbt.loc['sub-71','learning2']['unchosen_value_rl']

In [None]:
# compute mean accuracy (proportion correct) per run for each subject
acc_per_run = bbt.groupby(['sub_id', 'block']).agg(
    accuracy=('correct', 'mean'),        # mean of 0/1 (NaNs ignored)
    n_trials=('correct', 'count'),      # number of trials with a non-null 'correct'
    n_correct=('correct', 'sum')        # sum of corrects (NaNs ignored)
).reset_index()

# sort for readability
acc_per_run = acc_per_run.sort_values(['sub_id', 'block']).reset_index(drop=True)

acc_per_run

In [None]:
acc_per_run.sort_values('accuracy', ascending=False).head(20)