In [1]:
import numpy as np
import pandas as pd
import sys
sys.path.append('/home/ubuntu/repos/learning-habits-analysis')
from utils.data import Subject, load_participant_list

In [2]:
# Root directory of the learning-habits dataset (absolute path on the analysis machine).
base_dir = '/home/ubuntu/data/learning-habits'
# Identifiers to load, obtained from the project helper for this data directory
# (presumably one ID per participant -- confirm against utils.data).
sub_ids = load_participant_list(base_dir)

In [3]:
# One Subject per participant ID, loaded with modeling data and without imaging data.
subjects = [
    Subject(
        base_dir,
        sub_id,
        include_modeling=True,
        include_imaging=False,
    )
    for sub_id in sub_ids
]



In [4]:
# Stack every subject's extended trials into one long table, with a leading
# 'sub_id' column identifying the subject each row came from.
per_subject_frames = [
    pd.concat(
        [
            pd.DataFrame({'sub_id': [sub.sub_id] * len(sub.extended_trials)}),
            sub.extended_trials.reset_index(drop=True),
        ],
        axis=1,
    )
    for sub in subjects
]
bbt = pd.concat(per_subject_frames, ignore_index=True)

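# Handle trials with missing chosen/unchosen stim
A few trials were excluded from the modeling data because their RT was < 50 ms.  
As a result, the `stim_chosen` and `stim_unchosen` columns are missing for those trials; they are reconstructed below from `action`, `left_stim` and `right_stim`.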

In [5]:
# Snapshot of bbt taken before the fix, so the changes can be checked afterwards
old_bbt = bbt.copy()

In [6]:
# Keep only the trials that have a recorded action (rows where 'action' is not NaN)
bbt_resp = bbt.loc[bbt['action'].notna()]

In [7]:
# Columns shown when inspecting trials whose stored and recomputed stimuli disagree
cols2display = ['left_stim', 'right_stim', 'action', 'chosen_stim', 'stim_chosen', 'stim_unchosen']

In [8]:
# Recompute the chosen / unchosen stimulus from the action:
# action == 1 picks the left stimulus, any other action picks the right one.
chose_left = bbt_resp['action'] == 1
chosen_stim = bbt_resp['left_stim'].where(chose_left, bbt_resp['right_stim']).astype(float)
unchosen_stim = bbt_resp['right_stim'].where(chose_left, bbt_resp['left_stim']).astype(float)

In [9]:
# Exactly 3 responded trials are expected to disagree with the stored columns
# (every row that does not compare equal counts as a mismatch).
n_chosen_mismatch = len(chosen_stim) - chosen_stim.eq(bbt_resp['stim_chosen']).sum()
n_unchosen_mismatch = len(unchosen_stim) - unchosen_stim.eq(bbt_resp['stim_unchosen']).sum()
assert n_chosen_mismatch == 3
assert n_unchosen_mismatch == 3

In [10]:
# Overwrite stim_chosen / stim_unchosen with the recomputed values, only on the
# rows of bbt_resp (the assigned Series are aligned on the row index).
bbt.loc[bbt_resp.index, 'stim_chosen'] = chosen_stim
bbt.loc[bbt_resp.index, 'stim_unchosen'] = unchosen_stim

# refresh bbt_resp so it reflects the changes in bbt
bbt_resp = bbt.loc[bbt['action'].notna()]

In [11]:
# After the fix no responded trial should disagree any more: this should now be empty
still_mismatched = bbt_resp['chosen_stim'].ne(bbt_resp['stim_chosen'])
bbt_resp.loc[still_mismatched, cols2display]

Unnamed: 0,left_stim,right_stim,action,chosen_stim,stim_chosen,stim_unchosen


In [12]:
# Diff the fixed table against the pre-fix snapshot once, and reuse the result
# for both the sanity check and the displayed output.
bbt_diff = bbt.compare(old_bbt)
# Exactly 3 rows are expected to differ from the snapshot
assert len(bbt_diff) == 3
bbt_diff

Unnamed: 0_level_0,stim_chosen,stim_chosen,stim_unchosen,stim_unchosen
Unnamed: 0_level_1,self,other,self,other
3946,8.0,,7.0,
16020,6.0,,1.0,
19753,8.0,,6.0,


In [13]:
def _get_stim_value(row, stim_col, value_kind):
    stim = row[stim_col]
    if pd.isna(stim):
        return np.nan
    return row.get(f"stim{int(stim)}_value_{value_kind}", np.nan)

# (new column, column holding the stimulus number, value kind) for each derived column;
# the order here is the order in which the columns are added to bbt.
value_columns = [
    ('chosen_value_rl', 'stim_chosen', 'rl'),
    ('chosen_value_ck', 'stim_chosen', 'ck'),
    ('unchosen_value_rl', 'stim_unchosen', 'rl'),
    ('unchosen_value_ck', 'stim_unchosen', 'ck'),
]
for new_col, stim_col, value_kind in value_columns:
    # Row-wise lookup: each row is passed to _get_stim_value along with the two extra args
    bbt[new_col] = bbt.apply(_get_stim_value, axis=1, args=(stim_col, value_kind))

In [14]:
#bbt.to_csv('/home/ubuntu/data/learning-habits/bbt.csv', index=False)