In [1]:
import numpy as np
import pandas as pd
import sys
sys.path.append('../')
from utils.data import Subject, load_participant_list
import copy

In [2]:
base_dir = '/Users/hugofluhr/phd_local/data/LearningHabits/dev_sample'
sub_ids = load_participant_list(base_dir)

In [3]:
subjects24 = [Subject(base_dir, sub_id) for sub_id in sub_ids]



In [4]:
subjects25 = copy.deepcopy(subjects24)

In [5]:
# load modeling data
modeling24_dir = 'modeling_data/2024-09-27'
modeling25_dir = 'modeling_data/2025-12-18'
for subject in subjects24:
    subject.add_modeling_data(modeling24_dir)
for subject in subjects25:
    subject.add_modeling_data(modeling25_dir)

In [13]:
# Exhaustive flagging and reporting for RL and CK modeling data changes
rl_cols = [f'stim{i}_value_rl' for i in range(1, 9)]
ck_cols = [f'stim{i}_value_ck' for i in range(1, 9)]
threshold_rl = 0.1  # RL change threshold
threshold_ck = 0.1  # CK change threshold
flagged_rl = []
flagged_ck = []
flagged_both = []
subject_diffs = []
for idx, (subj24, subj25) in enumerate(zip(subjects24, subjects25)):
    df24 = subj24.extended_trials[rl_cols + ck_cols].reset_index(drop=True)
    df25 = subj25.extended_trials[rl_cols + ck_cols].reset_index(drop=True)
    min_len = min(len(df24), len(df25))
    df24 = df24.iloc[:min_len]
    df25 = df25.iloc[:min_len]
    rl_diff = (df25[rl_cols] - df24[rl_cols]).abs()
    ck_diff = (df25[ck_cols] - df24[ck_cols]).abs()
    # Compute max, mean, and median difference for RL and CK values for this subject
    max_rl = rl_diff.max().max()
    mean_rl = rl_diff.mean().mean()
    median_rl = rl_diff.median().median()
    max_ck = ck_diff.max().max()
    mean_ck = ck_diff.mean().mean()
    median_ck = ck_diff.median().median()
    subject_diffs.append({
        'subject': subj24.sub_id,
        'max_rl_diff': max_rl,
        'mean_rl_diff': mean_rl,
        'median_rl_diff': median_rl,
        'max_ck_diff': max_ck,
        'mean_ck_diff': mean_ck,
        'median_ck_diff': median_ck
    })
    if max_rl > threshold_rl:
        flagged_rl.append(subj24.sub_id)
    if max_ck > threshold_ck:
        flagged_ck.append(subj24.sub_id)
    if (max_rl > threshold_rl) and (max_ck > threshold_ck):
        flagged_both.append(subj24.sub_id)
print('Flagged subjects for RL (max diff > threshold_rl):')
for sub_id in flagged_rl:
    print(sub_id)
print('\nFlagged subjects for CK (max diff > threshold_ck):')
for sub_id in flagged_ck:
    print(sub_id)
print('\nFlagged subjects for BOTH RL and CK:')
for sub_id in flagged_both:
    print(sub_id)
print('\nAll subject diffs summary:')
import pandas as pd
df_report = pd.DataFrame(subject_diffs)
print(df_report)
print('\nSubjects with any RL diff above threshold:')
print(df_report[df_report['max_rl_diff'] > threshold_rl]['subject'].tolist())
print('\nSubjects with any CK diff above threshold:')
print(df_report[df_report['max_ck_diff'] > threshold_ck]['subject'].tolist())

Flagged subjects for RL (max diff > threshold_rl):
sub-03
sub-05
sub-12
sub-18
sub-26
sub-27
sub-41
sub-49
sub-50
sub-53
sub-59
sub-73

Flagged subjects for CK (max diff > threshold_ck):
sub-01
sub-02
sub-03
sub-05
sub-06
sub-07
sub-08
sub-09
sub-10
sub-12
sub-13
sub-14
sub-15
sub-16
sub-18
sub-19
sub-20
sub-21
sub-22
sub-23
sub-24
sub-26
sub-27
sub-28
sub-29
sub-30
sub-33
sub-34
sub-35
sub-36
sub-37
sub-38
sub-40
sub-41
sub-43
sub-47
sub-48
sub-49
sub-51
sub-52
sub-53
sub-54
sub-55
sub-56
sub-57
sub-58
sub-59
sub-62
sub-63
sub-64
sub-65
sub-67
sub-69
sub-70
sub-71
sub-72
sub-73

Flagged subjects for BOTH RL and CK:
sub-03
sub-05
sub-12
sub-18
sub-26
sub-27
sub-41
sub-49
sub-53
sub-59
sub-73

All subject diffs summary:
   subject  max_rl_diff  mean_rl_diff  median_rl_diff  max_ck_diff  \
0   sub-01     0.003307  2.178724e-04    3.609121e-06     0.216414   
1   sub-02     0.000000  0.000000e+00    0.000000e+00     0.196973   
2   sub-03     0.142864  1.176944e-03    0.000000e+00     0.2

In [17]:
subjects24[3].extended_trials.alpha_ck20

0      0.0
1      0.0
2      0.0
3      0.0
4      0.0
      ... 
323    0.0
324    0.0
325    0.0
326    0.0
327    0.0
Name: alpha_ck20, Length: 328, dtype: float64