In [2]:
import os
import sys
import time
import json
import pickle
import warnings
from datetime import datetime

from joblib import Parallel, delayed
import multiprocessing

import numpy as np
import pandas as pd
from nilearn.image import load_img
from nilearn.glm.first_level import FirstLevelModel, make_first_level_design_matrix
from nilearn.plotting import plot_design_matrix

sys.path.append('/home/ubuntu/repos/learning-habits-analysis')
from utils.data import Subject, load_participant_list, create_dummy_regressors
# compute_parametric_modulator intentionally unused here to avoid leakage

# Worker pool size: the machine's CPU count, capped at 30
max_workers = min(multiprocessing.cpu_count(), 30)

# Project data root and the fMRIPrep derivatives directory inside its BIDS tree
base_dir = '/home/ubuntu/data/learning-habits'
bids_dir = "/home/ubuntu/data/learning-habits/bids_dataset/derivatives/fmriprep-24.0.1"

# Participant list as returned by the project helper for this data root
sub_ids = load_participant_list(base_dir)

In [17]:
# Analysis configuration consumed by the cells below and dumped to JSON next
# to the outputs.
model_params = dict(
    model_name='mvpa_firststim',
    tr=2.33384,
    hrf_model='spm',
    noise_model='ar1',
    # MVPA: keep smoothing at (or very near) zero; override if needed
    smoothing_fwhm=0.0,
    motion_type='basic',
    include_physio=True,
    brain_mask=True,
    fd_thresh=0.5,
    std_dvars_thresh=2,
    exclusion_threshold=0.2,
    # 'dummies' appends scrubbing dummy regressors to the confounds
    scrub='dummies',
    # True marks stimulus IDs 1 and 8 with include_flag=False (rows are kept)
    exclude_stimuli=False,
    # 'all' keeps the durations already in the events table; 'none' zeroes them
    duration='all',
    iti_included=False,

    # Single-trial strategy:
    #   'LSA' = one GLM per run with one column per trial (fast)
    #   'LSS' = one GLM per trial: target vs others (slower, often more robust)
    beta_mode='LSA',

    # Phase (trial_type value) that is turned into single-trial betas
    decoding_phase='first_stim_presentation',

    # QC toggle: write a PNG of the design matrix
    save_design_png=True,
)

In [1]:
def build_firststim_events(subject, run, model_params):
    """
    Build the events table restricted to the phase we want to decode.

    No parametric modulators are added to the table. Rows are never dropped
    for stimulus exclusion; they are only tagged through ``include_flag`` so
    the caller can filter later.

    Parameters
    ----------
    subject : object
        Must expose the run as an attribute named ``run`` whose value provides
        ``extend_events_df(columns_event)`` and returns a DataFrame with a
        'trial_type' column (and a 'first_stim' column when
        ``exclude_stimuli`` is on).
    run : str
        Attribute name of the run on ``subject``.
    model_params : dict
        Reads 'exclude_stimuli', 'decoding_phase' and 'duration'.

    Returns
    -------
    pandas.DataFrame
        Copy of the rows whose 'trial_type' equals the decoding phase, with a
        boolean 'include_flag' column added. When ``duration == 'none'`` the
        'duration' column is set to 0.

    Raises
    ------
    ValueError
        If ``model_params['duration']`` is neither 'none' nor 'all'.
    """
    exclude_stimuli = model_params['exclude_stimuli']
    decoding_phase = model_params['decoding_phase']
    duration = model_params['duration']

    # Fail fast on a bad setting, before any events are loaded
    if duration not in ('none', 'all'):
        raise ValueError("Invalid duration type. Must be 'none' or 'all'")

    # Ask the run helper to attach the stimulus ID to the presentation events.
    # NOTE(review): only 'first_stim' is requested here; if downstream code
    # needs first_stim_value_rl / first_stim_value_ck, confirm that
    # extend_events_df returns them without being asked.
    columns_event = {
        'first_stim': 'first_stim_presentation'
    }
    ev_all = getattr(subject, run).extend_events_df(columns_event)

    # Keep only the decoding-phase rows; copy so the source table is untouched
    ev = ev_all[ev_all['trial_type'] == decoding_phase].copy()

    # Tag (do not drop) stimulus IDs 1 and 8. IDs are coerced to numbers so a
    # missing or unparsable ID no longer crashes the cast; such rows are not
    # in the excluded set and therefore keep include_flag=True.
    if exclude_stimuli:
        stim_ids = pd.to_numeric(ev['first_stim'], errors='coerce')
        ev['include_flag'] = ~stim_ids.isin([1, 8])
    else:
        ev['include_flag'] = True

    # 'all' keeps the durations already present in the events table
    if duration == 'none':
        ev['duration'] = 0

    return ev

In [10]:
# Run to process; used as the attribute name looked up on the subject
run = 'learning2'

# Single-subject handle, requesting modeling and imaging data via the include_* flags
subject = Subject(
    base_dir,
    'sub-01',
    include_modeling=True,
    include_imaging=True,
    bids_dir=bids_dir,
)

In [12]:
# Unpack the analysis settings into the plain names used by the cells below
model_name = model_params['model_name']
tr = model_params['tr']
hrf_model = model_params['hrf_model']
noise_model = model_params['noise_model']
smoothing_fwhm = model_params['smoothing_fwhm']
motion_type = model_params['motion_type']
include_physio = model_params['include_physio']
fd_thresh = model_params['fd_thresh']
std_dvars_thresh = model_params['std_dvars_thresh']
exclusion_threshold = model_params['exclusion_threshold']
scrub = model_params['scrub']
brain_mask_flag = model_params['brain_mask']
iti_included = model_params['iti_included']

beta_mode = model_params.get('beta_mode', 'LSA').upper()
decoding_phase = model_params.get('decoding_phase', 'first_stim_presentation')
save_design_png = model_params.get('save_design_png', True)

# Reject an unknown mode before any directory is created or data is loaded
if beta_mode not in ('LSA', 'LSS'):
    raise ValueError("beta_mode must be 'LSA' or 'LSS'")

# Output directory: <derivatives>/nilearn/<model>_<mode>_<YYYYMMDD>/<sub>/run-<run>
sub_id = subject.sub_id
derivatives_dir = os.path.join(os.path.dirname(subject.bids_dir), 'nilearn')
current_time = datetime.now().strftime("%Y%m%d")
model_dir = os.path.join(derivatives_dir, f"{model_name}_{beta_mode}_{current_time}")
sub_output_dir = os.path.join(model_dir, sub_id, f"run-{run}")
os.makedirs(sub_output_dir, exist_ok=True)

# Load the functional image; the last axis is taken as the volume count
img_path = subject.img.get(run)
fmri_img = load_img(img_path)
n_volumes = fmri_img.shape[-1]

# Load confounds. With scrub == 'dummies' the loader is called with scrub=0
# and the returned sample_mask is turned into dummy regressors further down.
confounds, sample_mask = subject.load_confounds(
    run, motion_type=motion_type,
    fd_thresh=fd_thresh, std_dvars_thresh=std_dvars_thresh,
    scrub=(0 if scrub == 'dummies' else scrub)
)

# Exclude runs with too many scrubbed volumes.
# sample_mask presumably lists the volumes that survive scrubbing — confirm
# against Subject.load_confounds. The flag file and message are written as
# before, and then the cell stops: previously execution carried on and betas
# were still produced for a run that had just been declared excluded.
if sample_mask is not None and len(sample_mask) < (1 - exclusion_threshold) * n_volumes:
    exclusion_msg = f"Run {run} of {sub_id} excluded due to excessive scrubbing"
    with open(os.path.join(sub_output_dir, 'exclusion_flag.txt'), 'w') as f:
        f.write(exclusion_msg)
    print(exclusion_msg)
    raise RuntimeError(exclusion_msg)

# Physio regressors are joined onto the confounds by index
if include_physio:
    physio_regressors = subject.load_physio_regressors(run)
    confounds = confounds.join(physio_regressors)

# Scrub with dummies: append regressors built from sample_mask as extra columns
if scrub == 'dummies':
    dummies = create_dummy_regressors(sample_mask, len(confounds))
    confounds = pd.concat([confounds, dummies], axis=1)

# Brain mask (None lets the model work without an explicit mask image)
brain_mask = load_img(subject.brain_mask.get(run)) if brain_mask_flag else None

# Build events for the phase of interest (no parametric modulators in design)
events = build_firststim_events(subject, run, model_params)

In [13]:

# ITI handling: `events` is already restricted to the decoding phase, so it
# holds no ITI rows and nothing has to be dropped whatever `iti_included` is.

# Reference point of each volume within its TR (0.5 = middle of the TR).
# The explicit frame times below and the FirstLevelModel settings share this
# value, so a model fitted from `events` (LSS) builds its design on the same
# time grid as the explicitly built LSA design matrix. Without it,
# FirstLevelModel falls back to slice_time_ref=0 and the two modes disagree
# by half a TR.
slice_time_ref = 0.5

# Frame times: one per volume, from tr/2 to (n - 0.5) * tr
n = fmri_img.shape[-1]
frametimes = np.linspace(slice_time_ref * tr, (n - 1 + slice_time_ref) * tr, n)

# Model settings shared by the LSA and LSS fits
base_model_kwargs = dict(
    t_r=tr,
    slice_time_ref=slice_time_ref,
    smoothing_fwhm=smoothing_fwhm,
    mask_img=brain_mask,
    hrf_model=hrf_model,
    noise_model=noise_model,
    drift_model=None,
    minimize_memory=True
)

# One dict per trial is appended here; it becomes the decoding mapping CSV
mapping_rows = []

In [52]:
# ---------------------
# LSA: one GLM per run
# ---------------------
if beta_mode == 'LSA':
    # Give each trial a unique label so the design has one column per trial
    trials = events.reset_index(drop=True).copy()
    trials['trial_label'] = [f"{decoding_phase}_t{ix+1:03d}" for ix in range(len(trials))]

    # The unique label takes over 'trial_type'; the original value is kept
    # under 'old_trial_type' so the table never has two 'trial_type' columns.
    lsa_events = (
        trials
        .rename(columns={'trial_type': 'old_trial_type'})
        .rename(columns={'trial_label': 'trial_type'})
    )

    # Hand the design builder only the columns it interprets; bookkeeping
    # columns (stimulus IDs, flags, ...) stay in `trials` for the mapping.
    design_event_cols = [
        c for c in ('onset', 'duration', 'trial_type', 'modulation')
        if c in lsa_events.columns
    ]

    # Silence the null-duration warning for this call only instead of
    # changing the warning filters for the rest of the session.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=".*events with null duration.*")
        design_matrix = make_first_level_design_matrix(
            frame_times=frametimes,
            events=lsa_events[design_event_cols],
            hrf_model=hrf_model,
            drift_model=None,
            add_regs=confounds
        )

    # NOTE(review): with scrub == 'dummies' the confounds already carry dummy
    # regressors built from sample_mask, and sample_mask is also passed to
    # fit() here — confirm that applying both is intended.
    model = FirstLevelModel(**base_model_kwargs).fit(
        fmri_img, design_matrices=design_matrix, sample_masks=sample_mask
    )

    # Save each trial's beta (1-hot contrast on its column). Columns are
    # looked up by trial label, so the trial <-> beta pairing does not depend
    # on the column order of the design matrix; a missing column raises
    # KeyError instead of silently shifting the mapping.
    dm_cols = list(design_matrix.columns)
    col_position = {name: pos for pos, name in enumerate(dm_cols)}

    for i, label in enumerate(trials['trial_label'], start=1):
        cvec = np.zeros(len(dm_cols), dtype=float)
        cvec[col_position[label]] = 1.0               # 1-hot for this trial
        beta_img = model.compute_contrast(cvec, output_type="effect_size")
        beta_path = os.path.join(sub_output_dir, f"{sub_id}_run-{run}_trial-{i:03d}_beta_LSA.nii.gz")
        beta_img.to_filename(beta_path)

        row_ev = trials.iloc[i-1]
        # int(nan) raises, so a missing stimulus ID is stored as NaN instead
        stim_id = row_ev.get('first_stim', np.nan)
        mapping_rows.append({
            'sub_id': sub_id,
            'run': run,
            'trial_index': i,
            'onset': float(row_ev['onset']),
            'duration': float(row_ev['duration']),
            'first_stim': int(stim_id) if pd.notna(stim_id) else np.nan,
            'q_rl': float(row_ev.get('first_stim_value_rl', np.nan)),
            'q_ck': float(row_ev.get('first_stim_value_ck', np.nan)),
            'include_flag': bool(row_ev.get('include_flag', True)),
            'beta_path': beta_path,
            'mode': 'LSA'
        })

    # Optional QC image of the design matrix
    if save_design_png:
        qc_design_path = os.path.join(sub_output_dir, f'{sub_id}_run-{run}_design_matrix.png')
        plot_design_matrix(design_matrix, output_file=qc_design_path)

    # Persist design for audit (written without the index)
    design_matrix_path = os.path.join(sub_output_dir, f'{sub_id}_run-{run}_design_matrix.csv')
    design_matrix.to_csv(design_matrix_path, index=False)

  Y, _ = mean_scaling(Y, self.signal_scaling)


In [36]:
# Stand-alone rebuild of the LSA design matrix from variables set in other
# cells (frametimes, lsa_events, hrf_model, confounds)
design_matrix = make_first_level_design_matrix(
    frame_times=frametimes,
    events=lsa_events,
    hrf_model=hrf_model,
    drift_model=None,
    add_regs=confounds,
)

AttributeError: 'DataFrame' object has no attribute 'unique'

In [37]:
# Display the per-trial events table that is fed to the LSA design matrix
lsa_events

Unnamed: 0,onset,duration,trial_type,trial,first_stim,include_flag,trial_type.1
0,0.001868,0.820289,first_stim_presentation,1,7,True,first_stim_presentation_t001
1,9.592707,0.848439,first_stim_presentation,2,1,True,first_stim_presentation_t002
2,19.554709,0.816044,first_stim_presentation,3,5,True,first_stim_presentation_t003
3,29.504384,0.821090,first_stim_presentation,4,6,True,first_stim_presentation_t004
4,39.950071,0.913002,first_stim_presentation,5,5,True,first_stim_presentation_t005
...,...,...,...,...,...,...,...
91,919.436290,0.845959,first_stim_presentation,92,3,True,first_stim_presentation_t092
92,929.459714,0.896282,first_stim_presentation,93,2,True,first_stim_presentation_t093
93,939.690733,0.819459,first_stim_presentation,94,6,True,first_stim_presentation_t094
94,949.171984,0.859578,first_stim_presentation,95,8,True,first_stim_presentation_t095


In [None]:
# ----------------------
# LSS: one GLM per trial
# ----------------------
if beta_mode == 'LSS':
    trials = events.reset_index(drop=True).copy()

    # Hand the model only the columns it interprets; bookkeeping columns
    # (stimulus IDs, flags, ...) stay in `trials` for the mapping.
    design_event_cols = [
        c for c in ('onset', 'duration', 'trial_type', 'modulation')
        if c in trials.columns
    ]

    for t in range(len(trials)):
        # Trial t becomes its own 'target' condition; all remaining trials
        # are pooled into a single 'others' condition.
        ev_t = trials.iloc[[t]].copy()
        ev_others = trials.drop(index=t).copy()
        ev_t['trial_type'] = 'target'
        ev_others['trial_type'] = 'others'
        events_t = pd.concat([ev_t, ev_others], ignore_index=True)[design_event_cols]

        # NOTE(review): with scrub == 'dummies' the confounds already carry
        # dummy regressors built from sample_mask, and sample_mask is also
        # passed to fit() here — confirm that applying both is intended.
        model = FirstLevelModel(**base_model_kwargs).fit(
            fmri_img, events=events_t, confounds=confounds, sample_masks=sample_mask
        )

        # compute_contrast takes a column-name expression, an array or a list
        # of those; a dict is rejected, so the target column is named directly.
        beta_img = model.compute_contrast('target', output_type="effect_size")
        beta_path = os.path.join(sub_output_dir, f"{sub_id}_run-{run}_trial-{t+1:03d}_beta_LSS.nii.gz")
        beta_img.to_filename(beta_path)

        row_ev = trials.iloc[t]
        # int(nan) raises, so a missing stimulus ID is stored as NaN instead
        stim_id = row_ev.get('first_stim', np.nan)
        mapping_rows.append({
            'sub_id': sub_id,
            'run': run,
            'trial_index': t+1,
            'onset': float(row_ev['onset']),
            'duration': float(row_ev['duration']),
            'first_stim': int(stim_id) if pd.notna(stim_id) else np.nan,
            'q_rl': float(row_ev.get('first_stim_value_rl', np.nan)),
            'q_ck': float(row_ev.get('first_stim_value_ck', np.nan)),
            'include_flag': bool(row_ev.get('include_flag', True)),
            'beta_path': beta_path,
            'mode': 'LSS'
        })
elif beta_mode != 'LSA':
    # 'LSA' is handled in its own cell; only a genuinely unknown mode is an
    # error here (the former bare `else` also raised for the valid 'LSA').
    raise ValueError("beta_mode must be 'LSA' or 'LSS'")

# Destination files for this run, all inside sub_output_dir
map_path = os.path.join(sub_output_dir, f'{sub_id}_run-{run}_trial_beta_mapping.csv')
events_path = os.path.join(sub_output_dir, f'{sub_id}_run-{run}_events_used.csv')
params_path = os.path.join(sub_output_dir, f'{sub_id}_run-{run}_params.json')

# Trial -> beta mapping used for decoding
mapping_df = pd.DataFrame(mapping_rows)
mapping_df.to_csv(map_path, index=False)

# Phase-restricted events table that was actually modelled
events.to_csv(events_path, index=False)

# Analysis parameters, for provenance
with open(params_path, 'w') as params_file:
    json.dump(model_params, params_file, indent=4)