In [1]:
import pandas as pd
import numpy as np
import os
import sys
from nilearn.glm import expression_to_contrast_vector
sys.path.append('..')
from utils.analysis import est_c_vifs, est_vifs

In [2]:
# Root directory of the first-level outputs that the cells below scan for
# `sub-*` sub-directories. The default is the original local path; set the
# LEARNINGHABITS_FIRST_LVL_DIR environment variable to run the notebook on
# another machine without editing this cell.
first_lvl_dir = os.environ.get(
    'LEARNINGHABITS_FIRST_LVL_DIR',
    '/Users/hugofluhr/phd_local/data/LearningHabits/spm_outputs/glm1_2025-06-05-05-15',
)

In [3]:
def load_dms(first_lvl_dir, runs=('run-1', 'run-2', 'run-3')):
    """Load the per-run design matrices of every subject under a directory.

    Parameters
    ----------
    first_lvl_dir : str
        Directory containing one ``sub-*`` sub-directory per subject. For each
        subject and run, the design matrix is read from
        ``<first_lvl_dir>/<sub>/<run>/<sub>_<run>_design_matrix.csv`` (no header
        row) and its column labels from
        ``<first_lvl_dir>/<sub>/<run>/<sub>_<run>_column_names.txt`` (one label
        per line).
    runs : iterable of str, optional
        Names of the run sub-directories to look for. Defaults to
        ``run-1`` .. ``run-3``.

    Returns
    -------
    dict
        Nested mapping ``DMs[subject][run] -> pandas.DataFrame``. A run is only
        present when both of its files exist; a subject directory without any
        loadable run maps to an empty dict.
    """
    runs = tuple(runs)

    # Keep real directories only: a stray *file* whose name starts with 'sub-'
    # would otherwise show up as a subject with no runs.
    subjects = sorted(
        entry for entry in os.listdir(first_lvl_dir)
        if entry.startswith('sub-')
        and os.path.isdir(os.path.join(first_lvl_dir, entry))
    )

    DMs = {}
    for sub in subjects:
        DMs[sub] = {}
        for run in runs:
            run_dir = os.path.join(first_lvl_dir, sub, run)
            design_matrix_path = os.path.join(run_dir, f'{sub}_{run}_design_matrix.csv')
            column_names_path = os.path.join(run_dir, f'{sub}_{run}_column_names.txt')
            # Runs with a missing file are skipped silently, so callers must
            # not assume that every subject has every run.
            if not (os.path.exists(design_matrix_path) and os.path.exists(column_names_path)):
                continue
            with open(column_names_path, 'r') as f:
                col_names = [line.strip() for line in f]
            DMs[sub][run] = pd.read_csv(design_matrix_path, names=col_names, header=None)
    return DMs

In [4]:
# Load all design matrices once; the result is a nested dict indexed as
# DMs[subject][run] and is reused by the summary cells below.
DMs = load_dms(first_lvl_dir)

In [5]:
def cVIFs_summary(DMs, n_regs = 7):
    """Summarise contrast VIFs over all loaded design matrices.

    One single-column contrast is defined for each of the first ``n_regs``
    columns of the first available design matrix (contrast name == column
    name), and ``est_c_vifs`` is evaluated with those contrasts on every
    design matrix.

    Parameters
    ----------
    DMs : dict
        Nested mapping ``DMs[subject][run] -> design matrix (DataFrame)``.
        Subjects may have any subset of runs, including none.
    n_regs : int, optional
        Number of leading design-matrix columns to build contrasts for.

    Returns
    -------
    pandas.DataFrame
        ``describe()`` statistics, computed over the (subject, run) rows, of
        the values returned by ``est_c_vifs``.
        NOTE(review): assumes ``est_c_vifs`` returns a mapping keyed by
        contrast name - confirm against ``utils.analysis``.

    Raises
    ------
    ValueError
        If ``DMs`` does not contain a single design matrix.
    """
    contrasts = None
    records = []
    row_labels = []
    for sub, sub_runs in DMs.items():
        for run, dm in sub_runs.items():
            if contrasts is None:
                # Contrast definitions come from the first design matrix seen.
                contrasts = {col: col for col in dm.columns[:n_regs]}
            records.append(est_c_vifs(dm, contrasts))
            # Label each row with the pair that was actually loaded, so the
            # index always matches the records even when runs are missing.
            row_labels.append((sub, run))

    if not records:
        raise ValueError('DMs contains no design matrices to summarise')

    df_vifs = pd.DataFrame(
        records,
        index=pd.MultiIndex.from_tuples(row_labels, names=['subject', 'run']),
    )
    return df_vifs.describe()

In [6]:
def VIFs_summary(DMs, n_regs = 7):
    """Summarise regressor VIFs over all loaded design matrices.

    The first ``n_regs`` column labels of the first available design matrix
    are taken as the regressors of interest, and ``est_vifs`` is evaluated
    with them on every design matrix.

    Parameters
    ----------
    DMs : dict
        Nested mapping ``DMs[subject][run] -> design matrix (DataFrame)``.
        Subjects may have any subset of runs, including none.
    n_regs : int, optional
        Number of leading design-matrix columns to evaluate.

    Returns
    -------
    pandas.DataFrame
        ``describe()`` statistics, computed over the (subject, run) rows, of
        the values returned by ``est_vifs``.
        NOTE(review): assumes ``est_vifs`` returns a mapping keyed by
        regressor name - confirm against ``utils.analysis``.

    Raises
    ------
    ValueError
        If ``DMs`` does not contain a single design matrix.
    """
    regressors = None
    records = []
    row_labels = []
    for sub, sub_runs in DMs.items():
        for run, dm in sub_runs.items():
            if regressors is None:
                # Regressor labels come from the first design matrix seen.
                regressors = dm.columns[:n_regs]
            records.append(est_vifs(dm, regressors))
            # Label each row with the pair that was actually loaded, so the
            # index always matches the records even when runs are missing.
            row_labels.append((sub, run))

    if not records:
        raise ValueError('DMs contains no design matrices to summarise')

    df_vifs = pd.DataFrame(
        records,
        index=pd.MultiIndex.from_tuples(row_labels, names=['subject', 'run']),
    )
    return df_vifs.describe()

In [7]:
# Descriptive statistics of the contrast VIFs over all loaded design matrices,
# using the default of the first 7 columns; left as the bare last expression
# so the table is displayed.
cVIFs_summary(DMs)

Unnamed: 0,Sn(1) first_stim*bf(1),Sn(1) first_stimxQval^1*bf(1),Sn(1) first_stimxHval^1*bf(1),Sn(1) second_stim*bf(1),Sn(1) response*bf(1),Sn(1) feedback*bf(1),Sn(1) first_stim_excl*bf(1)
count,180.0,180.0,180.0,180.0,180.0,180.0,180.0
mean,16.597477,1.178005,1.328539,11.886019,49.627241,13.280107,10.33745
std,3.358924,0.102918,0.197152,2.76949,5.812404,1.276761,2.499572
min,10.053973,1.063698,1.051599,5.913146,36.963293,10.624233,4.547384
25%,14.173684,1.111447,1.17849,10.030425,45.867095,12.318561,8.632922
50%,16.020681,1.156574,1.274923,11.834031,49.252511,13.144555,9.885517
75%,18.552227,1.210517,1.434213,13.461572,52.932901,14.160415,11.937641
max,26.930413,1.777338,2.176355,25.540812,70.388134,17.981682,19.09196


In [11]:
# Descriptive statistics of the regressor VIFs over all loaded design matrices,
# using the default of the first 7 columns; left as the bare last expression
# so the table is displayed.
VIFs_summary(DMs)

Unnamed: 0,Sn(1) first_stim*bf(1),Sn(1) first_stimxQval^1*bf(1),Sn(1) first_stimxHval^1*bf(1),Sn(1) second_stim*bf(1),Sn(1) response*bf(1),Sn(1) feedback*bf(1),Sn(1) first_stim_excl*bf(1)
count,180.0,180.0,180.0,180.0,180.0,180.0,180.0
mean,16.597477,1.178005,1.328539,11.886019,49.627241,13.280107,10.33745
std,3.358924,0.102918,0.197152,2.76949,5.812404,1.276761,2.499572
min,10.053973,1.063698,1.051599,5.913146,36.963293,10.624233,4.547384
25%,14.173684,1.111447,1.17849,10.030425,45.867095,12.318561,8.632922
50%,16.020681,1.156574,1.274923,11.834031,49.252511,13.144555,9.885517
75%,18.552227,1.210517,1.434213,13.461572,52.932901,14.160415,11.937641
max,26.930413,1.777338,2.176355,25.540812,70.388134,17.981682,19.09196
