In [1]:
# ==========================================
# Setup (installs + Drive mount)
# ==========================================
!pip -q install torch torchvision open_clip_torch tqdm

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m127.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m98.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m60.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m42.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# ==========================================
# Imports
# ==========================================
import os, json, math, re, random
from pathlib import Path
from typing import Tuple, List
import numpy as np
import pandas as pd
from PIL import Image
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as tvm
import open_clip
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
from scipy.spatial.distance import pdist, squareform
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
from tqdm import tqdm


# ==========================================
# DEBUG / BATCH CONFIG
# ==========================================
# With DEBUG on, the run is restricted to a single subject and a single ROI
# (see the `if DEBUG:` override below).
DEBUG = False
DEBUG_SUBJECT   = "subj01"
DEBUG_ROI_KEY   = "FFA"
DEBUG_N_IMAGES  = None    # e.g., 200 for quick tests; the main loop applies it only when DEBUG is True

SUBJECTS = [f"subj{idx:02d}" for idx in range(1, 8+1)]  # "subj01" .. "subj08"
ROI_KEYS = ["EBA", "FFA", "PPA"]  # adjust as needed

if DEBUG:
    SUBJECTS = [DEBUG_SUBJECT]
    ROI_KEYS = [DEBUG_ROI_KEY]

# ==========================================
# SPEED/SAFETY TOGGLES
# ==========================================
# NOTE: the main loop skips a finished pair only when `RESUME and not FORCE`,
# so FORCE=True overrides RESUME.
RESUME = False           # skip (subj, ROI) pairs whose output files already exist
FORCE  = True          # recompute even if outputs exist
SAVE_PLOTS = False      # disable to save time/I-O during dev
SAVE_RDM_MATS = False   # saving full RDMs can be big; disable in dev
FAST_MODE = True        # overrides folds/alphas/perms and sets an RSA subsample size (see RSA CONFIG below)
DO_PERM = True          # disable during dev to only compute rho (p-value is then NaN)

# ==========================================
# ENCODING / FEATURES CONFIG
# ==========================================
# OUTER_FOLDS, INNER_FOLDS and ALPHAS are defaults; FAST_MODE replaces them below.
SEED = 42
OUTER_FOLDS = 5
INNER_FOLDS = 3
ALPHAS = np.logspace(-2, 4, 12)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
FEATURE_MODEL = "clip"   # "clip" | "resnet" | "auto" (auto = try CLIP, fall back to ResNet50)
SAFE_BATCH = 32          # batch size used by the feature extractors
NUM_WORKERS = 2          # Colab Pro+ sweet spot is usually 2-4; start with 2
PIN_MEMORY = (DEVICE == "cuda")
PERSISTENT_WORKERS = (NUM_WORKERS > 0)
PREFETCH_FACTOR = 2 if NUM_WORKERS > 0 else None  # only passed to DataLoader when NUM_WORKERS > 0

USE_AMP = True            # enables torch.amp.autocast in the extractors when DEVICE is "cuda"

# ==========================================
# PATHS
# ==========================================
# Input: per-ROI response arrays (<ROI>/<subject>.npy) and the meta CSVs are read from here.
MULTIROI_BASE = Path("/content/drive/MyDrive/algonauts_outputs/multiROI")

# Outputs go to a separate, feature-model-specific folder so results of earlier
# runs are not touched. The features cache directory is created immediately.
RSA_BASE = Path(f"/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_{FEATURE_MODEL}")
(RSA_BASE / "features").mkdir(parents=True, exist_ok=True)

# Algonauts data root (images), addressed through a Drive shortcut target id
FOLDER_ID = "1DUf3nGNNFk6YjRjQtZPfAY5N105GoGJb"
BASE = Path(f"/content/drive/.shortcut-targets-by-id/{FOLDER_ID}")
DATA_ROOT = BASE / "algonauts_2023_tutorial_data"




# ==========================================
# RSA CONFIG
# ==========================================
N_PERM = 1000 if not DEBUG else 200
N_RDM_IMAGES = None      # None = use all images for the RDMs; set in FAST_MODE below

# FAST_MODE overrides: must run after the defaults above have been assigned.
if FAST_MODE:
    OUTER_FOLDS = 3
    INNER_FOLDS = 2
    ALPHAS = np.logspace(-2, 3, 8)
    N_PERM = 200 if DO_PERM else 0
    N_RDM_IMAGES = 600

# ==========================================
# Reproducibility
# ==========================================
# Seed every RNG in use: Python's `random`, NumPy's legacy global RNG, and torch (CPU + all GPUs).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# One-line echo of the effective configuration (after the DEBUG / FAST_MODE overrides).
print(f"DEBUG={DEBUG} | device={DEVICE} | feature={FEATURE_MODEL} | folds={OUTER_FOLDS}/{INNER_FOLDS} | perms={N_PERM}")


DEBUG=False | device=cuda | feature=clip | folds=3/2 | perms=200


In [3]:
# ==========================================
# Utilities
# ==========================================
class ImageListDataset(Dataset):
    """Map-style dataset that yields one transformed RGB image per file path.

    Items come back in the order of ``paths``; no label is attached.
    """

    def __init__(self, paths, transform):
        self.paths = paths
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, i):
        # Convert to RGB so the transform always receives a three-channel image.
        rgb_image = Image.open(self.paths[i]).convert("RGB")
        return self.transform(rgb_image)

def load_image_paths(subject: str, n_imgs: int, meta_dir: Path) -> Tuple[List[Path], int]:
    """Return the first ``n_imgs`` training-image paths of ``subject`` in CSV row order.

    The row order of ``<meta_dir>/<subject>_train_images_meta.csv`` is kept as-is
    (no shuffle) to keep alignment with Y.

    Returns:
        (paths, n) where ``n = min(number of CSV rows, n_imgs)`` and ``paths`` holds
        ``n`` entries built from the CSV's ``filename`` column.

    Raises:
        FileNotFoundError: if the meta CSV does not exist.
    """
    csv_path = meta_dir / f"{subject}_train_images_meta.csv"
    if not csv_path.exists():
        raise FileNotFoundError(f"Missing meta CSV: {csv_path}")

    meta = pd.read_csv(csv_path)
    n_kept = min(len(meta), n_imgs)
    image_root = DATA_ROOT / subject / "training_split" / "training_images"
    filenames = meta.iloc[:n_kept]["filename"].tolist()
    return [image_root / name for name in filenames], n_kept

def choose_random_indices(n: int, k: int, seed: int = SEED):
    """Draw ``min(k, n)`` distinct indices from ``range(n)``, returned in ascending order.

    The draw is reproducible for a given ``seed``; sorting keeps the selected
    rows in their original relative order, which makes block slicing easy.
    """
    generator = np.random.default_rng(seed)
    sample_size = min(k, n)
    picked = generator.choice(n, size=sample_size, replace=False)
    picked.sort()
    return picked

# ---------- feature extractors ----------
def extract_clip_features(paths, batch_size=SAFE_BATCH, tag=""):
    """Encode images with the OpenCLIP ViT-B-32 image tower and L2-normalise each row.

    Args:
        paths: image file paths; output rows follow this order (the loader does not shuffle).
        batch_size: number of images per forward pass.
        tag: label shown in the progress bar only.

    Returns:
        (features, "CLIP_ViT-B-32"), where ``features`` is a float32 NumPy array
        with one row per image.
    """
    clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
        'ViT-B-32', pretrained='laion2b_s34b_b79k', device=DEVICE
    )
    clip_model.eval()

    loader_kwargs = {
        "batch_size": batch_size,
        "shuffle": False,
        "num_workers": NUM_WORKERS,
        "pin_memory": PIN_MEMORY,
        "persistent_workers": PERSISTENT_WORKERS,
    }
    if NUM_WORKERS > 0:
        # prefetch_factor is only supplied when worker processes are used.
        loader_kwargs["prefetch_factor"] = PREFETCH_FACTOR
    loader = DataLoader(ImageListDataset(paths, clip_preprocess), **loader_kwargs)

    on_gpu = DEVICE == "cuda"
    autocast = torch.amp.autocast("cuda", enabled=(on_gpu and USE_AMP))
    batches = []
    with torch.no_grad(), autocast:
        for images in tqdm(loader, desc=f"CLIP feats [{tag}]"):
            images = images.to(DEVICE, non_blocking=False)
            emb = clip_model.encode_image(images)
            # Normalise before casting, then move to host memory as float32.
            emb = emb / emb.norm(dim=-1, keepdim=True)
            batches.append(emb.float().cpu().numpy())
            if on_gpu:
                torch.cuda.empty_cache()
    return np.vstack(batches), "CLIP_ViT-B-32"

def extract_resnet_features(paths, batch_size=SAFE_BATCH, tag=""):
    """Extract L2-normalised ResNet-50 (ImageNet1K_V1) features from the layer before the classifier.

    Args:
        paths: image file paths; output rows follow this order (the loader does not shuffle).
        batch_size: number of images per forward pass.
        tag: label shown in the progress bar only.

    Returns:
        (features, "ResNet50"), where ``features`` is a float32 NumPy array
        with one row per image.
    """
    weights = tvm.ResNet50_Weights.IMAGENET1K_V1
    backbone = tvm.resnet50(weights=weights).to(DEVICE).eval()
    preprocess = weights.transforms()
    # Drop the final child module (the classification layer) and keep the rest.
    trunk_layers = list(backbone.children())[:-1]
    trunk = nn.Sequential(*trunk_layers).to(DEVICE).eval()

    loader_kwargs = {
        "batch_size": batch_size,
        "shuffle": False,
        "num_workers": NUM_WORKERS,
        "pin_memory": PIN_MEMORY,
        "persistent_workers": PERSISTENT_WORKERS,
    }
    if NUM_WORKERS > 0:
        # prefetch_factor is only supplied when worker processes are used.
        loader_kwargs["prefetch_factor"] = PREFETCH_FACTOR
    loader = DataLoader(ImageListDataset(paths, preprocess), **loader_kwargs)

    on_gpu = DEVICE == "cuda"
    autocast = torch.amp.autocast("cuda", enabled=(on_gpu and USE_AMP))
    batches = []
    with torch.no_grad(), autocast:
        for images in tqdm(loader, desc=f"ResNet feats [{tag}]"):
            images = images.to(DEVICE, non_blocking=False)
            pooled = trunk(images)               # [B,2048,1,1]
            pooled = torch.flatten(pooled, 1)    # [B,2048]
            pooled = pooled / pooled.norm(dim=-1, keepdim=True)
            batches.append(pooled.float().cpu().numpy())
            if on_gpu:
                torch.cuda.empty_cache()
    return np.vstack(batches), "ResNet50"

def get_or_make_features(subject: str, n_imgs: int, meta_dir: Path) -> Tuple[np.ndarray, str]:
    """Return ``(features, tag)`` for ``subject``, using the on-disk cache when possible.

    Features are cached as ``RSA_BASE/features/<subject>_<tag>.npy``. A cached file
    is used only if (a) it was produced by an extractor that ``FEATURE_MODEL``
    allows and (b) it has at least ``n_imgs`` rows. Previously the lookup tried
    both tags unconditionally, so e.g. ``FEATURE_MODEL == "resnet"`` could
    silently return cached CLIP features.

    Args:
        subject: subject id used in the cache and meta-CSV file names.
        n_imgs: number of leading images (rows) required.
        meta_dir: directory holding ``<subject>_train_images_meta.csv``.

    Returns:
        Features truncated to the requested number of rows, and the tag of the
        extractor that produced them ("CLIP_ViT-B-32" or "ResNet50").

    Raises:
        FileNotFoundError: propagated from ``load_image_paths`` when features
            must be computed and the meta CSV is missing.
    """
    cache_dir = RSA_BASE / "features"
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Which cached extractors are acceptable for the configured model.
    # Any value other than "clip"/"resnet" is treated as "auto", as in the compute step.
    if FEATURE_MODEL == "clip":
        acceptable_tags = ["CLIP_ViT-B-32"]
    elif FEATURE_MODEL == "resnet":
        acceptable_tags = ["ResNet50"]
    else:  # auto: either is fine, CLIP preferred
        acceptable_tags = ["CLIP_ViT-B-32", "ResNet50"]

    # 1) Try cache
    for tag in acceptable_tags:
        cache_path = cache_dir / f"{subject}_{tag}.npy"
        if not cache_path.exists():
            continue
        cached = np.load(cache_path)
        # A cache built from fewer images (e.g. a capped debug run) cannot serve this request.
        if cached.shape[0] >= n_imgs:
            return cached[:n_imgs], tag

    # 2) Compute
    paths, n = load_image_paths(subject, n_imgs, meta_dir)
    if FEATURE_MODEL == "clip":
        F, tag = extract_clip_features(paths, batch_size=SAFE_BATCH, tag=subject)
    elif FEATURE_MODEL == "resnet":
        F, tag = extract_resnet_features(paths, batch_size=SAFE_BATCH, tag=subject)
    else:  # auto
        try:
            F, tag = extract_clip_features(paths, batch_size=SAFE_BATCH, tag=subject)
        except Exception as e:
            # Deliberate best-effort fallback: any CLIP failure switches to ResNet50.
            print(f"[WARN] CLIP failed ({e}); falling back to ResNet50.")
            F, tag = extract_resnet_features(paths, batch_size=SAFE_BATCH, tag=subject)

    np.save(cache_dir / f"{subject}_{tag}.npy", F)
    return F[:n], tag


In [4]:
# ---------- encoding ----------
def _fast_r2(Y_true: np.ndarray, Y_pred: np.ndarray) -> np.ndarray:
    Y_true = np.asarray(Y_true, dtype=np.float64)
    Y_pred = np.asarray(Y_pred, dtype=np.float64)
    ss_res = np.sum((Y_true - Y_pred) ** 2, axis=0)
    y_mean = np.mean(Y_true, axis=0, keepdims=True)
    ss_tot = np.sum((Y_true - y_mean) ** 2, axis=0)
    with np.errstate(divide="ignore", invalid="ignore"):
        r2 = 1.0 - (ss_res / ss_tot)
    r2[~np.isfinite(r2)] = np.nan
    return r2

def nested_ridge_encoding(F, Y, outer_folds=5, inner_folds=3, alphas=ALPHAS, seed=SEED):
    """Nested cross-validated ridge regression from features ``F`` to responses ``Y``.

    Outer loop: shuffled K-fold (seeded) for evaluation. Inner loop: ``RidgeCV``
    with ``inner_folds`` folds picks the regularisation strength on the training
    rows only; a ``Ridge`` model is then refit at that strength and scored on the
    held-out rows with ``_fast_r2``.

    Returns:
        (mean_r2, chosen_alphas, fold_r2) where ``fold_r2`` stacks the per-fold
        scores along axis 0, ``mean_r2`` is its NaN-ignoring mean over folds, and
        ``chosen_alphas`` lists the selected alpha (as float) of each outer fold.
    """
    outer_cv = KFold(n_splits=outer_folds, shuffle=True, random_state=seed)
    per_fold_scores, selected_alphas = [], []

    for train_rows, test_rows in outer_cv.split(F):
        F_train, Y_train = F[train_rows], Y[train_rows]
        F_test, Y_test = F[test_rows], Y[test_rows]

        # Hyper-parameter selection sees the training rows only.
        selector = RidgeCV(alphas=alphas, cv=inner_folds, scoring="r2")
        selector.fit(F_train, Y_train)
        best_alpha = float(selector.alpha_)
        selected_alphas.append(best_alpha)

        # Refit at the selected alpha and evaluate on the held-out rows.
        final_model = Ridge(alpha=best_alpha)
        final_model.fit(F_train, Y_train)
        per_fold_scores.append(_fast_r2(Y_test, final_model.predict(F_test)))

    fold_r2 = np.stack(per_fold_scores, axis=0)
    return np.nanmean(fold_r2, axis=0), selected_alphas, fold_r2


In [5]:
# ---------- RSA ----------
def compute_rdm(X, metric="correlation", zscore_columns=True):
    """Build a square representational dissimilarity matrix from the rows of ``X``.

    Args:
        X: 2-D array-like, one row per item; the input is copied, never modified.
        metric: any metric name accepted by ``scipy.spatial.distance.pdist``.
        zscore_columns: if True, standardise each column (mean 0, std 1, with a
            1e-8 guard against zero variance) before computing distances.

    Returns:
        Square matrix of pairwise row distances (``squareform`` of ``pdist``).
    """
    patterns = np.asarray(X).copy()
    if zscore_columns:
        col_mean = patterns.mean(axis=0, keepdims=True)
        col_std = patterns.std(axis=0, keepdims=True)
        patterns = (patterns - col_mean) / (col_std + 1e-8)
    condensed = pdist(patterns, metric=metric)
    return squareform(condensed)

def rsa_spearman_perm(rdm1, rdm2, n_perm=1000, seed=SEED):
    """Spearman correlation between two RDMs with a two-sided permutation p-value.

    Only the strict upper triangles are compared. The null distribution is built
    by applying the same random permutation to the rows and columns of ``rdm2``
    (i.e. relabelling its items) ``n_perm`` times.

    Args:
        rdm1, rdm2: square dissimilarity matrices of identical shape.
        n_perm: number of permutations; 0 yields a p-value of 1.0.
        seed: seed for the permutation RNG.

    Returns:
        (rho, p) as Python floats, with ``p = (count + 1) / (n_perm + 1)`` where
        ``count`` is the number of permutations with ``|rho_perm| >= |rho|``.
        If ``rho`` is undefined (NaN, e.g. a constant or NaN-containing RDM),
        ``(nan, nan)`` is returned.
    """
    iu = np.triu_indices_from(rdm1, k=1)
    v1, v2 = rdm1[iu], rdm2[iu]
    rho, _ = spearmanr(v1, v2)

    if not np.isfinite(rho):
        # With rho = NaN every `abs(rp) >= abs(rho)` comparison is False, which
        # used to yield the smallest possible p-value 1/(n_perm+1), a spurious
        # "significant" result. Report the p-value as undefined instead.
        return float("nan"), float("nan")

    rng = np.random.default_rng(seed)
    n_items = rdm2.shape[0]
    abs_rho = abs(rho)
    cnt = 0
    for _ in range(n_perm):
        perm = rng.permutation(n_items)
        # Same permutation on rows and columns keeps the matrix a valid RDM.
        vp = rdm2[np.ix_(perm, perm)][iu]
        rp, _ = spearmanr(v1, vp)
        if abs(rp) >= abs_rho:
            cnt += 1
    # +1 in numerator and denominator counts the observed statistic itself.
    pval = (cnt + 1) / (n_perm + 1)
    return float(rho), float(pval)

# ---------- RESUME helper ----------
def is_done(out_dir: Path):
    """Return True iff ``out_dir`` exists and contains every required result file.

    Required files: the encoding and RSA summaries (JSON) and the two per-voxel
    R^2 arrays (NPY).
    """
    if not out_dir.exists():
        return False
    required_files = (
        "encoding_summary.json",
        "rsa_summary.json",
        "r2_per_voxel.npy",
        "r2_per_voxel_folds.npy",
    )
    for filename in required_files:
        if not (out_dir / filename).exists():
            return False
    return True


In [6]:
import time, json, numpy as np, pandas as pd
from pathlib import Path

# ---------- Logging helpers ----------
VERBOSE = True  # set False to quiet down
def log(msg):
    """Print ``msg`` with an immediate flush; a no-op while VERBOSE is False."""
    if not VERBOSE:
        return
    print(msg, flush=True)

def stamp():
    """Return the current ``time.perf_counter()`` reading, for measuring elapsed time."""
    now = time.perf_counter()
    return now

# ==========================================
# MAIN LOOP (with progress prints)
# ==========================================
plt.rcParams["figure.dpi"] = 130
summary_rows = []  # one dict per (subject, ROI) pair; turned into summary_rsa.csv after the loop

log("== Pipeline start ==")
log(f"Subjects: {len(SUBJECTS)} | ROIs: {len(ROI_KEYS)} | RESUME={RESUME} FORCE={FORCE} | DEBUG={DEBUG} DEBUG_N_IMAGES={DEBUG_N_IMAGES}")
log(f"Paths: MULTIROI_BASE={MULTIROI_BASE} | RSA_BASE={RSA_BASE}")

total_t0 = stamp()
n_pairs_total = 0  # pairs actually computed in this run (resumed pairs are not counted)

for si, subj in enumerate(SUBJECTS, start=1):
    subj_t0 = stamp()
    log(f"\n--- [SUBJECT {si}/{len(SUBJECTS)}] {subj} ---")

    meta_dir = MULTIROI_BASE / "meta"
    subj_meta = meta_dir / f"{subj}_train_images_meta.csv"
    if not subj_meta.exists():
        log(f"[SKIP] {subj}: meta CSV not found -> {subj_meta}")
        continue

    # probe ROI to get subject n_images: first configured ROI that has an array for this subject
    probe_roi = next((r for r in ROI_KEYS if (MULTIROI_BASE / r / f"{subj}.npy").exists()), None)
    if probe_roi is None:
        log(f"[SKIP] {subj}: no ROI arrays found in any ROI among {list(ROI_KEYS)}")
        continue

    Y_probe_path = MULTIROI_BASE / probe_roi / f"{subj}.npy"
    log(f"[{subj}] Probe ROI: {probe_roi} -> {Y_probe_path}")
    # Memory-map: only the shape is needed here, not the data.
    Y_probe = np.load(Y_probe_path, mmap_mode="r")
    n_imgs_subj = Y_probe.shape[0]
    if DEBUG and DEBUG_N_IMAGES is not None:
        n_imgs_subj = min(n_imgs_subj, DEBUG_N_IMAGES)
    log(f"[{subj}] n_images (after DEBUG cap if any): {n_imgs_subj}")

    # features once per subject (shared by all ROIs)
    t0 = stamp()
    log(f"[{subj}] Getting features via get_or_make_features(...).")
    F_full, feat_tag = get_or_make_features(subj, n_imgs_subj, meta_dir)
    F_full = F_full[:n_imgs_subj]
    log(f"[{subj}] Features ready: F_full shape={F_full.shape} | feature_model={feat_tag} | took {stamp()-t0:.2f}s")

    # ---------- Randomized RSA subset (subject-level), with alignment ----------
    # The same image indices are used for the feature RDM and for every ROI's fMRI RDM.
    if N_RDM_IMAGES:
        n_req = min(N_RDM_IMAGES, n_imgs_subj)
        log(f"[{subj}] Choosing RSA subset: requested={N_RDM_IMAGES}, effective={n_req}")
        rdm_idx_subj = choose_random_indices(n_imgs_subj, n_req, seed=SEED)
    else:
        rdm_idx_subj = np.arange(n_imgs_subj)
        log(f"[{subj}] Using all images for RSA subset: {len(rdm_idx_subj)}")

    # Precompute feature RDM for the chosen subset (same order as rdm_idx_subj)
    t0 = stamp()
    F_rdm = F_full[rdm_idx_subj]
    log(f"[{subj}] Computing feature RDM on F_rdm shape={F_rdm.shape}")
    rdm_feat_subj = compute_rdm(F_rdm, metric="cosine", zscore_columns=True)
    log(f"[{subj}] Feature RDM done, shape={rdm_feat_subj.shape} | took {stamp()-t0:.2f}s")

    # Keep a record of the indices used for RSA at the subject level
    subj_rdm_idx_dir = RSA_BASE / "rdm_indices" / subj
    subj_rdm_idx_dir.mkdir(parents=True, exist_ok=True)
    np.save(subj_rdm_idx_dir / "rdm_indices.npy", rdm_idx_subj)
    log(f"[{subj}] Saved RSA subset indices -> {subj_rdm_idx_dir/'rdm_indices.npy'}")

    for ri, roi in enumerate(ROI_KEYS, start=1):
        roi_file = MULTIROI_BASE / roi / f"{subj}.npy"
        if not roi_file.exists():
            log(f"[SKIP] {subj} {roi}: ROI array missing -> {roi_file}")
            continue

        out_dir = RSA_BASE / roi / subj
        (out_dir / "plots").mkdir(parents=True, exist_ok=True)

        if RESUME and not FORCE and is_done(out_dir):
            log(f"[RESUME] skip {subj} {roi} (already done) -> {out_dir}")
            # Rebuild the summary row from the saved JSON files. Without this, a
            # resumed run would overwrite summary_rsa.csv with only the pairs
            # computed in this session.
            try:
                with open(out_dir / "encoding_summary.json") as f:
                    enc_saved = json.load(f)
                with open(out_dir / "rsa_summary.json") as f:
                    rsa_saved = json.load(f)
                summary_rows.append({
                    "subject": subj,
                    "roi": roi,
                    "feature_model": enc_saved["feature_model"],
                    "n_images": enc_saved["n_images"],
                    "n_voxels": enc_saved["n_voxels"],
                    "r2_mean": enc_saved["r2_mean"],
                    "r2_median": enc_saved["r2_median"],
                    "r2_top10_mean": enc_saved["r2_top10_mean"],
                    "rsa_rho": rsa_saved["rho_spearman"],
                    "rsa_p": rsa_saved["p_value_perm"]
                })
            except (OSError, ValueError, KeyError) as e:
                # Best effort: an unreadable summary must not abort the whole run.
                log(f"[WARN] {subj} {roi}: could not reload saved summaries ({e}); pair omitted from summary CSV.")
            continue

        log(f"[RUN ] {subj} {roi} | out_dir={out_dir}")
        pair_t0 = stamp()

        # --- Load Y and align ---
        Y = np.load(roi_file)
        if DEBUG and DEBUG_N_IMAGES is not None:
            Y = Y[:n_imgs_subj]

        # Truncate both to the common number of rows so row i of F matches row i of Y.
        n = min(F_full.shape[0], Y.shape[0])
        F = F_full[:n]
        Y = Y[:n]
        log(f"[{subj} {roi}] Shapes aligned: F={F.shape}, Y={Y.shape}")

        # --- Encoding ---
        t0 = stamp()
        log(f"[{subj} {roi}] Encoding start: OUTER_FOLDS={OUTER_FOLDS}, INNER_FOLDS={INNER_FOLDS}, |ALPHAS|={len(ALPHAS)}")
        mean_r2, chosen_alphas, fold_r2 = nested_ridge_encoding(
            F, Y, OUTER_FOLDS, INNER_FOLDS, ALPHAS, SEED
        )
        enc_dur = stamp() - t0

        # Top-10% mean over voxels with a defined R^2. np.partition orders NaN
        # last, so NaN voxels would otherwise land in the "top" slice and turn
        # the statistic into NaN, unlike the nanmean/nanmedian below.
        finite_r2 = mean_r2[np.isfinite(mean_r2)]
        if finite_r2.size:
            k_top = max(1, int(0.10 * finite_r2.size))
            top10_mean = float(np.mean(np.partition(finite_r2, -k_top)[-k_top:]))
        else:
            top10_mean = float("nan")
        enc_mean = float(np.nanmean(mean_r2))
        enc_median = float(np.nanmedian(mean_r2))
        log(f"[{subj} {roi}] Encoding done in {enc_dur:.2f}s | r2_mean={enc_mean:.4f}, r2_median={enc_median:.4f}, r2_top10_mean={top10_mean:.4f}")

        np.save(out_dir / "r2_per_voxel.npy", mean_r2)
        np.save(out_dir / "r2_per_voxel_folds.npy", fold_r2)
        with open(out_dir / "encoding_summary.json", "w") as f:
            json.dump({
                "subject": subj,
                "roi": roi,
                "n_images": int(n),
                "n_voxels": int(Y.shape[1]),
                "feature_model": feat_tag,
                "outer_folds": OUTER_FOLDS,
                "inner_folds": INNER_FOLDS,
                "alphas": list(map(float, ALPHAS)),
                "chosen_alphas": list(map(float, chosen_alphas)),
                "r2_mean": enc_mean,
                "r2_median": enc_median,
                "r2_top10_mean": top10_mean
            }, f, indent=2)
        log(f"[{subj} {roi}] Saved encoding outputs.")

        if SAVE_PLOTS:
            log(f"[{subj} {roi}] Saving encoding histogram plot...")
            plt.figure()
            plt.hist(mean_r2, bins=40)
            plt.title(f"Ridge encoding R² — {subj} {roi} ({feat_tag})\n"
                      f"mean={enc_mean:.3f}, median={enc_median:.3f}")
            plt.xlabel("R² per voxel"); plt.ylabel("Count")
            plt.tight_layout()
            plt.savefig(out_dir / "plots" / "encoding_r2_hist.png")
            plt.close()

        # --- RSA (use the SAME randomized indices, filtered to within n) ---
        # Select the surviving items by mask on both the index list and the
        # precomputed feature RDM, so the two stay aligned regardless of the
        # ordering of rdm_idx_subj.
        keep = rdm_idx_subj < n
        idx_eff = rdm_idx_subj[keep]
        n_rdm_eff = len(idx_eff)
        log(f"[{subj} {roi}] RSA subset effective size: {n_rdm_eff} (<= n={n})")

        t0 = stamp()
        Y_rdm = Y[idx_eff]
        rdm_fmri = compute_rdm(Y_rdm, metric="correlation", zscore_columns=True)
        rdm_feat_eff = rdm_feat_subj[np.ix_(keep, keep)]
        rsa_build_dur = stamp() - t0
        log(f"[{subj} {roi}] Built fMRI/feature RDMs in {rsa_build_dur:.2f}s | shapes fmri={rdm_fmri.shape}, feat={rdm_feat_eff.shape}")

        t0 = stamp()
        if DO_PERM and N_PERM > 0:
            log(f"[{subj} {roi}] RSA Spearman with permutations: N_PERM={N_PERM}")
            rho, pval = rsa_spearman_perm(rdm_fmri, rdm_feat_eff, n_perm=N_PERM, seed=SEED)
        else:
            # No permutation test requested: report rho only, p-value undefined.
            iu = np.triu_indices_from(rdm_fmri, k=1)
            rho, _ = spearmanr(rdm_fmri[iu], rdm_feat_eff[iu])
            pval = float("nan")
        rsa_stat_dur = stamp() - t0
        log(f"[{subj} {roi}] RSA done in {rsa_stat_dur:.2f}s | rho={rho:.4f}, p={pval}")

        if SAVE_RDM_MATS:
            np.save(out_dir / "rdm_fmri.npy", rdm_fmri)
            np.save(out_dir / "rdm_feat.npy", rdm_feat_eff)
            np.save(out_dir / "rdm_indices_used.npy", idx_eff)
            log(f"[{subj} {roi}] Saved RDM matrices + indices.")

        with open(out_dir / "rsa_summary.json", "w") as f:
            json.dump({
                "subject": subj, "roi": roi,
                "feature_model": feat_tag,
                "rho_spearman": float(rho),
                "p_value_perm": float(pval),
                "n_perm": int(N_PERM if DO_PERM else 0),
                "n_rdm_images": int(n_rdm_eff)
            }, f, indent=2)
        log(f"[{subj} {roi}] Saved RSA summary.")

        if SAVE_PLOTS:
            log(f"[{subj} {roi}] Saving RSA plots...")
            fig = plt.figure(figsize=(5,4))
            plt.imshow(rdm_fmri, interpolation="none"); plt.colorbar()
            plt.title(f"RDM fMRI — {subj} {roi}")
            plt.tight_layout(); plt.savefig(out_dir / "plots" / "rdm_fmri.png"); plt.close(fig)

            fig = plt.figure(figsize=(5,4))
            plt.imshow(rdm_feat_eff, interpolation="none"); plt.colorbar()
            plt.title(f"RDM Features — {subj} ({feat_tag})")
            plt.tight_layout(); plt.savefig(out_dir / "plots" / "rdm_feat.png"); plt.close(fig)

            iu = np.triu_indices_from(rdm_fmri, k=1)
            plt.figure(figsize=(4.2,4.2))
            plt.scatter(rdm_feat_eff[iu], rdm_fmri[iu], s=6, alpha=0.5)
            plt.xlabel("Feature distances"); plt.ylabel("fMRI distances")
            plt.title(f"RSA: ρ={rho:.3f}, p={pval:.4f}")
            plt.tight_layout(); plt.savefig(out_dir / "plots" / "rsa_scatter.png"); plt.close()

        summary_rows.append({
            "subject": subj,
            "roi": roi,
            "feature_model": feat_tag,
            "n_images": n,
            "n_voxels": Y.shape[1],
            "r2_mean": enc_mean,
            "r2_median": enc_median,
            "r2_top10_mean": top10_mean,
            "rsa_rho": float(rho),
            "rsa_p": float(pval)
        })
        n_pairs_total += 1
        log(f"[{subj} {roi}] Pair complete in {stamp()-pair_t0:.2f}s.")

    log(f"--- [{subj}] Subject complete in {stamp()-subj_t0:.2f}s ---")

# save global summary
# Write the collected per-pair rows to one CSV and show them; if nothing was
# collected, point the user at the settings that can cause that.
if not summary_rows:
    log("\n== No results produced. Check paths/DEBUG/RESUME/FORCE settings. ==")
else:
    df_sum = pd.DataFrame(summary_rows)
    RSA_BASE.mkdir(parents=True, exist_ok=True)
    df_sum.to_csv(RSA_BASE / "summary_rsa.csv", index=False)
    log(f"\n== DONE: wrote {len(df_sum)} rows to {RSA_BASE/'summary_rsa.csv'} ==")
    log(f"Total (subj,roi) pairs processed: {n_pairs_total} | total time {stamp()-total_t0:.2f}s")
    # Rich table when IPython is available; plain-text preview otherwise.
    try:
        from IPython.display import display
        display(df_sum)
    except Exception:
        print(df_sum.head())


== Pipeline start ==
Subjects: 8 | ROIs: 3 | RESUME=False FORCE=True | DEBUG=False DEBUG_N_IMAGES=None
Paths: MULTIROI_BASE=/content/drive/MyDrive/algonauts_outputs/multiROI | RSA_BASE=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip

--- [SUBJECT 1/8] subj01 ---
[subj01] Probe ROI: EBA -> /content/drive/MyDrive/algonauts_outputs/multiROI/EBA/subj01.npy
[subj01] n_images (after DEBUG cap if any): 9841
[subj01] Getting features via get_or_make_features(...).


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


open_clip_model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

CLIP feats [subj01]: 100%|██████████| 308/308 [28:37<00:00,  5.58s/it]

[subj01] Features ready: F_full shape=(9841, 512) | feature_model=CLIP_ViT-B-32 | took 1723.70s
[subj01] Choosing RSA subset: requested=600, effective=600
[subj01] Computing feature RDM on F_rdm shape=(600, 512)
[subj01] Feature RDM done, shape=(600, 600) | took 0.07s
[subj01] Saved RSA subset indices -> /content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/rdm_indices/subj01/rdm_indices.npy
[RUN ] subj01 EBA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/EBA/subj01





[subj01 EBA] Shapes aligned: F=(9841, 512), Y=(9841, 6237)
[subj01 EBA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj01 EBA] Encoding done in 33.54s | r2_mean=0.3066, r2_median=0.2840, r2_top10_mean=0.5973
[subj01 EBA] Saved encoding outputs.
[subj01 EBA] RSA subset effective size: 600 (<= n=9841)
[subj01 EBA] Built fMRI/feature RDMs in 0.89s | shapes fmri=(600, 600), feat=(600, 600)
[subj01 EBA] RSA Spearman with permutations: N_PERM=200
[subj01 EBA] RSA done in 12.18s | rho=0.2150, p=0.004975124378109453
[subj01 EBA] Saved RSA summary.
[subj01 EBA] Pair complete in 48.94s.
[RUN ] subj01 FFA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/FFA/subj01
[subj01 FFA] Shapes aligned: F=(9841, 512), Y=(9841, 1511)
[subj01 FFA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj01 FFA] Encoding done in 15.66s | r2_mean=0.2987, r2_median=0.2615, r2_top10_mean=0.6473
[subj01 FFA] Saved encoding outputs.
[subj01 FFA] RSA subset effective si

CLIP feats [subj02]: 100%|██████████| 308/308 [28:43<00:00,  5.59s/it]

[subj02] Features ready: F_full shape=(9841, 512) | feature_model=CLIP_ViT-B-32 | took 1725.53s
[subj02] Choosing RSA subset: requested=600, effective=600
[subj02] Computing feature RDM on F_rdm shape=(600, 512)
[subj02] Feature RDM done, shape=(600, 600) | took 0.07s
[subj02] Saved RSA subset indices -> /content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/rdm_indices/subj02/rdm_indices.npy
[RUN ] subj02 EBA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/EBA/subj02





[subj02 EBA] Shapes aligned: F=(9841, 512), Y=(9841, 6709)
[subj02 EBA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj02 EBA] Encoding done in 35.38s | r2_mean=0.3006, r2_median=0.2936, r2_top10_mean=0.5869
[subj02 EBA] Saved encoding outputs.
[subj02 EBA] RSA subset effective size: 600 (<= n=9841)
[subj02 EBA] Built fMRI/feature RDMs in 0.93s | shapes fmri=(600, 600), feat=(600, 600)
[subj02 EBA] RSA Spearman with permutations: N_PERM=200
[subj02 EBA] RSA done in 11.99s | rho=0.2139, p=0.004975124378109453
[subj02 EBA] Saved RSA summary.
[subj02 EBA] Pair complete in 52.02s.
[RUN ] subj02 FFA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/FFA/subj02
[subj02 FFA] Shapes aligned: F=(9841, 512), Y=(9841, 1766)
[subj02 FFA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj02 FFA] Encoding done in 16.07s | r2_mean=0.2295, r2_median=0.1896, r2_top10_mean=0.5531
[subj02 FFA] Saved encoding outputs.
[subj02 FFA] RSA subset effective si

CLIP feats [subj03]: 100%|██████████| 284/284 [26:01<00:00,  5.50s/it]

[subj03] Features ready: F_full shape=(9082, 512) | feature_model=CLIP_ViT-B-32 | took 1563.91s
[subj03] Choosing RSA subset: requested=600, effective=600
[subj03] Computing feature RDM on F_rdm shape=(600, 512)
[subj03] Feature RDM done, shape=(600, 600) | took 0.07s
[subj03] Saved RSA subset indices -> /content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/rdm_indices/subj03/rdm_indices.npy
[RUN ] subj03 EBA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/EBA/subj03





[subj03 EBA] Shapes aligned: F=(9082, 512), Y=(9082, 6585)
[subj03 EBA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj03 EBA] Encoding done in 30.96s | r2_mean=0.2129, r2_median=0.1984, r2_top10_mean=0.4584
[subj03 EBA] Saved encoding outputs.
[subj03 EBA] RSA subset effective size: 600 (<= n=9082)
[subj03 EBA] Built fMRI/feature RDMs in 0.93s | shapes fmri=(600, 600), feat=(600, 600)
[subj03 EBA] RSA Spearman with permutations: N_PERM=200
[subj03 EBA] RSA done in 12.22s | rho=0.1965, p=0.004975124378109453
[subj03 EBA] Saved RSA summary.
[subj03 EBA] Pair complete in 46.96s.
[RUN ] subj03 FFA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/FFA/subj03
[subj03 FFA] Shapes aligned: F=(9082, 512), Y=(9082, 1752)
[subj03 FFA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj03 FFA] Encoding done in 14.92s | r2_mean=0.2528, r2_median=0.2124, r2_top10_mean=0.5835
[subj03 FFA] Saved encoding outputs.
[subj03 FFA] RSA subset effective si

CLIP feats [subj04]: 100%|██████████| 275/275 [26:16<00:00,  5.73s/it]

[subj04] Features ready: F_full shape=(8779, 512) | feature_model=CLIP_ViT-B-32 | took 1579.24s
[subj04] Choosing RSA subset: requested=600, effective=600
[subj04] Computing feature RDM on F_rdm shape=(600, 512)
[subj04] Feature RDM done, shape=(600, 600) | took 0.07s
[subj04] Saved RSA subset indices -> /content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/rdm_indices/subj04/rdm_indices.npy
[RUN ] subj04 EBA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/EBA/subj04





[subj04 EBA] Shapes aligned: F=(8779, 512), Y=(8779, 8100)
[subj04 EBA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj04 EBA] Encoding done in 37.46s | r2_mean=0.2373, r2_median=0.2058, r2_top10_mean=0.5560
[subj04 EBA] Saved encoding outputs.
[subj04 EBA] RSA subset effective size: 600 (<= n=8779)
[subj04 EBA] Built fMRI/feature RDMs in 1.15s | shapes fmri=(600, 600), feat=(600, 600)
[subj04 EBA] RSA Spearman with permutations: N_PERM=200
[subj04 EBA] RSA done in 12.03s | rho=0.2005, p=0.004975124378109453
[subj04 EBA] Saved RSA summary.
[subj04 EBA] Pair complete in 51.93s.
[RUN ] subj04 FFA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/FFA/subj04
[subj04 FFA] Shapes aligned: F=(8779, 512), Y=(8779, 2026)
[subj04 FFA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj04 FFA] Encoding done in 15.84s | r2_mean=0.1991, r2_median=0.1535, r2_top10_mean=0.5448
[subj04 FFA] Saved encoding outputs.
[subj04 FFA] RSA subset effective si

CLIP feats [subj05]: 100%|██████████| 308/308 [29:07<00:00,  5.67s/it]

[subj05] Features ready: F_full shape=(9841, 512) | feature_model=CLIP_ViT-B-32 | took 1750.10s
[subj05] Choosing RSA subset: requested=600, effective=600
[subj05] Computing feature RDM on F_rdm shape=(600, 512)
[subj05] Feature RDM done, shape=(600, 600) | took 0.07s
[subj05] Saved RSA subset indices -> /content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/rdm_indices/subj05/rdm_indices.npy
[RUN ] subj05 EBA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/EBA/subj05





[subj05 EBA] Shapes aligned: F=(9841, 512), Y=(9841, 9359)
[subj05 EBA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj05 EBA] Encoding done in 44.98s | r2_mean=0.3226, r2_median=0.3050, r2_top10_mean=0.6583
[subj05 EBA] Saved encoding outputs.
[subj05 EBA] RSA subset effective size: 600 (<= n=9841)
[subj05 EBA] Built fMRI/feature RDMs in 1.37s | shapes fmri=(600, 600), feat=(600, 600)
[subj05 EBA] RSA Spearman with permutations: N_PERM=200
[subj05 EBA] RSA done in 12.42s | rho=0.2344, p=0.004975124378109453
[subj05 EBA] Saved RSA summary.
[subj05 EBA] Pair complete in 61.72s.
[RUN ] subj05 FFA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/FFA/subj05
[subj05 FFA] Shapes aligned: F=(9841, 512), Y=(9841, 2185)
[subj05 FFA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj05 FFA] Encoding done in 16.70s | r2_mean=0.3155, r2_median=0.3032, r2_top10_mean=0.6382
[subj05 FFA] Saved encoding outputs.
[subj05 FFA] RSA subset effective si

CLIP feats [subj06]: 100%|██████████| 284/284 [27:09<00:00,  5.74s/it]

[subj06] Features ready: F_full shape=(9082, 512) | feature_model=CLIP_ViT-B-32 | took 1631.36s
[subj06] Choosing RSA subset: requested=600, effective=600
[subj06] Computing feature RDM on F_rdm shape=(600, 512)
[subj06] Feature RDM done, shape=(600, 600) | took 0.07s
[subj06] Saved RSA subset indices -> /content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/rdm_indices/subj06/rdm_indices.npy
[RUN ] subj06 EBA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/EBA/subj06





[subj06 EBA] Shapes aligned: F=(9082, 512), Y=(9082, 7665)
[subj06 EBA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj06 EBA] Encoding done in 35.84s | r2_mean=0.2117, r2_median=0.1933, r2_top10_mean=0.4524
[subj06 EBA] Saved encoding outputs.
[subj06 EBA] RSA subset effective size: 600 (<= n=9082)
[subj06 EBA] Built fMRI/feature RDMs in 1.10s | shapes fmri=(600, 600), feat=(600, 600)
[subj06 EBA] RSA Spearman with permutations: N_PERM=200
[subj06 EBA] RSA done in 12.15s | rho=0.1593, p=0.004975124378109453
[subj06 EBA] Saved RSA summary.
[subj06 EBA] Pair complete in 50.48s.
[RUN ] subj06 FFA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/FFA/subj06
[subj06 FFA] Shapes aligned: F=(9082, 512), Y=(9082, 1445)
[subj06 FFA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj06 FFA] Encoding done in 14.85s | r2_mean=0.2080, r2_median=0.1715, r2_top10_mean=0.5192
[subj06 FFA] Saved encoding outputs.
[subj06 FFA] RSA subset effective si

CLIP feats [subj07]: 100%|██████████| 308/308 [30:57<00:00,  6.03s/it]

[subj07] Features ready: F_full shape=(9841, 512) | feature_model=CLIP_ViT-B-32 | took 1859.48s
[subj07] Choosing RSA subset: requested=600, effective=600
[subj07] Computing feature RDM on F_rdm shape=(600, 512)
[subj07] Feature RDM done, shape=(600, 600) | took 0.07s
[subj07] Saved RSA subset indices -> /content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/rdm_indices/subj07/rdm_indices.npy





[RUN ] subj07 EBA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/EBA/subj07
[subj07 EBA] Shapes aligned: F=(9841, 512), Y=(9841, 7254)
[subj07 EBA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj07 EBA] Encoding done in 38.83s | r2_mean=0.2253, r2_median=0.1991, r2_top10_mean=0.5012
[subj07 EBA] Saved encoding outputs.
[subj07 EBA] RSA subset effective size: 600 (<= n=9841)
[subj07 EBA] Built fMRI/feature RDMs in 1.09s | shapes fmri=(600, 600), feat=(600, 600)
[subj07 EBA] RSA Spearman with permutations: N_PERM=200
[subj07 EBA] RSA done in 12.14s | rho=0.1962, p=0.004975124378109453
[subj07 EBA] Saved RSA summary.
[subj07 EBA] Pair complete in 54.36s.
[RUN ] subj07 FFA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/FFA/subj07
[subj07 FFA] Shapes aligned: F=(9841, 512), Y=(9841, 1142)
[subj07 FFA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj07 FFA] Encoding done in 13.92s | r2_mean=0.2283, r2_medi

CLIP feats [subj08]: 100%|██████████| 275/275 [29:02<00:00,  6.33s/it]

[subj08] Features ready: F_full shape=(8779, 512) | feature_model=CLIP_ViT-B-32 | took 1744.32s
[subj08] Choosing RSA subset: requested=600, effective=600
[subj08] Computing feature RDM on F_rdm shape=(600, 512)
[subj08] Feature RDM done, shape=(600, 600) | took 0.07s
[subj08] Saved RSA subset indices -> /content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/rdm_indices/subj08/rdm_indices.npy
[RUN ] subj08 EBA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/EBA/subj08





[subj08 EBA] Shapes aligned: F=(8779, 512), Y=(8779, 6774)
[subj08 EBA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj08 EBA] Encoding done in 31.61s | r2_mean=0.1140, r2_median=0.0886, r2_top10_mean=0.3351
[subj08 EBA] Saved encoding outputs.
[subj08 EBA] RSA subset effective size: 600 (<= n=8779)
[subj08 EBA] Built fMRI/feature RDMs in 0.94s | shapes fmri=(600, 600), feat=(600, 600)
[subj08 EBA] RSA Spearman with permutations: N_PERM=200
[subj08 EBA] RSA done in 12.19s | rho=0.1301, p=0.004975124378109453
[subj08 EBA] Saved RSA summary.
[subj08 EBA] Pair complete in 47.68s.
[RUN ] subj08 FFA | out_dir=/content/drive/MyDrive/algonauts_outputs/encoding_rsa_random_clip/FFA/subj08
[subj08 FFA] Shapes aligned: F=(8779, 512), Y=(8779, 2945)
[subj08 FFA] Encoding start: OUTER_FOLDS=3, INNER_FOLDS=2, |ALPHAS|=8
[subj08 FFA] Encoding done in 18.93s | r2_mean=0.1670, r2_median=0.1238, r2_top10_mean=0.4957
[subj08 FFA] Saved encoding outputs.
[subj08 FFA] RSA subset effective si

Unnamed: 0,subject,roi,feature_model,n_images,n_voxels,r2_mean,r2_median,r2_top10_mean,rsa_rho,rsa_p
0,subj01,EBA,CLIP_ViT-B-32,9841,6237,0.306595,0.283974,0.59734,0.214989,0.004975
1,subj01,FFA,CLIP_ViT-B-32,9841,1511,0.298692,0.261487,0.647296,0.199529,0.004975
2,subj01,PPA,CLIP_ViT-B-32,9841,2202,0.272191,0.261158,0.487026,0.176049,0.004975
3,subj02,EBA,CLIP_ViT-B-32,9841,6709,0.300571,0.293585,0.586923,0.213927,0.004975
4,subj02,FFA,CLIP_ViT-B-32,9841,1766,0.229466,0.189629,0.5531,0.17536,0.004975
5,subj02,PPA,CLIP_ViT-B-32,9841,2762,0.34551,0.363768,0.563257,0.17249,0.004975
6,subj03,EBA,CLIP_ViT-B-32,9082,6585,0.212855,0.19841,0.458421,0.196535,0.004975
7,subj03,FFA,CLIP_ViT-B-32,9082,1752,0.252767,0.212405,0.583455,0.183595,0.004975
8,subj03,PPA,CLIP_ViT-B-32,9082,3763,0.216577,0.216806,0.414908,0.13998,0.004975
9,subj04,EBA,CLIP_ViT-B-32,8779,8100,0.237306,0.205805,0.555998,0.200496,0.004975
