In [1]:
# Mount Google Drive at /content/drive (Colab-only: relies on the google.colab package).
# The paths built in later cells (/content/drive/...) only resolve after this call.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# point to shortcut folder of algonauts_2023_tutorial_data
from pathlib import Path
import numpy as np

# Drive folder ID of the shared dataset shortcut; BASE is where the mounted Drive
# exposes that shortcut's target.
FOLDER_ID = "1DUf3nGNNFk6YjRjQtZPfAY5N105GoGJb"
BASE = Path(f"/content/drive/.shortcut-targets-by-id/{FOLDER_ID}")

# Drop into the inner tutorial folder. DATA_ROOT is the name the extraction cell
# below expects to find already defined.
DATA_ROOT = BASE / "algonauts_2023_tutorial_data"
print("DATA_ROOT exists?", DATA_ROOT.exists(), DATA_ROOT)

# List subjects (IPython shell escape; "$DATA_ROOT" interpolates the Python variable).
!ls -lah "$DATA_ROOT"

# Verify that the left-/right-hemisphere training fMRI files exist for subj01.
lh = DATA_ROOT / "subj01/training_split/training_fmri/lh_training_fmri.npy"
rh = DATA_ROOT / "subj01/training_split/training_fmri/rh_training_fmri.npy"
print("LH file exists?", lh.exists())
print("RH file exists?", rh.exists())

# Quick shape check. NOTE: np.load without mmap_mode reads both arrays fully into
# memory; np.load raises if either file reported missing above.
X_lh = np.load(lh)
X_rh = np.load(rh)
# Bare last expression so the notebook displays the two shapes as the cell output.
X_lh.shape, X_rh.shape


DATA_ROOT exists? True /content/drive/.shortcut-targets-by-id/1DUf3nGNNFk6YjRjQtZPfAY5N105GoGJb/algonauts_2023_tutorial_data
total 45K
-r-------- 1 root root  13K Jan 21  2023 README.txt
dr-x------ 2 root root 4.0K Dec  2  2022 subj01
dr-x------ 2 root root 4.0K Dec  2  2022 subj02
dr-x------ 2 root root 4.0K Dec  2  2022 subj03
dr-x------ 2 root root 4.0K Dec  2  2022 subj04
dr-x------ 2 root root 4.0K Dec  2  2022 subj05
dr-x------ 2 root root 4.0K Dec  2  2022 subj06
dr-x------ 2 root root 4.0K Dec  2  2022 subj07
dr-x------ 2 root root 4.0K Dec  3  2022 subj08
LH file exists? True
RH file exists? True


((9841, 19004), (9841, 20544))

In [None]:
import numpy as np, pandas as pd, re
from pathlib import Path

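"""
Batch-extract bilateral ROI response matrices for multiple subjects and ROIs (Algonauts 2023).

For every subject in SUBJECTS and every entry in ROI_SPECS, the training fMRI columns
selected by the left- and right-hemisphere ROI masks are concatenated (LH columns first,
then RH) and saved to OUT_BASE/<roi_key>/<subject>.npy. Per-subject training-image
metadata and a vertex-count summary are written as CSV files under OUT_BASE.
"""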

from pathlib import Path
import numpy as np
import pandas as pd
import re

# ---------- DEBUG SWITCH ----------
# When DEBUG is True the run is narrowed to one subject and one ROI (see below).
DEBUG = False
DEBUG_SUBJECT   = "subj01"     # one subject to test
DEBUG_ROI_KEY   = "FFA"        # one ROI key from ROI_SPECS below
DEBUG_N_IMAGES  = None          # None for all images; else trims to first N

# ---------- CONFIG ----------
SUBJECTS  = [f"subj{idx:02d}" for idx in range(1, 9)]
# Key = short label (also the output sub-folder name); Value = (roi_class, [labels]).
# roi_class selects the mask/mapping files; labels are the ROI names merged into one matrix.
ROI_SPECS = {
    # Bodies
    "EBA": ("floc-bodies", ["EBA"]),

    # Faces (FFA-1/FFA-2 together)
    "FFA": ("floc-faces", ["FFA-1", "FFA-2"]),

    # Places
    "PPA": ("floc-places", ["PPA"]),
}


OUT_BASE = Path("/content/drive/MyDrive/algonauts_outputs/multiROI")

# Ensure DATA_ROOT is defined AND points at an existing directory. Checking only that
# the name is bound would let a missing path (e.g. Drive not mounted) fall through to
# a run that skips every subject and writes an empty summary.
try:
    DATA_ROOT
except NameError:
    # `from None` keeps the traceback focused on the actionable message.
    raise RuntimeError("Please define DATA_ROOT to your Algonauts folder before running this cell.") from None
if not Path(DATA_ROOT).is_dir():
    raise FileNotFoundError(f"DATA_ROOT does not exist or is not a directory: {DATA_ROOT}")

# Create the output folders only once the input location has been validated.
(OUT_BASE / "meta").mkdir(parents=True, exist_ok=True)

# Apply DEBUG narrowing
if DEBUG:
    if DEBUG_ROI_KEY not in ROI_SPECS:
        raise ValueError(f"DEBUG_ROI_KEY '{DEBUG_ROI_KEY}' not in ROI_SPECS keys: {list(ROI_SPECS.keys())}")
    # A negative limit would turn the later `[:n]` slice into "drop the last |n| images".
    if DEBUG_N_IMAGES is not None and int(DEBUG_N_IMAGES) < 0:
        raise ValueError(f"DEBUG_N_IMAGES must be None or a non-negative integer, got {DEBUG_N_IMAGES!r}")
    SUBJECTS = [DEBUG_SUBJECT]
    ROI_SPECS = {DEBUG_ROI_KEY: ROI_SPECS[DEBUG_ROI_KEY]}
print(f"[RUN] DEBUG={DEBUG} | subjects={SUBJECTS} | rois={list(ROI_SPECS.keys())} | n_images_limit={DEBUG_N_IMAGES}")

# ---------- HELPERS ----------
def load_training_fmri(sub_dir: Path):
    """Load the left- and right-hemisphere training fMRI arrays of one subject.

    Parameters
    ----------
    sub_dir : Path
        Subject folder; the arrays are read from
        ``training_split/training_fmri/{lh,rh}_training_fmri.npy`` beneath it.

    Returns
    -------
    tuple
        ``(X_lh, X_rh)`` exactly as returned by ``np.load``.

    Raises
    ------
    FileNotFoundError
        If either hemisphere file is missing.
    ValueError
        If the two arrays differ in their first dimension.
    """
    fmri_dir = sub_dir / "training_split" / "training_fmri"
    lh = fmri_dir / "lh_training_fmri.npy"
    rh = fmri_dir / "rh_training_fmri.npy"

    both_present = lh.exists() and rh.exists()
    if not both_present:
        raise FileNotFoundError(f"Missing training fMRI:\n  {lh}\n  {rh}")

    # Left hemisphere is loaded first, then right.
    X_lh, X_rh = (np.load(npy_path) for npy_path in (lh, rh))

    # Both hemispheres must have the same number of rows.
    n_rows_lh, n_rows_rh = X_lh.shape[0], X_rh.shape[0]
    if n_rows_lh != n_rows_rh:
        raise ValueError(f"LH/RH row mismatch: {X_lh.shape} vs {X_rh.shape}")
    return X_lh, X_rh

def get_roi_label_map(sub_dir: Path, roi_class: str, hemi: str):
    """Load one hemisphere's ROI mask and the label mapping of an ROI class.

    Parameters
    ----------
    sub_dir : Path
        Subject folder containing a ``roi_masks`` sub-folder.
    roi_class : str
        ROI class name used in the file names (e.g. ``"floc-faces"``).
    hemi : str
        Hemisphere prefix of the mask file (the callers in this file pass
        ``"lh"`` or ``"rh"``).

    Returns
    -------
    tuple
        ``(mask, label_map)``: the array stored in
        ``<hemi>.<roi_class>_challenge_space.npy`` and the Python object stored in
        ``mapping_<roi_class>.npy`` (unwrapped with ``.item()``; callers in this
        file treat it as a dict).

    Raises
    ------
    FileNotFoundError
        If the mask file or the mapping file is missing.
    """
    roi_dir = sub_dir / "roi_masks"
    mask_path = roi_dir / f"{hemi}.{roi_class}_challenge_space.npy"
    map_path = roi_dir / f"mapping_{roi_class}.npy"

    if not (mask_path.exists() and map_path.exists()):
        raise FileNotFoundError(f"Missing ROI files:\n  {mask_path}\n  {map_path}")

    mask = np.load(mask_path)
    # SECURITY NOTE: allow_pickle=True unpickles the file's contents, which can run
    # arbitrary code. Only point this at mapping files from a trusted source.
    pickled_mapping = np.load(map_path, allow_pickle=True)
    label_map = pickled_mapping.item()
    return mask, label_map

def roi_bool_from_labels(mask: np.ndarray, label_map: dict, desired_names):
    """Build a boolean mask selecting the entries that belong to the named ROIs.

    Parameters
    ----------
    mask : np.ndarray
        Array of ROI ids (one per entry).
    label_map : dict
        Mapping from ROI id to ROI name; ids whose name is requested are selected.
    desired_names : str or iterable of str
        ROI name(s) to select. A single string is treated as one label rather
        than being split into characters, which would silently match nothing.

    Returns
    -------
    np.ndarray
        Boolean array shaped like ``mask``; True where the id in ``mask`` maps to
        one of the requested names. All False when no requested name is present
        in ``label_map``.
    """
    # set("EBA") would be {"E", "B", "A"}; wrap a bare string so it stays one label.
    if isinstance(desired_names, str):
        desired_names = [desired_names]
    wanted = set(desired_names)

    ids = [roi_id for roi_id, roi_name in label_map.items() if roi_name in wanted]
    if not ids:
        return np.zeros_like(mask, dtype=bool)
    return np.isin(mask, ids)

def parse_training_image_meta(imgs_dir: Path, n_imgs: int) -> pd.DataFrame:
    """Collect filename / training index / NSD id for the first ``n_imgs`` training images.

    File names are expected to look like ``train-<idx>_nsd-<id>.png``. Files are
    ordered by their parsed training index (numerically), so the order stays
    correct even if the indices are not zero-padded; a plain lexicographic sort
    would place ``train-10`` before ``train-2``. ``*.png`` files whose name does
    not match the pattern are placed after all matching files (sorted by name)
    and get ``None`` for both parsed fields.

    Parameters
    ----------
    imgs_dir : Path
        Folder containing the training ``*.png`` images.
    n_imgs : int
        Maximum number of rows to return (applied as a ``[:n_imgs]`` slice after
        sorting).

    Returns
    -------
    pd.DataFrame
        Columns ``filename``, ``train_idx_1based``, ``nsd_id``; one row per image.
        NOTE(review): the column name implies the index is 1-based and that row i
        pairs with fMRI row i -- confirm against the dataset README.
    """
    name_pattern = re.compile(r"train-(\d+)_nsd-(\d+)\.png")

    records = []
    for p in imgs_dir.glob("*.png"):
        m = name_pattern.search(p.name)
        train_idx, nsd_id = (int(m.group(1)), int(m.group(2))) if m else (None, None)
        records.append((p.name, train_idx, nsd_id))

    # Matching files first, by numeric training index; unmatched files last, by name.
    records.sort(key=lambda rec: (rec[1] is None, rec[1] if rec[1] is not None else 0, rec[0]))

    return pd.DataFrame(records[:n_imgs], columns=["filename", "train_idx_1based", "nsd_id"])

# ---------- RUN ----------
# For every subject x ROI: select the ROI columns of each hemisphere, concatenate
# them (LH first, then RH), save the matrix, and record one summary row.
SUMMARY_COLUMNS = [
    "subject", "roi_key", "roi_class", "labels",
    "lh_vertices", "rh_vertices", "bilat_vertices", "n_images", "saved",
]

rows = []
for subj in SUBJECTS:
    sub_dir = DATA_ROOT / subj
    if not sub_dir.exists():
        print(f"[SKIP] {subj}: folder not found -> {sub_dir}")
        continue

    try:
        X_lh, X_rh = load_training_fmri(sub_dir)
    except Exception as e:
        print(f"[SKIP] {subj}: {e}")
        continue

    n_imgs_all = X_lh.shape[0]
    # Limit images in DEBUG mode (keeps alignment with sorted filenames)
    if DEBUG and DEBUG_N_IMAGES is not None:
        n_keep = min(n_imgs_all, int(DEBUG_N_IMAGES))
        X_lh = X_lh[:n_keep]
        X_rh = X_rh[:n_keep]
    else:
        n_keep = n_imgs_all

    # Save image meta once per subject (respecting any DEBUG slice). Best effort:
    # a failure here is reported but does not stop ROI extraction.
    try:
        meta_df = parse_training_image_meta(sub_dir / "training_split" / "training_images", n_keep)
        if len(meta_df) != n_keep:
            # Fewer image files than fMRI rows means the metadata cannot describe every row.
            print(f"[WARN] {subj}: image meta has {len(meta_df)} rows but fMRI has {n_keep} rows.")
        meta_df.to_csv(OUT_BASE / "meta" / f"{subj}_train_images_meta.csv", index=False)
    except Exception as e:
        print(f"[WARN] Could not save image meta for {subj}: {e}")

    for roi_key, (roi_class, labels) in ROI_SPECS.items():
        out_dir = OUT_BASE / roi_key
        out_dir.mkdir(parents=True, exist_ok=True)

        try:
            lh_mask, lh_map = get_roi_label_map(sub_dir, roi_class, "lh")
            rh_mask, rh_map = get_roi_label_map(sub_dir, roi_class, "rh")

            lh_idx = roi_bool_from_labels(lh_mask, lh_map, labels)
            rh_idx = roi_bool_from_labels(rh_mask, rh_map, labels)

            # The empty placeholder must carry the data's dtype: np.empty defaults to
            # float64, and hstack would then upcast the whole ROI matrix whenever one
            # hemisphere has no vertices for this ROI.
            X_roi_lh = X_lh[:, lh_idx] if lh_idx.any() else np.empty((n_keep, 0), dtype=X_lh.dtype)
            X_roi_rh = X_rh[:, rh_idx] if rh_idx.any() else np.empty((n_keep, 0), dtype=X_rh.dtype)
            X_roi = np.hstack([X_roi_lh, X_roi_rh])

            n_lh = int(lh_idx.sum())
            n_rh = int(rh_idx.sum())
            rows.append({
                "subject": subj,
                "roi_key": roi_key,
                "roi_class": roi_class,
                "labels": ",".join(labels),
                "lh_vertices": n_lh,
                "rh_vertices": n_rh,
                "bilat_vertices": n_lh + n_rh,
                "n_images": int(n_keep),
                "saved": bool(X_roi.shape[1] > 0)
            })

            if X_roi.shape[1] == 0:
                print(f"[WARN] {subj} {roi_key}: no vertices; skipping save.")
                continue

            np.save(out_dir / f"{subj}.npy", X_roi)
            print(f"[OK] {subj} {roi_key}: {X_roi.shape} -> {out_dir / f'{subj}.npy'}")

        except FileNotFoundError as e:
            print(f"[SKIP] {subj} {roi_key}: {e}")
        except Exception as e:
            print(f"[ERR ] {subj} {roi_key}: {e}")

# Summary CSV. Passing explicit columns keeps the header (and column order) stable
# even when no subject/ROI pair produced a row.
summary = pd.DataFrame(rows, columns=SUMMARY_COLUMNS)
summary_path = OUT_BASE / "summary_vertex_counts.csv"
summary.to_csv(summary_path, index=False)
print(f"\nSaved summary -> {summary_path}")

if summary.empty:
    # Say so explicitly instead of hiding the situation behind a swallowed exception.
    print("[WARN] No subject/ROI combination was processed; the summary is empty.")
else:
    try:
        from IPython.display import display
    except ImportError:
        # Outside IPython fall back to plain-text output.
        display = print
    display(summary[["subject","roi_key","lh_vertices","rh_vertices","bilat_vertices","n_images","saved"]]
            .sort_values(["roi_key","subject"]))

[RUN] DEBUG=False | subjects=['subj01', 'subj02', 'subj03', 'subj04', 'subj05', 'subj06', 'subj07', 'subj08'] | rois=['EBA', 'FFA', 'PPA'] | n_images_limit=None
[OK] subj01 EBA: (9841, 6237) -> /content/drive/MyDrive/algonauts_outputs/multiROI/EBA/subj01.npy
[OK] subj01 FFA: (9841, 1511) -> /content/drive/MyDrive/algonauts_outputs/multiROI/FFA/subj01.npy
[OK] subj01 PPA: (9841, 2202) -> /content/drive/MyDrive/algonauts_outputs/multiROI/PPA/subj01.npy
[OK] subj02 EBA: (9841, 6709) -> /content/drive/MyDrive/algonauts_outputs/multiROI/EBA/subj02.npy
[OK] subj02 FFA: (9841, 1766) -> /content/drive/MyDrive/algonauts_outputs/multiROI/FFA/subj02.npy
[OK] subj02 PPA: (9841, 2762) -> /content/drive/MyDrive/algonauts_outputs/multiROI/PPA/subj02.npy
[OK] subj03 EBA: (9082, 6585) -> /content/drive/MyDrive/algonauts_outputs/multiROI/EBA/subj03.npy
[OK] subj03 FFA: (9082, 1752) -> /content/drive/MyDrive/algonauts_outputs/multiROI/FFA/subj03.npy
[OK] subj03 PPA: (9082, 3763) -> /content/drive/MyDrive

Unnamed: 0,subject,roi_key,lh_vertices,rh_vertices,bilat_vertices,n_images,saved
0,subj01,EBA,2837,3400,6237,9841,True
3,subj02,EBA,3059,3650,6709,9841,True
6,subj03,EBA,3017,3568,6585,9082,True
9,subj04,EBA,3538,4562,8100,8779,True
12,subj05,EBA,3618,5741,9359,9841,True
15,subj06,EBA,3605,4060,7665,9082,True
18,subj07,EBA,3873,3381,7254,9841,True
21,subj08,EBA,3349,3425,6774,8779,True
1,subj01,FFA,552,959,1511,9841,True
4,subj02,FFA,645,1121,1766,9841,True
