# Multiple-ROI Logistic Regression

This tutorial provides an end-to-end demonstration of decoding free-recall behavior from fMRI data using a lightweight ROI-based neural model. All configurable parameters—data paths, target image IDs, cross-validation settings, ROI definitions, and model hyperparameters—are placed at the top of the notebook for easy modification. The workflow includes loading multi-ROI voxel features, applying optional Top-10 voxel selection, building a compact neural architecture that compresses each ROI into a single feature, and training a logistic-regression classifier across many cross-validation folds. Finally, we evaluate decoding accuracy and quantify the contribution of each ROI. This notebook serves as a practical template for fMRI classification and ROI-level interpretability.

## 1. Imports & Basic Settings

In [None]:
import os

import numpy as np
import pandas as pd
from scipy.stats import t, ttest_1samp, ttest_ind
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

## 2. Parameter Settings

In [None]:
# ====================== Input locations ======================
# Behavioral table (CSV) that the label-loading cell reads.
csv_path = "BCAI-fmri/free_recall_order.csv"
# Folders containing the per-ROI beta matrices (.npy) referenced by the ROI list.
cortical_dir = "BCAI-fmri/features/cort"
subcortical_dir = "BCAI-fmri/features/subcort"

# ====================== Stimulus subset ======================
# pictureID values of the six target images kept for decoding.
TARGET_IDS = [39, 16, 30, 59, 87, 26]

# ====================== Analysis settings ======================
RANDOM_STATE = 42   # seed for the train/test split and the CV shuffling
CV_FOLDS = 5        # number of stratified cross-validation folds
TOP_N_VOXELS = 10   # default number of voxels kept by the t-test selection

## 3. ROI Configuration (10 ROIs)

In [None]:
# One entry per ROI. Each spec row is
#   (display name, feature folder, beta file name, C, mode)
# and is expanded into a dict with the keys "name", "path", "C" and "mode".
# "C" is the value handed to LogisticRegression(C=...); "mode" is either
# "full" (use every voxel) or "top10" (apply t-test voxel selection first).
roi_list = [
    {"name": roi_name, "path": f"{folder}/{filename}", "C": c_value, "mode": mode}
    for roi_name, folder, filename, c_value, mode in [
        ("Lingual Gyrus", cortical_dir,
         "beta_36_Lingual Gyrus.npy", 0.01, "full"),
        ("Intracalcarine Cortex", cortical_dir,
         "beta_24_Intracalcarine Cortex.npy", 0.01, "full"),
        ("Temporal Fusiform Cortex (posterior)", cortical_dir,
         "beta_38_Temporal Fusiform Cortex, posterior division.npy", 0.01, "full"),
        ("Parahippocampal Gyrus (posterior)", cortical_dir,
         "beta_35_Parahippocampal Gyrus, posterior division.npy", 5.0, "top10"),
        ("Precuneous Cortex", cortical_dir,
         "beta_31_Precuneous Cortex.npy", 0.05, "full"),
        ("Left Hippocampus", subcortical_dir,
         "beta_9_Left Hippocampus.npy", 0.05, "full"),
        ("Superior Frontal Gyrus", cortical_dir,
         "beta_3_Superior Frontal Gyrus.npy", 0.01, "top10"),
        ("Frontal Pole", cortical_dir,
         "beta_1_Frontal Pole.npy", 0.01, "top10"),
        ("Temporal Occipital Fusiform Cortex", cortical_dir,
         "beta_39_Temporal Occipital Fusiform Cortex.npy", 100, "top10"),
        ("Supracalcarine Cortex", cortical_dir,
         "beta_47_Supracalcarine Cortex.npy", 20, "full"),
    ]
]

## 4. Utility Functions

### 4.1 Select top N voxels by t-test

In [None]:
def select_top_voxels(X, y, top_n=None):
    """Return the column indices of the voxels that best separate the classes.

    Every voxel (column of ``X``) is scored with the absolute Welch t
    statistic between the ``y == 0`` rows and the ``y == 1`` rows, ignoring
    NaN entries. Voxels are returned in decreasing order of that score.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_voxels)
        Feature matrix; may contain NaN.
    y : array-like, shape (n_samples,)
        Binary labels coded as 0 / 1.
    top_n : int, optional
        Maximum number of voxels to return. When omitted, the module-level
        ``TOP_N_VOXELS`` is read at call time, so changing that setting takes
        effect without redefining this function.

    Returns
    -------
    list of int
        Up to ``top_n`` voxel indices, best first. Voxels with fewer than
        five non-NaN values in either class, or whose t statistic is NaN
        (e.g. a constant voxel), are never returned. Ties keep ascending
        voxel-index order.

    Notes
    -----
    The selection looks at the labels, so to avoid circular analysis it
    should be run on training data only.
    """
    if top_n is None:
        top_n = TOP_N_VOXELS

    X = np.asarray(X, dtype=float)
    y = np.asarray(y)

    # Split the rows once instead of re-masking for every voxel.
    group0 = X[y == 0]
    group1 = X[y == 1]

    # A voxel needs at least 5 usable observations in each class.
    enough = (
        (np.sum(~np.isnan(group0), axis=0) >= 5)
        & (np.sum(~np.isnan(group1), axis=0) >= 5)
    )
    candidates = np.flatnonzero(enough)
    if candidates.size == 0:
        return []

    # One vectorised Welch t-test over all candidate voxels.
    t_vals, _ = ttest_ind(
        group0[:, candidates],
        group1[:, candidates],
        axis=0,
        equal_var=False,
        nan_policy="omit",
    )
    # Some SciPy versions return a masked array under nan_policy="omit".
    t_abs = np.abs(np.asarray(np.ma.filled(t_vals, np.nan), dtype=float))

    # Drop undefined statistics so they can never be picked as "top" voxels.
    defined = ~np.isnan(t_abs)
    candidates = candidates[defined]
    t_abs = t_abs[defined]

    # Stable sort keeps the result deterministic when scores tie.
    order = np.argsort(-t_abs, kind="stable")
    return candidates[order][:top_n].tolist()

### 4.2 Train model + CV + CI calculation

In [None]:
def evaluate_roi(X, y, C_value):
    """Score a logistic-regression decoder on one ROI.

    Two evaluations are run on the same data:

    1. a single stratified 70/30 hold-out split, and
    2. stratified ``CV_FOLDS``-fold cross-validation.

    In both cases standardisation is part of the fitted pipeline, so the
    scaler only ever sees training rows (no leakage of held-out statistics).

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix for the ROI.
    y : array-like, shape (n_samples,)
        Binary labels.
    C_value : float
        Inverse regularisation strength passed to ``LogisticRegression``.

    Returns
    -------
    mean_acc : float
        Mean cross-validated accuracy.
    ci95 : tuple of float
        95% t-based confidence interval of the fold accuracies.
    sig : bool
        True when a one-sided one-sample t-test finds the fold accuracies
        greater than 0.5 at p < 0.05.
    acc_test : float
        Accuracy on the 30% hold-out split.

    Notes
    -----
    Fold accuracies share training data and are therefore not independent;
    the interval and the p-value are approximate.
    """

    def _new_model():
        # Fresh, unfitted scaler + classifier for each evaluation.
        return make_pipeline(
            StandardScaler(),
            LogisticRegression(C=C_value, max_iter=1000, class_weight="balanced"),
        )

    # ---- Hold-out evaluation ----
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=RANDOM_STATE
    )
    holdout_model = _new_model()
    holdout_model.fit(X_train, y_train)
    acc_test = accuracy_score(y_test, holdout_model.predict(X_test))

    # ---- Cross-validation (scaler is re-fit inside every training fold) ----
    cv = StratifiedKFold(n_splits=CV_FOLDS, shuffle=True, random_state=RANDOM_STATE)
    scores = cross_val_score(_new_model(), X, y, cv=cv, scoring="accuracy")

    mean_acc = scores.mean()
    # Sample standard deviation (ddof=1) to match the n-1 degrees of freedom
    # used by the t interval below.
    sem = np.std(scores, ddof=1) / np.sqrt(len(scores))
    if sem > 0:
        ci95 = t.interval(0.95, df=len(scores) - 1, loc=mean_acc, scale=sem)
    else:
        # Identical fold scores: the interval collapses to a point
        # (t.interval would return NaN for a zero scale).
        ci95 = (mean_acc, mean_acc)

    # Significance vs. the 0.5 chance level: one-sample, one-sided t-test.
    _, p_val = ttest_1samp(scores, 0.5, alternative="greater")
    sig = p_val < 0.05

    return mean_acc, ci95, sig, acc_test

### 4.3 Random baseline (shuffled labels)

In [None]:
def random_baseline(X, y, C_value):
    """Return the mean CV accuracy obtained with shuffled labels.

    The labels are permuted once, then a standardise + logistic-regression
    pipeline is scored with stratified ``CV_FOLDS``-fold cross-validation
    against the shuffled labels.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix for the ROI.
    y : array-like, shape (n_samples,)
        Binary labels; the input is not modified.
    C_value : float
        Inverse regularisation strength passed to ``LogisticRegression``.

    Returns
    -------
    float
        Mean accuracy across folds on the permuted labels.

    Notes
    -----
    The permutation is seeded with ``RANDOM_STATE`` so repeated runs give the
    same baseline; consequently every call uses the same permutation order.
    """
    # Seeded generator: keeps the baseline reproducible like the CV splits.
    rng = np.random.default_rng(RANDOM_STATE)
    y_shuffled = rng.permutation(y)

    cv = StratifiedKFold(n_splits=CV_FOLDS, shuffle=True, random_state=RANDOM_STATE)
    # Scaling lives inside the pipeline so it is fit on training folds only.
    model = make_pipeline(
        StandardScaler(),
        LogisticRegression(C=C_value, max_iter=1000, class_weight="balanced"),
    )
    scores = cross_val_score(model, X, y_shuffled, cv=cv, scoring="accuracy")
    return scores.mean()

## 5. Load Behavioral Labels

In [None]:
# Read the behavioral table from disk.
df = pd.read_csv(csv_path)

# Boolean row selector: True where the trial's pictureID is a target image.
# The same selector is reused later to pick rows of each ROI's beta matrix
# (presumably the CSV rows and beta rows are in the same order; TODO confirm).
mask = df["pictureID"].isin(TARGET_IDS)

# Recall outcome of the selected trials as a NumPy array (0 / 1).
labels = df.loc[mask, "recall"].to_numpy()

## 6. Loop Through ROIs

In [None]:
# Per-ROI decoding: load the ROI's beta matrix, keep the target rows, impute
# missing values, optionally reduce to the top voxels, then score the decoder
# and a shuffled-label baseline. One summary dict per ROI goes into `results`.
results = []

print("Running ROI decoding...\n")

# Plain boolean array so the row selection does not depend on how NumPy
# interprets a pandas Series as an index.
row_mask = np.asarray(mask, dtype=bool)

for roi in roi_list:
    name = roi["name"]
    path = roi["path"]
    C_value = roi["C"]
    mode = roi["mode"]

    # Fail loudly on a misspelled mode instead of silently using all voxels.
    if mode not in ("full", "top10"):
        raise ValueError(
            f"Unknown mode {mode!r} for ROI {name!r}; expected 'full' or 'top10'"
        )

    print(f"Processing ROI: {name}")

    # Load the fMRI beta matrix and keep only the target rows.
    X = np.load(path)
    X = X[row_mask]
    # Replace NaNs with the per-voxel mean.
    X = SimpleImputer(strategy="mean").fit_transform(X)

    # Voxel selection if requested.
    # NOTE(review): the selection (and the imputation above) use every row
    # and label before any train/test split, so the accuracies reported for
    # "top10" ROIs are likely optimistic. Moving the selection inside the
    # cross-validation would require changing evaluate_roi as well.
    if mode == "top10":
        voxels = select_top_voxels(X, labels)
        X_sel = X[:, voxels]
    else:
        X_sel = X

    # Evaluate the decoder and its shuffled-label baseline.
    mean_acc, ci95, sig, acc_test = evaluate_roi(X_sel, labels, C_value)
    rand_acc = random_baseline(X_sel, labels, C_value)

    results.append({
        "ROI": name,
        "Mode": mode,
        "C": C_value,
        "CV Mean Accuracy": round(mean_acc, 3),
        "95% CI": f"[{ci95[0]:.3f}, {ci95[1]:.3f}]",
        "Significant > Random": "Yes" if sig else "No",
        "Test Accuracy": round(acc_test, 3),
        "Random Baseline": round(rand_acc, 3),
    })


## 7. Output Summary Table

In [None]:
# Turn the per-ROI result dicts into a table, best CV accuracy first.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by="CV Mean Accuracy", ascending=False)

print("\n=== ROI Decoding Results ===")
# Bare expression as the last line of the cell so the notebook renders the table.
results_df
