<a href="https://colab.research.google.com/github/grabuffo/BrainStim_ANN_fMRI_HCP/blob/main/notebooks/Make_dataframe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Environment setup for a Google Colab session: mount Drive, fetch the project
# repository, define the data folders, and import the numerical stack.
# The steps below rely on side effects and must run in this order.

# --- 1. Mount Google Drive ---
# force_remount=True remounts even when Drive is already mounted in this session.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# --- 2. Clone GitHub repo (contains src/NPI.py) ---
# Remove any previous checkout first so the clone always starts from scratch.
# NOTE(review): the clone lands in the current working directory, which is
# assumed to be /content (the Colab default) -- confirm if the cwd is changed.
!rm -rf /content/BrainStim_ANN_fMRI_HCP
!git clone https://github.com/grabuffo/BrainStim_ANN_fMRI_HCP.git

# --- 3. Define paths ---
# NOTE(review): `gc` is imported but not referenced in the cells visible here.
import os, sys, gc
repo_dir    = "/content/BrainStim_ANN_fMRI_HCP"
data_dir    = "/content/drive/MyDrive/Colab Notebooks/Brain_Stim_ANN/data"
dataframe_dir  = os.path.join(data_dir, "dataframe")
preproc_dir = os.path.join(data_dir, "preprocessed_subjects")
ects_dir    = os.path.join(preproc_dir, "ECts_MLP")
# Only ects_dir is created here.
# NOTE(review): dataframe_dir is not created in this cell even though results
# are written to it later -- confirm it already exists on Drive.
os.makedirs(ects_dir, exist_ok=True)

# Make the cloned repository importable (`from src import NPI` below).
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

# --- 4. Imports ---
# NOTE(review): `stats`, `torch.serialization` and `NPI` are not referenced in
# the cells visible here; they may be leftovers from a related notebook.
import numpy as np
from scipy import stats
import torch
import torch.serialization
from src import NPI

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("‚úÖ Repo loaded from:", repo_dir)
print("Using device:", device)

# --- 5. Choose which subjects to process ---
# Option A: list the subject ids by hand, e.g.
#subjects = ["id_100206"]
# Option B (active): derive the ids from the "<sid>_signals.npy" files found in
# preproc_dir, de-duplicated and sorted alphabetically.
signal_suffix = "_signals.npy"
subjects = sorted({
    name.partition(signal_suffix)[0]   # text before the first suffix occurrence
    for name in os.listdir(preproc_dir)
    if name.endswith(signal_suffix)
})

Mounted at /content/drive
Cloning into 'BrainStim_ANN_fMRI_HCP'...
remote: Enumerating objects: 237, done.[K
remote: Counting objects: 100% (64/64), done.[K
remote: Compressing objects: 100% (54/54), done.[K
remote: Total 237 (delta 28), reused 10 (delta 10), pack-reused 173 (from 1)[K
Receiving objects: 100% (237/237), 29.45 MiB | 5.39 MiB/s, done.
Resolving deltas: 100% (69/69), done.
‚úÖ Repo loaded from: /content/BrainStim_ANN_fMRI_HCP
Using device: cpu


In [2]:
# ============================================
# Load EC_t and X for all subjects
# ============================================
# A subject is kept only when both "<sid>_ECt.npy" (in ects_dir) and
# "<sid>_inputs.npy" (in preproc_dir) exist; otherwise it is reported and skipped.

ECts, Xs = {}, {}   # per-subject EC_t arrays / per-subject input X arrays

for sid in subjects:
    ec_file = os.path.join(ects_dir, f"{sid}_ECt.npy")
    x_file = os.path.join(preproc_dir, f"{sid}_inputs.npy")

    if not os.path.exists(ec_file):
        print(f"‚ö†Ô∏è Skipping {sid}: EC_t file not found.")
    elif not os.path.exists(x_file):
        print(f"‚ö†Ô∏è Skipping {sid}: inputs file not found.")
    else:
        # Both files are present: load them and report their shapes.
        ECts[sid] = np.load(ec_file)
        Xs[sid] = np.load(x_file)
        print(f"‚úÖ Loaded {sid}: EC_t {ECts[sid].shape}, X {Xs[sid].shape}")

print(f"\nüéØ Loaded {len(ECts)} subjects successfully.")

‚úÖ Loaded id_100206: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_100307: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_100408: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_101006: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_101107: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_101309: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_101915: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_102008: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_102109: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_102311: EC_t (500, 450, 450), X (4677, 1350)

üéØ Loaded 10 subjects successfully.


In [4]:
# Assemble a long-format table with one row per (subject, cortical region)
# holding that region's responsiveness and predictability scores.

import pandas as pd
from scipy.stats import pearsonr

N = 400  # number of cortical regions
results = []

for sid, ec_all in ECts.items():
    # Keep the trailing N rows/columns of every EC matrix -> (M, N, N)
    EC_t = ec_all[:, -N:, -N:]
    M = len(EC_t)
    # First M rows and trailing N columns of this subject's inputs
    x_t = Xs[sid][:M, -N:]

    # --- Responsiveness: per-row sum of squares of the sample-averaged EC ---
    meanEC = EC_t.mean(axis=0)                  # (N, N)
    Responsiveness = (meanEC**2).sum(axis=1)    # (N,)

    # --- Predictability: Pearson r, across samples, between the input energy
    # and the summed squared EC entries of each region's row ---
    E_t = (x_t**2).sum(axis=1)                  # baseline energy, one value per sample
    Predictability = np.fromiter(
        (pearsonr(E_t, (EC_t[:, ir, :]**2).sum(axis=1))[0] for ir in range(N)),
        dtype=float,
        count=N,
    )

    # --- One record per region for this subject ---
    results.extend(
        {
            "sid": sid,
            "roi": region,
            "responsiveness": Responsiveness[region],
            "predictability": Predictability[region]
        }
        for region in range(N)
    )

# Convert to DataFrame
df = pd.DataFrame(results)


In [9]:
# Show the per-(subject, ROI) table; pandas abbreviates the rendering of long frames.
df

Unnamed: 0,sid,roi,responsiveness,predictability
0,id_100206,0,0.000436,-0.494759
1,id_100206,1,0.000615,-0.401189
2,id_100206,2,0.000378,-0.471682
3,id_100206,3,0.000418,-0.361711
4,id_100206,4,0.000393,-0.433977
...,...,...,...,...
3995,id_102311,395,0.000343,-0.261130
3996,id_102311,396,0.000278,-0.410756
3997,id_102311,397,0.000322,-0.302693
3998,id_102311,398,0.000249,-0.255351


In [12]:
!pip install brainspace

Collecting brainspace
  Downloading brainspace-0.1.20-py3-none-any.whl.metadata (3.9 kB)
Collecting vtk<9.3.2,>=8.1.0 (from brainspace)
  Downloading vtk-9.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading brainspace-0.1.20-py3-none-any.whl (60.7 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m60.7/60.7 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading vtk-9.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (92.2 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m92.2/92.2 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vtk, brainspace
Successfully installed brainspace-0.1.20 vtk-9.3.1


In [14]:
# Compute each subject's principal functional-connectivity gradient and attach
# it to `df` as the column "principal_gradient" (one value per subject and ROI).

from sklearn.metrics.pairwise import cosine_similarity
from brainspace.gradient import GradientMaps

# Number of trailing time points of each signal file used for the static FC.
test_dur = 3000

# Empirical static FC for all subjects
FCs_emp = {}
all_gradients = {}

# --- Compute principal gradient per subject ---
for sid in subjects:
    sig_path = os.path.join(preproc_dir, f"{sid}_signals.npy")

    # Last `test_dur` time points (fewer if the file is shorter), last N regions.
    Z = np.load(sig_path)[-test_dur:, -N:]  # (T, N), T <= test_dur

    # Region-by-region Pearson correlation (rows of Z.T are regions).
    FC = np.corrcoef(Z.T)  # (N, N)

    FCs_emp[sid] = FC

    # similarity of connectivity profiles
    # NOTE(review): the matrix passed to GradientMaps is already a cosine
    # similarity, and kernel='normalized_angle' applies a further affinity
    # transform on top of it -- confirm this double transform is intended
    # (BrainSpace is commonly given the FC matrix itself).
    A_affinity = cosine_similarity(FC)  # (N, N)

    # random_state is fixed so the embedding is seeded identically on every
    # run; the default (None) leaves it unseeded.
    gm = GradientMaps(n_components=10, approach='dm', kernel='normalized_angle',
                      random_state=0)
    gm.fit(A_affinity)

    # principal gradient for this subject: (N,)
    # NOTE(review): each subject is embedded independently, with no alignment
    # step, so the sign/order of components may not be comparable across
    # subjects. GradientMaps supports alignment='procrustes' / 'joint' when
    # fitting a list of matrices -- consider it before pooling subjects.
    all_gradients[sid] = gm.gradients_[:, 0]

# --- Convert gradients to a long-format DataFrame ---
grad_rows = [
    {"sid": sid, "roi": region, "principal_gradient": grad[region]}
    for sid, grad in all_gradients.items()
    for region in range(N)
]

grad_df = pd.DataFrame(grad_rows)

# --- Merge with the existing df (keyed by sid & roi) ---
# Drop any "principal_gradient" column left by a previous run of this cell, so
# re-running does not create "principal_gradient_x"/"_y" duplicates.
# validate="one_to_one" asserts that (sid, roi) is unique on both sides, so the
# merge can never silently multiply rows.
df = (
    df.drop(columns=["principal_gradient"], errors="ignore")
      .merge(grad_df, on=["sid", "roi"], how="left", validate="one_to_one")
)


In [15]:
# Show the table again after the merge; it now also carries "principal_gradient".
df

Unnamed: 0,sid,roi,responsiveness,predictability,principal_gradient
0,id_100206,0,0.000436,-0.494759,-0.059405
1,id_100206,1,0.000615,-0.401189,0.010547
2,id_100206,2,0.000378,-0.471682,-0.019754
3,id_100206,3,0.000418,-0.361711,-0.065503
4,id_100206,4,0.000393,-0.433977,-0.058108
...,...,...,...,...,...
3995,id_102311,395,0.000343,-0.261130,0.081187
3996,id_102311,396,0.000278,-0.410756,0.064293
3997,id_102311,397,0.000322,-0.302693,0.045380
3998,id_102311,398,0.000249,-0.255351,0.117032


In [16]:
# Persist the final table under dataframe_dir.
# The folder is created first: nothing else in the notebook creates it, and
# to_csv / to_pickle do not create missing parent directories.
os.makedirs(dataframe_dir, exist_ok=True)

# Save as CSV
df.to_csv(os.path.join(dataframe_dir, "responsiveness_predictability.csv"),
          index=False)

# Also save as pickle for fast loading
# (pickle files should only ever be loaded from a trusted location).
df.to_pickle(os.path.join(dataframe_dir, "responsiveness_predictability.pkl"))