<a href="https://colab.research.google.com/github/grabuffo/BrainStim_ANN_fMRI_HCP/blob/main/notebooks/Make_dataframe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# --- 1. Mount Google Drive ---
# Mounts Drive at /content/drive; force_remount=True is passed on every run.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# --- 2. Clone GitHub repo (contains src/NPI.py) ---
# Delete any previous checkout first so `git clone` does not fail on an
# already-existing target directory.
!rm -rf /content/BrainStim_ANN_fMRI_HCP
!git clone https://github.com/grabuffo/BrainStim_ANN_fMRI_HCP.git

# --- 3. Define paths ---
import os, sys, gc
repo_dir = "/content/BrainStim_ANN_fMRI_HCP"                                # local clone of the repo
data_dir = "/content/drive/MyDrive/Colab Notebooks/Brain_Stim_ANN/data"     # data root on Drive

# Sub-folders of the data root.
preproc_dir   = os.path.join(data_dir, "preprocessed_subjects")
dataframe_dir = os.path.join(data_dir, "dataframe")
ects_dir      = os.path.join(preproc_dir, "ECts_MLP")

# Only the EC_t folder (and, implicitly, its parents) is created here.
os.makedirs(ects_dir, exist_ok=True)

# Put the clone on the import path (only once) so that `src` can be imported.
if sys.path.count(repo_dir) == 0:
    sys.path.append(repo_dir)

# --- 4. Imports ---
import numpy as np
from scipy import stats
import torch
import torch.serialization
# Project module from the cloned repo; resolvable because repo_dir was
# appended to sys.path above.
from src import NPI

# Use the GPU when PyTorch reports one as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("‚úÖ Repo loaded from:", repo_dir)
print("Using device:", device)

# --- 5. Choose which subjects to process ---
# To restrict the run to specific subjects, set the list manually instead:
#subjects = ["id_100206"]
# Default: auto-detect every subject that has a "<sid>_signals.npy" file in
# preproc_dir. A set removes duplicates; the final list is sorted.
signal_suffix = "_signals.npy"
detected_ids = set()
for fname in os.listdir(preproc_dir):
    if fname.endswith(signal_suffix):
        detected_ids.add(fname.split(signal_suffix)[0])
subjects = sorted(detected_ids)

Mounted at /content/drive
Cloning into 'BrainStim_ANN_fMRI_HCP'...
remote: Enumerating objects: 289, done.[K
remote: Counting objects: 100% (116/116), done.[K
remote: Compressing objects: 100% (106/106), done.[K
remote: Total 289 (delta 50), reused 10 (delta 10), pack-reused 173 (from 1)[K
Receiving objects: 100% (289/289), 30.94 MiB | 32.76 MiB/s, done.
Resolving deltas: 100% (91/91), done.
‚úÖ Repo loaded from: /content/BrainStim_ANN_fMRI_HCP
Using device: cpu


In [None]:
# ============================================
# Load EC_t and X for all subjects
# ============================================

# Per-subject containers keyed by subject id:
#   ECts[sid] -> EC_t array, Xs[sid] -> input X array.
ECts, Xs = {}, {}

for sid in subjects:
    ec_file = os.path.join(ects_dir, f"{sid}_ECt.npy")
    inputs_file = os.path.join(preproc_dir, f"{sid}_inputs.npy")

    # A subject is loaded only when both files exist. The EC_t file is
    # checked first, so only one "Skipping" message is printed per subject.
    if not os.path.exists(ec_file):
        print(f"‚ö†Ô∏è Skipping {sid}: EC_t file not found.")
    elif not os.path.exists(inputs_file):
        print(f"‚ö†Ô∏è Skipping {sid}: inputs file not found.")
    else:
        ECts[sid] = np.load(ec_file)
        Xs[sid] = np.load(inputs_file)
        print(f"‚úÖ Loaded {sid}: EC_t {ECts[sid].shape}, X {Xs[sid].shape}")

print(f"\nüéØ Loaded {len(ECts)} subjects successfully.")

‚úÖ Loaded id_100206: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_100307: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_100408: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_101006: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_101107: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_101309: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_101915: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_102008: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_102109: EC_t (500, 450, 450), X (4677, 1350)
‚úÖ Loaded id_102311: EC_t (500, 450, 450), X (4677, 1350)

üéØ Loaded 10 subjects successfully.


In [None]:
import numpy as np
from numpy.linalg import solve

def compute_hierarchy_and_trophic_coherence(EC: np.ndarray):
    """
    Compute hierarchical (trophic) levels and trophic coherence of a weighted
    directed network given by an asymmetric effective connectivity matrix.

    The levels ``gamma`` solve ``Lambda @ gamma = v`` with
    ``Lambda = diag(u) - A - A.T``, ``u = in_strength + out_strength`` and
    ``v = in_strength - out_strength``. Coherence is ``1 - F0``, where ``F0``
    is the weight-averaged squared deviation of each edge's level difference
    ``gamma[target] - gamma[source]`` from the ideal step of 1.

    Parameters
    ----------
    EC : (N, N) array_like
        Effective connectivity matrix; EC[i, j] is the influence of region i
        on region j (rows = sources, columns = targets). The matrix is used
        as given: entries are neither clipped nor rectified.
        NOTE(review): trophic analysis is usually formulated for non-negative
        weights -- confirm that signed EC values are intended here.

    Returns
    -------
    gamma : (N,) ndarray
        Hierarchical level of each node (shifted so that min(gamma) = 0).
    trophic_coherence : float
        ``1 - F0``; equals 1 when every edge climbs exactly one level, and
        decreases as edges deviate from that ideal step.

    Raises
    ------
    ValueError
        If ``EC`` is not a square 2-D matrix.
    numpy.linalg.LinAlgError
        Propagated from ``numpy.linalg.solve`` when the anchored system is
        singular.
    """
    A = np.asarray(EC, dtype=float)
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        raise ValueError(
            f"EC must be a square 2-D matrix, got shape {A.shape}"
        )

    # With rows = sources and columns = targets:
    #   column sums -> total weight arriving at each node (in-strength)
    #   row sums    -> total weight leaving each node     (out-strength)
    in_strength = A.sum(axis=0)   # shape (N,)
    out_strength = A.sum(axis=1)  # shape (N,)

    # u = in + out (total strength); v = in - out (net imbalance)
    u = in_strength + out_strength
    v = in_strength - out_strength

    # Lambda = diag(u) - A - A'. Every row of Lambda sums to zero, so the
    # matrix is singular as built.
    Lambda = np.diag(u) - A - A.T

    # Zero one diagonal entry to break that degeneracy and anchor the
    # solution, so that solve() can generally return a unique gamma.
    Lambda[0, 0] = 0.0

    # Solve Lambda * gamma = v
    gamma = solve(Lambda, v)

    # Shift so the minimum hierarchical level is 0
    gamma = gamma - gamma.min()

    # level_step[i, j] = gamma[j] - gamma[i]: level gained along edge i -> j.
    level_step = gamma[np.newaxis, :] - gamma[:, np.newaxis]

    # Squared deviation of every possible edge from the ideal step of 1.
    H = (level_step - 1.0) ** 2

    # Weighted mean squared deviation (weights = edge strengths in A).
    F0 = np.sum(A * H) / np.sum(A)

    # Trophic coherence = 1 - F0
    trophic_coherence = 1.0 - F0

    return gamma, trophic_coherence

In [None]:
# Make dataframe for responsiveness and predictability

import pandas as pd
from scipy.stats import pearsonr

# One record per (subject, region); converted to a DataFrame after the loop.
results = []
N = 400  # number of cortical regions

for sid, ec_all in ECts.items():
    # Keep only the last N regions on both connectivity axes.
    EC_t = ec_all[:, -N:, -N:]          # shape (M, N, N)
    M = EC_t.shape[0]
    # First M rows of the inputs, restricted to the last N columns.
    x_t = Xs[sid][:M, -N:]

    # --- Responsiveness ---
    # Row-wise sum of squares of the time-averaged EC matrix.
    meanEC = EC_t.mean(axis=0)                      # (N, N)
    Responsiveness = (meanEC**2).sum(axis=1)        # (N,)
    # Hierarchical levels of the time-averaged EC (coherence value unused).
    Hierarchy, _ = compute_hierarchy_and_trophic_coherence(meanEC)  # (N,)

    # --- Predictability ---
    # Correlation, over the M samples, between baseline energy and the
    # squared-EC effect size of each region.
    E_t = (x_t**2).sum(axis=1)   # baseline energy (M,)
    Predictability = np.zeros(N)
    for ir in range(N):
        region_effect_t = (EC_t[:, ir, :]**2).sum(axis=1)  # (M,)
        r_value, _ = pearsonr(E_t, region_effect_t)
        Predictability[ir] = r_value

    # --- Store in results list ---
    results.extend(
        {
            "sid": sid,
            "roi": region,
            "responsiveness": Responsiveness[region],
            "predictability": Predictability[region],
            "hierarchy": Hierarchy[region],
        }
        for region in range(N)
    )

# Convert to DataFrame
df = pd.DataFrame(results)


In [None]:
# Preview the per-region table built in the previous cell
# (columns: sid, roi, responsiveness, predictability, hierarchy).
df

Unnamed: 0,sid,roi,responsiveness,predictability,hierarchy
0,id_100206,0,0.000436,-0.494759,1.910873
1,id_100206,1,0.000615,-0.401189,1.549246
2,id_100206,2,0.000378,-0.471682,2.395887
3,id_100206,3,0.000418,-0.361711,1.729795
4,id_100206,4,0.000393,-0.433977,1.492068
...,...,...,...,...,...
3995,id_102311,395,0.000343,-0.261130,19.080519
3996,id_102311,396,0.000278,-0.410756,18.214204
3997,id_102311,397,0.000322,-0.302693,18.378216
3998,id_102311,398,0.000249,-0.255351,18.629194


In [2]:
!pip install brainspace

Collecting brainspace
  Downloading brainspace-0.1.21-py3-none-any.whl.metadata (3.9 kB)
Collecting vtk>=8.1.0 (from brainspace)
  Downloading vtk-9.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (5.6 kB)
Downloading brainspace-0.1.21-py3-none-any.whl (60.7 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m60.7/60.7 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading vtk-9.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (112.3 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m112.3/112.3 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vtk, brainspace
Successfully installed brainspace-0.1.21 vtk-9.5.2


In [4]:
# Number of cortical regions (same value as in the dataframe-building cell);
# re-declared so the gradient cell below can run even if that cell has not.
N=400

In [6]:
from sklearn.metrics.pairwise import cosine_similarity
from brainspace.gradient import GradientMaps
import pandas as pd

# Number of trailing time points used to estimate each subject's static FC.
test_dur = 3000
# Fixed seed for GradientMaps so the embedding is reproducible across
# kernel restarts and re-runs.
gradient_seed = 0

# Empirical static FC for all subjects
FCs_emp = {}        # sid -> (N, N) correlation matrix
all_gradients = {}  # sid -> (N,) principal gradient

# --- Compute principal gradient per subject ---
# NOTE(review): each subject is embedded independently with no alignment
# step, so gradient sign/order may not be comparable across subjects --
# confirm this is acceptable for the downstream analysis.
for sid in subjects:
    sig_path = os.path.join(preproc_dir, f"{sid}_signals.npy")

    # Last `test_dur` samples, last N regions.
    Z = np.load(sig_path)[-test_dur:, -N:]  # (M, N)

    FC = np.corrcoef(Z.T)  # (N, N)

    FCs_emp[sid] = FC

    # similarity of connectivity profiles
    A_affinity = cosine_similarity(FC)  # (N, N)

    # NOTE(review): an already-computed cosine-similarity matrix is passed to
    # a GradientMaps that is also configured with kernel='normalized_angle';
    # BrainSpace presumably applies that kernel on top -- confirm the double
    # transformation is intended.
    gm = GradientMaps(n_components=10, approach='dm', kernel='normalized_angle',
                      random_state=gradient_seed)
    gm.fit(A_affinity)

    # principal gradient for this subject: (N,)
    all_gradients[sid] = gm.gradients_[:, 0]

# --- Convert gradients to a long-format DataFrame ---
# One row per (subject, region), keyed the same way as the main table.
grad_rows = [
    {
        "sid": sid,
        "roi": region,
        "principal_gradient": grad[region],
    }
    for sid, grad in all_gradients.items()
    for region in range(N)
]

grad_df = pd.DataFrame(grad_rows)

In [None]:
# --- Merge principal gradients into the existing df (keys: sid & roi) ---
# Drop any `principal_gradient` column left by an earlier run of this cell so
# that re-running it does not produce `principal_gradient_x` / `_y` columns.
# validate="one_to_one" makes pandas raise if a (sid, roi) pair is duplicated
# on either side instead of silently multiplying rows.
df = (
    df.drop(columns=["principal_gradient"], errors="ignore")
      .merge(grad_df, on=["sid", "roi"], how="left", validate="one_to_one")
)

In [21]:
# Quick check: first subject id in the sorted `subjects` list.
subjects[0]

'id_100206'

In [None]:
# Inspect df after the merge: a `principal_gradient` column should now be present.
df

Unnamed: 0,sid,roi,responsiveness,predictability,hierarchy,principal_gradient
0,id_100206,0,0.000436,-0.494759,1.910873,-0.053380
1,id_100206,1,0.000615,-0.401189,1.549246,0.010695
2,id_100206,2,0.000378,-0.471682,2.395887,-0.019872
3,id_100206,3,0.000418,-0.361711,1.729795,-0.061738
4,id_100206,4,0.000393,-0.433977,1.492068,-0.053638
...,...,...,...,...,...,...
3995,id_102311,395,0.000343,-0.261130,19.080519,0.081725
3996,id_102311,396,0.000278,-0.410756,18.214204,0.063341
3997,id_102311,397,0.000322,-0.302693,18.378216,0.044282
3998,id_102311,398,0.000249,-0.255351,18.629194,0.116244


In [None]:
# Make sure the output folder exists; nothing earlier in the notebook
# creates dataframe_dir, and pandas will not create it on write.
os.makedirs(dataframe_dir, exist_ok=True)

# Shared path stem for both output files. The spelling "herarchy" is kept
# unchanged so existing readers of these files keep working.
output_stem = os.path.join(dataframe_dir, "responsiveness_predictability_herarchy")

# Save as CSV (row index omitted)
df.to_csv(output_stem + ".csv", index=False)

# Also save as pickle for fast loading
df.to_pickle(output_stem + ".pkl")

In [20]:
# Principal-gradient values of subject id_100206 as a NumPy array
# (boolean-mask row selection through .loc rather than chained indexing).
np.asarray(grad_df.loc[grad_df['sid'] == 'id_100206', 'principal_gradient'])

array([-0.05277209,  0.01083288, -0.01978232, -0.06138297, -0.05316657,
        0.02421802,  0.04342252, -0.0313856 ,  0.03421375, -0.06232753,
       -0.07662802,  0.05684985,  0.0163275 ,  0.00572048,  0.07155598,
       -0.0029681 , -0.05484867,  0.01899668, -0.00246702,  0.05114801,
       -0.03646155, -0.00722474, -0.03706724, -0.05000584, -0.0499802 ,
       -0.04765047, -0.03340243, -0.04989991, -0.0520839 , -0.0467862 ,
       -0.08079112, -0.06672563, -0.01067749, -0.05120554, -0.08923878,
       -0.02659352, -0.09770474,  0.00294996, -0.05398697, -0.08968742,
       -0.09183452, -0.07267602, -0.07408797, -0.09582142, -0.00908836,
       -0.07590866, -0.0948794 , -0.02548302, -0.0719701 , -0.0574763 ,
       -0.0938623 , -0.08308646, -0.08556407, -0.08209742, -0.06000241,
       -0.05720705, -0.05558921, -0.08749367, -0.07555002, -0.092252  ,
       -0.0919002 , -0.07115512, -0.09706868, -0.06193651, -0.06664536,
       -0.01626201, -0.08914484, -0.06535804, -0.06020609, -0.02

In [24]:
# Responsiveness values of subject id_100206 as a NumPy array.
# Requires `df` from the dataframe-building cell in the current kernel session.
np.asarray(df.loc[df['sid'] == 'id_100206', 'responsiveness'])

NameError: name 'df' is not defined

In [None]:
# Display df once more.
# NOTE(review): the stored output of this cell has no `principal_gradient`
# column, so it presumably predates the merge cell -- re-run the notebook
# top to bottom to refresh it.
df

Unnamed: 0,sid,roi,responsiveness,predictability,hierarchy
0,id_100206,0,0.000436,-0.494759,1.910873
1,id_100206,1,0.000615,-0.401189,1.549246
2,id_100206,2,0.000378,-0.471682,2.395887
3,id_100206,3,0.000418,-0.361711,1.729795
4,id_100206,4,0.000393,-0.433977,1.492068
...,...,...,...,...,...
3995,id_102311,395,0.000343,-0.261130,19.080519
3996,id_102311,396,0.000278,-0.410756,18.214204
3997,id_102311,397,0.000322,-0.302693,18.378216
3998,id_102311,398,0.000249,-0.255351,18.629194
