In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The project sources and data used by the later cells live under
# /content/drive/MyDrive/..., so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# --- Setup & Imports ----------------------------------------------------------
import numpy as np
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import seaborn as sns
import os, sys
import pickle
from scipy import stats
import scipy.io as sio
from scipy.io import loadmat
import pandas as pd
from scipy.stats import pearsonr, linregress
# Point Python to your src/ folder
sys.path.append(os.path.abspath("/content/drive/MyDrive/0.Estudis/3.CBC/Projects/BrainStim_ANN-main/src"))

from preprocessing import *
from NPI import *
from group_analysis import *
from connectivity import *


# Import fMRI data (HCP)

In [None]:
! pip install mat73



In [None]:
import os, gc, h5py, numpy as np
from numpy.lib.format import open_memmap

# ---- Configuration -----------------------------------------------------------
base_dir = os.path.abspath('/content/drive/MyDrive/0.Estudis/3.CBC/Projects/BrainStim_ANN-main/')

# Runs are processed (and concatenated along the time axis) in this order.
run_order = ["REST1_LR", "REST1_RL", "REST2_LR", "REST2_RL"]

# One parcellated .mat file per run (opened with h5py below); the file names
# differ only in the run suffix, so build them from run_order.
run_files = {
    run_name: os.path.join(
        base_dir,
        "data",
        "fmri",
        f"Schaefer2018_400Parcels_7Networks_order_Tian_Subcortex_S3_{run_name}.mat",
    )
    for run_name in run_order
}

PAD_POLICY = "trim"   # "pad" (pad to T_max with NaN) or "trim" (trim to T_min)
dtype = np.float32    # element type of the on-disk output array
out_path = os.path.join(base_dir, "data", "fmri", "HCP_concat_ts.npy")

def list_subjects(h5path, run_key):
    """Return the subject keys stored under ``HCP/<run_key>`` in an HDF5 file.

    Parameters
    ----------
    h5path : path of the file to open read-only with h5py.
    run_key : name of the run group inside the top-level ``"HCP"`` group.

    Returns
    -------
    list of key names, ordered by the integer that follows the last ``"_"``
    in each key (so ``x_2`` sorts before ``x_10``).

    Raises
    ------
    ValueError if a key's trailing token is not an integer.
    """
    def _trailing_number(name):
        return int(name.rsplit("_", 1)[-1])

    with h5py.File(h5path, "r") as handle:
        names = list(handle["HCP"][run_key].keys())
    names.sort(key=_trailing_number)
    return names

# A subject can only be concatenated if it is present in every run, so collect
# the key set of each run and keep the intersection, ordered by numeric suffix.
subject_sets = [set(list_subjects(run_files[run], run)) for run in run_order]
subject_ids = sorted(
    subject_sets[0].intersection(*subject_sets[1:]),
    key=lambda name: int(name.split("_")[-1]),
)
n_subj = len(subject_ids)

# Inspect one dataset (first run, first common subject) to decide which axis
# holds nodes and which holds time: the shorter (or equal) axis is taken as nodes.
with h5py.File(run_files[run_order[0]], "r") as f0:
    ds0 = f0["HCP"][run_order[0]][subject_ids[0]]["ts"]
    nodes_axis, time_axis = (0, 1) if ds0.shape[0] <= ds0.shape[1] else (1, 0)
    n_nodes = ds0.shape[nodes_axis]

# ---- First pass: gather per-run T targets (T_max or T_min across common subjects)
# Reject unknown policies up front; previously any value other than "pad"
# silently behaved like "trim".
if PAD_POLICY not in ("pad", "trim"):
    raise ValueError(f"PAD_POLICY must be 'pad' or 'trim', got {PAD_POLICY!r}")

# "pad" -> every run is as long as its longest subject; "trim" -> as its shortest.
pick_target = max if PAD_POLICY == "pad" else min

T_targets = []          # one target length per run, in run_order
per_run_subject_T = {}  # optional diagnostics: run_key -> list of per-subject lengths
for run_key in run_order:
    Ts = []
    with h5py.File(run_files[run_key], "r") as f:
        grp = f["HCP"][run_key]
        for sid in subject_ids:
            ds = grp[sid]["ts"]
            # Only the shape is read here; no time-series data is loaded in this pass.
            assert ds.shape[nodes_axis] == n_nodes, f"Node mismatch in {run_key} / {sid}"
            Ts.append(ds.shape[time_axis])
    per_run_subject_T[run_key] = Ts
    T_targets.append(pick_target(Ts))

# Length of the concatenated time axis of the output array.
total_T = int(np.sum(T_targets))

# ---- Create the output .npy as a writable memmap, laid out (subject, node, time)
out = open_memmap(
    out_path,
    mode="w+",
    dtype=dtype,
    shape=(n_subj, n_nodes, total_T),
)
# Start from all-NaN so that any region not overwritten later stays NaN on disk.
out.fill(np.nan)

# Row index of each subject along the first axis of `out`.
sid2idx = dict((sid, row) for row, sid in enumerate(subject_ids))

# ---- Second pass: write data run-by-run, subject-by-subject with pad/trim
# T_target is the max ("pad") or min ("trim") of the per-subject lengths computed
# in the first pass, so copying min(t_len, T_target) samples covers both policies:
#   * "pad":  t_len <= T_target, the tail of the slot keeps its NaN fill;
#   * "trim": t_len >= T_target, samples beyond T_target are dropped.
# (The former `if t_len < T_target: pass` branch could never trigger and the two
# policy branches performed the same write, so they are merged here.)
t0 = 0  # start offset of the current run on the concatenated time axis
for run_key, T_target in zip(run_order, T_targets):
    print(f"{run_key}: target length {T_target} (policy={PAD_POLICY})")
    with h5py.File(run_files[run_key], "r") as f:
        grp = f["HCP"][run_key]
        for sid in subject_ids:
            arr = grp[sid]["ts"][()]          # load this subject's full dataset
            if nodes_axis == 1:
                arr = arr.T                   # make (nodes, time)
            arr = arr.astype(dtype, copy=False)

            L = min(arr.shape[1], T_target)
            out[sid2idx[sid], :, t0:t0+L] = arr[:, :L]

            # Dropping the reference frees the buffer; a full gc.collect() per
            # subject is unnecessary and only slows the loop down.
            del arr
    t0 += T_target
    gc.collect()

# Finish
# Flush explicitly so pending writes reach the .npy file before the handle is
# released, instead of relying on `del` being the last reference to the memmap.
out.flush()
del out
gc.collect()
print("Done. On-disk array:", out_path)


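Run from here! The conversion cells above (installing mat73 and building `HCP_concat_ts.npy`) only need to be executed once; the Drive-mount and setup/imports cells at the top must still be run first.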

In [3]:
import os, numpy as np

# Paths are re-declared here so this cell works without running the conversion cell.
base_dir = os.path.abspath('/content/drive/MyDrive/0.Estudis/3.CBC/Projects/BrainStim_ANN-main/')
out_path = os.path.join(base_dir, "data", "fmri", "HCP_concat_ts.npy")

# Memory-map the array read-only instead of loading it into RAM.
X = np.load(out_path, mmap_mode="r")   # shape: (S, N, T)

# One (T, N) view per subject: iterate over the first axis and swap the two others.
subjects = [subject_ts.T for subject_ts in X]

(2900, 450)
float32


In [5]:
# Replace the raw per-subject arrays with their preprocessed versions.
# preprocess_groups comes from one of the star-imported project modules; its
# exact steps are not visible in this notebook.
# NOTE(review): this rebinds `subjects`, so re-running the cell feeds already
# preprocessed data back in — re-run the loading cell above first.
subjects = preprocess_groups(subjects)

In [7]:
# Wrap the subject list in a dict keyed by group name; "HCP" is the only group
# and is the key under which the training cell below receives these subjects.
groups = {"HCP": subjects}

# Quick sanity check of the first subject's array shape.
print(subjects[0].shape)  #(T, N)

(2900, 450)
float64


In [8]:
# Options forwarded unchanged to train_models_for_groups, which comes from the
# star-imported project code (its implementation is not shown in this notebook).
train_kwargs = dict(
    steps=3,
    batch_size=50,
    train_prop=0.8,
    num_epochs=100,
    lr=1e-3,
    l2=5e-5,
    min_windows=50,
    save_dir=os.path.join(base_dir, "models"),
    save_prefix="ANN_subject",
)

# NOTE: `results` is only bound once the call returns; interrupting the run
# leaves it undefined for the cells below.
results = train_models_for_groups(groups, **train_kwargs)



=== Group: HCP — 1002 subjects ===
[OK]  HCP[0] — T=2900, N=450, W=2897 | final train=0.065942, test=0.352331 (149.0s)
[OK]  HCP[1] — T=2900, N=450, W=2897 | final train=0.403358, test=0.478631 (157.8s)
[OK]  HCP[2] — T=2900, N=450, W=2897 | final train=0.103200, test=0.294534 (184.8s)
[OK]  HCP[3] — T=2900, N=450, W=2897 | final train=0.307343, test=0.428453 (154.5s)
[OK]  HCP[4] — T=2900, N=450, W=2897 | final train=0.143008, test=0.168261 (212.2s)
[OK]  HCP[5] — T=2900, N=450, W=2897 | final train=0.078918, test=0.325198 (174.8s)
[OK]  HCP[6] — T=2900, N=450, W=2897 | final train=0.237352, test=0.309079 (152.7s)


KeyboardInterrupt: 

In [9]:
# Learning curves (train vs. test loss per epoch) for one subject.
# The only group defined in this notebook is "HCP"; the previous key 'CNT' does
# not exist in `groups` and would raise a KeyError.
# Requires `results` from the training cell, i.e. that cell must have finished.
group_name, subject_idx = "HCP", 0
history = results[group_name][subject_idx]['history']

fig, ax = plt.subplots(figsize=(4, 3))
ax.plot(history['train_loss'], label='train_loss')
ax.plot(history['test_loss'], label='test_loss')
ax.set_xlabel('Training epochs')
ax.set_ylabel('Loss')
ax.legend()
fig.tight_layout()
plt.show()

NameError: name 'results' is not defined

<Figure size 400x300 with 0 Axes>