<a href="https://colab.research.google.com/github/grabuffo/BrainStim_ANN_fMRI_HCP/blob/main/notebooks/Process_TMS_fMRI_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Setup cell ---

# 1 - Mount Drive (for data only)
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# 2 - Clone repo (only if missing)
import os
if not os.path.exists("/content/BrainStim_ANN_fMRI_HCP"):
    !git clone https://github.com/grabuffo/BrainStim_ANN_fMRI_HCP.git
else:
    print("Repo already exists âœ…")

# 3 - Set paths
repo_dir = "/content/BrainStim_ANN_fMRI_HCP"
data_dir = "/content/drive/MyDrive/Colab Notebooks/Brain_Stim_ANN/data"

import sys
sys.path.append(repo_dir)

# 4 - Imports
from src.preprocessing_hcp import *
from src.NPI import *

import numpy as np
import matplotlib.pyplot as plt
import h5py
import gc

print("âœ… Environment ready!")
print("Repo directory:", repo_dir)
print("Data directory:", data_dir)

Mounted at /content/drive
Repo already exists âœ…
âœ… Environment ready!
Repo directory: /content/BrainStim_ANN_fMRI_HCP
Data directory: /content/drive/MyDrive/Colab Notebooks/Brain_Stim_ANN/data


In [None]:
# --- Preprocessing and data extraction ---

# Parameters
n_nodes = 450                # number of brain regions (parcels)
remove_points = 30           # number of initial TRs dropped from every run
using_steps = 3              # window length for multi2one
number_of_subjects = 10      # or 996 if all subjects
# NOTE(review): dtype is not referenced by the preprocessing cells visible here;
# confirm whether the saved arrays are meant to be cast to it.
dtype = np.float32

# ------------------------------------------------------------------------------
# 1 - Locate fMRI run files
# ------------------------------------------------------------------------------

# All run files share one naming scheme and differ only in the run label, so
# the table is built from a single template instead of four copied paths.
atlas_prefix = "Schaefer2018_400Parcels_7Networks_order_Tian_Subcortex_S3"
run_order = ["REST1_LR", "REST1_RL", "REST2_LR", "REST2_RL"]
run_files = {
    run_label: os.path.join(data_dir, f"fmri/{atlas_prefix}_{run_label}.mat")
    for run_label in run_order
}

# Fail here with a clear message rather than later, deep inside the processing
# loop, and only report the runs as "found" once they really are on disk.
missing_files = [path for path in run_files.values() if not os.path.isfile(path)]
if missing_files:
    raise FileNotFoundError(
        "Missing fMRI run file(s):\n  " + "\n  ".join(missing_files)
    )

print(f"âœ… Found {len(run_files)} fMRI runs:")
for k in run_order:
    print("  -", k)

âœ… Found 4 fMRI runs:
  - REST1_LR
  - REST1_RL
  - REST2_LR
  - REST2_RL


In [None]:
# ------------------------------------------------------------------------------
# 2 - Find common subjects across runs
# ------------------------------------------------------------------------------

def list_subjects(h5path, run_key):
    """List the subject keys stored under ``HCP/<run_key>`` in an HDF5 file.

    Args:
        h5path: Path of the HDF5 (.mat) file to open read-only.
        run_key: Name of the run group inside the top-level ``HCP`` group.

    Returns:
        The group's keys as a list, ordered by the integer that follows the
        last underscore of each key.
    """
    def _numeric_suffix(name):
        # Sort numerically on the trailing "_<number>" part of the key.
        return int(name.split("_")[-1])

    with h5py.File(h5path, "r") as h5file:
        names = list(h5file["HCP"][run_key].keys())
        names.sort(key=_numeric_suffix)
    return names

# One set of subject keys per run, in run_order.
subject_sets = [
    set(list_subjects(run_files[run_name], run_name))
    for run_name in run_order
]

# Keep only subjects that appear in every run, ordered by the integer after
# the last underscore of the key.
common_subjects = set.intersection(*subject_sets)
subject_ids = sorted(common_subjects, key=lambda key: int(key.split("_")[-1]))
print(f"\nâœ… Found {len(subject_ids)} subjects present in all runs.")

# Restrict processing to the first number_of_subjects entries.
subject_ids = subject_ids[:number_of_subjects]
print(f"   â†’ Will process first {len(subject_ids)} subjects.\n")


âœ… Found 996 subjects present in all runs.
   â†’ Will process first 10 subjects.



In [None]:
# ------------------------------------------------------------------------------
# 3 - Process each subject
# ------------------------------------------------------------------------------

save_dir = os.path.join(data_dir, "preprocessed_subjects")
os.makedirs(save_dir, exist_ok=True)


def _load_filtered_run(sid, run_key):
    """Load one run of one subject and return its filtered time series.

    Reads ``HCP/<run_key>/<sid>/ts`` from ``run_files[run_key]``, transposes the
    array when its first axis is shorter than its second (so the longer axis
    comes first), drops the first ``remove_points`` rows, keeps the first
    ``n_nodes`` columns and passes the result to ``bandpass_filter_timeseries``.

    Args:
        sid: Subject key inside the run group.
        run_key: Run label; also the key into ``run_files``.

    Returns:
        Whatever ``bandpass_filter_timeseries`` returns for the trimmed array.

    Raises:
        ValueError: If the run has no rows left after dropping ``remove_points``
            rows, or has fewer than ``n_nodes`` columns.
    """
    with h5py.File(run_files[run_key], "r") as h5file:
        ts = h5file["HCP"][run_key][sid]["ts"][()]

    # Print shape before and after possible transpose
    print(f"      Original shape for {run_key}: {ts.shape}", end="")
    if ts.shape[0] < ts.shape[1]:
        ts = ts.T
        print(f" â†’ Transposed to {ts.shape}")
    else:
        print(" (kept as is)")

    # Slicing would silently yield an empty or too-narrow array here; stop with
    # an explicit message instead of saving inconsistent shapes.
    if ts.shape[0] <= remove_points or ts.shape[1] < n_nodes:
        raise ValueError(
            f"{sid}/{run_key}: time series of shape {ts.shape} is too small for "
            f"remove_points={remove_points} and n_nodes={n_nodes}"
        )

    # Drop the first remove_points time points and keep the first n_nodes parcels.
    trimmed = ts[remove_points:, :n_nodes]

    # Apply bandpass filtering
    return bandpass_filter_timeseries(trimmed)


for sid in subject_ids:
    print(f"ðŸš€ Processing subject {sid}")
    subj_runs = []

    for run_key in run_order:
        subj_runs.append(_load_filtered_run(sid, run_key))
        # Release the raw per-run array before loading the next run.
        gc.collect()

    # Concatenate all runs along the first axis -> (T_total, N)
    signals = np.concatenate(subj_runs, axis=0)
    print(f"   â†’ Signals shape: {signals.shape}")

    # Create inputs and targets
    # NOTE(review): multi2one is applied to the concatenated signal, so the
    # windows at each run boundary presumably mix samples from two different
    # runs -- confirm this is intended.
    inputs, targets = multi2one(signals, steps=using_steps)
    print(f"   â†’ Inputs: {inputs.shape}, Targets: {targets.shape}")

    # Save to disk (one .npy file per array, named after the subject key)
    np.save(os.path.join(save_dir, f"{sid}_signals.npy"), signals)
    np.save(os.path.join(save_dir, f"{sid}_inputs.npy"), inputs)
    np.save(os.path.join(save_dir, f"{sid}_targets.npy"), targets)
    print(f"   âœ… Saved preprocessed data for {sid}\n")

    # Cleanup before moving on to the next subject
    del subj_runs, signals, inputs, targets
    gc.collect()

print("ðŸŽ¯ All subjects processed successfully!")
print(f"ðŸ“‚ Saved results in: {save_dir}")

ðŸš€ Processing subject id_100206
      Original shape for REST1_LR: (1200, 450) (kept as is)
      Original shape for REST1_RL: (1200, 450) (kept as is)
      Original shape for REST2_LR: (1200, 450) (kept as is)
      Original shape for REST2_RL: (1200, 450) (kept as is)
   â†’ Signals shape: (4680, 450)
   â†’ Inputs: (4677, 1350), Targets: (4677, 450)
   âœ… Saved preprocessed data for id_100206

ðŸš€ Processing subject id_100307
      Original shape for REST1_LR: (1200, 450) (kept as is)
      Original shape for REST1_RL: (1200, 450) (kept as is)
      Original shape for REST2_LR: (1200, 450) (kept as is)
      Original shape for REST2_RL: (1200, 450) (kept as is)
   â†’ Signals shape: (4680, 450)
   â†’ Inputs: (4677, 1350), Targets: (4677, 450)
   âœ… Saved preprocessed data for id_100307

ðŸš€ Processing subject id_100408
      Original shape for REST1_LR: (1200, 450) (kept as is)
      Original shape for REST1_RL: (1200, 450) (kept as is)
      Original shape for REST2_LR: (12

### Understanding `signals`, `inputs`, and `targets`

Let **T** = total number of time points (after concatenating runs), **N** = number of brain regions, and **S** = number of past steps in each input window (`using_steps`).

---

**`signals`**  
- Shape: **(T, N)**  
- Preprocessed fMRI time series (after removing first 30 TRs, filtering, and concatenating runs).  
- `signals[t, n]` = BOLD activity of region *n* at time *t*.

---

**`inputs`**  
- Shape: **(T − S, N × S)**  
- Each row contains the flattened activity of all N regions over the past S time points.  
- Represents the temporal context used for prediction.

---

**`targets`**  
- Shape: **(T − S, N)**  
- Each row is the brain activity at the next time point following the input window.  
- What the model aims to predict.
