In [1]:
# ------------------------------------------
# Step 1: mount Google Drive (Colab runtime)
# ------------------------------------------
from google.colab import drive
drive.mount('/content/drive')

print("✓ Google Drive mounted!")


# ------------------------------------------
# Step 2: define the project paths
# ------------------------------------------
import os

# Root folder of the extracted FineMI data.
# Change this ONLY if your Drive folder is named differently.
extract_dir = "/content/drive/MyDrive/multi_joint_mi_dataset/extracted"

print("Project root (extract_dir):", extract_dir)

# Sub-folders derived from the root.
aligned_dir = os.path.join(extract_dir, "aligned_datasets")
raw_eeg_dir = os.path.join(extract_dir, "FineMI/FineMI")  # optional; not used in this cell

# ------------------------------------------
# Step 3: fail early if a required folder is missing
# ------------------------------------------
if not os.path.exists(extract_dir):
    raise FileNotFoundError(f"extract_dir not found: {extract_dir}")

if not os.path.exists(aligned_dir):
    raise FileNotFoundError(
        f"aligned_datasets folder not found at: {aligned_dir}\n"
        f"Make sure your previous notebook saved files correctly."
    )

print("✓ Required folders found!")
print(" - aligned_dir:", aligned_dir)


# ------------------------------------------
# Step 4: list the aligned per-subject archives
# ------------------------------------------
import glob

# Note: the sort is lexicographic, so "subject10" is listed before "subject1".
aligned_files = glob.glob(os.path.join(aligned_dir, "subject*_aligned.npz"))
aligned_files.sort()

print(f"Found {len(aligned_files)} aligned files.")
for path in aligned_files:
    print("  •", os.path.basename(path))

if aligned_files:
    print("\n✓ Ready to load aligned subjects!")
else:
    print("⚠ No aligned dataset files found. Did the preprocessing notebook run?")


Mounted at /content/drive
✓ Google Drive mounted!
Project root (extract_dir): /content/drive/MyDrive/multi_joint_mi_dataset/extracted
✓ Required folders found!
 - aligned_dir: /content/drive/MyDrive/multi_joint_mi_dataset/extracted/aligned_datasets
Found 18 aligned files.
  • subject10_aligned.npz
  • subject11_aligned.npz
  • subject12_aligned.npz
  • subject13_aligned.npz
  • subject14_aligned.npz
  • subject15_aligned.npz
  • subject16_aligned.npz
  • subject17_aligned.npz
  • subject18_aligned.npz
  • subject1_aligned.npz
  • subject2_aligned.npz
  • subject3_aligned.npz
  • subject4_aligned.npz
  • subject5_aligned.npz
  • subject6_aligned.npz
  • subject7_aligned.npz
  • subject8_aligned.npz
  • subject9_aligned.npz

✓ Ready to load aligned subjects!


In [2]:
# Load all aligned datasets and prepare for training

import numpy as np
import os
import glob
from collections import defaultdict

# Locate every per-subject aligned archive (relies on `extract_dir` from the setup cell).
aligned_dir = os.path.join(extract_dir, "aligned_datasets")
aligned_files = sorted(glob.glob(os.path.join(aligned_dir, "subject*_aligned.npz")))

print(f"Found {len(aligned_files)} aligned dataset files")
print("="*60)


# Minimum raw match rate (percent) a subject needs in order to be included.
MATCH_RATE_THRESHOLD = 80.0
bad_subjects = set()     # names of excluded subjects, e.g. "subject10"
all_eeg_data = []        # one EEG array per included subject
all_fnirs_data = []      # one fNIRS array per included subject
all_labels = []          # one label vector per included subject
subject_ids = []         # one subject number per trial (for subject-wise splits later)
alignment_stats = []     # one dict per included subject describing its alignment quality

for file_path in aligned_files:
    # ".../subject2_aligned.npz" -> "subject2" -> 2
    subject_name = os.path.basename(file_path).replace("_aligned.npz", "")
    subject_num = int(subject_name.replace("subject", ""))

    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load archives
    # you produced yourself. The `with` block closes the underlying file handle once
    # the arrays have been read into memory.
    with np.load(file_path, allow_pickle=True) as data:
        eeg_data = data['eeg_data']      # presumably (n_trials, n_channels_eeg, n_time_eeg)
        fnirs_data = data['fnirs_data']  # presumably (n_trials, n_channels_fnirs, n_time_fnirs)
        labels = data['labels']          # presumably (n_trials,), class ids 0..7
        align_raw = data['alignment_info']

    # The metadata dict is usually stored as a 0-d object array; `.item()` unwraps it.
    align_info = align_raw.item() if hasattr(align_raw, 'item') else align_raw
    # A missing key is treated as a perfect match / zero shift.
    raw_match_rate = align_info.get('raw_match_rate', 100.0)
    best_shift = align_info.get('best_shift', 0)

    # Quality gate: drop subjects whose alignment is too poor.
    if raw_match_rate < MATCH_RATE_THRESHOLD:
        bad_subjects.add(subject_name)
        print(f"⚠ {subject_name}: EXCLUDED (raw_match_rate={raw_match_rate:.1f}% < {MATCH_RATE_THRESHOLD}%)")
        continue

    # The three arrays are concatenated independently below, so a per-subject trial
    # count mismatch would silently misalign every later trial. Fail loudly instead.
    if not (len(eeg_data) == len(fnirs_data) == len(labels)):
        raise ValueError(
            f"{subject_name}: trial count mismatch "
            f"(EEG={len(eeg_data)}, fNIRS={len(fnirs_data)}, labels={len(labels)})"
        )

    # Subject passed the checks: collect its data.
    all_eeg_data.append(eeg_data)
    all_fnirs_data.append(fnirs_data)
    all_labels.append(labels)
    # One subject id per trial, so subject_ids ends up as long as the total trial count.
    subject_ids.extend([subject_num] * len(labels))

    # One summary row per included subject (convertible to a DataFrame later).
    alignment_stats.append({
        'subject': subject_name,
        'n_trials': len(labels),
        'best_shift': best_shift,
        'raw_match_rate': raw_match_rate
    })

    print(f"✓ {subject_name}: {len(labels)} trials, shift={best_shift}, match_rate={raw_match_rate:.1f}%")

###############################################################################
# Report which subjects were excluded (if any).
if bad_subjects:
    print(f"\n{'='*60}")
    # Use the configured threshold so the header stays correct if it is changed.
    print(f"Excluded Subjects (raw_match_rate < {MATCH_RATE_THRESHOLD}%)")
    print(f"{'='*60}")
    for subj in sorted(bad_subjects):
        print(f"  - {subj}")
    print(f"\nTotal excluded: {len(bad_subjects)} subjects")
else:
    print(f"\n✓ No subjects excluded (all have raw_match_rate >= {MATCH_RATE_THRESHOLD}%)")
#################################################################################


# Concatenate all included subjects along the trial axis (axis 0).
if len(all_eeg_data) == 0:
    raise ValueError("No subjects passed the alignment quality threshold! Check your data.")

eeg_all = np.concatenate(all_eeg_data, axis=0)
fnirs_all = np.concatenate(all_fnirs_data, axis=0)
labels_all = np.concatenate(all_labels, axis=0)
subject_ids_all = np.array(subject_ids)

print(f"\n{'='*60}")
print("Dataset Summary")
print(f"{'='*60}")
print(f"Included subjects: {len(all_eeg_data)}")
print(f"Excluded subjects: {len(bad_subjects)}")
print(f"Total trials: {len(labels_all)}")
print(f"EEG shape: {eeg_all.shape}")
print(f"fNIRS shape: {fnirs_all.shape}")
print(f"Labels shape: {labels_all.shape}")
print(f"Label distribution: {np.bincount(labels_all)}")
print(f"Unique subjects: {len(np.unique(subject_ids_all))}")

# These globals are consumed by the following cells.
print(f"\n✓ Data loaded and ready for model training!")
print(f"\nAvailable variables:")
print(f"  - eeg_all: EEG data ({eeg_all.shape})")
print(f"  - fnirs_all: fNIRS data ({fnirs_all.shape})")
print(f"  - labels_all: Labels ({labels_all.shape})")
print(f"  - subject_ids_all: Subject IDs for each trial")


Found 18 aligned dataset files
⚠ subject10: EXCLUDED (raw_match_rate=42.8% < 80.0%)
✓ subject11: 280 trials, shift=8, match_rate=87.5%
✓ subject12: 320 trials, shift=1, match_rate=100.0%
✓ subject13: 320 trials, shift=0, match_rate=100.0%
⚠ subject14: EXCLUDED (raw_match_rate=66.6% < 80.0%)
✓ subject15: 320 trials, shift=0, match_rate=100.0%
✓ subject16: 320 trials, shift=0, match_rate=100.0%
✓ subject17: 320 trials, shift=0, match_rate=100.0%
✓ subject18: 320 trials, shift=0, match_rate=100.0%
✓ subject1: 360 trials, shift=0, match_rate=100.0%
✓ subject2: 320 trials, shift=0, match_rate=100.0%
✓ subject3: 320 trials, shift=0, match_rate=100.0%
✓ subject4: 320 trials, shift=0, match_rate=100.0%
✓ subject5: 321 trials, shift=0, match_rate=100.0%
✓ subject6: 320 trials, shift=0, match_rate=100.0%
✓ subject7: 320 trials, shift=0, match_rate=100.0%
✓ subject8: 319 trials, shift=0, match_rate=99.7%
✓ subject9: 320 trials, shift=0, match_rate=100.0%

Excluded Subjects (raw_match_rate < 80%)


In [3]:
import os
import glob
import numpy as np

print("===== PER-FILE SANITY CHECKS =====")

# Re-discover the aligned archives (relies on `extract_dir` from the setup cell).
aligned_dir = os.path.join(extract_dir, "aligned_datasets")
aligned_files = sorted(glob.glob(os.path.join(aligned_dir, "subject*_aligned.npz")))

if not aligned_files:
    raise FileNotFoundError(f"No aligned files found in {aligned_dir}")

print(f"Found {len(aligned_files)} aligned files\n")

# Flipped to False as soon as any subject fails a check.
per_file_ok = True

for file_path in aligned_files:
    subject_name = os.path.basename(file_path).replace("_aligned.npz", "")

    # Read the arrays inside a `with` block so the archive's file handle is closed
    # before moving on to the next subject.
    with np.load(file_path, allow_pickle=True) as data:
        eeg_data = data['eeg_data']
        fnirs_data = data['fnirs_data']
        labels = data['labels']

    # Basic shapes
    print(f"--- {subject_name} ---")
    print("  EEG shape   :", eeg_data.shape)
    print("  fNIRS shape :", fnirs_data.shape)
    print("  Labels shape:", labels.shape)

    # All three arrays must describe the same number of trials.
    n_eeg_trials   = eeg_data.shape[0]
    n_fnirs_trials = fnirs_data.shape[0]
    n_label_trials = labels.shape[0]

    if not (n_eeg_trials == n_fnirs_trials == n_label_trials):
        print("  ⚠ MISMATCH in trial counts!",
              f"(EEG={n_eeg_trials}, fNIRS={n_fnirs_trials}, labels={n_label_trials})")
        per_file_ok = False

    # Which class ids occur for this subject.
    unique_labels = np.unique(labels)
    print("  Unique labels:", unique_labels)

    # NaN / inf checks (per subject)
    has_nan_eeg   = np.isnan(eeg_data).any()
    has_nan_fnirs = np.isnan(fnirs_data).any()
    has_nan_labels = np.isnan(labels).any()

    has_inf_eeg   = np.isinf(eeg_data).any()
    has_inf_fnirs = np.isinf(fnirs_data).any()
    has_inf_labels = np.isinf(labels).any()

    if has_nan_eeg or has_nan_fnirs or has_nan_labels:
        print("  ⚠ NaNs detected in this subject "
              f"(EEG={has_nan_eeg}, fNIRS={has_nan_fnirs}, labels={has_nan_labels})")
        per_file_ok = False

    if has_inf_eeg or has_inf_fnirs or has_inf_labels:
        print("  ⚠ Infs detected in this subject "
              f"(EEG={has_inf_eeg}, fNIRS={has_inf_fnirs}, labels={has_inf_labels})")
        per_file_ok = False

    print()

if per_file_ok:
    print("✓ All per-file checks passed (shapes & basic sanity look OK)\n")
else:
    print("⚠ Some issues detected above — fix before training!\n")


# ===== CHECKS ON THE CONCATENATED (ALL-SUBJECT) ARRAYS =====
print("===== GLOBAL CONCATENATED ARRAY CHECKS =====")

try:
    # The four arrays below are created by the loading cell; if that cell has not
    # been run, the first missing name raises NameError (handled at the bottom).
    print("eeg_all shape        :", eeg_all.shape)
    print("fnirs_all shape      :", fnirs_all.shape)
    print("labels_all shape     :", labels_all.shape)
    print("subject_ids_all shape:", subject_ids_all.shape)

    # Signals are expected to be 3-D, labels and subject ids 1-D.
    if not eeg_all.ndim == 3:
        print("  ⚠ eeg_all is not 3D (expected (trials, channels, time))")
    if not fnirs_all.ndim == 3:
        print("  ⚠ fnirs_all is not 3D (expected (trials, channels, time))")
    if not labels_all.ndim == 1:
        print("  ⚠ labels_all is not 1D")
    if not subject_ids_all.ndim == 1:
        print("  ⚠ subject_ids_all is not 1D")

    # Length of the leading (trial) axis of every array.
    n_trials_eeg, n_trials_fnirs = eeg_all.shape[0], fnirs_all.shape[0]
    n_trials_labels, n_trials_ids = labels_all.shape[0], subject_ids_all.shape[0]

    print("\nTrial counts:")
    print("  EEG trials       :", n_trials_eeg)
    print("  fNIRS trials     :", n_trials_fnirs)
    print("  Label entries    :", n_trials_labels)
    print("  Subject ID entries:", n_trials_ids)

    if n_trials_eeg == n_trials_fnirs == n_trials_labels == n_trials_ids:
        print("  ✓ Trial counts consistent across all arrays")
    else:
        print("  ⚠ MISMATCH in trial counts across global arrays!")

    # Per-class trial counts over all included subjects.
    print("\nLabel distribution (global):")
    print("  np.bincount(labels_all) =", np.bincount(labels_all))

    # Any NaN or infinite value in the signals or labels.
    print("\nNaN / Inf checks (global):")
    for check_name, check_fn in (("NaNs", np.isnan), ("Infs", np.isinf)):
        for tag, arr in (("EEG   ", eeg_all), ("fNIRS ", fnirs_all), ("labels", labels_all)):
            print(f"  {check_name} in {tag}:", check_fn(arr).any())

except NameError as e:
    print("⚠ Some global arrays (eeg_all, fnirs_all, labels_all, subject_ids_all) "
          "are not defined yet.")
    print("  Error:", e)


===== PER-FILE SANITY CHECKS =====
Found 18 aligned files

--- subject10 ---
  EEG shape   : (137, 68, 1126)
  fNIRS shape : (137, 24, 40)
  Labels shape: (137,)
  Unique labels: [0 1 2 3 4 5 6 7]

--- subject11 ---
  EEG shape   : (280, 68, 1126)
  fNIRS shape : (280, 24, 40)
  Labels shape: (280,)
  Unique labels: [0 1 2 3 4 5 6 7]

--- subject12 ---
  EEG shape   : (320, 68, 1126)
  fNIRS shape : (320, 24, 40)
  Labels shape: (320,)
  Unique labels: [0 1 2 3 4 5 6 7]

--- subject13 ---
  EEG shape   : (320, 68, 1126)
  fNIRS shape : (320, 24, 40)
  Labels shape: (320,)
  Unique labels: [0 1 2 3 4 5 6 7]

--- subject14 ---
  EEG shape   : (213, 68, 1126)
  fNIRS shape : (213, 24, 40)
  Labels shape: (213,)
  Unique labels: [0 1 2 3 4 5 6 7]

--- subject15 ---
  EEG shape   : (320, 68, 1126)
  fNIRS shape : (320, 24, 40)
  Labels shape: (320,)
  Unique labels: [0 1 2 3 4 5 6 7]

--- subject16 ---
  EEG shape   : (320, 68, 1126)
  fNIRS shape : (320, 24, 40)
  Labels shape: (320,)
  Un

In [4]:
# Install PyTorch if needed
!pip install torch torchvision -q

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")


PyTorch version: 2.9.0+cu126
CUDA available: True
CUDA device: NVIDIA A100-SXM4-80GB


In [5]:
import os, random
import numpy as np
import torch
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # or ":16:8"

SEED = 42

def seed_everything(seed=42, deterministic=True):
    """Seed Python, NumPy and PyTorch RNGs and optionally enable deterministic mode.

    Args:
        seed: Integer seed applied to `random`, NumPy and PyTorch (CPU and CUDA).
        deterministic: When True, also configure cuDNN / cuBLAS and ask PyTorch to
            use deterministic algorithms.

    Side effects:
        Mutates process-wide RNG state, `os.environ` and global PyTorch flags.
    """
    # Only subprocesses started after this point see the new hash seed; the hash
    # randomization of the current interpreter was fixed at startup.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    # Seeds every CUDA device (including the current one); safe without a GPU.
    torch.cuda.manual_seed_all(seed)

    if deterministic:
        # cuBLAS reads this variable to provide deterministic matmul workspaces.
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

        # cuDNN: pick deterministic kernels and disable the auto-tuner.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        # Actually turn deterministic algorithms ON (the flag used to be False, which
        # disabled them). warn_only=True makes ops without a deterministic
        # implementation emit a warning instead of raising.
        torch.use_deterministic_algorithms(True, warn_only=True)

# Seed all RNGs once for this session.
seed_everything(SEED, deterministic=True)
print("Seeded & deterministic mode ON")


def seed_worker(worker_id):
    """Seed NumPy, `random` and PyTorch inside a DataLoader worker.

    Meant to be passed as `worker_init_fn`; every worker gets the distinct seed
    `SEED + worker_id`.
    """
    per_worker_seed = SEED + worker_id
    for apply_seed in (np.random.seed, random.seed, torch.manual_seed):
        apply_seed(per_worker_seed)

# Generator seeded with SEED, available to pass to a DataLoader via `generator=`.
g = torch.Generator()
g.manual_seed(SEED)


Seeded & deterministic mode ON


<torch._C.Generator at 0x7c60399ff5d0>

In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit


# ===========================================================
# CONFIG
# ===========================================================
# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

# Re-seed NumPy and PyTorch so this cell starts from a known RNG state.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# The two original class ids kept for the binary task (remapped to 0 / 1 later).
BINARY_CLASSES = [0, 6]
# NOTE(review): N_FOLDS and BATCH_SIZE are not used in the visible part of this cell
# (the global pretraining loader uses its own batch size) — presumably consumed by
# the per-subject loop further down; confirm.
N_FOLDS = 5
BATCH_SIZE = 32
BASE_CHANNELS = 64     # Width of the ConvBranch (number of convolution filters)

# Global teacher pretraining (on all subjects except the target subject)
EPOCHS_GLOBAL_PRETRAIN = 20
LR_GLOBAL = 3e-4    # Learning rate for global teacher pretraining

# Teacher fine-tuning on the target subject's fold
EPOCHS_TEACHER_FT = 15 # Intended max epochs for adapting the teacher to a fold; kept short to limit overfitting to that subject's train split.
LR_TEACHER_FT = 1e-4

# Student training
EPOCHS_STUDENT = 20   # Intended max training epochs for the baseline and KD students.
LR_STUDENT = 3e-4

WEIGHT_DECAY = 1e-4
PATIENCE = 6      # Intended early-stopping patience on validation accuracy (usage not visible here)

# Distillation (base)
DISTILL_ALPHA = 0.4   # Weight of the cross-entropy term in kd_loss_vec; (1 - alpha) weights the KL term. Higher alpha -> trust labels more, teacher less.
DISTILL_T = 3.0
# Temperature: both student and teacher logits are divided by T before the KL term.
# Higher T -> softer probabilities, so the KD term carries more information about
# class similarity ("dark knowledge").

# Students are trained with plain cross-entropy for the first WARMUP_EPOCHS epochs and
# distillation is switched on afterwards.
WARMUP_EPOCHS = 6

# ---- UPGRADE 2: sample-level teacher confidence gating ----
KD_CONF_THRESH = 0.70  # apply KD only to samples whose teacher max probability is >= this
# For every trial the teacher outputs one logit per class; softmax turns them into
# probabilities and the largest one is taken as the teacher's confidence.
# Example: probabilities [0.95, 0.05] -> confidence 0.95 (very confident).



# Optional: validation gating (model-level)
USE_VAL_GATE = True # Intended: pick the KD student only if it is actually better on validation
VAL_GATE_MARGIN = 0.02  # Intended: KD must beat the baseline's validation accuracy by at least this margin (0.02 = 2 points).
# Intended procedure per fold (implemented outside the visible part of this cell):
#   - train a baseline student (cross-entropy only) and a KD student (CE + distillation);
#   - evaluate both on the validation split held out from the training fold;
#   - if the KD student is clearly better on validation, use it for the final test set;
#   - otherwise ignore KD and use the baseline student.

# ===========================================================
# DATASET
# ===========================================================
class MI_Dataset(Dataset):
    """Paired EEG / fNIRS trials with labels, served as dict samples.

    Args:
        eeg: Array-like with one entry per trial along axis 0
            (typically (N, C_eeg, T_eeg)); stored as float32.
        fnirs: Array-like with one entry per trial along axis 0
            (typically (N, C_fnirs, T_fnirs)); stored as float32.
        y: Array-like of N integer class labels; stored as int64.

    Raises:
        ValueError: If eeg, fnirs and y do not contain the same number of trials.
    """

    def __init__(self, eeg, fnirs, y):
        self.eeg = np.asarray(eeg, dtype=np.float32)
        self.fnirs = np.asarray(fnirs, dtype=np.float32)
        self.y = np.asarray(y, dtype=np.int64)

        # Samples are paired by index, so a length mismatch would either raise late
        # inside a DataLoader or silently pair the wrong trials.
        n_trials = len(self.y)
        if len(self.eeg) != n_trials or len(self.fnirs) != n_trials:
            raise ValueError(
                "eeg, fnirs and y must have the same number of trials "
                f"(got eeg={len(self.eeg)}, fnirs={len(self.fnirs)}, y={n_trials})"
            )

    def __len__(self):
        """Number of trials."""
        return len(self.y)

    def __getitem__(self, i):
        """Return trial `i` as {"eeg": float32 tensor, "fnirs": float32 tensor, "y": long tensor}."""
        # The arrays are already float32, so no extra cast is needed after from_numpy.
        return {
            "eeg": torch.from_numpy(self.eeg[i]),
            "fnirs": torch.from_numpy(self.fnirs[i]),
            "y": torch.tensor(self.y[i], dtype=torch.long)
        }


# Each sample is returned as a dictionary so that one dataset can serve several models:
#   - the teacher needs eeg + fnirs + y
#   - the EEG student needs eeg + y
#   - the fNIRS student needs fnirs + y
# __getitem__ may return any kind of object; a dictionary is simply the most convenient here.

# ===========================================================
# FILTER + NORMALIZE
# ===========================================================

#This function extracts only the two chosen classes and remaps labels to 0/1
#Inputs:
#full EEG array
#full fNIRS array
#original labels (0–7)
#subject IDs per trial
#two class IDs you want (e.g., 0 and 7)
def global_binary_filter(eeg, fnirs, labels, subjects, c0, c1):
    """Keep only the trials of classes `c0` / `c1` and relabel them as 0 / 1.

    Args:
        eeg, fnirs: Arrays indexed by trial along axis 0.
        labels: Original class id of every trial.
        subjects: Subject id of every trial.
        c0: Original class id that becomes label 0.
        c1: Original class id that becomes label 1.

    Returns:
        (eeg, fnirs, y, subj) restricted to the selected trials, where `y` is an
        int64 array of 0 / 1 and `subj` stays aligned with the kept trials.
    """
    # True for every trial whose label is one of the two requested classes.
    keep = np.logical_or(labels == c0, labels == c1)

    # Example: labels = [0, 3, 7, 2, 7, 1] with c0=0, c1=7
    #   keep         = [T, F, T, F, T, F]
    #   labels[keep] = [0, 7, 7]  ->  y = [0, 1, 1]
    kept_labels = labels[keep]
    y = np.where(kept_labels == c0, 0, 1).astype(np.int64)

    return eeg[keep], fnirs[keep], y, subjects[keep]




# Computes the per-channel mean and standard deviation.
# The statistics are taken over:
#   axis=0 -> all trials
#   axis=2 -> all time points
# i.e. for channel c: the mean (and std) over every trial and every time point of that channel.
def compute_mean_std(X):
    """Return per-channel (mean, std) of `X`, reduced over axes 0 and 2.

    Both results are float32 and keep the reduced axes (size 1), so for a
    (trials, channels, time) input they broadcast against it. A small epsilon
    (1e-8) is added to the std so that dividing by it never divides by zero.

    The statistics describe only the data passed in; compute them on the
    training split to avoid leaking information from other splits.
    """
    reduce_axes = (0, 2)
    channel_mean = X.mean(axis=reduce_axes, keepdims=True)
    channel_std = X.std(axis=reduce_axes, keepdims=True) + 1e-8
    return channel_mean.astype(np.float32), channel_std.astype(np.float32)

def normalize(X, mean, std):
    """Standardize `X` as (X - mean) / std and return the result as float32."""
    centered = X - mean
    scaled = centered / std
    # copy=False avoids a second copy when the result is already float32.
    return scaled.astype(np.float32, copy=False)
#summary:
#For each channel independently:
#Mean becomes ~0
#Standard deviation becomes ~1
#Scale differences across channels are removed
# ===========================================================
# MODELS
# ===========================================================
class ConvBranch(nn.Module):
    """1-D convolutional feature extractor for one modality.

    Three Conv1d -> BatchNorm1d -> GELU stages; the first two are followed by
    MaxPool1d(2) and the last by a global average pool over time, so the output
    is a fixed-size vector per trial.

    Args:
        in_ch: Number of input channels (e.g. EEG or fNIRS channel count).
        base: Number of filters in the first two stages; the last stage uses 2 * base.
        dropout: Dropout probability applied to the flattened features.

    Attributes:
        out_dim: Size of the output feature vector (2 * base), used to size classifier heads.
    """

    def __init__(self, in_ch, base=64, dropout=0.3):
        super().__init__()
        wide = base * 2
        stages = [
            # Stage 1: kernel 7 with padding 3 keeps the time length; pooling halves it.
            nn.Conv1d(in_ch, base, kernel_size=7, padding=3),
            nn.BatchNorm1d(base),
            nn.GELU(),
            nn.MaxPool1d(2),
            # Stage 2: same width, shorter kernel, time length halved again.
            nn.Conv1d(base, base, kernel_size=5, padding=2),
            nn.BatchNorm1d(base),
            nn.GELU(),
            nn.MaxPool1d(2),
            # Stage 3: doubles the feature width, then collapses the whole time axis.
            nn.Conv1d(base, wide, kernel_size=3, padding=1),
            nn.BatchNorm1d(wide),
            nn.GELU(),
            nn.AdaptiveAvgPool1d(1),   # -> (N, 2*base, 1)
            nn.Flatten(),              # -> (N, 2*base)
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)
        self.out_dim = wide

    def forward(self, x):
        """Map a (N, in_ch, T) batch to (N, out_dim) features."""
        return self.net(x)

class Teacher_EEG_FNIRS(nn.Module):
    """Multimodal teacher: late fusion of an EEG branch and an fNIRS branch.

    Each modality is encoded by its own ConvBranch; the two feature vectors are
    concatenated and classified by a small MLP head:

        EEG   -> ConvBranch -> fe
        fNIRS -> ConvBranch -> ff
        [fe || ff] -> head -> logits

    Args:
        eeg_channels: Number of EEG input channels.
        fnirs_channels: Number of fNIRS input channels.
        num_classes: Number of output logits.
        base_channels: Filter width of the EEG branch; the fNIRS branch uses
            max(16, base_channels // 2), i.e. a smaller capacity.
        dropout: Dropout probability used in both branches and in the head.
    """

    def __init__(self, eeg_channels, fnirs_channels, num_classes=2, base_channels=64, dropout=0.3):
        super().__init__()
        fnirs_width = max(16, base_channels // 2)
        self.eeg_branch = ConvBranch(eeg_channels, base=base_channels, dropout=dropout)
        self.fnirs_branch = ConvBranch(fnirs_channels, base=fnirs_width, dropout=dropout)

        # The head consumes the concatenation of both feature vectors.
        fused_width = self.eeg_branch.out_dim + self.fnirs_branch.out_dim
        hidden = 128
        self.head = nn.Sequential(
            nn.Linear(fused_width, hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, eeg, fnirs):
        """Return class logits for a batch of paired EEG / fNIRS trials."""
        fused = torch.cat((self.eeg_branch(eeg), self.fnirs_branch(fnirs)), dim=1)
        return self.head(fused)

class Student_EEG(nn.Module):
    """EEG-only student: one ConvBranch followed by an MLP head.

    Intended as the deployment model for when only EEG is available. With the
    default `base_channels` it uses the same backbone configuration as the
    teacher's EEG branch.

    Args:
        eeg_channels: Number of EEG input channels.
        num_classes: Number of output logits.
        base_channels: Filter width of the ConvBranch.
        dropout: Dropout probability used in the branch and in the head.
    """

    def __init__(self, eeg_channels, num_classes=2, base_channels=64, dropout=0.3):
        super().__init__()
        self.branch = ConvBranch(eeg_channels, base=base_channels, dropout=dropout)
        hidden = 128
        # The head sees only the EEG features, so its input is narrower than the teacher's.
        self.head = nn.Sequential(
            nn.Linear(self.branch.out_dim, hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, eeg):
        """Return class logits for a batch of EEG trials."""
        return self.head(self.branch(eeg))

class Student_fNIRS(nn.Module):
    """fNIRS-only student: mirrors Student_EEG with a narrower default backbone.

    Args:
        fnirs_channels: Number of fNIRS input channels.
        num_classes: Number of output logits.
        base_channels: Requested filter width; the branch uses max(16, base_channels).
        dropout: Dropout probability used in the branch and in the head.
    """

    def __init__(self, fnirs_channels, num_classes=2, base_channels=32, dropout=0.3):
        super().__init__()
        branch_width = max(16, base_channels)   # never narrower than 16 filters
        self.branch = ConvBranch(fnirs_channels, base=branch_width, dropout=dropout)
        hidden = 128
        self.head = nn.Sequential(
            nn.Linear(self.branch.out_dim, hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, fnirs):
        """Return class logits for a batch of fNIRS trials."""
        return self.head(self.branch(fnirs))

# ===========================================================
# TRAIN / EVAL HELPERS
# ===========================================================
def train_teacher_epoch(model, loader, opt, crit):
    """Run one training epoch of the multimodal teacher (EEG + fNIRS).

    Args:
        model: Teacher taking (eeg, fnirs) and returning logits.
        loader: Iterable of dict batches with keys "eeg", "fnirs" and "y".
        opt: Optimizer over the model parameters.
        crit: Loss function called as crit(logits, y).
    """
    model.train()
    for batch in loader:
        # Inputs must live on the same device as the model.
        eeg_x = batch["eeg"].to(DEVICE)
        fnirs_x = batch["fnirs"].to(DEVICE)
        target = batch["y"].to(DEVICE)

        opt.zero_grad()
        batch_loss = crit(model(eeg_x, fnirs_x), target)
        batch_loss.backward()
        opt.step()

@torch.no_grad()
def eval_teacher(model, loader):
    """Return the teacher's accuracy over `loader` (0.0 if the loader is empty)."""
    model.eval()
    n_correct, n_seen = 0, 0
    for batch in loader:
        eeg_x = batch["eeg"].to(DEVICE)
        fnirs_x = batch["fnirs"].to(DEVICE)
        target = batch["y"].to(DEVICE)

        predicted = model(eeg_x, fnirs_x).argmax(1)
        n_correct += (predicted == target).sum().item()
        n_seen += target.size(0)
    # max(..., 1) guards against division by zero.
    return n_correct / max(n_seen, 1)

def train_eeg_baseline_epoch(student, loader, opt, crit):
    """Run one training epoch of an EEG-only student without distillation.

    Args:
        student: Model taking an EEG batch and returning logits.
        loader: Iterable of dict batches with keys "eeg" and "y".
        opt: Optimizer over the student parameters.
        crit: Loss function called as crit(logits, y).
    """
    student.train()
    for batch in loader:
        eeg_x = batch["eeg"].to(DEVICE)
        target = batch["y"].to(DEVICE)

        opt.zero_grad()
        batch_loss = crit(student(eeg_x), target)
        batch_loss.backward()
        opt.step()

def train_fnirs_baseline_epoch(student, loader, opt, crit):
    """Run one training epoch of an fNIRS-only student without distillation.

    Args:
        student: Model taking an fNIRS batch and returning logits.
        loader: Iterable of dict batches with keys "fnirs" and "y".
        opt: Optimizer over the student parameters.
        crit: Loss function called as crit(logits, y).
    """
    student.train()
    for batch in loader:
        fnirs_x = batch["fnirs"].to(DEVICE)
        target = batch["y"].to(DEVICE)

        opt.zero_grad()
        batch_loss = crit(student(fnirs_x), target)
        batch_loss.backward()
        opt.step()

@torch.no_grad()
def eval_eeg(student, loader):
    """Return the EEG student's accuracy over `loader` (0.0 if the loader is empty)."""
    student.eval()
    n_correct, n_seen = 0, 0
    for batch in loader:
        eeg_x = batch["eeg"].to(DEVICE)
        target = batch["y"].to(DEVICE)

        predicted = student(eeg_x).argmax(1)
        n_correct += (predicted == target).sum().item()
        n_seen += target.size(0)
    # max(..., 1) guards against division by zero.
    return n_correct / max(n_seen, 1)

@torch.no_grad()
def eval_fnirs(student, loader):
    """Return the fNIRS student's accuracy over `loader` (0.0 if the loader is empty)."""
    student.eval()
    n_correct, n_seen = 0, 0
    for batch in loader:
        fnirs_x = batch["fnirs"].to(DEVICE)
        target = batch["y"].to(DEVICE)

        predicted = student(fnirs_x).argmax(1)
        n_correct += (predicted == target).sum().item()
        n_seen += target.size(0)
    # max(..., 1) guards against division by zero.
    return n_correct / max(n_seen, 1)

# ===========================================================
# KD (UPGRADED): warm-start + teacher confidence gating
# ===========================================================
def confidence_mask(teacher_logits, thresh=0.7):
    """Return a boolean mask of samples where the teacher is confident.

    A sample is kept when the teacher's largest softmax probability (over
    dim 1) is at least `thresh`.
    """
    top_prob = F.softmax(teacher_logits, dim=1).max(dim=1).values
    return top_prob >= thresh

def kd_loss_vec(student_logits, teacher_logits, y, alpha=DISTILL_ALPHA, T=DISTILL_T):
    """Per-sample distillation loss: alpha * CE + (1 - alpha) * T^2 * KL.

    Args:
        student_logits: Student logits, one row per sample.
        teacher_logits: Teacher logits for the same samples.
        y: Ground-truth class indices.
        alpha: Weight of the supervised cross-entropy term.
        T: Temperature dividing both sets of logits before the KL term.

    Returns:
        A vector with one loss value per sample (no reduction applied).
    """
    # Supervised term against the hard labels.
    hard_loss = F.cross_entropy(student_logits, y, reduction="none")

    # Softened distributions: log-probabilities for the student, probabilities for the teacher.
    student_log_soft = F.log_softmax(student_logits / T, dim=1)
    teacher_soft = F.softmax(teacher_logits / T, dim=1)

    # KL summed over classes, rescaled by T^2.
    per_class_kl = F.kl_div(student_log_soft, teacher_soft, reduction="none")
    soft_loss = per_class_kl.sum(dim=1) * (T * T)

    return alpha * hard_loss + (1 - alpha) * soft_loss

def train_eeg_kd_epoch(student, teacher, loader, opt, epoch,
                       warmup_epochs=WARMUP_EPOCHS, conf_thresh=KD_CONF_THRESH):
    """Run one epoch of EEG-student training with confidence-gated distillation.

    For `epoch < warmup_epochs` the student is trained with plain cross-entropy.
    Afterwards the frozen teacher scores each batch; if at least one sample
    passes the confidence threshold the loss is
    0.5 * CE(all samples) + 0.5 * mean(kd_loss_vec(confident samples)),
    otherwise it falls back to plain cross-entropy.

    Args:
        student: EEG-only model being trained.
        teacher: Multimodal model taking (eeg, fnirs); used in eval mode without gradients.
        loader: Iterable of dict batches with keys "eeg", "fnirs" and "y".
        opt: Optimizer over the student parameters.
        epoch: Zero-based index of the current epoch.
        warmup_epochs: Number of initial epochs trained without distillation.
        conf_thresh: Minimum teacher max-probability for a sample to receive the KD term.
    """
    student.train()
    teacher.eval()
    distill_enabled = not (epoch < warmup_epochs)

    for batch in loader:
        eeg_x = batch["eeg"].to(DEVICE)
        fnirs_x = batch["fnirs"].to(DEVICE)
        target = batch["y"].to(DEVICE)

        opt.zero_grad()
        student_out = student(eeg_x)

        confident = None
        if distill_enabled:
            with torch.no_grad():
                teacher_out = teacher(eeg_x, fnirs_x)
                confident = confidence_mask(teacher_out, conf_thresh)

        if confident is None or confident.sum().item() == 0:
            # Warm-up, or the teacher is not confident on any sample of this batch.
            batch_loss = F.cross_entropy(student_out, target)
        else:
            distill = kd_loss_vec(student_out[confident], teacher_out[confident], target[confident]).mean()
            supervised = F.cross_entropy(student_out, target)
            batch_loss = 0.5 * supervised + 0.5 * distill

        batch_loss.backward()
        opt.step()

def train_fnirs_kd_epoch(student, teacher, loader, opt, epoch,
                         warmup_epochs=WARMUP_EPOCHS, conf_thresh=KD_CONF_THRESH):
    """Run one epoch of fNIRS-student training with confidence-gated distillation.

    For `epoch < warmup_epochs` the student is trained with plain cross-entropy.
    Afterwards the frozen teacher scores each batch; if at least one sample
    passes the confidence threshold the loss is
    0.5 * CE(all samples) + 0.5 * mean(kd_loss_vec(confident samples)),
    otherwise it falls back to plain cross-entropy.

    Args:
        student: fNIRS-only model being trained.
        teacher: Multimodal model taking (eeg, fnirs); used in eval mode without gradients.
        loader: Iterable of dict batches with keys "eeg", "fnirs" and "y".
        opt: Optimizer over the student parameters.
        epoch: Zero-based index of the current epoch.
        warmup_epochs: Number of initial epochs trained without distillation.
        conf_thresh: Minimum teacher max-probability for a sample to receive the KD term.
    """
    student.train()
    teacher.eval()
    distill_enabled = not (epoch < warmup_epochs)

    for batch in loader:
        eeg_x = batch["eeg"].to(DEVICE)
        fnirs_x = batch["fnirs"].to(DEVICE)
        target = batch["y"].to(DEVICE)

        opt.zero_grad()
        student_out = student(fnirs_x)

        confident = None
        if distill_enabled:
            with torch.no_grad():
                teacher_out = teacher(eeg_x, fnirs_x)
                confident = confidence_mask(teacher_out, conf_thresh)

        if confident is None or confident.sum().item() == 0:
            # Warm-up, or the teacher is not confident on any sample of this batch.
            batch_loss = F.cross_entropy(student_out, target)
        else:
            distill = kd_loss_vec(student_out[confident], teacher_out[confident], target[confident]).mean()
            supervised = F.cross_entropy(student_out, target)
            batch_loss = 0.5 * supervised + 0.5 * distill

        batch_loss.backward()
        opt.step()

# ===========================================================
# GLOBAL TEACHER PRETRAIN (exclude target subject)
# ===========================================================
def pretrain_global_teacher(exclude_subject, eeg_all, fnirs_all, y_all, subj_all,
                            eeg_channels, fnirs_channels, batch_size=64, epochs=None):
    """Pretrain a fresh EEG+fNIRS teacher on every subject except `exclude_subject`.

    The pool is normalized with statistics computed on the pool itself, so
    nothing from the excluded subject influences the returned model.

    Args:
        exclude_subject: Subject id whose trials are left out of the pool.
        eeg_all, fnirs_all, y_all: Per-trial arrays indexed along axis 0.
        subj_all: Per-trial subject ids, aligned with the arrays above.
        eeg_channels, fnirs_channels: Channel counts passed to `Teacher_EEG_FNIRS`.
        batch_size: Mini-batch size of the pretraining loader (default 64).
        epochs: Number of pretraining epochs; `None` uses `EPOCHS_GLOBAL_PRETRAIN`.

    Returns:
        The trained `Teacher_EEG_FNIRS` instance, located on `DEVICE`.

    Raises:
        ValueError: If no trial remains after excluding `exclude_subject`.
    """
    # np.asarray guarantees an element-wise comparison even if a list is passed.
    mask = np.asarray(subj_all) != exclude_subject
    if not mask.any():
        raise ValueError(
            f"No trials left for global pretraining after excluding subject {exclude_subject}."
        )

    eeg = eeg_all[mask]
    fn = fnirs_all[mask]
    y = y_all[mask]

    # normalize on pretrain pool only
    eeg_m, eeg_s = compute_mean_std(eeg)
    fn_m, fn_s = compute_mean_std(fn)
    eeg = normalize(eeg, eeg_m, eeg_s)
    fn = normalize(fn, fn_m, fn_s)

    loader = DataLoader(MI_Dataset(eeg, fn, y), batch_size=batch_size, shuffle=True)

    teacher = Teacher_EEG_FNIRS(
        eeg_channels=eeg_channels,
        fnirs_channels=fnirs_channels,
        num_classes=2,
        base_channels=BASE_CHANNELS,
        dropout=0.1
    ).to(DEVICE)

    opt = torch.optim.AdamW(teacher.parameters(), lr=LR_GLOBAL, weight_decay=WEIGHT_DECAY)
    crit = nn.CrossEntropyLoss()

    # Resolve the epoch count at call time so edits to the constant are honored.
    n_epochs = EPOCHS_GLOBAL_PRETRAIN if epochs is None else epochs
    for _ in range(n_epochs):
        train_teacher_epoch(teacher, loader, opt, crit)

    return teacher

# ===========================================================
# BUILD GLOBAL BINARY ARRAYS
# ===========================================================
# Restrict the pooled data to the two classes listed in BINARY_CLASSES.
# The four results are unpacked into the GLOBAL_* arrays consumed by the
# per-subject loop below.
(
    GLOBAL_EEG,
    GLOBAL_FNIRS,
    GLOBAL_Y,
    GLOBAL_SUBJ,
) = global_binary_filter(
    eeg_all,
    fnirs_all,
    labels_all,
    subject_ids_all,
    BINARY_CLASSES[0],
    BINARY_CLASSES[1],
)

# Quick sanity report: pool size, subject count and class balance.
print("\nGLOBAL DATASET READY")
print(f"Trials: {len(GLOBAL_Y)} | Subjects: {len(np.unique(GLOBAL_SUBJ))}")
print(
    "Class0:", int(np.count_nonzero(GLOBAL_Y == 0)),
    "Class1:", int(np.count_nonzero(GLOBAL_Y == 1)),
)

# ===========================================================
# MAIN RUN
# ===========================================================
# Run banner: blank line, rule, title, rule.
print(
    "\n" + "=" * 100,
    "LEAKAGE-FREE: Teacher (EEG+fNIRS) -> EEG-student + fNIRS-student (baseline vs KD-upgraded vs VAL-gated)",
    "=" * 100,
    sep="\n",
)

# Per-subject mean test scores; one entry is appended per evaluated subject.
teacher_means = []

# EEG-only student: CE baseline, KD-trained, and validation-gated choice.
eeg_base_means = []
eeg_kd_means = []
eeg_gate_means = []

# fNIRS-only student: CE baseline, KD-trained, and validation-gated choice.
fn_base_means = []
fn_kd_means = []
fn_gate_means = []

def _fit_with_early_stopping(model, train_one_epoch, evaluate, max_epochs, patience):
    """Train `model` with validation-based early stopping and restore the best weights.

    Args:
        model: Module whose `state_dict` is snapshotted and restored.
        train_one_epoch: Callable taking the epoch index; runs one training epoch.
        evaluate: Zero-argument callable returning the validation score
            (higher is better).
        max_epochs: Upper bound on the number of epochs.
        patience: Stop after this many consecutive epochs without a strictly
            better validation score.

    Returns:
        The best validation score seen (-1.0 if no epoch improved on it).
    """
    best_val, best_state, stale = -1.0, None, 0
    for ep in range(max_epochs):
        train_one_epoch(ep)
        val = evaluate()
        if val > best_val:
            best_val = val
            # Snapshot on CPU so the checkpoint is decoupled from live parameters.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            stale = 0
        else:
            stale += 1
            if stale >= patience:
                break
    if best_state is not None:
        model.load_state_dict(best_state)
    return best_val


for subj in np.unique(GLOBAL_SUBJ):
    m = GLOBAL_SUBJ == subj
    eeg_s = GLOBAL_EEG[m]
    fn_s  = GLOBAL_FNIRS[m]
    y_s   = GLOBAL_Y[m]

    print(f"\nSubject {subj} | trials={len(y_s)}")
    if len(np.unique(y_s)) < 2:
        print("  Skipping: only one class present.")
        continue

    # Global teacher pretrain on other subjects (no leakage)
    teacher_global = pretrain_global_teacher(
        exclude_subject=subj,
        eeg_all=GLOBAL_EEG, fnirs_all=GLOBAL_FNIRS, y_all=GLOBAL_Y, subj_all=GLOBAL_SUBJ,
        eeg_channels=eeg_s.shape[1],
        fnirs_channels=fn_s.shape[1],
    )

    skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

    fold_T = []

    fold_eeg_base, fold_eeg_kd, fold_eeg_gate = [], [], []
    fold_fn_base,  fold_fn_kd,  fold_fn_gate  = [], [], []

    for fold, (train_idx, test_idx) in enumerate(skf.split(np.zeros(len(y_s)), y_s), 1):
        # Inner split train -> train/val
        sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=SEED)
        tr_sub, va_sub = next(sss.split(np.zeros(len(train_idx)), y_s[train_idx]))
        idx_tr = train_idx[tr_sub]
        idx_va = train_idx[va_sub]
        idx_te = test_idx

        eeg_tr, fn_tr, y_tr = eeg_s[idx_tr], fn_s[idx_tr], y_s[idx_tr]
        eeg_va, fn_va, y_va = eeg_s[idx_va], fn_s[idx_va], y_s[idx_va]
        eeg_te, fn_te, y_te = eeg_s[idx_te], fn_s[idx_te], y_s[idx_te]

        # Fold-train-only normalization (leakage-free): statistics come from the
        # inner training split and are applied unchanged to val and test.
        eeg_m, eeg_std = compute_mean_std(eeg_tr)
        fn_m, fn_std   = compute_mean_std(fn_tr)

        eeg_tr = normalize(eeg_tr, eeg_m, eeg_std)
        eeg_va = normalize(eeg_va, eeg_m, eeg_std)
        eeg_te = normalize(eeg_te, eeg_m, eeg_std)

        fn_tr  = normalize(fn_tr, fn_m, fn_std)
        fn_va  = normalize(fn_va, fn_m, fn_std)
        fn_te  = normalize(fn_te, fn_m, fn_std)

        tr_loader = DataLoader(MI_Dataset(eeg_tr, fn_tr, y_tr), batch_size=BATCH_SIZE, shuffle=True)
        va_loader = DataLoader(MI_Dataset(eeg_va, fn_va, y_va), batch_size=BATCH_SIZE, shuffle=False)
        te_loader = DataLoader(MI_Dataset(eeg_te, fn_te, y_te), batch_size=BATCH_SIZE, shuffle=False)

        # Weighted CE from train split (used for teacher + baselines).
        # Inverse-frequency weights rescaled to sum to 2; unweighted CE if a
        # class is absent from the training split.
        counts = np.bincount(y_tr, minlength=2)
        if counts.min() == 0:
            crit = nn.CrossEntropyLoss()
        else:
            w = (1.0 / counts)
            w = w / w.sum() * 2.0
            crit = nn.CrossEntropyLoss(weight=torch.tensor(w, dtype=torch.float32, device=DEVICE))

        # -----------------------
        # Teacher fold fine-tune (init from global)
        # -----------------------
        teacher = Teacher_EEG_FNIRS(
            eeg_channels=eeg_tr.shape[1],
            fnirs_channels=fn_tr.shape[1],
            num_classes=2,
            base_channels=BASE_CHANNELS,
            dropout=0.1
        ).to(DEVICE)
        teacher.load_state_dict(teacher_global.state_dict())

        opt_t = torch.optim.AdamW(teacher.parameters(), lr=LR_TEACHER_FT, weight_decay=WEIGHT_DECAY)

        best_va_t = _fit_with_early_stopping(
            teacher,
            train_one_epoch=lambda _ep: train_teacher_epoch(teacher, tr_loader, opt_t, crit),
            evaluate=lambda: eval_teacher(teacher, va_loader),
            max_epochs=EPOCHS_TEACHER_FT,
            patience=PATIENCE,
        )

        t_test = eval_teacher(teacher, te_loader)
        fold_T.append(t_test)

        # ==========================================================
        # EEG STUDENT: baseline CE
        # ==========================================================
        eeg_base = Student_EEG(eeg_channels=eeg_tr.shape[1], base_channels=BASE_CHANNELS, dropout=0.3).to(DEVICE)
        opt_eb = torch.optim.AdamW(eeg_base.parameters(), lr=LR_STUDENT, weight_decay=WEIGHT_DECAY)

        best_va_eb = _fit_with_early_stopping(
            eeg_base,
            train_one_epoch=lambda _ep: train_eeg_baseline_epoch(eeg_base, tr_loader, opt_eb, crit),
            evaluate=lambda: eval_eeg(eeg_base, va_loader),
            max_epochs=EPOCHS_STUDENT,
            patience=PATIENCE,
        )
        eeg_base_test = eval_eeg(eeg_base, te_loader)

        # ==========================================================
        # EEG STUDENT: KD (UPGRADED warm + confidence-gated)
        # ==========================================================
        eeg_kd = Student_EEG(eeg_channels=eeg_tr.shape[1], base_channels=BASE_CHANNELS, dropout=0.3).to(DEVICE)
        opt_ek = torch.optim.AdamW(eeg_kd.parameters(), lr=LR_STUDENT, weight_decay=WEIGHT_DECAY)

        best_va_ek = _fit_with_early_stopping(
            eeg_kd,
            train_one_epoch=lambda ep: train_eeg_kd_epoch(eeg_kd, teacher, tr_loader, opt_ek, epoch=ep),
            evaluate=lambda: eval_eeg(eeg_kd, va_loader),
            max_epochs=EPOCHS_STUDENT,
            patience=PATIENCE,
        )
        eeg_kd_test = eval_eeg(eeg_kd, te_loader)

        # VAL gate: report the KD model only if it beat the baseline on
        # validation by more than the margin; the test split is never consulted.
        if USE_VAL_GATE and (best_va_ek > best_va_eb + VAL_GATE_MARGIN):
            eeg_gate_test = eeg_kd_test
        else:
            eeg_gate_test = eeg_base_test

        # ==========================================================
        # fNIRS STUDENT: baseline CE
        # ==========================================================
        # NOTE(review): fNIRS students use a literal base_channels=32 rather
        # than BASE_CHANNELS -- confirm this is intentional.
        fn_base = Student_fNIRS(fnirs_channels=fn_tr.shape[1], base_channels=32, dropout=0.3).to(DEVICE)
        opt_fb = torch.optim.AdamW(fn_base.parameters(), lr=LR_STUDENT, weight_decay=WEIGHT_DECAY)

        best_va_fb = _fit_with_early_stopping(
            fn_base,
            train_one_epoch=lambda _ep: train_fnirs_baseline_epoch(fn_base, tr_loader, opt_fb, crit),
            evaluate=lambda: eval_fnirs(fn_base, va_loader),
            max_epochs=EPOCHS_STUDENT,
            patience=PATIENCE,
        )
        fn_base_test = eval_fnirs(fn_base, te_loader)

        # ==========================================================
        # fNIRS STUDENT: KD (UPGRADED warm + confidence-gated)
        # ==========================================================
        fn_kd = Student_fNIRS(fnirs_channels=fn_tr.shape[1], base_channels=32, dropout=0.3).to(DEVICE)
        opt_fk = torch.optim.AdamW(fn_kd.parameters(), lr=LR_STUDENT, weight_decay=WEIGHT_DECAY)

        best_va_fk = _fit_with_early_stopping(
            fn_kd,
            train_one_epoch=lambda ep: train_fnirs_kd_epoch(fn_kd, teacher, tr_loader, opt_fk, epoch=ep),
            evaluate=lambda: eval_fnirs(fn_kd, va_loader),
            max_epochs=EPOCHS_STUDENT,
            patience=PATIENCE,
        )
        fn_kd_test = eval_fnirs(fn_kd, te_loader)

        # VAL gate for fNIRS (same rule as for EEG)
        if USE_VAL_GATE and (best_va_fk > best_va_fb + VAL_GATE_MARGIN):
            fn_gate_test = fn_kd_test
        else:
            fn_gate_test = fn_base_test

        # Collect fold results
        fold_eeg_base.append(eeg_base_test)
        fold_eeg_kd.append(eeg_kd_test)
        fold_eeg_gate.append(eeg_gate_test)

        fold_fn_base.append(fn_base_test)
        fold_fn_kd.append(fn_kd_test)
        fold_fn_gate.append(fn_gate_test)

        print(f" Fold {fold}/{N_FOLDS} | T={t_test:.3f} | "
              f"EEG: base={eeg_base_test:.3f} kd={eeg_kd_test:.3f} gate={eeg_gate_test:.3f} | "
              f"fNIRS: base={fn_base_test:.3f} kd={fn_kd_test:.3f} gate={fn_gate_test:.3f}")

    # Subject means over folds
    Tm = float(np.mean(fold_T))

    ebm, ekm, egm = float(np.mean(fold_eeg_base)), float(np.mean(fold_eeg_kd)), float(np.mean(fold_eeg_gate))
    fbm, fkm, fgm = float(np.mean(fold_fn_base)),  float(np.mean(fold_fn_kd)),  float(np.mean(fold_fn_gate))

    teacher_means.append(Tm)
    eeg_base_means.append(ebm); eeg_kd_means.append(ekm); eeg_gate_means.append(egm)
    fn_base_means.append(fbm);  fn_kd_means.append(fkm);  fn_gate_means.append(fgm)

    print(f"Subject {subj} means | T={Tm:.3f} | "
          f"EEG(base/kd/gate)={ebm:.3f}/{ekm:.3f}/{egm:.3f} | "
          f"fNIRS(base/kd/gate)={fbm:.3f}/{fkm:.3f}/{fgm:.3f}")

# Final summary
# Final summary: mean ± std (population std, numpy default) of the per-subject
# means collected by the main loop.
print("\n" + "=" * 100)
print("FINAL SUMMARY (UPGRADED KD, leakage-free)")
print("=" * 100)
print(f"Subjects evaluated: {len(teacher_means)}")

# (label, per-subject scores) in report order; labels are left-padded to a
# common width of 27 so the "=" signs line up.
_summary_rows = [
    ("Teacher Overall mean", teacher_means),
    ("EEG-only Baseline mean", eeg_base_means),
    ("EEG-only KD-upgraded mean", eeg_kd_means),
    ("EEG-only VAL-gated mean", eeg_gate_means),
    ("fNIRS-only Baseline mean", fn_base_means),
    ("fNIRS-only KD-upgraded mean", fn_kd_means),
    ("fNIRS-only VAL-gated mean", fn_gate_means),
]

for _label, _scores in _summary_rows:
    if len(_scores) == 0:
        # Every subject was skipped: np.mean([]) would print nan and emit a
        # RuntimeWarning, so report the absence of data explicitly instead.
        print(f"{_label:<27} = n/a")
        continue
    print(f"{_label:<27} = {np.mean(_scores):.3f} ± {np.std(_scores):.3f}")

print(f"(KD warmup={WARMUP_EPOCHS} epochs, conf_thresh={KD_CONF_THRESH}, VAL gate margin={VAL_GATE_MARGIN}, gate={USE_VAL_GATE})")


Using device: cuda

GLOBAL DATASET READY
Trials: 1279 | Subjects: 16
Class0: 640 Class1: 639

LEAKAGE-FREE: Teacher (EEG+fNIRS) -> EEG-student + fNIRS-student (baseline vs KD-upgraded vs VAL-gated)

Subject 1 | trials=90
 Fold 1/5 | T=0.556 | EEG: base=0.611 kd=0.556 gate=0.611 | fNIRS: base=0.500 kd=0.500 gate=0.500
 Fold 2/5 | T=0.556 | EEG: base=0.667 kd=0.556 gate=0.556 | fNIRS: base=0.500 kd=0.500 gate=0.500
 Fold 3/5 | T=0.389 | EEG: base=0.556 kd=0.444 gate=0.556 | fNIRS: base=0.500 kd=0.500 gate=0.500
 Fold 4/5 | T=0.444 | EEG: base=0.667 kd=0.778 gate=0.667 | fNIRS: base=0.389 kd=0.444 gate=0.389
 Fold 5/5 | T=0.611 | EEG: base=0.500 kd=0.722 gate=0.722 | fNIRS: base=0.500 kd=0.500 gate=0.500
Subject 1 means | T=0.511 | EEG(base/kd/gate)=0.600/0.611/0.622 | fNIRS(base/kd/gate)=0.478/0.489/0.478

Subject 2 | trials=80
 Fold 1/5 | T=0.562 | EEG: base=0.375 kd=0.500 gate=0.375 | fNIRS: base=0.500 kd=0.562 gate=0.562
 Fold 2/5 | T=0.625 | EEG: base=0.500 kd=0.500 gate=0.500 | fNIR