# Prepare the folder

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!git clone https://github.com/atomiiw/EEG-Model-Fine-tune.git

Cloning into 'EEG-Model-Fine-tune'...
remote: Enumerating objects: 35, done.[K
remote: Counting objects: 100% (35/35), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 35 (delta 1), reused 35 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (35/35), 958.75 KiB | 43.58 MiB/s, done.
Resolving deltas: 100% (1/1), done.


In [10]:
%cd EEG-Model-Fine-tune

/content/EEG-Model-Fine-tune


In [None]:
!pip install -r MIRepNet/requirements.txt

# Baseline Performance: Before Fine-tuning
Current output: among {0, 1, 2, 3}   
Expected output: among {0, 1, ..., 7, 8}   
Current accuracy: 8%-15%   
Accuracy if just randomly guessing: 11%     

Why does accuracy differ every time?  
'Loaded 108/110 parameters from pretrained model'   
The 2 final layer weights are randomly initialized


In [18]:
print("Working dir:", os.getcwd())


Working dir: /content/EEG-Model-Fine-tune


In [29]:
import torch
import numpy as np
from model.mlm import mlm_mask
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F

# ==== CONFIG ====
DATASET_NAME = "basic"
WEIGHT_PATH = "weight/MIRepNet.pth"   # pretrained weights (4-class)
BATCH_SIZE = 32
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ==== LOAD DATA ====
X = np.load(f'data/{DATASET_NAME}/X_test.npy')   # (N, 128, 200)
y = np.load(f'data/{DATASET_NAME}/labels_test.npy')  # (N,)
print("Loaded data:", X.shape, y.shape)

# convert to tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)

dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)

# ==== LOAD MODEL ====
model = mlm_mask(
    emb_size=256,
    depth=6,
    n_classes=4,     # pretrained model expects 4 outputs
    pretrainmode=False,
    pretrain=WEIGHT_PATH
).to(DEVICE)

model.eval()

# ==== EVALUATE ====
correct = 0
total = 0

with torch.no_grad():
    for data, labels in loader:
        data, labels = data.to(DEVICE), labels.to(DEVICE)
        _, outputs = model(data)
        # expected to return amongst {0, 1, 2, 3}
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

accuracy = correct / total * 100
print(f"\n✅ Raw pretrained MIRepNet accuracy on your dataset: {accuracy:.2f}%")
print(f"Correct: {correct} / {total}")

Loaded data: (144, 128, 200) (144,)
Loaded 108/110 parameters from pretrained model

✅ Raw pretrained MIRepNet accuracy on your dataset: 11.81%
Correct: 17 / 144


# BASIC: Fine-tune

In [23]:
%cd MIRepNet

/content/EEG-Model-Fine-tune/MIRepNet


In [27]:
!python finetune.py --dataset_name basic --model_name MIRepNet --num_classes 9 --val_split 0.8 --epochs 10


Starting EEG Classification with Configurable Hyperparameters

original data shape: (1096, 128, 200) labels shape: (1096,)
preprocessed data shape: (1096, 128, 200) preprocessed labels shape: (1096,)
before processed： (219, 128, 200)
after processed： (219, 45, 200)
before processed： (877, 128, 200)
after processed： (877, 45, 200)
Loaded 108/110 parameters from pretrained model
Seed: 666, Subject: 0

Predicted: [5 1 7 6 0 5 8 2 5 7 5 0 5 5 0 7 5 7 1 0 7 5 5 2 5 7 5 5 7 5 2 8]
Actual:    [5 0 6 1 3 5 3 0 1 1 8 1 8 2 6 7 2 8 2 4 6 7 5 4 7 5 2 3 7 7 8 3]
Got 95 out of 877 correct. Accuracy: 10.832383124287343.
Predicted: [7 1 7 6 6 6 0 4 0 1 1 0 7 7 4 1 7 7 1 6 7 7 5 4 7 7 5 5 4 3 3 4]
Actual:    [5 0 6 1 3 5 3 0 1 1 8 1 8 2 6 7 2 8 2 4 6 7 5 4 7 5 2 3 7 7 8 3]
Got 95 out of 877 correct. Accuracy: 10.832383124287343.
Predicted: [3 3 7 2 3 3 0 2 5 3 7 0 7 7 0 4 6 2 3 6 4 2 5 2 3 4 4 5 4 3 3 4]
Actual:    [5 0 6 1 3 5 3 0 1 1 8 1 8 2 6 7 2 8 2 4 6 7 5 4 7 5 2 3 7 7 8 3]
Got 102 out of 877 co

# BASIC: Load the fine-tuned weights & test on full dataset

In [None]:
import torch
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from utils.utils import process_and_replace_loader
from model.mlm import mlm_mask

# ==== CONFIG ====
DATASET_NAME = "mydata_test"
WEIGHT_PATH = "./weight/mydata_MIRepNet_finetuned.pth"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 32

# ==== LOAD RAW DATA ====
X = np.load(f'./data/{DATASET_NAME}/X.npy')
y = np.load(f'./data/{DATASET_NAME}/labels.npy')
print("Loaded:", X.shape, y.shape)

X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)
loader = DataLoader(TensorDataset(X_tensor, y_tensor),
                    batch_size=BATCH_SIZE, shuffle=False)

# ==== PREPROCESS (same as training) ====
loader = process_and_replace_loader(loader,
                                    ischangechn=True,
                                    dataset=DATASET_NAME)

# ==== LOAD MODEL ====
model = mlm_mask(
    emb_size=256,
    depth=6,
    n_classes=9,
    pretrainmode=False,
    pretrain=None
).to(DEVICE)

model.load_state_dict(torch.load(WEIGHT_PATH))
model.eval()

# ==== EVALUATE ====
correct, total = 0, 0
all_preds, all_labels = [], []

with torch.no_grad():
    for data, labels in loader:
        data, labels = data.to(DEVICE), labels.to(DEVICE)
        _, outputs = model(data)
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        correct += (preds == labels).sum().item()
        total += labels.size(0)

accuracy = correct / total * 100
print(f"\n🎯 Accuracy on preprocessed data: {accuracy:.2f}% "
      f"({correct}/{total})")

Loaded: (346, 111, 200) (346,)
before processed： (346, 111, 200)
after processed： (346, 45, 200)

🎯 Accuracy on preprocessed data: 91.62% (317/346)


# Split out a separate test set, provided a test ratio

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
import os

# ==== CONFIG ====
train_ratio = 0.9   # e.g. 0.9 for 90% train / 10% test
data_dir = "./data/mydata"
test_dir = "./data/mydata_test"
os.makedirs(test_dir, exist_ok=True)

# ==== LOAD ====
def load_or_empty(folder):
    X_path, y_path = f"{folder}/X.npy", f"{folder}/labels.npy"
    if os.path.exists(X_path) and os.path.exists(y_path):
        return np.load(X_path), np.load(y_path)
    return np.empty((0,)), np.empty((0,))

X_train_old, y_train_old = load_or_empty(data_dir)
X_test_old, y_test_old = load_or_empty(test_dir)

# ==== MERGE ====
if X_train_old.size == 0 and X_test_old.size == 0:
    raise FileNotFoundError("❌ No data found in mydata/ or mydata_test/")
elif X_test_old.size == 0:
    print("ℹ️ Only found mydata/, splitting it now...")
    X_full, y_full = X_train_old, y_train_old
else:
    print("ℹ️ Found both mydata/ and mydata_test/, merging before re-split...")
    X_full = np.concatenate([X_train_old, X_test_old])
    y_full = np.concatenate([y_train_old, y_test_old])

print(f"\n📦 Total samples: {len(y_full)} | Shape: {X_full.shape}")

# ==== SPLIT ====
test_ratio = 1 - train_ratio
X_train, X_test, y_train, y_test = train_test_split(
    X_full, y_full, test_size=test_ratio, random_state=42, stratify=y_full
)

# ==== SAVE ====
np.save(f"{data_dir}/X.npy", X_train)
np.save(f"{data_dir}/labels.npy", y_train)
np.save(f"{test_dir}/X.npy", X_test)
np.save(f"{test_dir}/labels.npy", y_test)

# ==== REPORT ====
print(f"\n✅ Split complete:")
print(f"Train set: {X_train.shape}, Labels: {y_train.shape}")
print(f"Test set:  {X_test.shape}, Labels: {y_test.shape}")
print(f"\n🎯 {train_ratio*100:.0f}% train / {test_ratio*100:.0f}% test split saved.")

ℹ️ Only found mydata/, splitting it now...

📦 Total samples: 3456 | Shape: (3456, 111, 200)

✅ Split complete:
Train set: (3110, 111, 200), Labels: (3110,)
Test set:  (346, 111, 200), Labels: (346,)

🎯 90% train / 10% test split saved.


In [None]:
!git rm --cached "preprocess data/pt_decoding_data_S62.pkl"


fatal: pathspec 'preprocess data/pt_decoding_data_S62.pkl' did not match any files


In [None]:
!git status

On branch master

No commits yet

Changes to be committed:
  (use "git rm --cached <file>..." to unstage)
	[32mnew file:   .gitignore[m
	[32mnew file:   MIRepNet.md[m
	[32mnew file:   MIRepNet/.gitignore[m
	[32mnew file:   MIRepNet/LICENSE[m
	[32mnew file:   MIRepNet/MIRepNet_Finetuning.ipynb[m
	[32mnew file:   MIRepNet/README.md[m
	[32mnew file:   MIRepNet/asset/1[m
	[32mnew file:   MIRepNet/asset/Datasets.jpg[m
	[32mnew file:   MIRepNet/asset/High-quality_Data_Construction.jpg[m
	[32mnew file:   MIRepNet/asset/Main_Results.jpg[m
	[32mnew file:   MIRepNet/asset/RepMI.jpg[m
	[32mnew file:   MIRepNet/dataset.py[m
	[32mnew file:   MIRepNet/finetune.py[m
	[32mnew file:   MIRepNet/model/ADFCNN.py[m
	[32mnew file:   MIRepNet/model/Conformer.py[m
	[32mnew file:   MIRepNet/model/Deep_Shallow_Conv.py[m
	[32mnew file:   MIRepNet/model/EDPNet.py[m
	[32mnew file:   MIRepNet/model/EEGNet.py[m
	[32mnew file:   MIRepNet/model/FBCNet.py[m
	[32mnew file:   MIRepN

# PATIENT: Establish Datasets

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from utils.utils import process_and_replace_loader

# ==== CONFIG ====
DATA_DIR = "./data/mydata_patients"
TRAIN_SAVE_DIR = "./data/mydata_patients_train"
TEST_SAVE_DIR = "./data/mydata_patients_test"
DATASET_NAME = "mydata"
HELD_OUT = "S26"   # choose which patient to hold out for testing
BATCH_SIZE = 32

os.makedirs(TRAIN_SAVE_DIR, exist_ok=True)
os.makedirs(TEST_SAVE_DIR, exist_ok=True)

# ==== Helper Functions ====
def load_patient(pid):
    """Load one patient's X and y arrays."""
    X = np.load(os.path.join(DATA_DIR, f"X_{pid}.npy"))
    y = np.load(os.path.join(DATA_DIR, f"y_{pid}.npy"))
    print(f"Loaded {pid}: X={X.shape}, y={y.shape}")
    return X, y

def preprocess_patient(X, y):
    """Apply EA + channel alignment (ischangechn=True)"""
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)
    loader = DataLoader(TensorDataset(X_tensor, y_tensor),
                        batch_size=BATCH_SIZE, shuffle=False)
    loader = process_and_replace_loader(loader,
                                        ischangechn=True,
                                        dataset=DATASET_NAME)
    all_X, all_y = [], []
    for xb, yb in loader:
        all_X.append(xb.numpy())
        all_y.append(yb.numpy())
    return np.concatenate(all_X), np.concatenate(all_y)

# ==== Load all patients and preprocess ====
patients = [f.split("_")[1].split(".")[0] for f in os.listdir(DATA_DIR) if f.startswith("X_")]

train_X, train_y = [], []
for pid in patients:
    X, y = load_patient(pid)
    Xp, yp = preprocess_patient(X, y)
    if pid == HELD_OUT:
        # save test set separately
        np.save(os.path.join(TEST_SAVE_DIR, "X.npy"), Xp)
        np.save(os.path.join(TEST_SAVE_DIR, "labels.npy"), yp)
        print(f"✅ Saved test set for {pid}: {Xp.shape}")
    else:
        train_X.append(Xp)
        train_y.append(yp)
        print(f"Added {pid} to training pool ({Xp.shape})")

# ==== Combine train patients ====
X_train = np.concatenate(train_X)
y_train = np.concatenate(train_y)
print(f"\nCombined train shape: X={X_train.shape}, y={y_train.shape}")

# ==== Save train set ====
np.save(os.path.join(TRAIN_SAVE_DIR, "X.npy"), X_train)
np.save(os.path.join(TRAIN_SAVE_DIR, "labels.npy"), y_train)
print(f"✅ Saved train set to {TRAIN_SAVE_DIR}")

Loaded S14: X=(432, 111, 200), y=(432,)
before processed： (432, 111, 200)
after processed： (432, 45, 200)
Added S14 to training pool ((432, 45, 200))
Loaded S39: X=(411, 144, 200), y=(411,)
before processed： (411, 144, 200)
after processed： (411, 45, 200)
Added S39 to training pool ((411, 45, 200))
Loaded S22: X=(453, 74, 200), y=(453,)
before processed： (453, 74, 200)
after processed： (453, 45, 200)
Added S22 to training pool ((453, 45, 200))
Loaded S23: X=(453, 63, 200), y=(453,)
before processed： (453, 63, 200)
after processed： (453, 45, 200)
Added S23 to training pool ((453, 45, 200))
Loaded S58: X=(423, 171, 200), y=(423,)
before processed： (423, 171, 200)
after processed： (423, 45, 200)
Added S58 to training pool ((423, 45, 200))
Loaded S62: X=(534, 201, 200), y=(534,)
before processed： (534, 201, 200)
after processed： (534, 45, 200)
Added S62 to training pool ((534, 45, 200))
Loaded S33: X=(138, 149, 200), y=(138,)
before processed： (138, 149, 200)
after processed： (138, 45, 200

# Train on majority & evaluate on held-out patients

In [None]:
!python finetune.py \
    --dataset_name mydata_patients_train \
    --model_name MIRepNet \
    --num_classes 9 \
    --val_split 0.8 \
    --epochs 50

Starting EEG Classification with Configurable Hyperparameters

original data shape: (2844, 45, 200) labels shape: (2844,)
preprocessed data shape: (2844, 45, 200) preprocessed labels shape: (2844,)
before processed： (568, 45, 200)
after processed： (568, 45, 200)
before processed： (2276, 45, 200)
after processed： (2276, 45, 200)
Loaded 108/110 parameters from pretrained model
Seed: 666, Subject: 0

Predicted: [1 7 4 1 1 4 4 1 7 1 7 1 1 7 5 7 7 7 4 7 7 1 4 7 4 7 1 3 7 1 4 1]
Actual:    [5 1 1 0 3 3 2 6 4 5 8 0 8 5 8 2 3 6 5 7 6 7 7 2 4 1 0 8 3 0 1 5]
Got 273 out of 2276 correct. Accuracy: 11.994727592267134.
Predicted: [0 1 1 1 1 1 3 1 5 1 0 3 1 3 1 0 5 4 3 8 5 8 3 1 3 1 1 3 7 3 8 1]
Actual:    [5 1 1 0 3 3 2 6 4 5 8 0 8 5 8 2 3 6 5 7 6 7 7 2 4 1 0 8 3 0 1 5]
Got 271 out of 2276 correct. Accuracy: 11.906854130052725.
Predicted: [0 1 1 1 1 1 2 0 1 1 2 1 1 7 2 1 1 5 3 7 5 1 7 1 1 1 1 2 3 1 7 1]
Actual:    [5 1 1 0 3 3 2 6 4 5 8 0 8 5 8 2 3 6 5 7 6 7 7 2 4 1 0 8 3 0 1 5]
Got 265 out of 2276