Google Colab is cool

In [1]:
!pip install torcheeg pytorch-lightning torch-scatter

# possibly also needed (if errors) torch-sparse torch-cluster torch-spline-conv (if using GNN) torch-geometric

Collecting torcheeg
  Downloading torcheeg-1.1.3.tar.gz (251 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/251.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.4/251.4 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pytorch-lightning
  Downloading pytorch_lightning-2.5.1-py3-none-any.whl.metadata (20 kB)
Collecting scipy<=1.10.1,>=1.7.3 (from torcheeg)
  Downloading scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.9/58.9 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting lmdb>=1.3.0 (from torcheeg)
  Downloading lmdb-1.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting mne>=1.0.3 (from torcheeg)
  Downloading mne-1.9.0-py3-none-any.whl.metadata (20 kB)
Collecting xmltodict>=0.13.0

In [12]:
!pip install --upgrade numpy

Collecting numpy
  Downloading numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m86.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
scipy 1.10.1 requires numpy<1.27.0,>=1.19.5, but you have numpy 2.2.4 which is incompatible.
tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 2.2.4 wh

In [13]:
import torch

# Report the framework build so a run can be reproduced or debugged later.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA version: {torch.version.cuda}")

# Both settings follow directly from CUDA availability:
# pinned host memory is only requested when a GPU is present.
pin_memory = torch.cuda.is_available()
accelerator = "gpu" if pin_memory else "cpu"

print(
    "GPU is available. Training will use GPU acceleration."
    if pin_memory
    else "WARNING: GPU not available, using CPU instead"
)

PyTorch version: 2.6.0+cu124
CUDA version: 12.4
GPU is available. Training will use GPU acceleration.


In [14]:
from torcheeg.datasets import DREAMERDataset
from torcheeg import transforms
from torcheeg.datasets.constants import DREAMER_CHANNEL_LOCATION_DICT
from torcheeg.model_selection import KFoldGroupbyTrial
from torcheeg.models import CCNN
from torcheeg.trainers import ClassifierTrainer

from torch.utils.data import DataLoader, random_split
from torch.utils.data import DataLoader

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

import os


# Mount Google Drive; the dataset and the model output folder live under BASE_PATH.
from google.colab import drive
drive.mount('/content/drive')
BASE_PATH = '/content/drive/MyDrive/datasets/DREAMER'
# BASE_PATH = './data'

# The joined components must NOT start with '/': os.path.join throws away everything before an
# absolute component, so '/transformed/' would resolve to the filesystem root instead of BASE_PATH.
IO_PATH = os.path.join(BASE_PATH, 'transformed')
MAT_FILE_PATH = os.path.join(BASE_PATH, 'DREAMER.mat')
MODEL_SAVE_PATH = os.path.join(BASE_PATH, 'model')

# Fail early with a proper exception (raising a plain string is itself a TypeError)
# and tell the user exactly which paths could not be found.
_missing_paths = [
    path for path in (IO_PATH, MAT_FILE_PATH, MODEL_SAVE_PATH) if not os.path.exists(path)
]
if _missing_paths:
    raise FileNotFoundError(
        "Make sure the base path is correct and accessible - it should have "
        "'transformed' folder, 'model' folder, and 'DREAMER.mat' file "
        f"(missing: {_missing_paths})"
    )


SEED: int = 42
BATCH_SIZE: int = 64
N_WORKERS: int = 4   # rule of thumb: n_workers = 4 * n_gpus

# Seed Python, NumPy and PyTorch (and DataLoader workers) so weight initialisation and
# batch shuffling are repeatable, not only the dataset split that uses SEED explicitly.
pl.seed_everything(SEED, workers=True)

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [7]:
# Dataset source: https://zenodo.org/records/546113
# Katsigiannis, S., & Ramzan, N. (2017). DREAMER: A Database for Emotion Recognition through EEG and ECG Signals from Wireless Low-cost Off-the-Shelf Devices [Data set]. Zenodo. https://doi.org/10.1109/JBHI.2017.2688239
# The label is built from two ratings on a 1-5 scale: arousal (intensity of the emotion) and
# valence (how negative/positive the emotion is). Negative valence combined with high arousal is treated as fear.


def _fear_label(ratings):
    """Turn the selected ``[valence, arousal]`` ratings into a binary fear label.

    Args:
        ratings: indexable pair whose first item is the valence rating and whose second
            item is the arousal rating (the order given to ``transforms.Select`` below).

    Returns:
        A 0-dim ``torch.long`` tensor: 1 when valence < 3 and arousal > 3, otherwise 0.
    """
    valence, arousal = ratings[0], ratings[1]
    return torch.tensor(1 if valence < 3 and arousal > 3 else 0, dtype=torch.long)


dataset = DREAMERDataset(
    io_path=IO_PATH,
    mat_path=MAT_FILE_PATH,

    # Feature extraction: per-band differential entropy, laid out on the electrode grid.
    # `apply_to_baseline=True` runs the same steps on the baseline recording so it can be removed below.
    offline_transform=transforms.Compose([
        transforms.BandDifferentialEntropy(apply_to_baseline=True),
        transforms.ToGrid(DREAMER_CHANNEL_LOCATION_DICT, apply_to_baseline=True)
    ]),

    # Per-sample steps: subtract the baseline, then convert to a tensor.
    online_transform=transforms.Compose([
        transforms.BaselineRemoval(),
        transforms.ToTensor()
    ]),

    # torcheeg transforms are called with keyword arguments and return a dict, so a bare
    # Python callable cannot be placed in Compose directly; transforms.Lambda adapts it
    # and applies it to the 'y' (label) entry only.
    label_transform=transforms.Compose([
        transforms.Select(['valence', 'arousal']),
        transforms.Lambda(targets=['y'], lambd=_fear_label)
    ]),

    num_worker=N_WORKERS
)

NameError: name 'DREAMERDataset' is not defined

In [None]:
# The validation split is evaluated during training to monitor the model.
# The test split is kept aside until the very end: even though we never train on the validation data,
# mechanisms like picking the best checkpoint can still introduce a 'validation bias'.
total_size = len(dataset)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size  # remainder, so the three sizes always add up to total_size

# NOTE(review): random_split assigns individual samples to splits. KFoldGroupbyTrial is imported
# but unused - confirm whether a trial-wise split was intended, since samples cut from the same
# recording could otherwise land in both the training and the evaluation splits.
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SEED)
)

print(f"Dataset split: {train_size} training, {val_size} validation, {test_size} test samples")


def _make_loader(split, shuffle):
    """Build a DataLoader for one split using the notebook-wide loader settings.

    Args:
        split: the dataset (subset) to iterate over.
        shuffle: whether to reshuffle the samples every epoch.

    Returns:
        A ``DataLoader`` using ``BATCH_SIZE``, ``N_WORKERS`` and ``pin_memory``.
    """
    return DataLoader(
        split,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        num_workers=N_WORKERS,
        pin_memory=pin_memory
    )


# When training on a GPU, batches are placed in 'pinned' host memory for faster CPU -> GPU transfer.
# Only the training data is shuffled, to avoid learning patterns from the sample order across batches;
# evaluation metrics do not depend on the order, so validation/test are left unshuffled.
train_loader = _make_loader(train_dataset, shuffle=True)
val_loader = _make_loader(val_dataset, shuffle=False)
test_loader = _make_loader(test_dataset, shuffle=False)

In [None]:
NUM_CLASSES = 2  # binary classification (fear vs. non-fear)

# Read the input layout from an actual sample instead of hard-coding it, so the network
# always matches what the dataset transforms produce.
# Assumes a sample is (frequency bands, grid height, grid width); the original notes expected
# 4 bands on a 7x7 grid - TODO confirm against the shape printed below.
sample_eeg = dataset[0][0]
if sample_eeg.ndim != 3:
    raise ValueError(
        f"Expected a 3-D (bands, height, width) sample, got shape {tuple(sample_eeg.shape)}"
    )
in_channels, *grid_size = sample_eeg.shape
print(f"Model input: {in_channels} channels on a {grid_size[0]}x{grid_size[1]} grid")

model = CCNN(
    num_classes=NUM_CLASSES,
    in_channels=in_channels,
    grid_size=tuple(grid_size)
)


callbacks = [
    # keeps the single best model (highest validation accuracy) in a file named after epoch and score
    ModelCheckpoint(
        dirpath=MODEL_SAVE_PATH,
        filename='best-model-{epoch:02d}-{val_accuracy:.4f}',
        monitor='val_accuracy',
        mode='max',
        save_top_k=1
    ),

    # keeps the most recent model under a fixed file name
    ModelCheckpoint(
        dirpath=MODEL_SAVE_PATH,
        filename='last-model',
        save_last=True
    ),

    # guards against overfitting: stop once val_accuracy has not improved for 10 epochs
    EarlyStopping(
        monitor='val_accuracy',
        patience=10,
        mode='max',
        verbose=True
    )
]


# Besides accuracy, also track precision / recall / F1: the evaluation cell reports them.
trainer = ClassifierTrainer(
    model=model,
    num_classes=NUM_CLASSES,
    lr=1e-4,
    weight_decay=1e-4,
    accelerator=accelerator,
    metrics=['accuracy', 'precision', 'recall', 'f1score']
)

In [None]:
# Run the training loop on the train/validation loaders with the callbacks configured earlier.
print("\nStarting model training...")
trainer.fit(
    train_loader,
    val_loader,
    callbacks=callbacks,
    default_root_dir=MODEL_SAVE_PATH,
    max_epochs=100,
    enable_model_summary=True,
    enable_progress_bar=True
)
# ends once the trainer is done (so most likely through EarlyStop, otherwise max_epochs)

In [None]:
print("\nEvaluating model on test set...")
test_results = trainer.test(test_loader)[0]

# Print only the metrics the trainer actually reported, so a metric that was not configured
# (or is logged under a slightly different key) does not abort the cell with a KeyError.
for metric_label, candidate_keys in (
    ("accuracy", ("test_accuracy",)),
    ("precision", ("test_precision",)),
    ("recall", ("test_recall",)),
    ("F1 score", ("test_f1", "test_f1score")),
):
    reported_key = next((key for key in candidate_keys if key in test_results), None)
    if reported_key is None:
        print(f"Test {metric_label}: not reported by the trainer")
    else:
        print(f"Test {metric_label}: {test_results[reported_key]:.4f}")


# The ModelCheckpoint callback that monitors val_accuracy records where the best checkpoint was
# written and what score it reached; read it from the callbacks list that was handed to fit().
best_checkpoint = next(
    cb for cb in callbacks
    if isinstance(cb, ModelCheckpoint) and cb.monitor == 'val_accuracy'
)
best_model_path = best_checkpoint.best_model_path
best_model_score = best_checkpoint.best_model_score
if not best_model_path:
    # no monitored checkpoint was written: fall back to the fixed-name 'last-model' checkpoint
    best_model_path = os.path.join(MODEL_SAVE_PATH, 'last-model.ckpt')


# Save the best model in .pth format.
# The checkpoint holds the state of the whole trainer module, not of the bare network, so the
# network weights are extracted by key prefix.
# Assumes the trainer stores the network under the attribute `model` - TODO confirm.
# weights_only=False is acceptable only because this file was written by this notebook;
# never load untrusted checkpoints this way.
checkpoint = torch.load(best_model_path, map_location='cpu', weights_only=False)
weight_prefix = 'model.'
model_state = {
    key[len(weight_prefix):]: value
    for key, value in checkpoint['state_dict'].items()
    if key.startswith(weight_prefix)
}
if not model_state:
    raise KeyError(
        f"No weights with prefix '{weight_prefix}' found in checkpoint {best_model_path}"
    )
model.load_state_dict(model_state)

final_model_path = os.path.join(MODEL_SAVE_PATH, 'fear_prediction_model.pth')
# store CPU tensors so the exported file can be loaded on machines without a GPU
torch.save({key: value.cpu() for key, value in model.state_dict().items()}, final_model_path)

# Print a summary of the training process
print("\nTraining Summary:")
if best_model_score is not None:
    print(f"Best validation accuracy: {best_model_score:.4f}")
else:
    print("Best validation accuracy: n/a (no monitored checkpoint was saved)")
print(f"Best model saved at: {best_model_path}")
print(f"Final model saved as: {final_model_path}")