Google Colab is cool

In [1]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Sat Mar 29 10:32:33 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   42C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
# Check current versions
!python -c "import numpy; print('Current numpy version:', numpy.__version__)"
!python -c "import torch; print('Current torch version:', torch.__version__)"

# Install desired versions with CUDA 12.4
!pip install numpy==1.26.4
!pip install torch==2.3.0+cu124 -f https://download.pytorch.org/whl/torch_stable.html
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.3.0+cu124.html
!pip install torcheeg
!pip install pytorch-lightning

# This will show the versions in new Python processes
!python -c "import numpy; print('Installed numpy version:', numpy.__version__)"
!python -c "import torch; print('Installed torch version:', torch.__version__)"

print("\n*** IMPORTANT: Please restart the runtime now for changes to take effect in this notebook ***")

Current numpy version: 2.0.2
Current torch version: 2.6.0+cu124
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m60.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
Successfully installed numpy-1.26.4
Looking in links: https://download.pytorch.org/whl/torch_stable.html
[31mERROR: Could not find a version that satisfies the requirement torch==2.3.0+cu124 (from versions: 1.13.0, 1.13.0+cpu, 1.13.0+cu116, 1.13.0+cu117, 1.13.0+cu117.with.pypi

In [None]:
!pip torch-sparse torch-cluster torch-spline-conv torch-geometric

In [1]:
import torch

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA version: {torch.version.cuda}")

# A single CUDA probe decides both the accelerator name handed to the trainer
# and whether the DataLoaders should use pinned host memory.
cuda_ready = torch.cuda.is_available()
accelerator = "gpu" if cuda_ready else "cpu"
pin_memory = True if cuda_ready else False
print(
    "GPU is available. Training will use GPU acceleration."
    if cuda_ready
    else "WARNING: GPU not available, using CPU instead"
)

PyTorch version: 2.6.0+cu124
CUDA version: 12.4
GPU is available. Training will use GPU acceleration.


In [2]:
from torcheeg.datasets import DREAMERDataset
from torcheeg import transforms
from torcheeg.datasets.constants import DREAMER_CHANNEL_LOCATION_DICT
from torcheeg.model_selection import KFoldGroupbyTrial
from torcheeg.models import CCNN
from torcheeg.trainers import ClassifierTrainer

from torch.utils.data import DataLoader, random_split
from torch.utils.data import DataLoader

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

import os

Mounted at /content/drive


TypeError: exceptions must derive from BaseException

In [13]:
# Attach Google Drive to the Colab VM; the dataset and model paths used by
# this notebook live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
# --- Paths ---------------------------------------------------------------
# Alternative setup (disabled): copy the .mat file onto the Colab VM and keep
# the cache and checkpoints on local disk instead of Drive.
# !cp /content/drive/MyDrive/datasets/DREAMER/DREAMER.mat /content/DREAMER.mat
# os.mkdir("/content/model")
# IO_PATH = '/content/io/'
# MODEL_SAVE_PATH = '/content/model/'
# MAT_FILE_PATH = '/content/DREAMER.mat'

IO_PATH = '/content/drive/MyDrive/datasets/DREAMER/io/'
MODEL_SAVE_PATH = '/content/drive/MyDrive/datasets/DREAMER/model/'
MAT_FILE_PATH = '/content/drive/MyDrive/datasets/DREAMER/DREAMER.mat'

# Optional guard (disabled): refuse to run when a processed cache already exists.
# if os.path.exists(IO_PATH):
#     raise Exception("io folder isn't allowed to exist")

# Fail fast on missing inputs so the problem shows up here rather than deep
# inside dataset processing or checkpoint saving.
if not os.path.exists(MODEL_SAVE_PATH):
    raise FileNotFoundError("make sure model folder exists")

if not os.path.exists(MAT_FILE_PATH):
    raise FileNotFoundError("make sure MAT file exists")


# --- Run settings --------------------------------------------------------
SEED: int = 42
BATCH_SIZE: int = 64
N_WORKERS: int = 4   # rule of thumb: n_workers = 4 * n_gpus

In [21]:
# DREAMER dataset: https://zenodo.org/records/546113
# Katsigiannis, S., & Ramzan, N. (2017). DREAMER: A Database for Emotion Recognition through EEG and ECG Signals from Wireless Low-cost Off-the-Shelf Devices [Data set]. Zenodo. https://doi.org/10.1109/JBHI.2017.2688239
#
# Each sample is labelled with self-reported valence (how pleasant or unpleasant
# the emotion is) and arousal (how intense it is) on a 1-5 scale. This notebook
# treats low valence together with high arousal as the positive "fear" class.


def fear_label(valence_arousal):
    """Turn a [valence, arousal] pair into the binary fear label.

    Returns a scalar long tensor: 1 when valence < 3 and arousal > 3, else 0.
    """
    is_fear = valence_arousal[0] < 3 and valence_arousal[1] > 3
    return torch.tensor(1 if is_fear else 0, dtype=torch.long)


# If this raises an error about a corrupted cache, make sure IO_PATH either
# holds the complete output of a previous run or does NOT exist at all.
dataset = DREAMERDataset(
    io_path=IO_PATH,
    mat_path=MAT_FILE_PATH,

    # Offline transforms run once and their result is cached under io_path.
    offline_transform=transforms.Compose([
        transforms.BandDifferentialEntropy(apply_to_baseline=True),
        transforms.ToGrid(DREAMER_CHANNEL_LOCATION_DICT, apply_to_baseline=True)
    ]),

    # Online transforms run every time a sample is read.
    online_transform=transforms.Compose([
        transforms.BaselineRemoval(),
        transforms.ToTensor()
    ]),

    label_transform=transforms.Compose([
        transforms.Select(['valence', 'arousal']),
        # torcheeg transforms are invoked with keyword arguments (y=...) and
        # return a dict, so a bare callable cannot sit inside Compose;
        # transforms.Lambda adapts a plain function to that protocol.
        transforms.Lambda(fear_label)
    ]),

    num_worker=N_WORKERS
)

# The first run populates io_path (the logged run took about 55 minutes); later
# runs read the cache and only apply the online transforms.

[2025-03-29 11:25:20] INFO (torcheeg/MainThread) 🔍 | Processing EEG data. Processed EEG data has been cached to [92m/content/io[0m.
INFO:torcheeg:🔍 | Processing EEG data. Processed EEG data has been cached to [92m/content/io[0m.
[2025-03-29 11:25:20] INFO (torcheeg/MainThread) ⏳ | Monitoring the detailed processing of a record for debugging. The processing of other records will only be reported in percentage to keep it clean.
INFO:torcheeg:⏳ | Monitoring the detailed processing of a record for debugging. The processing of other records will only be reported in percentage to keep it clean.
[PROCESS]: 100%|██████████| 23/23 [37:53<00:00, 98.83s/it] 
[2025-03-29 12:20:36] INFO (torcheeg/MainThread) ✅ | All processed EEG data has been cached to /content/io.
INFO:torcheeg:✅ | All processed EEG data has been cached to /content/io.
[2025-03-29 12:20:36] INFO (torcheeg/MainThread) 😊 | Please set [92mio_path[0m to [92m/content/io[0m for the next run, to directly read from the cache if y

In [None]:
# Hold-out split: 70% train / 15% validation / remainder test.
# Validation is evaluated during training; the test split is only touched
# afterwards, because choosing the best model on validation data already biases
# the validation score even though no gradient step uses it.
#
# NOTE(review): random_split shuffles individual samples. If samples are windows
# cut from the same trial, neighbouring windows can land in different splits and
# inflate the scores. KFoldGroupbyTrial is imported above but unused; confirm
# whether a trial-grouped split was intended.
total_size = len(dataset)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SEED)  # fixed seed -> same split every run
)

print(f"Dataset split: {train_size} training, {val_size} validation, {test_size} test samples")


def make_loader(split, shuffle):
    """Build a DataLoader over `split` using the notebook-wide settings.

    Batch size, worker count and pinned-memory flag all come from the
    configuration cells, so the three loaders cannot drift apart.
    """
    return DataLoader(
        split,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        num_workers=N_WORKERS,
        pin_memory=pin_memory  # pinned host memory speeds up CPU -> GPU copies
    )


# Only the training data is shuffled, so batches do not follow dataset order;
# evaluation metrics do not depend on sample order.
train_loader = make_loader(train_dataset, shuffle=True)
val_loader = make_loader(val_dataset, shuffle=False)
test_loader = make_loader(test_dataset, shuffle=False)

In [None]:
# Seed every RNG in play so weight initialisation and batch shuffling repeat
# from run to run.
pl.seed_everything(SEED, workers=True)

# Take the input geometry from an actual sample instead of hard-coding it: the
# network's input layout has to agree with what BandDifferentialEntropy and
# ToGrid produce (bands x grid rows x grid columns), and a stale constant only
# shows up as a shape error once training starts.
sample_eeg, _ = dataset[0]
in_channels = sample_eeg.shape[0]
grid_size = tuple(sample_eeg.shape[-2:])

model = CCNN(
    num_classes=2,            # binary classification (fear vs. non-fear)
    in_channels=in_channels,  # one input channel per frequency band
    grid_size=grid_size       # electrode grid (not every cell holds an electrode)
)


callbacks = [
    # keeps the single best checkpoint, ranked by validation accuracy
    ModelCheckpoint(
        dirpath=MODEL_SAVE_PATH,
        filename='best-model-{epoch:02d}-{val_accuracy:.4f}',
        monitor='val_accuracy',
        mode='max',
        save_top_k=1
    ),

    # keeps the most recent model as well
    ModelCheckpoint(
        dirpath=MODEL_SAVE_PATH,
        filename='last-model',
        save_last=True
    ),

    # guards against overfitting: stop once val_accuracy has not improved for 10 epochs
    EarlyStopping(
        monitor='val_accuracy',
        patience=10,
        mode='max',
        verbose=True
    )
]


# ClassifierTrainer is torcheeg's wrapper around pl.LightningModule.
# TODO
# - precision="16-mixed" # T4 gpu works well for this (isn't supported here)
trainer = ClassifierTrainer(
    model=model,
    num_classes=2,
    lr=1e-4,
    weight_decay=1e-4,
    accelerator=accelerator,
    # Only accuracy is tracked by default; the evaluation cell also reports
    # precision, recall and F1, so request them explicitly.
    metrics=['accuracy', 'precision', 'recall', 'f1score'],
)

In [None]:
print("\nStarting model training...")

# Everything besides the two loaders is gathered here and passed through to
# the fit call as keyword arguments.
fit_options = dict(
    max_epochs=100,
    default_root_dir=MODEL_SAVE_PATH,
    callbacks=callbacks,
    enable_progress_bar=True,
    enable_model_summary=True,
)
trainer.fit(train_loader, val_loader, **fit_options)
# Returns when training is over: most likely via EarlyStopping, otherwise at max_epochs.

In [None]:
print("\nEvaluating model on test set...")
# NOTE(review): this evaluates the weights left in memory after training (the
# last epoch), which need not be the best checkpoint exported further down.
test_results = trainer.test(test_loader)[0]

# Logged keys follow the pattern "test_<metric name>". Report the ones that are
# present rather than crashing on a metric the trainer was not asked to track.
metric_labels = {
    'test_accuracy': 'Test accuracy',
    'test_precision': 'Test precision',
    'test_recall': 'Test recall',
    'test_f1score': 'Test F1 score',
}
for result_key, label in metric_labels.items():
    if result_key in test_results:
        print(f"{label}: {test_results[result_key]:.4f}")
    else:
        print(f"{label}: not logged (add the metric to the trainer's `metrics`)")


# The ModelCheckpoint callbacks record where each checkpoint was written and how
# it scored. Read them from the `callbacks` list: `trainer` here is the
# ClassifierTrainer wrapper, not the pl.Trainer that owns the callbacks.
best_checkpoint = next(
    cb for cb in callbacks
    if isinstance(cb, ModelCheckpoint) and cb.monitor == 'val_accuracy'
)
best_model_path = best_checkpoint.best_model_path
if not best_model_path:
    best_model_path = f'{MODEL_SAVE_PATH}last-model.ckpt'

final_model_path = f'{MODEL_SAVE_PATH}fear_prediction_model.pth'

# Export the best weights in .pth format.
# The checkpoint stores the state of the whole wrapper, so the network's own
# parameters are expected under the "model." prefix (assumes the wrapper keeps
# the network in an attribute named `model`; load_state_dict below fails loudly
# if that does not hold).
# weights_only=False unpickles arbitrary objects; acceptable only because the
# file was produced by this notebook. Do not use it on untrusted checkpoints.
checkpoint = torch.load(best_model_path, map_location='cpu', weights_only=False)
model_prefix = 'model.'
model_state = {
    key[len(model_prefix):]: tensor
    for key, tensor in checkpoint['state_dict'].items()
    if key.startswith(model_prefix)
}
model.load_state_dict(model_state)  # strict load doubles as a key/shape check
torch.save(model_state, final_model_path)

# Print a summary of the training process.
best_score = best_checkpoint.best_model_score  # None if no checkpoint was ranked
print("\nTraining Summary:")
if best_score is not None:
    print(f"Best validation accuracy: {best_score:.4f}")
else:
    print("Best validation accuracy: n/a")
print(f"Best model saved at: {best_model_path}")
print(f"Final model saved as: {final_model_path}")