In [1]:
import random
import sys
from typing import Literal

import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.optim as optim
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader, Dataset
from torchmetrics import Accuracy

sys.path.append("src")

import utils

In [2]:
# Seed Python's and PyTorch's global RNGs so the shuffle and the torch-side
# randomness below start from the same state on every run.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fa475388f70>

In [3]:
# Report whether PyTorch can see a CUDA device (the saved output shows True).
torch.cuda.is_available()

True

In [4]:
# Load the two dataset splits through the project helper `utils.read_data`
# ("train" first, then "test").
train, test = (utils.read_data(split) for split in ("train", "test"))

In [5]:
def _as_labelled_sequences(split):
    """Turn a split mapping into a list of ``(array, label)`` pairs.

    For every key the entry's ``'ts'`` value is converted with ``.to_numpy()``;
    the label is 1 when the entry's ``'class'`` equals ``"a"`` and 0 otherwise.
    """
    pairs = []
    for key in split:
        values = split[key]['ts'].to_numpy()
        label = 1 if split[key]['class'] == "a" else 0
        pairs.append((values, label))
    return pairs


train_sequences = _as_labelled_sequences(train)

# The held-out split is shuffled once; its first 100 items become the
# validation set and the remainder stays as the test set.
held_out = _as_labelled_sequences(test)
random.shuffle(held_out)
val_sequences, test_sequences = held_out[:100], held_out[100:]

In [6]:
# Shape of the array in the first test item; the saved output reads (256, 64).
test_sequences[0][0].shape

(256, 64)

In [7]:
# Number of training examples.
len(train_sequences)

468

In [8]:
# Sizes of the validation and test lists produced by the split.
len(val_sequences), len(test_sequences)

(100, 380)

In [9]:
class EEGDataset(Dataset):
    """Map-style dataset over a list of ``(sequence, label)`` pairs.

    Args:
        sequences: indexable collection whose items unpack into
            ``(sequence, label)``; ``sequence`` is array-like numeric data and
            ``label`` is an integer class id.
    """

    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        """Return the number of stored pairs."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return item ``idx`` as ``{"sequence": float32 tensor, "label": int64 tensor}``."""
        sequence, label = self.sequences[idx]
        return {
            # Explicit dtype instead of the legacy ``torch.Tensor(...)`` constructor,
            # which interprets a bare integer argument as a size rather than as data.
            "sequence": torch.as_tensor(sequence, dtype=torch.float32),
            "label": torch.tensor(label, dtype=torch.long),
        }

In [10]:
class EEGDataModule(pl.LightningDataModule):
    """Lightning data module serving the train / validation / test sequence lists.

    Args:
        train_sequences: ``(sequence, label)`` pairs used for training.
        val_sequences: pairs used for validation.
        test_sequences: pairs used for testing.
        batch_size: batch size shared by all three loaders.
        num_workers: worker processes per ``DataLoader``. Defaults to 6, the
            value that used to be hard-coded in every loader.
    """

    def __init__(self, train_sequences, val_sequences, test_sequences, batch_size, num_workers=6):
        super().__init__()
        self.train_sequences = train_sequences
        self.val_sequences = val_sequences
        self.test_sequences = test_sequences
        self.batch_size = batch_size
        self.num_workers = num_workers

    def setup(self, stage=None):
        """Wrap each sequence list in an ``EEGDataset`` (``stage`` is ignored)."""
        self.train_dataset = EEGDataset(self.train_sequences)
        self.val_dataset = EEGDataset(self.val_sequences)
        self.test_dataset = EEGDataset(self.test_sequences)

    def _make_loader(self, dataset, shuffle):
        """Build a ``DataLoader`` with the module-wide batch size and worker count."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers,
        )

    def train_dataloader(self):
        """Shuffled loader over the training dataset."""
        return self._make_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Unshuffled loader over the validation dataset."""
        return self._make_loader(self.val_dataset, shuffle=False)

    def test_dataloader(self):
        """Unshuffled loader over the test dataset."""
        return self._make_loader(self.test_dataset, shuffle=False)

In [11]:
# Training configuration.
N_EPOCHS = 15
# Backward-compatible alias: the constant was originally spelled with a zero
# ("N_EP0CHS") and the trainer cell still refers to that spelling.
N_EP0CHS = N_EPOCHS
BATCH_SIZE = 32

data_module = EEGDataModule(train_sequences, val_sequences, test_sequences, BATCH_SIZE)

In [12]:
class EEGModel(nn.Module):
    def __init__(self, n_features: int = 64, n_hidden: int = 256, n_layers: int = 3,
                 rnn: Literal["lstm", "gru"] = "lstm", dropout: float = 0.3):
        super().__init__()

        match rnn:
            case "lstm":
                self.rnn_class = nn.LSTM
            case "gru":
                self.rnn_class = nn.GRU
            case _:
                raise ValueError("Invalid rnn architecture")

        self.rnn = self.rnn_class(
            input_size=n_features,
            hidden_size=n_hidden,
            num_layers=n_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout
        )
        self.classifier = nn.Linear(2 * n_hidden, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        self.rnn.to(x.device)
        self.classifier.to(x.device)

        out, _ = self.rnn(x)
        out = out.max(-2).values
        out = self.classifier(out)
        out = self.sigmoid(out)
        return out

In [73]:
class EEGPredictior(pl.LightningModule):
    """Lightning wrapper that trains an ``EEGModel`` with binary cross-entropy.

    Args:
        n_features: forwarded to ``EEGModel`` as its first positional argument.
        *args, **kwargs: forwarded unchanged to ``EEGModel``.
    """

    def __init__(self, n_features, *args, **kwargs):
        super().__init__()
        self.model = EEGModel(n_features, *args, **kwargs)
        # The model already applies a sigmoid, so plain BCELoss is used.
        self.criterion = nn.BCELoss()
        # NOTE(review): one metric object is shared by all stages; only the
        # per-batch value returned by calling it is logged below.
        self.acc = Accuracy(task="binary")

    def forward(self, x, labels=None):
        """Run the model and, when ``labels`` is given, compute the loss.

        Returns:
            ``(loss, output)`` where ``loss`` is 0 if no labels were supplied.
        """
        output = self.model(x)
        loss = 0
        if labels is not None:
            # Labels arrive as shape (batch,); match the model's (batch, 1) output.
            loss = self.criterion(output, labels.float().unsqueeze(-1))
        return loss, output

    def _shared_step(self, batch, stage):
        """Compute and log loss and accuracy for one batch under ``<stage>_*`` keys."""
        sequences = batch["sequence"]
        labels = batch["label"]
        loss, outputs = self(sequences, labels)
        # Threshold probabilities at 0.5 and drop the trailing singleton axis.
        predictions = (outputs > 0.5).float().squeeze(-1)
        step_accuracy = self.acc(predictions, labels)

        self.log(f"{stage}_loss", loss, prog_bar=True, logger=True)
        self.log(f"{stage}_accuracy", step_accuracy, prog_bar=True, logger=True)
        return {"loss": loss, "accuracy": step_accuracy}

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` / ``train_accuracy`` and return them."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` / ``val_accuracy`` and return them."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` / ``test_accuracy`` and return them."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Adam over all parameters with a learning rate of 0.001."""
        return optim.Adam(self.parameters(), lr=0.001)

In [74]:
# GRU variant of the predictor: 64 input features, 256 hidden units per
# direction, 3 stacked recurrent layers.
model = EEGPredictior(
    n_features=64,
    n_hidden=256,
    n_layers=3,
    rnn="gru",
)

In [75]:
%reload_ext tensorboard
%tensorboard --logdir./lightning_logs

Launching TensorBoard...

In [76]:
# Keep only the single best checkpoint, judged by the lowest logged "val_loss".
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    dirpath="checkpoints",
    filename="best-checkpoint",
    verbose=True,
)

# TensorBoard event files are written under lightning_logs/EEG.
logger = TensorBoardLogger(save_dir="lightning_logs", name="EEG")

trainer = pl.Trainer(
    max_epochs=N_EP0CHS,
    logger=logger,
    callbacks=[checkpoint_callback],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [77]:
# Train the model; the data module supplies the train and validation loaders.
trainer.fit(model, datamodule=data_module)

  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | EEGModel       | 2.9 M 
1 | criterion | BCELoss        | 0     
2 | acc       | BinaryAccuracy | 0     
---------------------------------------------
2.9 M     Trainable params
0         Non-trainable params
2.9 M     Total params
11.442    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 0, global step 15: 'val_loss' reached 0.56139 (best 0.56139), saving model to '/home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 1, global step 30: 'val_loss' reached 0.54101 (best 0.54101), saving model to '/home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 2, global step 45: 'val_loss' reached 0.38793 (best 0.38793), saving model to '/home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 3, global step 60: 'val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 4, global step 75: 'val_loss' reached 0.35856 (best 0.35856), saving model to '/home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 5, global step 90: 'val_loss' reached 0.32888 (best 0.32888), saving model to '/home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 6, global step 105: 'val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 7, global step 120: 'val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 8, global step 135: 'val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 9, global step 150: 'val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 10, global step 165: 'val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 11, global step 180: 'val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 12, global step 195: 'val_loss' reached 0.28640 (best 0.28640), saving model to '/home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

Epoch 13, global step 210: 'val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

Epoch 14, global step 225: 'val_loss' reached 0.27712 (best 0.27712), saving model to '/home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt' as top 1
`Trainer.fit` stopped: `max_epochs=15` reached.


In [78]:
# Evaluate on the test split using the best checkpoint saved during `fit`.
# `ckpt_path="best"` makes that choice explicit instead of relying on the
# implicit fallback, and passing the data module lets the trainer run `setup`
# itself rather than depending on state left behind by the earlier `fit` call.
trainer.test(datamodule=data_module, ckpt_path="best")

  rank_zero_warn(
Restoring states from the checkpoint path at /home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/jkarolczak/Projects/eeg-alcoholics/checkpoints/best-checkpoint-v27.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.9210526347160339
        test_loss           0.25355789065361023
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.25355789065361023, 'test_accuracy': 0.9210526347160339}]