In [2]:
import sys

import numpy as np
import pandas as pd
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pytorch_lightning as pl


from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from torchmetrics import Accuracy

sys.path.append("src")

import utils

In [3]:
torch.cuda.is_available()

True

In [4]:
train = utils.read_data("train")
test = utils.read_data("test")

In [5]:
import random

random.seed(2137)

In [6]:
train_sequences = [(train[key]['ts'].to_numpy(), 1 if train[key]['class'] == "a" else 0) for key in train]
test_sequences = [(test[key]['ts'].to_numpy(), 1 if test[key]['class'] == "a" else 0)for key in test]
random.shuffle(test_sequences)
val_sequences = test_sequences[:100]
test_sequences = test_sequences[100:]

In [7]:
test_sequences[0][0].shape

(256, 64)

In [8]:
len(train_sequences)

468

In [9]:
len(val_sequences), len(test_sequences)

(100, 380)

In [10]:
class EEGDataset(Dataset):
    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        sequence, label = self.sequences[idx]
        return dict(
            sequence=torch.Tensor(sequence),
            label=torch.tensor(label).long()
        )

In [11]:
class EEGDataModule(pl.LightningDataModule):
    def __init__(self, train_sequences, val_sequences, test_sequences, batch_size):
        super().__init__()
        self.train_sequences = train_sequences
        self.val_sequences = val_sequences
        self.test_sequences = test_sequences
        self.batch_size = batch_size

    def setup(self, stage=None):
        self.train_dataset = EEGDataset(self.train_sequences)
        self.val_dataset = EEGDataset(self.val_sequences)
        self.test_dataset = EEGDataset(self.test_sequences)

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False
        )
    
    def test_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False
        )

In [20]:
N_EP0CHS = 10
BATCH_SIZE = 32

data_module = EEGDataModule(train_sequences, val_sequences, test_sequences, BATCH_SIZE)

In [21]:
class EEGModel(nn.Module):
    def __init__(self, n_features = 64, n_classes = 2, n_hidden=256, n_layers=3):
        super().__init__()
        self.n_hidden = n_hidden

        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size = n_hidden,
            num_layers = n_layers,
            batch_first = True,
            dropout = 0.4
        )

        self.classifier = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        self.lstm.flatten_parameters()
        _, (hidden, _) = self.lstm(x)

        out = hidden[-1]
        return self.classifier(out)

In [22]:
class EEGPredictior(pl.LightningModule):
    def __init__(self, n_features, n_classes):
        super().__init__()
        self.model = EEGModel(n_features, n_classes)
        self.criterion = nn.CrossEntropyLoss()
        self.acc = Accuracy(task="binary")

    def forward(self, x, labels = None):
        output = self.model(x)
        loss = 0
        if labels is not None:
            loss = self.criterion(output, labels)
        return loss, output
    
    def training_step(self, batch, batch_idx):
        sequences = batch["sequence"]
        labels = batch["label"]
        loss, outputs = self(sequences, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = self.acc(predictions, labels)

        self.log("train_loss", loss, prog_bar=True, logger=True)
        self.log("train_accuracy", step_accuracy, prog_bar=True, logger=True)
        return {"loss":loss, "accuracy":step_accuracy}
    
    def validation_step(self, batch, batch_idx):
        sequences = batch["sequence"]
        labels = batch["label"]
        loss, outputs = self(sequences, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = self.acc(predictions, labels)

        self.log("val_loss", loss, prog_bar=True, logger=True)
        self.log("val_accuracy", step_accuracy, prog_bar=True, logger=True)
        return {"loss":loss, "accuracy":step_accuracy}
    
    def test_step(self, batch, batch_idx):
        sequences = batch["sequence"]
        labels = batch["label"]
        loss, outputs = self(sequences, labels)
        predictions = torch.argmax(outputs, dim=1)
        step_accuracy = self.acc(predictions, labels)

        self.log("test_loss", loss, prog_bar=True, logger=True)
        self.log("test_accuracy", step_accuracy, prog_bar=True, logger=True)
        return {"loss":loss, "accuracy":step_accuracy}
    
    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=0.0001)

In [23]:
model = EEGPredictior(n_features= 64, n_classes=2)

In [24]:
%load_ext tensorboard
%tensorboard --logdir ./lightning_logs

In [25]:
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints",
    filename= "best-checkpoint",
    save_top_k=1,
    verbose=True,
    monitor="val_loss",
    mode="min"
)

logger = TensorBoardLogger("lightning_logs", name="EEG")

trainer = pl.Trainer(
    logger=logger,
    callbacks=checkpoint_callback,
    max_epochs=N_EP0CHS,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [26]:
trainer.fit(model, data_module)

You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | EEGModel         | 1.4 M 
1 | criterion | CrossEntropyLoss | 0     
2 | acc       | BinaryAccuracy   | 0     
-----------------------------------------------
1.4 M     Trainable params
0         Non-trainable params
1.4 M     Total params
5.532     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 15/15 [00:00<00:00, 16.82it/s, v_num=7, train_loss=0.689, train_accuracy=0.700, val_loss=0.689, val_accuracy=0.600]

Epoch 0, global step 15: 'val_loss' reached 0.68894 (best 0.68894), saving model to 'C:\\Users\\Jurek\\Desktop\\ROBOTYKA3\\work\\eeg-alcoholics\\checkpoints\\best-checkpoint-v6.ckpt' as top 1


Epoch 1: 100%|██████████| 15/15 [00:00<00:00, 19.31it/s, v_num=7, train_loss=0.689, train_accuracy=0.500, val_loss=0.684, val_accuracy=0.600]

Epoch 1, global step 30: 'val_loss' reached 0.68393 (best 0.68393), saving model to 'C:\\Users\\Jurek\\Desktop\\ROBOTYKA3\\work\\eeg-alcoholics\\checkpoints\\best-checkpoint-v6.ckpt' as top 1


Epoch 2: 100%|██████████| 15/15 [00:00<00:00, 18.30it/s, v_num=7, train_loss=0.654, train_accuracy=0.800, val_loss=0.671, val_accuracy=0.620]

Epoch 2, global step 45: 'val_loss' reached 0.67139 (best 0.67139), saving model to 'C:\\Users\\Jurek\\Desktop\\ROBOTYKA3\\work\\eeg-alcoholics\\checkpoints\\best-checkpoint-v6.ckpt' as top 1


Epoch 3: 100%|██████████| 15/15 [00:00<00:00, 18.14it/s, v_num=7, train_loss=0.628, train_accuracy=0.750, val_loss=0.636, val_accuracy=0.680]

Epoch 3, global step 60: 'val_loss' reached 0.63615 (best 0.63615), saving model to 'C:\\Users\\Jurek\\Desktop\\ROBOTYKA3\\work\\eeg-alcoholics\\checkpoints\\best-checkpoint-v6.ckpt' as top 1


Epoch 4: 100%|██████████| 15/15 [00:00<00:00, 18.63it/s, v_num=7, train_loss=0.468, train_accuracy=0.900, val_loss=0.535, val_accuracy=0.710]

Epoch 4, global step 75: 'val_loss' reached 0.53456 (best 0.53456), saving model to 'C:\\Users\\Jurek\\Desktop\\ROBOTYKA3\\work\\eeg-alcoholics\\checkpoints\\best-checkpoint-v6.ckpt' as top 1


Epoch 5: 100%|██████████| 15/15 [00:00<00:00, 18.69it/s, v_num=7, train_loss=0.359, train_accuracy=0.900, val_loss=0.545, val_accuracy=0.750]

Epoch 5, global step 90: 'val_loss' was not in top 1


Epoch 6: 100%|██████████| 15/15 [00:00<00:00, 18.59it/s, v_num=7, train_loss=0.243, train_accuracy=0.900, val_loss=0.531, val_accuracy=0.740]

Epoch 6, global step 105: 'val_loss' reached 0.53093 (best 0.53093), saving model to 'C:\\Users\\Jurek\\Desktop\\ROBOTYKA3\\work\\eeg-alcoholics\\checkpoints\\best-checkpoint-v6.ckpt' as top 1


Epoch 7: 100%|██████████| 15/15 [00:00<00:00, 16.61it/s, v_num=7, train_loss=0.111, train_accuracy=0.950, val_loss=0.586, val_accuracy=0.730] 

Epoch 7, global step 120: 'val_loss' was not in top 1


Epoch 8: 100%|██████████| 15/15 [00:00<00:00, 18.17it/s, v_num=7, train_loss=0.0323, train_accuracy=1.000, val_loss=0.650, val_accuracy=0.780]

Epoch 8, global step 135: 'val_loss' was not in top 1


Epoch 9: 100%|██████████| 15/15 [00:00<00:00, 16.69it/s, v_num=7, train_loss=0.0389, train_accuracy=1.000, val_loss=0.806, val_accuracy=0.750]

Epoch 9, global step 150: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 15/15 [00:00<00:00, 16.64it/s, v_num=7, train_loss=0.0389, train_accuracy=1.000, val_loss=0.806, val_accuracy=0.750]


In [29]:
trainer.test(dataloaders=data_module.test_dataloader())

You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at C:\Users\Jurek\Desktop\ROBOTYKA3\work\eeg-alcoholics\checkpoints\best-checkpoint-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at C:\Users\Jurek\Desktop\ROBOTYKA3\work\eeg-alcoholics\checkpoints\best-checkpoint-v6.ckpt


Testing DataLoader 0: 100%|██████████| 12/12 [00:00<00:00, 20.83it/s]


[{'test_loss': 0.537832498550415, 'test_accuracy': 0.7736842036247253}]