In [1]:
import random
import sys
from typing import Literal

import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.optim as optim
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader, Dataset
from torchmetrics import Accuracy
import lime
import numpy as np
from lime import lime_tabular
sys.path.append("src")

import utils

In [2]:
# Seed every RNG the notebook relies on so that the validation split and the
# weight initialisation are repeatable across "Restart & Run All".
SEED = 42
random.seed(SEED)
# numpy's global RNG was previously left unseeded.
# NOTE(review): LIME presumably draws its perturbations from this global RNG
# when no random_state is passed to the explainer — confirm.
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fb8f376cb90>

In [3]:
# Sanity check: is a CUDA device visible to PyTorch?
torch.cuda.is_available()

True

In [4]:
# Load both dataset splits through the project helper (train first, then test).
# Later cells index the results as `split[file_name]['ts']` / `['class']`.
train, test = (utils.read_data(split) for split in ("train", "test"))

In [5]:
# Peek at the column labels of one recording's time-series frame.
train['Data238.csv']['ts'].columns

Index(['AF1', 'AF2', 'AF7', 'AF8', 'AFZ', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6',
       'CP1', 'CP2', 'CP3', 'CP4', 'CP5', 'CP6', 'CPZ', 'CZ', 'F1', 'F2', 'F3',
       'F4', 'F5', 'F6', 'F7', 'F8', 'FC1', 'FC2', 'FC3', 'FC4', 'FC5', 'FC6',
       'FCZ', 'FP1', 'FP2', 'FPZ', 'FT7', 'FT8', 'FZ', 'O1', 'O2', 'OZ', 'P1',
       'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'PO1', 'PO2', 'PO7', 'PO8',
       'POZ', 'PZ', 'T7', 'T8', 'TP7', 'TP8', 'X', 'Y', 'nd'],
      dtype='object', name='sensor position')

In [6]:
def _labelled_sequences(records):
    """Convert a mapping of recordings into a list of ``(array, label)`` pairs.

    For every key, the record's ``'ts'`` frame is converted with ``to_numpy()``
    and the label is 1 when its ``'class'`` entry equals ``"a"``, otherwise 0.
    """
    labelled = []
    for name in records:
        values = records[name]['ts'].to_numpy()
        target = 1 if records[name]['class'] == "a" else 0
        labelled.append((values, target))
    return labelled


# Number of shuffled test recordings held out for validation.
N_VAL_SEQUENCES = 100

train_sequences = _labelled_sequences(train)

# The validation set is carved out of the (shuffled) test split.
held_out_sequences = _labelled_sequences(test)
random.shuffle(held_out_sequences)
val_sequences = held_out_sequences[:N_VAL_SEQUENCES]
test_sequences = held_out_sequences[N_VAL_SEQUENCES:]

In [7]:
# Shape of the array stored for the first test recording.
test_sequences[0][0].shape

(256, 64)

In [8]:
# Number of training recordings.
len(train_sequences)

468

In [9]:
# Sizes of the validation and test splits after the hold-out above.
len(val_sequences), len(test_sequences)

(100, 380)

In [10]:
class EEGDataset(Dataset):
    """Map-style dataset over a list of ``(sequence, label)`` pairs.

    Every item is returned as a dict with two entries:
    ``"sequence"`` (a default-dtype float tensor built from the stored
    sequence) and ``"label"`` (a scalar int64 tensor).
    """

    def __init__(self, sequences):
        # Stored by reference; conversion to tensors happens lazily per item.
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        features, target = self.sequences[idx]
        item = {
            "sequence": torch.Tensor(features),
            "label": torch.tensor(target).long(),
        }
        return item

In [11]:
class EEGDataModule(pl.LightningDataModule):
    """Lightning data module serving the train / validation / test sequences.

    Args:
        train_sequences: list of ``(sequence, label)`` pairs used for training.
        val_sequences: list of ``(sequence, label)`` pairs used for validation.
        test_sequences: list of ``(sequence, label)`` pairs used for testing.
        batch_size: batch size shared by all three loaders.
        num_workers: number of DataLoader worker processes. Defaults to 6,
            the value that used to be hard-coded, so existing callers are
            unaffected; lower it on machines with fewer cores.
    """

    def __init__(self, train_sequences, val_sequences, test_sequences, batch_size,
                 num_workers=6):
        super().__init__()
        self.train_sequences = train_sequences
        self.val_sequences = val_sequences
        self.test_sequences = test_sequences
        self.batch_size = batch_size
        self.num_workers = num_workers

    def setup(self, stage=None):
        """Wrap each split in an ``EEGDataset`` (``stage`` is ignored)."""
        self.train_dataset = EEGDataset(self.train_sequences)
        self.val_dataset = EEGDataset(self.val_sequences)
        self.test_dataset = EEGDataset(self.test_sequences)

    def _make_loader(self, dataset, shuffle):
        """Build a DataLoader with the module-wide batch size and worker count."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers
        )

    def train_dataloader(self):
        # Only the training split is reshuffled each epoch.
        return self._make_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        return self._make_loader(self.val_dataset, shuffle=False)

    def test_dataloader(self):
        return self._make_loader(self.test_dataset, shuffle=False)

In [12]:
# Training hyper-parameters.
# NOTE: the epoch constant is spelled with a zero ("EP0CHS"); the name is kept
# as-is because the Trainer cell refers to it verbatim.
N_EP0CHS = 15
BATCH_SIZE = 32

data_module = EEGDataModule(
    train_sequences=train_sequences,
    val_sequences=val_sequences,
    test_sequences=test_sequences,
    batch_size=BATCH_SIZE,
)

In [13]:
class EEGModel(nn.Module):
    """Bidirectional recurrent binary classifier for multichannel sequences.

    The input is passed through a stacked bidirectional LSTM or GRU
    (``batch_first=True``), the recurrent outputs are max-pooled over the
    second-to-last axis, and a linear layer followed by a sigmoid produces a
    single probability per sequence.

    Args:
        n_features: ``input_size`` of the recurrent module.
        n_hidden: hidden size per direction.
        n_layers: number of stacked recurrent layers.
        rnn: ``"lstm"`` or ``"gru"``.
        dropout: dropout value handed to the recurrent module.

    Raises:
        ValueError: if ``rnn`` is neither ``"lstm"`` nor ``"gru"``.
    """

    def __init__(self, n_features: int = 64, n_hidden: int = 256, n_layers: int = 3,
                 rnn: Literal["lstm", "gru"] = "lstm", dropout: float = 0.3):
        super().__init__()

        if rnn == "lstm":
            self.rnn_class = nn.LSTM
        elif rnn == "gru":
            self.rnn_class = nn.GRU
        else:
            raise ValueError("Invalid rnn architecture")

        recurrent_kwargs = {
            "input_size": n_features,
            "hidden_size": n_hidden,
            "num_layers": n_layers,
            "batch_first": True,
            "bidirectional": True,
            "dropout": dropout,
        }
        self.rnn = self.rnn_class(**recurrent_kwargs)
        # Forward and backward states are concatenated, hence 2 * n_hidden inputs.
        self.classifier = nn.Linear(2 * n_hidden, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # The sub-modules follow the input's device on every call.
        # NOTE(review): moving parameters inside forward() is unusual; callers
        # that feed CPU tensors to a GPU-resident model rely on it, so it stays.
        input_device = x.device
        for part in (self.rnn, self.classifier):
            part.to(input_device)

        hidden_states, _ = self.rnn(x)
        # Max over the second-to-last axis of the recurrent output.
        pooled, _ = hidden_states.max(dim=-2)
        return self.sigmoid(self.classifier(pooled))

In [14]:
class EEGPredictior(pl.LightningModule):
    """Lightning wrapper that trains and evaluates ``EEGModel`` with BCE loss.

    Args:
        n_features: number of input features, passed to ``EEGModel``.
        *args, **kwargs: forwarded unchanged to ``EEGModel``.
    """

    def __init__(self, n_features, *args, **kwargs):
        super().__init__()
        self.model = EEGModel(n_features, *args, **kwargs)
        # EEGModel ends in a sigmoid, so the loss works on probabilities.
        self.criterion = nn.BCELoss()
        self.acc = Accuracy(task="binary")

    def forward(self, x, labels=None):
        """Return ``(loss, output)``; ``loss`` stays 0 when ``labels`` is None."""
        output = self.model(x)
        loss = 0
        if labels is not None:
            # Labels arrive as a 1-D integer tensor; the model emits one column.
            loss = self.criterion(output, labels.float().unsqueeze(-1))
        return loss, output

    def predict(self, sequence):
        """Return hard 0/1 predictions (float tensor on CPU) for ``sequence``.

        Inference runs in eval mode with autograd disabled, so dropout cannot
        randomise the result and no graph is kept for large inputs. The input
        is moved to the module's own device; the previous train/eval mode is
        restored afterwards.

        Args:
            sequence: array-like or tensor accepted by ``torch.as_tensor``.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                inputs = torch.as_tensor(sequence, dtype=torch.float32).to(self.device)
                _, output = self(inputs)
        finally:
            self.train(was_training)
        predictions = (output > 0.5).float().squeeze(-1)
        return predictions.cpu()

    def _shared_step(self, batch, stage):
        """Compute loss and accuracy for one batch and log them as ``<stage>_*``."""
        sequences = batch["sequence"]
        labels = batch["label"]
        loss, outputs = self(sequences, labels)
        # Threshold the probabilities at 0.5 to obtain class predictions.
        predictions = (outputs > 0.5).float().squeeze(-1)
        step_accuracy = self.acc(predictions, labels)

        self.log(f"{stage}_loss", loss, prog_bar=True, logger=True)
        self.log(f"{stage}_accuracy", step_accuracy, prog_bar=True, logger=True)
        return {"loss": loss, "accuracy": step_accuracy}

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=0.001)

In [15]:
# GRU variant of the classifier; 64 input features, one per column of a recording.
model = EEGPredictior(
    n_features=64,
    n_hidden=256,
    n_layers=3,
    rnn="gru",
    dropout=0.25,
)

In [16]:
%reload_ext tensorboard
%tensorboard --logdir./lightning_logs

ERROR: Failed to launch TensorBoard (exited with 2).
Contents of stderr:
TensorFlow installation not found - running with reduced feature set.
usage: tensorboard [-h] [--helpfull] [--logdir PATH] [--logdir_spec PATH_SPEC]
                   [--host ADDR] [--bind_all] [--port PORT]
                   [--reuse_port BOOL] [--load_fast {false,auto,true}]
                   [--extra_data_server_flags EXTRA_DATA_SERVER_FLAGS]
                   [--grpc_creds_type {local,ssl,ssl_dev}]
                   [--grpc_data_provider PORT] [--purge_orphaned_data BOOL]
                   [--db URI] [--db_import] [--inspect] [--version_tb]
                   [--tag TAG] [--event_file PATH] [--path_prefix PATH]
                   [--window_title TEXT] [--max_reload_threads COUNT]
                   [--reload_interval SECONDS] [--reload_task TYPE]
                   [--reload_multifile BOOL]
                   [--reload_multifile_inactive_secs SECONDS]
                   [--generic_data TYPE]
            

In [17]:
# Keep only the single checkpoint with the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    dirpath="checkpoints",
    filename="best-checkpoint",
    verbose=True,
)

# Metrics are written under lightning_logs/EEG for TensorBoard.
logger = TensorBoardLogger("lightning_logs", name="EEG")

trainer = pl.Trainer(
    max_epochs=N_EP0CHS,
    logger=logger,
    callbacks=checkpoint_callback,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [18]:
# Train the model on the data module (the Trainer above caps this at N_EP0CHS epochs).
trainer.fit(model, data_module)

You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type           | Params
---------------------------------------------
0 | model     | EEGModel       | 2.9 M 
1 | criterion | BCELoss        | 0     
2 | acc       | BinaryAccuracy | 0     
---------------------------------------------
2.9 M     Trainable params
0         Non-trainable params
2.9 M     Total params
11.442    Total estimated model params size (MB)


                                                                           

  rank_zero_warn(


Epoch 0: 100%|██████████| 15/15 [00:01<00:00, 13.12it/s, v_num=6, train_loss=0.683, train_accuracy=0.600, val_loss=0.588, val_accuracy=0.640]

Epoch 0, global step 15: 'val_loss' reached 0.58766 (best 0.58766), saving model to '/home/wo0kie3/Desktop/alco/eeg-alcoholics/checkpoints/best-checkpoint-v6.ckpt' as top 1


Epoch 1: 100%|██████████| 15/15 [00:00<00:00, 17.00it/s, v_num=6, train_loss=0.471, train_accuracy=0.800, val_loss=0.364, val_accuracy=0.810]

Epoch 1, global step 30: 'val_loss' reached 0.36401 (best 0.36401), saving model to '/home/wo0kie3/Desktop/alco/eeg-alcoholics/checkpoints/best-checkpoint-v6.ckpt' as top 1


Epoch 2: 100%|██████████| 15/15 [00:00<00:00, 16.84it/s, v_num=6, train_loss=0.157, train_accuracy=0.950, val_loss=0.477, val_accuracy=0.820]

Epoch 2, global step 45: 'val_loss' was not in top 1


Epoch 3: 100%|██████████| 15/15 [00:00<00:00, 16.14it/s, v_num=6, train_loss=0.071, train_accuracy=1.000, val_loss=0.470, val_accuracy=0.830] 

Epoch 3, global step 60: 'val_loss' was not in top 1


Epoch 4: 100%|██████████| 15/15 [00:00<00:00, 15.32it/s, v_num=6, train_loss=0.0736, train_accuracy=1.000, val_loss=0.374, val_accuracy=0.830]

Epoch 4, global step 75: 'val_loss' was not in top 1


Epoch 5: 100%|██████████| 15/15 [00:00<00:00, 16.49it/s, v_num=6, train_loss=0.0354, train_accuracy=1.000, val_loss=0.394, val_accuracy=0.880]

Epoch 5, global step 90: 'val_loss' was not in top 1


Epoch 6: 100%|██████████| 15/15 [00:00<00:00, 16.85it/s, v_num=6, train_loss=0.0209, train_accuracy=1.000, val_loss=0.360, val_accuracy=0.900] 

Epoch 6, global step 105: 'val_loss' reached 0.36010 (best 0.36010), saving model to '/home/wo0kie3/Desktop/alco/eeg-alcoholics/checkpoints/best-checkpoint-v6.ckpt' as top 1


Epoch 7: 100%|██████████| 15/15 [00:01<00:00, 14.85it/s, v_num=6, train_loss=0.00922, train_accuracy=1.000, val_loss=0.300, val_accuracy=0.930]

Epoch 7, global step 120: 'val_loss' reached 0.29960 (best 0.29960), saving model to '/home/wo0kie3/Desktop/alco/eeg-alcoholics/checkpoints/best-checkpoint-v6.ckpt' as top 1


Epoch 8: 100%|██████████| 15/15 [00:00<00:00, 16.85it/s, v_num=6, train_loss=0.0152, train_accuracy=1.000, val_loss=0.336, val_accuracy=0.920] 

Epoch 8, global step 135: 'val_loss' was not in top 1


Epoch 9: 100%|██████████| 15/15 [00:00<00:00, 17.95it/s, v_num=6, train_loss=0.00163, train_accuracy=1.000, val_loss=0.359, val_accuracy=0.930]

Epoch 9, global step 150: 'val_loss' was not in top 1


Epoch 10: 100%|██████████| 15/15 [00:00<00:00, 18.89it/s, v_num=6, train_loss=0.00111, train_accuracy=1.000, val_loss=0.303, val_accuracy=0.930] 

Epoch 10, global step 165: 'val_loss' was not in top 1


Epoch 11: 100%|██████████| 15/15 [00:00<00:00, 16.43it/s, v_num=6, train_loss=0.000496, train_accuracy=1.000, val_loss=0.310, val_accuracy=0.930]

Epoch 11, global step 180: 'val_loss' was not in top 1


Epoch 12: 100%|██████████| 15/15 [00:00<00:00, 20.19it/s, v_num=6, train_loss=0.000766, train_accuracy=1.000, val_loss=0.332, val_accuracy=0.930]

Epoch 12, global step 195: 'val_loss' was not in top 1


Epoch 13: 100%|██████████| 15/15 [00:00<00:00, 16.24it/s, v_num=6, train_loss=0.000617, train_accuracy=1.000, val_loss=0.344, val_accuracy=0.930]

Epoch 13, global step 210: 'val_loss' was not in top 1


Epoch 14: 100%|██████████| 15/15 [00:00<00:00, 20.38it/s, v_num=6, train_loss=0.000358, train_accuracy=1.000, val_loss=0.352, val_accuracy=0.930]

Epoch 14, global step 225: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|██████████| 15/15 [00:00<00:00, 20.35it/s, v_num=6, train_loss=0.000358, train_accuracy=1.000, val_loss=0.352, val_accuracy=0.930]


In [19]:
# Evaluate on the held-out test loader. No model or ckpt_path is passed; in the
# recorded run Lightning restores the saved best checkpoint before testing.
trainer.test(dataloaders=data_module.test_dataloader())

  rank_zero_warn(
You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/wo0kie3/Desktop/alco/eeg-alcoholics/checkpoints/best-checkpoint-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/wo0kie3/Desktop/alco/eeg-alcoholics/checkpoints/best-checkpoint-v6.ckpt


Testing DataLoader 0: 100%|██████████| 12/12 [00:00<00:00, 67.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Runningstage.testing metric      DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.9184210300445557
        test_loss           0.2879190742969513
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.2879190742969513, 'test_accuracy': 0.9184210300445557}]

In [20]:
# Split the (sequence, label) pairs into parallel feature / target lists.
X_train = [sequence for sequence, _ in train_sequences]
y_train = [label for _, label in train_sequences]

In [21]:
# Feature (channel) names for the explainer, read from the loaded data instead
# of a hand-copied list so they cannot drift from the columns behind the arrays.
# NOTE(review): assumes every recording shares the first recording's column
# order — confirm against utils.read_data.
column_names = train[next(iter(train))]['ts'].columns.tolist()

In [22]:
# Shape of the stacked training array.
np.array(X_train).shape

(468, 256, 64)

In [23]:
# Stack the training lists into arrays before handing them to LIME.
lime_training_data = np.array(X_train)
lime_training_labels = np.array(y_train)

explainer = lime_tabular.RecurrentTabularExplainer(
    training_data=lime_training_data,
    mode='classification',
    training_labels=lime_training_labels,
    feature_names=column_names,
)

In [None]:
# Upper bound on how many perturbed sequences go through the network at once.
LIME_CHUNK_SIZE = 256


def predict_proba(sequences):
    """Adapt the trained model to the classifier interface LIME expects.

    LIME's classification mode needs a 2-D numpy array of per-class
    probabilities, whereas ``model.predict`` yields 1-D hard labels as a torch
    tensor. This wrapper runs the model in eval mode without autograd, in
    chunks, and returns ``[P(class 0), P(class 1)]`` for every sequence.

    Args:
        sequences: array-like batch of sequences, one per row.

    Returns:
        ``numpy.ndarray`` of shape ``(n_sequences, 2)``.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            inputs = torch.as_tensor(np.asarray(sequences), dtype=torch.float32)
            positive = torch.cat([
                # model(...) returns (loss, output); only the output is needed.
                model(chunk.to(model.device))[1].reshape(-1).cpu()
                for chunk in inputs.split(LIME_CHUNK_SIZE)
            ])
    finally:
        # Leave the module in whatever mode it was in before.
        model.train(was_training)
    positive = positive.numpy()
    return np.column_stack((1.0 - positive, positive))


exp = explainer.explain_instance(
    data_row=np.array(X_train[0]),
    classifier_fn=predict_proba,
)

In [None]:
# Render the explanation inline in the notebook.
exp.show_in_notebook()