In [25]:
from typing import List, Tuple
import os
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

# Report the name of the first CUDA device when one is visible (informational only).
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

# Seed PyTorch's global RNG so that repeated runs start from the same state.
torch.manual_seed(4321)
# Ask PyTorch to use deterministic algorithm implementations.
# NOTE(review): on CUDA, some operations may additionally require the
# CUBLAS_WORKSPACE_CONFIG environment variable to be set — confirm before
# moving models to the GPU.
torch.use_deterministic_algorithms(True)

NVIDIA GeForce RTX 3080


## Prepare Dataset

In [16]:
class BCIDataset(Dataset):
    """Dataset backed by one or more ``.npz`` files, loaded on first access.

    Every file must contain a ``signal`` array and a ``label`` array. The
    arrays of all files are concatenated along axis 0. A singleton axis is
    inserted at position 1 of the signals and their last two axes are swapped.
    Labels are shifted down by one, and NaN entries in the signals are replaced
    by the mean of all non-NaN signal values.

    Args:
        data_files: Paths of the ``.npz`` files to read. The list is copied, so
            later changes to the caller's list do not affect the dataset.

    Raises:
        FileNotFoundError: If any of the given paths does not exist at
            construction time.
    """
    _data_files: List[str]
    _data_loaded: bool = False

    _data: np.ndarray
    _labels: np.ndarray

    def __init__(
        self, data_files: List[str] = ["S4b_train.npz", "X11b_train.npz"]
    ):
        super(BCIDataset, self).__init__()
        # Snapshot the argument: storing the caller's list (or the shared
        # default list) by reference would let later mutations silently change
        # which files this dataset reads when it is lazily loaded.
        files = list(data_files)
        for file in files:
            if not os.path.exists(file):
                raise FileNotFoundError(f"The data file {file} does not exist.")

        self._data_files = files

    def __len__(self) -> int:
        """Return the number of samples, loading the files if necessary."""
        self._ensure_loaded()
        return self._labels.shape[0]

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, np.generic]:
        """Return ``(signal, label)`` for ``index``, loading the files if necessary.

        The signal is a tensor sharing memory with the underlying NumPy array;
        the label is the corresponding entry of the label array (a NumPy scalar
        when the label array is one-dimensional).
        """
        self._ensure_loaded()
        return torch.from_numpy(self._data[index]), self._labels[index]

    def _ensure_loaded(self) -> None:
        """Read the data files the first time any sample is requested."""
        if not self._data_loaded:
            self._load_data()

    def _load_data(self):
        """Read every file, merge the arrays and apply the preprocessing."""
        signals = []
        labels = []

        for file in self._data_files:
            # np.load on an .npz returns a lazy archive; the context manager
            # closes the file once both arrays have been read.
            with np.load(file) as f:
                signals.append(f["signal"])
                labels.append(f["label"])

        self._data = np.concatenate(signals, axis = 0)
        self._labels = np.concatenate(labels, axis = 0)

        # Insert an axis at position 1 and swap the last two axes.
        # NOTE(review): presumably this yields (N, 1, channels, time) for the
        # convolutional models — confirm against the stored signal layout.
        self._data = np.expand_dims(self._data, axis=1).swapaxes(-1, -2)
        # Shift labels down by one (e.g. 1/2 -> 0/1).
        self._labels -= 1

        # Replace missing readings with the global mean of the valid ones.
        nan_mask = np.isnan(self._data)
        if nan_mask.any():
            self._data[nan_mask] = np.nanmean(self._data)

        # NOTE(review): the signal dtype is kept as stored in the files; if it
        # is float64 the consumer must cast before feeding float32 layers —
        # confirm.
        self._data_loaded = True

# Training split. The constructor only checks that the files exist; the arrays
# are read lazily on the first len()/indexing call.
train_dataset = BCIDataset(data_files = ["S4b_train.npz", "X11b_train.npz"])
# Test split, built the same way from the *_test files.
test_dataset = BCIDataset(data_files = ["S4b_test.npz", "X11b_test.npz"])

## Build Models

In [50]:
class Model(nn.Module):
    """Base class for networks whose activation function is chosen by name.

    Supported names are the keys of ``_activation_dict``. Reading
    ``activation`` builds a brand-new activation module on every access, so
    each layer stack receives its own instance; assigning to ``activation`` is
    accepted but ignored.

    Args:
        activation: Name of the activation function to use.

    Raises:
        NotImplementedError: If ``activation`` is not a supported name.
    """

    # Maps an activation name to a zero-argument factory for that module.
    _activation_dict = dict(
        ELU = lambda: nn.ELU(alpha = 1.0),
        ReLU = lambda: nn.ReLU(),
        LeakyReLU = lambda: nn.LeakyReLU(negative_slope = 0.01),
    )
    _activation_name: str

    def __init__(self, activation: str):
        super().__init__()
        if activation in self._activation_dict:
            # The assignment below goes through the ignoring setter and has no
            # effect; it is kept to mirror the established initialisation order.
            self.activation = None
            self.activation_name = activation
        else:
            raise NotImplementedError(f"The activation function {activation} is not implemented.")

    @property
    def activation(self) -> nn.Module:
        """A freshly constructed activation module for the current name."""
        factory = self._activation_dict[self._activation_name]
        return factory()

    @activation.setter
    def activation(self, _):
        # Intentionally ignores the assigned value.
        return None

    @property
    def activation_name(self) -> str:
        """Name of the currently selected activation function."""
        return self._activation_name

    @activation_name.setter
    def activation_name(self, name: str):
        self._activation_name = name

### EEGNet

#### Architecture
![EEGNet](assets/EEGNet.jpg)

In [55]:
class EEGNet(Model):
    """EEGNet-style convolutional classifier with two output classes.

    The network is a temporal convolution, a grouped convolution that collapses
    the two input rows, a second temporal convolution, and a linear classifier.
    The classifier expects 736 features, which matches an input of shape
    ``(batch, 1, 2, 750)``: 32 channels x 1 row x 23 columns after pooling.

    Args:
        activation: Name of the activation function (see ``Model``).
    """
    first_conv: nn.Sequential
    depthwise_conv: nn.Sequential
    separable_conv: nn.Sequential
    classfier: nn.Sequential

    def __init__(self, activation: str = "ELU"):
        super(EEGNet, self).__init__(activation)

        # (batch, 1, 2, 750) -> (batch, 16, 2, 750); padding keeps the width.
        self.first_conv = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size = (1, 51), stride = (1, 1), padding = (0, 25), bias = False),
            nn.BatchNorm2d(16, eps = 1e-5, momentum = 0.1, affine = True, track_running_stats = True)
        )
        # The (2, 1) kernel collapses the two rows: -> (batch, 32, 1, 750),
        # then pooling by 4 gives (batch, 32, 1, 187).
        self.depthwise_conv = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size = (2, 1), stride = (1, 1), groups = 16, bias = False),
            nn.BatchNorm2d(32, eps = 1e-5, momentum = 0.1, affine = True, track_running_stats = True),
            self.activation,
            nn.AvgPool2d(kernel_size = (1, 4), stride = (1, 4), padding = 0),
            nn.Dropout(p = 0.25)
        )
        # input: batch, 32, 1, 187 -> output: batch, 32, 1, 23
        self.separable_conv = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size = (1, 15), stride = (1, 1), padding = (0, 7), bias = False),
            nn.BatchNorm2d(32, eps = 1e-5, momentum = 0.1, affine = True, track_running_stats = True),
            self.activation,
            nn.AvgPool2d(kernel_size = (1, 8), stride = (1, 8), padding = 0),
            nn.Dropout(p = 0.25)
        )
        # 32 * 1 * 23 = 736 flattened features.
        self.classfier = nn.Sequential(
            nn.Linear(736, 2, bias = True)
        )

    def forward(self, x):
        """Return class scores of shape ``(batch, 2)`` for input ``x``."""
        x = self.first_conv(x)
        x = self.depthwise_conv(x)
        x = self.separable_conv(x)
        # nn.Linear acts on the last dimension only, so the feature maps must
        # be flattened to (batch, 736) first. Doing it here (rather than adding
        # an nn.Flatten layer) keeps the module tree and state_dict keys as-is.
        x = torch.flatten(x, start_dim = 1)
        return self.classfier(x)

# Build an EEGNet with the ELU activation and print its layer structure.
network = EEGNet(activation = "ELU")
print(network)

EEGNet(
  (first_conv): Sequential(
    (0): Conv2d(1, 16, kernel_size=(1, 51), stride=(1, 1), padding=(0, 25), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (depthwise_conv): Sequential(
    (0): Conv2d(16, 32, kernel_size=(2, 1), stride=(1, 1), groups=16, bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ELU(alpha=1.0)
    (3): AvgPool2d(kernel_size=(1, 4), stride=(1, 4), padding=0)
    (4): Dropout(p=0.25, inplace=False)
  )
  (separable_conv): Sequential(
    (0): Conv2d(32, 32, kernel_size=(1, 15), stride=(1, 1), padding=(0, 7), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ELU(alpha=1.0)
    (3): AvgPool2d(kernel_size=(1, 8), stride=(1, 8), padding=0)
    (4): Dropout(p=0.25, inplace=False)
  )
  (classfier): Sequential(
    (0): Linear(in_features=736, out_features=2, bias=True)
  )
)


### DeepConvNet

#### Architecture
> Parameters: C = 2, T = 750, N = 2

![DeepConvNet](assets/DeepConvNet.jpg)

In [60]:
class DeepConvNet(Model):
    """DeepConvNet-style convolutional classifier with two output classes.

    The layer sizes are worked out for an input of shape ``(batch, 1, 2, 750)``
    (C = 2, T = 750). A first block collapses the two rows, three further
    convolution blocks widen the channels to 50, 100 and 200, and a linear
    classifier operates on the flattened result.

    Args:
        activation: Name of the activation function (see ``Model``).
    """
    first_conv_block: nn.Sequential
    conv_blocks: nn.ModuleList
    classfier: nn.Sequential

    def __init__(self, activation: str = "ELU"):
        super(DeepConvNet, self).__init__(activation)

        in_channels = 1
        out_channels = 25
        self.first_conv_block = nn.Sequential(
            # H = 2, W = 750
            nn.Conv2d(in_channels, out_channels, kernel_size = (1, 5), stride = (1, 1), bias = True),
            # H = 2, W = 750 - 5 + 1 = 746
            nn.Conv2d(out_channels, out_channels, kernel_size = (2, 1), stride = (1, 1), bias = True),
            # H = 1, W = 746
            nn.BatchNorm2d(out_channels, eps = 1e-5, momentum = 0.1, affine = True, track_running_stats = True),
            self.activation,
            nn.MaxPool2d((1, 2)),
            # H = 1, W = 373
            nn.Dropout(p = 0.5)
        )

        kernels = (50, 100, 200)
        self.conv_blocks = nn.ModuleList()
        for kernel in kernels:
            in_channels = out_channels
            out_channels = kernel
            self.conv_blocks.append(nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size = (1, 5), stride = (1, 1), bias = True),
                nn.BatchNorm2d(out_channels, eps = 1e-5, momentum = 0.1, affine = True, track_running_stats = True),
                self.activation,
                nn.MaxPool2d((1, 2)),
                nn.Dropout(p = 0.5)
            ))
        # conv_blocks[0]: H = 1, W = (373 - 5 + 1) / 2 = 184 (floor of 184.5)
        # conv_blocks[1]: H = 1, W = (184 - 5 + 1) / 2 = 90
        # conv_blocks[2]: H = 1, W = (90 - 5 + 1) / 2 = 43

        self.classfier = nn.Sequential(
            nn.Flatten(),
            # Batch_Size, 200 * 1 * 43 = 8600
            nn.Linear(8600, 2, bias = True)
        )

    def forward(self, x):
        """Return class scores of shape ``(batch, 2)`` for input ``x``."""
        x = self.first_conv_block(x)
        # nn.ModuleList is only a container and cannot be called; apply its
        # blocks one after another.
        for block in self.conv_blocks:
            x = block(x)
        return self.classfier(x)

# Build a DeepConvNet with the ELU activation and print its layer structure.
# Note that this rebinds the `network` name used for the EEGNet summary above.
network = DeepConvNet(activation = "ELU")
print(network)

DeepConvNet(
  (first_conv_block): Sequential(
    (0): Conv2d(1, 25, kernel_size=(1, 5), stride=(1, 1))
    (1): Conv2d(25, 25, kernel_size=(2, 1), stride=(1, 1))
    (2): BatchNorm2d(25, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ELU(alpha=1.0)
    (4): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
    (5): Dropout(p=0.5, inplace=False)
  )
  (conv_blocks): ModuleList(
    (0): Sequential(
      (0): Conv2d(25, 50, kernel_size=(1, 5), stride=(1, 1))
      (1): BatchNorm2d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ELU(alpha=1.0)
      (3): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
      (4): Dropout(p=0.5, inplace=False)
    )
    (1): Sequential(
      (0): Conv2d(50, 100, kernel_size=(1, 5), stride=(1, 1))
      (1): BatchNorm2d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ELU(alpha=1.0)
      (3): Max

## Training

In [62]:
from typing import Dict

class Trainer:
    """Holds models, optimizer, loss and data loaders, and plots their metrics.

    The plotting helpers read three per-run records, all keyed by name:
    ``_loss_history`` and ``_accuracy_history`` (one value per epoch) and
    ``_highest_accuracy_dict`` (best accuracy, looked up by activation name).
    They start out empty; ``train`` and ``test`` are still stubs, so nothing in
    this class fills them yet.

    Args:
        models: The models to train. A single model is also accepted and is
            treated as a one-element list.
        optimizer: Optimizer used for training.
        loss: Loss module used for training.
        train_dataloader: Loader for the training split.
        test_dataloader: Loader for the test split.
        epoch_size: Number of training epochs.
    """
    _models: List["Model"]
    _optimizer: torch.optim.Optimizer
    _loss: nn.Module
    _train_dataloader: DataLoader
    _test_dataloader: DataLoader
    _epoch_size: int

    _accuracy_history: Dict[str, List[float]]
    _highest_accuracy_dict: Dict[str, float]
    _loss_history: Dict[str, List[float]]

    def __init__(
        self, models: List["Model"], optimizer: torch.optim.Optimizer, loss: nn.Module, train_dataloader: DataLoader, test_dataloader: DataLoader, epoch_size: int = 300
    ):
        # Accept a bare model for convenience; the rest of the class indexes
        # and iterates over `_models`, which a single module does not support.
        if isinstance(models, nn.Module) and not isinstance(models, nn.ModuleList):
            models = [models]
        self._models = list(models)
        self._optimizer = optimizer
        self._loss = loss
        self._train_dataloader = train_dataloader
        self._test_dataloader = test_dataloader
        self._epoch_size = epoch_size

        # Start with empty records so the plotting helpers can be called
        # before any training has happened.
        self._accuracy_history = {}
        self._highest_accuracy_dict = {}
        self._loss_history = {}

    def _calculate_accuracy(self, y_pred: torch.Tensor, y_true: torch.Tensor):
        """Return the fraction of matching entries as a scalar tensor."""
        # The comparison yields a bool tensor, which torch.mean rejects;
        # convert to float first.
        return torch.mean((y_pred == y_true).float())

    def _network_name(self) -> str:
        """Class name of the first model, used to label figures and tables."""
        return type(self._models[0]).__name__

    def show_loss(self):
        """Plot the recorded training loss of every entry against the epoch."""
        plt.figure()
        plt.title("train loss")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        for model, loss_history in self._loss_history.items():
            # Size the x axis from the history itself so that a partial run
            # can still be plotted.
            epochs = list(range(1, len(loss_history) + 1))
            plt.plot(epochs, loss_history, label = model)
        if self._loss_history:
            plt.legend()
        plt.show()

    def show_accuracy_figure(self):
        """Plot the recorded accuracy (in percent) of every entry per epoch."""
        plt.figure()
        plt.title(f"Activation function comparison ({self._network_name()})")
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy(%)")
        plt.ylim(top = 100)
        for name, accuracy_history in self._accuracy_history.items():
            # Histories hold fractions; scale to percent for display.
            accuracy_history = np.array(accuracy_history) * 100
            epochs = list(range(1, len(accuracy_history) + 1))
            plt.plot(epochs, accuracy_history, label = name)
        if self._accuracy_history:
            plt.legend()
        plt.show()

    def show_accuracy_table(self):
        """Show the best accuracy per activation function as a one-row table."""
        columns = [model.activation_name for model in self._models]
        rows = [self._network_name()]

        row = []
        for name in columns:
            best = self._highest_accuracy_dict.get(name)
            row.append("N/A" if best is None else f"{float(best) * 100:.2f}%")
        # plt.table expects a 2-D list: one inner list per table row.
        cell_text = [row]

        plt.figure()
        plt.axis("off")
        plt.table(cellText = cell_text, rowLabels = rows, colLabels = columns, loc = "center")
        plt.show()

    def train(self):
        """Train the models. Not implemented yet."""
        pass

    def test(self):
        """Evaluate the models on the test split. Not implemented yet."""
        pass

In [23]:
# Mini-batch size shared by the training and test loaders.
BATCH_SIZE: int = 64
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Adam step size for the EEGNet run.
LEARNING_RATE: float = 1e-2
model = EEGNet(activation = "ELU")
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
loss = nn.CrossEntropyLoss()

EPOCH_SIZE: int = 300
# Trainer takes a list of models (it indexes and iterates over them), so the
# single EEGNet instance is wrapped in a list.
EEGNet_trainer = Trainer([model], optimizer, loss, train_dataloader, test_dataloader, epoch_size = EPOCH_SIZE)
EEGNet_trainer