In [3]:
"""


Original paper - https://arxiv.org/abs/1611.08024



"""

In [73]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

In [74]:
# load the dataframe from the pickle file
import pickle
dir = "C:/Users/gusta/OneDrive/Skrivebord/KI & Data/Bachelor/LegeData"
with open(f"{dir}/dataframe.pkl", "rb") as f:
    df = pickle.load(f)

# keep these channels only, these are the indexes: 
""" 
Fp1 -> 0
Fp2 -> 33
F3  -> 4
F4  -> 38
C3  -> 12
C4  -> 48
P3  -> 20
P4  -> 55
O1  -> 26
O2  -> 61
F7  -> 6
F8  -> 40
T7  -> 14
T8  -> 50
P7  -> 22
P8  -> 57
Fz  -> 36
Cz  -> 46
Pz  -> 30 

but add 1 to each index, since the first channel is channel_1
"""

df = df[["channel_1", "channel_34", "channel_5", "channel_39", "channel_13", "channel_49", "channel_21", "channel_56", "channel_27", "channel_62", "channel_7", "channel_41", "channel_15", "channel_51", "channel_23", "channel_58", "channel_37", "channel_47", "channel_31", "label"]]

patient_ids = np.repeat([1,2,3,4,5,6,7,8,9,10],76288)  # Make sure to have this aligned with your epochs/labels

# Normalize per patient (within training and test sets)
data = df.drop("label", axis=1).values
data_norm = []
for patient_id in np.unique(patient_ids):
    patient_data = data[patient_ids == patient_id]
    scaler = StandardScaler()
    patient_data_scaled = scaler.fit_transform(patient_data)
    print(f"mean of patient {patient_id}: {np.min(patient_data_scaled), np.max(patient_data_scaled)}")
    data_norm.append(patient_data_scaled)

data_norm = np.concatenate(data_norm, axis=0)
# add labels back
y = df["label"].values
data = data_norm

# make data a dataframe again
df = pd.DataFrame(data)
print(f"size of df: {df.shape}")

# split the data into epochs of 256 datapoints each
epochs = []
for i in range(0, len(df), 256):
    epochs.append(df.iloc[i:i+256].values)

# convert the list of epochs to a numpy array
epochs = np.array(epochs)
print(f"size of epochs: {epochs.shape}")

# save the SCALED epochs
with open(f"{dir}/scaled_rawEEG_epochs.pkl", "wb") as f:
    pickle.dump(epochs, f)




mean of patient 1: (np.float64(-2.5061664365916747), np.float64(3.2048110636610154))
mean of patient 2: (np.float64(-2.2856199344616766), np.float64(3.1704214250154332))
mean of patient 3: (np.float64(-2.6244058798465795), np.float64(2.326236025125405))
mean of patient 4: (np.float64(-2.9839669159521462), np.float64(3.0033738566092985))
mean of patient 5: (np.float64(-2.7180645928577865), np.float64(1.9617189652654394))
mean of patient 6: (np.float64(-3.8235823428136357), np.float64(1.5538075585523163))
mean of patient 7: (np.float64(-2.17836904765987), np.float64(2.700097334829556))
mean of patient 8: (np.float64(-1.9391087092804788), np.float64(4.868952462591622))
mean of patient 9: (np.float64(-3.278040789072896), np.float64(2.18437991679923))
mean of patient 10: (np.float64(-1.6112388744411374), np.float64(4.696875071895533))
size of df: (762880, 19)
size of epochs: (2980, 256, 19)


In [75]:
labels = []
for _ in range(10):
    labels.append(np.repeat([1, 0], 149))
labels = np.concatenate(labels)

print(f"size of labels: {labels.shape}")


size of labels: (2980,)


In [76]:
channels = 19
sample_len = 256


class EEGNet(nn.Module):
    def __init__(self):
        super(EEGNet, self).__init__()
        self.T = sample_len
        self.dropout_rate = 0.1
        
        # Layer 1
        self.conv1 = nn.Conv2d(1, 16, (1, channels), padding = 0)
        self.batchnorm1 = nn.BatchNorm2d(16, False)
        
        # Layer 2
        self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
        self.conv2 = nn.Conv2d(1, 4, (2, 32))
        self.batchnorm2 = nn.BatchNorm2d(4, False)
        self.pooling2 = nn.MaxPool2d(2, 4)
        
        # Layer 3
        self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
        self.conv3 = nn.Conv2d(4, 4, (8, 4))
        self.batchnorm3 = nn.BatchNorm2d(4, False)
        self.pooling3 = nn.MaxPool2d((2, 4))
        
        # FC Layer
        # NOTE: This dimension will depend on the number of timestamps per sample in your data.
        # I have 120 timepoints. 
        self.fc1 = nn.Linear(8*2 * (sample_len // 32), 1)
        

    def forward(self, x):
        # Layer 1
        x = F.elu(self.conv1(x))
        x = self.batchnorm1(x)
        x = F.dropout(x, self.dropout_rate)
        x = x.permute(0, 3, 1, 2)
        
        # Layer 2
        x = self.padding1(x)
        x = F.elu(self.conv2(x))
        x = self.batchnorm2(x)
        x = F.dropout(x, self.dropout_rate)
        x = self.pooling2(x)
        
        # Layer 3
        x = self.padding2(x)
        x = F.elu(self.conv3(x))
        x = self.batchnorm3(x)
        x = F.dropout(x, self.dropout_rate)
        x = self.pooling3(x)
        
        # FC Layer
        x = x.reshape(-1, 128)
        x = F.sigmoid(self.fc1(x))
        return x


net = EEGNet()
print(net.forward(Variable(torch.Tensor(1,1,sample_len,channels))))
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

def init_weights(m):
    if isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform_(m.weight)

net.apply(init_weights)


tensor([[0.4268]], grad_fn=<SigmoidBackward0>)


EEGNet(
  (conv1): Conv2d(1, 16, kernel_size=(1, 19), stride=(1, 1))
  (batchnorm1): BatchNorm2d(16, eps=False, momentum=0.1, affine=True, track_running_stats=True)
  (padding1): ZeroPad2d((16, 17, 0, 1))
  (conv2): Conv2d(1, 4, kernel_size=(2, 32), stride=(1, 1))
  (batchnorm2): BatchNorm2d(4, eps=False, momentum=0.1, affine=True, track_running_stats=True)
  (pooling2): MaxPool2d(kernel_size=2, stride=4, padding=0, dilation=1, ceil_mode=False)
  (padding2): ZeroPad2d((2, 1, 4, 3))
  (conv3): Conv2d(4, 4, kernel_size=(8, 4), stride=(1, 1))
  (batchnorm3): BatchNorm2d(4, eps=False, momentum=0.1, affine=True, track_running_stats=True)
  (pooling3): MaxPool2d(kernel_size=(2, 4), stride=(2, 4), padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=128, out_features=1, bias=True)
)

Evaluate function


In [77]:
import torch
from torch.autograd import Variable
import numpy as np
from sklearn.metrics import accuracy_score



# Define simple evaluation function
def evaluate(model, dataloader, sample_len, channels):
    model.eval()
    model_predictions = []
    true_labels = []

    for _, (data, target) in enumerate(dataloader):

      inputs, targets = data , target
      # permute inputs from (batch_size, channels, sample_len) to (batch_size, sample_len, channels)
      inputs = inputs.permute(0, 2, 1)
      inputs = inputs.reshape(-1,1,sample_len,channels)


      data = Variable((inputs))
      target = Variable((targets))
      
      output = model(data).reshape(-1)

      model_predictions.extend(output.detach().numpy())
      true_labels.extend(target.detach().numpy())

    all_predictions = np.array(model_predictions) > 0.5
    all_targets = np.array(true_labels)



    accuracy = accuracy_score(all_targets, all_predictions)
    return accuracy

In [78]:
from torch.utils.data import DataLoader, Dataset

# Dataset class definition
class EEGDataset(Dataset):
    def __init__(self, epochs, labels):
        self.df = epochs
        self.labels = labels

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Access the epoch and corresponding label
        data = self.df[idx]  # data shape: (256, 19)
        label = self.labels[idx]  # label shape: ()
        
        # Ensure the data is converted to float tensor
        data_tensor = torch.tensor(data, dtype=torch.float32)
        
        # Convert label to tensor (assumed to be scalar)
        label_tensor = torch.tensor(label, dtype=torch.float32) 
        
        return data_tensor, label_tensor




#### Training and evaluation loop

In [72]:
from sklearn.model_selection import LeaveOneGroupOut
batch_size = 16


# Initialize Leave-One-Group-Out cross-validator
logo = LeaveOneGroupOut()

# This is an array where each entry corresponds to a patient ID (e.g., [0, 0, 0, 1, 1, 1, ..., 9])
patient_ids = np.repeat([1,2,3,4,5,6,7,8,9,10],298)  # Make sure to have this aligned with your epochs/labels


num_epochs = 10

for epoch in range(num_epochs):  # Loop over the number of epochs
    print(f"Epoch {epoch+1}")
    running_loss = []

    for train_index, test_index in logo.split(epochs, labels, groups=patient_ids):
        # Training loop
        net.train()

        # Train and test split
        X_train, X_test = epochs[train_index,:,:], epochs[test_index,:,:]
        y_train, y_test = labels[train_index], labels[test_index]
        
        # Create DataLoader for training and testing
        train_dataset = EEGDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        test_dataset = EEGDataset(X_test, y_test)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
        

        for i, data in enumerate(train_loader, 0):
            if i % 16 == 0:
                print(i, end=", ")
            
            # Get the inputs; data is a list of [inputs, labels/targets]
            inputs, targets = data
            inputs = inputs.reshape(-1, 1, sample_len, channels)  # Reshape for network input

            # Convert inputs and targets to variables
            inputs, targets = Variable(inputs), Variable(targets)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = net(inputs).reshape(-1)
            loss = criterion(outputs, targets)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            running_loss.append(loss.item())

            # Print training loss
            print(f"Training Loss: {running_loss[i]:.4f}")
        
        # Check performance on training and testing sets
        train_accuracy = evaluate(net, train_loader, sample_len, channels)
        test_accuracy = evaluate(net, test_loader, sample_len, channels)
        
        print(f"Train Accuracy: {train_accuracy*100:.2f}%")
        print(f"Test Accuracy: {test_accuracy*100:.2f}%")


Epoch 1
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accuracy: 50.22%
Test Accuracy: 50.67%
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accuracy: 49.44%
Test Accuracy: 52.68%
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accuracy: 49.89%
Test Accuracy: 50.00%
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accuracy: 50.30%
Test Accuracy: 50.00%
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accuracy: 49.78%
Test Accuracy: 50.34%
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accuracy: 49.81%
Test Accuracy: 50.00%
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accuracy: 49.85%
Test Accuracy: 50.00%
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accuracy: 49.96%
Test Accuracy: 50.34%
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, Training Loss: 0.6458
Train Accur

KeyboardInterrupt: 