## Preprocess

In [41]:
import pandas as pd
import os
# file_name = 'emile-1689186012.csv'
# # Open the file in pandas
# df = pd.read_csv(file_name)
# # split by images
# df['Image'] = df['Image'].astype('category')
# print(df['Image'].cat.categories)

# # subtract the first timestamp from all timestamps for every image category
# for image in df['Image'].cat.categories:
#     # save the difference between timestamp and image-timestamp as a new column
#     df.loc[df['Image'] == image, 'Time-Diff'] = df.loc[df['Image'] == image, 'Timestamp'] - df.loc[df['Image'] == image, 'Image-Timestamp']
#     # subtract the first timestamp from all timestamps
#     df.loc[df['Image'] == image, 'Image-Timestamp'] = df.loc[df['Image'] == image, 'Image-Timestamp'] - df.loc[df['Image'] == image, 'Image-Timestamp'].iloc[0]
#     df.loc[df['Image'] == image, 'Unity-Time'] = df.loc[df['Image'] == image, 'Unity-Time'] - df.loc[df['Image'] == image, 'Unity-Time'].iloc[0]
#     df.loc[df['Image'] == image, 'Timestamp'] = df.loc[df['Image'] == image, 'Timestamp'] - df.loc[df['Image'] == image, 'Timestamp'].iloc[0]
    
# # save to a new csv
# df.to_csv(f'{file_name}-processed.csv', index=False)

def preprocess_eeg(file_name):
    # open the file in pandas
    df = pd.read_csv(file_name)
    # split by images
    df['Image'] = df['Image'].astype('category')
    # extract the first 1000 eeg data for each image as a tensor
    eeg_data = []
    for image in df['Image'].cat.categories:
        eeg_data.append([df.loc[df['Image'] == image, ['TP9', 'AF7', 'AF8', 'TP10', 'Ref']].to_numpy()[:1000],image])
        # print(eeg_data[image].shape)
    return eeg_data

def preprocess_dataset(path):
    # get all csv files full path in path
    csv_files = [os.path.join(path, file) for file in os.listdir(path) if file.endswith('.csv')]
    print(csv_files)
    data = []
    # preprocess each file
    for file in csv_files:
        file_data = preprocess_eeg(file)
        # concatenate the data
        data = data + file_data
    return data

    


## Create Dataset

In [102]:
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np
# Create a torch dataset for the EEG data
class EEGDataset(Dataset):
    def __init__(self, path, transform=None):
        self.data = preprocess_dataset(path)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample,target = self.data[idx]
        # convert the 5 dimensions to 6 by appending a 0 (dim needs to be even)
        sample = np.append(sample, np.zeros((1000,1)), axis=1)
        # conert target to one-hot vector
        target = np.eye(5)[target]

        if self.transform:
            sample = self.transform(sample)

        return torch.from_numpy(sample), torch.tensor(target)



## Define Model

In [107]:
import math
class EEGTransformer(torch.nn.Module):
    def __init__(self, nhead=1, dropout=0.1, num_classes=5,d_model=6,d_ff=512):
        super(EEGTransformer, self).__init__()
        self.encoder_layer = torch.nn.TransformerEncoderLayer(d_model=d_model, 
                                                                nhead=nhead,
                                                                dropout=dropout,
                                                                dim_feedforward=d_ff,
                                                                batch_first=True)
        self.classifier = torch.nn.Linear(d_model, num_classes)
        self.softmax = torch.nn.Softmax(dim=1)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
    
    
    def forward(self, x):
        # x shape: (batch_size, seq_len, d_model)
        x = x + self.pos_encoder(x)
        # x shape: (batch_size, seq_len, d_model)
        x = self.encoder_layer(x)
        # x shape: (batch_size, seq_len, num_classes)
        x = self.classifier(x)
        # x shape: (batch_size, seq_len, num_classes)
        x = self.softmax(x)
        return x
    
class PositionalEncoding(torch.nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 1000):
        super().__init__()
        self.dropout = torch.nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        print(position.shape)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(1, max_len, d_model)
        print(pe[0, :, 0::2].shape)
        print(pe[0, :, 1::2].shape)
        print(torch.cos(position * div_term).shape)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``
        """
        x = x + self.pe[:x.size(1)]
        return self.dropout(x)


In [None]:
import torch.nn as nn
import torch.nn.functional as F
# modified from https://github.com/vlawhern/arl-eegmodels
class EEGNet(nn.Module):
    def __init__(self):
        super(EEGNet, self).__init__()
        self.T = 120
        
        # Layer 1
        self.conv1 = nn.Conv2d(1, 16, (1, 64), padding = 0)
        self.batchnorm1 = nn.BatchNorm2d(16, False)
        
        # Layer 2
        self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
        self.conv2 = nn.Conv2d(1, 4, (2, 32))
        self.batchnorm2 = nn.BatchNorm2d(4, False)
        self.pooling2 = nn.MaxPool2d(2, 4)
        
        # Layer 3
        self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
        self.conv3 = nn.Conv2d(4, 4, (8, 4))
        self.batchnorm3 = nn.BatchNorm2d(4, False)
        self.pooling3 = nn.MaxPool2d((2, 4))
        
        # FC Layer
        # NOTE: This dimension will depend on the number of timestamps per sample in your data.
        # I have 120 timepoints. 
        self.fc1 = nn.Linear(4*2*7, 1)
        

    def forward(self, x):
        # Layer 1
        x = F.relu(self.conv1(x))
        x = self.batchnorm1(x)
        x = F.dropout(x, 0.25)
        x = x.permute(0, 3, 1, 2)
        
        # Layer 2
        x = self.padding1(x)
        x = F.relu(self.conv2(x))
        x = self.batchnorm2(x)
        x = F.dropout(x, 0.25)
        x = self.pooling2(x)
        
        # Layer 3
        x = self.padding2(x)
        x = F.relu(self.conv3(x))
        x = self.batchnorm3(x)
        x = F.dropout(x, 0.25)
        x = self.pooling3(x)
        
        # FC Layer
        x = x.view(-1, 4*2*7)
        x = F.sigmoid(self.fc1(x))
        return x


# net = EEGNet().cuda(0)
# print net.forward(Variable(torch.Tensor(np.random.rand(1, 1, 120, 64)).cuda(0)))
# criterion = nn.BCELoss()
# optimizer = optim.Adam(net.parameters())

In [None]:
# instantiate a pytorch transformer model
model = EEGTransformer(d_ff=512).to(torch.float32)
# model = torch.nn.LSTM(6, hidden_size=256,num_layers=2, batch_first=True)

def train_transformer_on_batch(model,data,optimizer,loss_fn):
     # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data
    inputs = inputs.to(torch.float32)
    labels = labels.to(torch.long)
    # zero the parameter gradients
    optimizer.zero_grad()
    # forward + backward + optimize
    outputs = model(inputs)
    loss = loss_fn(outputs, labels)
    loss.backward()
    optimizer.step()
    return outputs

def train_lstm_on_batch(model,data,optimizer,loss_fn):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data
    inputs = inputs.to(torch.float32)
    labels = labels.to(torch.long)
    # zero the parameter gradients
    optimizer.zero_grad()
    # forward + backward + optimize
    outputs,h_n,c_n = model(inputs)
    loss = loss_fn(outputs, labels)
    loss.backward()
    optimizer.step()
    return outputs,h_n,c_n
    

## Train

In [108]:
# create a dataset
dataset = EEGDataset('dataset/emile')
# create a dataloader
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=0)
# check the first batch
for i, data in enumerate(dataloader):
    inputs, labels = data
    print(f"Input shape: {inputs.shape}, Label shape: {labels.shape}")
    break

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# loss function
loss_fn = torch.nn.CrossEntropyLoss()
# train the model
for epoch in range(10):
    for i, data in enumerate(dataloader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(torch.float32)
        labels = labels.to(torch.long)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        # print
        print(f'Epoch: {epoch}, Batch: {i}, Loss: {loss.item()}')
print('Finished Training')


['dataset/emile/emile-1689186012.csv']
Input shape: torch.Size([4, 1000, 6]), Label shape: torch.Size([4, 5])
torch.Size([1000, 1])
torch.Size([1000, 3])
torch.Size([1000, 3])
torch.Size([1000, 3])
Epoch: 0, Batch: 0, Loss: 6.907671928405762
Epoch: 0, Batch: 1, Loss: 6.908023834228516
Epoch: 1, Batch: 0, Loss: 6.907820224761963
Epoch: 1, Batch: 1, Loss: 6.907395839691162
Epoch: 2, Batch: 0, Loss: 6.907721519470215
Epoch: 2, Batch: 1, Loss: 6.908022403717041
Epoch: 3, Batch: 0, Loss: 6.9076738357543945
Epoch: 3, Batch: 1, Loss: 6.907707214355469
Epoch: 4, Batch: 0, Loss: 6.907719612121582
Epoch: 4, Batch: 1, Loss: 6.907750129699707
Epoch: 5, Batch: 0, Loss: 6.90783166885376
Epoch: 5, Batch: 1, Loss: 6.907077789306641
Epoch: 6, Batch: 0, Loss: 6.90779972076416
Epoch: 6, Batch: 1, Loss: 6.907143592834473
Epoch: 7, Batch: 0, Loss: 6.9076056480407715
Epoch: 7, Batch: 1, Loss: 6.907698154449463
Epoch: 8, Batch: 0, Loss: 6.907811164855957
Epoch: 8, Batch: 1, Loss: 6.906796455383301
Epoch: 9, 

## Analyze Results