In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import torch
from torch import nn
import torch.nn.functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import pandas as pd
# from brainflow.data_filter import DataFilter, FilterTypes, DetrendOperations, WindowOperations
from sklearn.metrics import f1_score
import torch.optim

In [2]:
class ToTensor(object):
    def __call__(self, sample):
        window, labels = sample['window'], sample['labels']
        return {'window': torch.tensor(window.values).to(torch.float32), 
                'labels': torch.tensor(labels).to(torch.float32)}#.values)}

class AbsoluteValue(object):
    def __call__(self, sample):
        window, labels = sample['window'], sample['labels']
        return {'window': torch.abs(window), 
                'labels': labels}

class MinNormalize(object):
    def __call__(self, sample):
        window, labels = sample['window'], sample['labels']
        for channel in range(len(window)):
            #print(len(window))
            min = torch.min(window[channel])
            window[channel] = window[channel] - min
            window[channel] = window[channel] / torch.max(window[channel])
            window[channel] = torch.nan_to_num(window[channel])
        return {'window': (window.unsqueeze(0)), 
                'labels': labels}

# class FixLabels(object):
#     def __call__(self, sample):
#         window, labels = sample['window'], sample['labels']
#         if labels[0] == 2:
#             labels = labels - 1
#         return {'window': window, 
#                 'labels': labels}

class EEGDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.csv_file = pd.read_csv(csv_file, delimiter='\t', header=None)
        self.transform = transform
    
    def __len__(self):
        return len(self.csv_file)
    
    def __getitem__(self, idx):
        # if idx >= len(self.csv_file) / 625:
        #     return

        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        sample_size = 625
        
        # window = self.csv_file.iloc[idx*625:idx*625+625, 1:17].transpose()
        window = self.csv_file.iloc[idx*sample_size:idx*sample_size+sample_size, 1:17]
        labels = [self.csv_file.iloc[idx*sample_size, 32]]#:idx*625+625, 32] # remove brackets for timestep prediction
        sample = {'window': window, 'labels': labels}

        if self.transform:
            sample = self.transform(sample)
        
        return sample

In [3]:
eeg_dataset = EEGDataset(csv_file='./redBlueFull.csv', transform=transforms.Compose([ToTensor(),
                                                                                  AbsoluteValue(),
                                                                                  MinNormalize()]))

# eeg_dataset = EEGDataset(csv_file='./fullData.csv', transform=transforms.Compose([ToTensor(),
#                                                                                   AbsoluteValue()]))

#eeg_dataset = EEGDataset(csv_file='./fullData.csv', transform=transforms.Compose([ToTensor()]))

eeg_dataset = torch.utils.data.Subset(eeg_dataset, range(0, 480))


# kuba_dataset = EEGDataset(csv_file='./redBlueFull.csv', transform=transforms.Compose([ToTensor(),
#                                                                                   AbsoluteValue(),
#                                                                                   MinNormalize()]))

# kuba_dataset = torch.utils.data.Subset(kuba_dataset, range(0, 480))

# kuba_loader = torch.utils.data.DataLoader(kuba_dataset, batch_size=48,
#                                             shuffle=True, num_workers=1)

# for x in eeg_dataset:
#     print(x["labels"])

print(eeg_dataset[0]['window'].shape)

#print(eeg_dataset[0]['window'][0][1])

# for channel in range(len(eeg_dataset[0]['window'][0])):
#     print(torch.min(eeg_dataset[0]['window'][0][channel]))

print("Full max:", torch.max(eeg_dataset[0]['window'][0]))
print("Full min:", torch.min(eeg_dataset[0]['window'][0]))

torch.manual_seed(50) # 80
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(eeg_dataset, [0.6, 0.2, 0.2])

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=True)

print(len(train_dataset), len(val_dataset), len(test_dataset))

zerocount = 0
onecount = 0

for i in range(len(train_dataset)):
    if train_dataset[i]['labels'][0] == 1:
        onecount += 1
    else:
        zerocount += 1

print(onecount, zerocount)

zerocount = 0
onecount = 0

for i in range(len(val_dataset)):
    if val_dataset[i]['labels'][0] == 1:
        onecount += 1
    else:
        zerocount += 1

print(onecount, zerocount)

zerocount = 0
onecount = 0

for i in range(len(test_dataset)):
    if test_dataset[i]['labels'][0] == 1:
        onecount += 1
    else:
        zerocount += 1

print(onecount, zerocount)

print(train_dataset[7])

torch.Size([1, 625, 16])
Full max: tensor(1.)
Full min: tensor(0.)
288 96 96
144 144
47 49
49 47
{'window': tensor([[[1.0000, 0.8881, 0.1018,  ..., 0.7791, 0.0499, 0.0000],
         [1.0000, 0.8880, 0.1014,  ..., 0.7807, 0.0495, 0.0000],
         [1.0000, 0.8880, 0.1028,  ..., 0.7785, 0.0499, 0.0000],
         ...,
         [1.0000, 0.8881, 0.0985,  ..., 0.7829, 0.0487, 0.0000],
         [1.0000, 0.8874, 0.1052,  ..., 0.7775, 0.0488, 0.0000],
         [1.0000, 0.8881, 0.0987,  ..., 0.7832, 0.0488, 0.0000]]]), 'labels': tensor([1.])}


In [4]:
print(test_dataset[0]['window'].shape)

torch.Size([1, 625, 16])


In [5]:
num_train = len(train_dataset)
# window_tensors = torch.empty(size=(num_train, 1, 16, 625))
# label_tensors = torch.empty(size=(num_train, 1))

# for i in range(num_train):
#     window_tensors[i] = train_dataset[i]['window']
#     label_tensors[i] = train_dataset[i]['labels']

# torch.save(window_tensors, 'window_tensors.pt')
# torch.save(label_tensors, 'label_tensors.pt')

# window_tensors = torch.load('window_tensors.pt')
# label_tensors = torch.load('label_tensors.pt')

# print(window_tensors.shape)
# print(label_tensors.shape)

In [6]:
num_val = len(val_dataset)
# val_window_tensors = torch.empty(size=(num_val, 1, 16, 625))
# val_label_tensors = torch.empty(size=(num_val, 1))

# for i in range(num_val):
#     val_window_tensors[i] = val_dataset[i]['window']
#     val_label_tensors[i] = val_dataset[i]['labels']

# torch.save(val_window_tensors, 'val_window_tensors.pt')
# torch.save(val_label_tensors, 'val_label_tensors.pt')

# val_window_tensors = torch.load('val_window_tensors.pt')
# val_label_tensors = torch.load('val_label_tensors.pt')

# print(val_window_tensors.shape)
# print(val_label_tensors.shape)

In [7]:
class EEGNet(nn.Module):
    def __init__(self):
        super(EEGNet, self).__init__()
        self.T = 625
        
        # Layer 1
        self.conv1 = nn.Conv2d(1, 16, (1, 16), padding = 0)
        self.batchnorm1 = nn.BatchNorm2d(16, False)
        
        # Layer 2
        self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
        self.conv2 = nn.Conv2d(1, 4, (2, 32))
        self.batchnorm2 = nn.BatchNorm2d(4, False)
        self.pooling2 = nn.MaxPool2d(2, 4)
        
        # Layer 3
        self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
        self.conv3 = nn.Conv2d(4, 4, (8, 4))
        self.batchnorm3 = nn.BatchNorm2d(4, False)
        self.pooling3 = nn.MaxPool2d((2, 4))
        
        # FC Layer
        # NOTE: This dimension will depend on the number of timestamps per sample in your data.
        # I have 120 timepoints. 
        self.fc1 = nn.Linear(4*2*39, 1)
        

    def forward(self, x):
        # Layer 1
        x = F.elu(self.conv1(x))
        x = self.batchnorm1(x)
        #x = F.dropout(x, 0.25)
        x = x.permute(0, 3, 1, 2)
        
        # Layer 2
        x = self.padding1(x)
        x = F.elu(self.conv2(x))
        x = self.batchnorm2(x)
        #x = F.dropout(x, 0.25)
        x = self.pooling2(x)
        
        # Layer 3
        x = self.padding2(x)
        x = F.elu(self.conv3(x))
        x = self.batchnorm3(x)
        #x = F.dropout(x, 0.25)
        x = self.pooling3(x)
        
        # FC Layer
        x = x.reshape(-1, 4*2*39)
        x = F.sigmoid(self.fc1(x))
        return x
    
    # def forward(self, x):
    #     # Layer 1
    #     print(x.shape)
    #     x = F.elu(self.conv1(x))
    #     print(x.shape)
    #     x = self.batchnorm1(x)
    #     x = F.dropout(x, 0.25)
    #     x = x.permute(0, 3, 1, 2)
    #     print(x.shape)
        
    #     # Layer 2
    #     x = self.padding1(x)
    #     print(x.shape)
    #     x = F.elu(self.conv2(x))
    #     print(x.shape)
    #     x = self.batchnorm2(x)
    #     x = F.dropout(x, 0.25)
    #     x = self.pooling2(x)
    #     print(x.shape)
        
    #     # Layer 3
    #     x = self.padding2(x)
    #     print(x.shape)
    #     x = F.elu(self.conv3(x))
    #     print(x.shape)
    #     x = self.batchnorm3(x)
    #     x = F.dropout(x, 0.25)
    #     x = self.pooling3(x)
        
    #     # FC Layer
    #     print(x.shape)
    #     x = x.reshape(-1, 4*2*39)
    #     print(x.shape)
    #     x = F.sigmoid(self.fc1(x))
    #     return x

model = EEGNet()
# model(torch.randn((625,)).unsqueeze(0))

In [8]:
optimizer = torch.optim.Adam(model.parameters())#, lr=0.001)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min")
criterion = nn.BCELoss()

In [9]:
# lossi = []

# model.train()
# for epoch in tqdm(range(10000)):
#     optimizer.zero_grad()
#     y_pred = model(train_dataset[0]['window'])

#     # if epoch == 0:
#     #     print(y_pred)

#     loss = criterion(y_pred, train_dataset[0]['labels'])
#     lossi.append(loss.item())

#     loss.backward()
#     optimizer.step()
    
#     # if epoch <= 1:
#     #     for p in model.parameters():
#     #         print(p.grad)
    

# plt.plot(lossi)

# print(lossi)

In [10]:
losses = []
lossi = []
vallosses = []
vallossi = []

In [11]:
minValLoss = 9999

for epoch in tqdm(range(5000)):
    for i_batch, sample_batched in enumerate(train_dataloader):
        optimizer.zero_grad()
        model.train()
        inputs, labels = sample_batched['window'], sample_batched['labels']
        #print(sample_batched)
        #print(inputs.shape, labels.shape)
        y_pred = model(inputs)
        # print(y_pred)

        # if epoch == 0:
        #     print(y_pred)
        
        #print(i)
        #print(window_tensors[i])
        #print(y_pred, label_tensors[i])
        loss = criterion(y_pred, labels)
        lossi.append(loss.item())
        loss.backward()
        optimizer.step()

        model.eval()
        for val_i_batch, val_sample_batched in enumerate(val_dataloader):
            val_inputs, val_labels = val_sample_batched['window'], val_sample_batched['labels']
            val_pred = model(val_inputs)
            loss = criterion(val_pred, val_labels)
            vallossi.append(loss.item())
        
        # scheduler.step(np.mean(vallossi))
        
        # if epoch <= 1:
        #     for p in model.parameters():
        #         print(p.grad)
    
    losses.append(np.mean(lossi))
    avgvalloss = np.mean(vallossi)
    if avgvalloss < minValLoss:
        minValLoss = avgvalloss
        torch.save(model.state_dict(), 'bestvalmodel.pt')
    vallosses.append(avgvalloss)
    if epoch % 5 == 0:
        plt.plot(losses, "b")
        plt.plot(vallosses, "g")
        plt.savefig('./valloss.png')
        plt.close()
    lossi = []
    vallossi = []
    

plt.plot(losses, "b")
plt.plot(vallosses, "g")

print(losses)

print(losses[-1])
torch.save(model.state_dict(), 'finaltrainmodel.pt')



 20%|██        | 1021/5000 [7:40:02<29:52:49, 27.03s/it]


KeyboardInterrupt: 

In [None]:
minindex = torch.argmin(torch.tensor(vallosses))
maxindex = torch.argmax(torch.tensor(vallosses))
print(losses[minindex], vallosses[minindex])
print(vallosses[maxindex], vallosses[maxindex] - vallosses[minindex])

In [None]:
modelWeights = torch.load('./bestvalmodel.pt', weights_only=True)
#modelWeights = torch.load('./finaltrainmodel.pt', weights_only=True)
model = EEGNet()
model.load_state_dict(modelWeights)
model.eval()
truepreds = 0
falsepreds = 0
truepositives = 0
falsepositives = 0
truenegatives = 0
falsenegatives = 0
preds = []
labels = []

with torch.no_grad():
    for i in range(len(test_dataset)):
        if i < 3:
            print(test_dataset[i]['window'])
        y_pred = model(test_dataset[i]['window'])
        # print(y_pred, test_dataset[i]['labels'])
        pred = (y_pred >= 0.5)
        preds.append(pred)
        label = test_dataset[i]['labels']
        labels.append(label)
        if pred == label:
            truepreds += 1
            if pred == 1:
                truepositives += 1
            else:
                truenegatives += 1
        else:
            falsepreds += 1
            if pred == 1:
                falsepositives += 1
            else:
                falsenegatives += 1

print(truepreds / (truepreds + falsepreds))
print(truepositives, truenegatives, falsepositives, falsenegatives)

f1Score = 2 * truepositives / (2 * truepositives + falsepositives + falsenegatives)
print(f1Score)

f1ScoreSKL = f1_score(labels, preds)
print(f1ScoreSKL)

# for x in test_dataset:
#     print(x["labels"])

print(len(test_dataset))
        

In [None]:
print((eeg_dataset[0]['window']))
#print(torch.min((no_normalize[0]['window'] - torch.min(no_normalize[0]['window']))/(torch.max(no_normalize[0]['window']))))

In [None]:
# torch.save(model, 'oct7model.pt')