In [1]:
import os
from zipfile import ZipFile
with ZipFile('BONN epilepsy dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/BONN_epilepsy_dataset')
parent_dir = '/content/BONN_epilepsy_dataset'

In [2]:
parent_dir = '/content/BONN_epilepsy_dataset/BONN epilepsy dataset'
print("Contents of the updated dataset directory:")
print(os.listdir(parent_dir))

Contents of the updated dataset directory:
['O', 'F', 'S']


In [3]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import os

parent_dir = '/content/BONN_epilepsy_dataset/BONN epilepsy dataset'
directories = ['S', 'F', 'O']
labels_map = {'S': 'boundary', 'F': 'unknown', 'O': 'safe'}  #label mapping: "safe" and "boundary"

def load_data(parent_dir, directories):
    eeg_data = []
    labels = []

    for dir_name in directories:
        dir_path = os.path.join(parent_dir, dir_name)
        for file_name in sorted(os.listdir(dir_path)):
            file_path = os.path.join(dir_path, file_name)
            with open(file_path, 'r') as file:
                data_points = [int(line.strip()) for line in file]
                eeg_data.append(data_points)
                labels.append(labels_map[dir_name])

    return np.array(eeg_data), np.array(labels)

eeg_data, labels = load_data(parent_dir, directories)

def extract_features(eeg_data):
    """
    Extracts statistical features from EEG data.
    This includes variance, kurtosis, and entropy.
    """
    eeg_data_safe = np.where(eeg_data > 0, eeg_data, 1e-9)

    variance = np.var(eeg_data, axis=1)
    kurtosis = np.mean((eeg_data - np.mean(eeg_data, axis=1, keepdims=True))**4, axis=1)
    entropy = -np.sum(eeg_data_safe * np.log2(eeg_data_safe), axis=1)
    features = np.vstack([variance, kurtosis, entropy]).T
    return features

#kNN boundary detection for F samples
def kNN_boundary_detection(f_data, knn_model):
    """
    Classifies F samples into 'safe', 'boundary', or 'noise' based on kNN model trained on O and S.
    """
    features = extract_features(f_data)
    predictions = knn_model.predict(features)
    return predictions

#training data (only O and S samples)
o_indices = labels == 'safe'
s_indices = labels == 'boundary'

#we combined O and S samples
train_data = np.vstack([eeg_data[o_indices], eeg_data[s_indices]])
train_labels = np.concatenate([labels[o_indices], labels[s_indices]])

train_features = extract_features(train_data)

#trainning the kNN classifier on O and S samples
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(train_features, train_labels)

#F samples classification
f_indices = labels == 'unknown'
f_data = eeg_data[f_indices]
eeg_types_f = kNN_boundary_detection(f_data, knn)

print("Classifications for F (epileptic non-ictal) samples:", eeg_types_f)


Classifications for F (epileptic non-ictal) samples: ['safe' 'boundary' 'safe' 'safe' 'safe' 'safe' 'safe' 'boundary'
 'boundary' 'boundary' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe'
 'safe' 'safe' 'safe' 'boundary' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe'
 'safe' 'boundary' 'boundary' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe'
 'safe' 'safe' 'safe' 'safe' 'boundary' 'safe' 'safe' 'safe' 'safe' 'safe'
 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe'
 'safe' 'safe' 'safe' 'boundary' 'boundary' 'safe' 'safe' 'safe' 'safe'
 'safe' 'safe' 'safe' 'safe' 'boundary' 'safe' 'safe' 'boundary' 'safe'
 'safe' 'boundary' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe'
 'safe' 'boundary' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe' 'safe'
 'safe' 'safe' 'safe' 'boundary' 'safe' 'safe']


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

latent_dim = 100
data_dim = eeg_data.shape[1]
num_classes = 3  # 0 = safe, 1 = boundary, 2 = noise
batch_size = 64
epochs = 500
lr = 0.0002
beta1 = 0.5

class ConditionalGenerator(nn.Module):
    def __init__(self, latent_dim, data_dim, num_classes):
        super(ConditionalGenerator, self).__init__()
        self.label_embedding = nn.Embedding(num_classes, num_classes)
        self.model = nn.Sequential(
            nn.Linear(latent_dim + num_classes, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, data_dim),
            nn.Tanh()
        )

    def forward(self, z, labels):
        label_embeddings = self.label_embedding(labels)
        input = torch.cat((z, label_embeddings), dim=1)
        return self.model(input)


class ConditionalDiscriminator(nn.Module):
    def __init__(self, data_dim, num_classes):
        super(ConditionalDiscriminator, self).__init__()
        self.label_embedding = nn.Embedding(num_classes, num_classes)
        self.model = nn.Sequential(
            nn.Linear(data_dim + num_classes, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x, labels):
        label_embeddings = self.label_embedding(labels)
        input = torch.cat((x, label_embeddings), dim=1)
        return self.model(input)


generator = ConditionalGenerator(latent_dim, data_dim, num_classes).to(device)
discriminator = ConditionalDiscriminator(data_dim, num_classes).to(device)
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(beta1, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(beta1, 0.999))
criterion = nn.BCELoss()
eeg_data_normalized = (eeg_data - np.min(eeg_data)) / (np.max(eeg_data) - np.min(eeg_data))
eeg_data_normalized = 2 * eeg_data_normalized - 1


def classify_data(eeg_data, labels, k=5):
    from sklearn.neighbors import NearestNeighbors
    neighbors = NearestNeighbors(n_neighbors=k).fit(eeg_data)
    types = []
    distances, indices = neighbors.kneighbors(eeg_data)

    for i, dist in enumerate(indices):
        l = sum(labels[neighbor] == 0 for neighbor in dist)  #we count non-ictal (safe) samples
        if k >= l > 0.5 * k:
            types.append(0)  #safe
        elif 0 < l <= 0.5 * k:
            types.append(1)  #boundary
        else:
            types.append(2)  #noise
    return np.array(types)

real_labels = classify_data(eeg_data_normalized, labels)
eeg_tensor = torch.tensor(eeg_data_normalized, dtype=torch.float32).to(device)
label_tensor = torch.tensor(real_labels, dtype=torch.long).to(device)
dataloader = DataLoader(TensorDataset(eeg_tensor, label_tensor), batch_size=batch_size, shuffle=True)

for epoch in range(epochs):
    for real_samples, real_class_labels in dataloader:
        real_samples = real_samples.to(device)
        real_class_labels = real_class_labels.to(device)
        batch_size_curr = real_samples.size(0)
        z = torch.randn(batch_size_curr, latent_dim).to(device)
        fake_class_labels = torch.randint(0, num_classes, (batch_size_curr,), dtype=torch.long).to(device)
        fake_samples = generator(z, fake_class_labels).detach()
        real_targets = torch.ones(batch_size_curr, 1).to(device)
        fake_targets = torch.zeros(batch_size_curr, 1).to(device)
        optimizer_D.zero_grad()
        real_loss = criterion(discriminator(real_samples, real_class_labels), real_targets)
        fake_loss = criterion(discriminator(fake_samples, fake_class_labels), fake_targets)
        d_loss = real_loss + fake_loss
        d_loss.backward()
        optimizer_D.step()
        z = torch.randn(batch_size_curr, latent_dim).to(device)
        target_class_labels = torch.randint(0, num_classes, (batch_size_curr,), dtype=torch.long).to(device)
        fake_samples = generator(z, target_class_labels)
        optimizer_G.zero_grad()
        g_loss = criterion(discriminator(fake_samples, target_class_labels), real_targets)
        g_loss.backward()
        optimizer_G.step()

    if (epoch + 1) % 50 == 0:
        print(f"Epoch [{epoch+1}/{epochs}] | D Loss: {d_loss.item():.4f} | G Loss: {g_loss.item():.4f}")

for label in range(num_classes):
    z = torch.randn(1000, latent_dim).to(device)
    labels = torch.full((1000,), label, dtype=torch.long).to(device)
    synthetic_data = generator(z, labels).detach().cpu().numpy()
    print(f"Synthetic data for class {label}:")
    print(synthetic_data)


Epoch [50/500] | D Loss: 0.6471 | G Loss: 2.7824
Epoch [100/500] | D Loss: 0.5809 | G Loss: 3.2908
Epoch [150/500] | D Loss: 0.5234 | G Loss: 5.7301
Epoch [200/500] | D Loss: 1.1102 | G Loss: 3.3755
Epoch [250/500] | D Loss: 0.7679 | G Loss: 4.4525
Epoch [300/500] | D Loss: 0.5460 | G Loss: 4.5680
Epoch [350/500] | D Loss: 0.5910 | G Loss: 6.0243
Epoch [400/500] | D Loss: 0.8792 | G Loss: 7.7901
Epoch [450/500] | D Loss: 0.5640 | G Loss: 6.6575
Epoch [500/500] | D Loss: 0.5424 | G Loss: 3.2405
Synthetic data for class 0:
[[-0.13512728 -0.1303159  -0.11821997 ... -0.10040642 -0.15358537
  -0.16299437]
 [-0.2188824  -0.2050671  -0.20556138 ... -0.15817036 -0.2596087
  -0.26030073]
 [-0.16966383 -0.15120155 -0.14558811 ... -0.12701839 -0.19798803
  -0.19730628]
 ...
 [-0.20016119 -0.17500661 -0.17878284 ... -0.1440734  -0.22840936
  -0.24232003]
 [-0.20182867 -0.18056881 -0.16984576 ... -0.14019625 -0.23345041
  -0.21809308]
 [-0.17684287 -0.15460502 -0.15071091 ... -0.12145904 -0.1904117

In [8]:
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

parent_dir = '/content/BONN_epilepsy_dataset/BONN epilepsy dataset'

def load_bonn_data(parent_dir):
    data = []
    labels = []
    for category_dir in os.listdir(parent_dir):
        category_path = os.path.join(parent_dir, category_dir)
        if os.path.isdir(category_path):
            label = len(labels)
            for file in os.listdir(category_path):
                file_path = os.path.join(category_path, file)
                sample = np.loadtxt(file_path)
                data.append(sample)
                labels.append(label)
    return np.array(data), np.array(labels)

real_data, real_labels = load_bonn_data(parent_dir)

X_train, X_test, y_train, y_test = train_test_split(real_data, real_labels, test_size=0.2, random_state=42)
k = 10  #number of neighbors
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
print("Unique labels in y_train:", np.unique(y_train))


#print("Evaluation on Real Data:")
y_pred_real = knn.predict(X_test)
#print(classification_report(y_test, y_pred_real))
#print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_real))


Unique labels in y_train: [  0 100 200]


In [9]:
# Mapping dictionary
label_mapping = {0: 0, 100: 1, 200: 2}

# Apply the mapping to y_train
y_train_mapped = np.array([label_mapping[label] for label in y_train])

# Verify the mapping
print("Unique labels in y_train_mapped:", np.unique(y_train_mapped))


Unique labels in y_train_mapped: [0 1 2]


In [10]:
print("Evaluation on Real Data:")
y_pred_real = knn.predict(X_test)
print(classification_report(y_test, y_pred_real))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_real))

Evaluation on Real Data:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        22
         100       0.28      1.00      0.43        16
         200       1.00      0.05      0.09        22

    accuracy                           0.28        60
   macro avg       0.43      0.35      0.17        60
weighted avg       0.44      0.28      0.15        60

Confusion Matrix:
 [[ 0 22  0]
 [ 0 16  0]
 [ 1 20  1]]


In [12]:
import numpy as np
import torch
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def generate_synthetic_data(class_label, num_samples):
    """
    Generate synthetic data using the GAN.
    Args:
        class_label (int): The class for which data is to be generated.
        num_samples (int): Number of samples to generate.
    Returns:
        numpy.ndarray: Synthetic data of shape (num_samples, num_features).
    """
    generator.eval()
    z = torch.randn(num_samples, latent_dim).to(device)
    labels = torch.full((num_samples,), class_label, dtype=torch.long).to(device)
    with torch.no_grad():
        synthetic_data = generator(z, labels).detach().cpu().numpy()

    return synthetic_data

num_synthetic_samples = 200
synthetic_data = []
synthetic_labels = []

#we generate synthetic data for each class
for class_label in [0, 1, 2]:  #classes: 0 = safe 1 = boundary 2 = noise
    data = generate_synthetic_data(class_label, num_synthetic_samples)
    synthetic_data.append(data)
    synthetic_labels.append(np.full(num_synthetic_samples, class_label))
synthetic_data = np.vstack(synthetic_data)
synthetic_labels = np.concatenate(synthetic_labels)

knn = KNeighborsClassifier(n_neighbors=10)
#knn.fit(X_train, y_train)
knn.fit(X_train, y_train_mapped)
y_pred_synthetic = knn.predict(synthetic_data)
print("Evaluation on Synthetic Data:")
print(classification_report(synthetic_labels, y_pred_synthetic))
print("Confusion Matrix:\n", confusion_matrix(synthetic_labels, y_pred_synthetic))


Evaluation on Synthetic Data:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       200
           1       0.33      1.00      0.50       200
           2       0.00      0.00      0.00       200

    accuracy                           0.33       600
   macro avg       0.11      0.33      0.17       600
weighted avg       0.11      0.33      0.17       600

Confusion Matrix:
 [[  0 200   0]
 [  0 200   0]
 [  0 200   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
