In [1]:
# Cell 1: Import Required Libraries
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import keras
import scipy
import sklearn
import os
from scipy.signal import butter, filtfilt
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report



In [2]:
# Cell 2: Load EEG Data
data_folder = r"D:\dataset eeg"

# Get all .npz files in the folder
npz_files = [f for f in os.listdir(data_folder) if f.endswith('.npz')]

# Load all .npz files into a dictionary
eeg_data = {}
for file in npz_files:
    file_path = os.path.join(data_folder, file)
    data = np.load(file_path, allow_pickle=True)
    eeg_data[file] = data

# Print keys of the first file
first_file = npz_files[0]
print(f"Keys in {first_file}: {list(eeg_data[first_file].keys())}")


Keys in eeg-predictive_train.npz: ['train_signals', 'train_labels']


In [3]:
# Cell 3: Merge Training Data
train_signals_1 = eeg_data["eeg-predictive_train.npz"]["train_signals"]
train_labels_1 = eeg_data["eeg-predictive_train.npz"]["train_labels"]

train_signals_2 = eeg_data["eeg-seizure_train.npz"]["train_signals"]
train_labels_2 = eeg_data["eeg-seizure_train.npz"]["train_labels"]

# Merge the datasets
train_signals = np.concatenate((train_signals_1, train_signals_2), axis=0)
train_labels = np.concatenate((train_labels_1, train_labels_2), axis=0)

# Print shape of merged data
print("Merged Train Signals Shape:", train_signals.shape)
print("Merged Train Labels Shape:", train_labels.shape)


Merged Train Signals Shape: (45948, 23, 256)
Merged Train Labels Shape: (45948,)


In [4]:
# Find indices of seizure (1) and non-seizure (0) samples
seizure_indices = np.where(train_labels == 1)[0]
non_seizure_indices = np.where(train_labels == 0)[0]

# Randomly select 5,000 samples from each class
np.random.seed(42)  # For reproducibility
seizure_sample = np.random.choice(seizure_indices, 5000, replace=False)
non_seizure_sample = np.random.choice(non_seizure_indices, 5000, replace=False)

# Combine selected samples
selected_indices = np.concatenate((seizure_sample, non_seizure_sample))

# Subset the dataset
train_signals_balanced = train_signals[selected_indices]
train_labels_balanced = train_labels[selected_indices]

# Print new dataset size
print("Balanced Train Signals Shape:", train_signals_balanced.shape)
print("Balanced Train Labels Shape:", train_labels_balanced.shape)

# Check label distribution
unique, counts = np.unique(train_labels_balanced, return_counts=True)
print("New Label Distribution:", dict(zip(unique, counts)))

Balanced Train Signals Shape: (10000, 23, 256)
Balanced Train Labels Shape: (10000,)
New Label Distribution: {0.0: 5000, 1.0: 5000}


In [5]:
# Compute Mean Absolute Value (MAV) for each sample
mav = np.mean(np.abs(train_signals_balanced), axis=(1,2))

# Define thresholds for classification
severe_threshold = np.percentile(mav, 90)  # Top 10% = Severe
mild_threshold = np.percentile(mav, 50)    # Middle 50-90% = Mild

# Assign new labels based on amplitude
new_labels = []
for value in mav:
    if value >= severe_threshold:
        new_labels.append(1)  # Severe Epileptic
    elif value >= mild_threshold:
        new_labels.append(2)  # Mild Epileptic
    else:
        new_labels.append(0)  # Non-Epileptic

train_labels_balanced = np.array(new_labels)

# Check new label distribution
unique, counts = np.unique(train_labels_balanced, return_counts=True)
print("Updated Label Distribution:", dict(zip(unique, counts)))



Updated Label Distribution: {0: 5000, 1: 1000, 2: 4000}


In [6]:
# Normalize EEG signals (Min-Max Scaling)
train_signals_balanced = (train_signals_balanced - np.min(train_signals_balanced)) / (np.max(train_signals_balanced) - np.min(train_signals_balanced))

print("EEG Signals After Normalization:")
print("Min:", np.min(train_signals_balanced), "Max:", np.max(train_signals_balanced))

EEG Signals After Normalization:
Min: 0.0 Max: 1.0


In [7]:
# Bandpass filter function (removes noise outside 0.5–40 Hz)
def bandpass_filter(data, lowcut=0.5, highcut=40.0, fs=256, order=5):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return filtfilt(b, a, data, axis=-1)

# Apply bandpass filter to EEG signals
train_signals_filtered = bandpass_filter(train_signals_balanced)

print("EEG Signals After Bandpass Filtering:")
print("Shape:", train_signals_filtered.shape)

EEG Signals After Bandpass Filtering:
Shape: (10000, 23, 256)


In [8]:
from tensorflow.keras.utils import to_categorical

# Convert labels to categorical (one-hot encoding)
train_labels_onehot = to_categorical(train_labels_balanced, num_classes=3)

# Print first 5 samples to check encoding
print("Example of One-Hot Encoded Labels:")
print(train_labels_onehot[:5])

Example of One-Hot Encoded Labels:
[[0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]]


In [9]:
# Feature Extraction using InceptionV3 (adapted for time series)
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import GlobalAveragePooling1D, Reshape, Resizing, Flatten, Dense
from tensorflow.keras.models import Model

# Reshape input to (samples, time_steps, channels)
train_signals_inception = np.transpose(train_signals_filtered, (0, 2, 1))

# Add a dummy height dimension to make it compatible with InceptionV3
train_signals_inception = np.expand_dims(train_signals_inception, axis=-1)  # Add channel dimension
train_signals_inception = np.repeat(train_signals_inception, 3, axis=-1) #repeat to make 3 channels.

# Resize the second dimension to at least 75
target_width = 75
train_signals_inception = Resizing(height=train_signals_inception.shape[1], width=target_width)(train_signals_inception)

# Load InceptionV3 (without top layers)
base_model = InceptionV3(weights=None, include_top=False, input_shape=(train_signals_inception.shape[1], target_width, 3))

# Add custom layers for time series
x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation='relu')(x) #Reduce feature dimension.
x = Reshape((1, -1))(x)

model_inception = Model(inputs=base_model.input, outputs=x)

# Feature extraction
features = model_inception.predict(train_signals_inception)

print("InceptionV3 Feature Shape:", features.shape)

# Transformer Model
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dense, Input
from tensorflow.keras.models import Model

# Transformer Parameters
num_heads = 4 #reduce number of heads.
ff_dim = 1024 #reduce ff_dim

input_shape = features.shape[1:]

# Transformer Layers
inputs = Input(shape=input_shape)
attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=features.shape[-1])(inputs, inputs)
norm_output = LayerNormalization(epsilon=1e-6)(inputs + attention_output)
ffn_output = Dense(ff_dim, activation='relu')(norm_output)
ffn_output = Dense(features.shape[-1])(ffn_output)
transformer_output = LayerNormalization(epsilon=1e-6)(norm_output + ffn_output)

# Classification Layers
flat_output = tf.keras.layers.Flatten()(transformer_output)
outputs = Dense(3, activation='softmax')(flat_output)

# Transformer Model
transformer_model = Model(inputs=inputs, outputs=outputs)

# Compile the model
transformer_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
transformer_model.fit(features, train_labels_onehot, epochs=10, batch_size=80, validation_split=0.2)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 317ms/step
InceptionV3 Feature Shape: (10000, 1, 1024)
Epoch 1/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m672s[0m 7s/step - accuracy: 0.4287 - loss: 2.8848 - val_accuracy: 0.6095 - val_loss: 0.8487
Epoch 2/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m664s[0m 7s/step - accuracy: 0.4523 - loss: 1.0067 - val_accuracy: 0.3540 - val_loss: 0.8960
Epoch 3/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m660s[0m 7s/step - accuracy: 0.4414 - loss: 1.0366 - val_accuracy: 0.6095 - val_loss: 0.9222
Epoch 4/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m660s[0m 7s/step - accuracy: 0.4463 - loss: 1.0553 - val_accuracy: 0.6095 - val_loss: 0.8280
Epoch 5/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m654s[0m 7s/step - accuracy: 0.4494 - loss: 0.9872 - val_accuracy: 0.3540 - val_loss: 0.8987
Epoch 6/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x188392f3980>