# Data PreProcessing

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

# 1. Load dataset
df = pd.read_csv("/kaggle/input/epileptic-seizure-recognition/Epileptic Seizure Recognition.csv")

# 2. Drop index column if present
if 'Unnamed' in df.columns:
    df.drop('Unnamed', axis=1, inplace=True)

# 3. Convert multiclass to binary:
# Class 1 = seizure (label 1), Classes 2–5 = non-seizure (label 0)
df['y_binary'] = (df['y'] == 1).astype(int)

# 4. Define feature columns and extract data
feature_cols = [f'X{i}' for i in range(1, 179)]  # X1 to X178
X = df[feature_cols].values                     # Shape: (11500, 178)
y = df['y_binary'].values                       # Shape: (11500,)

# 5. Standardize features (mean 0, std 1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 6. Reshape for RNN input: (samples, timesteps, features_per_timestep)
# Here: each sample has 178 timesteps, 1 feature per timestep
X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)  # (11500, 178, 1)

# 7. Split data: 60% train, 20% validation, 20% test
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_reshaped, y, test_size=0.2, stratify=y, random_state=42)

X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.25, stratify=y_train_val, random_state=42)

# 8. Compute class weights for imbalanced data
class_weights = class_weight.compute_class_weight(
    class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))

# 9. Print shapes and weights
print("Data shapes:")
print(f"  Train:      {X_train.shape}")
print(f"  Validation: {X_val.shape}")
print(f"  Test:       {X_test.shape}")
print(f"Class weights: {class_weight_dict}")


Data shapes:
  Train:      (6900, 178, 1)
  Validation: (2300, 178, 1)
  Test:       (2300, 178, 1)
Class weights: {0: 0.625, 1: 2.5}


# **Complex RNN**


In [2]:
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.optimizers import Adam

def build_model(hp):
    model = Sequential()
    model.add(Bidirectional(
        LSTM(hp.Int('lstm_units1', min_value=32, max_value=128, step=16),
             return_sequences=True, activation='tanh'),
        input_shape=(178, 1)
    ))
    model.add(Dropout(hp.Float('dropout1', 0.2, 0.5, step=0.1)))
    model.add(BatchNormalization())

    model.add(Bidirectional(
        LSTM(hp.Int('lstm_units2', min_value=16, max_value=64, step=16),
             activation='tanh')
    ))
    model.add(Dropout(hp.Float('dropout2', 0.2, 0.5, step=0.1)))
    model.add(BatchNormalization())

    model.add(Dense(hp.Int('dense_units', 32, 128, step=32), activation='relu'))
    model.add(Dropout(hp.Float('dropout3', 0.2, 0.5, step=0.1)))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=Adam(learning_rate=hp.Choice('lr', [1e-3, 1e-4, 5e-4])),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model



2025-05-22 16:23:48.336106: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747931028.587208      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747931028.659254      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Bayesian Hyperparameter Tuning

In [3]:
import keras_tuner as kt
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score


# 2. Setup tuner
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=20,
    directory='seizure_tuning',
    project_name='rnn_seizure_detection'
)

# 3. Early stopping
stop_early = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# 4. Hyperparameter search
tuner.search(X_train, y_train,
             epochs=30,
             validation_data=(X_val, y_val),
             callbacks=[stop_early],
             batch_size=32)

# 5. Get best model and best hyperparameters
best_model = tuner.get_best_models(num_models=1)[0]
best_hp = tuner.get_best_hyperparameters(1)[0]
print("Best hyperparameters:", best_hp.values)

# 6. Retrain on train+val data (optional)
best_model.fit(X_train, y_train,
               epochs=30,
               validation_data=(X_val, y_val),
               batch_size=32,
               callbacks=[stop_early])

# 7. Accuracy evaluation
train_preds = (best_model.predict(X_train) > 0.5).astype(int)
val_preds = (best_model.predict(X_val) > 0.5).astype(int)
test_preds = (best_model.predict(X_test) > 0.5).astype(int)

train_acc = accuracy_score(y_train, train_preds)
val_acc = accuracy_score(y_val, val_preds)
test_acc = accuracy_score(y_test, test_preds)

print(f"Training Accuracy:   {train_acc:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}")
print(f"Test Accuracy:       {test_acc:.4f}")


Trial 20 Complete [00h 15m 08s]
val_accuracy: 0.9817391037940979

Best val_accuracy So Far: 0.9817391037940979
Total elapsed time: 04h 35m 07s
Best hyperparameters: {'lstm_units1': 32, 'dropout1': 0.30000000000000004, 'lstm_units2': 64, 'dropout2': 0.30000000000000004, 'dense_units': 96, 'dropout3': 0.4, 'lr': 0.0005}
Epoch 1/30


  saveable.load_own_variables(weights_store.get(inner_path))


[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 174ms/step - accuracy: 0.9809 - loss: 0.0536 - val_accuracy: 0.9787 - val_loss: 0.0670
Epoch 2/30
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 170ms/step - accuracy: 0.9817 - loss: 0.0439 - val_accuracy: 0.9761 - val_loss: 0.0800
Epoch 3/30
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 168ms/step - accuracy: 0.9855 - loss: 0.0391 - val_accuracy: 0.9796 - val_loss: 0.0780
Epoch 4/30
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 170ms/step - accuracy: 0.9818 - loss: 0.0507 - val_accuracy: 0.9800 - val_loss: 0.0771
Epoch 5/30
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 169ms/step - accuracy: 0.9853 - loss: 0.0408 - val_accuracy: 0.9813 - val_loss: 0.0658
Epoch 6/30
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 169ms/step - accuracy: 0.9872 - loss: 0.0399 - val_accuracy: 0.9765 - val_loss: 0.0857
Epoch 7/30
[1m216/21