# This Notebook works with predicting jus tthe probablities of the 6 classifications

1. EEG Model — using a 1D CNN or MLP (PyTorch or Keras)
2. Spectrogram Model — using XGBoost or LightGBM
3. Ensemble — Average or train meta-model on both predictions

In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam



In [6]:
df = pd.read_csv("/Users/Patron/Documents/brain-waves-classification/hms_data/eeg_spect_full_features.csv")

In [12]:
# EEG features
eeg_features = [
    'Fp1_median','Fp1_std','Fp1_min','Fp1_max',
    'O1_median','O1_std','O1_min','O1_max',
    'O2_median','O2_std','O2_min','O2_max',
    'Fp2_median','Fp2_std','Fp2_min','Fp2_max'
]

# Spectrogram features
spect_features = [
    'LL_median','LL_std','LL_min','LL_max',
    'RL_median','RL_std','RL_min','RL_max',
    'LP_median','LP_std','LP_min','LP_max',
    'RP_median','RP_std','RP_min','RP_max'
]

# Labels
target_cols = [
    "seizure_vote_prob", "lpd_vote_prob", "gpd_vote_prob",
    "lrda_vote_prob", "grda_vote_prob", "other_vote_prob"
]


In [13]:
X_eeg = df[eeg_features]
X_spect = df[spect_features]
y = df[target_cols]

X_eeg_train, X_eeg_test, y_train, y_test = train_test_split(X_eeg, y, test_size=0.2, random_state=42)
X_spect_train, X_spect_test, _, _ = train_test_split(X_spect, y, test_size=0.2, random_state=42)

In [15]:
eeg_model = Sequential([
    Dense(64, input_dim=X_eeg_train.shape[1], activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(len(target_cols), activation='linear')  # Regression output
])

eeg_model.compile(optimizer=Adam(0.001), loss='mse')
eeg_model.fit(X_eeg_train, y_train, epochs=50, batch_size=32, verbose=1)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 376us/step - loss: 116278.8359
Epoch 2/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 393us/step - loss: 1011.6475
Epoch 3/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 374us/step - loss: 154.0441
Epoch 4/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 378us/step - loss: 27.8208
Epoch 5/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 393us/step - loss: 4.1555
Epoch 6/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 373us/step - loss: 0.7696
Epoch 7/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 377us/step - loss: 0.3575
Epoch 8/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 394us/step - loss: 0.2666
Epoch 9/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 387us/step - loss: 0.1362
Epoch 10/50
[1m2670/2670[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x323cb3e50>

In [16]:
spect_model = MultiOutputRegressor(XGBRegressor(n_estimators=100, max_depth=4, random_state=42))
spect_model.fit(X_spect_train, y_train)

In [17]:
eeg_preds = eeg_model.predict(X_eeg_test)
spect_preds = spect_model.predict(X_spect_test)

# Simple average ensemble
final_preds = (eeg_preds + spect_preds) / 2.0

[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 239us/step


In [18]:
for i, col in enumerate(target_cols):
    rmse = mean_squared_error(y_test.iloc[:, i], final_preds[:, i])
    r2 = r2_score(y_test.iloc[:, i], final_preds[:, i])
    print(f"{col}: RMSE = {rmse:.4f}, R2 = {r2:.4f}")

seizure_vote_prob: RMSE = 0.0970, R2 = 0.3212
lpd_vote_prob: RMSE = 0.0455, R2 = 0.4032
gpd_vote_prob: RMSE = 0.0371, R2 = 0.5179
lrda_vote_prob: RMSE = 0.0436, R2 = 0.4493
grda_vote_prob: RMSE = 0.0640, R2 = 0.4281
other_vote_prob: RMSE = 0.0728, R2 = 0.2700


1. A Random Forest model (classic ML) for EEG features.
2. An EfficientNet (deep learning) for Spectrogram features.
3. A final backpropagation-based ensemble

In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping

In [21]:
# Load your data
df = pd.read_csv("/Users/Patron/Documents/brain-waves-classification/hms_data/eeg_spect_full_features.csv")

# EEG features (for RF)
eeg_cols = [
    'Fp1_median','Fp1_std','Fp1_min','Fp1_max',
    'O1_median','O1_std','O1_min','O1_max',
    'O2_median','O2_std','O2_min','O2_max',
    'Fp2_median','Fp2_std','Fp2_min','Fp2_max'
]

# Spectrogram features (for EfficientNet) - we'll reshape them
spect_cols = [
    'LL_median','LL_std','LL_min','LL_max',
    'RL_median','RL_std','RL_min','RL_max',
    'LP_median','LP_std','LP_min','LP_max',
    'RP_median','RP_std','RP_min','RP_max'
]

# Labels
target_cols = [
    "seizure_vote_prob", "lpd_vote_prob", "gpd_vote_prob",
    "lrda_vote_prob", "grda_vote_prob", "other_vote_prob"
]

# Split
X_eeg = df[eeg_cols]
X_spect = df[spect_cols]
y = df[target_cols]

X_eeg_train, X_eeg_test, X_spect_train, X_spect_test, y_train, y_test = train_test_split(
    X_eeg, X_spect, y, test_size=0.2, random_state=42
)

In [22]:
#Random Forest on EEG
rf_model = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
rf_model.fit(X_eeg_train, y_train)
rf_preds_train = rf_model.predict(X_eeg_train)
rf_preds_test = rf_model.predict(X_eeg_test)

In [23]:
#EfficientNet on Spectrogram Features
# Normalize
scaler = StandardScaler()
X_spect_train_scaled = scaler.fit_transform(X_spect_train)
X_spect_test_scaled = scaler.transform(X_spect_test)

# Reshape to 2D "image-like" input for EfficientNet
X_spect_train_reshaped = X_spect_train_scaled.reshape(-1, 4, 4, 1)
X_spect_test_reshaped = X_spect_test_scaled.reshape(-1, 4, 4, 1)

# EfficientNet base
base_input = Input(shape=(4, 4, 1))
x = GlobalAveragePooling2D()(base_input)
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)
eff_output = Dense(6, activation='linear')(x)
eff_model = Model(inputs=base_input, outputs=eff_output)
eff_model.compile(optimizer=Adam(0.001), loss='mse')
eff_model.fit(X_spect_train_reshaped, y_train, epochs=100, batch_size=32, validation_split=0.1,
              callbacks=[EarlyStopping(patience=10, restore_best_weights=True)], verbose=1)

eff_preds_train = eff_model.predict(X_spect_train_reshaped)
eff_preds_test = eff_model.predict(X_spect_test_reshaped)

Epoch 1/100
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 395us/step - loss: 0.1040 - val_loss: 0.0974
Epoch 2/100
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 370us/step - loss: 0.0981 - val_loss: 0.0971
Epoch 3/100
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 370us/step - loss: 0.0977 - val_loss: 0.0971
Epoch 4/100
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 386us/step - loss: 0.0977 - val_loss: 0.0970
Epoch 5/100
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 371us/step - loss: 0.0977 - val_loss: 0.0970
Epoch 6/100
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 375us/step - loss: 0.0979 - val_loss: 0.0970
Epoch 7/100
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 387us/step - loss: 0.0975 - val_loss: 0.0970
Epoch 8/100
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 371us/step - loss: 0.0976 - val_loss: 0.0970


In [24]:
# Final Ensemble Model (Backpropagation)
# Stack predictions
X_ensemble_train = np.hstack([rf_preds_train, eff_preds_train])
X_ensemble_test = np.hstack([rf_preds_test, eff_preds_test])

# Build final ensemble model
ensemble_model = Sequential([
    Dense(64, input_shape=(12,), activation='relu'),
    Dropout(0.3),
    Dense(6, activation='linear')  # Final output for 6 vote probabilities
])

ensemble_model.compile(optimizer=Adam(0.001), loss='mse')
ensemble_model.fit(X_ensemble_train, y_train, epochs=50, batch_size=32, validation_split=0.1,
                   callbacks=[EarlyStopping(patience=10, restore_best_weights=True)], verbose=1)

final_preds = ensemble_model.predict(X_ensemble_test)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 397us/step - loss: 0.0655 - val_loss: 0.0500
Epoch 2/50
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 392us/step - loss: 0.0536 - val_loss: 0.0493
Epoch 3/50
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 381us/step - loss: 0.0537 - val_loss: 0.0492
Epoch 4/50
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 377us/step - loss: 0.0533 - val_loss: 0.0492
Epoch 5/50
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 377us/step - loss: 0.0538 - val_loss: 0.0491
Epoch 6/50
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 390us/step - loss: 0.0538 - val_loss: 0.0488
Epoch 7/50
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 379us/step - loss: 0.0536 - val_loss: 0.0489
Epoch 8/50
[1m2403/2403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 376us/step - loss: 0.0533 - val_loss: 0.0492
Epoch 9/50
[1m2403

In [25]:
for i, col in enumerate(target_cols):
    rmse = mean_squared_error(y_test.iloc[:, i], final_preds[:, i])
    r2 = r2_score(y_test.iloc[:, i], final_preds[:, i])
    print(f"{col}: RMSE = {rmse:.4f}, R2 = {r2:.4f}")


seizure_vote_prob: RMSE = 0.0751, R2 = 0.4743
lpd_vote_prob: RMSE = 0.0451, R2 = 0.4082
gpd_vote_prob: RMSE = 0.0335, R2 = 0.5645
lrda_vote_prob: RMSE = 0.0348, R2 = 0.5601
grda_vote_prob: RMSE = 0.0487, R2 = 0.5648
other_vote_prob: RMSE = 0.0655, R2 = 0.3434
