In [12]:
import pandas as pd
import numpy as np

# Example CSV loading
data = pd.read_csv("/Users/mohamedeldagla/Desktop/senior year/New Trends in AI/Epilepsy - GANs/Data/Epileptic Seizure Recognition.csv")

X = data.iloc[:, 1:179].values  
y = data.iloc[:, 179].values      

In [13]:
print(X.shape, " ", y.shape)

(11500, 178)   (11500,)


In [14]:
from tensorflow.keras.utils import to_categorical

# Adjust labels from {1,2,3,4,5} to {0,1,2,3,4} if needed
y_adj = y - 1  # so classes become 0,1,2,3,4
y_ohe = to_categorical(y_adj, num_classes=5)

In [15]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [16]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import (
    Dense, LeakyReLU, Dropout, Concatenate, BatchNormalization
)
from tensorflow.keras.optimizers import Adam

data_dim = 178       # Number of EEG features per sample
num_classes = 5      # We have 5 classes
latent_dim = 100     # Dimension of the noise vector

In [17]:
from tensorflow.keras.layers import Dense, BatchNormalization

def build_generator(latent_dim, num_classes, data_dim):
    """
    Builds a generator with multiple dense layers, batch normalization, etc.
    """
    noise_input = Input(shape=(latent_dim,))
    label_input = Input(shape=(num_classes,))

    # Concatenate noise + label
    merged_input = Concatenate()([noise_input, label_input])

    x = Dense(256)(merged_input)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization(momentum=0.8)(x)

    x = Dense(512)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization(momentum=0.8)(x)

    x = Dense(512)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization(momentum=0.8)(x)

    # Output layer (EEG data dimension)
    out = Dense(data_dim, activation='linear')(x)

    model = Model([noise_input, label_input], out, name="Generator")
    return model

In [18]:
import tensorflow as tf
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, LeakyReLU, Dropout, Concatenate

def build_discriminator(data_dim, num_classes):
    """
    Builds a more powerful discriminator with multiple hidden layers and dropout.
    """
    data_input = Input(shape=(data_dim,))
    label_input = Input(shape=(num_classes,))

    # Concatenate data + label
    merged_input = Concatenate()([data_input, label_input])

    x = Dense(512)(merged_input)
    x = LeakyReLU(alpha=0.2)(x)
    x = Dropout(0.3)(x)

    x = Dense(512)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Dropout(0.3)(x)

    # Extra layer for more capacity
    x = Dense(256)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Dropout(0.3)(x)

    # Output (real/fake)
    validity = Dense(1, activation='sigmoid')(x)

    model = Model([data_input, label_input], validity, name="Discriminator")
    return model

In [19]:
discriminator_lr = 1e-4
generator_lr = 2e-4

# Build models
discriminator = build_discriminator(data_dim, num_classes)
discriminator.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=discriminator_lr, beta_1=0.5),
    metrics=['accuracy']
)

generator = build_generator(latent_dim, num_classes, data_dim)

# When we train the generator, we want the discriminator to be "frozen"
# so we only update generator weights.
discriminator.trainable = False

noise = Input(shape=(latent_dim,))
label = Input(shape=(num_classes,))
generated_data = generator([noise, label])
validity = discriminator([generated_data, label])

combined = Model([noise, label], validity)
combined.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=generator_lr, beta_1=0.5)
)



In [20]:
import numpy as np
import tensorflow as tf

epochs = 5
batch_size = 64
patience = 20        # For early stopping if desired
patience_counter = 0
best_g_loss = np.inf

# For logging if you want to plot later
d_loss_real_list = []
d_loss_fake_list = []
d_loss_list = []
g_loss_list = []

for epoch in range(epochs):
    # ---------------------
    #  Train Discriminator
    # ---------------------
    # 1) Sample a real batch
    idx = np.random.randint(0, X_scaled.shape[0], batch_size)
    real_samples = X_scaled[idx]  # shape (batch_size, 178)
    real_labels = y_ohe[idx]      # shape (batch_size, 5)

    # 2) Generate a fake batch
    noise = np.random.normal(0, 1, (batch_size, latent_dim))

    fake_classes = np.random.randint(0, num_classes, batch_size)
    fake_labels = tf.keras.utils.to_categorical(fake_classes, num_classes)

    # Use verbose=0 to suppress the "2/2" lines from Keras
    gen_samples = generator.predict([noise, fake_labels], verbose=0)

    # 3) Train on real
    d_loss_real = discriminator.train_on_batch(
        [real_samples, real_labels],
        np.ones((batch_size, 1))
    )
    # 4) Train on fake
    d_loss_fake = discriminator.train_on_batch(
        [gen_samples, fake_labels],
        np.zeros((batch_size, 1))
    )

    # Combine D losses
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # ---------------------
    #  Train Generator
    # ---------------------
    # Generator wants the discriminator to label its generated samples as real (1)
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_classes_for_gen = np.random.randint(0, num_classes, batch_size)
    fake_labels_for_gen = tf.keras.utils.to_categorical(fake_classes_for_gen, num_classes)

    valid_y = 0.9 * np.ones((batch_size, 1))  # generator wants disc to say "real"

    # Also set verbose=0 for this predict
    g_loss = combined.train_on_batch([noise, fake_labels_for_gen], valid_y)

    # Store logs if needed for plotting
    d_loss_real_list.append(d_loss_real[0])
    d_loss_fake_list.append(d_loss_fake[0])
    d_loss_list.append(d_loss[0])
    g_loss_list.append(g_loss)

    # ---------------------
    #  Print Progress
    # ---------------------
    # d_loss is [loss_value, accuracy_value], so d_loss[0] is the loss,
    # and d_loss[1] is the accuracy. Same for d_loss_real, d_loss_fake.
    print(
        f"Epoch {epoch+1}/{epochs} "
        f"[D real loss: {d_loss_real[0]:.4f}, D real acc: {d_loss_real[1]*100:.2f}%] "
        f"[D fake loss: {d_loss_fake[0]:.4f}, D fake acc: {d_loss_fake[1]*100:.2f}%] "
        f"[D combined loss: {d_loss[0]:.4f}, D combined acc: {d_loss[1]*100:.2f}%] "
        f"[G loss: {g_loss:.4f}]"
    )

    # ---------------------
    #  Early Stopping Check (optional)
    # ---------------------
    if g_loss < best_g_loss:
        best_g_loss = g_loss
        patience_counter = 0
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f"Early stopping triggered at epoch {epoch+1}.")
        break



Epoch 1/5 [D real loss: 0.8238, D real acc: 35.94%] [D fake loss: 0.7424, D fake acc: 53.12%] [D combined loss: 0.7831, D combined acc: 44.53%] [G loss: 0.9508]
Epoch 2/5 [D real loss: 0.8202, D real acc: 43.23%] [D fake loss: 0.7829, D fake acc: 48.05%] [D combined loss: 0.8015, D combined acc: 45.64%] [G loss: 0.9191]
Epoch 3/5 [D real loss: 0.7964, D real acc: 45.31%] [D fake loss: 0.7741, D fake acc: 48.18%] [D combined loss: 0.7852, D combined acc: 46.74%] [G loss: 0.9168]
Epoch 4/5 [D real loss: 0.7857, D real acc: 46.21%] [D fake loss: 0.7723, D fake acc: 47.07%] [D combined loss: 0.7790, D combined acc: 46.64%] [G loss: 0.9189]
Epoch 5/5 [D real loss: 0.7812, D real acc: 45.31%] [D fake loss: 0.7683, D fake acc: 47.19%] [D combined loss: 0.7747, D combined acc: 46.25%] [G loss: 0.9188]


In [21]:
import numpy as np

# How many samples you want per class
n_new = 500

# We can store the synthetic data and labels in lists or arrays
all_synthetic_samples = []
all_synthetic_labels = []  # integer labels, 0..4 (i.e. originally 1..5 in raw data)

for class_idx in range(num_classes):
    # Generate noise
    noise = np.random.normal(0, 1, (n_new, latent_dim))
    
    # One-hot label array for this class
    labels_one_hot = np.zeros((n_new, num_classes))
    labels_one_hot[:, class_idx] = 1  # set the corresponding class index to 1

    # Generate synthetic samples
    synthetic_samples = generator.predict([noise, labels_one_hot])
    
    # (Optional) Inverse transform if you scaled the data
    synthetic_samples = scaler.inverse_transform(synthetic_samples)
    
    # Store them
    all_synthetic_samples.append(synthetic_samples)
    
    # Create a label array of shape (n_new,) with value `class_idx`
    synthetic_labels_class = np.full((n_new,), class_idx)
    all_synthetic_labels.append(synthetic_labels_class)

# Convert lists to numpy arrays
all_synthetic_samples = np.concatenate(all_synthetic_samples, axis=0)  
all_synthetic_labels = np.concatenate(all_synthetic_labels, axis=0)   

print("Synthetic samples shape:", all_synthetic_samples.shape) 
print("Synthetic labels shape:", all_synthetic_labels.shape)

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Synthetic samples shape: (2500, 178)
Synthetic labels shape: (2500,)


In [22]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical

# ------------------------------------------------------------------
# Suppose you already have:
#   X_scaled: shape (N, 178)  [ real features (scaled) ]
#   y_ohe:    shape (N, 5)    [ real labels (one-hot) ]
#   all_synthetic_samples: shape (N_syn, 178)  [ generated features ]
#   all_synthetic_labels:  shape (N_syn,)      [ generated labels as integers 0..4 ]
# ------------------------------------------------------------------

# Convert integer labels of synthetic data to one-hot
y_syn = to_categorical(all_synthetic_labels, num_classes=5)  # shape (N_syn, 5)

# Concatenate real + synthetic
X_syn = all_synthetic_samples
X_combined = np.concatenate((X_scaled, X_syn), axis=0)  # shape: (N + N_syn, 178)
y_combined = np.concatenate((y_ohe, y_syn), axis=0)     # shape: (N + N_syn, 5)

# Convert the one-hot labels back to integer class labels for scikit-learn
y_classes = np.argmax(y_combined, axis=1)  # shape: (N + N_syn,)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_combined, y_classes, test_size=0.2, random_state=42
)

# -------------------------
# 1) Random Forest
# -------------------------
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, rf_preds))

# -------------------------
# 2) Logistic Regression
# -------------------------
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train, y_train)
lr_preds = lr.predict(X_test)
print("Logistic Regression Accuracy:", accuracy_score(y_test, lr_preds))

# -------------------------
# 3) MLP
# -------------------------
mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42)
mlp.fit(X_train, y_train)
mlp_preds = mlp.predict(X_test)
print("MLP Accuracy:", accuracy_score(y_test, mlp_preds))

Random Forest Accuracy: 0.6164285714285714
Logistic Regression Accuracy: 0.25142857142857145
MLP Accuracy: 0.6475
