In [246]:
import pandas as pd
import tensorflow as tf
import numpy as np
import random
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

### setting a seed value for reproducibility

In [248]:
seed_value = 40
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)

In [249]:
X_train = pd.read_csv("X_train.csv")
X_val = pd.read_csv("X_val.csv")
X_test = pd.read_csv("X_test.csv")
y_train = pd.read_csv("y_train.csv").values.ravel()
y_val = pd.read_csv("y_val.csv").values.ravel()
y_test = pd.read_csv("y_test.csv").values.ravel()

In [250]:
# Helper function to evaluate the model
def evaluate_model(model, X, y):
    y_pred_probs = model.predict(X)
    y_pred = (y_pred_probs > 0.5).astype("int32").flatten()
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-Score: {f1:.2f}")
    return accuracy, precision, recall, f1

In [251]:
# Experiment 1: Baseline Neural Network (2 hidden layers)
print("Baseline Neural Network (2 hidden layers):")
baseline_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
baseline_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = baseline_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=1)
print("Validation Metrics:")
evaluate_model(baseline_model, X_val, y_val)
print("-" * 40)

Baseline Neural Network (2 hidden layers):
Epoch 1/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5137 - loss: 84.0774 - val_accuracy: 0.6266 - val_loss: 0.6739
Epoch 2/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 837us/step - accuracy: 0.5105 - loss: 0.7184 - val_accuracy: 0.6297 - val_loss: 0.6671
Epoch 3/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 609us/step - accuracy: 0.5166 - loss: 0.7119 - val_accuracy: 0.7234 - val_loss: 0.6698
Epoch 4/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 633us/step - accuracy: 0.5476 - loss: 0.7167 - val_accuracy: 0.6969 - val_loss: 0.6728
Epoch 5/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 609us/step - accuracy: 0.5550 - loss: 0.7131 - val_accuracy: 0.5578 - val_loss: 0.6765
Epoch 6/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 568us/step - accuracy: 0.5577 - loss: 0.7079 - val_accuracy: 

In [252]:
# Experiment 2: Deeper Neural Network (3 hidden layers)
print("\nDeeper Neural Network (3 hidden layers):")
deep_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
deep_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = deep_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=1)
print("Validation Metrics:")
evaluate_model(deep_model, X_val, y_val)
print("-" * 40)


Deeper Neural Network (3 hidden layers):
Epoch 1/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5487 - loss: 2.2140 - val_accuracy: 0.6453 - val_loss: 0.6371
Epoch 2/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611us/step - accuracy: 0.6396 - loss: 0.7384 - val_accuracy: 0.7031 - val_loss: 0.5532
Epoch 3/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 546us/step - accuracy: 0.6616 - loss: 0.7059 - val_accuracy: 0.6875 - val_loss: 0.6377
Epoch 4/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 556us/step - accuracy: 0.7023 - loss: 0.7292 - val_accuracy: 0.8500 - val_loss: 0.4384
Epoch 5/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 579us/step - accuracy: 0.7872 - loss: 0.4952 - val_accuracy: 0.8344 - val_loss: 0.4459
Epoch 6/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 676us/step - accuracy: 0.8035 - loss: 0.4712 - val_accuracy: 0.

In [253]:
# Experiment 3: Smaller Neural Network (1 hidden layer)
print("\nSmaller Neural Network (1 hidden layer):")
small_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
small_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = small_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=1)
print("Validation Metrics:")
evaluate_model(small_model, X_val, y_val)
print("-" * 40)


Smaller Neural Network (1 hidden layer):
Epoch 1/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 904us/step - accuracy: 0.4999 - loss: 75.5853 - val_accuracy: 0.5188 - val_loss: 0.8298
Epoch 2/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 501us/step - accuracy: 0.5034 - loss: 0.8996 - val_accuracy: 0.6125 - val_loss: 0.6462
Epoch 3/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 508us/step - accuracy: 0.6138 - loss: 0.7005 - val_accuracy: 0.6781 - val_loss: 0.5740
Epoch 4/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 534us/step - accuracy: 0.6958 - loss: 0.5973 - val_accuracy: 0.7125 - val_loss: 0.5525
Epoch 5/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 603us/step - accuracy: 0.7492 - loss: 0.5388 - val_accuracy: 0.7547 - val_loss: 0.4700
Epoch 6/20
[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 570us/step - accuracy: 0.7967 - loss: 0.4641 - val_accuracy:

In [254]:
# Experiment 4: Regularization (L2 weight decay)
print("\nNeural Network with L2 Regularization (weight decay):")
for reg in [0.001, 0.01, 0.1, 0.2, 0.5, 1.0]:
    print(f"L2 Regularization (λ={reg}):")
    regularized_model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(32, activation='relu', kernel_regularizer=l2(reg)),
        Dense(16, activation='relu', kernel_regularizer=l2(reg)),
        Dense(1, activation='sigmoid')
    ])
    regularized_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    history = regularized_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=0)
    print("Validation Metrics:")
    evaluate_model(regularized_model, X_val, y_val)
    print("-" * 40)



Neural Network with L2 Regularization (weight decay):
L2 Regularization (λ=0.001):
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 438us/step
Accuracy: 0.83
Precision: 0.81
Recall: 0.94
F1-Score: 0.87
----------------------------------------
L2 Regularization (λ=0.01):
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 444us/step
Accuracy: 0.90
Precision: 0.94
Recall: 0.90
F1-Score: 0.92
----------------------------------------
L2 Regularization (λ=0.1):
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 465us/step
Accuracy: 0.79
Precision: 0.77
Recall: 0.95
F1-Score: 0.85
----------------------------------------
L2 Regularization (λ=0.2):
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 497us/step
Accuracy: 0.69
Precision: 0.67
Recall: 0.97
F1-Score: 0.80
----------------------------------------
L2 Regularization (λ=0.5):
Validation Metrics:
[1m20/

### lets do a final test on testing data

In [256]:
# Best Neural Network Architecture
best_nn = Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

best_nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
best_nn.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)  # Don't print training output

# Evaluate on Test Set
y_test_pred_probs = best_nn.predict(X_test)
y_test_pred = (y_test_pred_probs > 0.5).astype("int32").flatten()

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)

print(f"Test Set Metrics for Neural Network:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 958us/step
Test Set Metrics for Neural Network:
Accuracy: 0.36
Precision: 1.00
Recall: 0.00
F1-Score: 0.00


### we're getting patterns of overfitting, lets try doing k-folds as well

In [258]:
# Load preprocessed data
X = pd.read_csv("X_train.csv")
y = pd.read_csv("y_train.csv").values.ravel()

# Number of splits for K-Fold
k_folds = 5

# Store results for each model
nn_results = {'accuracy': [], 'precision': [], 'recall': [], 'f1_score': []}

# Initialize K-Fold
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

# ---- Neural Network K-Fold Cross-Validation ---- #
print("\n\n===== Neural Network K-Fold Cross-Validation =====\n")

for fold, (train_index, test_index) in enumerate(kf.split(X)):
    print(f"Fold {fold + 1} / {k_folds}")
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Best Neural Network Architecture
    nn_model = Sequential([
        tf.keras.layers.Input(shape=(X_train.shape[1],)),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid')
    ])
    
    nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    # Train the Neural Network
    nn_model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)
    
    # Predict on test set
    y_pred_probs = nn_model.predict(X_test)
    y_pred = (y_pred_probs > 0.5).astype("int32").flatten()
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # Store metrics for this fold
    nn_results['accuracy'].append(accuracy)
    nn_results['precision'].append(precision)
    nn_results['recall'].append(recall)
    nn_results['f1_score'].append(f1)
    
    print(f"Fold {fold + 1} Results - Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")
    

# Print Neural Network final results
print("\n==== Neural Network Final K-Fold Results ====")
print(f"Mean Accuracy: {np.mean(nn_results['accuracy']):.2f}")
print(f"Mean Precision: {np.mean(nn_results['precision']):.2f}")
print(f"Mean Recall: {np.mean(nn_results['recall']):.2f}")
print(f"Mean F1-Score: {np.mean(nn_results['f1_score']):.2f}\n")



===== Neural Network K-Fold Cross-Validation =====

Fold 1 / 5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 911us/step
Fold 1 Results - Accuracy: 0.52, Precision: 0.51, Recall: 1.00, F1-Score: 0.68
Fold 2 / 5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Fold 2 Results - Accuracy: 0.49, Precision: 0.00, Recall: 0.00, F1-Score: 0.00
Fold 3 / 5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 854us/step
Fold 3 Results - Accuracy: 0.48, Precision: 0.48, Recall: 1.00, F1-Score: 0.65
Fold 4 / 5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 810us/step
Fold 4 Results - Accuracy: 0.47, Precision: 0.00, Recall: 0.00, F1-Score: 0.00
Fold 5 / 5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 754us/step
Fold 5 Results - Accuracy: 0.47, Precision: 0.47, Recall: 0.99, F1-Score: 0.63

==== Neural Network Final K-Fold Results ====
Mean Accuracy: 0.49
Mean Precision: 0.29
Mean Recall: 0.60
Mean F1-Score: 0.39



In [259]:
y_train = pd.read_csv("y_train.csv").values.ravel()
print(pd.Series(y_train).value_counts(normalize=True))

0.0    0.5
1.0    0.5
Name: proportion, dtype: float64


### our data seems to be unbalanced, this is why the accuracy is 62 cause the model seems to always predict 1

In [277]:
from sklearn.utils import class_weight

# ---- Step 1: Load Data ---- #
X_train = pd.read_csv("X_train.csv")
y_train = pd.read_csv("y_train.csv").values.ravel()

# ---- Step 3: Class Weights (Manually Set) ---- #
class_weight_dict = {0: 1.2, 1: 1.0}
print("Manual Class Weights:", class_weight_dict)

# ---- Step 4: Neural Network Architecture ---- #
nn_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_resampled.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

optimizer = Adam(learning_rate=0.0005, clipvalue=1.0)
nn_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# ---- Step 6: Train the Neural Network ---- #
history = nn_model.fit(
    X_resampled, 
    y_resampled, 
    epochs=50, 
    batch_size=64, 
    verbose=1, 
    validation_split=0.2, 
    class_weight=class_weight_dict,
)

# ---- Step 7: Evaluate Model on Test Data ---- #
X_test = pd.read_csv("X_test.csv")
y_test = pd.read_csv("y_test.csv").values.ravel()

loss, accuracy = nn_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy:.2f}")


Manual Class Weights: {0: 1.2, 1: 1.0}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.4945 - loss: 85.8661 - val_accuracy: 0.0311 - val_loss: 19.0184
Epoch 2/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5332 - loss: 33.5108 - val_accuracy: 0.0311 - val_loss: 0.7000
Epoch 3/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5358 - loss: 13.8089 - val_accuracy: 0.9229 - val_loss: 0.6099
Epoch 4/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5307 - loss: 6.5270 - val_accuracy: 0.0311 - val_loss: 0.7449
Epoch 5/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5901 - loss: 3.3861 - val_accuracy: 0.0311 - val_loss: 0.7107
Epoch 6/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5917 - loss: 2.7134 - val_accuracy: 0.0311 - val_loss: 0.7336
Epoch 7/50
[1m47/47[0m [32m━━━━━━