In [109]:
import pandas as pd
import tensorflow as tf
import numpy as np
import random
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

### setting a seed value for reproducibility

In [111]:
seed_value = 40
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)

In [112]:
X_train = pd.read_csv("X_train.csv")
X_val = pd.read_csv("X_val.csv")
X_test = pd.read_csv("X_test.csv")
y_train = pd.read_csv("y_train.csv").values.ravel()
y_val = pd.read_csv("y_val.csv").values.ravel()
y_test = pd.read_csv("y_test.csv").values.ravel()

In [113]:
# Helper function to evaluate the model
def evaluate_model(model, X, y):
    y_pred_probs = model.predict(X)
    y_pred = (y_pred_probs > 0.5).astype("int32").flatten()
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-Score: {f1:.2f}")
    return accuracy, precision, recall, f1

In [114]:
# Experiment 1: Baseline Neural Network (2 hidden layers)
print("Baseline Neural Network (2 hidden layers):")
baseline_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
baseline_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = baseline_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=1)
print("Validation Metrics:")
evaluate_model(baseline_model, X_val, y_val)
print("-" * 40)

Baseline Neural Network (2 hidden layers):
Epoch 1/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3970 - loss: 131.6413 - val_accuracy: 0.6250 - val_loss: 0.9419
Epoch 2/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 653us/step - accuracy: 0.6073 - loss: 0.7250 - val_accuracy: 0.6266 - val_loss: 0.6972
Epoch 3/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 607us/step - accuracy: 0.6311 - loss: 0.6663 - val_accuracy: 0.6266 - val_loss: 0.6996
Epoch 4/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 643us/step - accuracy: 0.6271 - loss: 0.6636 - val_accuracy: 0.6266 - val_loss: 0.7060
Epoch 5/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 620us/step - accuracy: 0.6346 - loss: 0.6614 - val_accuracy: 0.6266 - val_loss: 0.7090
Epoch 6/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 603us/step - accuracy: 0.6505 - loss: 0.6593 - val_accuracy: 0.6266 - va

In [115]:
# Experiment 2: Deeper Neural Network (3 hidden layers)
print("\nDeeper Neural Network (3 hidden layers):")
deep_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
deep_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = deep_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=1)
print("Validation Metrics:")
evaluate_model(deep_model, X_val, y_val)
print("-" * 40)


Deeper Neural Network (3 hidden layers):
Epoch 1/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5425 - loss: 4.8057 - val_accuracy: 0.4219 - val_loss: 0.7403
Epoch 2/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 589us/step - accuracy: 0.5680 - loss: 0.8271 - val_accuracy: 0.6578 - val_loss: 0.6584
Epoch 3/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 590us/step - accuracy: 0.6655 - loss: 0.8158 - val_accuracy: 0.6812 - val_loss: 0.5968
Epoch 4/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 626us/step - accuracy: 0.6996 - loss: 0.6417 - val_accuracy: 0.6797 - val_loss: 0.6325
Epoch 5/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 612us/step - accuracy: 0.7277 - loss: 0.6703 - val_accuracy: 0.7094 - val_loss: 0.5215
Epoch 6/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 573us/step - accuracy: 0.8240 - loss: 0.4991 - val_accuracy: 0.6625 - val_l

In [116]:
# Experiment 3: Smaller Neural Network (1 hidden layer)
print("\nSmaller Neural Network (1 hidden layer):")
small_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
small_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = small_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=1)
print("Validation Metrics:")
evaluate_model(small_model, X_val, y_val)
print("-" * 40)


Smaller Neural Network (1 hidden layer):
Epoch 1/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4191 - loss: 120.0025 - val_accuracy: 0.5813 - val_loss: 0.9754
Epoch 2/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 532us/step - accuracy: 0.5560 - loss: 0.8910 - val_accuracy: 0.6203 - val_loss: 1.0409
Epoch 3/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 586us/step - accuracy: 0.6143 - loss: 0.7118 - val_accuracy: 0.5422 - val_loss: 0.8383
Epoch 4/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 542us/step - accuracy: 0.6677 - loss: 0.6444 - val_accuracy: 0.6750 - val_loss: 0.5777
Epoch 5/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 525us/step - accuracy: 0.7456 - loss: 0.5346 - val_accuracy: 0.6844 - val_loss: 0.6305
Epoch 6/20
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 550us/step - accuracy: 0.7745 - loss: 0.4905 - val_accuracy: 0.7125 - val

In [117]:
# Experiment 4: Regularization (L2 weight decay)
print("\nNeural Network with L2 Regularization (weight decay):")
for reg in [0.001, 0.01, 0.1, 0.2, 0.5, 1.0]:
    print(f"L2 Regularization (λ={reg}):")
    regularized_model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(32, activation='relu', kernel_regularizer=l2(reg)),
        Dense(16, activation='relu', kernel_regularizer=l2(reg)),
        Dense(1, activation='sigmoid')
    ])
    regularized_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    history = regularized_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=0)
    print("Validation Metrics:")
    evaluate_model(regularized_model, X_val, y_val)
    print("-" * 40)



Neural Network with L2 Regularization (weight decay):
L2 Regularization (λ=0.001):
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 445us/step
Accuracy: 0.68
Precision: 0.99
Recall: 0.48
F1-Score: 0.65
----------------------------------------
L2 Regularization (λ=0.01):
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 589us/step
Accuracy: 0.54
Precision: 0.99
Recall: 0.27
F1-Score: 0.43
----------------------------------------
L2 Regularization (λ=0.1):
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 603us/step
Accuracy: 0.82
Precision: 0.80
Recall: 0.95
F1-Score: 0.87
----------------------------------------
L2 Regularization (λ=0.2):
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 427us/step
Accuracy: 0.83
Precision: 0.91
Recall: 0.81
F1-Score: 0.86
----------------------------------------
L2 Regularization (λ=0.5):
Validation Metrics:
[1m20/

In [118]:
# Experiment 5: Dropout Regularization
print("\nNeural Network with Dropout Regularization:")
for dropout_rate in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]:
    print(f"Dropout Rate: {dropout_rate}")
    dropout_model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(32, activation='relu'),
        Dropout(dropout_rate),
        Dense(16, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])
    dropout_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    history = dropout_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), verbose=0)
    print("Validation Metrics:")
    evaluate_model(dropout_model, X_val, y_val)
    print("-" * 40)


Neural Network with Dropout Regularization:
Dropout Rate: 0.1
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 404us/step
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Dropout Rate: 0.2
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Dropout Rate: 0.3
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 579us/step
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Dropout Rate: 0.4
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455us/step
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Dropout Rate: 0.5
Validation Metrics:
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44

### lets do a final test on testing data

In [129]:
# Best Neural Network Architecture
best_nn = Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

best_nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
best_nn.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)  # Don't print training output

# Evaluate on Test Set
y_test_pred_probs = best_nn.predict(X_test)
y_test_pred = (y_test_pred_probs > 0.5).astype("int32").flatten()

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)

print(f"Test Set Metrics for Neural Network:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test Set Metrics for Neural Network:
Accuracy: 0.64
Precision: 0.64
Recall: 1.00
F1-Score: 0.78
