In [7]:
import time
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense, Dropout
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc

In [2]:
X_train = pd.read_csv('/content/X_train_final.csv').values
y_train = pd.read_csv('/content/y_train_final.csv').values
X_val = pd.read_csv('/content/X_val_processed.csv').values
y_val = pd.read_csv('/content/y_val_processed.csv').values
X_test = pd.read_csv('/content/X_test_processed.csv').values
y_test = pd.read_csv('/content/y_test_processed.csv').values

In [3]:
y_train = y_train.astype(np.float32)
y_val = y_val.astype(np.float32)
y_test = y_test.astype(np.float32)

X_train_combined = np.concatenate((X_train, X_val), axis=0)
y_train_combined = np.concatenate((y_train, y_val), axis=0)

input_dim = X_train.shape[1]
best_params = {
    'initial_dense_units': 32,
    'num_dense_layers_after_attention': 3,
    'dense_units_L1': 64,
    'dense_units_L2': 64,
    'dense_units_L3': 16,
    'learning_rate': 0.0073866678565409506,
    'batch_size': 32,
    'optimizer': 'rmsprop',
    'dropout_rate': 0.25,
}

In [4]:
def build_no_attention_model(input_dim, params):
    tf.keras.backend.clear_session()

    initial_dense_units = params['initial_dense_units']
    num_dense_layers = params['num_dense_layers_after_attention']
    dropout_rate = params['dropout_rate']
    learning_rate = params['learning_rate']

    optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

    inputs = Input(shape=(input_dim,), name='Input')
    x = Dense(initial_dense_units, activation='relu', name='initial_dense')(inputs)
    x = Dropout(dropout_rate, name='initial_dropout')(x)

    dense_units_list = [params['dense_units_L1'], params['dense_units_L2'], params['dense_units_L3']]

    for i in range(num_dense_layers):
        x = Dense(dense_units_list[i], activation='relu', name=f'dense_L{i+1}')(x)
        x = Dropout(dropout_rate, name=f'dropout_L{i+1}')(x)

    outputs = Dense(1, activation='sigmoid', name='Output')(x)

    model = Model(inputs=inputs, outputs=outputs, name='MLP_No_Attention')
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.AUC()])

    return model

print("--- Building and Training Ablated Model: No Attention Layer ---")
no_attention_model = build_no_attention_model(input_dim, best_params)
no_attention_model.summary()

callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='min', restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6, mode='min')
]

no_attention_model.fit(
    X_train_combined, y_train_combined,
    validation_split=0.1,
    epochs=200,
    batch_size=best_params['batch_size'],
    callbacks=callbacks,
    verbose=1
)

--- Building and Training Ablated Model: No Attention Layer ---


Epoch 1/200
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.7059 - auc: 0.7665 - loss: 0.5641 - val_accuracy: 0.7578 - val_auc: 0.8642 - val_loss: 0.4721 - learning_rate: 0.0074
Epoch 2/200
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8243 - auc: 0.8949 - loss: 0.4152 - val_accuracy: 0.8203 - val_auc: 0.9127 - val_loss: 0.3425 - learning_rate: 0.0074
Epoch 3/200
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8287 - auc: 0.9173 - loss: 0.3694 - val_accuracy: 0.8594 - val_auc: 0.9242 - val_loss: 0.3354 - learning_rate: 0.0074
Epoch 4/200
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8555 - auc: 0.9346 - loss: 0.3286 - val_accuracy: 0.8750 - val_auc: 0.9352 - val_loss: 0.3365 - learning_rate: 0.0074
Epoch 5/200
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8620 - auc: 0.9326 - loss: 0.339

<keras.src.callbacks.history.History at 0x7bd1ad23d510>

In [5]:
test_loss, test_accuracy, test_auc = no_attention_model.evaluate(X_test, y_test, verbose=0)
y_pred_proba = no_attention_model.predict(X_test).ravel()
y_pred_binary = (y_pred_proba > 0.5).astype(int)
precision = precision_score(y_test, y_pred_binary)
recall = recall_score(y_test, y_pred_binary)
f1 = f1_score(y_test, y_pred_binary)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


In [8]:
trainable_count = np.sum([K.count_params(w) for w in no_attention_model.trainable_weights])
non_trainable_count = np.sum([K.count_params(w) for w in no_attention_model.non_trainable_weights])
total_parameters = trainable_count + non_trainable_count

In [9]:
start_time = time.time()
no_attention_model.predict(X_test, batch_size=best_params['batch_size'])
end_time = time.time()
inference_time_batch = (end_time - start_time)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


In [10]:
print("Ablated Model: No Attention Layer")
print(f"Accuracy: {test_accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"AUC: {test_auc:.4f}")
print(f"Parameters: {total_parameters}")
print(f"Test Loss: {test_loss:.4f}")
print(f"Inference Time (s/batch): {inference_time_batch:.4f}")

Ablated Model: No Attention Layer
Accuracy: 0.9024
Precision: 0.8000
Recall: 0.8000
F1-Score: 0.8000
AUC: 0.9175
Parameters: 7745.0
Test Loss: 0.3015
Inference Time (s/batch): 0.1279
