# Comparative Intrusion Detection with Multiple Autoencoder Architectures
This notebook trains and compares two different autoencoder structures on the UNSW-NB15 dataset, and reports detailed metrics for both normal and attack classes.

In [38]:
# Step 1: Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, precision_recall_curve
)
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [39]:
# Step 2: Load and Split Data
df = pd.read_csv("UNSW-NB15P-MM-SAMPLE.csv")
Dn = df[df['Class'] == 0].drop(columns=['Class'])
Da = df[df['Class'] == 1].drop(columns=['Class'])
Dntr, Dnts = train_test_split(Dn, test_size=0.2, random_state=42)
Dts = pd.concat([Dnts, Da], ignore_index=True)
Dts_labels = np.array([0]*len(Dnts) + [1]*len(Da))

In [40]:
# Step 3: Normalize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(Dntr)
X_test = scaler.transform(Dts)

In [41]:
# Step 4: Define Autoencoder Builder
def build_autoencoder(layer_sizes, dropout_rate=None):
    input_dim = X_train.shape[1]
    inp = Input(shape=(input_dim,))
    x = inp
    # Encoder
    for size in layer_sizes:
        x = Dense(size, activation='relu')(x)
        if dropout_rate:
            x = Dropout(dropout_rate)(x)
    # Decoder (reverse order)
    for size in layer_sizes[::-1][1:]:
        x = Dense(size, activation='relu')(x)
        if dropout_rate:
            x = Dropout(dropout_rate)(x)
    out = Dense(input_dim, activation='linear')(x)
    model = Model(inp, out)
    model.compile(optimizer=Adam(0.001), loss='mse')
    return model

In [42]:
# Step 5: Instantiate Models
model1 = build_autoencoder([32, 16])                     # Simple 32-16-32
model2 = build_autoencoder([64, 32, 16], dropout_rate=0.2)  # Deeper with dropout

In [43]:
# Step 6: Train Models
print('Training Model 1...')
model1.fit(X_train, X_train, epochs=15, batch_size=256, validation_split=0.1, verbose=1)
print('Training Model 2...')
model2.fit(X_train, X_train, epochs=20, batch_size=256, validation_split=0.1, verbose=1)

Training Model 1...
Epoch 1/15
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 450us/step - loss: 0.5417 - val_loss: 0.1367
Epoch 2/15
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 401us/step - loss: 0.1112 - val_loss: 0.0844
Epoch 3/15
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 399us/step - loss: 0.0751 - val_loss: 0.0628
Epoch 4/15
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402us/step - loss: 0.0574 - val_loss: 0.0514
Epoch 5/15
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 400us/step - loss: 0.0482 - val_loss: 0.0455
Epoch 6/15
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402us/step - loss: 0.0435 - val_loss: 0.0400
Epoch 7/15
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402us/step - loss: 0.0397 - val_loss: 0.0367
Epoch 8/15
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 415us/step - loss: 0.0360 - val_loss: 0.0354
Epoc

<keras.src.callbacks.history.History at 0x38f1e4ef0>

In [44]:
# Step 7: Evaluation Function Returns Full Report
def evaluate_model(model, X_train, X_test, y_test):
    # Predict and compute reconstruction error
    X_pred = model.predict(X_test)
    errors = np.mean((X_test - X_pred)**2, axis=1)
    # Optimal threshold via F1
    prec, rec, thr = precision_recall_curve(y_test, errors)
    f1_scores = 2 * (prec * rec) / (prec + rec + 1e-8)
    best_thr = thr[np.argmax(f1_scores)]
    y_pred = (errors > best_thr).astype(int)
    # Confusion matrix and report
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=['Normal','Attack'])
    tn, fp, fn, tp = cm.ravel()
    fpr = fp / (fp + tn)
    fnr = fn / (fn + tp)
    return cm, report, {'FPR': fpr, 'FNR': fnr}

In [45]:
# Step 8: Evaluate and Display Detailed Results
for name, model in [('AE_32-16', model1), ('AE_64-32-16_dropout', model2)]:
    print(f"\n=== {name} ===")
    cm, report, rates = evaluate_model(model, X_train, X_test, Dts_labels)
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", report)
    print("False Positive Rate:", rates['FPR'])
    print("False Negative Rate:", rates['FNR'])


=== AE_32-16 ===
[1m2837/2837[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162us/step
Confusion Matrix:
 [[63226  5331]
 [ 1863 20352]]

Classification Report:
               precision    recall  f1-score   support

      Normal       0.97      0.92      0.95     68557
      Attack       0.79      0.92      0.85     22215

    accuracy                           0.92     90772
   macro avg       0.88      0.92      0.90     90772
weighted avg       0.93      0.92      0.92     90772

False Positive Rate: 0.07776011202357162
False Negative Rate: 0.08386225523295071

=== AE_64-32-16_dropout ===
[1m2837/2837[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 174us/step
Confusion Matrix:
 [[64751  3806]
 [   76 22139]]

Classification Report:
               precision    recall  f1-score   support

      Normal       1.00      0.94      0.97     68557
      Attack       0.85      1.00      0.92     22215

    accuracy                           0.96     90772
   macro avg      