# Comparative Intrusion Detection with Multiple Autoencoder Architectures
This notebook trains and compares two different autoencoder structures on the UNSW-NB15 dataset, and reports detailed metrics for both normal and attack classes.

In [9]:
# Step 1: Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, precision_recall_curve
)
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [10]:
# Step 2: Load and Split Data
df = pd.read_csv("UNSW-NB15P-MM-SAMPLE.csv")
Dn = df[df['Class'] == 0].drop(columns=['Class'])
Da = df[df['Class'] == 1].drop(columns=['Class'])
Dntr, Dnts = train_test_split(Dn, test_size=0.2, random_state=42)
Dts = pd.concat([Dnts, Da], ignore_index=True)
Dts_labels = np.array([0]*len(Dnts) + [1]*len(Da))

In [11]:
# Step 3: Normalize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(Dntr)
X_test = scaler.transform(Dts)

In [12]:
# Step 4: Define Autoencoder Builder
def build_autoencoder(layer_sizes, dropout_rate=None):
    input_dim = X_train.shape[1]
    inp = Input(shape=(input_dim,))
    x = inp
    # Encoder
    for size in layer_sizes:
        x = Dense(size, activation='relu')(x)
        if dropout_rate:
            x = Dropout(dropout_rate)(x)
    # Decoder (reverse order)
    for size in layer_sizes[::-1][1:]:
        x = Dense(size, activation='relu')(x)
        if dropout_rate:
            x = Dropout(dropout_rate)(x)
    out = Dense(input_dim, activation='linear')(x)
    model = Model(inp, out)
    model.compile(optimizer=Adam(0.001), loss='mse')
    return model

In [13]:
# Step 5: Instantiate Models
model1 = build_autoencoder([32, 16])                     # Simple 32-16-32
model2 = build_autoencoder([64, 32, 16], dropout_rate=0.2)  # Deeper with dropout

In [14]:
# Step 6: Train Models
print('Training Model 1...')
model1.fit(X_train, X_train, epochs=20, batch_size=256, validation_split=0.1, verbose=1)
print('Training Model 2...')
model2.fit(X_train, X_train, epochs=20, batch_size=256, validation_split=0.1, verbose=1)

Training Model 1...
Epoch 1/20
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 461us/step - loss: 0.4991 - val_loss: 0.1324
Epoch 2/20
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 417us/step - loss: 0.1193 - val_loss: 0.0827
Epoch 3/20
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 408us/step - loss: 0.0795 - val_loss: 0.0670
Epoch 4/20
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 412us/step - loss: 0.0665 - val_loss: 0.0561
Epoch 5/20
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 406us/step - loss: 0.0562 - val_loss: 0.0492
Epoch 6/20
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 429us/step - loss: 0.0494 - val_loss: 0.0516
Epoch 7/20
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 408us/step - loss: 0.0484 - val_loss: 0.0387
Epoch 8/20
[1m965/965[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 411us/step - loss: 0.0392 - val_loss: 0.0337
Epoc

<keras.src.callbacks.history.History at 0x330814710>

In [15]:
# Step 7: Evaluation Function Returns Full Report
def evaluate_model(model, X_train, X_test, y_test):
    # Predict and compute reconstruction error
    X_pred = model.predict(X_test)
    errors = np.mean((X_test - X_pred)**2, axis=1)
    # Optimal threshold via F1
    prec, rec, thr = precision_recall_curve(y_test, errors)
    f1_scores = 2 * (prec * rec) / (prec + rec + 1e-8)
    best_thr = thr[np.argmax(f1_scores)]
    y_pred = (errors > best_thr).astype(int)
    # Confusion matrix and report
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=['Normal','Attack'])
    tn, fp, fn, tp = cm.ravel()
    fpr = fp / (fp + tn)
    fnr = fn / (fn + tp)
    return cm, report, {'FPR': fpr, 'FNR': fnr}

In [16]:
# Step 8: Evaluate and Display Detailed Results
for name, model in [('AE_32-16', model1), ('AE_64-32-16_dropout', model2)]:
    print(f"\n=== {name} ===")
    cm, report, rates = evaluate_model(model, X_train, X_test, Dts_labels)
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", report)
    print("False Positive Rate:", rates['FPR'])
    print("False Negative Rate:", rates['FNR'])


=== AE_32-16 ===


[1m2837/2837[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163us/step
Confusion Matrix:
 [[64361  4196]
 [ 1071 21144]]

Classification Report:
               precision    recall  f1-score   support

      Normal       0.98      0.94      0.96     68557
      Attack       0.83      0.95      0.89     22215

    accuracy                           0.94     90772
   macro avg       0.91      0.95      0.92     90772
weighted avg       0.95      0.94      0.94     90772

False Positive Rate: 0.0612045451230363
False Negative Rate: 0.04821066846725185

=== AE_64-32-16_dropout ===
[1m2837/2837[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 183us/step
Confusion Matrix:
 [[64572  3985]
 [  183 22032]]

Classification Report:
               precision    recall  f1-score   support

      Normal       1.00      0.94      0.97     68557
      Attack       0.85      0.99      0.91     22215

    accuracy                           0.95     90772
   macro avg       0.92      0.97    