# Supervised XGBoost IDS: Detailed Metrics (Updated)
Removed deprecated `use_label_encoder` parameter; using `eval_metric` directly.

In [1]:
# Step 1: Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import xgboost as xgb
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [2]:
# Step 2: Load and Split Data
df = pd.read_csv('UNSW-NB15P-MM-SAMPLE.csv')  # ensure correct path
Dn = df[df['Class'] == 0].drop(columns=['Class'])
Da = df[df['Class'] == 1].drop(columns=['Class'])
Dntr, Dnts = train_test_split(Dn, test_size=0.2, random_state=42)
Da_train, Da_test = train_test_split(Da, test_size=0.5, random_state=42)
# Combine for supervised training and testing
X_train_raw = pd.concat([Dntr, Da_train], ignore_index=True)
y_train = np.array([0]*len(Dntr) + [1]*len(Da_train))
X_test_raw = pd.concat([Dnts, Da_test], ignore_index=True)
y_test = np.array([0]*len(Dnts) + [1]*len(Da_test))

In [3]:
# Step 3: Normalize Features
scaler = StandardScaler()
scaler.fit(Dntr)
X_train_scaled = scaler.transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

In [4]:
# Step 4: Train Teacher Autoencoder
input_dim = X_train_scaled.shape[1]
inp = Input(shape=(input_dim,))
x = Dense(64, activation='relu')(inp)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)
x = Dropout(0.2)(x)
encoded = Dense(16, activation='relu', name='teacher_encoded')(x)
x = Dense(32, activation='relu')(encoded)
x = Dropout(0.2)(x)
x = Dense(64, activation='relu')(x)
teacher_out = Dense(input_dim, activation='linear')(x)
teacher = Model(inp, teacher_out)
teacher.compile(optimizer=Adam(0.001), loss='mse')
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
teacher.fit(X_train_scaled, X_train_scaled, epochs=20, batch_size=256,
            validation_split=0.1, callbacks=[es, rlr], verbose=1)

Epoch 1/20
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 894us/step - loss: 0.6264 - val_loss: 2.2814 - learning_rate: 0.0010
Epoch 2/20
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 843us/step - loss: 0.3149 - val_loss: 1.3043 - learning_rate: 0.0010
Epoch 3/20
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 840us/step - loss: 0.2729 - val_loss: 1.0914 - learning_rate: 0.0010
Epoch 4/20
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 690us/step - loss: 0.2545 - val_loss: 1.3783 - learning_rate: 0.0010
Epoch 5/20
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 709us/step - loss: 0.2427 - val_loss: 1.1161 - learning_rate: 0.0010
Epoch 6/20
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 778us/step - loss: 0.2291 - val_loss: 0.8434 - learning_rate: 0.0010
Epoch 7/20
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 781us/step - loss: 0.2198 - val_l

<keras.src.callbacks.history.History at 0x3000845c0>

In [5]:
# Step 5: Extract Encoded Features
encoder = Model(inputs=teacher.input,
               outputs=teacher.get_layer('teacher_encoded').output)
X_train_enc = encoder.predict(X_train_scaled)
X_test_enc = encoder.predict(X_test_scaled)

[1m8917/8917[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 163us/step
[1m2490/2490[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156us/step


In [6]:
# Step 6: Train XGBoost Classifier
clf = xgb.XGBClassifier(eval_metric='logloss')  # removed deprecated use_label_encoder
clf.fit(X_train_enc, y_train)

In [7]:
# Step 7: Predictions & Detailed Metrics
y_pred = clf.predict(X_test_enc)
y_prob = clf.predict_proba(X_test_enc)[:,1]

# Classification report for both classes
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
report = classification_report(y_test, y_pred, target_names=['Normal','Attack'])
print("Classification Report:\n", report)

# Confusion Matrix and rates
cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()
fpr = fp / (fp + tn)
fnr = fn / (fn + tp)
print("Confusion Matrix:\n", cm)
print(f"False Positive Rate (Normal→Attack): {fpr:.4f}")
print(f"False Negative Rate (Attack→Normal): {fnr:.4f}")

# ROC-AUC
auc = roc_auc_score(y_test, y_prob)
print(f"ROC-AUC Score: {auc:.4f}")

Classification Report:
               precision    recall  f1-score   support

      Normal       0.99      0.99      0.99     68557
      Attack       0.97      0.92      0.94     11108

    accuracy                           0.98     79665
   macro avg       0.98      0.96      0.97     79665
weighted avg       0.98      0.98      0.98     79665

Confusion Matrix:
 [[68188   369]
 [  855 10253]]
False Positive Rate (Normal→Attack): 0.0054
False Negative Rate (Attack→Normal): 0.0770
ROC-AUC Score: 0.9989
