# 🛡️ UNSW-NB15 Intrusion Detection Model Retraining

This notebook retrains a model for intrusion detection using the UNSW-NB15 dataset. The workflow includes:
1. Importing libraries
2. Loading the dataset
3. Data cleaning
4. Encoding categorical features
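5. Feature selection using `SelectKBest` *(not applied in the full-feature Random Forest cell below)*
6. Scaling numeric features *(not applied in the cell below; tree ensembles do not require feature scaling)*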
7. Model training
8. Evaluation
9. Saving model and preprocessing objects

In [6]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder

# Train, evaluate and persist a binary (normal vs. attack) Random Forest on the
# UNSW-NB15 training CSV. Artifacts written: the per-column label encoders and
# the fitted classifier.

# --- Configuration: everything a reader might tune lives here ---
DATA_PATH = "UNSW_NB15_training-set.csv"
ENCODERS_PATH = "unsw_encoders.pkl"
MODEL_PATH = "unsw_rf_full.pkl"
TEST_SIZE = 0.2       # fraction of rows held out for evaluation
RANDOM_STATE = 42     # shared by the split and the forest so the run is reproducible
N_ESTIMATORS = 200    # number of trees in the forest

# --- Step 1: Load dataset ---
df = pd.read_csv(DATA_PATH)

# Drop columns that are not used as features.
# NOTE(review): 'attack_cat' presumably names the attack type and would leak the
# label if kept — confirm against the dataset description.
# errors='ignore' already skips columns that are absent, so no pre-filtering is needed.
drop_cols = ['id', 'attack_cat']
df = df.drop(columns=drop_cols, errors='ignore')

# Define label column
LABEL_COL = "label"
# Binarize the target: 0 stays 0, every other value (including missing) becomes 1.
# Vectorized comparison instead of a per-row Python lambda.
y = df[LABEL_COL].ne(0).astype("int64")
X = df.drop(columns=[LABEL_COL])

# --- Step 2: Encode categorical values ---
categorical_cols = ['proto', 'service', 'state']
encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    # astype(str) makes every value a string first, so a missing entry becomes
    # the literal "nan" and is encoded as its own class (it is NOT zero-filled
    # by Step 3 below).
    X[col] = le.fit_transform(X[col].astype(str))  # encode directly in X
    encoders[col] = le

# Save encoders for future use.
# NOTE(review): the encoders are fitted on the full dataset (before the split),
# and LabelEncoder.transform raises on categories it has not seen — inference
# code that reuses these encoders must handle unseen proto/service/state values.
joblib.dump(encoders, ENCODERS_PATH)

# --- Step 3: Handle missing values ---
# Any remaining missing cells are replaced with 0.
X = X.fillna(0)

# --- Step 4: Train/test split ---
# stratify=y keeps the normal/attack ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y
)

# --- Step 5: Train model ---
clf = RandomForestClassifier(
    n_estimators=N_ESTIMATORS,
    max_depth=None,   # trees grow until leaves are pure / min-sample limits hit
    random_state=RANDOM_STATE,
    n_jobs=-1         # use all available cores
)
clf.fit(X_train, y_train)

# --- Step 6: Evaluate ---
# Precision/recall/F1 use sklearn's default positive class (label 1 = attack).
y_pred = clf.predict(X_test)
metrics = {
    "accuracy": accuracy_score(y_test, y_pred),
    "precision": precision_score(y_test, y_pred),
    "recall": recall_score(y_test, y_pred),
    "f1": f1_score(y_test, y_pred),
}
print(metrics)

# --- Step 7: Save model ---
joblib.dump(clf, MODEL_PATH)
print("✅ Model and encoders saved successfully!")


{'accuracy': 0.9610197040120905, 'precision': 0.964840522227731, 'recall': 0.9783820017596045, 'f1': 0.9715640796288977}
✅ Model and encoders saved successfully!
