In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils.class_weight import compute_class_weight
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import joblib

In [2]:
df = pd.read_csv("Dataset/Cleaned-IDS_Dataset.csv")

In [3]:
# Encode target labels
le = LabelEncoder()
df['Attack Type'] = le.fit_transform(df['Attack Type'])

In [4]:
# Feature/label split
X = df.drop(columns=['Attack Type'])
y = df['Attack Type']
features = X.columns.tolist()

In [5]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [6]:
# Scaling
scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Compute class weights
unique_classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=unique_classes, y=y_train)
weight_dict = dict(zip(unique_classes, class_weights))
scale_weights = [weight_dict[i] for i in range(len(weight_dict))]

In [8]:
# Train weighted XGBoost model
model = XGBClassifier(
    objective='multi:softprob',
    eval_metric='mlogloss',
    use_label_encoder=False,
    learning_rate=0.1,
    n_estimators=80,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    tree_method='hist',
    random_state=42,
    scale_pos_weight=scale_weights,
    n_jobs=-1
)

model.fit(
    X_train_scaled, y_train,
    verbose=False
)

Parameters: { "scale_pos_weight", "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [9]:
# Save the finalized model for deployment
joblib.dump((model, features, le, scaler), "ids_xgb_model.pkl")

['ids_xgb_model.pkl']

In [19]:
# Evaluation
y_pred = model.predict(X_test_scaled)
report_text = classification_report(y_test, y_pred, target_names=le.classes_)
cm = confusion_matrix(y_test, y_pred)

# Show report
print("📊 Updated Classification Report (Weight-Sensitive Model):\n")
print(report_text)

📊 Updated Classification Report (Weight-Sensitive Model):

                precision    recall  f1-score   support

          Bots       0.95      0.69      0.80       389
   Brute Force       1.00      1.00      1.00      1830
          DDoS       1.00      1.00      1.00     25603
           DoS       1.00      1.00      1.00     38749
Normal Traffic       1.00      1.00      1.00    419012
 Port Scanning       0.99      1.00      0.99     18139
   Web Attacks       0.99      0.99      0.99       429

      accuracy                           1.00    504151
     macro avg       0.99      0.95      0.97    504151
  weighted avg       1.00      1.00      1.00    504151

