In [1]:
# 1️⃣ Import necessary libraries
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib  # For saving model in .pkl format
import os


In [2]:

# 2️⃣ Load the final preprocessed dataset
df = pd.read_csv("D:/FYP/data/datasets/final_dataset_02.csv")


In [3]:

# 3️⃣ Separate features and target
X = df.drop(columns=['label'])  # Features
y = df['label'].astype(int)     # Target (0 = Normal, 1 = Attack)

# 4️⃣ Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 5️⃣ Convert data to DMatrix (XGBoost's optimized data format)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)


In [4]:

# 6️⃣ Set XGBoost parameters
params = {
    'objective': 'binary:logistic',  # Binary classification
    'eval_metric': 'auc',           # Metric for evaluation
    'max_depth': 6,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'seed': 42
}

# 7️⃣ Train the model with early stopping
model = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=300,            # Equivalent to n_estimators
    evals=[(dtest, 'eval')],        # Validation set
    early_stopping_rounds=50,       # Early stopping
    verbose_eval=True               # Print progress
)


[0]	eval-auc:0.99823
[1]	eval-auc:1.00000
[2]	eval-auc:1.00000
[3]	eval-auc:0.99999
[4]	eval-auc:1.00000
[5]	eval-auc:1.00000
[6]	eval-auc:1.00000
[7]	eval-auc:1.00000
[8]	eval-auc:1.00000
[9]	eval-auc:1.00000
[10]	eval-auc:1.00000
[11]	eval-auc:1.00000
[12]	eval-auc:1.00000
[13]	eval-auc:1.00000
[14]	eval-auc:1.00000
[15]	eval-auc:1.00000
[16]	eval-auc:1.00000
[17]	eval-auc:1.00000
[18]	eval-auc:1.00000
[19]	eval-auc:1.00000
[20]	eval-auc:1.00000
[21]	eval-auc:1.00000
[22]	eval-auc:1.00000
[23]	eval-auc:1.00000
[24]	eval-auc:1.00000
[25]	eval-auc:1.00000
[26]	eval-auc:1.00000
[27]	eval-auc:1.00000
[28]	eval-auc:1.00000
[29]	eval-auc:1.00000
[30]	eval-auc:1.00000
[31]	eval-auc:1.00000
[32]	eval-auc:1.00000
[33]	eval-auc:1.00000
[34]	eval-auc:1.00000
[35]	eval-auc:1.00000
[36]	eval-auc:1.00000
[37]	eval-auc:1.00000
[38]	eval-auc:1.00000
[39]	eval-auc:1.00000
[40]	eval-auc:1.00000
[41]	eval-auc:1.00000
[42]	eval-auc:1.00000
[43]	eval-auc:1.00000
[44]	eval-auc:1.00000
[45]	eval-auc:1.0000

In [5]:

# 8️⃣ Make predictions (returns probabilities by default)
y_pred_proba = model.predict(dtest)
y_pred = (y_pred_proba > 0.5).astype(int)  # Convert to binary classes


In [6]:

# 9️⃣ Evaluate model
accuracy = accuracy_score(y_test, y_pred)
print("✅ Model Accuracy:", round(accuracy * 100, 2), "%")
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred))
print("\n🔍 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


✅ Model Accuracy: 100.0 %

📊 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     13386
           1       1.00      1.00      1.00     13385

    accuracy                           1.00     26771
   macro avg       1.00      1.00      1.00     26771
weighted avg       1.00      1.00      1.00     26771


🔍 Confusion Matrix:
 [[13386     0]
 [    0 13385]]


In [8]:

# 🔟 Save trained model in .pkl format using joblib
os.makedirs("/trained_models", exist_ok=True)  # Create directory if it doesn't exist
joblib.dump(model, "D:/FYP/models/trained_models/xgboost_native_model.pkl")
print("✅ Trained XGBoost model saved successfully: models/trained_models/xgboost_native_model.pkl")

✅ Trained XGBoost model saved successfully: models/trained_models/xgboost_native_model.pkl
