# Extended Gradient Boost (XGBoost Model)

### Model Training

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from xgboost import XGBClassifier
from joblib import dump

# Load CSVs
ddos_data = pd.read_csv("DDoS traffic.csv")
normal_data = pd.read_csv("normal traffic.csv")

normal_data['label'] = 0
ddos_data['label'] = 1

df = pd.concat([normal_data, ddos_data], ignore_index=True)
print("Loaded dataset:", df.shape)

# Clean data
# Drop completely empty columns
df = df.dropna(axis=1, how='all')

# Fill missing numeric values
for col in df.select_dtypes(include=[np.number]).columns:
    df[col] = df[col].fillna(0)

# Encode non-numeric columns
for col in df.select_dtypes(exclude=[np.number]).columns:
    df[col] = df[col].astype(str)
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])



# Separate features and label
X = df.drop(columns=['label'])
y = df['label']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Scale numeric features
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)

# XGBoost model (handles imbalance well)
model = XGBClassifier(
    n_estimators=400,
    learning_rate=0.1,
    max_depth=8,
    subsample=0.8,
    colsample_bytree=0.8,
    scale_pos_weight=(len(y_train[y_train==0]) / len(y_train[y_train==1])),
    random_state=42,
    n_jobs=-1,
    eval_metric='logloss'
)

model.fit(X_train_s, y_train)

# Evaluate
y_pred = model.predict(X_test_s)
y_prob = model.predict_proba(X_test_s)[:,1]

print("\n=== Classification Report ===")
print(classification_report(y_test, y_pred, digits=4))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

try:
    print("ROC AUC:", round(roc_auc_score(y_test, y_prob), 4))
except:
    pass


# Save model + scaler
dump(model, "Xgboost_ddos_model.joblib")
dump(list(X.columns), "columns_xgboost.joblib")
dump(scaler, "scaler_xgboost.joblib")



print("\nModel saved as Xgboost_ddos_model.joblib")


Loaded dataset: (70792, 8)

=== Classification Report ===
              precision    recall  f1-score   support

           0     1.0000    0.9999    0.9999      7000
           1     0.9999    1.0000    0.9999      7159

    accuracy                         0.9999     14159
   macro avg     0.9999    0.9999    0.9999     14159
weighted avg     0.9999    0.9999    0.9999     14159

Confusion Matrix:
[[6999    1]
 [   0 7159]]
ROC AUC: 1.0

Model saved as Xgboost_ddos_model.joblib


In [2]:
# Load model and scaler
iso = dump("Xgboost_ddos_model.joblib")
scaler = dump("scaler_xgboost.joblib")

last_time = None  # To compute inter-arrival time

# MQTT Settings
broker = "172.25.59.232"   #  Home Assistant IP
port = 1883
topic = "home/sensor/#"   # Subscribe to all sensors
username = "mqtt_user"
password = "Dissertation@1"

def on_connect(client, userdata, flags, rc):
    print("Connected to MQTT Broker" if rc == 0 else "Failed to connect")
    client.subscribe(topic)

def on_message(client, userdata, msg):
    global last_time

    # Extract Features
    packet_length = len(msg.payload)
    protocol_encoded = 0  # MQTT fixed
    current_time = time.time()

    inter_arrival = 0 if last_time is None else current_time - last_time
    last_time = current_time

    # Build feature vector
    X_new = np.array([[packet_length, inter_arrival, protocol_encoded]])
    X_new_scaled = scaler.transform(X_new)

    # Predict
    prediction = iso.predict(X_new_scaled)[0]

    if prediction == -1:
        print(f"[ALERT]DDoS Attack Detected from topic, Size: {packet_length}, InterArrival: {inter_arrival:.4f}s")
    else:
        print(f"[INFO]Normal Traffic from topic, Size: {packet_length}, InterArrival: {inter_arrival:.4f}s")

# Connect to MQTT Broker
client = mqtt.Client()
client.username_pw_set(username, password)
client.on_connect = on_connect
client.on_message = on_message

client.connect(broker, port, 60)
print("Starting DDoS Detection...")
client.loop_forever()

TypeError: dump() missing 1 required positional argument: 'filename'