In [1]:
# 1️⃣ Import Required Libraries
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
import joblib  # For saving the model in .pkl format
import os


In [2]:

# 2️⃣ Load Dataset
# Make sure your dataset path is correct
df = pd.read_csv("D:/FYP/data/datasets/final_dataset_02.csv")


In [3]:

#  3️⃣ Prepare Features (X) and Target (y)
X = df.drop(columns=['label'])  # All features except label
y = df['label'].astype(int)     # Ensure labels are integer type (0: normal, 1: malicious)


In [4]:

# 4️⃣ Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [5]:

#  5️⃣ Define LightGBM Parameters
params = {
    'objective': 'binary',                # Binary classification
    'metric': 'auc',                      # Evaluation metric
    'boosting_type': 'gbdt',              # Gradient Boosting Decision Tree
    'learning_rate': 0.1,                 # Learning rate
    'num_leaves': 31,                     # Number of leaves in trees
    'max_depth': -1,                      # No limit on tree depth
    'verbosity': -1                       # Suppress logs
}


In [6]:

#  6️⃣ Create LightGBM Dataset Objects
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)


In [7]:

#  7️⃣ Train the LightGBM Model
lightgbm_model = lgb.train(
    params,
    train_data,
    valid_sets=[train_data, test_data],
    num_boost_round=500,
    callbacks=[lgb.early_stopping(50)],  # trying in this way
    verbose_eval=100
)


TypeError: train() got an unexpected keyword argument 'verbose_eval'

In [None]:

#  8️⃣ Make Predictions
y_pred_proba = lightgbm_model.predict(X_test)
y_pred = (y_pred_proba > 0.5).astype(int)  # Convert probabilities to binary predictions


In [None]:

# 9️⃣ Evaluation Metrics
print("\n✅ Model Evaluation Results:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, y_pred_proba))
print("Classification Report:\n", classification_report(y_test, y_pred))


In [None]:

#  🔟 Save the Trained Model in .pkl Format
# Create directory if it doesn't exist
os.makedirs("trained_models", exist_ok=True)
joblib.dump(lightgbm_model, "D:/FYP/models/trained_models/lightgbm_intrusion_detection.pkl")
print("✅ Model saved successfully at: models/trained_models/lightgbm_intrusion_detection.pkl")
