In [1]:
# Model selection and evaluation on the cleaned CIC-IDS2017 dataset

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report

# Machine learning models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [4]:
# Read the cleaned dataset CSV into a DataFrame.
# The path is relative, so the notebook has to be run from the directory that
# contains the `Dataset/` folder.
dataset_csv = 'Dataset/Cleaned-IDS_Dataset.csv'
df = pd.read_csv(dataset_csv)

In [6]:
# Separate the label column from the model inputs.
# 'Attack Type' is the classification target; every remaining column of `df`
# is used as a feature.
y = df['Attack Type']
X = df.drop('Attack Type', axis=1)

In [7]:
# Map the class labels to integer codes for XGBoost compatibility.
# The fitted encoder `le` is kept so the integer codes can be translated back
# to the original class names through `le.classes_`.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)  # integer-coded labels, used only for XGBoost

In [8]:
# Train-test split: 80/20, stratified on the label so each class keeps its
# share in both partitions; random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# Derive the integer-coded labels from the SAME split instead of calling
# train_test_split a second time. A second call copies the whole feature
# matrix again and only lines up with the first split as long as both calls
# are kept in sync by hand; transforming the already-split labels guarantees
# that y_train_enc / y_test_enc are row-aligned with X_train / X_test.
y_train_enc = le.transform(y_train)
y_test_enc = le.transform(y_test)

# Aliases (not copies) kept for any code that still refers to the old names.
X_train_enc, X_test_enc = X_train, X_test

In [9]:
# Standardize features.
# The scaler is fitted on the training partition only and then applied to both
# partitions, so the test rows play no part in fitting it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Modelling - Logistic Regression
# Fits on the standardized training features with the original (string) labels
# and prints a per-class precision / recall / F1 report for the test partition.
from sklearn.linear_model import LogisticRegression

print("Model: Logistic Regression")
# max_iter=1000 sets an explicit iteration budget for the solver.
lr = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)
lr_report = classification_report(y_test, y_pred_lr)
print(lr_report)

Model: Logistic Regression
                precision    recall  f1-score   support

          Bots       0.62      0.01      0.03       389
   Brute Force       0.99      0.77      0.87      1830
          DDoS       0.99      0.97      0.98     25603
           DoS       0.99      0.92      0.96     38749
Normal Traffic       0.99      0.98      0.99    419012
 Port Scanning       0.73      0.99      0.84     18139
   Web Attacks       0.43      0.01      0.01       429

      accuracy                           0.98    504151
     macro avg       0.82      0.67      0.67    504151
  weighted avg       0.98      0.98      0.98    504151



In [11]:
# Modelling - Random Forest
# Fits a 100-tree forest on the standardized training features with the
# original (string) labels and prints a per-class report for the test partition.
from sklearn.ensemble import RandomForestClassifier

print("Model: Random Forest")
# n_jobs=-1 builds (and predicts with) the trees on all available CPU cores;
# on a training set of this size single-threaded fitting is needlessly slow.
# random_state=42 keeps the forest reproducible regardless of n_jobs.
rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf.fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)
print(classification_report(y_test, y_pred_rf))

Model: Random Forest
                precision    recall  f1-score   support

          Bots       0.87      0.71      0.78       389
   Brute Force       1.00      1.00      1.00      1830
          DDoS       1.00      1.00      1.00     25603
           DoS       1.00      1.00      1.00     38749
Normal Traffic       1.00      1.00      1.00    419012
 Port Scanning       0.99      1.00      0.99     18139
   Web Attacks       1.00      0.97      0.98       429

      accuracy                           1.00    504151
     macro avg       0.98      0.95      0.97    504151
  weighted avg       1.00      1.00      1.00    504151



In [12]:
# Modelling - XGBoost
# Trained on the integer-coded labels (y_train_enc); the report maps the codes
# back to class names through le.classes_.
from xgboost import XGBClassifier

print("Model: XGBoost")
# `use_label_encoder` is intentionally not passed: current XGBoost releases
# ignore it and emit 'Parameters: { "use_label_encoder" } are not used.'
# Labels are already integer-encoded with LabelEncoder, which is all XGBoost needs.
xgb = XGBClassifier(eval_metric='mlogloss')
xgb.fit(X_train_scaled, y_train_enc)
y_pred_xgb = xgb.predict(X_test_scaled)
print(classification_report(y_test_enc, y_pred_xgb, target_names=le.classes_))


Model: XGBoost


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


                precision    recall  f1-score   support

          Bots       0.91      0.74      0.82       389
   Brute Force       1.00      1.00      1.00      1830
          DDoS       1.00      1.00      1.00     25603
           DoS       1.00      1.00      1.00     38749
Normal Traffic       1.00      1.00      1.00    419012
 Port Scanning       0.99      1.00      0.99     18139
   Web Attacks       0.99      0.99      0.99       429

      accuracy                           1.00    504151
     macro avg       0.98      0.96      0.97    504151
  weighted avg       1.00      1.00      1.00    504151



In [None]:
# Modelling - SVM
# A kernel SVC's fit time grows at least quadratically with the number of
# training rows, which is impractical at this dataset's size (the 20% test
# partition alone has ~504k rows), so the kernel-SVC cell never finished.
# A linear SVM (LinearSVC) scales to this many rows and is used instead.
# NOTE(review): this swaps the RBF kernel for a linear decision boundary; if a
# kernel SVM is required, fit SVC on a stratified subsample of the training set.
from sklearn.svm import LinearSVC

print("Model: SVM")
# dual=False solves the primal problem, the recommended setting when the
# number of samples exceeds the number of features.
svm = LinearSVC(dual=False)
svm.fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)
print(classification_report(y_test, y_pred_svm))