In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB

# Load the dataset
df = pd.read_csv("./dataset_sdn.csv")

# Drop rows with missing values
df = df.dropna()

# Encode categorical features
label_encoder = LabelEncoder()
df['src'] = label_encoder.fit_transform(df['src'])
df['dst'] = label_encoder.fit_transform(df['dst'])
df['Protocol'] = label_encoder.fit_transform(df['Protocol'])

# Convert label to binary: 1 for DDoS attack, 0 for non-DDoS attack
df['label'] = label_encoder.fit_transform(df['label'])

# Define features (X) and target variable (y)
X = df.drop('label', axis=1)
y = df['label']

# Check unique values in target variable 'y'
print("Unique values in target variable 'y':", np.unique(y))

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a function to evaluate models
def Evaluate_model(model):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Accuracy Score: {:.2f}%".format(accuracy_score(y_test, y_pred) * 100))
    print("Classification Report:\n{}".format(classification_report(y_test, y_pred)))
    print("Confusion Matrix:\n{}".format(confusion_matrix(y_test, y_pred)))

# Evaluate Logistic Regression model
print("Logistic Regression:")
Evaluate_model(LogisticRegression())

# Evaluate Random Forest Classifier
print("\nRandom Forest Classifier:")
Evaluate_model(RandomForestClassifier())

# Evaluate Decision Tree Classifier
print("\nDecision Tree Classifier:")
Evaluate_model(DecisionTreeClassifier())

# Evaluate Gaussian Naive Bayes Classifier
print("\nGaussian Naive Bayes Classifier:")
Evaluate_model(GaussianNB())


Unique values in target variable 'y': [0 1]
Logistic Regression:


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy Score: 69.72%
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.85      0.77     12613
           1       0.66      0.46      0.55      8155

    accuracy                           0.70     20768
   macro avg       0.69      0.66      0.66     20768
weighted avg       0.69      0.70      0.68     20768

Confusion Matrix:
[[10697  1916]
 [ 4373  3782]]

Random Forest Classifier:
Accuracy Score: 100.00%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12613
           1       1.00      1.00      1.00      8155

    accuracy                           1.00     20768
   macro avg       1.00      1.00      1.00     20768
weighted avg       1.00      1.00      1.00     20768

Confusion Matrix:
[[12613     0]
 [    0  8155]]

Decision Tree Classifier:
Accuracy Score: 100.00%
Classification Report:
              precision    recall  f1-score   support
