In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten
import pickle
import time

In [2]:
data = pd.read_csv('sampled_sdn_dataset.csv')

In [3]:
X = data.drop(columns=['Label']).values  # Features
y = data['Label'].values  # Target (encoded 0 or 1)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,test_size=0.2, random_state=42)


In [5]:
# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [6]:
# Reshape data for CNN-LSTM
X_train_resampled = X_train_resampled.reshape(X_train_resampled.shape[0], X_train_resampled.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [7]:
# Build CNN-LSTM model
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train_resampled.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    LSTM(100, return_sequences=False),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [10]:
# Train the model
history = model.fit(X_train_resampled, y_train_resampled, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m4610/4610[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 11ms/step - accuracy: 0.9976 - loss: 0.0093 - val_accuracy: 0.9988 - val_loss: 0.0065
Epoch 2/10
[1m4610/4610[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 11ms/step - accuracy: 0.9989 - loss: 0.0049 - val_accuracy: 0.9993 - val_loss: 0.0038
Epoch 3/10
[1m4610/4610[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 11ms/step - accuracy: 0.9994 - loss: 0.0031 - val_accuracy: 0.9991 - val_loss: 0.0055
Epoch 4/10
[1m4610/4610[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 11ms/step - accuracy: 0.9990 - loss: 0.0040 - val_accuracy: 0.9993 - val_loss: 0.0034
Epoch 5/10
[1m4610/4610[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 11ms/step - accuracy: 0.9995 - loss: 0.0029 - val_accuracy: 0.9993 - val_loss: 0.0031
Epoch 6/10
[1m4610/4610[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 11ms/step - accuracy: 0.9990 - loss: 0.0049 - val_accuracy: 0.9992 - val_loss: 0.0042
Epoc

In [12]:
# Test the model
start_time = time.time()
y_pred = (model.predict(X_test) > 0.5).astype(int)
inference_time = time.time() - start_time

[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step


In [13]:
# Compute metrics
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
sensitivity = conf_matrix[1, 1] / (conf_matrix[1, 1] + conf_matrix[1, 0])

print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)
print("Accuracy:", accuracy)
print("Sensitivity:", sensitivity)
print("Inference Time (seconds):", inference_time)


Confusion Matrix:
 [[16908    44]
 [   12 23036]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     16952
           1       1.00      1.00      1.00     23048

    accuracy                           1.00     40000
   macro avg       1.00      1.00      1.00     40000
weighted avg       1.00      1.00      1.00     40000

Accuracy: 0.9986
Sensitivity: 0.9994793474488025
Inference Time (seconds): 4.928214073181152


In [14]:
# Save the model
with open('cnn_lstm__smote_model.pkl', 'wb') as file:
    pickle.dump(model, file)

# Test on custom data

download the union_dataset.csv file, please find the link in the readme file

In [15]:
# Test the saved model on another dataset
new_data = pd.read_csv('union_dataset.csv')  # Replace with actual test file path
X_new = new_data.drop(columns=['Label']).values.reshape(-1, X_train_resampled.shape[1], 1)
y_new = new_data['Label'].values

In [17]:
with open('cnn_lstm__smote_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

In [18]:
# Test the loaded model
start_time = time.time()
y_new_pred = (loaded_model.predict(X_new) > 0.5).astype(int)
new_inference_time = time.time() - start_time

[1m31214/31214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 4ms/step


In [19]:
# Compute new metrics
new_conf_matrix = confusion_matrix(y_new, y_new_pred)
new_report = classification_report(y_new, y_new_pred)
new_accuracy = accuracy_score(y_new, y_new_pred)
new_sensitivity = new_conf_matrix[1, 1] / (new_conf_matrix[1, 1] + new_conf_matrix[1, 0])


In [20]:
print("\nNew Dataset Results:")
print("Confusion Matrix:\n", new_conf_matrix)
print("Classification Report:\n", new_report)
print("Accuracy:", new_accuracy)
print("Sensitivity:", new_sensitivity)
print("Inference Time (seconds):", new_inference_time)


New Dataset Results:
Confusion Matrix:
 [[421530   1097]
 [   253 575938]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    422627
           1       1.00      1.00      1.00    576191

    accuracy                           1.00    998818
   macro avg       1.00      1.00      1.00    998818
weighted avg       1.00      1.00      1.00    998818

Accuracy: 0.9986484024116505
Sensitivity: 0.9995609094900822
Inference Time (seconds): 118.52576184272766
