In [1]:
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, LSTM

2025-06-16 11:32:32.344355: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the four preprocessed CICIDS splits (train/test features and labels).
# joblib.load unpickles its input, so these files must come from a trusted source.
X_train, X_test, Y_train, Y_test = map(
    joblib.load,
    (
        '../../preprocessed_data/CICIDS/X_train.joblib',
        '../../preprocessed_data/CICIDS/X_test.joblib',
        '../../preprocessed_data/CICIDS/Y_train.joblib',
        '../../preprocessed_data/CICIDS/Y_test.joblib',
    ),
)

### CNN training

In [3]:
# 1D-CNN binary classifier. Each sample is declared to the network as a
# sequence of X_train.shape[1] steps with a single channel.
cnn_layers = [
    Input(shape=(X_train.shape[1], 1)),
    Conv1D(filters=32, kernel_size=3, activation='relu'),
    # Max-pool over windows of 2 steps, which halves the sequence length.
    MaxPooling1D(pool_size=2),
    # Randomly drop 30% of the activations during training to limit overfitting.
    Dropout(rate=0.3),
    # Collapse (steps, channels) into one flat feature vector per sample.
    Flatten(),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.3),
    # Single sigmoid unit: binary output (0=normal, 1=attack).
    Dense(units=1, activation='sigmoid'),
]
CNN_model = Sequential(cnn_layers)

CNN_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

CNN_model.summary()

# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics are not an independent hold-out estimate.
# `history` is reassigned by the later LSTM and AE-LSTM training cells.
history = CNN_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=5,
    validation_data=(X_test, Y_test),
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 67, 32)            128       
                                                                 
 max_pooling1d (MaxPooling1  (None, 33, 32)            0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 33, 32)            0         
                                                                 
 flatten (Flatten)           (None, 1056)              0         
                                                                 
 dense (Dense)               (None, 64)                67648     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                        

2025-06-16 11:32:34.248232: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 546899520 exceeds 10% of free system memory.


 2756/30962 [=>............................] - ETA: 1:02 - loss: 0.1486 - accuracy: 0.9403

KeyboardInterrupt: 

### LSTM training

In [None]:
# LSTM binary classifier. Each sample is declared to the network as a
# sequence of X_train.shape[1] steps with a single channel.
lstm_layers = [
    Input(shape=(X_train.shape[1], 1)),
    # return_sequences=False: only the final LSTM output is passed on.
    LSTM(units=64, return_sequences=False),
    Dropout(rate=0.3),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.3),
    # Single sigmoid unit for the binary decision.
    Dense(units=1, activation='sigmoid'),
]
LSTM_model = Sequential(lstm_layers)

LSTM_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

LSTM_model.summary()

# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics are not an independent hold-out estimate.
# `history` here replaces whatever an earlier training cell stored in it.
history = LSTM_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=3,
    validation_data=(X_test, Y_test),
)


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                16896     
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 64)                4160      
                                                                 
 dropout_3 (Dropout)         (None, 64)                0         
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 21121 (82.50 KB)
Trainable params: 21121 (82.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/3


2025-06-12 15:12:19.659506: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 618234240 exceeds 10% of free system memory.


Epoch 2/3
Epoch 3/3


### AE-LSTM training

In [None]:
# Number of steps fed to the LSTM (second axis of X_train).
input_dim = X_train.shape[1]

# Encoder: an LSTM summarises the sequence, then a 32-unit dense bottleneck.
# NOTE(review): despite the "AE" name, this cell defines no decoder and no
# reconstruction loss; the network is trained on the classification loss only.
inputs = Input(shape=(input_dim, 1))
lstm_summary = LSTM(units=64, return_sequences=False)(inputs)
encoded = Dense(units=32, activation='relu')(lstm_summary)

# Classifier head stacked on the encoded representation.
x = encoded
for head_layer in (
    Dropout(rate=0.3),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.3),
):
    x = head_layer(x)
output = Dense(units=1, activation='sigmoid')(x)

AE_LSTM_model = Model(inputs, output)

AE_LSTM_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
AE_LSTM_model.summary()

# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics are not an independent hold-out estimate.
# `history` here replaces whatever an earlier training cell stored in it.
history = AE_LSTM_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=3,
    validation_data=(X_test, Y_test),
)


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 78, 1)]           0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                16896     
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dropout_4 (Dropout)         (None, 32)                0         
                                                                 
 dense_5 (Dense)             (None, 64)                2112      
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_6 (Dense)             (None, 1)                 65    

2025-06-12 15:47:01.536638: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 618234240 exceeds 10% of free system memory.


Epoch 2/3
Epoch 3/3


In [None]:
# Persist each trained model in the native Keras format.
# This cell needs CNN_model, LSTM_model and AE_LSTM_model to exist, i.e. all
# three training cells must have been run first.
for trained_model, keras_path in (
    (CNN_model, "../models/cnn_CICIDS2017_model.keras"),
    (LSTM_model, "../models/lstm_CICIDS2017_model.keras"),
    (AE_LSTM_model, "../models/ae-lstm_CICIDS2017_model.keras"),
):
    trained_model.save(keras_path)

In [None]:
import pickle

In [None]:
# Also serialise the three models with joblib (pickle-based).
# NOTE(review): this duplicates the .keras files written by the previous cell.
# Pickle files can execute arbitrary code on load, so the .pkl artefacts must
# only ever be loaded from a trusted location. Confirm something actually
# consumes them before keeping this cell.
file_name_cnn, file_name_lstm, file_name_ae_lstm = (
    '../models/cnn_CICIDS2017.pkl',
    '../models/lstm_CICIDS2017.pkl',
    '../models/ae-lstm_CICIDS2017.pkl',
)

joblib.dump(value=CNN_model, filename=file_name_cnn)
joblib.dump(value=LSTM_model, filename=file_name_lstm)
# Last expression of the cell: the list of written file names is displayed.
joblib.dump(value=AE_LSTM_model, filename=file_name_ae_lstm)

['../models/ae-lstm_CICIDS2017.pkl']

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate the CNN on the test split: per-class report, confusion matrix and
# the four raw counts.
#
# The class ids are pinned explicitly so that both the report and the matrix
# always cover the two classes, in the order "Normal" (0) then "Attacks" (1).
# Without this, a split (or a degenerate model) in which only one class shows
# up yields a 1x1 matrix, the four-way unpacking below raises ValueError, and
# classification_report rejects the two target_names.
# Assumes Y_test uses 0 = normal and 1 = attack, matching the sigmoid output.
CLASS_LABELS = [0, 1]

# Sigmoid outputs of the CNN for every test sample.
y_pred_prob = CNN_model.predict(X_test)

# Hard decision: outputs strictly above 0.5 are classified as attacks.
y_pred = (y_pred_prob > 0.5).astype("int32")

# Precision, recall and F1-score per class.
print(
    classification_report(
        Y_test,
        y_pred,
        labels=CLASS_LABELS,
        target_names=["Normal", "Attacks"],
    )
)

# Confusion matrix with rows = true class and columns = predicted class.
cm = confusion_matrix(Y_test, y_pred, labels=CLASS_LABELS)
print("Confusion matrix :\n", cm)

# With labels [0, 1] the flattened 2x2 matrix is always (tn, fp, fn, tp).
tn, fp, fn, tp = cm.ravel()
print(f"Attacks detected (True Positive) : {tp}")
print(f"Normal traffic detected (True Negative) : {tn}")
print(f"Attacks missed (False Negative) : {fn}")
print(f"False alarms (False Positive) : {fp}")


              precision    recall  f1-score   support

      Normal       0.99      0.98      0.99    682324
     Attacks       0.93      0.97      0.95    166899

    accuracy                           0.98    849223
   macro avg       0.96      0.98      0.97    849223
weighted avg       0.98      0.98      0.98    849223

Confusion matrix :
 [[670976  11348]
 [  5331 161568]]
Attacks detected (True Positive) : 161568
Normal traffic detected (True Negative) : 670976
Attacks missed (False Negative) : 5331
False alarms (False Positive) : 11348
