In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import glob
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 0 = all logs, 1 = info, 2 = warning, 3 = error
import tensorflow as tf

In [17]:
features = [
    'Protocol', 'Flow Duration', 'Total Fwd Packets', 'Total Backward Packets', 'Total Length of Fwd Packets',
    'Total Length of Bwd Packets', 'Fwd Packet Length Max', 'Fwd Packet Length Min', 'Fwd Packet Length Mean',
    'Fwd Packet Length Std', 'Bwd Packet Length Max', 'Bwd Packet Length Min', 'Bwd Packet Length Mean',
    'Bwd Packet Length Std', 'Flow Bytes/s', 'Flow Packets/s', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags',
    'Bwd URG Flags', 'Fwd Header Length', 'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s', 'Min Packet Length',
    'Max Packet Length', 'Packet Length Mean', 'Packet Length Std', 'Packet Length Variance', 'FIN Flag Count',
    'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count', 'URG Flag Count', 'CWE Flag Count',
    'ECE Flag Count', 'Down/Up Ratio', 'Average Packet Size', 'Avg Fwd Segment Size', 'Avg Bwd Segment Size',
    'Subflow Fwd Packets', 'Subflow Fwd Bytes', 'Subflow Bwd Packets', 'Subflow Bwd Bytes',
    'Init_Win_bytes_forward', 'Init_Win_bytes_backward', 'act_data_pkt_fwd', 'min_seg_size_forward'
]

In [3]:
def load_parquet_files(file_list):
    dfs = []
    for f in file_list:
        df = pd.read_parquet(f)
        print(f"[INFO] Loaded {f} with shape: {df.shape}")
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

# === Load training and test data ===
train_files = sorted(glob.glob(r'/home/garv/Desktop/Cyber-Security/archive (1)/*training.parquet'))
test_files = sorted(glob.glob(r'/home/garv/Desktop/Cyber-Security/archive (1)/*testing.parquet'))

train_df = load_parquet_files(train_files)
test_df = load_parquet_files(test_files)
print("[INFO] Loading training data...")
train_df = load_parquet_files(train_files)

print("[INFO] Loading test data...")
test_df = load_parquet_files(test_files)

[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/LDAP-training.parquet with shape: (6715, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/MSSQL-training.parquet with shape: (10974, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/NetBIOS-training.parquet with shape: (1631, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/Portmap-training.parquet with shape: (5105, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/Syn-training.parquet with shape: (70336, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/UDP-training.parquet with shape: (17770, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/UDPLag-training.parquet with shape: (12639, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/DNS-testing.parquet with shape: (6703, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security/archive (1)/LDAP-testing.parquet with shape: (2831, 78)
[INFO] Loaded /home/garv/Desktop/Cyber-Security

In [4]:
train_df.columns = train_df.columns.str.strip()
test_df.columns = test_df.columns.str.strip()

In [5]:
common_features = [f for f in features if f in train_df.columns and f in test_df.columns]
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update = train_df[common_features + ['Label']]
test_df = test_df[common_features + ['Label']]
print(f"[INFO] Training data shape after filtering: {train_df.shape}")
print(f"[INFO] Test data shape after filtering: {test_df.shape}")
print(f"[INFO] Using {len(common_features)} features.")

[INFO] Training data shape after filtering: (125170, 41)
[INFO] Test data shape after filtering: (306201, 41)
[INFO] Using 40 features.


In [6]:
print(train_df.columns)


Index(['Protocol', 'Flow Duration', 'Total Fwd Packets',
       'Total Backward Packets', 'Fwd Packet Length Max',
       'Fwd Packet Length Min', 'Fwd Packet Length Mean',
       'Fwd Packet Length Std', 'Bwd Packet Length Max',
       'Bwd Packet Length Min', 'Bwd Packet Length Mean',
       'Bwd Packet Length Std', 'Flow Bytes/s', 'Flow Packets/s',
       'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags',
       'Fwd Header Length', 'Bwd Header Length', 'Fwd Packets/s',
       'Bwd Packets/s', 'Packet Length Mean', 'Packet Length Std',
       'Packet Length Variance', 'FIN Flag Count', 'SYN Flag Count',
       'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count', 'URG Flag Count',
       'CWE Flag Count', 'ECE Flag Count', 'Down/Up Ratio',
       'Avg Fwd Segment Size', 'Avg Bwd Segment Size', 'Subflow Fwd Packets',
       'Subflow Fwd Bytes', 'Subflow Bwd Packets', 'Subflow Bwd Bytes',
       'Label'],
      dtype='object')


In [9]:
print("Feature columns:", train_df.columns.tolist())


Feature columns: ['Protocol', 'Flow Duration', 'Total Fwd Packets', 'Total Backward Packets', 'Fwd Packet Length Max', 'Fwd Packet Length Min', 'Fwd Packet Length Mean', 'Fwd Packet Length Std', 'Bwd Packet Length Max', 'Bwd Packet Length Min', 'Bwd Packet Length Mean', 'Bwd Packet Length Std', 'Flow Bytes/s', 'Flow Packets/s', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Length', 'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s', 'Packet Length Mean', 'Packet Length Std', 'Packet Length Variance', 'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count', 'URG Flag Count', 'CWE Flag Count', 'ECE Flag Count', 'Down/Up Ratio', 'Avg Fwd Segment Size', 'Avg Bwd Segment Size', 'Subflow Fwd Packets', 'Subflow Fwd Bytes', 'Subflow Bwd Packets', 'Subflow Bwd Bytes', 'Label']


In [10]:
train_df.replace([np.inf, -np.inf], 0, inplace=True)
test_df.replace([np.inf, -np.inf], 0, inplace=True)
train_df.fillna(0, inplace=True)
test_df.fillna(0, inplace=True)

In [11]:
# === Binary Label Encoding (Robust) ===
train_df['Label'] = train_df['Label'].astype(str).str.strip().str.lower().apply(lambda x: 0 if x == 'benign' else 1)
test_df['Label'] = test_df['Label'].astype(str).str.strip().str.lower().apply(lambda x: 0 if x == 'benign' else 1)


In [12]:
X_train = train_df[common_features].astype(np.float32)
y_train = train_df['Label'].astype(np.int32)

X_test = test_df[common_features].astype(np.float32)
y_test = test_df['Label'].astype(np.int32)

print(f"[INFO] X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"[INFO] X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

[INFO] X_train shape: (125170, 40), y_train shape: (125170,)
[INFO] X_test shape: (306201, 40), y_test shape: (306201,)


In [13]:
model = Sequential([
    Input(shape=(X_train.shape[1],)),  # Explicit input layer

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid')
])

E0000 00:00:1748178498.654946   23155 cuda_executor.cc:1228] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1748178498.655780   23155 gpu_device.cc:2341] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [14]:
model.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [15]:
print("[INFO] Starting training...")
model.fit(X_train_scaled, y_train, epochs=15, batch_size=128, validation_split=0.2, verbose=1)

[INFO] Starting training...
Epoch 1/15
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.9717 - loss: 0.0907 - val_accuracy: 0.9941 - val_loss: 0.0231
Epoch 2/15
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9859 - loss: 0.0477 - val_accuracy: 0.9943 - val_loss: 0.0166
Epoch 3/15
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9893 - loss: 0.0359 - val_accuracy: 0.9945 - val_loss: 0.0165
Epoch 4/15
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9929 - loss: 0.0256 - val_accuracy: 0.9984 - val_loss: 0.0190
Epoch 5/15
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9942 - loss: 0.0216 - val_accuracy: 0.9974 - val_loss: 0.0176
Epoch 6/15
[1m783/783[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9945 - loss: 0.0209 - val_accuracy: 0.9984 - val_loss: 0.0095


<keras.src.callbacks.history.History at 0x7d4283458070>

In [18]:
print("[INFO] Evaluating model...")
y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32")

print("\n[RESULT] Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\n[RESULT] Classification Report:\n", classification_report(y_test, y_pred))

[INFO] Evaluating model...
[1m9569/9569[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step

[RESULT] Confusion Matrix:
 [[ 51282    122]
 [  8339 246458]]

[RESULT] Classification Report:
               precision    recall  f1-score   support

           0       0.86      1.00      0.92     51404
           1       1.00      0.97      0.98    254797

    accuracy                           0.97    306201
   macro avg       0.93      0.98      0.95    306201
weighted avg       0.98      0.97      0.97    306201



In [36]:
model.save("ddos_detection_model11.h5")
# model.save("ddos_detection_model01.keras")
import joblib
joblib.dump(scaler, "scaler1.pkl")

print("[INFO] Model and scaler saved.")



[INFO] Model and scaler saved.


In [41]:
import numpy as np

input_data = np.array([[-1.67101803, -0.66954652 , 0.03227004, -0.05543719, -0.33698766 ,-0.20719318,
  -0.26592836 ,-0.26839275 ,-0.26038845 ,-0.37300425 ,-0.28334341 ,-0.23785127,
  -0.23676508 ,-0.33507217 ,-0.22834296 , 0.         , 0.          ,0.,
   0.0966536  , 0.01435393 ,-0.32910057 ,-0.08554584 ,-0.31041599 ,-0.30933282,
  -0.20920671  ,0.         ,-0.03199463 ,-0.22834296  ,0.         ,-0.9434815,
  -0.44355997 ,-0.29931705  ,0.         ,-0.53004644 ,-0.26592836 ,-0.28334341,
   0.03227004  ,0.23694276 ,-0.05543719 ,-0.01861421]])

# input_data2=np.array([[ 1.33350433e+00 -6.69546461e-01 2.61359918e+01 -5.54371948e-02
#   -4.13743612e-01 -3.20569281e-01 -3.77299372e-01 -2.68392747e-01
#   -2.60388452e-01 -3.73004246e-01 -2.83343413e-01 -2.37851266e-01
#   -2.36244735e-01 -3.26049658e-01 -2.28342965e-01  0.00000000e+00
#    0.00000000e+00  0.00000000e+00  9.67186672e-02  1.43539318e-02
#   -3.20007729e-01 -8.55458380e-02 -4.19337911e-01 -3.09332821e-01
#   -2.09206714e-01  0.00000000e+00 -3.19946254e-02 -2.28342965e-01
#    0.00000000e+00 -9.43481505e-01 -4.43559972e-01 -2.99317050e-01
#    0.00000000e+00 -5.30046444e-01 -3.77299372e-01 -2.83343413e-01
#    2.61359918e+01  1.36469935e+02 -5.54371948e-02 -1.86142079e-02]])


# print(np.array_equal(input_data,input_data))

prediction = model.predict(input_data)
print(prediction)
input_data

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[[0.9294811]]


array([[-1.67101803, -0.66954652,  0.03227004, -0.05543719, -0.33698766,
        -0.20719318, -0.26592836, -0.26839275, -0.26038845, -0.37300425,
        -0.28334341, -0.23785127, -0.23676508, -0.33507217, -0.22834296,
         0.        ,  0.        ,  0.        ,  0.0966536 ,  0.01435393,
        -0.32910057, -0.08554584, -0.31041599, -0.30933282, -0.20920671,
         0.        , -0.03199463, -0.22834296,  0.        , -0.9434815 ,
        -0.44355997, -0.29931705,  0.        , -0.53004644, -0.26592836,
        -0.28334341,  0.03227004,  0.23694276, -0.05543719, -0.01861421]])

In [42]:
model.summary()
