In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam

In [21]:
file_path = 'Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv'
df = pd.read_csv(file_path)

print("Columns in the dataset:", df.columns)


categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


columns_to_drop = ["Flow ID", " Source IP", " Source Port", " Destination IP", " Destination Port", " Protocol", " Timestamp", "Fwd Header Length", "Bwd Header Length", "Fwd Packet Length Mean", "Bwd Packet Length Mean", "Bwd Packet Length Std", "Fwd Packets/s", "Bwd Packets/s", "Fwd IAT Mean", "Fwd IAT Std", "Bwd IAT Mean", "Bwd IAT Std", "Fwd PSH Flags", "Bwd PSH Flags", "Fwd URG Flags", "Bwd URG Flags", "Fwd Packets/s", "Bwd Packets/s", "Bwd Packet Length Max", "Bwd Packet Length Std", "Fwd Avg Bytes/Bulk", "Fwd Avg Packets/Bulk", "Fwd Avg Bulk Rate", "Bwd Avg Bytes/Bulk", "Bwd Avg Packets/Bulk", "Bwd Avg Bulk Rate", "Subflow Fwd Packets", "Subflow Bwd Packets", "Init_Win_bytes_forward", "Init_Win_bytes_backward", "act_data_pkt_fwd", "min_seg_size_forward"]
df.drop(columns_to_drop, axis=1, inplace=True, errors='ignore')


df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)

print("Unique values in ' Label' column:", df[' Label'].unique())


benign_data = df[df[' Label'] == 0]  
ddos_data = df[df[' Label'] == 1]    

print(f"Number of BENIGN samples: {len(benign_data)}")
print(f"Number of DDoS samples: {len(ddos_data)}")

if len(benign_data) == 0 or len(ddos_data) == 0:
    raise ValueError("No samples found for BENIGN or DDoS labels. Check the dataset and labels.")


scaler = MinMaxScaler()
benign_data_scaled = scaler.fit_transform(benign_data)
ddos_data_scaled = scaler.fit_transform(ddos_data)

latent_dim = 100

Columns in the dataset: Index(['Flow ID', ' Source IP', ' Source Port', ' Destination IP',
       ' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration',
       ' Total Fwd Packets', ' Total Backward Packets',
       'Total Length of Fwd Packets', ' Total Length of Bwd Packets',
       ' Fwd Packet Length Max', ' Fwd Packet Length Min',
       ' Fwd Packet Length Mean', ' Fwd Packet Length Std',
       'Bwd Packet Length Max', ' Bwd Packet Length Min',
       ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s',
       ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max',
       ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std',
       ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean',
       ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags',
       ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags',
       ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s',
       ' Bwd Packets/s', ' Min 

In [12]:
def build_generator():
    model = Sequential()
    model.add(Dense(128, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(df.shape[1], activation='tanh'))
    return model

In [13]:
def build_discriminator():
    model = Sequential()
    model.add(Dense(512, input_dim=df.shape[1]))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [15]:
def train_gan(data, epochs, batch_size=64, save_interval=100, category="BENIGN"):
    half_batch = int(batch_size / 2)
    adam = Adam(learning_rate=0.0002, beta_1=0.5)

    generator = build_generator()
    discriminator = build_discriminator()
    discriminator.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

    z = Input(shape=(latent_dim,))
    generated_data = generator(z)
    discriminator.trainable = False
    validity = discriminator(generated_data)
    combined = Model(z, validity)
    combined.compile(loss='binary_crossentropy', optimizer=adam)

    min_loss_improvement = 1e-4  # Threshold for loss improvement
    previous_loss = np.inf

    for epoch in range(epochs):
        # Sample real and fake data
        idx = np.random.randint(0, data.shape[0], half_batch)
        real_data = data[idx]
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        generated_data = generator.predict(noise)

        # Train discriminator on real and fake data
        d_loss_real = discriminator.train_on_batch(real_data, np.ones((half_batch, 1)))
        d_loss_fake = discriminator.train_on_batch(generated_data, np.zeros((half_batch, 1)))
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train generator via combined model
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        valid_y = np.array([1] * batch_size)
        g_loss = combined.train_on_batch(noise, valid_y)

        # Printing the progress
        print(f"{category} Epoch {epoch + 1}/{epochs} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}] [G loss: {g_loss}]")

        # Extracting the first element (loss) from g_loss and comparing it
        if isinstance(g_loss, list):
            g_loss_value = g_loss[0]  # Extract loss value from list
        else:
            g_loss_value = g_loss

        # Check for insignificant loss improvement
        if abs(previous_loss - g_loss_value) < min_loss_improvement:
            print(f"Training stopped early due to insignificant loss improvement at epoch {epoch + 1}")
            break
        previous_loss = g_loss_value

        # Saving the model at specified intervals
        if (epoch + 1) % save_interval == 0:
            save_generated_data(generator, epoch + 1, category)
            save_model(generator, f"{category}_generator_epoch_{epoch + 1}.h5")

In [16]:
def save_generated_data(generator, epoch, category):
    try:
        noise = np.random.normal(0, 1, (1000, latent_dim))
        generated_data = generator.predict(noise)
        
        generated_data = scaler.inverse_transform(generated_data)
        
        print(f"Generated data shape: {generated_data.shape}")
        print(f"First row of generated data: {generated_data[0]}")
        
        df_generated = pd.DataFrame(generated_data, columns=df.columns)

        for col in categorical_columns:
            if col in df_generated.columns:
                df_generated[col] = label_encoders[col].inverse_transform(df_generated[col].astype(int))
        
        for col in columns_to_drop:
            if col not in df_generated.columns:
                df_generated[col] = ""

        df_generated = df_generated[columns_to_drop + df.columns.tolist()]
        
        file_name = f'generated_{category}_packets_epoch_{epoch}.csv'
        df_generated.to_csv(file_name, index=False)
        print(f"Generated {category} data saved as {file_name}")
    
    except Exception as e:
        print(f"Error while saving generated data: {e}")

In [17]:
def save_model(generator, filename):
    generator.save(filename)
    print(f"Model saved as {filename}")

train_gan(benign_data_scaled, epochs=200, batch_size=64, save_interval=200, category="BENIGN")
train_gan(ddos_data_scaled, epochs=200, batch_size=64, save_interval=200, category="DDoS")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step




BENIGN Epoch 1/200 [D loss: 0.694170355796814, acc.: 75.0] [G loss: [array(0.71007496, dtype=float32), array(0.71007496, dtype=float32), array(0.5, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
BENIGN Epoch 2/200 [D loss: 0.7031886577606201, acc.: 58.33333730697632] [G loss: [array(0.70801497, dtype=float32), array(0.70801497, dtype=float32), array(0.5, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
BENIGN Epoch 3/200 [D loss: 0.7050981521606445, acc.: 55.000001192092896] [G loss: [array(0.7087981, dtype=float32), array(0.7087981, dtype=float32), array(0.5, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
BENIGN Epoch 4/200 [D loss: 0.7064563035964966, acc.: 53.57142686843872] [G loss: [array(0.7091882, dtype=float32), array(0.7091882, dtype=float32), array(0.5, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
BENIGN Epoch 