In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the dataset CSV into a DataFrame.
# NOTE: the path is absolute (/content/...), so it must be edited when the
# file is stored anywhere else.
file_path = '/content/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv'
df = pd.read_csv(file_path)

In [None]:
# Debug aid: list the column labels exactly as they were read from the CSV.
# Several labels carry a leading space, so later lookups must match them verbatim.
print("Original columns:", list(df.columns))

Original columns: ['Flow ID', ' Source IP', ' Source Port', ' Destination IP', ' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Min', ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', 

In [None]:
# Replace every object-dtype column with integer codes, keeping one fitted
# LabelEncoder per column so the codes can be mapped back to labels later.
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {col: LabelEncoder() for col in categorical_columns}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

In [None]:
# Identifier / addressing columns that are removed before training.
# The leading spaces are part of the column labels as they appear in the CSV.
columns_to_drop = [
    "Flow ID",
    " Source IP",
    " Source Port",
    " Destination IP",
    " Destination Port",
    " Protocol",
    " Timestamp",
]

In [None]:
# Print column names before dropping to debug (the drop itself happens in a later cell)
print("Columns before dropping:", df.columns.tolist())

Columns before dropping: ['Flow ID', ' Source IP', ' Source Port', ' Destination IP', ' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Min', ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Vari

In [None]:
# Remove the identifier columns in place; labels missing from the frame are skipped silently
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

In [None]:
# Debug aid: confirm which columns remain once the identifier columns are gone
print("Columns after dropping:", list(df.columns))


Columns after dropping: [' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Min', ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', 'FIN Flag Count', ' SYN Flag Count', ' RST Flag Count', ' PSH Flag Count', ' ACK Flag Count', ' URG Fl

In [None]:
# Treat +/-infinity as missing, then discard every row that contains a missing value
non_finite = [np.inf, -np.inf]
df.replace(to_replace=non_finite, value=np.nan, inplace=True)
df.dropna(inplace=True)


In [None]:
# Rescale every column with a MinMaxScaler fitted on the cleaned frame;
# the fitted scaler is kept so generated samples can be mapped back later.
scaler = MinMaxScaler().fit(df)
data = scaler.transform(df)

In [None]:
# GAN hyperparameters: width of the generator's noise input and the Adam settings
latent_dim = 100
adam = Adam(beta_1=0.5, learning_rate=2e-4)

In [None]:
# Generator Model
def build_generator():
    """Build the generator network (noise vector -> one synthetic feature row).

    Reads two module-level names: ``latent_dim`` sets the input width and
    ``df.shape[1]`` sets the output width, so the output has one unit per
    training column.

    Returns:
        An uncompiled ``Sequential`` model made of three
        Dense -> LeakyReLU -> BatchNormalization stages (128, 256, 512 units)
        followed by a sigmoid output layer.
    """
    model = Sequential()
    # An explicit Input layer replaces the `input_dim=` kwarg on the first
    # Dense layer, which newer Keras versions warn about.
    model.add(Input(shape=(latent_dim,)))
    for units in (128, 256, 512):
        model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
    # The training data is scaled with MinMaxScaler() (default range [0, 1]).
    # A tanh output spans (-1, 1), so inverse-scaling could land below each
    # column's observed minimum (e.g. negative durations). sigmoid keeps the
    # output inside the scaler's range.
    model.add(Dense(df.shape[1], activation='sigmoid'))
    return model

In [None]:
# Discriminator Model
def build_discriminator():
    """Build the discriminator network (feature row -> probability of being real).

    Reads the module-level ``df``: its column count (``df.shape[1]``) is the
    input width.

    Returns:
        An uncompiled ``Sequential`` model with two Dense -> LeakyReLU stages
        (512, 256 units) and a single sigmoid output unit.
    """
    model = Sequential()
    # An explicit Input layer replaces the `input_dim=` kwarg on the first
    # Dense layer, which newer Keras versions warn about.
    model.add(Input(shape=(df.shape[1],)))
    for units in (512, 256):
        model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [None]:
# Building and compiling the models
generator = build_generator()
discriminator = build_discriminator()
# The discriminator gets its own optimizer instance (same hyperparameters as
# the shared `adam`). The stacked GAN model is compiled with `adam` and updates
# a different set of weights; one optimizer object should not carry state
# (moment estimates, step counter) for two separately trained models.
discriminator_optimizer = Adam(learning_rate=0.0002, beta_1=0.5)
discriminator.compile(
    loss='binary_crossentropy',
    optimizer=discriminator_optimizer,
    metrics=['accuracy'],
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Combine the models to create the GAN
# Stacks generator -> discriminator into one model that maps a noise vector to
# the discriminator's verdict on the generated row.
z = Input(shape=(latent_dim,))
generated_data = generator(z)
# Freeze the discriminator before compiling `combined`; the intent is that
# training `combined` only updates the generator's weights.
# NOTE(review): this flag is set on the same `discriminator` object that is
# trained directly elsewhere. Whether that direct training still updates its
# weights after this point depends on the Keras version -- confirm.
discriminator.trainable = False
validity = discriminator(generated_data)
combined = Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer=adam)

In [None]:
# Training the GAN
def train_gan(epochs, batch_size=64, save_interval=1000):
    """Alternate discriminator and generator updates for ``epochs`` iterations.

    Each iteration is a single batch step, not a pass over the whole dataset:
    the discriminator is trained on ``batch_size // 2`` real rows sampled from
    the module-level ``data`` and on the same number of generated rows, then
    the generator is trained through ``combined`` on ``batch_size`` noise
    vectors labelled as real.

    Args:
        epochs: Number of training iterations to run.
        batch_size: Number of noise vectors per generator step; half of it is
            used for each discriminator step. Must be at least 2.
        save_interval: Call ``save_generated_data`` every this many iterations.

    Raises:
        ValueError: If ``batch_size`` is smaller than 2 (the half batch would
            be empty).
    """
    if batch_size < 2:
        raise ValueError("batch_size must be at least 2")
    half_batch = batch_size // 2

    # Label arrays never change, so build them once. All are shaped (n, 1) to
    # match the discriminator's single-unit output.
    real_labels = np.ones((half_batch, 1))
    fake_labels = np.zeros((half_batch, 1))
    valid_y = np.ones((batch_size, 1))

    for epoch in range(epochs):
        # Train Discriminator
        idx = np.random.randint(0, data.shape[0], half_batch)
        real_data = data[idx]
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        # verbose=0 stops predict() from printing a progress bar every iteration
        generated_data = generator.predict(noise, verbose=0)

        d_loss_real = discriminator.train_on_batch(real_data, real_labels)
        d_loss_fake = discriminator.train_on_batch(generated_data, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = combined.train_on_batch(noise, valid_y)
        # train_on_batch may return a bare scalar or a list of values; the
        # loss is the first entry either way.
        g_loss_value = float(np.ravel(g_loss)[0])

        # Print the progress
        print(f"{epoch + 1}/{epochs} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}] [G loss: {g_loss_value}]")

        # Save the generated data at save intervals
        if (epoch + 1) % save_interval == 0:
            save_generated_data(epoch + 1)

In [None]:
def save_generated_data(epoch, n_samples=1000):
    """Generate synthetic rows and write them to ``generated_packets_epoch_{epoch}.csv``.

    Uses the module-level ``generator``, ``scaler``, ``df``,
    ``categorical_columns``, ``label_encoders`` and ``columns_to_drop``.

    Args:
        epoch: Iteration number, used only in the output file name and message.
        n_samples: Number of rows to generate (defaults to 1000).
    """
    noise = np.random.normal(0, 1, (n_samples, latent_dim))
    generated_data = generator.predict(noise, verbose=0)
    generated_data = scaler.inverse_transform(generated_data)

    # Create a DataFrame with the original structure
    df_generated = pd.DataFrame(generated_data, columns=df.columns)

    # Convert numeric columns back to categorical if needed
    for col in categorical_columns:
        if col in df_generated.columns:
            encoder = label_encoders[col]
            # Generated codes are continuous. Round to the nearest class index
            # and clip into the encoder's valid range, so inverse_transform
            # never receives a code it was not fitted on.
            codes = (
                df_generated[col]
                .round()
                .clip(0, len(encoder.classes_) - 1)
                .astype(int)
            )
            df_generated[col] = encoder.inverse_transform(codes)

    # Add the dropped columns back with empty values
    for col in columns_to_drop:
        if col not in df_generated.columns:
            df_generated[col] = ""

    # Reorder columns to match the original CSV
    df_generated = df_generated[columns_to_drop + df.columns.tolist()]

    # Save to CSV
    df_generated.to_csv(f'generated_packets_epoch_{epoch}.csv', index=False)
    print(f"Generated data saved as generated_packets_epoch_{epoch}.csv")

In [None]:
# Train the GAN
# Each "epoch" in train_gan is one batch update (not a full pass over the
# data), so this runs 10 update steps and writes a single CSV at step 10.
train_gan(epochs=10, batch_size=64, save_interval=10)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step




1/10 [D loss: 0.7010886669158936, acc.: 39.84375] [G loss: [array(0.6943585, dtype=float32), array(0.6943585, dtype=float32), array(0.453125, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
2/10 [D loss: 0.7004319429397583, acc.: 39.73958492279053] [G loss: [array(0.6953133, dtype=float32), array(0.6953133, dtype=float32), array(0.43229166, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
3/10 [D loss: 0.6990883350372314, acc.: 40.0390625] [G loss: [array(0.69605273, dtype=float32), array(0.69605273, dtype=float32), array(0.42578125, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
4/10 [D loss: 0.6960464715957642, acc.: 40.833333134651184] [G loss: [array(0.691939, dtype=float32), array(0.691939, dtype=float32), array(0.43125, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
5/10 [D loss: 0.6930409669876099, acc.: 41.489112377

In [None]:
# Show the working directory; the generated CSV is written with a relative
# file name, so this is where it ends up.
import os
print(os.getcwd())


/content


In [None]:
# pandas is already imported at the top of the notebook; re-importing is harmless.
import pandas as pd

# Specify the file path where the generated CSV file is saved
# NOTE: this rebinds `file_path`, which earlier held the path of the input dataset.
file_path = 'generated_packets_epoch_10.csv'

# Load the CSV file into a DataFrame
df_generated = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to verify the content
# (columns that were written as empty strings are read back as NaN)
print(df_generated.head())


   Flow ID   Source IP   Source Port   Destination IP   Destination Port  \
0      NaN         NaN           NaN              NaN                NaN   
1      NaN         NaN           NaN              NaN                NaN   
2      NaN         NaN           NaN              NaN                NaN   
3      NaN         NaN           NaN              NaN                NaN   
4      NaN         NaN           NaN              NaN                NaN   

    Protocol   Timestamp   Flow Duration   Total Fwd Packets  \
0        NaN         NaN     107550080.0          -34.835968   
1        NaN         NaN      83891060.0         1855.373200   
2        NaN         NaN    -103060610.0         1437.748800   
3        NaN         NaN     110572984.0         -731.353100   
4        NaN         NaN      96437300.0         1830.190200   

    Total Backward Packets  ...   min_seg_size_forward  Active Mean  \
0               2901.94730  ...               8.773498   36407708.0   
1               