In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the dataset (assuming the CSV file is in the same directory)
file_path = '/content/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv'
df = pd.read_csv(file_path)

In [None]:
# Print column names to debug
print("Original columns:", df.columns.tolist())

Original columns: ['Flow ID', ' Source IP', ' Source Port', ' Destination IP', ' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Min', ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', 

In [None]:
# Identify categorical columns and convert them to numeric values
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [None]:
# List of columns to drop
columns_to_drop = ["Flow ID", " Source IP", " Source Port", " Destination IP", " Destination Port", " Protocol", " Timestamp"]

In [None]:
# Print column names after dropping to debug
print("Columns before dropping:", df.columns.tolist())

Columns before dropping: ['Flow ID', ' Source IP', ' Source Port', ' Destination IP', ' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Min', ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Vari

In [None]:
# Drop unnecessary columns
df.drop(columns_to_drop, axis=1, inplace=True, errors='ignore')

In [None]:
# Print column names after dropping to debug
print("Columns after dropping:", df.columns.tolist())

Columns after dropping: [' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Min', ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', 'FIN Flag Count', ' SYN Flag Count', ' RST Flag Count', ' PSH Flag Count', ' ACK Flag Count', ' URG Fl

In [None]:
# Check for and handle NaN or infinity values
df.replace([np.inf, -np.inf], np.nan, inplace=True)  # Replace inf with NaN
df.dropna(inplace=True)  # Drop rows with NaN values

In [None]:
# Normalize the data
scaler = MinMaxScaler()
data = scaler.fit_transform(df)

In [None]:
# GAN Parameters
latent_dim = 100
adam = Adam(learning_rate=0.0002, beta_1=0.5)

In [None]:
def build_generator():
    model = Sequential()
    model.add(Dense(256, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(1024))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(df.shape[1], activation='tanh'))
    return model

In [None]:
def build_discriminator():
    model = Sequential()
    model.add(Dense(1024, input_dim=df.shape[1]))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [None]:
# Building and compiling the models
generator = build_generator()
discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Combine the models to create the GAN
z = Input(shape=(latent_dim,))
generated_data = generator(z)
discriminator.trainable = False
validity = discriminator(generated_data)
combined = Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer=adam)

In [None]:
# Training the GAN
def train_gan(epochs, batch_size=64, save_interval=1000):
    half_batch = int(batch_size / 2)

    for epoch in range(epochs):
        # Train Discriminator
        idx = np.random.randint(0, data.shape[0], half_batch)
        real_data = data[idx]
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        generated_data = generator.predict(noise)

        d_loss_real = discriminator.train_on_batch(real_data, np.ones((half_batch, 1)))
        d_loss_fake = discriminator.train_on_batch(generated_data, np.zeros((half_batch, 1)))
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        valid_y = np.array([1] * batch_size)
        g_loss = combined.train_on_batch(noise, valid_y)

        # Print the progress
        print(f"{epoch + 1}/{epochs} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}] [G loss: {g_loss}]")

        # Save the generated data at save intervals
        if (epoch + 1) % save_interval == 0:
            save_generated_data(epoch + 1)

In [None]:
def save_generated_data(epoch):
    noise = np.random.normal(0, 1, (1000, latent_dim))
    generated_data = generator.predict(noise)
    generated_data = scaler.inverse_transform(generated_data)

    # Create a DataFrame with the original structure
    df_generated = pd.DataFrame(generated_data, columns=df.columns)

    # Convert numeric columns back to categorical if needed
    for col in categorical_columns:
        if col in df_generated.columns:
            df_generated[col] = label_encoders[col].inverse_transform(df_generated[col].astype(int))

    # Add the dropped columns back with empty values
    for col in columns_to_drop:
        if col not in df_generated.columns:
            df_generated[col] = ""

    # Reorder columns to match the original CSV
    df_generated = df_generated[columns_to_drop + df.columns.tolist()]

    # Save to CSV
    df_generated.to_csv(f'generated_packets_epoch_{epoch}.csv', index=False)
    print(f"Generated data saved as generated_packets_epoch_{epoch}.csv")

In [None]:
# Train the GAN
train_gan(epochs=10, batch_size=64, save_interval=10)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step




1/10 [D loss: 0.6912065744400024, acc.: 65.625] [G loss: [array(0.69064087, dtype=float32), array(0.69064087, dtype=float32), array(0.625, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step




2/10 [D loss: 0.6938214302062988, acc.: 53.25521230697632] [G loss: [array(0.6958473, dtype=float32), array(0.6958473, dtype=float32), array(0.4921875, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
3/10 [D loss: 0.697439968585968, acc.: 46.25000059604645] [G loss: [array(0.6991051, dtype=float32), array(0.6991051, dtype=float32), array(0.4375, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
4/10 [D loss: 0.7009832859039307, acc.: 43.10826063156128] [G loss: [array(0.7032947, dtype=float32), array(0.7032947, dtype=float32), array(0.40234375, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
5/10 [D loss: 0.7046542167663574, acc.: 39.39236104488373] [G loss: [array(0.70658153, dtype=float32), array(0.70658153, dtype=float32), array(0.378125, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
6/10 [D loss: 0.7072598338127136, acc.

In [None]:
# Specify the file path where the generated CSV file is saved
file_path = 'generated_packets_epoch_10.csv'

# Load the CSV file into a DataFrame
df_generated = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to verify the content
print(df_generated.head())


   Flow ID   Source IP   Source Port   Destination IP   Destination Port  \
0      NaN         NaN           NaN              NaN                NaN   
1      NaN         NaN           NaN              NaN                NaN   
2      NaN         NaN           NaN              NaN                NaN   
3      NaN         NaN           NaN              NaN                NaN   
4      NaN         NaN           NaN              NaN                NaN   

    Protocol   Timestamp   Flow Duration   Total Fwd Packets  \
0        NaN         NaN     -49364084.0         -1460.94950   
1        NaN         NaN      12907312.0         -1674.70800   
2        NaN         NaN      70824536.0         -1784.60460   
3        NaN         NaN      22987750.0          -847.30927   
4        NaN         NaN      68040296.0          -641.21490   

    Total Backward Packets  ...   min_seg_size_forward  Active Mean  \
0               -2435.3533  ...              33.919582  21030858.00   
1               

FINAL CHANGE

In [None]:
# Load the dataset (assuming the CSV file is in the same directory)
file_path = '/content/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv'
df = pd.read_csv(file_path)

# Print column names to debug
print("Original columns:", df.columns.tolist())

# Identify categorical columns and convert them to numeric values
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# List of columns to drop
columns_to_drop = ["Flow ID", " Source IP", " Source Port", " Destination IP", " Destination Port", " Protocol", " Timestamp"]

# Print column names after dropping to debug
print("Columns before dropping:", df.columns.tolist())

# Drop unnecessary columns
df.drop(columns_to_drop, axis=1, inplace=True, errors='ignore')

# Print column names after dropping to debug
print("Columns after dropping:", df.columns.tolist())

# Check for and handle NaN or infinity values
df.replace([np.inf, -np.inf], np.nan, inplace=True)  # Replace inf with NaN
df.dropna(inplace=True)  # Drop rows with NaN values

# Normalize the data
scaler = MinMaxScaler()
data = scaler.fit_transform(df)

# GAN Parameters
latent_dim = 100
adam = Adam(learning_rate=0.0002, beta_1=0.5)

# Generator Model
def build_generator():
    model = Sequential()
    model.add(Dense(128, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(df.shape[1], activation='tanh'))
    return model

# Discriminator Model
def build_discriminator():
    model = Sequential()
    model.add(Dense(512, input_dim=df.shape[1]))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

# Building and compiling the models
generator = build_generator()
discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

# Combine the models to create the GAN
z = Input(shape=(latent_dim,))
generated_data = generator(z)
discriminator.trainable = False
validity = discriminator(generated_data)
combined = Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer=adam)

# Training the GAN
def train_gan(epochs, batch_size=64, save_interval=1000):
    half_batch = int(batch_size / 2)

    for epoch in range(epochs):
        # Train Discriminator
        idx = np.random.randint(0, data.shape[0], half_batch)
        real_data = data[idx]
        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        generated_data = generator.predict(noise)

        d_loss_real = discriminator.train_on_batch(real_data, np.ones((half_batch, 1)))
        d_loss_fake = discriminator.train_on_batch(generated_data, np.zeros((half_batch, 1)))
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        valid_y = np.array([1] * batch_size)
        g_loss = combined.train_on_batch(noise, valid_y)

        # Print the progress
        print(f"{epoch + 1}/{epochs} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}] [G loss: {g_loss}]")

        # Save the generated data at save intervals
        if (epoch + 1) % save_interval == 0:
            save_generated_data(epoch + 1)

def save_generated_data(epoch):
    noise = np.random.normal(0, 1, (1000, latent_dim))
    generated_data = generator.predict(noise)
    generated_data = scaler.inverse_transform(generated_data)

    # Create a DataFrame with the original structure
    df_generated = pd.DataFrame(generated_data, columns=df.columns)

    # Convert numeric columns back to categorical if needed
    for col in categorical_columns:
        if col in df_generated.columns:
            df_generated[col] = label_encoders[col].inverse_transform(df_generated[col].astype(int))

    # Add the dropped columns back with empty values
    for col in columns_to_drop:
        if col not in df_generated.columns:
            df_generated[col] = ""

    # Reorder columns to match the original CSV
    df_generated = df_generated[columns_to_drop + df.columns.tolist()]

    # Save to CSV
    df_generated.to_csv(f'generated_packets_epoch_{epoch}.csv', index=False)
    print(f"Generated data saved as generated_packets_epoch_{epoch}.csv")

# Train the GAN
train_gan(epochs=10, batch_size=64, save_interval=10)


Original columns: ['Flow ID', ' Source IP', ' Source Port', ' Destination IP', ' Destination Port', ' Protocol', ' Timestamp', ' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Min', ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', 'Fwd PSH Flags', ' Bwd PSH Flags', ' Fwd URG Flags', ' Bwd URG Flags', ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step




1/10 [D loss: 0.6733845472335815, acc.: 68.75] [G loss: [array(0.70181674, dtype=float32), array(0.70181674, dtype=float32), array(0.46875, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
2/10 [D loss: 0.6877057552337646, acc.: 56.90103769302368] [G loss: [array(0.6975975, dtype=float32), array(0.6975975, dtype=float32), array(0.4921875, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
3/10 [D loss: 0.6903358697891235, acc.: 54.374998807907104] [G loss: [array(0.6956478, dtype=float32), array(0.6956478, dtype=float32), array(0.5, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
4/10 [D loss: 0.6920543909072876, acc.: 53.54352593421936] [G loss: [array(0.6975575, dtype=float32), array(0.6975575, dtype=float32), array(0.50390625, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
5/10 [D loss: 0.6947168111801147, acc.: 52.916669845

In [None]:
# Specify the file path where the generated CSV file is saved
file_path = 'generated_packets_epoch_10.csv'

# Load the CSV file into a DataFrame
df_generated = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to verify the content
print(df_generated.head())

   Flow ID   Source IP   Source Port   Destination IP   Destination Port  \
0      NaN         NaN           NaN              NaN                NaN   
1      NaN         NaN           NaN              NaN                NaN   
2      NaN         NaN           NaN              NaN                NaN   
3      NaN         NaN           NaN              NaN                NaN   
4      NaN         NaN           NaN              NaN                NaN   

    Protocol   Timestamp   Flow Duration   Total Fwd Packets  \
0        NaN         NaN    -101779270.0          1383.78980   
1        NaN         NaN     117542360.0           616.71540   
2        NaN         NaN     117064570.0           511.20728   
3        NaN         NaN     -81121940.0          -449.81730   
4        NaN         NaN     113891140.0           358.02966   

    Total Backward Packets  ...   min_seg_size_forward  Active Mean  \
0              -2025.64720  ...              -1.138221  -31341248.0   
1              -

In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam

# Load the dataset
file_path = '/content/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX2.csv'
df = pd.read_csv(file_path)

# Identify categorical columns and convert them to numeric values
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Drop unnecessary columns
columns_to_drop = ["Flow ID", " Source IP", " Source Port", " Destination IP", " Destination Port", " Protocol", " Timestamp"]
df.drop(columns_to_drop, axis=1, inplace=True, errors='ignore')

# Replace infinity with NaN and drop missing values
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)

# Normalize the data
scaler = MinMaxScaler()
data = scaler.fit_transform(df)

# GAN parameters
latent_dim = 100
adam = Adam(learning_rate=0.0002, beta_1=0.5)

# Generator model
def build_generator():
    input_layer = Input(shape=(latent_dim,))
    x = Dense(128)(input_layer)
    x = LeakyReLU(negative_slope=0.2)(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Dense(256)(x)
    x = LeakyReLU(negative_slope=0.2)(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Dense(512)(x)
    x = LeakyReLU(negative_slope=0.2)(x)
    x = BatchNormalization(momentum=0.8)(x)
    output_layer = Dense(df.shape[1], activation='tanh')(x)
    return Model(input_layer, output_layer)

# Discriminator model
def build_discriminator():
    input_layer = Input(shape=(df.shape[1],))
    x = Dense(512)(input_layer)
    x = LeakyReLU(negative_slope=0.2)(x)
    x = Dense(256)(x)
    x = LeakyReLU(negative_slope=0.2)(x)
    output_layer = Dense(1, activation='sigmoid')(x)
    return Model(input_layer, output_layer)


# Instantiate and compile models
generator = build_generator()
discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

# Build combined model
z = Input(shape=(latent_dim,))
generated_data = generator(z)
discriminator.trainable = False
validity = discriminator(generated_data)
combined = Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer=adam)

def train_gan(epochs, batch_size=64, save_interval=10):
    half_batch = int(batch_size / 2)

    for epoch in range(epochs):
        # Train Discriminator
        idx = np.random.randint(0, data.shape[0], half_batch)
        real_data = tf.convert_to_tensor(data[idx], dtype=tf.float32)
        noise = tf.random.normal((half_batch, latent_dim))
        generated_data = generator(noise, training=False)

        d_loss_real = discriminator.train_on_batch(real_data, tf.ones((half_batch, 1)))
        d_loss_fake = discriminator.train_on_batch(generated_data, tf.zeros((half_batch, 1)))
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train Generator
        noise = tf.random.normal((batch_size, latent_dim))
        valid_y = tf.ones((batch_size, 1))
        g_loss = combined.train_on_batch(noise, valid_y)

        # Corrected print statement
        if isinstance(g_loss, list):
            g_loss_value = g_loss[0]
        else:
            g_loss_value = g_loss

        print(f"{epoch + 1}/{epochs} [D loss: {d_loss[0]:.4f}, acc.: {100 * d_loss[1]:.2f}] [G loss: {g_loss_value:.4f}]")

        if (epoch + 1) % save_interval == 0:
            save_generated_data(epoch + 1)



def save_generated_data(epoch):
    noise = tf.random.normal((1000, latent_dim))
    generated_data = generator.predict(noise)
    generated_data = scaler.inverse_transform(generated_data)
    df_generated = pd.DataFrame(generated_data, columns=df.columns)

    for col in categorical_columns:
        if col in df_generated.columns:
            df_generated[col] = label_encoders[col].inverse_transform(df_generated[col].astype(int))

    for col in columns_to_drop:
        if col not in df_generated.columns:
            df_generated[col] = ""

    df_generated = df_generated[columns_to_drop + df.columns.tolist()]
    df_generated.to_csv(f'generated_packets_epoch_{epoch}.csv', index=False)
    print(f"Generated data saved as generated_packets_epoch_{epoch}.csv")

# Train the GAN
train_gan(epochs=10, batch_size=64, save_interval=10)




1/10 [D loss: 0.6574, acc.: 81.25] [G loss: 0.6697]
2/10 [D loss: 0.6679, acc.: 72.92] [G loss: 0.6730]
3/10 [D loss: 0.6750, acc.: 69.38] [G loss: 0.6804]
4/10 [D loss: 0.6789, acc.: 67.02] [G loss: 0.6820]
5/10 [D loss: 0.6809, acc.: 66.22] [G loss: 0.6836]
6/10 [D loss: 0.6836, acc.: 65.31] [G loss: 0.6871]
7/10 [D loss: 0.6870, acc.: 63.72] [G loss: 0.6907]
8/10 [D loss: 0.6907, acc.: 62.65] [G loss: 0.6941]
9/10 [D loss: 0.6947, acc.: 61.21] [G loss: 0.6983]
10/10 [D loss: 0.6980, acc.: 60.21] [G loss: 0.7009]
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Generated data saved as generated_packets_epoch_10.csv


In [8]:
# Specify the file path where the generated CSV file is saved
file_path = 'generated_packets_epoch_10.csv'

# Load the CSV file into a DataFrame
df_generated = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to verify the content
print(df_generated.head())

   Flow ID   Source IP   Source Port   Destination IP   Destination Port  \
0      NaN         NaN           NaN              NaN                NaN   
1      NaN         NaN           NaN              NaN                NaN   
2      NaN         NaN           NaN              NaN                NaN   
3      NaN         NaN           NaN              NaN                NaN   
4      NaN         NaN           NaN              NaN                NaN   

    Protocol   Timestamp   Flow Duration   Total Fwd Packets  \
0        NaN         NaN      -2284358.0          -1883.2680   
1        NaN         NaN     107822350.0            618.9486   
2        NaN         NaN      79680780.0           1220.8859   
3        NaN         NaN    -107257670.0            854.8530   
4        NaN         NaN      54520700.0          -1014.1349   

    Total Backward Packets  ...   min_seg_size_forward  Active Mean  \
0               -1055.9469  ...             -43.140430  -32289076.0   
1               