In [9]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt 

In [10]:
# Load the KDD99 dataset
dataset = pd.read_csv("../Preprocess/finalInt.csv")

In [11]:
# Split the data into training and test sets
t_dataset = pd.read_csv("../Preprocess/finalInt_nohead.csv")
train_data = t_dataset[:int(len(dataset) * 0.9)]
test_data = t_dataset[int(len(dataset) * 0.9):]

In [12]:
# Define the generator model
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(256, input_dim=100))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.BatchNormalization(momentum=0.8))
    model.add(tf.keras.layers.Dense(512))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.BatchNormalization(momentum=0.8))
    model.add(tf.keras.layers.Dense(1024))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.BatchNormalization(momentum=0.8))
    model.add(tf.keras.layers.Dense(np.prod(train_data.shape[1:]), activation='tanh'))
    model.add(tf.keras.layers.Reshape(target_shape=train_data.shape[1:]))
    return model

generator = make_generator_model()

In [13]:
# Define the discriminator model
def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=train_data.shape[1:]))
    model.add(tf.keras.layers.Dense(1024))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dense(512))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dense(256))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    return model

discriminator = make_discriminator_model()

# Compile the discriminator
discriminator.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(0.0002, 0.5), metrics=['accuracy'])

# Freeze the weights of the discriminator
discriminator.trainable = False

In [14]:
# Define the combined model for training the generator
def make_gan(discriminator, generator):
    model = tf.keras.Sequential()
    model.add(generator)
    model.add(discriminator)
    return model

gan = make_gan(discriminator, generator)

# Compile the combined model
gan.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(0.0002, 0.5))

In [15]:
# Train the GAN
def train(gan, discriminator, generator, train_data, batch_size=128, epochs=100):
    half_batch = int(batch_size / 2)
    for epoch in range(epochs):
        # Train the discriminator
        idx = np.random.randint(0, train_data.shape[0], half_batch)
        real_data = train_data[idx]
        noise = np.random.normal(0, 1, (half_batch, 100))
        fake_data = generator.predict(noise)
        real_labels = np.ones((half_batch, 1))
        fake_labels = np.zeros((half_batch, 1))
        d_loss_real = discriminator.train_on_batch(real_data, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train the generator
        noise = np.random.normal(0, 1, (batch_size, 100))
        fake_labels = np.ones((batch_size, 1))
        g_loss = gan.train_on_batch(noise, fake_labels)

        # Print progress
        print("Epoch: %d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch + 1, d_loss[0], 100 * d_loss[1], g_loss))

train(gan, discriminator, generator, train_data, epochs=100)

KeyError: "None of [Int64Index([436050, 218095, 456732,  42598, 154271, 300079, 290671,  13266,\n             80490, 375894, 292359,  86011, 374649, 513205, 405037, 227073,\n             97298,  84305, 189142, 122012, 500191, 294785, 165455, 451129,\n            283835, 102021, 295544, 420603, 455014,  10095,   5959, 240437,\n            161120, 236578, 478916, 198700, 301665, 241446, 466159, 396880,\n            397556, 371807, 337898, 370141,  20150, 288715,  49852, 271938,\n             58644, 236547, 249730,  22982, 349215, 508188, 337682, 226402,\n            426308, 162477, 235079, 276414, 215339, 499232, 146885,   9273],\n           dtype='int64')] are in the [columns]"

In [None]:
# Generate new data
noise = np.random.normal(0, 1, (100, 100))
generated_data = generator.predict(noise)

In [None]:
# Plot the generated data
plt.scatter(dataset[:, 1], generated_data[:, 1], c=np.argmax(generated_data[:, 2:], axis=1))
plt.show()

In [None]:
# # Save the discriminator model
# discriminator.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(0.0002, 0.5))
# discriminator.save('../ML/GAN/discriminator_plot.h5')

# # Save the generator model
# generator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# generator.save('../ML/GAN/generator_plot.h5')

# # Save the combined model
# gan.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(0.0002, 0.5))
# gan.save('../ML/GAN/gan_plot.h5')

In [None]:
# # Load the discriminator model
# loaded_discriminator = tf.keras.models.load_model('../ML/GAN/discriminator.h5')

# # Load the generator model
# loaded_generator = tf.keras.models.load_model('../ML/GAN/generator.h5')

# # Load the combined model
# loaded_gan = tf.keras.models.load_model('../ML/GAN/gan.h5')

In [None]:
# # Tensorflow Warning Fix
# @tf.function(input_signature=(tf.TensorSpec(shape=[None], dtype=tf.int32),))
# def next_collatz(x):
#     print("Tracing with", x)
#     return tf.where(x % 2 == 0, x // 2, 3 * x + 1)

In [None]:
# # Custom input data
# input_data = np.array([[0,1,24,9,206,1491,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,17,20,0.0,0.0,0.0,0.0,1.0,0.0,0.1,0,255,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11]])
# # input_data = np.array([[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2]])

# # Make a prediction using the model
# prediction_data = discriminator.predict(input_data)

# print(prediction_data)

In [17]:
import pandas as pd

# Load KDD99 dataset from CSV file
df = pd.read_csv('../dataset/kddcup.data.gz')

# Print the first few rows of the DataFrame
print(df.head())


   0  tcp  http  SF  215  45076  0.1  0.2  0.3  0.4  ...  0.17  0.00.6  \
0  0  tcp  http  SF  162   4528    0    0    0    0  ...     1     1.0   
1  0  tcp  http  SF  236   1228    0    0    0    0  ...     2     1.0   
2  0  tcp  http  SF  233   2032    0    0    0    0  ...     3     1.0   
3  0  tcp  http  SF  239    486    0    0    0    0  ...     4     1.0   
4  0  tcp  http  SF  238   1282    0    0    0    0  ...     5     1.0   

   0.00.7  0.00.8  0.00.9  0.00.10  0.00.11  0.00.12  0.00.13  normal.  
0     0.0    1.00     0.0      0.0      0.0      0.0      0.0  normal.  
1     0.0    0.50     0.0      0.0      0.0      0.0      0.0  normal.  
2     0.0    0.33     0.0      0.0      0.0      0.0      0.0  normal.  
3     0.0    0.25     0.0      0.0      0.0      0.0      0.0  normal.  
4     0.0    0.20     0.0      0.0      0.0      0.0      0.0  normal.  

[5 rows x 42 columns]
