# <center> Vanilla Generative Adversarial Network for NSL-KDD </center>

In [1]:
import os

# Sanity-check the notebook's location before the relative imports below.
print(os.getcwd())  # print the current working directory

# Each of these relative paths is expected to print True.
for required_path in ('NSL-KDD', '../utils', '../models'):
    print(os.path.exists(required_path))


d:\WorkSpace\GAN\GANs_for_Network_Intrusion_Data\NSL-KDD\notebooks
False
True
True


In [2]:
import sys
sys.path.append('../utils')
sys.path.append('../models')
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from preprocessing import *
from classifiers import *
from utils import *

from matplotlib import pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K


## Read Data & Scale Features

In [3]:
# Load the raw splits together with the label-name -> label-code mapping.
train, test, label_mapping = get_data(encoding="Label")

# Every column except the target is treated as a feature.
data_cols = [col for col in train.columns if col != 'label']

# NOTE(review): "Robust" presumably selects a robust scaler inside
# `preprocess` -- confirm against ../utils/preprocessing.
x_train, x_test = preprocess(train, test, data_cols, "Robust", True)

# Targets are read from the preprocessed frames.
y_train = x_train['label'].values
y_test = x_test['label'].values

# Recompute the feature list from the preprocessed frame's own columns.
data_cols = [col for col in x_train.columns if col != 'label']

d:\WorkSpace\GAN\GANs_for_Network_Intrusion_Data\NSL-KDD\notebooks
True


## Define Generator, Discriminator & Full Generative Adversarial Network

In [4]:
def create_discriminator(data_dim, min_num_neurones):
    """
    Build and compile the discriminator network.

    Parameters:
    -----------
    data_dim : int
        Number of input features of a sample
    min_num_neurones : int
        Width of the second hidden layer; the first hidden layer is twice as wide

    Return Value: compiled Sequential model named 'Discriminator' that maps a
    sample to a single sigmoid output
    """
    hidden_and_output = [
        Dense(min_num_neurones*2, activation='relu', input_dim=data_dim),
        Dense(min_num_neurones, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    model = tf.keras.models.Sequential(hidden_and_output, name='Discriminator')

    model.compile(loss='binary_crossentropy', optimizer="sgd")
    return model


In [5]:
def create_generator(data_dim, min_num_neurones,noise_dim):
    """
    Build and compile the generator network.

    Parameters:
    -----------
    data_dim : int
        Number of features of a generated sample (size of the output layer)
    min_num_neurones : int
        Width of the first hidden layer; the next two are 2x and 4x as wide
    noise_dim : int
        Size of the input noise vector

    Return Value: compiled Sequential model named 'Generator' whose last layer
    is a linear Dense layer of size data_dim
    """
    stack = [
        Dense(min_num_neurones, activation='relu', input_dim=noise_dim),
        Dense(min_num_neurones*2, activation='relu'),
        Dense(min_num_neurones*4, activation='tanh'),
        # Linear output layer: one unit per data feature.
        Dense(data_dim),
    ]
    model = tf.keras.models.Sequential(stack, name='Generator')

    model.compile(loss='binary_crossentropy', optimizer="sgd")
    return model

In [6]:
def create_gan(discriminator, generator, z_dim):
    """
    Chain generator and discriminator into the combined (adversarial) model.

    Parameters:
    -----------
    discriminator : keras model
        Model scoring samples; its `trainable` flag is set to False here
    generator : keras model
        Model turning noise vectors into samples
    z_dim : int
        Size of the noise vector fed to the generator

    Return Value: compiled Model mapping noise -> discriminator(generator(noise))
    """
    # Freeze the discriminator before the combined model is compiled.
    discriminator.trainable = False

    latent = Input(shape=(z_dim,))
    verdict = discriminator(generator(latent))

    gan = Model(inputs=latent, outputs=verdict)
    gan.compile(loss='binary_crossentropy', optimizer='sgd')
    return gan

## Define batch generation & GAN training 

In [7]:
def get_batch(X, batch_size=1):
    """
    Draw a random batch of distinct rows from X.

    Parameters:
    -----------
    X : ndarray
        The input data to sample a batch from
    batch_size : int (default = 1)
        Number of rows to draw; rows are drawn without replacement, so
        np.random.choice raises ValueError when batch_size > len(X)

    Return Value: ndarray - batch_size randomly chosen rows of X
    """
    n_rows = len(X)
    # Sampling goes through NumPy's global RNG, so np.random.seed controls it.
    picked_rows = np.random.choice(n_rows, batch_size, replace=False)
    return X[picked_rows]

In [8]:
def training(arguments,X):
    """
    Build a generator/discriminator pair and train them adversarially on X.

    Parameters:
    -----------
    arguments : list
        [rand_noise_dim, nb_steps, batch_size, D_epochs, G_epochs, min_num_neurones]
        rand_noise_dim   - size of the noise vector fed to the generator
        nb_steps         - number of outer training steps
        batch_size       - samples per train_on_batch call
        D_epochs         - discriminator updates per outer step
        G_epochs         - generator updates per outer step
        min_num_neurones - base layer width passed to the model builders
    X : ndarray
        Real samples; X.shape[1] is used as the data dimension

    Return Value: dict with keys
        "generator_model", "discriminator_model", "combined_model" - the models
        "generator_loss"      - combined-model loss of the last generator
                                update of every outer step
        "disc_loss_generated" - discriminator loss on generated samples
                                (last discriminator update of every step)
        "disc_loss_real"      - discriminator loss on real samples
                                (last discriminator update of every step)
    """
    rand_noise_dim, nb_steps, batch_size, D_epochs, G_epochs, min_num_neurones = arguments

    data_dim = X.shape[1]
    combined_loss, disc_loss_generated, disc_loss_real = [], [], []

    # Creating GAN
    generator = create_generator(data_dim, min_num_neurones, rand_noise_dim)
    discriminator = create_discriminator(data_dim, min_num_neurones)
    adversarial_model = create_gan(discriminator, generator, rand_noise_dim)

    # Start training
    for epoch in range(1, nb_steps + 1):
        K.set_learning_phase(1)

        # Train Discriminator
        discriminator.trainable = True
        for i in range(D_epochs):
            # NOTE(review): reseeding with i+epoch makes runs repeatable, but
            # step (epoch, i) reuses the random stream of step (epoch+1, i-1).
            np.random.seed(i + epoch)

            # Use the noise dimension the models were built with, not the
            # notebook-level global `rand_dim`.
            noise = np.random.normal(0, 1, size=(batch_size, rand_noise_dim))
            # verbose=0: keep per-step progress bars out of the notebook output.
            generated_samples = generator.predict(noise, verbose=0)
            real_samples = get_batch(X, batch_size)

            # Soft targets: real labels in [0.999, 1.0), fake labels in [0.0, 0.0001).
            d_l_r = discriminator.train_on_batch(real_samples, np.random.uniform(low=0.999, high=1.0, size=batch_size))
            d_l_g = discriminator.train_on_batch(generated_samples, np.random.uniform(low=0.0, high=0.0001, size=batch_size))

        # Freeze Discriminator
        discriminator.trainable = False
        disc_loss_generated.append(d_l_g)
        disc_loss_real.append(d_l_r)

        # Train Generator (through the combined model, targets close to 1)
        for i in range(G_epochs):
            np.random.seed(i + epoch)

            noise = np.random.normal(0, 1, size=(batch_size, rand_noise_dim))
            loss = adversarial_model.train_on_batch(noise, np.random.uniform(low=0.999, high=1.0, size=batch_size))

        combined_loss.append(loss)

        # Do checkpointing
        if epoch % 10 == 0:
            K.set_learning_phase(0)

            # NOTE(review): checkpoint noise is drawn from N(3, 2) while the
            # training noise above is N(0, 1) -- confirm this is intended.
            z = np.random.normal(3, 2, size=(len(X), rand_noise_dim))

            # TODO: add a distribution-level metric (e.g. KL divergence between
            # real and generated feature distributions) at this checkpoint.
            fake_pred = np.array(adversarial_model.predict(z, verbose=0)).ravel()
            real_pred = np.array(discriminator.predict(X, verbose=0)).ravel()

            modelAccuracy(fake_pred, real_pred)

    return {
        "generator_model": generator,
        "discriminator_model": discriminator,
        "combined_model": adversarial_model,
        "generator_loss": combined_loss,
        "disc_loss_generated": disc_loss_generated,
        "disc_loss_real": disc_loss_real,
    }
        

## Filter Train samples and set training parameters

In [9]:
K.clear_session()

# --- Training subset: only the rows labelled "probe" ---
att_ind = np.where(y_train == label_mapping["probe"])[0]
x = x_train[data_cols].values[att_ind]
n_to_generate = 2000  # not referenced by the cells visible here

# --- GAN hyper-parameters (packed into `arguments` for `training`) ---
rand_dim = 32        # size of the generator's noise vector
base_n_count = 100   # base layer width (min_num_neurones)
combined_ep = 1000   # number of outer training steps

# Never ask for more rows per batch than the subset contains.
batch_size = min(128, len(x))

ep_d = 1  # discriminator updates per outer step
ep_g = 2  # generator updates per outer step
learning_rate = 0.0001#5e-5  (not referenced by the cells visible here)

## Training GAN

In [10]:
# Positional order must match the unpacking at the top of `training`.
arguments = [
    rand_dim,      # rand_noise_dim
    combined_ep,   # nb_steps
    batch_size,    # batch_size
    ep_d,          # D_epochs
    ep_g,          # G_epochs
    base_n_count,  # min_num_neurones
]
res = training(arguments, x)



In [11]:
# Layer-by-layer summary of the trained discriminator. It reports zero
# trainable parameters because `training` leaves `discriminator.trainable`
# set to False after its last step.
res["discriminator_model"].summary()

Model: "Discriminator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 200)               7200      
                                                                 
 dense_5 (Dense)             (None, 100)               20100     
                                                                 
 dense_6 (Dense)             (None, 1)                 101       
                                                                 
Total params: 27401 (107.04 KB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 27401 (107.04 KB)
_________________________________________________________________


In [12]:
# Draw one batch of N(0, 1) noise and let the trained generator turn it into
# synthetic samples.
generator = res["generator_model"]
noise = np.random.normal(loc=0, scale=1, size=(batch_size, rand_dim))
generated_samples = generator.predict(noise)



In [14]:
# Sanity check: expect (batch_size, number of feature columns).
generated_samples.shape

(128, 35)

In [16]:
# Wrap the generated array in a DataFrame (default integer column labels)
# and preview its first five rows.
generated_samples = pd.DataFrame(data=generated_samples)
generated_samples.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
0,-0.033334,0.241033,0.173041,0.236394,-0.057264,-0.12551,-0.023805,0.006481,-0.208523,0.253497,...,-0.074651,-0.140434,0.03191,-0.19679,0.403829,-0.004132,-0.074146,0.047201,0.302533,0.010141
1,-0.13576,0.156115,0.341493,0.01873,-0.133486,-0.066119,-0.034175,-0.110924,-0.03916,0.005289,...,-0.155869,-0.172894,0.093111,-0.355993,0.444449,-0.128724,-0.090467,-0.016477,0.20531,0.22632
2,-0.095554,0.173565,0.253368,-0.075576,0.043795,-0.19365,0.049812,-0.185529,0.008186,-0.128904,...,-0.056773,0.00482,0.132103,-0.290188,0.456954,0.113698,-0.178891,0.116795,0.155727,0.250265
3,-0.053567,0.312443,0.153312,0.002017,-0.26016,0.072763,-0.129674,-0.057867,0.148588,0.066,...,0.077957,-0.218814,-0.144457,-0.348702,0.560083,0.171706,-0.157327,-0.077023,0.206261,0.11211
4,-0.32439,-0.042725,0.139694,0.007375,-0.181571,-0.179522,-0.051404,-0.09355,0.029456,0.188071,...,0.189786,-0.198371,0.084796,-0.274805,0.445162,-0.100635,-0.065208,-0.027885,0.27836,0.213076
