<a href="https://colab.research.google.com/github/thetinybug/Django-Project/blob/master/3.%20CompareDataset/Machine_Learning/A4/LR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
# Mount Google Drive at /content/drive (Colab only); the dataset and model
# paths configured later in this notebook all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#**0. Thư viện**

##Cài đặt 

In [40]:
# !pip3 install numpy torch scikit-learn matplotlib pandas

##Import

In [41]:
# Add Module Path - To Import Custom Modules
# Folder on Google Drive that is appended to sys.path so project-local modules
# can be imported (presumably the `models` and `constants` modules imported in
# the next cell -- confirm they live in this folder).
ModulePath = "/content/drive/My Drive/Study/KLTN/Google Colab/0.0 Python Modules/"

import sys
sys.path.append(ModulePath)

In [42]:
import numpy as np
import pandas as pd
import torch as th
from torch.autograd import Variable as V
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
import pickle

# Libs for Keras
from keras.models import load_model


# Own Custom Module to import models and constants
from models import *
from constants import *

import matplotlib.pyplot as plt
import math
import os
from datetime import date
import timeit

#**1. Chuẩn bị**

##Paths

In [43]:
# Root folder of the project on the mounted Google Drive
base_path = "/content/drive/My Drive/Study/KLTN/"
# Top-level folders: processed dataset, saved models, generated (adversarial) datasets
Dataset_Path = f"{base_path}Dataset/NSL-KDD Processed/Final - For Using/"
SavedModelPath = f"{base_path}Saved Model/"
Generated_Dataset_Save_Path = f"{base_path}Generated Datasets/"

# Train sets for the two GAN halves and the test set
Trainsets_Path = f"{Dataset_Path}Trainset/"
g_trainset_path = f"{Trainsets_Path}GAN-G.csv"
d_trainset_path = f"{Trainsets_Path}GAN-D.csv"
testset_path = f"{Dataset_Path}Testset/KDDTest+.csv"

# Folder holding the saved GAN models
GAN_Model_Path = f"{SavedModelPath}GANModel/"

# Folder holding the saved IDS models
IDS_Saved_Path = f"{SavedModelPath}IDSModel/"

##Global Variables

In [44]:
# Global Variables
# Number of feature columns in a record
N_FEATURES = 41
# IDS input/output sizes
IDS_INPUT_DIM = N_FEATURES
IDS_OUTPUT_DIM = 2
ATTACK_CATEGORIES = ['DOS', 'U2R_AND_R2L']

# Column positions of the functional features per attack category
# (DOS_FEATURES / U2R_AND_R2L_FEATURES come from the star-imported project modules).
POS_FUNCTIONAL_FEATURES = {'DOS': DOS_FEATURES, 'U2R_AND_R2L': U2R_AND_R2L_FEATURES}
# Every remaining column position is a nonfunctional feature for that category.
POS_NONFUNCTIONAL_FEATURES = {
    category: [position for position in range(N_FEATURES) if position not in functional_positions]
    for category, functional_positions in POS_FUNCTIONAL_FEATURES.items()
}

IDS_MODELS = {'GaussianNB', 'MultinomialNB', 'ComplementNB', 'BernoulliNB', 'DT', 'RF', 'KNN'}

In [45]:
# Show, per attack category, which column positions are functional and which are nonfunctional.
print('Position of Functional Features\n  ', POS_FUNCTIONAL_FEATURES)
print('Position of Nonfunctional Features:\n  ', POS_NONFUNCTIONAL_FEATURES)

Position of Functional Features
   {'DOS': [0, 1, 2, 3, 4, 5, 6, 7, 8, 23, 24, 25, 26, 27, 28, 29, 30], 'U2R_AND_R2L': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]}
Position of Nonfunctional Features:
   {'DOS': [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], 'U2R_AND_R2L': [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]}


# **2. Định nghĩa Model**

##Models

In [46]:
# Load from model.py module

##Functions

### Processing Data Functions

In [47]:
# Processing Data Functions
# create_batch2 - Hàm tạo Batch
def create_batch2(x,batch_size):
    """Shuffle the rows of ``x`` and split them into equally sized batches.

    Args:
        x: 2-D NumPy array (it is indexed with a list of row indices and sliced
            along two axes).
        batch_size: Number of rows per batch.

    Returns:
        A NumPy array stacking ``len(x) // batch_size`` batches; rows that do not
        fill a complete batch are dropped.
    """
    row_order = list(range(len(x)))
    np.random.shuffle(row_order)
    shuffled = x[row_order]

    n_full_batches = len(shuffled) // batch_size
    batches = []
    for start in range(0, n_full_batches * batch_size, batch_size):
        batches.append(shuffled[start:start + batch_size, :])
    return np.array(batches)
# preprocess_malicious_data - Hàm tiền xử lý dữ liệu tấn công
def preprocess_malicious_data(dataset, attack_category):
    """Extract the feature rows of one attack category from a labelled dataset.

    Args:
        dataset: DataFrame with a ``class`` column holding the record label.
        attack_category: Either ``'DOS'`` or ``'U2R_AND_R2L'``.

    Returns:
        NumPy array of the matching rows with the ``class`` column removed.

    Raises:
        ValueError: If ``attack_category`` is not one of the two supported values.
    """
    if attack_category not in ('DOS', 'U2R_AND_R2L'):
        raise ValueError("Preprocess Data Fail: Invalid Attack Category")
    is_selected = dataset['class'] == attack_category
    attack_features = dataset.loc[is_selected].drop(columns="class")
    return np.array(attack_features)

### IDS Functions

In [48]:
# IDS Functions
# get_sklearn_ids_path - Get path for sklearn IDS Models
def get_sklearn_ids_path(model_name, attack_category, created_date):
    """Build the path of a pickled sklearn IDS model and check that it exists.

    Args:
        model_name: Model name used in the pickle file name.
        attack_category: Either ``'DOS'`` or ``'U2R_AND_R2L'``; selects the sub-folder.
        created_date: Date stamp used in the pickle file name.

    Returns:
        The path ``{IDS_Saved_Path}{attack_category}/Machine_Learning/created_date_{created_date}_{model_name}.pkl``.

    Raises:
        ValueError: If the attack category is unsupported or the file does not exist.
    """
    if attack_category not in ('DOS', 'U2R_AND_R2L'):
        raise ValueError("Preprocess Data Fail: Invalid Attack Category")
    file_name = f"created_date_{created_date}_{model_name}.pkl"
    ids_path = f"{IDS_Saved_Path}{attack_category}/Machine_Learning/{file_name}"
    if os.path.exists(ids_path):
        return ids_path
    raise ValueError(f"Invalid path: {ids_path}\nNot exist file!")
# load_sklearn_ids_model - Load pickled sklearn IDS Models
def load_sklearn_ids_model(model_name, attack_category, created_date):
    """Load a pickled IDS model from disk and report where it came from.

    Args:
        model_name: Model name used in the pickle file name.
        attack_category: Attack category the model belongs to.
        created_date: Date stamp used in the pickle file name.

    Returns:
        The unpickled model object.

    Raises:
        ValueError: Propagated from ``get_sklearn_ids_path`` when the category
            is invalid or the file is missing.
    """
    model_file = get_sklearn_ids_path(model_name, attack_category, created_date)
    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files from a trusted location.
    with open(model_file, 'rb') as handle:
        loaded_model = pickle.load(handle)
    print(f"{4*' '}Loaded IDS Model From: {model_file}")
    return loaded_model

### GAN Functions

In [49]:
# GAN Functions
# init_generator - Khoi tao model GAN-G
def init_generator(input_dim, output_dim, adversarial_attack_type = 1):
    """Create the GAN generator that matches an adversarial attack type.

    Args:
        input_dim: Generator input size; only used for types 1 and 2.
        output_dim: Generator output size (number of nonfunctional features).
        adversarial_attack_type: 1 or 2 build ``Generator_A1`` / ``Generator_A2``
            with ``(input_dim, output_dim)``. Type 3 (generator input is the
            nonfunctional features) builds ``Generator_A2`` and type 4 (generator
            input is noise of nonfunctional-feature size) builds ``Generator_A1``,
            both with ``(output_dim, output_dim)``.

    Returns:
        The newly constructed generator.

    Raises:
        ValueError: If ``adversarial_attack_type`` is not 1, 2, 3 or 4.
    """
    if adversarial_attack_type not in (1, 2, 3, 4):
        raise ValueError("Init GAN - Generator: Invalid Adversarial Attack Type")

    # Types 3 and 4 ignore input_dim: their input is as wide as their output.
    first_dim = input_dim if adversarial_attack_type in (1, 2) else output_dim
    if adversarial_attack_type in (1, 4):
        return Generator_A1(first_dim, output_dim)
    return Generator_A2(first_dim, output_dim)

# gen_adversarial_attack - Tao luu luong tan cong doi khang
def gen_adversarial_attack(generator, noise_dim, raw_attack, attack_category, adversarial_attack_type = 1):
    """Craft adversarial attack records from raw attack records.

    Args:
        generator: Generator model; its call signature depends on the attack type.
        noise_dim: Width of the noise input (types 1, 2 and 4).
        raw_attack: Batch of raw attack records.
        attack_category: Key into ``POS_NONFUNCTIONAL_FEATURES``.
        adversarial_attack_type: 1 or 4 delegate everything to the generator;
            2 feeds the generator uniform noise; 3 feeds it the nonfunctional
            feature columns of ``raw_attack``. For 2 and 3 the generator output
            is combined with ``raw_attack`` by ``gen_adversarial_attack_a2``.

    Returns:
        The adversarial attack batch.

    Raises:
        ValueError: If ``adversarial_attack_type`` is not 1, 2, 3 or 4.
    """
    if adversarial_attack_type in (1, 4):
        return generator(noise_dim, raw_attack, attack_category, POS_NONFUNCTIONAL_FEATURES)

    if adversarial_attack_type == 2:
        n_records = len(raw_attack)
        uniform_noise = np.random.uniform(0,1,(n_records, noise_dim))
        generator_out = generator(V(th.Tensor(uniform_noise)))
    elif adversarial_attack_type == 3:
        # Feed only the nonfunctional feature columns to the generator
        nf_positions = POS_NONFUNCTIONAL_FEATURES[attack_category]
        generator_out = generator(raw_attack[:, nf_positions])
    else:
        raise ValueError("Init GAN - Generator: Invalid Adversarial Attack Type")
    return gen_adversarial_attack_a2(generator_out, raw_attack, attack_category, POS_NONFUNCTIONAL_FEATURES)


# train_generator - Train Generator
def train_generator(generator, discriminator, optimizer_G, noise_dim, attack_traffic, attack_category, adversarial_attack_type):
    """Run one generator update step against a frozen discriminator.

    Args:
        generator: Generator model being trained.
        discriminator: Discriminator used to score the crafted traffic.
        optimizer_G: Optimizer that updates the generator.
        noise_dim: Noise width forwarded to ``gen_adversarial_attack``.
        attack_traffic: Batch of raw attack records.
        attack_category: Attack category forwarded to ``gen_adversarial_attack``.
        adversarial_attack_type: Attack type forwarded to ``gen_adversarial_attack``.

    Returns:
        The generator loss tensor (negative mean discriminator output).
    """
    # Freeze the discriminator parameters for this step
    for param in discriminator.parameters():
        param.requires_grad = False

    optimizer_G.zero_grad()
    # Craft adversarial traffic and let the discriminator score it
    crafted_traffic = gen_adversarial_attack(generator, noise_dim, attack_traffic, attack_category, adversarial_attack_type)
    critic_score = discriminator(crafted_traffic)

    g_loss = -th.mean(critic_score)
    g_loss.backward()
    optimizer_G.step()
    return g_loss

# train_discriminator - Train Discriminator
def train_discriminator(discriminator, ids_model, generator, critic_iters, clamp, optimizer_D, normal_b, noise_dim, attack_traffic, attack_category, adversarial_attack_type):
    """Run up to ``critic_iters`` discriminator update steps.

    Each step clamps the discriminator weights to ``[-clamp, clamp]``, crafts
    adversarial traffic, mixes it with ``normal_b``, lets ``ids_model`` label the
    shuffled mix, and updates the discriminator so that records the IDS labels
    as attack (1) and as normal (0) are scored apart.

    Args:
        discriminator: Discriminator model being trained.
        ids_model: Object exposing ``predict``; used as the black-box IDS.
        generator: Generator used to craft adversarial traffic.
        critic_iters: Maximum number of update steps.
        clamp: Bound applied to every discriminator parameter before each step.
        optimizer_D: Optimizer that updates the discriminator.
        normal_b: Batch of normal traffic.
        noise_dim: Noise width forwarded to ``gen_adversarial_attack``.
        attack_traffic: Batch of raw attack records.
        attack_category: Attack category forwarded to ``gen_adversarial_attack``.
        adversarial_attack_type: Attack type forwarded to ``gen_adversarial_attack``.

    Returns:
        Tuple ``(run_d_loss, cnt)``: the sum of the per-step losses, and 1 if the
        loop stopped early because the IDS labelled no record as attack (else 0).
    """
    run_d_loss = 0
    cnt = 0
    # Re-enable gradients on the discriminator parameters
    for p in discriminator.parameters(): 
        p.requires_grad = True
    for c in range(critic_iters):
        optimizer_D.zero_grad()
        # Weight clipping: keep every parameter inside [-clamp, clamp]
        for p in discriminator.parameters():
            p.data.clamp_(-clamp, clamp)
        # GAN-G Generate Adversarial Attack
        adversarial_attack = gen_adversarial_attack(generator, noise_dim, attack_traffic, attack_category, adversarial_attack_type)
        # Make data to feed IDS
        ids_input = th.cat((adversarial_attack,normal_b))
        # Shuffle the combined rows before handing them to the IDS
        l = list(range(len(ids_input)))
        np.random.shuffle(l)
        ids_input = V(th.Tensor(ids_input[l]))
        # IDS Predict
        ids_pred_label = V(th.Tensor(ids_model.predict(ids_input)))
        # Split the records by the label the IDS assigned (0 = normal, 1 = attack)
        pred_normal = ids_input[ids_pred_label==0]
        pred_attack = ids_input[ids_pred_label==1]
        # Nothing was labelled as attack: record it and stop training for this call
        if len(pred_attack) == 0:
            cnt += 1
            break
        # Make GAN-D input
        D_noraml = discriminator(V(th.Tensor(pred_normal)))
        D_attack= discriminator(V(th.Tensor(pred_attack)))
        # Loss and Update Parameter
        loss_normal = th.mean(D_noraml)
        loss_attack = th.mean(D_attack)
        # NOTE(review): the gradient penalty is computed on every step but is NOT
        # part of d_loss, because the `LAMBDA * gradient_penalty` term on the next
        # line is commented out.
        gradient_penalty = compute_gradient_penalty(discriminator, normal_b.data, adversarial_attack.data)
        d_loss = loss_attack - loss_normal #+ LAMBDA * gradient_penalty
        d_loss.backward()
        optimizer_D.step()
        run_d_loss += d_loss.item()
    return run_d_loss, cnt

# compute_gradient_penalty - Compute Gradient Penalty
def compute_gradient_penalty(D, normal_t, attack_t):
    """Compute a gradient penalty on random interpolations of two batches.

    Args:
        D: Callable mapping a batch to one score per row.
        normal_t: Tensor of normal records; its first dimension sets the row count.
        attack_t: Tensor of attack records, combined element-wise with ``normal_t``.

    Returns:
        Scalar tensor: the mean over rows of ``(||dD/dx||_2 - 1) ** 2``.
    """
    n_rows = normal_t.shape[0]
    # One random mixing weight per row
    mix = th.Tensor(np.random.random((n_rows, 1)))
    interpolated = mix * normal_t + ((1 - mix) * attack_t)
    interpolated = interpolated.requires_grad_(True)
    critic_out = D(interpolated)
    ones = V(th.Tensor(n_rows, 1).fill_(1.0), requires_grad=False)

    # Gradient of the critic output with respect to the interpolated inputs
    grads = autograd.grad(
        outputs=critic_out,
        inputs=interpolated,
        grad_outputs=ones,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]
    row_norms = grads.view(grads.size(0), -1).norm(2, dim=1)
    return ((row_norms - 1) ** 2).mean()

### Generate Adversarial Attack Functions

In [50]:
# cal_dr - Tinh DR
def cal_dr(ids_model, normal, raw_attack, adversarial_attack):
    """Compute the IDS detection rate on raw and on adversarial attack traffic.

    The detection rate is TP / (TP + FN): the share of attack records that the
    IDS labels as attack (1). It is computed directly from the predictions, and
    the true labels are sized from the inputs, so the batches may have any size.

    Args:
        ids_model: Object exposing ``predict(records)`` that returns 0 (normal)
            or 1 (attack) per record.
        normal: Tensor of normal records.
        raw_attack: Tensor of original attack records.
        adversarial_attack: Tensor of adversarial attack records; must have as
            many rows as ``raw_attack``.

    Returns:
        Tuple ``(origin_dr, adversarial_dr)``. Each value is NaN when there are
        no attack records.

    Raises:
        ValueError: If ``raw_attack`` and ``adversarial_attack`` differ in length.
    """
    if len(raw_attack) != len(adversarial_attack):
        raise ValueError("cal_dr: raw_attack and adversarial_attack must have the same number of records")

    # Make data to feed IDS contain: Attack & Normal
    o_ids_input = th.cat((raw_attack, normal))
    a_ids_input = th.cat((adversarial_attack, normal))
    # Shuffle both inputs with the same permutation so the true labels line up
    order = list(range(len(a_ids_input)))
    np.random.shuffle(order)
    o_ids_input = o_ids_input[order]
    a_ids_input = a_ids_input[order]
    # True labels: attack rows come first in the concatenation, then normal rows
    ids_true_label = np.r_[np.ones(len(raw_attack)), np.zeros(len(normal))][order]
    is_attack = ids_true_label == 1

    def _detection_rate(ids_input):
        # Share of true attack records that the IDS labelled as attack
        predicted = np.asarray(ids_model.predict(ids_input)).ravel()
        tp = np.sum(is_attack & (predicted == 1))
        fn = np.sum(is_attack & (predicted != 1))
        if tp + fn == 0:
            return float("nan")
        return tp / (tp + fn)

    origin_dr = _detection_rate(o_ids_input)
    adversarial_dr = _detection_rate(a_ids_input)
    return origin_dr, adversarial_dr

###Save Generated Dataset

In [51]:
# save_generated_dataset - Save Generated Dataset
def save_generated_dataset(df, path):
    """Write ``df`` to ``path`` as CSV (header row, no index column) and print the location."""
    df.to_csv(path, header=True, index=False)
    report = f"\t    Generated Dataset Saved\n\t    Saved Path: {path}"
    print(report)

# **3. Run Model**

In [52]:
#@title **CONSTS RUN MODEL**
# Colab form cell: the `#@param` annotations drive the form widgets, so they are left untouched.
# Creation date of the saved IDS pickle for each model name. The keys are used
# verbatim to build file names on disk, so the spelling 'LogicticRegression'
# must match the saved files and is not corrected here.
Default_IDS_Model_Created = {'LogicticRegression': '2020-07-10', 
                          'sklearn_SVM': '2020-07-11', 
                          'GaussianNB': '2020-07-08', 
                          'DT': '2020-07-08',
                          'RF': '2020-07-08',
                          'KNN': '2020-07-08'
                          }
ids_ml_model_name = "LogicticRegression" #@param ['LogicticRegression', 'sklearn_SVM', 'GaussianNB', 'DT', 'RF', 'KNN']
ids_created_date = 'Auto' #@param ["Auto", "2020-07-08","2020-07-10", "2020-07-11"]
# 'Auto' resolves to the default creation date of the selected IDS model
if ids_created_date == 'Auto':
    ids_created_date = Default_IDS_Model_Created[ids_ml_model_name]
print(f"IDS: {ids_ml_model_name} - created on: \t{ids_created_date}")
GAN_variant = 'WGAN' #@param ['WGAN', 'WGANGP']
MAX_EPOCH = 100 #@param [100, 10, 2] {type:"raw"}
# The form value is a single string; it is normalised to a list of categories
ATTACK_CATEGORIES = 'ALL' #@param ['ALL', 'DOS', 'U2R_AND_R2L']
if ATTACK_CATEGORIES == 'ALL':
    ATTACK_CATEGORIES = ['DOS', 'U2R_AND_R2L']
else:
    ATTACK_CATEGORIES = [ATTACK_CATEGORIES]
print(f"ATTACK_CATEGORIES: \t\t{ATTACK_CATEGORIES}")

# The form value is a single int; it is normalised to a list of attack types.
# NOTE(review): 0 expands to [1, 2, 3] only -- type 4 has to be selected explicitly.
ADVERSARIAL_ATTACK_TYPES = 4 #@param [0, 1, 2, 3, 4] {type:"raw"}
if ADVERSARIAL_ATTACK_TYPES == 0:
    ADVERSARIAL_ATTACK_TYPES = [1, 2, 3]
else:
    ADVERSARIAL_ATTACK_TYPES = [ADVERSARIAL_ATTACK_TYPES]
print(f"ADVERSARIAL_ATTACK_TYPES: \t{ADVERSARIAL_ATTACK_TYPES}")

# Noise width for the generator input; attack types 3 and 4 use the number of
# nonfunctional features instead (see the evaluation cell).
NOISE_DIM = 9 #@param[0, 9, 41] {type:"raw"}

IDS: LogicticRegression - created on: 	2020-07-10
ATTACK_CATEGORIES: 		['DOS', 'U2R_AND_R2L']
ADVERSARIAL_ATTACK_TYPES: 	[4]


Adversarial Attack Type: 0, 1, 2, 3, 4  
0: All (the code above expands this to types 1, 2 and 3)  
1: G_in = noise (dim = 9)  
2: G_in = noise (dim = 9)  (same as 1)  
3: G_in = nonfunctional features  
4: G_in = noise (dim = nf --> Number of nonfunctional features)

## **3.2 Run Generate Adversarial Traffic**

In [53]:
#@title OPTION FOR GEN&SAVE

# Date stamp embedded in the saved GAN-G checkpoint file names, keyed by IDS model name.
# Only 'LogicticRegression' has an entry, so "Auto" raises KeyError for any other IDS model.
default_gan_model_time_created = {'LogicticRegression': '2020-07-21'}
gan_model_time_created = "Auto" #@param ["Auto"] {allow-input: true}
if gan_model_time_created == 'Auto':
    gan_model_time_created = default_gan_model_time_created[ids_ml_model_name]
print(f"GAN Model Created Time: {gan_model_time_created}")
# Number of records per batch used when batching normal and attack traffic
BATCH_SIZE = 256 #@param ["256"] {type:"raw", allow-input: true}




GAN Model Created Time: 2020-07-21


In [54]:
# Load Testset
testset = pd.read_csv(testset_path)
# Names of the feature columns; N_FEATURES is reused instead of repeating the literal 41
DATASET_COLUMNS = testset.columns[:N_FEATURES]
print(f"Amount of KDDTest+: \t\t{len(testset)}")
# test_normal: records labelled 'Normal' with the last column dropped
# (presumably the `class` label column -- confirm against the CSV layout)
test_normal = np.array(testset[testset["class"] == 'Normal'])[:,:-1]
# Create batch of normal traffic
test_batch_normal = create_batch2(test_normal,BATCH_SIZE).astype('float64')
print(f"Amount of Normal:\t\t{len(test_normal)} ({len(test_batch_normal)} batchs - {BATCH_SIZE} records/batch)")

Amount of KDDTest+: 		20123
Amount of Normal:		9711 (37 batchs - 256 records/batch)


In [76]:
# Show the names of the feature columns selected from the test set.
print(DATASET_COLUMNS)

Index(['duration', 'protocol_type', 'service', 'flag', 'src_bytes',
       'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot',
       'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell',
       'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
       'num_access_files', 'num_outbound_cmds', 'is_host_login',
       'is_guest_login', 'count', 'srv_count', 'serror_rate',
       'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate',
       'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
       'dst_host_srv_count', 'dst_host_same_srv_rate',
       'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
       'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
       'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
       'dst_host_srv_rerror_rate'],
      dtype='object')


In [55]:
# Adversarial Traffic Evaluating
# For every (adversarial attack type, attack category) pair this cell loads the
# pickled IDS, rebuilds the generator and, for every 10th checkpoint, crafts
# adversarial traffic from the test set, prints the detection rates and saves
# the crafted records as CSV.
print(f"{40*'='} ADVERSARIAL TRAFFIC EVALUATING {40*'='}")
print(f"{80*'='}\n IDS Model: {ids_ml_model_name}")

for adversarial_attack_type in ADVERSARIAL_ATTACK_TYPES:
    print(f"{80*'*'}\n Adversarial Attack Type : {adversarial_attack_type}")
    for attack_category in ATTACK_CATEGORIES:
        print(f"{80*'-'}\n - Attack Category: {attack_category}")

        # Load sklearn IDS Model
        ids_model = load_sklearn_ids_model(ids_ml_model_name, attack_category, ids_created_date)
        # Init GAN-G model
        # The generator emits one value per nonfunctional feature
        G_OUTPUT_DIM = len(POS_NONFUNCTIONAL_FEATURES[attack_category])
        print(f"    nf              : {G_OUTPUT_DIM} (num. of nonfunctional features)")
        # Adversarial Attack Type 3,4: noise dim = number of nonfunctional features.
        # A local is used so the configured NOISE_DIM is not overwritten and
        # later iterations or re-runs with type 1/2 still see the configured value.
        if adversarial_attack_type == 4 or adversarial_attack_type == 3:
            noise_dim = G_OUTPUT_DIM
        else:
            noise_dim = NOISE_DIM
        G_INPUT_DIM = noise_dim
        print(f"    GAN-G NOISE DIM : {noise_dim}")
        print(f"    GAN-G INPUT DIM : {G_INPUT_DIM}")
        print(f"    GAN-G OUTPUT DIM: {G_OUTPUT_DIM}")
        generator = init_generator(G_INPUT_DIM,G_OUTPUT_DIM, adversarial_attack_type)
        # Load Attack Dataset
        test_raw_attack = preprocess_malicious_data(testset, attack_category)
        # Create batch of attack traffic
        batch_attack = create_batch2(test_raw_attack, BATCH_SIZE)
        n_batch_attack = len(batch_attack)
        print(f"{4*' '}Amout of {attack_category}:\t{len(test_raw_attack)} ({n_batch_attack} batchs - {BATCH_SIZE} records/batch)")

        # Calc DR through each epoch
        gan_g_folder_path = str(f"{GAN_Model_Path}Machine_Learning/{ids_ml_model_name}/{attack_category}/{adversarial_attack_type}/")
        print(f"{4*' '}GAN Models Folder: {gan_g_folder_path}")
        for epoch in range(0, MAX_EPOCH + 1, 10):
            # Load GAN-G Model (the "0 epochs" entry uses the checkpoint saved after 1 epoch)
            model_g_save_name = f"time_created_{gan_model_time_created}_GAN_G_{1 if epoch == 0 else epoch}epoch.pth"
            gan_g_model_path = gan_g_folder_path + model_g_save_name
            param = th.load(gan_g_model_path,map_location=lambda x,y:x)
            generator.load_state_dict(param)
            generator.eval()

            # Collect the per-batch frames and concatenate once at the end:
            # DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
            generated_frames = []
            o_dr,a_dr =[],[]
            with th.no_grad():
                for idx, bn in enumerate(test_batch_normal):
                    normal_b = th.Tensor(bn)
                    # Attack batches are reused cyclically when there are fewer of them than normal batches
                    attack_b = th.Tensor(batch_attack[idx % n_batch_attack])
                    # Generate Adversarial Traffic
                    adversarial_attack_b = gen_adversarial_attack(generator, noise_dim, attack_b, attack_category, adversarial_attack_type).detach()
                    # Keep each attack batch only once in the saved dataset
                    if (idx < n_batch_attack):
                        generated_frames.append(pd.DataFrame(data = np.array(adversarial_attack_b), columns=DATASET_COLUMNS))

                    # Calc DR
                    origin_dr, adversarial_dr = cal_dr(ids_model, normal_b, attack_b, adversarial_attack_b)
                    o_dr.append(origin_dr)
                    a_dr.append(adversarial_dr)
            if generated_frames:
                generated_dataset = pd.concat(generated_frames, ignore_index = True)
            else:
                generated_dataset = pd.DataFrame(columns=DATASET_COLUMNS)
            # EIR: relative drop of the detection rate caused by the adversarial traffic
            eir = 1 - (np.mean(a_dr)/np.mean(o_dr))
            print(f"\t {epoch:3d} epochs:\tOrigin DR : {np.mean(o_dr)*100:.2f}% \t Adversarial DR : {np.mean(a_dr)*100:.2f}% \t EIR : {eir*100:.2f}%")

            generated_dataset_folder_path = str(f"{Generated_Dataset_Save_Path}Machine_Learning/{ids_ml_model_name}/{attack_category}/{adversarial_attack_type}/")
            os.makedirs(generated_dataset_folder_path, exist_ok=True)
            generated_dataset_file_path = generated_dataset_folder_path + str(f"time_created_{date.today()}_{epoch}epoch.csv")
            save_generated_dataset(generated_dataset, generated_dataset_file_path)

 IDS Model: LogicticRegression
********************************************************************************
 Adversarial Attack Type : 4
--------------------------------------------------------------------------------
 - Attack Category: DOS
    Loaded IDS Model From: /content/drive/My Drive/Study/KLTN/Saved Model/IDSModel/DOS/Machine_Learning/created_date_2020-07-10_LogicticRegression.pkl
    nf              : 24 (num. of nonfunctional features)
    GAN-G NOISE DIM : 24
    GAN-G INPUT DIM : 24
    GAN-G OUTPUT DIM: 24
    Amout of DOS:	7460 (29 batchs - 256 records/batch)
    GAN Models Folder: /content/drive/My Drive/Study/KLTN/Saved Model/GANModel/Machine_Learning/LogicticRegression/DOS/4/
	   0 epochs:	Origin DR : 79.30% 	 Adversarial DR : 0.10% 	 EIR : 99.88%
	    Generated Dataset Saved
	    Saved Path: /content/drive/My Drive/Study/KLTN/Generated Datasets/Machine_Learning/LogicticRegression/DOS/4/time_created_2020-08-01_0epoch.csv
	  10 epochs:	Origin DR : 79.30% 	 Adversar

# **4. Compare Datasets**

In [59]:
#@title **CONSTS FOR COMPARE DATASET**
# Colab form cell: the `#@param` annotations drive the form widgets, so they are left untouched.
# This cell re-assigns ATTACK_CATEGORIES and ADVERSARIAL_ATTACK_TYPES, which
# were already set by the run-model form earlier in the notebook.
ATTACK_CATEGORIES = 'DOS' #@param ['ALL', 'DOS', 'U2R_AND_R2L']
if ATTACK_CATEGORIES == 'ALL':
    ATTACK_CATEGORIES = ['DOS', 'U2R_AND_R2L']
else:
    ATTACK_CATEGORIES = [ATTACK_CATEGORIES]
print(f"ATTACK_CATEGORIES: \t\t{ATTACK_CATEGORIES}")

# NOTE(review): 0 expands to [1, 2, 3] only -- type 4 has to be selected explicitly.
ADVERSARIAL_ATTACK_TYPES = 4 #@param [0, 1, 2, 3, 4] {type:"raw"}
if ADVERSARIAL_ATTACK_TYPES == 0:
    ADVERSARIAL_ATTACK_TYPES = [1, 2, 3]
else:
    ADVERSARIAL_ATTACK_TYPES = [ADVERSARIAL_ATTACK_TYPES]
print(f"ADVERSARIAL_ATTACK_TYPES: \t{ADVERSARIAL_ATTACK_TYPES}")

ATTACK_CATEGORIES: 		['DOS']
ADVERSARIAL_ATTACK_TYPES: 	[4]


In [97]:
# PATH
# Saved adversarial dataset (type 4, 100 epochs) for each supported attack category
adversarial_set_paths = {
    'DOS': '/content/drive/My Drive/Study/KLTN/Generated Datasets/Machine_Learning/LogicticRegression/DOS/4/time_created_2020-08-01_100epoch.csv',
    'U2R_AND_R2L': '/content/drive/My Drive/Study/KLTN/Generated Datasets/Machine_Learning/LogicticRegression/U2R_AND_R2L/4/time_created_2020-08-01_100epoch.csv',
}
# Only the first selected attack category decides which file is loaded
if ATTACK_CATEGORIES[0] not in adversarial_set_paths:
    raise ValueError("Invalid Attack Category")
adversarial_set_path = adversarial_set_paths[ATTACK_CATEGORIES[0]]

# Load dataset
testset = pd.read_csv(testset_path)
adversarial_set = pd.read_csv(adversarial_set_path)

# Data
adversarial_attack = np.array(adversarial_set)
print(f"Attack Category: {ATTACK_CATEGORIES[0]}\nAdversarial Attack Data Shape: {adversarial_attack.shape}")

Attack Category: DOS
Adversarial Attack Data Shape: (7424, 41)


In [118]:
# Compare Dataset
# For every nonfunctional feature, print the distinct values found in the raw
# attack records and then in the loaded adversarial records.
print(f"{40*'='} COMPARE DATASET {40*'='}")

for adversarial_attack_type in ADVERSARIAL_ATTACK_TYPES:
    print(f"{80*'*'}\n Adversarial Attack Type : {adversarial_attack_type}")
    for attack_category in ATTACK_CATEGORIES:
        print(f"{80*'-'}\n  # Attack Category: {attack_category}")

        # Load Attack Dataset
        test_raw_attack = preprocess_malicious_data(testset, attack_category)
        for nonfunctional_feature in POS_NONFUNCTIONAL_FEATURES[attack_category]:
            print(f"Feature: {DATASET_COLUMNS[nonfunctional_feature]}")
            # Raw records first, adversarial records second
            for records in (test_raw_attack, adversarial_attack):
                distinct_values = pd.Series(records[:, nonfunctional_feature]).unique()
                print(f"{len(distinct_values):4d}: {distinct_values}")

********************************************************************************
 Adversarial Attack Type : 4
--------------------------------------------------------------------------------
  # Attack Category: DOS
Feature: hot
   4: [0.         0.01980198 0.00990099 0.02970297]
   1: [0.]
Feature: num_failed_logins
   1: [0.]
   1: [1.]
Feature: logged_in
   2: [0. 1.]
   1: [0.]
Feature: num_compromised
   2: [0.         0.00125628]
   1: [0.]
Feature: root_shell
   1: [0.]
   1: [1.]
Feature: su_attempted
   1: [0.]
   1: [1.]
Feature: num_root
   1: [0.]
   1: [0.]
Feature: num_file_creations
   1: [0.]
   1: [0.]
Feature: num_shells
   1: [0.]
   1: [1.]
Feature: num_access_files
   1: [0.]
   1: [1.]
Feature: num_outbound_cmds
   1: [0.]
   1: [0.]
Feature: is_host_login
   1: [0.]
   1: [0.]
Feature: is_guest_login
   1: [0.]
   1: [0.]
Feature: count
 447: [0.00195695 0.06457926 0.25244618 0.99804305 0.02739726 0.0665362
 0.13502935 0.46966732 0.47358121 0.19373777 0.09589041 

The saved models are stored in [Google Drive - GAN Model](https://drive.google.com/drive/u/1/folders/1VNFW-k5SbR0eGsJ_np3U-W3Rcz_n4I8N)

The results of the code are stored in [Github - Thesis](https://github.com/thetinybug/thesis-IDSGAN)