In [33]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
# CSV that the notebook loads into `df`; the path is relative to the working directory.
data_path = 'Data/NSL-KDD/modified/_Train.csv'

In [34]:
# Load the dataset at data_path and report its (rows, columns) shape.
df = pd.read_csv(data_path)
print(df.shape)

(23780, 120)


In [3]:
# Distinct class labels found in the 'defects' column, in order of first appearance.
targets = df['defects'].unique()

In [4]:
# Write one CSV per class label into the Saperate_classes directory.
# The directory name uses a lower-case "c" so that it matches the paths that
# two_class_train, divide_ten and selected_train_anomaly read from; on a
# case-sensitive filesystem "Saperate_Classes" would be a different directory.
for target in targets:
    class_name = target.split('.')[0]  # keep only the part of the label before the first '.'
    sub_data_path = 'Data/NSL-KDD/modified/Train/Saperate_classes/kddcup10_' + class_name + '.csv'
    sub_df = df[df['defects'] == target].reset_index(drop = True)
    sub_df.to_csv(sub_data_path, index = False)

In [36]:
# Create training set with 2 classes
def two_class_train():
    """Combine the 'normal' and 'neptune' class CSVs into Train/train.csv.

    The two per-class files are read and stacked (normal rows first) under a
    fresh 0..n-1 index, then written out without the index column.
    """
    source_paths = (
        'Data/NSL-KDD/modified/Train/Saperate_classes/kddcup10_normal.csv',
        'Data/NSL-KDD/modified/Train/Saperate_classes/kddcup10_neptune.csv',
    )
    combined = pd.concat(
        [pd.read_csv(path) for path in source_paths],
        ignore_index=True,
    )
    combined.to_csv('Data/NSL-KDD/modified/Train/train.csv', index = False)
# Run the two-class build now; this writes Data/NSL-KDD/modified/Train/train.csv.
two_class_train()

In [5]:
# Create training set from a selected subset of classes
def selected_train():
    """Write Train/train.csv containing only the rows of a fixed list of classes.

    Uses the notebook-level ``df`` and ``targets``. Rows are grouped class by
    class, in the order the labels appear in ``targets``. The result gets a
    fresh 0..n-1 index and is written without the index column. The shape of
    the written frame is printed.
    """
    selected_classes = ['normal', 'neptune', 'teardrop', 'smurf', 'saint', 'warezmaster',
                        'portsweep', 'processtable', 'guess_passwd', 'mscan', 'apache2']
    # Collect the per-class slices first and concatenate once, rather than
    # re-copying the accumulated frame on every iteration.
    frames = [
        df[df['defects'] == target]
        for target in targets
        if target.split('.')[0] in selected_classes
    ]
    # df.iloc[0:0] keeps the column layout when none of the classes is present.
    train_df = pd.concat(frames, ignore_index=True) if frames else df.iloc[0:0]
    train_df_path = 'Data/NSL-KDD/modified/Train/train.csv'
    train_df.to_csv(train_df_path, index = False)
    print(train_df.shape)

In [6]:
# Create cumulative training sets, adding one class at a time
def all_class_one_by_one():
    """Write train_0.csv, train_1.csv, ... where file k holds the first k+1 classes.

    Uses the notebook-level ``df`` and ``targets``. Classes are added in the
    order they appear in ``targets``; every file is written with a fresh
    0..n-1 index and without the index column. The shape of each written
    frame is printed.
    """
    frames = []
    for i, target in enumerate(targets):
        frames.append(df[df['defects'] == target])
        # Re-concatenate the classes gathered so far; each output file is cumulative.
        train_df = pd.concat(frames, ignore_index=True)
        train_df_path = 'Data/NSL-KDD/modified/Train/train_' + str(i) + '.csv'
        train_df.to_csv(train_df_path, index = False)
        print(train_df.shape)

In [None]:
# Create training set with 1 class
def one_class_train():
    """Copy the 'normal' class CSV to Train/train.csv.

    The per-class file is read and written back out without the index column.
    """
    # Read from the Saperate_classes directory, the location two_class_train and
    # divide_ten use for the per-class files.
    class1_path = 'Data/NSL-KDD/modified/Train/Saperate_classes/kddcup10_normal.csv'
    train_df = pd.read_csv(class1_path)
    train_df_path = 'Data/NSL-KDD/modified/Train/train.csv'
    train_df.to_csv(train_df_path, index = False)

In [None]:
# Create training set with all classes
def all_class_at_once():
    """Write the notebook-level ``df`` (every class) to Train/train.csv without the index column."""
    train_df_path = 'Data/NSL-KDD/modified/Train/train.csv'
    df.to_csv(train_df_path, index = False)

In [None]:
# Per-class count of non-null values in every column (one row per 'defects' label).
df.groupby('defects').count()

In [26]:
# Split the combined normal + neptune data into 10 stratified parts
def divide_ten():
    """Write ten stratified, non-overlapping parts of the normal + neptune data.

    The two per-class CSVs are concatenated and split with a shuffled, seeded
    10-fold StratifiedKFold on the 'defects' label. The held-out portion of
    fold k is written to Train/Normal_Data/N<k>.csv (without the index column)
    and its shape is printed. Despite the output directory name, each part
    contains rows of both classes.
    """
    class1_path = 'Data/NSL-KDD/modified/Train/Saperate_classes/kddcup10_normal.csv'
    class2_path = 'Data/NSL-KDD/modified/Train/Saperate_classes/kddcup10_neptune.csv'
    train_df = pd.concat([pd.read_csv(class1_path), pd.read_csv(class2_path)],
                         ignore_index=True)
    y = train_df['defects']
    X = train_df.drop(labels=['defects'],axis=1)
    skf = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)
    # Only the held-out indices are needed: the ten test folds are disjoint and
    # together cover every row exactly once.
    for i, (_, test_index) in enumerate(skf.split(X,y)):
        # split() yields positional indices, so select rows by position.
        sub_df = train_df.iloc[test_index]
        sub_df_path = 'Data/NSL-KDD/modified/Train/Normal_Data/N' + str(i) + '.csv'
        sub_df.to_csv(sub_df_path, index = False)
        print(sub_df.shape)

In [27]:
# Generate the ten part files; each printed tuple is the shape of one written part.
divide_ten()

(1815, 120)
(1815, 120)
(1815, 120)
(1815, 120)
(1815, 120)
(1814, 120)
(1814, 120)
(1814, 120)
(1813, 120)
(1813, 120)


In [8]:
# Anomaly class names, partitioned into five groups
set1 = ['smurf', 'ipsweep', 'snmpguess', 'ftp_write', 'xsnoop',
        'phf', 'spy', 'worm', 'perl']

set2 = ['guess_passwd', 'apache2', 'saint']

set3 = ['satan', 'portsweep', 'mailbomb']

set4 = ['mscan', 'processtable', 'teardrop', 'warezclient', 'snmpgetattack']

set5 = ['warezmaster', 'back', 'nmap', 'httptunnel', 'pod', 'buffer_overflow', 'rootkit',
        'ps', 'multihop', 'xterm', 'named', 'sendmail', 'land', 'xlock']

# Ordered collection of the five groups.
sets = [set1,set2,set3,set4,set5]

In [30]:
# Build a two-way stratified split of the given anomaly classes
def selected_train_anomaly(sets,i,j):
    """Load the listed classes, split them in two stratified halves and save both.

    Parameters
    ----------
    sets : iterable of str
        Class names; each is read from
        Train/Saperate_classes/kddcup10_<name>.csv. Classes whose file is
        missing or empty are skipped with a printed notice.
    i, j :
        Suffixes of the two output files: the first half of the split is
        written to Train/5_anomaly/A<i>.csv and the second to A<j>.csv
        (both without the index column).

    Raises
    ------
    ValueError
        If none of the requested class files could be loaded.
    """
    frames = []
    for target in sets:
        sub_data_path = 'Data/NSL-KDD/modified/Train/Saperate_classes/kddcup10_' + target + '.csv'
        try:
            frames.append(pd.read_csv(sub_data_path))
        except (FileNotFoundError, pd.errors.EmptyDataError) as err:
            # Still best-effort (a class without data is skipped), but say so
            # instead of dropping it silently; other errors now propagate.
            print('Skipping class ' + repr(target) + ': ' + str(err))
    if not frames:
        raise ValueError('none of the requested class files could be loaded: ' + repr(list(sets)))
    train_df = pd.concat(frames, ignore_index=True)
    print(train_df.defects.unique())
    y = train_df['defects']
    X = train_df.drop(labels=['defects'],axis=1)
    skf = StratifiedKFold(n_splits=2, random_state=42, shuffle=True)
    # Only the first of the two folds is used; its train/test parts are the two halves.
    train_index, test_index = next(skf.split(X,y))
    # split() yields positional indices, so select rows by position.
    sub_df_1 = train_df.iloc[train_index]
    sub_df_2 = train_df.iloc[test_index]
    sub_df_1_path = 'Data/NSL-KDD/modified/Train/5_anomaly/A' + str(i) + '.csv'
    sub_df_2_path = 'Data/NSL-KDD/modified/Train/5_anomaly/A' + str(j) + '.csv'
    sub_df_1.to_csv(sub_df_1_path, index = False)
    sub_df_2.to_csv(sub_df_2_path, index = False)

In [31]:
# For each of the five groups, write one half of its split to A0..A4 and the
# other half to A5..A9.
i, j = 0, 5
for _set in sets:
    print()  # blank line between the per-group label listings
    selected_train_anomaly(_set, i, j)
    i, j = i + 1, j + 1


['smurf' 'ipsweep' 'snmpguess' 'ftp_write' 'xsnoop' 'phf' 'worm' 'perl']





['guess_passwd' 'apache2' 'saint']

['satan' 'portsweep' 'mailbomb']

['mscan' 'processtable' 'teardrop' 'warezclient' 'snmpgetattack']

['warezmaster' 'back' 'nmap' 'httptunnel' 'pod' 'buffer_overflow'
 'rootkit' 'ps' 'multihop' 'xterm' 'named' 'sendmail' 'land' 'xlock']


In [11]:
df_1 = 

Unnamed: 0,duration,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,num_failed_logins,logged_in,num_compromised,...,flag_RSTO,flag_RSTOS0,flag_RSTR,flag_S0,flag_S1,flag_S2,flag_S3,flag_SF,flag_SH,defects
0,0,868,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,normal
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,neptune
2,0,2381,279,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,normal
3,0,202,540,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,normal
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,mscan
5,0,245,663,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,normal
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,neptune
7,0,7280,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,normal
8,0,233,261,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,normal
9,0,263,292,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,normal
