## Partitioning UNSW-NB15-Train-Basic into 7 nodes 

The dataset is split into 7 node partitions, which can be balanced or unbalanced. Each attack category may appear in all nodes or in only a subset of them.

In [1]:
import numpy as np  # for array
import pandas as pd  # for csv files and dataframe
import matplotlib.pyplot as plt  # for plotting
import seaborn as sns  # plotting
from scipy import stats

import pickle  # To load data int disk

import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, confusion_matrix, make_scorer
from sklearn.metrics import auc, f1_score, roc_curve
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_validate, cross_val_predict

In [2]:
# Load the UNSW-NB15-Train-Basic dataset from its CSV file into a DataFrame
train_basic_csv = 'C:/Users/UX430/Documents/thesis/datasets/UNSW-NB15/UNSW-NB15-Train-Basic.csv'
complete = pd.read_csv(train_basic_csv)

In [3]:
def filter(dataset):
    """Return the rows of `dataset` whose state and proto are both accepted.

    Accepted state values: PAR, ACC, ECO, CON, FIN, INT, REQ, RST.
    Accepted proto values: igmp, arp, icmp, udp, tcp, ipv6-icmp, rarp.

    NOTE: this name shadows Python's built-in `filter` inside the notebook.
    """
    accepted_states = ['PAR', 'ACC', 'ECO', 'CON', 'FIN', 'INT', 'REQ', 'RST']
    accepted_protos = ['igmp', 'arp', 'icmp', 'udp', 'tcp', 'ipv6-icmp', 'rarp']
    state_ok = dataset['state'].isin(accepted_states)
    proto_ok = dataset['proto'].isin(accepted_protos)
    # A row is kept only when both conditions hold
    return dataset[state_ok & proto_ok]

In [4]:
# Keep only the rows whose state and proto values are in the sets accepted by filter()
complete = filter(complete)

### id = 7A : Partition with 7 balanced nodes 

All traffic categories are represented in each of the 7 nodes.



In [14]:
# Create the partitions

# Filter each traffic category once (instead of once per chunk), then cut it
# into 7 consecutive positional chunks with .iloc.
normal_rows = complete[complete['label'] == 0]
generic_rows = complete[complete['attack_cat'] == "generic"]
exploits_rows = complete[complete['attack_cat'] == "exploits"]
dos_rows = complete[complete['attack_cat'] == "dos"]
recon_rows = complete[complete['attack_cat'] == "reconnaissance"]

# Normal traffic: 7 chunks of up to 27605 rows; rows at position 27605*7 and beyond are not used.
normal1 = normal_rows.iloc[:27605]
normal2 = normal_rows.iloc[27605:27605*2]
normal3 = normal_rows.iloc[27605*2:27605*3]
normal4 = normal_rows.iloc[27605*3:27605*4]
normal5 = normal_rows.iloc[27605*4:27605*5]
normal6 = normal_rows.iloc[27605*5:27605*6]
normal7 = normal_rows.iloc[27605*6:27605*7]

# Generic: chunks of 22863 rows; the last chunk takes all remaining rows.
generic1 = generic_rows.iloc[:22863]
generic2 = generic_rows.iloc[22863:22863*2]
generic3 = generic_rows.iloc[22863*2:22863*3]
generic4 = generic_rows.iloc[22863*3:22863*4]
generic5 = generic_rows.iloc[22863*4:22863*5]
generic6 = generic_rows.iloc[22863*5:22863*6]
generic7 = generic_rows.iloc[22863*6:]

# Exploits: chunks of 3053 rows, the last two shortened by one row.
# NOTE: the sixth positional chunk is bound to exploits7 and the open-ended
# final chunk to exploits6; this naming is kept as-is so the node contents
# written downstream do not change.
exploits1 = exploits_rows.iloc[:3053]
exploits2 = exploits_rows.iloc[3053:3053*2]
exploits3 = exploits_rows.iloc[3053*2:3053*3]
exploits4 = exploits_rows.iloc[3053*3:3053*4]
exploits5 = exploits_rows.iloc[3053*4:3053*5]
exploits7 = exploits_rows.iloc[3053*5:3053*6-1]
exploits6 = exploits_rows.iloc[3053*6-1:]

# DoS: five chunks of 416 rows, a sixth of 417, and the remainder in the seventh.
dos1 = dos_rows.iloc[:416]
dos2 = dos_rows.iloc[416:416*2]
dos3 = dos_rows.iloc[416*2:416*3]
dos4 = dos_rows.iloc[416*3:416*4]
dos5 = dos_rows.iloc[416*4:416*5]
dos6 = dos_rows.iloc[416*5:416*6+1]
dos7 = dos_rows.iloc[416*6+1:]

# Reconnaissance: chunks of 1273 rows; the last chunk takes all remaining rows.
recon1 = recon_rows.iloc[:1273]
recon2 = recon_rows.iloc[1273:1273*2]
recon3 = recon_rows.iloc[1273*2:1273*3]
recon4 = recon_rows.iloc[1273*3:1273*4]
recon5 = recon_rows.iloc[1273*4:1273*5]
recon6 = recon_rows.iloc[1273*5:1273*6]
recon7 = recon_rows.iloc[1273*6:]

In [15]:

# Assemble each node from its share of every traffic category.
part1 = pd.concat([normal1, generic1, exploits1, dos1, recon1])
part2 = pd.concat([normal2, generic2, exploits2, dos2, recon2])
part3 = pd.concat([normal3, generic3, exploits3, dos3, recon3])
part4 = pd.concat([normal4, generic4, exploits4, dos4, recon4])
part5 = pd.concat([normal5, generic5, exploits5, dos5, recon5])
part6 = pd.concat([normal6, generic6, exploits6, dos6, recon6])
part7 = pd.concat([normal7, generic7, exploits7, dos7, recon7])

# Write one CSV per node (row index omitted); '{}' is replaced by the node number 1..7.
# NOTE(review): the output directory is an absolute, machine-specific path;
# consider moving it to a single configurable constant.
out_template = 'C:/Users/UX430/Documents/thesis/datasets/UNSW-NB15/UNSW-NB15-Train-Basic-7A-Part{}.csv'
for node_id, node_frame in enumerate([part1, part2, part3, part4, part5, part6, part7], start=1):
    node_frame.to_csv(out_template.format(node_id), index=False)

In [13]:
# Row count per attack category in the filtered dataset.
# Per-node chunk sizes used by the 7A slicing code:
# 416 dos, 1273 recon, 3053 exploits, 22863 generic, 27605 normal
complete['attack_cat'].value_counts()

normal            217320
generic           160041
exploits           21369
reconnaissance      8912
dos                 2914
Name: attack_cat, dtype: int64

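### id = 7B : Partition with 7 balanced nodes

Unlike 7A, each traffic category is split into six chunks, so every category is missing from exactly one of the 7 nodes.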


In [16]:
# Create the partitions

# Filter each traffic category once (instead of once per chunk), then cut it
# into 6 consecutive positional chunks with .iloc.
normal_rows = complete[complete['label'] == 0]
generic_rows = complete[complete['attack_cat'] == "generic"]
exploits_rows = complete[complete['attack_cat'] == "exploits"]
dos_rows = complete[complete['attack_cat'] == "dos"]
recon_rows = complete[complete['attack_cat'] == "reconnaissance"]

# Normal traffic: chunk sizes 26636, 24560, 23075, 23074, 46143, 49746 (in that order).
normal1 = normal_rows.iloc[:26636]
normal2 = normal_rows.iloc[26636:26636+24560]
normal3 = normal_rows.iloc[26636+24560:26636+24560+23075]
normal4 = normal_rows.iloc[26636+24560+23075:26636+24560+23075+23074]
normal5 = normal_rows.iloc[26636+24560+23075+23074:26636+24560+23075+23074+46143]
normal6 = normal_rows.iloc[26636+24560+23075+23074+46143:26636+24560+23075+23074+46143+49746]

# Generic: three chunks of 26673 rows followed by three of 26674.
generic1 = generic_rows.iloc[:26673]
generic2 = generic_rows.iloc[26673:26673*2]
generic3 = generic_rows.iloc[26673*2:26673*3]
generic4 = generic_rows.iloc[26673*3:26673*4+1]
generic5 = generic_rows.iloc[26673*4+1:26673*5+2]
generic6 = generic_rows.iloc[26673*5+2:26673*6+3]

# Exploits: three chunks of 3561 rows followed by three of 3562.
exploits1 = exploits_rows.iloc[:3561]
exploits2 = exploits_rows.iloc[3561:3561*2]
exploits3 = exploits_rows.iloc[3561*2:3561*3]
exploits4 = exploits_rows.iloc[3561*3:3561*4+1]
exploits5 = exploits_rows.iloc[3561*4+1:3561*5+2]
exploits6 = exploits_rows.iloc[3561*5+2:3561*6+3]

# DoS: four chunks of 416 rows, a double-sized fifth chunk (417*2), then all remaining rows.
dos1 = dos_rows.iloc[:416]
dos2 = dos_rows.iloc[416:416*2]
dos3 = dos_rows.iloc[416*2:416*3]
dos4 = dos_rows.iloc[416*3:416*4]
dos5 = dos_rows.iloc[416*4:416*4+417*2]
dos6 = dos_rows.iloc[416*4+417*2:]

# Reconnaissance: four chunks of 1485 rows followed by two of 1486.
recon1 = recon_rows.iloc[:1485]
recon2 = recon_rows.iloc[1485:1485*2]
recon3 = recon_rows.iloc[1485*2:1485*3]
recon4 = recon_rows.iloc[1485*3:1485*4]
recon5 = recon_rows.iloc[1485*4:1485*5+1]
recon6 = recon_rows.iloc[1485*5+1:1485*6+2]


In [17]:
# Assemble the nodes. Each category has six chunks, so each node lacks at most one category:
# part1 has no exploits, part2 no reconnaissance, part5 no normal traffic,
# part6 no DoS, part7 no generic.
part1 = pd.concat([normal1, generic1, dos1, recon1])
part2 = pd.concat([normal2, generic2, exploits1, dos2])
part3 = pd.concat([normal3, generic3, exploits2, dos3, recon2])
part4 = pd.concat([normal4, generic4, exploits3, dos4, recon3])
part5 = pd.concat([generic5, exploits4, dos5, recon4])
part6 = pd.concat([normal5, generic6, exploits5, recon5])
part7 = pd.concat([normal6, dos6, exploits6, recon6])

# Write one CSV per node (row index omitted); '{}' is replaced by the node number 1..7.
# NOTE(review): the output directory is an absolute, machine-specific path;
# consider moving it to a single configurable constant.
out_template = 'C:/Users/UX430/Documents/thesis/datasets/UNSW-NB15/UNSW-NB15-Train-Basic-7B-Part{}.csv'
for node_id, node_frame in enumerate([part1, part2, part3, part4, part5, part6, part7], start=1):
    node_frame.to_csv(out_template.format(node_id), index=False)

### id = 7C : Partition with 7 unbalanced nodes 





In [19]:
# Create the partitions

# Filter each traffic category once (instead of once per chunk), then cut it
# into 6 consecutive positional chunks with .iloc.
normal_rows = complete[complete['label'] == 0]
generic_rows = complete[complete['attack_cat'] == "generic"]
exploits_rows = complete[complete['attack_cat'] == "exploits"]
dos_rows = complete[complete['attack_cat'] == "dos"]
recon_rows = complete[complete['attack_cat'] == "reconnaissance"]

# Normal traffic: deliberately uneven chunk sizes 10000, 50000, 20000, 75000, 28236, 10000.
normal1 = normal_rows.iloc[:10000]
normal2 = normal_rows.iloc[10000:10000+50000]
normal3 = normal_rows.iloc[10000+50000:10000+50000+20000]
normal4 = normal_rows.iloc[10000+50000+20000:10000+50000+20000+75000]
normal5 = normal_rows.iloc[10000+50000+20000+75000:10000+50000+20000+75000+28236]
normal6 = normal_rows.iloc[10000+50000+20000+75000+28236:10000+50000+20000+75000+28236+10000]

# Generic: three chunks of 26673 rows followed by three of 26674.
generic1 = generic_rows.iloc[:26673]
generic2 = generic_rows.iloc[26673:26673*2]
generic3 = generic_rows.iloc[26673*2:26673*3]
generic4 = generic_rows.iloc[26673*3:26673*4+1]
generic5 = generic_rows.iloc[26673*4+1:26673*5+2]
generic6 = generic_rows.iloc[26673*5+2:26673*6+3]

# Exploits: three chunks of 3561 rows followed by three of 3562.
exploits1 = exploits_rows.iloc[:3561]
exploits2 = exploits_rows.iloc[3561:3561*2]
exploits3 = exploits_rows.iloc[3561*2:3561*3]
exploits4 = exploits_rows.iloc[3561*3:3561*4+1]
exploits5 = exploits_rows.iloc[3561*4+1:3561*5+2]
exploits6 = exploits_rows.iloc[3561*5+2:3561*6+3]

# DoS: four chunks of 416 rows, a double-sized fifth chunk (417*2), then all remaining rows.
dos1 = dos_rows.iloc[:416]
dos2 = dos_rows.iloc[416:416*2]
dos3 = dos_rows.iloc[416*2:416*3]
dos4 = dos_rows.iloc[416*3:416*4]
dos5 = dos_rows.iloc[416*4:416*4+417*2]
dos6 = dos_rows.iloc[416*4+417*2:]

# Reconnaissance: four chunks of 1485 rows followed by two of 1486.
recon1 = recon_rows.iloc[:1485]
recon2 = recon_rows.iloc[1485:1485*2]
recon3 = recon_rows.iloc[1485*2:1485*3]
recon4 = recon_rows.iloc[1485*3:1485*4]
recon5 = recon_rows.iloc[1485*4:1485*5+1]
recon6 = recon_rows.iloc[1485*5+1:1485*6+2]


In [20]:
# Assemble the nodes. Each category has six chunks, so each node lacks at most one category:
# part1 has no exploits, part2 no reconnaissance, part4 no normal traffic,
# part6 no DoS, part7 no generic.
part1 = pd.concat([normal1, generic1, dos1, recon1])
part2 = pd.concat([normal2, generic2, exploits1, dos2])
part3 = pd.concat([normal3, generic3, exploits2, dos3, recon2])
part4 = pd.concat([generic4, exploits3, dos4, recon3])
part5 = pd.concat([normal4, generic5, exploits4, dos5, recon4])
part6 = pd.concat([normal5, generic6, exploits5, recon5])
part7 = pd.concat([normal6, dos6, exploits6, recon6])

# Write one CSV per node (row index omitted); '{}' is replaced by the node number 1..7.
# NOTE(review): the output directory is an absolute, machine-specific path;
# consider moving it to a single configurable constant.
out_template = 'C:/Users/UX430/Documents/thesis/datasets/UNSW-NB15/UNSW-NB15-Train-Basic-7C-Part{}.csv'
for node_id, node_frame in enumerate([part1, part2, part3, part4, part5, part6, part7], start=1):
    node_frame.to_csv(out_template.format(node_id), index=False)

### Classification datasets 

#### Cat7A

In [21]:


# Attack-only partitions (no normal traffic) for the classification datasets.
# Filter each attack category once (instead of once per chunk), then cut it
# into 7 consecutive positional chunks with .iloc.
generic_rows = complete[complete['attack_cat'] == "generic"]
exploits_rows = complete[complete['attack_cat'] == "exploits"]
dos_rows = complete[complete['attack_cat'] == "dos"]
recon_rows = complete[complete['attack_cat'] == "reconnaissance"]

# Generic: chunks of 22863 rows; the last chunk takes all remaining rows.
generic1 = generic_rows.iloc[:22863]
generic2 = generic_rows.iloc[22863:22863*2]
generic3 = generic_rows.iloc[22863*2:22863*3]
generic4 = generic_rows.iloc[22863*3:22863*4]
generic5 = generic_rows.iloc[22863*4:22863*5]
generic6 = generic_rows.iloc[22863*5:22863*6]
generic7 = generic_rows.iloc[22863*6:]

# Exploits: chunks of 3053 rows, the last two shortened by one row.
# NOTE: the sixth positional chunk is bound to exploits7 and the open-ended
# final chunk to exploits6; this naming is kept as-is so the node contents
# written downstream do not change.
exploits1 = exploits_rows.iloc[:3053]
exploits2 = exploits_rows.iloc[3053:3053*2]
exploits3 = exploits_rows.iloc[3053*2:3053*3]
exploits4 = exploits_rows.iloc[3053*3:3053*4]
exploits5 = exploits_rows.iloc[3053*4:3053*5]
exploits7 = exploits_rows.iloc[3053*5:3053*6-1]
exploits6 = exploits_rows.iloc[3053*6-1:]

# DoS: five chunks of 416 rows, a sixth of 417, and the remainder in the seventh.
dos1 = dos_rows.iloc[:416]
dos2 = dos_rows.iloc[416:416*2]
dos3 = dos_rows.iloc[416*2:416*3]
dos4 = dos_rows.iloc[416*3:416*4]
dos5 = dos_rows.iloc[416*4:416*5]
dos6 = dos_rows.iloc[416*5:416*6+1]
dos7 = dos_rows.iloc[416*6+1:]

# Reconnaissance: chunks of 1273 rows; the last chunk takes all remaining rows.
recon1 = recon_rows.iloc[:1273]
recon2 = recon_rows.iloc[1273:1273*2]
recon3 = recon_rows.iloc[1273*2:1273*3]
recon4 = recon_rows.iloc[1273*3:1273*4]
recon5 = recon_rows.iloc[1273*4:1273*5]
recon6 = recon_rows.iloc[1273*5:1273*6]
recon7 = recon_rows.iloc[1273*6:]

In [22]:

# Assemble each node from its share of every attack category (no normal traffic).
part1 = pd.concat([generic1, exploits1, dos1, recon1])
part2 = pd.concat([generic2, exploits2, dos2, recon2])
part3 = pd.concat([generic3, exploits3, dos3, recon3])
part4 = pd.concat([generic4, exploits4, dos4, recon4])
part5 = pd.concat([generic5, exploits5, dos5, recon5])
part6 = pd.concat([generic6, exploits6, dos6, recon6])
part7 = pd.concat([generic7, exploits7, dos7, recon7])

# Write one CSV per node (row index omitted); '{}' is replaced by the node number 1..7.
# NOTE(review): the output directory is an absolute, machine-specific path;
# consider moving it to a single configurable constant.
out_template = 'C:/Users/UX430/Documents/thesis/datasets/UNSW-NB15/UNSW-NB15-Train-Basic-Cat7A-Part{}.csv'
for node_id, node_frame in enumerate([part1, part2, part3, part4, part5, part6, part7], start=1):
    node_frame.to_csv(out_template.format(node_id), index=False)

#### Cat7B

In [23]:


# Attack-only partitions in which every category contributes chunks of the same
# size (416 rows) to every node; rows at position 416*7 and beyond are not used.
# Filter each attack category once (instead of once per chunk), then slice with .iloc.
generic_rows = complete[complete['attack_cat'] == "generic"]
exploits_rows = complete[complete['attack_cat'] == "exploits"]
dos_rows = complete[complete['attack_cat'] == "dos"]
recon_rows = complete[complete['attack_cat'] == "reconnaissance"]

generic1 = generic_rows.iloc[:416]
generic2 = generic_rows.iloc[416:416*2]
generic3 = generic_rows.iloc[416*2:416*3]
generic4 = generic_rows.iloc[416*3:416*4]
generic5 = generic_rows.iloc[416*4:416*5]
generic6 = generic_rows.iloc[416*5:416*6]
generic7 = generic_rows.iloc[416*6:416*7]

# NOTE: the sixth positional chunk is bound to exploits7 and the seventh to
# exploits6; this naming is kept as-is so the node contents written downstream
# do not change.
exploits1 = exploits_rows.iloc[:416]
exploits2 = exploits_rows.iloc[416:416*2]
exploits3 = exploits_rows.iloc[416*2:416*3]
exploits4 = exploits_rows.iloc[416*3:416*4]
exploits5 = exploits_rows.iloc[416*4:416*5]
exploits7 = exploits_rows.iloc[416*5:416*6]
exploits6 = exploits_rows.iloc[416*6:416*7]

dos1 = dos_rows.iloc[:416]
dos2 = dos_rows.iloc[416:416*2]
dos3 = dos_rows.iloc[416*2:416*3]
dos4 = dos_rows.iloc[416*3:416*4]
dos5 = dos_rows.iloc[416*4:416*5]
dos6 = dos_rows.iloc[416*5:416*6]
dos7 = dos_rows.iloc[416*6:416*7]

recon1 = recon_rows.iloc[:416]
recon2 = recon_rows.iloc[416:416*2]
recon3 = recon_rows.iloc[416*2:416*3]
recon4 = recon_rows.iloc[416*3:416*4]
recon5 = recon_rows.iloc[416*4:416*5]
recon6 = recon_rows.iloc[416*5:416*6]
recon7 = recon_rows.iloc[416*6:416*7]

In [24]:

# Assemble each node from its share of every attack category (no normal traffic).
part1 = pd.concat([generic1, exploits1, dos1, recon1])
part2 = pd.concat([generic2, exploits2, dos2, recon2])
part3 = pd.concat([generic3, exploits3, dos3, recon3])
part4 = pd.concat([generic4, exploits4, dos4, recon4])
part5 = pd.concat([generic5, exploits5, dos5, recon5])
part6 = pd.concat([generic6, exploits6, dos6, recon6])
part7 = pd.concat([generic7, exploits7, dos7, recon7])

# Write one CSV per node (row index omitted); '{}' is replaced by the node number 1..7.
# NOTE(review): the output directory is an absolute, machine-specific path;
# consider moving it to a single configurable constant.
out_template = 'C:/Users/UX430/Documents/thesis/datasets/UNSW-NB15/UNSW-NB15-Train-Basic-Cat7B-Part{}.csv'
for node_id, node_frame in enumerate([part1, part2, part3, part4, part5, part6, part7], start=1):
    node_frame.to_csv(out_template.format(node_id), index=False)

#### Cat7C

In [25]:


# Attack-only partitions with uneven chunk sizes and a different number of
# chunks per category. Filter each attack category once (instead of once per
# chunk), then slice with .iloc.
generic_rows = complete[complete['attack_cat'] == "generic"]
exploits_rows = complete[complete['attack_cat'] == "exploits"]
dos_rows = complete[complete['attack_cat'] == "dos"]
recon_rows = complete[complete['attack_cat'] == "reconnaissance"]

# Generic: chunk sizes 729, 729, 3727, 3728, 15457; later rows are not used.
generic1 = generic_rows.iloc[:729]
generic2 = generic_rows.iloc[729:729*2]
generic3 = generic_rows.iloc[729*2:729*2+3727]
generic4 = generic_rows.iloc[729*2+3727:729*2+3727+3728]
generic5 = generic_rows.iloc[729*2+3727+3728:729*2+3727+3728+15457]

# Exploits: chunk sizes 728, 728, 729, 3727, 15457.
exploits1 = exploits_rows.iloc[:728]
exploits2 = exploits_rows.iloc[728:728*2]
exploits3 = exploits_rows.iloc[728*2:728*3+1]
exploits4 = exploits_rows.iloc[728*3+1:728*3+1+3727]
exploits5 = exploits_rows.iloc[728*3+1+3727:728*3+1+3727+15457]

# DoS: chunk sizes 728, 728, 729, 729.
dos1 = dos_rows.iloc[:728]
dos2 = dos_rows.iloc[728:728*2]
dos3 = dos_rows.iloc[728*2:728*3+1]
dos4 = dos_rows.iloc[728*3+1:728*4+2]

# Reconnaissance: chunk sizes 728, 729, 3727, 3728.
recon1 = recon_rows.iloc[:728]
recon2 = recon_rows.iloc[728:728*2+1]
recon3 = recon_rows.iloc[728*2+1:728*2+1+3727]
recon4 = recon_rows.iloc[728*2+1+3727:728*2+1+3727+3728]


In [26]:

# Assemble the nodes; each node receives only a subset of the attack categories.
part1 = pd.concat([exploits1, dos1])
part2 = pd.concat([exploits2, dos2, recon1])
part3 = pd.concat([generic1, exploits3, dos3])
part4 = pd.concat([generic2, dos4, recon2])
part5 = pd.concat([generic3, exploits4, recon3])
part6 = pd.concat([generic4, recon4])
part7 = pd.concat([generic5, exploits5])

# Write one CSV per node (row index omitted); '{}' is replaced by the node number 1..7.
# NOTE(review): the output directory is an absolute, machine-specific path;
# consider moving it to a single configurable constant.
out_template = 'C:/Users/UX430/Documents/thesis/datasets/UNSW-NB15/UNSW-NB15-Train-Basic-Cat7C-Part{}.csv'
for node_id, node_frame in enumerate([part1, part2, part3, part4, part5, part6, part7], start=1):
    node_frame.to_csv(out_template.format(node_id), index=False)