In [28]:
import pandas as pd
import torch
from torch.autograd import Variable
import torch.nn.functional as F
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [29]:
# read csv
# https://research.unsw.edu.au/projects/unsw-nb15-dataset
# According to the website above csv is wrongly saved, so I change the name
# The number of records in the training set is 175,341 records and the testing set is 82,332 records from the different types, attack and normal.
df = pd.read_csv('../../Data/UNSW_NB15_testing-set.csv')

In [30]:
# fix seed
# Pytorch
torch.manual_seed(0)
torch.cuda.manual_seed(0)

In [31]:
# nominal to numeric of data
# proto                 object
# service               object
# state                 object

# proto to numeric
# proto_mapping = {'xxx':2, 'xxx':1, 'xxx':0}
# data['proto'] = data['proto'].map(proto_mapping)

# proto to numeric
proto_le = LabelEncoder()
df['proto'] = proto_le.fit_transform(df['proto'])
 
# service to numeric
service_le = LabelEncoder()
df['service'] = service_le.fit_transform(df['service'])

# state to numeric
state_le = LabelEncoder()
df['state'] = state_le.fit_transform(df['state'])

# nominal to numeric of data
# attack_cat            object

# target to numeric
df['attack_cat2']=df['attack_cat']
attack_cat2_le = LabelEncoder()
df['attack_cat2'] = attack_cat2_le.fit_transform(df['attack_cat2'])
df_processed = pd.get_dummies(df, columns=(['attack_cat']))


df_processed.head(10)

Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,attack_cat_Analysis,attack_cat_Backdoor,attack_cat_DoS,attack_cat_Exploits,attack_cat_Fuzzers,attack_cat_Generic,attack_cat_Normal,attack_cat_Reconnaissance,attack_cat_Shellcode,attack_cat_Worms
0,1,0.121478,113,0,2,6,4,258,172,74.08749,...,0,0,0,0,0,0,1,0,0,0
1,2,0.649902,113,0,2,14,38,734,42014,78.473372,...,0,0,0,0,0,0,1,0,0,0
2,3,1.623129,113,0,2,8,16,364,13186,14.170161,...,0,0,0,0,0,0,1,0,0,0
3,4,1.681642,113,3,2,12,12,628,770,13.677108,...,0,0,0,0,0,0,1,0,0,0
4,5,0.449454,113,0,2,10,6,534,268,33.373826,...,0,0,0,0,0,0,1,0,0,0
5,6,0.380537,113,0,2,10,6,534,268,39.41798,...,0,0,0,0,0,0,1,0,0,0
6,7,0.637109,113,0,2,10,8,534,354,26.683033,...,0,0,0,0,0,0,1,0,0,0
7,8,0.521584,113,0,2,10,8,534,354,32.593026,...,0,0,0,0,0,0,1,0,0,0
8,9,0.542905,113,0,2,10,8,534,354,31.313031,...,0,0,0,0,0,0,1,0,0,0
9,10,0.258687,113,0,2,10,6,534,268,57.985135,...,0,0,0,0,0,0,1,0,0,0


In [32]:
df.head(10)

Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label,attack_cat2
0,1,0.121478,113,0,2,6,4,258,172,74.08749,...,1,0,0,0,1,1,0,Normal,0,6
1,2,0.649902,113,0,2,14,38,734,42014,78.473372,...,2,0,0,0,1,6,0,Normal,0,6
2,3,1.623129,113,0,2,8,16,364,13186,14.170161,...,3,0,0,0,2,6,0,Normal,0,6
3,4,1.681642,113,3,2,12,12,628,770,13.677108,...,3,1,1,0,2,1,0,Normal,0,6
4,5,0.449454,113,0,2,10,6,534,268,33.373826,...,40,0,0,0,2,39,0,Normal,0,6
5,6,0.380537,113,0,2,10,6,534,268,39.41798,...,40,0,0,0,2,39,0,Normal,0,6
6,7,0.637109,113,0,2,10,8,534,354,26.683033,...,40,0,0,0,1,39,0,Normal,0,6
7,8,0.521584,113,0,2,10,8,534,354,32.593026,...,40,0,0,0,3,39,0,Normal,0,6
8,9,0.542905,113,0,2,10,8,534,354,31.313031,...,40,0,0,0,3,39,0,Normal,0,6
9,10,0.258687,113,0,2,10,6,534,268,57.985135,...,40,0,0,0,3,39,0,Normal,0,6


In [33]:
# SMOTE
sm = SMOTE()

label_train = df.iloc[:,-2]

data_rebalanced, label_rebalanced = sm.fit_resample(df_processed, label_train)

In [34]:
df_train, df_test = train_test_split(data_rebalanced, test_size=0.3, random_state=0)

In [35]:
#df_train.dtypes
df_train.shape

(167077, 55)

In [36]:
df_train.head(10)

Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,attack_cat_Analysis,attack_cat_Backdoor,attack_cat_DoS,attack_cat_Exploits,attack_cat_Fuzzers,attack_cat_Generic,attack_cat_Normal,attack_cat_Reconnaissance,attack_cat_Shellcode,attack_cat_Worms
208167,14119,0.118954,113,0,2,20,21,4002,2657,2500.286578,...,0,0,0,0,0,0,1,0,0,0
117281,117282,8e-06,119,2,3,2,0,114,0,125000.0003,...,0,0,0,0,0,1,0,0,0,0
107751,107752,0.0,6,0,3,1,0,46,0,0.0,...,0,0,0,0,0,0,1,0,0,0
232733,17964,0.004629,113,0,2,18,18,3717,2437,7991.992287,...,0,0,0,0,0,0,1,0,0,0
129951,129952,8e-06,120,0,3,2,0,200,0,125000.0003,...,0,0,0,1,0,0,0,0,0,0
237859,24407,4.438847,113,0,2,224,410,12961,501661,149.569277,...,0,0,0,0,0,0,1,0,0,0
65132,65133,1e-05,97,0,3,2,0,200,0,100000.0025,...,0,0,0,1,0,0,0,0,0,0
46842,46843,0.572216,113,0,0,6,2,978,86,12.233143,...,0,0,0,0,0,0,1,0,0,0
88324,88325,1.671502,113,3,2,26,22,1334,1638,28.118423,...,0,0,0,1,0,0,0,0,0,0
107709,107710,0.483477,113,5,2,12,8,858,1010,39.298664,...,0,0,1,0,0,0,0,0,0,0


In [37]:
data_train = df_train.drop(['id', 'label', 'attack_cat2','attack_cat_Analysis','attack_cat_Backdoor','attack_cat_DoS','attack_cat_Exploits','attack_cat_Fuzzers','attack_cat_Generic','attack_cat_Normal','attack_cat_Reconnaissance','attack_cat_Shellcode','attack_cat_Worms'], axis=1)
analysis_train = df_train.iloc[:,-10]
backdoor_train = df_train.iloc[:,-9]
dos_train = df_train.iloc[:,-8]
exploits_train = df_train.iloc[:,-7]
fuzzers_train = df_train.iloc[:,-6]
generic_train = df_train.iloc[:,-5]
normal_train = df_train.iloc[:,-4]
reconnaissance_train = df_train.iloc[:,-3]
shellcode_train = df_train.iloc[:,-2]
worms_train = df_train.iloc[:,-1]

attack_train = df_train.iloc[:,-11]
data_train.head(10)

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_dst_ltm,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports
208167,0.118954,113,0,2,20,21,4002,2657,2500.286578,31,...,5,1,1,6,0,0,0,6,8,0
117281,8e-06,119,2,3,2,0,114,0,125000.0003,254,...,18,18,18,18,0,0,0,19,18,0
107751,0.0,6,0,3,1,0,46,0,0.0,0,...,2,2,2,2,0,0,0,2,2,1
232733,0.004629,113,0,2,18,18,3717,2437,7991.992287,31,...,1,1,1,3,0,0,0,1,7,0
129951,8e-06,120,0,3,2,0,200,0,125000.0003,254,...,2,2,2,5,0,0,0,5,5,0
237859,4.438847,113,0,2,224,410,12961,501661,149.569277,31,...,4,1,1,3,0,0,0,1,10,0
65132,1e-05,97,0,3,2,0,200,0,100000.0025,254,...,2,2,2,4,0,0,0,4,4,0
46842,0.572216,113,0,0,6,2,978,86,12.233143,62,...,4,3,1,3,0,0,0,3,3,0
88324,1.671502,113,3,2,26,22,1334,1638,28.118423,62,...,2,1,1,3,1,1,0,3,1,0
107709,0.483477,113,5,2,12,8,858,1010,39.298664,62,...,1,1,1,1,0,0,1,1,1,0


In [38]:
#min-max scaling
data_train_norm = (data_train - data_train.min()) / (data_train.max() - data_train.min())
data_train_norm = data_train_norm.fillna(0)

In [39]:
data_train_norm.shape
data_train_norm.head(10)

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_dst_ltm,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports
208167,0.001982563,0.856061,0.0,0.285714,0.001976,0.001935,0.0003060501,0.000183,0.0025,0.121569,...,0.08,0.0,0.0,0.080645,0.0,0.0,0.0,0.084746,0.114754,0.0
117281,1.333334e-07,0.901515,0.166667,0.428571,0.000104,0.0,6.170364e-06,0.0,0.125,0.996078,...,0.34,0.34,0.377778,0.274194,0.0,0.0,0.0,0.305085,0.278689,0.0
107751,0.0,0.045455,0.0,0.428571,0.0,0.0,9.255546e-07,0.0,0.0,0.0,...,0.02,0.02,0.022222,0.016129,0.0,0.0,0.0,0.016949,0.016393,1.0
232733,7.715199e-05,0.856061,0.0,0.285714,0.001768,0.001659,0.0002840681,0.000168,0.007992,0.121569,...,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.098361,0.0
129951,1.333334e-07,0.909091,0.0,0.428571,0.000104,0.0,1.280351e-05,0.0,0.125,0.996078,...,0.02,0.02,0.022222,0.064516,0.0,0.0,0.0,0.067797,0.065574,0.0
237859,0.0739808,0.856061,0.0,0.285714,0.023193,0.037788,0.0009970537,0.034617,0.00015,0.121569,...,0.06,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.147541,0.0
65132,1.666667e-07,0.734848,0.0,0.428571,0.000104,0.0,1.280351e-05,0.0,0.1,0.996078,...,0.02,0.02,0.022222,0.048387,0.0,0.0,0.0,0.050847,0.04918,0.0
46842,0.009536935,0.856061,0.0,0.0,0.00052,0.000184,7.28103e-05,6e-06,1.2e-05,0.243137,...,0.06,0.04,0.0,0.032258,0.0,0.0,0.0,0.033898,0.032787,0.0
88324,0.02785837,0.856061,0.25,0.285714,0.0026,0.002028,0.0001002684,0.000113,2.8e-05,0.243137,...,0.02,0.0,0.0,0.032258,0.25,0.25,0.0,0.033898,0.0,0.0
107709,0.008057951,0.856061,0.416667,0.285714,0.001144,0.000737,6.355475e-05,7e-05,3.9e-05,0.243137,...,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0


In [40]:
# preparing 10 types of binary classification set
train_X = torch.tensor(data_train_norm.values, dtype=torch.float32)

train_analysis_Y = torch.tensor(analysis_train.values, dtype=torch.long) 
train_backdoor_Y = torch.tensor(backdoor_train.values, dtype=torch.long) 
train_dos_Y = torch.tensor(dos_train.values, dtype=torch.long) 
train_exploits_Y = torch.tensor(exploits_train.values, dtype=torch.long) 
train_fuzzers_Y = torch.tensor(fuzzers_train.values, dtype=torch.long) 
train_generic_Y = torch.tensor(generic_train.values, dtype=torch.long) 
train_normal_Y = torch.tensor(normal_train.values, dtype=torch.long) 
train_reconnaissance_Y = torch.tensor(reconnaissance_train.values, dtype=torch.long) 
train_shellcode_Y = torch.tensor(shellcode_train.values, dtype=torch.long) 
train_worms_Y = torch.tensor(worms_train.values, dtype=torch.long) 

train_analysis = TensorDataset(train_X, train_analysis_Y)
train_backdoor = TensorDataset(train_X, train_backdoor_Y)
train_dos = TensorDataset(train_X, train_dos_Y)
train_exploits = TensorDataset(train_X, train_exploits_Y)
train_fuzzers = TensorDataset(train_X, train_fuzzers_Y)
train_generic = TensorDataset(train_X, train_generic_Y)
train_normal = TensorDataset(train_X, train_normal_Y)
train_reconnaissance = TensorDataset(train_X, train_reconnaissance_Y)
train_shellcode = TensorDataset(train_X, train_shellcode_Y)
train_worms = TensorDataset(train_X, train_worms_Y)


In [41]:
train_analysis_loader = DataLoader(train_analysis, batch_size=100, shuffle=True)
train_backdoor_loader = DataLoader(train_backdoor, batch_size=100, shuffle=True)
train_dos_loader = DataLoader(train_dos, batch_size=100, shuffle=True)
train_exploits_loader = DataLoader(train_exploits, batch_size=100, shuffle=True)
train_fuzzers_loader = DataLoader(train_fuzzers, batch_size=100, shuffle=True)
train_generic_loader = DataLoader(train_generic, batch_size=100, shuffle=True)
train_normal_loader = DataLoader(train_normal, batch_size=100, shuffle=True)
train_reconnaissance_loader = DataLoader(train_reconnaissance, batch_size=100, shuffle=True)
train_shellcode_loader = DataLoader(train_shellcode, batch_size=100, shuffle=True)
train_worms_loader = DataLoader(train_worms, batch_size=100, shuffle=True)
# drop_last = True

In [42]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(42, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 2)
        self.bc1 = nn.BatchNorm1d(100)
        self.bc2 = nn.BatchNorm1d(100)
        self.bc3 = nn.BatchNorm1d(100)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.bc1(x)
        x = F.relu(x) # ReLU: max(x, 0)
        x = self.fc2(x)
        x = self.bc2(x)
        x = F.relu(x) # ReLU: max(x, 0)
        x = self.fc3(x)
        x = self.bc3(x)
        x = F.relu(x) # ReLU: max(x, 0)
        x = self.fc4(x)
        # return F.log_softmax(x, dim=1)
        return x

model_analysis = Net()
model_backdoor = Net()
model_dos = Net()
model_exploits = Net()
model_fuzzers = Net()
model_generic = Net()
model_normal = Net()
model_reconnaissance = Net()
model_shellcode = Net()
model_worms = Net()

In [43]:
print(torch.cuda.is_available())

True


In [44]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_analysis = model_analysis.to(device)
model_backdoor = model_backdoor.to(device)
model_dos = model_dos.to(device)
model_exploits = model_exploits.to(device)
model_fuzzers = model_fuzzers.to(device)
model_generic = model_generic.to(device)
model_normal = model_normal.to(device)
model_reconnaissance = model_reconnaissance.to(device)
model_shellcode = model_shellcode.to(device)
model_worms = model_worms.to(device)
print(device)

cuda


In [45]:
criterion = nn.CrossEntropyLoss()

In [46]:
#optimizer = torch.optim.SGD(model.parameters(), lr=0.03)
optimizer_analysis = torch.optim.Adam(model_analysis.parameters(), lr=0.03)
optimizer_backdoor = torch.optim.Adam(model_backdoor.parameters(), lr=0.03)
optimizer_dos = torch.optim.Adam(model_dos.parameters(), lr=0.03)
optimizer_exploits = torch.optim.Adam(model_exploits.parameters(), lr=0.03)
optimizer_fuzzers = torch.optim.Adam(model_fuzzers.parameters(), lr=0.03)
optimizer_generic = torch.optim.Adam(model_generic.parameters(), lr=0.03)
optimizer_normal = torch.optim.Adam(model_normal.parameters(), lr=0.03)
optimizer_reconnaissance = torch.optim.Adam(model_reconnaissance.parameters(), lr=0.03)
optimizer_shellcode = torch.optim.Adam(model_shellcode.parameters(), lr=0.03)
optimizer_worms = torch.optim.Adam(model_worms.parameters(), lr=0.03)

In [47]:
#Training the machine learning model
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_analysis.train()
    loss_list=[]
    for train_x, train_y in train_analysis_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_analysis.zero_grad()
        output = model_analysis(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_analysis.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_analysis_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

In [48]:
with torch.no_grad():
    correct = 0
    model_analysis.eval()
    train_x, train_y = Variable(train_X), Variable(train_analysis_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_analysis = model_analysis(train_x)
    pred = torch.max(output_analysis.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

correct _rate: 165814/167077 (99%)



In [49]:
#output.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()

In [50]:
twoS_train_data = pd.DataFrame()
twoS_train_data["analysis_pred"]=output_analysis.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

Unnamed: 0,analysis_pred
0,5.733373e-29
1,3.435122e-08
2,5.950863e-20
3,1.090117e-15
4,0.04804679
5,4.157892e-20
6,0.04804679
7,8.733348000000001e-22
8,1.574244e-08
9,0.006273745


In [51]:
testdata = df_test.drop(['id', 'label', 'attack_cat2','attack_cat_Analysis','attack_cat_Backdoor','attack_cat_DoS','attack_cat_Exploits','attack_cat_Fuzzers','attack_cat_Generic','attack_cat_Normal','attack_cat_Reconnaissance','attack_cat_Shellcode','attack_cat_Worms'], axis=1)

analysis_test = df_test.iloc[:,-10]
backdoor_test = df_test.iloc[:,-9]
dos_test = df_test.iloc[:,-8]
exploits_test = df_test.iloc[:,-7]
fuzzers_test = df_test.iloc[:,-6]
generic_test = df_test.iloc[:,-5]
normal_test = df_test.iloc[:,-4]
reconnaissance_test = df_test.iloc[:,-3]
shellcode_test = df_test.iloc[:,-2]
worms_test = df_test.iloc[:,-1]

attack_test = df_test.iloc[:,-11]

#min-max scaling
testdata_norm = (testdata - testdata.min()) / (testdata.max() - testdata.min())
testdata_norm = testdata_norm.fillna(0)

testdata_norm.head(10)

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_dst_ltm,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports
83703,0.006743185,0.856061,0.416667,0.25,0.000948,0.000547,6.5e-05,1.8e-05,3.7e-05,0.996078,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
215281,0.005651618,0.856061,0.0,0.25,0.000948,0.000547,0.000155,1.8e-05,6.1e-05,0.996078,...,0.02,0.02,0.0,0.09375,0.0,0.0,0.0,0.04,0.098361,0.0
66227,0.0100422,0.856061,0.0,0.25,0.001159,0.000729,7.9e-05,5.2e-05,3.2e-05,0.996078,...,0.02,0.02,0.0,0.03125,0.0,0.0,0.0,0.38,0.032787,0.0
104554,0.01354457,0.856061,0.0,0.25,0.001159,0.000911,7.7e-05,5.5e-05,2.6e-05,0.996078,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92327,1.333334e-07,0.856061,0.0,0.625,0.000105,0.0,5e-06,0.0,0.125,0.996078,...,0.02,0.0,0.0,0.109375,0.0,0.0,0.0,0.02,0.114754,0.0
106343,1.166667e-07,0.477273,0.0,0.375,0.000105,0.0,1.4e-05,0.0,0.142857,0.996078,...,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.032787,0.0
130089,1.5e-07,0.363636,0.0,0.375,0.000105,0.0,1.4e-05,0.0,0.111111,0.996078,...,0.04,0.04,0.044444,0.078125,0.0,0.0,0.0,0.06,0.081967,0.0
231026,0.01722326,0.856061,0.416667,0.25,0.001159,0.00164,0.000124,0.000694,2.8e-05,0.121569,...,0.04,0.0,0.0,0.03125,0.0,0.0,0.033333,0.02,0.016393,0.0
147844,5.000001e-08,0.901515,0.166667,0.375,0.000105,0.0,7e-06,0.0,0.333333,0.996078,...,0.26,0.26,0.288889,0.40625,0.0,0.0,0.0,0.26,0.42623,0.0
58003,0.005990434,0.856061,0.0,0.25,0.005795,0.001093,0.005329,3.6e-05,0.000186,0.996078,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0


In [52]:
#test_X = torch.tensor(testdata_norm.values, dtype=torch.float32)
#test_Y = torch.tensor(testlabel.values, dtype=torch.long) 

#test = TensorDataset(test_X, test_Y)
#test_loader = DataLoader(test, batch_size=100)

In [53]:
# preparing 10 types of binary classification set
test_X = torch.tensor(testdata_norm.values, dtype=torch.float32)

test_analysis_Y = torch.tensor(analysis_test.values, dtype=torch.long) 
test_backdoor_Y = torch.tensor(backdoor_test.values, dtype=torch.long) 
test_dos_Y = torch.tensor(dos_test.values, dtype=torch.long) 
test_exploits_Y = torch.tensor(exploits_test.values, dtype=torch.long) 
test_fuzzers_Y = torch.tensor(fuzzers_test.values, dtype=torch.long) 
test_generic_Y = torch.tensor(generic_test.values, dtype=torch.long) 
test_normal_Y = torch.tensor(normal_test.values, dtype=torch.long) 
test_reconnaissance_Y = torch.tensor(reconnaissance_test.values, dtype=torch.long) 
test_shellcode_Y = torch.tensor(shellcode_test.values, dtype=torch.long) 
test_worms_Y = torch.tensor(worms_test.values, dtype=torch.long) 

test_analysis = TensorDataset(test_X, test_analysis_Y)
test_backdoor = TensorDataset(test_X, test_backdoor_Y)
test_dos = TensorDataset(test_X, test_dos_Y)
test_exploits = TensorDataset(test_X, test_exploits_Y)
test_fuzzers = TensorDataset(test_X, test_fuzzers_Y)
test_generic = TensorDataset(test_X, test_generic_Y)
test_normal = TensorDataset(test_X, test_normal_Y)
test_reconnaissance = TensorDataset(test_X, test_reconnaissance_Y)
test_shellcode = TensorDataset(test_X, test_shellcode_Y)
test_worms = TensorDataset(test_X, test_worms_Y)


In [54]:
test_analysis_loader = DataLoader(test_analysis, batch_size=100, shuffle=True)
test_backdoor_loader = DataLoader(test_backdoor, batch_size=100, shuffle=True)
test_dos_loader = DataLoader(test_dos, batch_size=100, shuffle=True)
test_exploits_loader = DataLoader(test_exploits, batch_size=100, shuffle=True)
test_fuzzers_loader = DataLoader(test_fuzzers, batch_size=100, shuffle=True)
test_generic_loader = DataLoader(test_generic, batch_size=100, shuffle=True)
test_normal_loader = DataLoader(test_normal, batch_size=100, shuffle=True)
test_reconnaissance_loader = DataLoader(test_reconnaissance, batch_size=100, shuffle=True)
test_shellcode_loader = DataLoader(test_shellcode, batch_size=100, shuffle=True)
test_worms_loader = DataLoader(test_worms, batch_size=100, shuffle=True)
# drop_last = True

In [55]:
with torch.no_grad():
    correct = 0
    model_analysis.eval()
    test_x, test_y = Variable(test_X), Variable(test_analysis_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_analysis = model_analysis(test_x)
    pred = torch.max(output_test_analysis.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
correct _rate: 71042/71605 (99%)



In [56]:
twoS_test_data = pd.DataFrame()
twoS_test_data["analysis_pred"]=output_test_analysis.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

Unnamed: 0,analysis_pred
0,4.057994e-06
1,5.22834e-13
2,1.064407e-12
3,8.059102e-10
4,0.0005029461
5,0.04804679
6,0.04804679
7,9.361324e-24
8,9.168352e-11
9,1.8198689999999999e-19


In [57]:
#Training the machine learning model for backdoor
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_backdoor.train()
    loss_list=[]
    for train_x, train_y in train_backdoor_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_backdoor.zero_grad()
        output = model_backdoor(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_backdoor.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_backdoor_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

In [58]:
with torch.no_grad():
    correct = 0
    model_backdoor.eval()
    train_x, train_y = Variable(train_X), Variable(train_backdoor_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_backdoor = model_backdoor(train_x)
    pred = torch.max(output_backdoor.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

correct _rate: 165869/167077 (99%)



In [59]:
twoS_train_data["backdoor_pred"]=output_backdoor.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

Unnamed: 0,analysis_pred,backdoor_pred
0,5.733373e-29,6.250578e-19
1,3.435122e-08,1.821575e-21
2,5.950863e-20,1.338373e-22
3,1.090117e-15,5.022311e-18
4,0.04804679,0.03599505
5,4.157892e-20,0.0
6,0.04804679,0.03599505
7,8.733348000000001e-22,2.729735e-21
8,1.574244e-08,3.809537e-18
9,0.006273745,0.0003018818


In [60]:
with torch.no_grad():
    correct = 0
    model_backdoor.eval()
    test_x, test_y = Variable(test_X), Variable(test_backdoor_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_backdoor = model_backdoor(test_x)
    pred = torch.max(output_test_backdoor.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
correct _rate: 71088/71605 (99%)



In [61]:
twoS_test_data["backdoor_pred"]=output_test_backdoor.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

Unnamed: 0,analysis_pred,backdoor_pred
0,4.057994e-06,0.002584545
1,5.22834e-13,6.202563e-15
2,1.064407e-12,2.141497e-05
3,8.059102e-10,0.0003021023
4,0.0005029461,1.365519e-08
5,0.04804679,0.03599505
6,0.04804679,0.03599505
7,9.361324e-24,1.718545e-13
8,9.168352e-11,1.767525e-15
9,1.8198689999999999e-19,1.3696300000000002e-23


In [62]:
#Training the machine learning model for dos
batch_loss_list=[]
for epoch in range(100): #learning 100 times
    #total_loss = 0
    model_dos.train()
    loss_list=[]
    for train_x, train_y in train_dos_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_dos.zero_grad()
        output = model_dos(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_dos.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_dos_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

In [63]:
with torch.no_grad():
    correct = 0
    model_dos.eval()
    train_x, train_y = Variable(train_X), Variable(train_dos_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_dos = model_dos(train_x)
    pred = torch.max(output_dos.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

correct _rate: 158461/167077 (95%)



In [64]:
twoS_train_data["dos_pred"]=output_dos.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

Unnamed: 0,analysis_pred,backdoor_pred,dos_pred
0,5.733373e-29,6.250578e-19,7.539402e-35
1,3.435122e-08,1.821575e-21,7.750936e-11
2,5.950863e-20,1.338373e-22,0.0
3,1.090117e-15,5.022311e-18,5.495129e-36
4,0.04804679,0.03599505,0.3213531
5,4.157892e-20,0.0,3.484529e-15
6,0.04804679,0.03599505,0.3213531
7,8.733348000000001e-22,2.729735e-21,1.214563e-09
8,1.574244e-08,3.809537e-18,2.147255e-16
9,0.006273745,0.0003018818,0.3213531


In [65]:
with torch.no_grad():
    correct = 0
    model_dos.eval()
    test_x, test_y = Variable(test_X), Variable(test_dos_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_dos = model_dos(test_x)
    pred = torch.max(output_test_dos.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
correct _rate: 67957/71605 (95%)



In [66]:
twoS_test_data["dos_pred"]=output_test_dos.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

Unnamed: 0,analysis_pred,backdoor_pred,dos_pred
0,4.057994e-06,0.002584545,0.0272489
1,5.22834e-13,6.202563e-15,0.000261158
2,1.064407e-12,2.141497e-05,0.01656191
3,8.059102e-10,0.0003021023,0.06442982
4,0.0005029461,1.365519e-08,1.661922e-19
5,0.04804679,0.03599505,0.3213531
6,0.04804679,0.03599505,0.3213531
7,9.361324e-24,1.718545e-13,0.0
8,9.168352e-11,1.767525e-15,7.814356e-13
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138


In [67]:
#Training the machine learning model for exploits
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_exploits.train()
    loss_list=[]
    for train_x, train_y in train_exploits_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_exploits.zero_grad()
        output = model_exploits(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_exploits.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_exploits_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

In [68]:
with torch.no_grad():
    correct = 0
    model_exploits.eval()
    train_x, train_y = Variable(train_X), Variable(train_exploits_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_exploits = model_exploits(train_x)
    pred = torch.max(output_exploits.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

correct _rate: 153818/167077 (92%)



In [69]:
twoS_train_data["exploits_pred"]=output_exploits.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred
0,5.733373e-29,6.250578e-19,7.539402e-35,0.0
1,3.435122e-08,1.821575e-21,7.750936e-11,4.420427e-07
2,5.950863e-20,1.338373e-22,0.0,9.437731e-40
3,1.090117e-15,5.022311e-18,5.495129e-36,0.0
4,0.04804679,0.03599505,0.3213531,0.4006477
5,4.157892e-20,0.0,3.484529e-15,0.0
6,0.04804679,0.03599505,0.3213531,0.4006477
7,8.733348000000001e-22,2.729735e-21,1.214563e-09,1.911803e-05
8,1.574244e-08,3.809537e-18,2.147255e-16,0.9956461
9,0.006273745,0.0003018818,0.3213531,0.4512513


In [70]:
with torch.no_grad():
    correct = 0
    model_exploits.eval()
    test_x, test_y = Variable(test_X), Variable(test_exploits_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_exploits = model_exploits(test_x)
    pred = torch.max(output_test_exploits.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

tensor([0, 0, 0,  ..., 0, 0, 1], device='cuda:0')
correct _rate: 65696/71605 (92%)



In [71]:
twoS_test_data["exploits_pred"]=output_test_exploits.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred
0,4.057994e-06,0.002584545,0.0272489,0.454427
1,5.22834e-13,6.202563e-15,0.000261158,0.0006026404
2,1.064407e-12,2.141497e-05,0.01656191,0.003367888
3,8.059102e-10,0.0003021023,0.06442982,0.02589165
4,0.0005029461,1.365519e-08,1.661922e-19,5.561249e-10
5,0.04804679,0.03599505,0.3213531,0.4006477
6,0.04804679,0.03599505,0.3213531,0.4006477
7,9.361324e-24,1.718545e-13,0.0,1.401298e-45
8,9.168352e-11,1.767525e-15,7.814356e-13,2.38628e-07
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138,0.780867


In [72]:
#Training the machine learning model for fuzzers
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_fuzzers.train()
    loss_list=[]
    for train_x, train_y in train_fuzzers_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_fuzzers.zero_grad()
        output = model_fuzzers(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_fuzzers.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_fuzzers_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

with torch.no_grad():
    correct = 0
    model_fuzzers.eval()
    train_x, train_y = Variable(train_X), Variable(train_fuzzers_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_fuzzers = model_fuzzers(train_x)
    pred = torch.max(output_fuzzers.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_train_data["fuzzers_pred"]=output_fuzzers.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

with torch.no_grad():
    correct = 0
    model_fuzzers.eval()
    test_x, test_y = Variable(test_X), Variable(test_fuzzers_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_fuzzers = model_fuzzers(test_x)
    pred = torch.max(output_test_fuzzers.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_test_data["fuzzers_pred"]=output_test_fuzzers.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

correct _rate: 158263/167077 (95%)

tensor([0, 0, 1,  ..., 0, 0, 0], device='cuda:0')
correct _rate: 67633/71605 (94%)



Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred,fuzzers_pred
0,4.057994e-06,0.002584545,0.0272489,0.454427,0.001593328
1,5.22834e-13,6.202563e-15,0.000261158,0.0006026404,0.2315588
2,1.064407e-12,2.141497e-05,0.01656191,0.003367888,0.9043506
3,8.059102e-10,0.0003021023,0.06442982,0.02589165,0.8119604
4,0.0005029461,1.365519e-08,1.661922e-19,5.561249e-10,0.0009958537
5,0.04804679,0.03599505,0.3213531,0.4006477,0.04245339
6,0.04804679,0.03599505,0.3213531,0.4006477,0.03993946
7,9.361324e-24,1.718545e-13,0.0,1.401298e-45,5.523873e-05
8,9.168352e-11,1.767525e-15,7.814356e-13,2.38628e-07,1.2295460000000002e-17
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138,0.780867,0.07256886


In [73]:
#Training the machine learning model for generic
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_generic.train()
    loss_list=[]
    for train_x, train_y in train_generic_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_generic.zero_grad()
        output = model_generic(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_generic.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_generic_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

with torch.no_grad():
    correct = 0
    model_generic.eval()
    train_x, train_y = Variable(train_X), Variable(train_generic_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_generic = model_generic(train_x)
    pred = torch.max(output_generic.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_train_data["generic_pred"]=output_generic.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

with torch.no_grad():
    correct = 0
    model_generic.eval()
    test_x, test_y = Variable(test_X), Variable(test_generic_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_generic = model_generic(test_x)
    pred = torch.max(output_test_generic.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_test_data["generic_pred"]=output_test_generic.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

correct _rate: 166459/167077 (100%)

tensor([0, 0, 0,  ..., 0, 1, 0], device='cuda:0')
correct _rate: 71318/71605 (100%)



Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred,fuzzers_pred,generic_pred
0,4.057994e-06,0.002584545,0.0272489,0.454427,0.001593328,0.003291132
1,5.22834e-13,6.202563e-15,0.000261158,0.0006026404,0.2315588,4.235936e-05
2,1.064407e-12,2.141497e-05,0.01656191,0.003367888,0.9043506,0.0001045906
3,8.059102e-10,0.0003021023,0.06442982,0.02589165,0.8119604,0.005815274
4,0.0005029461,1.365519e-08,1.661922e-19,5.561249e-10,0.0009958537,3.300777e-06
5,0.04804679,0.03599505,0.3213531,0.4006477,0.04245339,0.01451284
6,0.04804679,0.03599505,0.3213531,0.4006477,0.03993946,0.01697282
7,9.361324e-24,1.718545e-13,0.0,1.401298e-45,5.523873e-05,1.570003e-13
8,9.168352e-11,1.767525e-15,7.814356e-13,2.38628e-07,1.2295460000000002e-17,1.0
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138,0.780867,0.07256886,0.001821588


In [75]:
#Training the machine learning model for normal
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_normal.train()
    loss_list=[]
    for train_x, train_y in train_normal_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_normal.zero_grad()
        output = model_normal(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_normal.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_normal_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

with torch.no_grad():
    correct = 0
    model_normal.eval()
    train_x, train_y = Variable(train_X), Variable(train_normal_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_normal = model_normal(train_x)
    pred = torch.max(output_normal.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_train_data["normal_pred"]=output_normal.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

with torch.no_grad():
    correct = 0
    model_normal.eval()
    test_x, test_y = Variable(test_X), Variable(test_normal_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_normal = model_normal(test_x)
    pred = torch.max(output_test_normal.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_test_data["normal_pred"]=output_test_normal.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

correct _rate: 159847/167077 (96%)

tensor([0, 1, 0,  ..., 0, 0, 0], device='cuda:0')
correct _rate: 68105/71605 (95%)



Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred,fuzzers_pred,generic_pred,normal_pred
0,4.057994e-06,0.002584545,0.0272489,0.454427,0.001593328,0.003291132,0.005412919
1,5.22834e-13,6.202563e-15,0.000261158,0.0006026404,0.2315588,4.235936e-05,0.6473816
2,1.064407e-12,2.141497e-05,0.01656191,0.003367888,0.9043506,0.0001045906,0.01353174
3,8.059102e-10,0.0003021023,0.06442982,0.02589165,0.8119604,0.005815274,0.2152032
4,0.0005029461,1.365519e-08,1.661922e-19,5.561249e-10,0.0009958537,3.300777e-06,1.0
5,0.04804679,0.03599505,0.3213531,0.4006477,0.04245339,0.01451284,7.348389e-06
6,0.04804679,0.03599505,0.3213531,0.4006477,0.03993946,0.01697282,9.834062e-08
7,9.361324e-24,1.718545e-13,0.0,1.401298e-45,5.523873e-05,1.570003e-13,1.0
8,9.168352e-11,1.767525e-15,7.814356e-13,2.38628e-07,1.2295460000000002e-17,1.0,1.787194e-07
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138,0.780867,0.07256886,0.001821588,0.01843715


In [76]:
#Training the machine learning model for reconnaissance
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_reconnaissance.train()
    loss_list=[]
    for train_x, train_y in train_reconnaissance_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_reconnaissance.zero_grad()
        output = model_reconnaissance(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_reconnaissance.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_reconnaissance_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

with torch.no_grad():
    correct = 0
    model_reconnaissance.eval()
    train_x, train_y = Variable(train_X), Variable(train_reconnaissance_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_reconnaissance = model_reconnaissance(train_x)
    pred = torch.max(output_reconnaissance.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_train_data["reconnaissance_pred"]=output_reconnaissance.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

with torch.no_grad():
    correct = 0
    model_reconnaissance.eval()
    test_x, test_y = Variable(test_X), Variable(test_reconnaissance_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_reconnaissance = model_reconnaissance(test_x)
    pred = torch.max(output_test_reconnaissance.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_test_data["reconnaissance_pred"]=output_test_reconnaissance.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

correct _rate: 163690/167077 (98%)

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
correct _rate: 70305/71605 (98%)



Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred,fuzzers_pred,generic_pred,normal_pred,reconnaissance_pred
0,4.057994e-06,0.002584545,0.0272489,0.454427,0.001593328,0.003291132,0.005412919,0.4725392
1,5.22834e-13,6.202563e-15,0.000261158,0.0006026404,0.2315588,4.235936e-05,0.6473816,3.30796e-14
2,1.064407e-12,2.141497e-05,0.01656191,0.003367888,0.9043506,0.0001045906,0.01353174,2.080167e-05
3,8.059102e-10,0.0003021023,0.06442982,0.02589165,0.8119604,0.005815274,0.2152032,9.135199e-05
4,0.0005029461,1.365519e-08,1.661922e-19,5.561249e-10,0.0009958537,3.300777e-06,1.0,4.574951e-14
5,0.04804679,0.03599505,0.3213531,0.4006477,0.04245339,0.01451284,7.348389e-06,0.04821298
6,0.04804679,0.03599505,0.3213531,0.4006477,0.03993946,0.01697282,9.834062e-08,0.04678237
7,9.361324e-24,1.718545e-13,0.0,1.401298e-45,5.523873e-05,1.570003e-13,1.0,2.245482e-11
8,9.168352e-11,1.767525e-15,7.814356e-13,2.38628e-07,1.2295460000000002e-17,1.0,1.787194e-07,6.328558e-10
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138,0.780867,0.07256886,0.001821588,0.01843715,4.203895e-45


In [77]:
#Training the machine learning model for shellcode
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_shellcode.train()
    loss_list=[]
    for train_x, train_y in train_shellcode_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_shellcode.zero_grad()
        output = model_shellcode(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_shellcode.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_shellcode_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

with torch.no_grad():
    correct = 0
    model_shellcode.eval()
    train_x, train_y = Variable(train_X), Variable(train_shellcode_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_shellcode = model_shellcode(train_x)
    pred = torch.max(output_shellcode.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_train_data["shellcode_pred"]=output_shellcode.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

with torch.no_grad():
    correct = 0
    model_shellcode.eval()
    test_x, test_y = Variable(test_X), Variable(test_shellcode_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_shellcode = model_shellcode(test_x)
    pred = torch.max(output_test_shellcode.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_test_data["shellcode_pred"]=output_test_shellcode.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

correct _rate: 166384/167077 (100%)

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
correct _rate: 71303/71605 (100%)



Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred,fuzzers_pred,generic_pred,normal_pred,reconnaissance_pred,shellcode_pred
0,4.057994e-06,0.002584545,0.0272489,0.454427,0.001593328,0.003291132,0.005412919,0.4725392,1.058855e-07
1,5.22834e-13,6.202563e-15,0.000261158,0.0006026404,0.2315588,4.235936e-05,0.6473816,3.30796e-14,7.301754e-06
2,1.064407e-12,2.141497e-05,0.01656191,0.003367888,0.9043506,0.0001045906,0.01353174,2.080167e-05,4.162353e-08
3,8.059102e-10,0.0003021023,0.06442982,0.02589165,0.8119604,0.005815274,0.2152032,9.135199e-05,0.002425542
4,0.0005029461,1.365519e-08,1.661922e-19,5.561249e-10,0.0009958537,3.300777e-06,1.0,4.574951e-14,5.839519e-15
5,0.04804679,0.03599505,0.3213531,0.4006477,0.04245339,0.01451284,7.348389e-06,0.04821298,1.694935e-16
6,0.04804679,0.03599505,0.3213531,0.4006477,0.03993946,0.01697282,9.834062e-08,0.04678237,1.5261510000000001e-22
7,9.361324e-24,1.718545e-13,0.0,1.401298e-45,5.523873e-05,1.570003e-13,1.0,2.245482e-11,4.216239e-12
8,9.168352e-11,1.767525e-15,7.814356e-13,2.38628e-07,1.2295460000000002e-17,1.0,1.787194e-07,6.328558e-10,1.087341e-24
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138,0.780867,0.07256886,0.001821588,0.01843715,4.203895e-45,9.890521e-14


In [78]:
#Training the machine learning model for worms
batch_loss_list=[]
for epoch in range(300): #learning 100 times
    #total_loss = 0
    model_worms.train()
    loss_list=[]
    for train_x, train_y in train_worms_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer_worms.zero_grad()
        output = model_worms(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer_worms.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_worms_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

with torch.no_grad():
    correct = 0
    model_worms.eval()
    train_x, train_y = Variable(train_X), Variable(train_worms_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output_worms = model_worms(train_x)
    pred = torch.max(output_worms.data, 1)[1]
    #print(output.data.softmax(dim=1))
    #print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_train_data["worms_pred"]=output_worms.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_train_data.head(10)

with torch.no_grad():
    correct = 0
    model_worms.eval()
    test_x, test_y = Variable(test_X), Variable(test_worms_Y)
    test_x = test_x.to(device)
    test_y = test_y.to(device)
    output_test_worms = model_worms(test_x)
    pred = torch.max(output_test_worms.data, 1)[1]
    print(pred)
    correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

twoS_test_data["worms_pred"]=output_test_worms.data.softmax(dim=1)[:,1].to('cpu').detach().numpy().tolist()
twoS_test_data.head(10)

correct _rate: 166999/167077 (100%)

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')
correct _rate: 71564/71605 (100%)



Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred,fuzzers_pred,generic_pred,normal_pred,reconnaissance_pred,shellcode_pred,worms_pred
0,4.057994e-06,0.002584545,0.0272489,0.454427,0.001593328,0.003291132,0.005412919,0.4725392,1.058855e-07,9.706838e-05
1,5.22834e-13,6.202563e-15,0.000261158,0.0006026404,0.2315588,4.235936e-05,0.6473816,3.30796e-14,7.301754e-06,1.349035e-22
2,1.064407e-12,2.141497e-05,0.01656191,0.003367888,0.9043506,0.0001045906,0.01353174,2.080167e-05,4.162353e-08,9.189757e-16
3,8.059102e-10,0.0003021023,0.06442982,0.02589165,0.8119604,0.005815274,0.2152032,9.135199e-05,0.002425542,6.104952e-24
4,0.0005029461,1.365519e-08,1.661922e-19,5.561249e-10,0.0009958537,3.300777e-06,1.0,4.574951e-14,5.839519e-15,4.796616e-20
5,0.04804679,0.03599505,0.3213531,0.4006477,0.04245339,0.01451284,7.348389e-06,0.04821298,1.694935e-16,1.873232e-10
6,0.04804679,0.03599505,0.3213531,0.4006477,0.03993946,0.01697282,9.834062e-08,0.04678237,1.5261510000000001e-22,1.688455e-10
7,9.361324e-24,1.718545e-13,0.0,1.401298e-45,5.523873e-05,1.570003e-13,1.0,2.245482e-11,4.216239e-12,4.72858e-19
8,9.168352e-11,1.767525e-15,7.814356e-13,2.38628e-07,1.2295460000000002e-17,1.0,1.787194e-07,6.328558e-10,1.087341e-24,2.833992e-09
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138,0.780867,0.07256886,0.001821588,0.01843715,4.203895e-45,9.890521e-14,6.219158000000001e-27


In [79]:
class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        self.fc1 = nn.Linear(10, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 10)
        self.bc1 = nn.BatchNorm1d(100)
        self.bc2 = nn.BatchNorm1d(100)
        self.bc3 = nn.BatchNorm1d(100)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.bc1(x)
        x = F.relu(x) # ReLU: max(x, 0)
        x = self.fc2(x)
        x = self.bc2(x)
        x = F.relu(x) # ReLU: max(x, 0)
        x = self.fc3(x)
        x = self.bc3(x)
        x = F.relu(x) # ReLU: max(x, 0)
        x = self.fc4(x)
        # return F.log_softmax(x, dim=1)
        return x

model2 = Net2()

model2 = model2.to(device)

In [80]:
twoS_train_data.head(10)

Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred,fuzzers_pred,generic_pred,normal_pred,reconnaissance_pred,shellcode_pred,worms_pred
0,5.733373e-29,6.250578e-19,7.539402e-35,0.0,1.836076e-09,6.265219e-09,1.0,7.535022e-15,1.055001e-11,1.903452e-23
1,3.435122e-08,1.821575e-21,7.750936e-11,4.420427e-07,3.4163559999999996e-19,1.0,4.437381e-07,3.161641e-10,5.658384e-24,1.183567e-09
2,5.950863e-20,1.338373e-22,0.0,9.437731e-40,7.522134e-06,4.246021e-18,1.0,2.846251e-20,1.665157e-06,3.350487e-09
3,1.090117e-15,5.022311e-18,5.495129e-36,0.0,8.439204e-09,4.752686e-07,1.0,2.4476360000000003e-22,1.01223e-11,4.62998e-19
4,0.04804679,0.03599505,0.3213531,0.4006477,0.04566233,0.0359857,5.581888e-05,0.03442169,0.0004716759,1.078856e-08
5,4.157892e-20,0.0,3.484529e-15,0.0,2.810762e-10,2.856784e-10,1.0,1.648569e-09,7.722195e-21,4.764071e-25
6,0.04804679,0.03599505,0.3213531,0.4006477,0.04101146,0.01938453,1.09922e-08,0.04821261,4.843993e-07,1.722881e-09
7,8.733348000000001e-22,2.729735e-21,1.214563e-09,1.911803e-05,4.861691e-07,2.19505e-05,1.0,4.820286e-24,1.948054e-10,8.908855e-38
8,1.574244e-08,3.809537e-18,2.147255e-16,0.9956461,6.812713e-07,5.220345e-06,0.0001178547,2.898806e-18,3.880724e-08,1.139231e-34
9,0.006273745,0.0003018818,0.3213531,0.4512513,0.0004336499,0.001220824,0.01554651,0.001791653,3.020374e-08,2.80567e-16


In [81]:
twoS_test_data.head(10)

Unnamed: 0,analysis_pred,backdoor_pred,dos_pred,exploits_pred,fuzzers_pred,generic_pred,normal_pred,reconnaissance_pred,shellcode_pred,worms_pred
0,4.057994e-06,0.002584545,0.0272489,0.454427,0.001593328,0.003291132,0.005412919,0.4725392,1.058855e-07,9.706838e-05
1,5.22834e-13,6.202563e-15,0.000261158,0.0006026404,0.2315588,4.235936e-05,0.6473816,3.30796e-14,7.301754e-06,1.349035e-22
2,1.064407e-12,2.141497e-05,0.01656191,0.003367888,0.9043506,0.0001045906,0.01353174,2.080167e-05,4.162353e-08,9.189757e-16
3,8.059102e-10,0.0003021023,0.06442982,0.02589165,0.8119604,0.005815274,0.2152032,9.135199e-05,0.002425542,6.104952e-24
4,0.0005029461,1.365519e-08,1.661922e-19,5.561249e-10,0.0009958537,3.300777e-06,1.0,4.574951e-14,5.839519e-15,4.796616e-20
5,0.04804679,0.03599505,0.3213531,0.4006477,0.04245339,0.01451284,7.348389e-06,0.04821298,1.694935e-16,1.873232e-10
6,0.04804679,0.03599505,0.3213531,0.4006477,0.03993946,0.01697282,9.834062e-08,0.04678237,1.5261510000000001e-22,1.688455e-10
7,9.361324e-24,1.718545e-13,0.0,1.401298e-45,5.523873e-05,1.570003e-13,1.0,2.245482e-11,4.216239e-12,4.72858e-19
8,9.168352e-11,1.767525e-15,7.814356e-13,2.38628e-07,1.2295460000000002e-17,1.0,1.787194e-07,6.328558e-10,1.087341e-24,2.833992e-09
9,1.8198689999999999e-19,1.3696300000000002e-23,0.04754138,0.780867,0.07256886,0.001821588,0.01843715,4.203895e-45,9.890521e-14,6.219158000000001e-27


In [82]:
train_X = torch.tensor(twoS_train_data.values, dtype=torch.float32)
train_Y = torch.tensor(attack_train.values, dtype=torch.long) 
train = TensorDataset(train_X, train_Y)
train_loader = DataLoader(train, batch_size=100, shuffle=True)

In [83]:
optimizer2 = torch.optim.Adam(model2.parameters(), lr=0.03)

In [84]:
#Training the machine learning model
batch_loss_list=[]
for epoch in range(600): #learning 100 times
    #total_loss = 0
    model2.train()
    loss_list=[]
    for train_x, train_y in train_loader:
        train_x, train_y = Variable(train_x), Variable(train_y)
        train_x = train_x.to(device)
        train_y = train_y.to(device)
        optimizer2.zero_grad()
        output = model2(train_x)
        loss = criterion(output, train_y)
        loss.backward()
        optimizer2.step()
        loss_list.append(loss.data)
        #print('epoch {}, loss {}'.format(epoch, loss.item()))
    batch_loss = sum(loss_list)/len(train_loader)
    batch_loss_list.append(batch_loss.to('cpu').detach().numpy().tolist())

In [85]:
batch_loss_list

[0.38173356652259827,
 0.3641822338104248,
 0.36038750410079956,
 0.3582131862640381,
 0.3560808002948761,
 0.3544391989707947,
 0.353730171918869,
 0.35346731543540955,
 0.35272014141082764,
 0.351641446352005,
 0.35103467106819153,
 0.35045579075813293,
 0.3501166105270386,
 0.3497096598148346,
 0.34994038939476013,
 0.349591463804245,
 0.3497408330440521,
 0.34859389066696167,
 0.3490772247314453,
 0.348572313785553,
 0.348207026720047,
 0.3479268550872803,
 0.3481791317462921,
 0.3474534749984741,
 0.34787628054618835,
 0.3469572365283966,
 0.3472856283187866,
 0.34657981991767883,
 0.346591979265213,
 0.3465231657028198,
 0.3468639850616455,
 0.34681886434555054,
 0.34589675068855286,
 0.34595805406570435,
 0.34598663449287415,
 0.3451443612575531,
 0.345685750246048,
 0.3457096517086029,
 0.34526026248931885,
 0.34544047713279724,
 0.3452375829219818,
 0.3456973135471344,
 0.34453046321868896,
 0.3453456163406372,
 0.3445844352245331,
 0.34478339552879333,
 0.34485483169555664,
 

In [86]:
with torch.no_grad():
    correct = 0
    model2.eval()
    train_x, train_y = Variable(train_X), Variable(train_Y)
    train_x = train_x.to(device)
    train_y = train_y.to(device)
    output = model2(train_x)
    pred = torch.max(output.data, 1)[1]
    print(pred)
    correct += pred.eq(train_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(train_y)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

tensor([6, 5, 6,  ..., 5, 6, 6], device='cuda:0')
correct _rate: 144160/167077 (86%)



In [87]:
test_X = torch.tensor(twoS_test_data.values, dtype=torch.float32)
test_Y = torch.tensor(attack_test.values, dtype=torch.long) 

test = TensorDataset(test_X, test_Y)
test_loader = DataLoader(test, batch_size=1024)

In [88]:
with torch.no_grad():
    model2.eval()
    correct = 0
    pred_list = list()
    test_y_list = list()
    for test_x, test_y in test_loader:
        test_x, test_y = Variable(test_x), Variable(test_y)
        test_y_list.extend(test_y.to('cpu').detach().numpy().tolist())
        test_x = test_x.to(device)
        test_y = test_y.to(device)
        output = model2(test_x)
        pred = torch.max(output.data, 1)[1]
        pred_list.extend(pred.to('cpu').detach().numpy().tolist())
        correct += pred.eq(test_y.data.view_as(pred)).sum() 
    # correct_rate
    data_num = len(test_loader.dataset)
    print('correct _rate: {}/{} ({:.0f}%)\n'.format(correct, data_num, 100. * correct / data_num))

correct _rate: 61129/71605 (85%)



In [89]:
accuracy_score(test_y_list, pred_list)

0.8536973675022694

In [90]:
precision_score(test_y_list, pred_list, average='macro')

0.7103748849882404

In [91]:
recall_score(test_y_list, pred_list, average='macro')

0.49604555642732606

In [92]:
f1_score(test_y_list, pred_list, average='macro')

0.5114422263239946

In [93]:
cm = confusion_matrix(test_y_list, pred_list, labels=[0,1,2,3,4,5,6,7,8,9])
print(cm)

[[   58     0     0   454     1     0    90     0    10     0]
 [    0    42     7   455     0     0     5     4     7     0]
 [    0     4   146  3333    26     7    56    11    65     0]
 [    6    11    54  9321   112    14   289   112    87     3]
 [    1     3     6   598  2551     6  2176    23    37     0]
 [    0     3    31   193     7 11807    13     2     7     1]
 [    5     0     3   152   752     3 34816    19    28     0]
 [    0     2    35   773    86     1    94  2177    11     0]
 [    0     6     3    57    31     2    28    17   206     0]
 [    0     0     0    35     1     0     2     0     0     5]]


In [94]:
from sklearn.ensemble import RandomForestClassifier

In [95]:
clf = RandomForestClassifier(random_state=0)
clf.fit(twoS_train_data, attack_train)

RandomForestClassifier(random_state=0)

In [96]:
clf.score(twoS_train_data, attack_train)

0.9342997540056381

In [97]:
#clf.fit(twoS_test_data, attack_test)

In [98]:
clf.score(twoS_test_data, attack_test)

0.8528594371901403

In [99]:
clf.score(twoS_train_data, attack_train)

0.9342997540056381

In [100]:
clf.fit(data_train_norm, attack_train)
clf.score(testdata_norm, attack_test)

0.8493680608896027

In [101]:
clf.score(data_train_norm, attack_train)

0.9338927560346427