In [1]:
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import itertools
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix,accuracy_score,recall_score,precision_score,f1_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Input,Dropout,Dense
from keras.models import Model
from keras import regularizers
from keras.utils.data_utils import get_file
from sklearn.model_selection import train_test_split
from numpy.random import seed
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Names assigned to the 45 CSV columns, in file order. "attack_cat" holds the
# attack category string and "label" is the last column.
col_names=["id","dur","proto","service","state",
           "spkts","dpkts","sbytes","dbytes","rate",
           "sttl","dttl","sload","dload","sloss",
           "dloss","sinpkt","dinpkt","sjit","djit",
           "swin","stcpb","dtcpb","dwin","tcprtt",
           "synack","ackdat","smean","dmean","trans_depth",
           "response_body_len","ct_srv_src","ct_state_ttl","ct_dst_ltm","ct_src_dport_ltm",
           "ct_dst_sport_ltm","ct_dst_src_ltm","is_ftp_login","ct_ftp_cmd","ct_flw_http_mthd",
           "ct_src_ltm","ct_srv_dst","is_sm_ips_ports","attack_cat","label"
]

# header=None: no line of the file is treated as a header; every column is
# named from col_names instead.
# NOTE(review): if the CSVs do carry a header line it would be read as a data
# row here - confirm against the files.
training_df = pd.read_csv("UNSW_NB15_training-set.csv",header=None,names=col_names)
testing_df = pd.read_csv("UNSW_NB15_testing-set.csv",header=None,names=col_names)


In [3]:
# Remove the "id" column from the training frame in place, then preview it.
# The deletion is not idempotent: re-running this cell raises KeyError.
del training_df['id']
training_df.head()

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label
0,1.1e-05,udp,-,INT,2,0,496,0,90909.0902,254,...,1,2,0,0,0,1,2,0,Normal,0
1,8e-06,udp,-,INT,2,0,1762,0,125000.0003,254,...,1,2,0,0,0,1,2,0,Normal,0
2,5e-06,udp,-,INT,2,0,1068,0,200000.0051,254,...,1,3,0,0,0,1,3,0,Normal,0
3,6e-06,udp,-,INT,2,0,900,0,166666.6608,254,...,1,3,0,0,0,2,3,0,Normal,0
4,1e-05,udp,-,INT,2,0,2126,0,100000.0025,254,...,1,3,0,0,0,2,3,0,Normal,0


In [4]:
# Remove the "id" column from the testing frame in place, then preview it.
# The deletion is not idempotent: re-running this cell raises KeyError.
del testing_df['id']
testing_df.head()

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label
0,0.121478,tcp,-,FIN,6,4,258,172,74.08749,252,...,1,1,0,0,0,1,1,0,Normal,0
1,0.649902,tcp,-,FIN,14,38,734,42014,78.473372,62,...,1,2,0,0,0,1,6,0,Normal,0
2,1.623129,tcp,-,FIN,8,16,364,13186,14.170161,62,...,1,3,0,0,0,2,6,0,Normal,0
3,1.681642,tcp,ftp,FIN,12,12,628,770,13.677108,62,...,1,3,1,1,0,2,1,0,Normal,0
4,0.449454,tcp,-,FIN,10,6,534,268,33.373826,254,...,1,40,0,0,0,2,39,0,Normal,0


In [5]:
# Report how many samples each split contains.
print("Training set has {} rows.".format(len(training_df)))
print("Testing set has {} rows.".format(len(testing_df)))

Training set has 82332 rows.
Testing set has 175341 rows.


In [6]:
# Distinct raw attack_cat values in each split, in order of first appearance.
training_attacks=training_df["attack_cat"].unique()
testing_attacks=testing_df["attack_cat"].unique()
print("The training set has {} possible outcomes \n".format(len(training_attacks)) )
print(", ".join(training_attacks)+".")
print("\nThe testing set has {} possible outcomes \n".format(len(testing_attacks)))
print(", ".join(testing_attacks)+".")

The training set has 10 possible outcomes 

Normal, Reconnaissance, Backdoor, DoS, Exploits, Analysis, Fuzzers, Worms, Shellcode, Generic.

The testing set has 10 possible outcomes 

Normal, Backdoor, Analysis, Fuzzers, Shellcode, Reconnaissance, Exploits, DoS, Worms, Generic.


In [7]:
# testing_df = testing_df.fillna(0)
# testing_df['attack_cat'].unique()

In [8]:
# Raw attack_cat values that belong to each attack class. Every group currently
# holds a single value, but label_attack only relies on membership.
reconnaissance_attacks = ['Reconnaissance']
backdoor_attacks = ['Backdoor']
dos_attacks = ['DoS']
exploits_attacks = ['Exploits']
analysis_attacks = ['Analysis']
fuzzers_attacks = ['Fuzzers']
worms_attacks = ['Worms']
shellcode_attacks = ['Shellcode']
generic_attacks = ['Generic']

# Class names; index 0 is the fallback, indices 1-9 follow the group order used
# inside label_attack.
classes = [
    "Normal", "Reconnaissance", "Backdoor", "DoS", "Exploits",
    "Analysis", "Fuzzers", "Worms", "Shellcode", "Generic",
]

#Helper function to label samples with one of the 10 classes
def label_attack (row):
    """Return the class name for one sample.

    `row` is anything indexable by "attack_cat" (a DataFrame row when used
    with `DataFrame.apply(axis=1)`). The groups are checked in class order and
    the first one containing the value wins; a value found in no group is
    labelled classes[0] ("Normal").
    """
    category = row["attack_cat"]
    # Position i in this tuple corresponds to classes[i + 1].
    attack_groups = (
        reconnaissance_attacks,
        backdoor_attacks,
        dos_attacks,
        exploits_attacks,
        analysis_attacks,
        fuzzers_attacks,
        worms_attacks,
        shellcode_attacks,
        generic_attacks,
    )
    for class_index, members in enumerate(attack_groups, start=1):
        if category in members:
            return classes[class_index]

    return classes[0]


#We combine the datasets temporarily to do the labeling
# The training rows come first, so the boundary between the two splits is the
# training length.
training_samples_length = len(training_df)
test_samples_length = len(testing_df)
df=pd.concat([training_df,testing_df])
df["Class"]=df.apply(label_attack,axis=1)

# Keep the raw category strings of each split; they are attached to the encoded
# features later in the notebook.
target=training_df["attack_cat"]
test_target=testing_df["attack_cat"]

# Only "Class" is kept as the label; the two original label columns are removed.
df=df.drop(["attack_cat","label"],axis=1)

# we again split the data into training and test sets.
# Slicing from the front (instead of [:-test_samples_length]) stays correct when
# the test split is empty, and .copy() makes each split an independent frame so
# the in-place scaling/encoding that follows does not write into a slice of df.
training_df= df.iloc[:training_samples_length, :].copy()
testing_df= df.iloc[training_samples_length:,:].copy()

In [9]:
# Same summary as before, now over the derived "Class" label of each split.
training_attacks=training_df["Class"].unique()
testing_attacks=testing_df["Class"].unique()
print("The training set has {} possible attacks \n".format(len(training_attacks)) )
print(", ".join(training_attacks)+".")
print("\nThe testing set has {} possible attacks \n".format(len(testing_attacks)))
print(", ".join(testing_attacks)+".")

The training set has 10 possible attacks 

Normal, Reconnaissance, Backdoor, DoS, Exploits, Analysis, Fuzzers, Worms, Shellcode, Generic.

The testing set has 10 possible attacks 

Normal, Backdoor, Analysis, Fuzzers, Shellcode, Reconnaissance, Exploits, DoS, Worms, Generic.


In [10]:
# Helper function for scaling continuous values
def minmax_scale_values(training_df,testing_df, col_name):
    """Min-max scale column `col_name` of both frames in place.

    The scaler is fitted on the training column only and then applied to both
    frames, so scaled test values can fall outside [0, 1]. Returns None.
    """
    def as_column(frame):
        # The scaler works on 2-D input, hence the (n_samples, 1) reshape.
        return frame[col_name].values.reshape(-1, 1)

    fitted_scaler = MinMaxScaler().fit(as_column(training_df))
    training_df[col_name] = fitted_scaler.transform(as_column(training_df))
    testing_df[col_name] = fitted_scaler.transform(as_column(testing_df))
    
    
#Helper function for one hot encoding
def encode_text(training_df,testing_df, name):
    """One-hot encode column `name` of both frames in place.

    The set of indicator columns is defined by the categories present in the
    training frame; each is added to both frames as "<name>_<category>" and
    the original column is dropped. A category that occurs only in the test
    frame gets no indicator column. Returns None.
    """
    training_set_dummies = pd.get_dummies(training_df[name])
    testing_set_dummies = pd.get_dummies(testing_df[name])
    for x in training_set_dummies.columns:
        dummy_name = "{}_{}".format(name, x)
        training_df[dummy_name] = training_set_dummies[x]
        if x in testing_set_dummies.columns :
            testing_df[dummy_name]=testing_set_dummies[x]
        else :
            # Category absent from the test frame: fill with zeros of the same
            # dtype as the training indicator, so the column does not become
            # float while every other indicator stays integer/bool.
            testing_df[dummy_name]=np.zeros(len(testing_df), dtype=training_set_dummies[x].dtype)
    training_df.drop(name, axis=1, inplace=True)
    testing_df.drop(name, axis=1, inplace=True)
    
    
# Text-valued columns are one-hot encoded; every other column except the label
# is min-max scaled. Both helpers modify training_df / testing_df in place.
symbolic_columns = ["proto", "service", "state"]
label_column = "Class"
for column in df.columns :
    if column in symbolic_columns:
        encode_text(training_df, testing_df, column)
        continue
    if column != label_column:
        minmax_scale_values(training_df, testing_df, column)


In [11]:
# Preview the training frame after scaling and one-hot encoding.
training_df.head(5)

Unnamed: 0,dur,spkts,dpkts,sbytes,dbytes,rate,sttl,dttl,sload,dload,...,service_snmp,service_ssh,service_ssl,state_ACC,state_CLO,state_CON,state_FIN,state_INT,state_REQ,state_RST
0,1.833334e-07,9.4e-05,0.0,3.3e-05,0.0,0.090909,0.996078,0.0,0.034238,0.0,...,0,0,0,0,0,0,0,1,0,0
1,1.333334e-07,9.4e-05,0.0,0.000121,0.0,0.125,0.996078,0.0,0.167236,0.0,...,0,0,0,0,0,0,0,1,0,0
2,8.333335e-08,9.4e-05,0.0,7.3e-05,0.0,0.2,0.996078,0.0,0.162187,0.0,...,0,0,0,0,0,0,0,1,0,0
3,1e-07,9.4e-05,0.0,6.1e-05,0.0,0.166667,0.996078,0.0,0.113895,0.0,...,0,0,0,0,0,0,0,1,0,0
4,1.666667e-07,9.4e-05,0.0,0.000146,0.0,0.1,0.996078,0.0,0.161427,0.0,...,0,0,0,0,0,0,0,1,0,0


In [12]:
# Preview the testing frame after scaling and one-hot encoding.
testing_df.head(5)

Unnamed: 0,dur,spkts,dpkts,sbytes,dbytes,rate,sttl,dttl,sload,dload,...,service_snmp,service_ssh,service_ssl,state_ACC,state_CLO,state_CON,state_FIN,state_INT,state_REQ,state_RST
0,0.002025,0.00047,0.000363,1.6e-05,1.2e-05,7.4e-05,0.988235,1.003953,2.687726e-06,0.000408,...,0,0,0,0.0,0.0,0,1,0,0,0
1,0.010832,0.001221,0.003449,4.9e-05,0.002866,7.8e-05,0.243137,0.996047,1.593605e-06,0.024186,...,0,0,0,0.0,0.0,0,1,0,0,0
2,0.027052,0.000658,0.001452,2.4e-05,0.0009,1.4e-05,0.243137,0.996047,2.984571e-07,0.002926,...,0,0,0,0.0,0.0,0,1,0,0,0
3,0.028027,0.001033,0.001089,4.2e-05,5.3e-05,1.4e-05,0.243137,0.996047,5.201554e-07,0.000161,...,0,0,0,0.0,0.0,0,1,0,0,0
4,0.007491,0.000845,0.000545,3.6e-05,1.8e-05,3.3e-05,0.996078,0.996047,1.62519e-06,0.000191,...,0,0,0,0.0,0.0,0,1,0,0,0


In [13]:
# Separate the label from the features. pop() removes "Class" from the frames
# themselves, so this cell cannot be re-run without rebuilding them first.
y = training_df.pop("Class").values
x = training_df.values
y_test = testing_df.pop("Class").values
x_test = testing_df.values

# Binary view of the labels: 0 for classes[0] ("Normal"), 1 for every attack.
y0 = (y != classes[0]).astype(np.int8)
y0_test = (y_test != classes[0]).astype(np.int8)

In [14]:
# (rows, features) of the training feature matrix.
x.shape


(82332, 190)

In [15]:
# (rows, features) of the testing feature matrix; the feature count must match x.
x_test.shape

(175341, 190)

In [16]:
# One class label per training row.
y.shape

(82332,)

In [17]:
# One class label per testing row.
y_test.shape

(175341,)

In [18]:
# Number of input features; this is the autoencoder's input and output width.
ncol = x.shape[1]
# numpy.random.seed() returns None, so passing its result as random_state handed
# None to train_test_split. Seed NumPy's global generator as a separate step and
# give the split its own explicit seed.
seed(2017)
# Hold out 10% of the training rows to validate the autoencoder. The labels are
# split alongside the features but only X_train / X_test are used for fitting.
X_train, X_test, Y_train, Y_test = train_test_split(x, target, train_size = 0.9, random_state = 2017)
print(target)

0        Normal
1        Normal
2        Normal
3        Normal
4        Normal
          ...  
82327    Normal
82328    Normal
82329    Normal
82330    Normal
82331    Normal
Name: attack_cat, Length: 82332, dtype: object


In [19]:
# Width of the representation produced by the second encoder layer (encoded2).
encoding_dim = 151
# Symbolic input: one vector of ncol features per sample.
input_dim = Input(shape = (ncol, ))

# Encoder Layers
# ncol -> 16 -> encoding_dim, both with ReLU activations. The narrowest layer is
# the 16-unit one, not the encoding itself.
encoded1 = Dense(16, activation = 'relu')(input_dim)
encoded2 = Dense(encoding_dim, activation = 'relu')(encoded1)

# Decoder Layers
# encoding_dim -> 16 -> ncol; the sigmoid keeps each reconstructed feature in (0, 1).
decoded1 = Dense(16, activation = 'relu')(encoded2)
decoded2 = Dense(ncol, activation = 'sigmoid')(decoded1)

# Combine Encoder and Decoder layers
autoencoder = Model(inputs = input_dim, outputs = decoded2)

# Compile the Model
autoencoder.compile(optimizer = 'adadelta', loss = 'binary_crossentropy')

In [20]:
# Train the autoencoder to reconstruct its own input, validating on the held-out
# 10% of the training rows. `epochs` is the Keras 2 name of the argument that
# Keras 1 called `nb_epoch`.
history=autoencoder.fit(X_train, X_train, epochs = 10, batch_size = 100, shuffle = True, validation_data = (X_test, X_test))

Train on 74098 samples, validate on 8234 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [21]:
# Sub-model sharing the trained layers: maps an input vector to the
# encoding_dim-wide output of encoded2.
encoder = Model(inputs = input_dim, outputs = encoded2)
# Placeholder for an encoded vector; not used by any cell shown in this notebook.
encoded_input = Input(shape = (encoding_dim, ))

In [22]:
# Replace the raw features by their encoded representation (one row per sample,
# encoding_dim columns, default integer index).
x_train = pd.DataFrame(encoder.predict(x))
# x_test is rebound from the raw test matrix to its encoding, so this cell is not
# re-runnable: a second run would feed encoding_dim-wide rows to an encoder that
# expects ncol features.
x_test = pd.DataFrame(encoder.predict(x_test))

In [23]:
# Attach the raw attack category strings as a "target" column. The assignment
# aligns on index labels.
# NOTE(review): this relies on target / test_target carrying the same 0..n-1
# index as the freshly built encoded frames - confirm if the loading code changes.
x_train['target'] = target
x_test['target']=test_target

In [24]:
# Encoded features plus the "target" column.
x_train.shape

(82332, 152)

In [25]:
# Integer code for every attack category; 0 is normal traffic.
attack_codes = {
    'Normal': 0, 'Generic': 1, 'Exploits': 2, 'Fuzzers': 3, 'DoS': 4,
    'Reconnaissance': 5, 'Analysis': 6, 'Backdoor': 7, 'Shellcode': 8, 'Worms': 9,
}

attack_df_train, attack_df_test = x_train['target'], x_test['target']
# change the label column
new_attack_df_train = attack_df_train.replace(attack_codes)
new_attack_df_test = attack_df_test.replace(attack_codes)
# put the new label column back
x_train['target'], x_test['target'] = new_attack_df_train, new_attack_df_test
print(x_train['target'].head())

0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64


In [26]:
# Split the encoded training rows into normal traffic (code 0) and attacks.
normal_mask = x_train['target'].eq(0)
attack_mask = x_train['target'].ne(0)
x_normal = x_train.loc[normal_mask]
x_attack = x_train.loc[attack_mask]

# Same split for the encoded test rows.
normal_mask_test = x_test['target'].eq(0)
attack_mask_test = x_test['target'].ne(0)
normal_test = x_test.loc[normal_mask_test]
attack_test = x_test.loc[attack_mask_test]

print(x_normal.shape)
print(attack_test.shape)

(37000, 152)
(119341, 152)


In [27]:
# Integer target codes of the category each frame leaves out
# (1=Generic, 2=Exploits, 3=Fuzzers, 4=DoS, 5=Reconnaissance, 6=Analysis,
#  7=Backdoor, 8=Shellcode, 9=Worms).
drop_Generic = [1]
drop_Exploits = [2]
drop_Fuzzers = [3]
drop_DoS = [4]
drop_Reconnaissance = [5]
drop_Analysis = [6]
drop_Backdoor = [7]
drop_Shellcode = [8]
drop_Worms = [9]


def _without_classes(frame, codes):
    """Return the rows of `frame` whose 'target' code is not listed in `codes`."""
    return frame[~frame['target'].isin(codes)]


# Each <Category>_df holds every row EXCEPT those of that category.
Generic_df = _without_classes(x_train, drop_Generic)
Exploits_df = _without_classes(x_train, drop_Exploits)
Fuzzers_df = _without_classes(x_train, drop_Fuzzers)
DoS_df = _without_classes(x_train, drop_DoS)
Reconnaissance_df = _without_classes(x_train, drop_Reconnaissance)
Analysis_df = _without_classes(x_train, drop_Analysis)
Backdoor_df = _without_classes(x_train, drop_Backdoor)
Shellcode_df = _without_classes(x_train, drop_Shellcode)
Worms_df = _without_classes(x_train, drop_Worms)

#test

Generic_df_test = _without_classes(x_test, drop_Generic)
Exploits_df_test = _without_classes(x_test, drop_Exploits)
Fuzzers_df_test = _without_classes(x_test, drop_Fuzzers)
DoS_df_test = _without_classes(x_test, drop_DoS)
Reconnaissance_df_test = _without_classes(x_test, drop_Reconnaissance)
Analysis_df_test = _without_classes(x_test, drop_Analysis)
Backdoor_df_test = _without_classes(x_test, drop_Backdoor)
Shellcode_df_test = _without_classes(x_test, drop_Shellcode)
Worms_df_test = _without_classes(x_test, drop_Worms)


# The first line of each report used to be labelled "DoS" although it shows the
# Generic frame; every line now carries the name of the frame it prints.
print('Train:')

for category_name, category_frame in (
    ('Generic', Generic_df),
    ('Exploits', Exploits_df),
    ('Fuzzers', Fuzzers_df),
    ('DoS', DoS_df),
    ('Reconnaissance', Reconnaissance_df),
    ('Analysis', Analysis_df),
    ('Backdoor', Backdoor_df),
    ('Shellcode', Shellcode_df),
    ('Worms', Worms_df),
):
    print('Dimensions of {}:'.format(category_name), category_frame.shape)

print('Test:')
for category_name, category_frame in (
    ('Generic', Generic_df_test),
    ('Exploits', Exploits_df_test),
    ('Fuzzers', Fuzzers_df_test),
    ('DoS', DoS_df_test),
    ('Reconnaissance', Reconnaissance_df_test),
    ('Analysis', Analysis_df_test),
    ('Backdoor', Backdoor_df_test),
    ('Shellcode', Shellcode_df_test),
    ('Worms', Worms_df_test),
):
    print('Dimensions of {}:'.format(category_name), category_frame.shape)


Train:
Dimensions of DoS: (63461, 152)
Dimensions of Exploits: (71200, 152)
Dimensions of Fuzzers: (76270, 152)
Dimensions of DoS: (78243, 152)
Dimensions of Reconnaissance: (78836, 152)
Dimensions of Analysis: (81655, 152)
Dimensions of Backdoor: (81749, 152)
Dimensions of Shellcode: (81954, 152)
Dimensions of Worms: (82288, 152)
Test:
Dimensions of DoS: (135341, 152)
Dimensions of Exploits: (141948, 152)
Dimensions of Fuzzers: (157157, 152)
Dimensions of DoS: (163077, 152)
Dimensions of Reconnaissance: (164850, 152)
Dimensions of Analysis: (173341, 152)
Dimensions of Backdoor: (173595, 152)
Dimensions of Shellcode: (174208, 152)
Dimensions of Worms: (175211, 152)


In [28]:
# Split every per-category frame into features (X_*) and integer labels (Y_*).
# The axis is passed by keyword: the positional form drop('target', 1) was
# deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on.
X_Generic = Generic_df.drop('target', axis=1)
Y_Generic = Generic_df.target

X_Exploits = Exploits_df.drop('target', axis=1)
Y_Exploits = Exploits_df.target

X_Fuzzers = Fuzzers_df.drop('target', axis=1)
Y_Fuzzers = Fuzzers_df.target

X_DoS = DoS_df.drop('target', axis=1)
Y_DoS = DoS_df.target

X_Reconnaissance = Reconnaissance_df.drop('target', axis=1)
Y_Reconnaissance = Reconnaissance_df.target

X_Analysis = Analysis_df.drop('target', axis=1)
Y_Analysis = Analysis_df.target

X_Backdoor = Backdoor_df.drop('target', axis=1)
Y_Backdoor = Backdoor_df.target

X_Shellcode = Shellcode_df.drop('target', axis=1)
Y_Shellcode = Shellcode_df.target

X_Worms = Worms_df.drop('target', axis=1)
Y_Worms = Worms_df.target

# Same split for the test-side frames.
X_Generic_test = Generic_df_test.drop('target', axis=1)
Y_Generic_test = Generic_df_test.target

X_Exploits_test = Exploits_df_test.drop('target', axis=1)
Y_Exploits_test = Exploits_df_test.target

X_Fuzzers_test = Fuzzers_df_test.drop('target', axis=1)
Y_Fuzzers_test = Fuzzers_df_test.target

X_DoS_test = DoS_df_test.drop('target', axis=1)
Y_DoS_test = DoS_df_test.target

X_Reconnaissance_test = Reconnaissance_df_test.drop('target', axis=1)
Y_Reconnaissance_test = Reconnaissance_df_test.target

X_Analysis_test = Analysis_df_test.drop('target', axis=1)
Y_Analysis_test = Analysis_df_test.target

X_Backdoor_test = Backdoor_df_test.drop('target', axis=1)
Y_Backdoor_test = Backdoor_df_test.target

X_Shellcode_test = Shellcode_df_test.drop('target', axis=1)
Y_Shellcode_test = Shellcode_df_test.target

X_Worms_test = Worms_df_test.drop('target', axis=1)
Y_Worms_test = Worms_df_test.target


In [29]:
# Features / labels of the normal-only and attack-only subsets. The axis is
# passed by keyword because the positional form drop('target', 1) raises
# TypeError from pandas 2.0 on.
X_normal=x_normal.drop('target', axis=1)
Y_normal=x_normal.target

X_attack=x_attack.drop('target', axis=1)
Y_attack=x_attack.target

X_normal_test=normal_test.drop('target', axis=1)
Y_normal_test=normal_test.target

X_attack_test=attack_test.drop('target', axis=1)
Y_attack_test=attack_test.target


In [30]:
def accuracy_report(classifier,X,y,num_cv):
    """Print cross-validated accuracy, recall, precision and F1 for a classifier.

    All four metrics are computed in a single `cross_validate` pass, so they
    describe the same fitted models and the estimator is fitted `num_cv` times
    instead of `4 * num_cv` times. Cross-validation fits fresh copies of
    `classifier` on folds of `X` / `y`; any fitting done on `classifier`
    beforehand does not influence the result.

    Parameters
    ----------
    classifier : scikit-learn estimator to evaluate.
    X, y : features and labels that are split into folds.
    num_cv : value passed as `cv` (number of folds or a splitter).

    Returns
    -------
    dict mapping "accuracy", "recall", "precision" and "f1" to the array of
    per-fold scores. Recall, precision and F1 are the class-weighted variants.
    """
    from sklearn.model_selection import cross_validate

    scoring = {
        "accuracy": "accuracy",
        "recall": "recall_weighted",
        "precision": "precision_weighted",
        "f1": "f1_weighted",
    }
    cv_results = cross_validate(classifier, X, y, cv=num_cv, scoring=scoring)
    # cross_validate reports each metric under the key "test_<name>".
    scores = {metric: cv_results["test_" + metric] for metric in scoring}

    # The spread is reported as two standard deviations across folds.
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores["accuracy"].mean(), scores["accuracy"].std() * 2))
    print("Recall: %0.2f (+/- %0.2f)" % (scores["recall"].mean(), scores["recall"].std() * 2))
    print("Precision: %0.2f (+/- %0.2f)" % (scores["precision"].mean(), scores["precision"].std() * 2))
    print("F1-Score: %0.2f (+/- %0.2f)" % (scores["f1"].mean(), scores["f1"].std() * 2))
    return scores

In [31]:
from sklearn import tree

# One decision tree per leave-one-category-out training set. The rf_ prefix is
# historical: these are single DecisionTreeClassifier models with default
# settings. fit() returns the estimator, so construction and fitting are chained.
rf_DoS = tree.DecisionTreeClassifier().fit(X_DoS, Y_DoS)
rf_Generic = tree.DecisionTreeClassifier().fit(X_Generic, Y_Generic)
rf_Exploits = tree.DecisionTreeClassifier().fit(X_Exploits, Y_Exploits)
rf_Fuzzers = tree.DecisionTreeClassifier().fit(X_Fuzzers, Y_Fuzzers)
rf_Analysis = tree.DecisionTreeClassifier().fit(X_Analysis, Y_Analysis)
rf_Backdoor = tree.DecisionTreeClassifier().fit(X_Backdoor, Y_Backdoor)
rf_Shellcode = tree.DecisionTreeClassifier().fit(X_Shellcode, Y_Shellcode)
rf_Worms = tree.DecisionTreeClassifier().fit(X_Worms, Y_Worms)
rf_Reconnaissance = tree.DecisionTreeClassifier().fit(X_Reconnaissance, Y_Reconnaissance)

# Tree trained on attack rows only; left as the final expression so the fitted
# estimator is displayed as the cell output.
rf_attack = tree.DecisionTreeClassifier()
rf_attack.fit(X_attack, Y_attack)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [32]:
# Tree fitted on the normal-traffic rows only. Those rows were selected with
# target == 0, so Y_normal contains a single class and this model can only ever
# predict 0.
rf_normal= tree.DecisionTreeClassifier()
rf_normal.fit(X_normal,Y_normal)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [33]:
# Run every fitted tree on its matching test features. None of the results is
# stored; only the last call's return value is shown as the cell output.
rf_DoS.predict(X_DoS_test)
rf_Generic.predict(X_Generic_test)
rf_Exploits.predict(X_Exploits_test)
rf_Fuzzers.predict(X_Fuzzers_test)
rf_Analysis.predict(X_Analysis_test)
rf_Backdoor.predict(X_Backdoor_test)
rf_Shellcode.predict(X_Shellcode_test)
rf_Worms.predict(X_Worms_test)
rf_Reconnaissance.predict(X_Reconnaissance_test)
rf_attack.predict(X_attack_test)

array([4, 4, 4, ..., 1, 1, 1], dtype=int64)

In [34]:
# Predictions of the normal-only tree on the normal test rows (displayed, not stored).
rf_normal.predict(X_normal_test)

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [35]:
# rf_DoS.predict_proba(X_DoS_test)[0:10]
# rf_Generic.predict_proba(X_Generic_test)[0:10]

In [36]:
# Store the predicted integer class codes of every tree on its matching test set.
Y_DoS_pred=rf_DoS.predict(X_DoS_test)
Y_Generic_pred=rf_Generic.predict(X_Generic_test)
Y_Exploits_pred=rf_Exploits.predict(X_Exploits_test)
Y_Fuzzers_pred=rf_Fuzzers.predict(X_Fuzzers_test)
Y_Analysis_pred=rf_Analysis.predict(X_Analysis_test)
Y_Backdoor_pred=rf_Backdoor.predict(X_Backdoor_test)
Y_Shellcode_pred=rf_Shellcode.predict(X_Shellcode_test)
Y_Worms_pred=rf_Worms.predict(X_Worms_test)
Y_Reconnaissance_pred=rf_Reconnaissance.predict(X_Reconnaissance_test)

Y_attack_pred=rf_attack.predict(X_attack_test)
# Create confusion matrix
# pd.crosstab(Y_DoS_test, Y_DoS_pred, rownames=['Actual attacks'], colnames=['Predicted attacks'])

In [37]:
# Predicted class codes of the normal-only tree on the normal test rows.
Y_normal_pred=rf_normal.predict(X_normal_test)

In [39]:
# DoS_accuracy=accuracy_report(rf_DoS,X_DoS_test,Y_DoS_test,10)

Accuracy: 0.81 (+/- 0.14)
Recall: 0.81 (+/- 0.13)
Precision: 0.81 (+/- 0.08)
F1-Score: 0.80 (+/- 0.13)


In [40]:
# Exploits_accuracy=accuracy_report(rf_Exploits,X_Exploits_test,Y_Exploits_test,10)

Accuracy: 0.81 (+/- 0.14)
Recall: 0.81 (+/- 0.14)
Precision: 0.82 (+/- 0.08)
F1-Score: 0.80 (+/- 0.12)


In [41]:
# Fuzzers_accuracy=accuracy_report(rf_Fuzzers,X_Fuzzers_test,Y_Fuzzers_test,10)

Accuracy: 0.80 (+/- 0.07)
Recall: 0.80 (+/- 0.07)
Precision: 0.79 (+/- 0.04)
F1-Score: 0.79 (+/- 0.05)


In [42]:
# Analysis_accuracy=accuracy_report(rf_Analysis,X_Analysis_test,Y_Analysis_test,10)

Accuracy: 0.75 (+/- 0.12)
Recall: 0.75 (+/- 0.12)
Precision: 0.75 (+/- 0.07)
F1-Score: 0.74 (+/- 0.10)


In [38]:
# Print 10-fold cross-validation metrics for the Backdoor-excluded test rows.
Backdoor_accuracy=accuracy_report(rf_Backdoor,X_Backdoor_test,Y_Backdoor_test,10)

Accuracy: 0.75 (+/- 0.12)
Recall: 0.75 (+/- 0.12)
Precision: 0.75 (+/- 0.07)
F1-Score: 0.74 (+/- 0.11)


In [None]:
# Shellcode_accuracy=accuracy_report(rf_Shellcode,X_Shellcode_test,Y_Shellcode_test,10)

In [39]:
# Print 10-fold cross-validation metrics for the Worms-excluded test rows.
Worms_accuracy=accuracy_report(rf_Worms,X_Worms_test,Y_Worms_test,10)

Accuracy: 0.74 (+/- 0.12)
Recall: 0.74 (+/- 0.12)
Precision: 0.74 (+/- 0.07)
F1-Score: 0.73 (+/- 0.10)


In [42]:
# Print 10-fold cross-validation metrics for the Reconnaissance-excluded test rows.
Reconnaissance_accuracy=accuracy_report(rf_Reconnaissance,X_Reconnaissance_test,Y_Reconnaissance_test,10)

Accuracy: 0.76 (+/- 0.13)
Recall: 0.76 (+/- 0.13)
Precision: 0.76 (+/- 0.07)
F1-Score: 0.75 (+/- 0.11)


In [38]:
# Normal_accuracy=accuracy_report(rf_normal,X_normal_test,Y_normal_test,10)

Accuracy: 1.00 (+/- 0.00)
Recall: 1.00 (+/- 0.00)
Precision: 1.00 (+/- 0.00)
F1-Score: 1.00 (+/- 0.00)


In [39]:
# Attack_accuracy=accuracy_report(rf_attack,X_attack_test,Y_attack_test,10)

Accuracy: 0.72 (+/- 0.03)
Recall: 0.72 (+/- 0.03)
Precision: 0.71 (+/- 0.03)
F1-Score: 0.71 (+/- 0.02)


In [40]:
# Print 10-fold cross-validation metrics for the Shellcode-excluded test rows.
Shellcode_accuracy=accuracy_report(rf_Shellcode,X_Shellcode_test,Y_Shellcode_test,10)

Accuracy: 0.74 (+/- 0.12)
Recall: 0.74 (+/- 0.12)
Precision: 0.75 (+/- 0.07)
F1-Score: 0.74 (+/- 0.11)


In [41]:
# Print 10-fold cross-validation metrics for the Generic-excluded test rows.
Generic_accuracy=accuracy_report(rf_Generic,X_Generic_test,Y_Generic_test,10)

Accuracy: 0.67 (+/- 0.16)
Recall: 0.67 (+/- 0.16)
Precision: 0.68 (+/- 0.08)
F1-Score: 0.66 (+/- 0.14)
