In [24]:
"""
This is our 2-stage testing script (binary and multiclass)

Author: Wesley
"""
import numpy as np
import pandas as pd
import pickle
import keras
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.metrics import classification_report

Load dataset and get binary and multiclass y columns.

In [42]:
# Load the test set; the class name lives in the " Label" column (the leading
# space is part of the column name).
binary = pd.read_csv("testing_set.csv")

# Rename the "UDPLag" class to "UDP-lag"; every other value is left untouched.
col = binary[" Label"].replace("UDPLag", "UDP-lag").tolist()
binary[" Label"] = col

# Not rendered: only the final expression of a cell is displayed.
binary.head()
print(binary[" Label"].value_counts())

BENIGN     50000
LDAP       50000
NetBIOS    50000
MSSQL      50000
Portmap    50000
Syn        50000
UDP        50000
UDP-lag     1873
Name:  Label, dtype: int64


Get binary encodings

In [43]:
# Split the frame into the label column and the feature matrix.
y = binary[" Label"].copy()
X = binary.drop(columns=[" Label"])

# Stage-1 target: 0 for "BENIGN" rows, 1 for every other label.
y_bin = [int(label != "BENIGN") for label in y.values]

# Display names in code order (index 0 -> "BENIGN", index 1 -> "ATTACK").
binary_labels = ["BENIGN", "ATTACK"]

print("The text labels from the encoding will be passed to classification report so we can interpret our results more easily.\n")
print("Binary Label Encodings (in order of digits 0 -> 1): ")
print(binary_labels)


The text labels from the encoding will be passed to classification report so we can interpret our results more easily.

Binary Label Encodings (in order of digits 0 -> 1): 
['BENIGN', 'ATTACK']


Get the multiclass encodings and transform the test set with them. Use a dummy label for Portmap, which is not among the classes known to the encoder.

This is necessary because a LabelEncoder was used to encode the labels when the models were trained.

In [44]:
# Fit a LabelEncoder on the labels in binary.csv to recover the label -> int
# mapping. NOTE(review): binary.csv is assumed to carry the same label set the
# stage-2 model was trained with -- confirm.
le = LabelEncoder()

enc_helper = pd.read_csv("binary.csv")
y_multi = le.fit_transform(enc_helper[" Label"].values)

# le.classes_[i] is the label that the encoder maps to code i, so the fitted
# classes already form the code-ordered name list; there is no need to call
# inverse_transform once per code.
multiclass_labels = list(le.classes_)

print("\nMulticlass Label Encodings (in order of digits 0 -> n): ")
print(multiclass_labels)


Multiclass Label Encodings (in order of digits 0 -> n): 
['BENIGN', 'DNS', 'LDAP', 'MSSQL', 'NTP', 'NetBIOS', 'SNMP', 'SSDP', 'Syn', 'TFTP', 'UDP', 'UDP-lag']


In [45]:
# Turn every text label of the test set into its multiclass code in one pass.
#   * "Portmap" gets the dummy code 20 instead of an index into multiclass_labels.
#   * Every other label gets its position in multiclass_labels.
#   * The result is shifted down by 1 so it corresponds to the original
#     encoding (no benign column); BENIGN therefore becomes -1 and Portmap 19.
y_multi = [
    (20 if name == "Portmap" else multiclass_labels.index(name)) - 1
    for name in y.values
]

Load models

In [46]:
# Load the stage-1 (binary) model. The file is opened in a `with` block so the
# handle is closed as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open("rf_binary_test.pickle", "rb") as model_file:
    stage_1 = pickle.load(model_file)

# Load the stage-2 (multiclass) model.
stage_2 = keras.models.load_model('LSTM_big.h5')



Stage 1 Prediction (Binary)

In [47]:
# Stage 1: one prediction per row of X. Later cells compare these values with
# 0 and 1 (presumably 0 = benign, 1 = attack, matching y_bin -- confirm).
y_pred = stage_1.predict(X)

In [48]:
# Pass the codes explicitly so each entry of binary_labels is tied to its code
# (0 -> binary_labels[0], 1 -> binary_labels[1]) instead of being matched by
# position to whichever codes happen to occur in y_bin / y_pred.
print(classification_report(y_bin, y_pred, labels=[0, 1], digits=6, target_names=binary_labels))

              precision    recall  f1-score   support

      BENIGN   0.497568  0.996400  0.663705     50000
      ATTACK   0.999285  0.833350  0.908806    301873

    accuracy                       0.856519    351873
   macro avg   0.748427  0.914875  0.786255    351873
weighted avg   0.927993  0.856519  0.873977    351873



Housekeeping to prepare the dataset for 2nd stage.

In [49]:
# Attach the stage-1 prediction and the multiclass code to the features so the
# rows can be split by prediction while keeping their true label alongside.
X_agg = X.copy()
X_agg["binary_pred"] = y_pred.copy()
X_agg[" Labels"] = y_multi.copy()

# Rows stage 1 predicted as benign (0). Take an explicit copy so the column
# assignment below writes to an independent frame rather than to a slice of
# X_agg (this is what raised SettingWithCopyWarning), then replace the
# prediction with -1, the multiclass code for benign, for comparison later.
X_rem = X_agg[X_agg["binary_pred"] == 0].copy()
X_rem["binary_pred"] = X_rem["binary_pred"].replace(0, -1)

# Rows stage 1 predicted as attack (1) go on to 2nd stage classification:
# keep their true multiclass codes and strip both helper columns from the
# feature matrix.
attack_rows = X_agg[X_agg["binary_pred"] == 1]
y_mult = attack_rows[" Labels"].copy()
X_mult = attack_rows.drop(columns=["binary_pred", " Labels"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_rem['binary_pred'] = X_rem['binary_pred'].replace([0], -1)


2nd stage prediction

In [50]:
# Standardise the stage-2 features and insert a length-1 middle axis, giving
# the (rows, 1, features) array that is fed to the LSTM model.
# NOTE(review): the scaler is fit on this test subset rather than reusing one
# fit on training data -- confirm this matches how the model was trained.
scal = StandardScaler()
X_tr = np.expand_dims(scal.fit_transform(X_mult), axis=1)

# Predicted class per row = index of the largest model output for that row.
y_pred2 = list(map(np.argmax, stage_2.predict(X_tr)))



In [54]:
# Add a display name for the Portmap dummy code as the last entry. Guarded so
# that re-running this cell does not append a duplicate, which would make
# target_names longer than the set of label codes in the reports below.
if "Portmap" not in multiclass_labels:
    multiclass_labels.append("Portmap")

['BENIGN', 'DNS', 'LDAP', 'MSSQL', 'NTP', 'NetBIOS', 'SNMP', 'SSDP', 'Syn', 'TFTP', 'UDP', 'UDP-lag', 'Portmap']


In [55]:
# Label codes in the same order as multiclass_labels: -1 for BENIGN, then
# 0, 1, 2, ... for the remaining encoded classes, and 19 for the trailing
# "Portmap" entry (dummy code 20 shifted down by 1). Passing them explicitly
# ties every name to its code; without `labels`, names are matched by position
# to whichever codes happen to occur in y_mult / y_pred2.
stage_2_codes = list(range(-1, len(multiclass_labels) - 2)) + [19]
print(classification_report(y_mult, y_pred2, labels=stage_2_codes, digits=6, target_names=multiclass_labels))

              precision    recall  f1-score   support

      BENIGN   0.000000  0.000000  0.000000       180
         DNS   0.000000  0.000000  0.000000         0
        LDAP   0.937985  0.996819  0.966507     49981
       MSSQL   0.965684  0.559753  0.708708     49972
         NTP   0.000000  0.000000  0.000000         0
     NetBIOS   0.498137  0.024068  0.045917     49984
        SNMP   0.000000  0.000000  0.000000         0
        SSDP   0.000000  0.000000  0.000000         0
         Syn   0.034483  0.138889  0.055249        36
        TFTP   0.000000  0.000000  0.000000         0
         UDP   0.651672  0.266345  0.378140     49984
     UDP-lag   0.028674  0.022612  0.025284      1769
     Portmap   0.000000  0.000000  0.000000     49840

    accuracy                       0.366858    251746
   macro avg   0.239741  0.154499  0.167677    251746
weighted avg   0.606415  0.366858  0.416949    251746



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Final Calculation of Results

In [57]:
# Final predictions and true codes: the rows stage 1 settled as benign come
# first, followed by the rows classified by stage 2 (same order in both lists).
y_pred_final = [*X_rem["binary_pred"].values, *y_pred2]
y_true_final = [*X_rem[" Labels"].values, *y_mult.values]

# Label codes in the same order as multiclass_labels: -1 for BENIGN, then
# 0, 1, 2, ... for the remaining encoded classes, and 19 for the trailing
# "Portmap" entry (dummy code 20 shifted down by 1). Passing them explicitly
# ties every name to its code instead of relying on positional matching
# against whichever codes happen to occur in the data.
final_codes = list(range(-1, len(multiclass_labels) - 2)) + [19]
print(classification_report(y_true_final, y_pred_final, labels=final_codes, digits=6, target_names=multiclass_labels))

              precision    recall  f1-score   support

      BENIGN   0.497568  0.996400  0.663705     50000
         DNS   0.000000  0.000000  0.000000         0
        LDAP   0.937985  0.996440  0.966329     50000
       MSSQL   0.965684  0.559440  0.708457     50000
         NTP   0.000000  0.000000  0.000000         0
     NetBIOS   0.498137  0.024060  0.045903     50000
        SNMP   0.000000  0.000000  0.000000         0
        SSDP   0.000000  0.000000  0.000000         0
         Syn   0.034483  0.000100  0.000199     50000
        TFTP   0.000000  0.000000  0.000000         0
         UDP   0.651672  0.266260  0.378054     50000
     UDP-lag   0.028674  0.021356  0.024480      1873
     Portmap   0.000000  0.000000  0.000000     50000

    accuracy                       0.404052    351873
   macro avg   0.278016  0.220312  0.214394    351873
weighted avg   0.509644  0.404052  0.392693    351873



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
