In [9]:
"""
This is our 2-stage testing script (binary and multiclass)

Author: Wesley
"""
import numpy as np
import pandas as pd
import pickle
import keras
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.metrics import classification_report

from sklearn.neighbors import KNeighborsClassifier

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier

In [3]:
"""
This is a custom transformer that allows us to reduce the feature sets for each classifier appropriately.

Necessary since we're making them all part of a StackingClassifier and each one uses a different feature set.
"""
class FeatureReducer(BaseEstimator, TransformerMixin):
    """Column-selecting transformer meant to sit at the front of a pipeline.

    Parameters
    ----------
    feature_list : list of column labels, optional
        Columns to keep. When ``None`` the input is passed through untouched.
    """

    def __init__(self, feature_list=None):
        self.feature_list = feature_list

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self`` so the step chains."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` restricted to ``feature_list``, or ``X`` itself when unset.

        The selection goes through ``X.loc``, so ``X`` has to support
        label-based indexing (e.g. a pandas DataFrame) whenever a feature
        list is configured.
        """
        wanted = self.feature_list
        if wanted is not None:
            return X.loc[:, wanted]
        return X

Load dataset and get binary and multiclass y columns.

In [4]:
# Stage 1 (benign vs. attack) splits.
binary_train = pd.read_csv("binary_train.csv")
binary_test = pd.read_csv("binary_test.csv")

# Stage 2 (attack type) splits.
multiclass_train = pd.read_csv("multiclass_train.csv")
multiclass_test = pd.read_csv("multiclass_test.csv")

# Class balance of every split, in the same order the files were read.
for split_frame in (binary_train, binary_test, multiclass_train, multiclass_test):
    print(split_frame[" Label"].value_counts())

BENIGN     79997
TFTP        6667
Syn         6667
UDP         6667
DNS         6667
UDP-lag     6666
LDAP        6666
Portmap     6666
SSDP        6666
NTP         6666
NetBIOS     6666
MSSQL       6666
SNMP        6666
Name:  Label, dtype: int64
BENIGN     19999
SSDP        1667
NetBIOS     1667
NTP         1667
LDAP        1667
UDP-lag     1667
SNMP        1667
MSSQL       1667
Portmap     1667
UDP         1666
TFTP        1666
Syn         1666
DNS         1666
Name:  Label, dtype: int64
LDAP       13333
DNS        13333
Portmap    13333
UDP        13333
MSSQL      13333
UDP-lag    13333
Syn        13333
NetBIOS    13333
TFTP       13333
SSDP       13332
NTP        13332
SNMP       13332
Name:  Label, dtype: int64
SNMP       3334
SSDP       3334
NTP        3334
Portmap    3333
NetBIOS    3333
UDP-lag    3333
MSSQL      3333
LDAP       3333
DNS        3333
UDP        3333
TFTP       3333
Syn        3333
Name:  Label, dtype: int64


Split and encode data.

In [5]:
# --- Stage 1 (binary) -------------------------------------------------------
# Features are every column except the label; the target is 0 for BENIGN rows
# and 1 for any other label.
bin_x_train = binary_train.drop([" Label"], axis=1)
bin_x_test = binary_test.drop([" Label"], axis=1)

bin_y_train = (binary_train[" Label"] != "BENIGN").astype(int).tolist()
bin_y_test = (binary_test[" Label"] != "BENIGN").astype(int).tolist()

print("The text labels from the encoding will be passed to classification report so we can interpret our results more easily.\n")
binary_labels = ["BENIGN", "ATTACK"]

print("Binary Label Encodings (in order of digits 0 -> 1): ")
print(binary_labels)

# --- Stage 2 (multiclass) ---------------------------------------------------
multi_x_train = multiclass_train.drop([" Label"], axis=1)
multi_x_test = multiclass_test.drop([" Label"], axis=1)

# Encode attack labels to int. The encoder is fitted on the training labels
# only and reused for the test labels so both share one code <-> name mapping.
le = LabelEncoder()
multi_y_train = le.fit_transform(multiclass_train[" Label"].values)
multi_y_test = le.transform(multiclass_test[" Label"].values)

# Text labels in code order (index i is the name of integer code i).
# Taken from the fitted encoder itself rather than from the set of codes that
# happen to appear in the test split, so a class missing from the test data
# cannot shorten or shift the list.
print("\nMulticlass Label Encodings (in order of digits 0 -> n): ")
multiclass_labels = le.classes_.tolist()

print(multiclass_labels)

The text labels from the encoding will be passed to classification report so we can interpret our results more easily.

Binary Label Encodings (in order of digits 0 -> 1): 
['BENIGN', 'ATTACK']

Multiclass Label Encodings (in order of digits 0 -> n): 
['DNS', 'LDAP', 'MSSQL', 'NTP', 'NetBIOS', 'Portmap', 'SNMP', 'SSDP', 'Syn', 'TFTP', 'UDP', 'UDP-lag']


Load the stage 1 model and define the stage 2 stacking ensemble

In [12]:
# Stage 1: load the saved binary Keras model from the local file 'CNN_binary_final.h5'.
stage_1 = keras.models.load_model('CNN_binary_final.h5')

# Stage 2: nothing is loaded from disk here; the lists below only configure the model that is built further down.

# Hardcoded feature sets from tuning results, one list per base classifier (dt / lr / rf).
# The names are used for label-based column selection, so they must match the dataframe
# column headers exactly -- including the leading spaces many of them carry.
dt_features = [' Protocol', ' Flow Duration', 'Total Length of Fwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Min', ' Bwd Packet Length Mean', ' Bwd Packet Length Std', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Std', ' Flow IAT Max', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', 'Bwd IAT Total', ' Bwd IAT Min', 'Fwd PSH Flags', 'Fwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', ' RST Flag Count', ' ACK Flag Count', ' URG Flag Count', ' CWE Flag Count', ' Down/Up Ratio', ' Average Packet Size', ' Avg Fwd Segment Size', ' Avg Bwd Segment Size', ' Subflow Fwd Bytes', 'Init_Win_bytes_forward', 'Idle Mean', ' Idle Max', ' Idle Min', ' Inbound', ' Fwd Header Length', ' Fwd Header Length.1', ' min_seg_size_forward', ' Total Fwd Packets', 'Subflow Fwd Packets', ' act_data_pkt_fwd']
lr_features = [' Protocol', ' Flow Duration', 'Total Length of Fwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', ' Bwd Packet Length Mean', 'Flow Bytes/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', 'Fwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', ' ACK Flag Count', ' Average Packet Size', ' Avg Fwd Segment Size', ' Avg Bwd Segment Size', ' Subflow Fwd Bytes', 'Init_Win_bytes_forward', ' act_data_pkt_fwd', 'Idle Mean', ' Idle Std', ' Idle Max', ' Idle Min']
rf_features = [' Protocol', ' Flow Duration', ' Total Fwd Packets', ' Total Backward Packets', 'Total Length of Fwd Packets', ' Total Length of Bwd Packets', ' Fwd Packet Length Max', ' Fwd Packet Length Min', ' Fwd Packet Length Mean', ' Fwd Packet Length Std', 'Bwd Packet Length Max', ' Bwd Packet Length Mean', 'Flow Bytes/s', ' Flow Packets/s', ' Flow IAT Mean', ' Flow IAT Std', ' Flow IAT Max', ' Flow IAT Min', 'Fwd IAT Total', ' Fwd IAT Mean', ' Fwd IAT Std', ' Fwd IAT Max', ' Fwd IAT Min', 'Bwd IAT Total', ' Bwd IAT Mean', ' Bwd IAT Std', ' Bwd IAT Max', ' Bwd IAT Min', ' Fwd Header Length', ' Bwd Header Length', 'Fwd Packets/s', ' Bwd Packets/s', ' Min Packet Length', ' Max Packet Length', ' Packet Length Mean', ' Packet Length Std', ' Packet Length Variance', ' ACK Flag Count', ' URG Flag Count', ' CWE Flag Count', ' Down/Up Ratio', ' Average Packet Size', ' Avg Fwd Segment Size', ' Avg Bwd Segment Size', ' Fwd Header Length.1', 'Subflow Fwd Packets', ' Subflow Fwd Bytes', ' Subflow Bwd Packets', 'Init_Win_bytes_forward', ' Init_Win_bytes_backward', ' act_data_pkt_fwd', ' min_seg_size_forward', 'Active Mean', ' Active Max', ' Active Min', 'Idle Mean', ' Idle Std', ' Idle Max', ' Idle Min', ' Inbound']

# Base learners for the stacked stage 2 model.
# KNN receives the full feature matrix; the tree, logistic-regression and
# forest models each sit behind a FeatureReducer that narrows the input to
# that model's own feature list (the LR pipeline additionally standardizes).
clf_knn = KNeighborsClassifier(
    n_neighbors=47,
    weights='distance',
    algorithm='kd_tree',
)

clf_dt = make_pipeline(
    FeatureReducer(feature_list=dt_features),
    DecisionTreeClassifier(
        criterion='gini',
        max_depth=19,
        max_features='sqrt',
        min_samples_leaf=1,
        min_samples_split=3,
        random_state=42,
    ),
)

clf_lr = make_pipeline(
    FeatureReducer(feature_list=lr_features),
    StandardScaler(),
    LogisticRegression(
        C=4602.593009396766,
        max_iter=240,
        random_state=42,
    ),
)

clf_rf = make_pipeline(
    FeatureReducer(feature_list=rf_features),
    RandomForestClassifier(
        n_estimators=478,
        criterion='entropy',
        max_depth=65,
        max_features='sqrt',
        min_samples_leaf=2,
        min_samples_split=18,
        random_state=42,
    ),
)

# (name, estimator) pairs in the form StackingClassifier expects.
estimators = [
    ("knn", clf_knn),
    ("dt", clf_dt),
    ("lr", clf_lr),
    ("rf", clf_rf),
]

# Initialize the stacking classifier; a default LogisticRegression combines
# the base learners' outputs. It is only constructed here, not fitted.
stage_2 = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
)

Stage 1 Prediction (Binary)

In [11]:
# Standardize using statistics learned from the binary *training* features,
# then apply that same transform to the test features.
scal = StandardScaler().fit(bin_x_train)
bin_x_test_reshaped = scal.transform(bin_x_test)

# Threshold each model output at 0.5: at or above -> 1 (attack), otherwise 0.
# NOTE(review): assumes the model emits one score per row -- confirm against
# the saved model's output layer.
y_pred = [1 if score >= 0.5 else 0 for score in stage_1.predict(bin_x_test_reshaped)]



In [13]:
# Per-class precision / recall / F1 for stage 1, labelled BENIGN / ATTACK.
stage_1_report = classification_report(
    bin_y_test,
    y_pred,
    target_names=binary_labels,
    digits=6,
)
print(stage_1_report)

              precision    recall  f1-score   support

      BENIGN   0.999650  0.999900  0.999775     19999
      ATTACK   0.999900  0.999650  0.999775     20000

    accuracy                       0.999775     39999
   macro avg   0.999775  0.999775  0.999775     39999
weighted avg   0.999775  0.999775  0.999775     39999



Housekeeping to prepare the dataset for 2nd stage.

In [14]:
# Build one frame that carries, per test row: the features, the stage 1
# prediction, and the true label expressed on the multiclass integer scale.
X_agg = bin_x_test.copy()
X_agg["binary_pred"] = y_pred.copy()

# Encode the true labels in a single pass into an integer Series:
# labels known to the multiclass encoder get their encoder code, everything
# else (i.e. BENIGN, which the encoder never saw) gets -1.
# Doing this on a separate Series avoids writing ints into a column that
# still holds strings.
true_labels = binary_test[" Label"]
is_attack_label = true_labels.isin(le.classes_)
encoded_labels = pd.Series(-1, index=true_labels.index, dtype="int64")
encoded_labels.loc[is_attack_label] = le.transform(true_labels[is_attack_label])
X_agg[" Label"] = encoded_labels

# Rows stage 1 called benign: they skip stage 2, and their prediction is
# rewritten from 0 to -1 so it is comparable with the encoded true labels.
# The explicit .copy() makes X_rem an independent frame, so the assignment
# below no longer triggers pandas' SettingWithCopyWarning.
X_rem = X_agg.loc[X_agg["binary_pred"] == 0].copy()
X_rem["binary_pred"] = X_rem["binary_pred"].replace([0], -1)

# Rows stage 1 called attack: split into the true labels (y_mult) and the
# feature matrix handed to stage 2 (X_mult, helper columns removed).
attack_rows = X_agg.loc[X_agg["binary_pred"] == 1]
y_mult = attack_rows[" Label"].values.copy()
X_mult = attack_rows.drop(["binary_pred", " Label"], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_rem['binary_pred'] = X_rem['binary_pred'].replace([0], -1)


2nd stage prediction

In [15]:
# Train the stacked model on the multiclass training split, then classify
# only the rows that stage 1 flagged as attacks.
y_pred2 = stage_2.fit(multi_x_train, multi_y_train).predict(X_mult)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [16]:
# Distinct codes and how often each occurs: stage 2 predictions first, then
# the true labels of the same rows.
for codes in (y_pred2, list(y_mult)):
    print(np.unique(codes, return_counts=True))

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]), array([1135, 1862, 1664, 1668, 1108, 2275, 1997, 1971,  953, 1656, 1458,
       2248], dtype=int64))
(array([-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]), array([   2, 1665, 1667, 1667, 1664, 1667, 1666, 1667, 1666, 1666, 1665,
       1666, 1667], dtype=int64))


In [17]:
print(multiclass_labels)
# Same names with "BENIGN" in front, giving one extra leading entry.
multiclass_labels_add = ["BENIGN", *multiclass_labels]
print(multiclass_labels_add)

['DNS', 'LDAP', 'MSSQL', 'NTP', 'NetBIOS', 'Portmap', 'SNMP', 'SSDP', 'Syn', 'TFTP', 'UDP', 'UDP-lag']
['BENIGN', 'DNS', 'LDAP', 'MSSQL', 'NTP', 'NetBIOS', 'Portmap', 'SNMP', 'SSDP', 'Syn', 'TFTP', 'UDP', 'UDP-lag']


In [18]:
# Integer codes matching multiclass_labels_add position by position:
# -1 for the leading "BENIGN" entry, then 0..n-1 for the attack classes.
# Passing them explicitly keeps the report aligned with target_names even
# when stage 1 lets no benign row through (no -1 among the true labels);
# without it, classification_report infers the label set from the data and
# rejects a target_names list of a different length.
stage_2_report_labels = list(range(-1, len(multiclass_labels_add) - 1))

# zero_division=0 reports 0 for classes with no predicted samples (BENIGN is
# never predicted by stage 2) instead of emitting a warning per metric.
print(classification_report(
    list(y_mult),
    y_pred2,
    labels=stage_2_report_labels,
    target_names=multiclass_labels_add,
    digits=6,
    zero_division=0,
))

              precision    recall  f1-score   support

      BENIGN   0.000000  0.000000  0.000000         2
         DNS   0.791189  0.539339  0.641429      1665
        LDAP   0.623523  0.696461  0.657977      1667
       MSSQL   0.954327  0.952609  0.953467      1667
         NTP   0.996403  0.998798  0.997599      1664
     NetBIOS   0.732852  0.487103  0.585225      1667
     Portmap   0.620220  0.846939  0.716062      1666
        SNMP   0.711567  0.852430  0.775655      1667
        SSDP   0.646372  0.764706  0.700577      1666
         Syn   0.988458  0.565426  0.719359      1666
        TFTP   0.998792  0.993393  0.996086      1665
         UDP   0.690672  0.604442  0.644686      1666
     UDP-lag   0.659253  0.889022  0.757088      1667

    accuracy                       0.765791     19995
   macro avg   0.724125  0.706974  0.703478     19995
weighted avg   0.784346  0.765791  0.761991     19995



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Final Calculation of Results

In [19]:
# Stitch the two stages back together, keeping predictions and truths in the
# same order: first the rows stage 1 called benign (prediction -1), then the
# rows that went through stage 2.
y_pred_final = [*X_rem["binary_pred"].values, *y_pred2]
y_true_final = [*X_rem[" Label"].values, *y_mult]

# Explicit codes (-1 for "BENIGN", then 0..n-1) so every row of the report is
# tied to the intended name in multiclass_labels_add regardless of which
# codes happen to occur in the data.
final_report_labels = list(range(-1, len(multiclass_labels_add) - 1))

print(classification_report(
    y_true_final,
    y_pred_final,
    labels=final_report_labels,
    target_names=multiclass_labels_add,
    digits=6,
))

              precision    recall  f1-score   support

      BENIGN   0.999650  0.999900  0.999775     19999
         DNS   0.791189  0.539016  0.641200      1666
        LDAP   0.623523  0.696461  0.657977      1667
       MSSQL   0.954327  0.952609  0.953467      1667
         NTP   0.996403  0.997001  0.996702      1667
     NetBIOS   0.732852  0.487103  0.585225      1667
     Portmap   0.620220  0.846431  0.715880      1667
        SNMP   0.711567  0.852430  0.775655      1667
        SSDP   0.646372  0.764247  0.700385      1667
         Syn   0.988458  0.565426  0.719359      1666
        TFTP   0.998792  0.992797  0.995786      1666
         UDP   0.690672  0.604442  0.644686      1666
     UDP-lag   0.659253  0.889022  0.757088      1667

    accuracy                       0.882747     39999
   macro avg   0.801021  0.783606  0.780245     39999
weighted avg   0.892049  0.882747  0.880861     39999

