In [1]:
"""
The code for testing the classifiers. Uses binary.csv and multiclass.csv

Loads feature lists and tuning ranges from pickles in CWD.

Change model types in models to tune on specific ones.

Writes output files.

Author: Wesley
"""

# Accelerates tuning of some classifiers
#from sklearnex import patch_sklearn
#patch_sklearn()

import pandas as pd
import numpy as np

from skopt import BayesSearchCV
from skopt.space import Categorical, Integer, Real

from sklearn.feature_selection import RFECV

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

import pickle

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    accuracy_score
)

from time import time

from sklearn.metrics import classification_report

In [2]:
multiclass = pd.read_csv("multiclass_train.csv")
multiclass_test = pd.read_csv("multiclass_test.csv")

Preprocessing (make labels numeric)

In [3]:
# Encode attack labels to int and save as array to be used later.
le = LabelEncoder()
multiclass[" Label"] = le.fit_transform(multiclass[" Label"].values)
multiclass_test[" Label"] = le.transform(multiclass_test[" Label"].values)

multiclass_labels = []
print("\nMulticlass Label Encodings (in order of digits 0 -> n): ")
for i in range(0, len(list(set(list(multiclass[' Label']))))):
    multiclass_labels.append(le.inverse_transform([i])[0])

print(multiclass_labels)


Multiclass Label Encodings (in order of digits 0 -> n): 
['DNS', 'LDAP', 'MSSQL', 'NTP', 'NetBIOS', 'Portmap', 'SNMP', 'SSDP', 'Syn', 'TFTP', 'UDP', 'UDP-lag']


Load feature sets and search spaces and enumerate their contents.

In [4]:
feature_sets = pickle.load(open("feature_sets.pickle", 'rb'))
search_spaces = pickle.load(open("hyperparameter_search_spaces.pickle", 'rb'))

print(f"Available Tuning Ranges: {search_spaces.keys()}")

print("Feature Sets for Binary Dataset:")
for key, value in feature_sets["Binary"].items():
    if key == "RFE Sets":
        print(value.keys())

    elif key == "PCA":
        print(f"{key}, suggested variance threshold is {value}")
        
    else:
        print(key)

print("Feature Sets for Multiclass Dataset:")
for key, value in feature_sets["Multiclass"].items():
    if key == "RFE Sets":
        print(value.keys())

    elif key == "PCA":
        print(f"{key}, suggested variance threshold is {value}")

    else:
        print(key)

Available Tuning Ranges: dict_keys(['XGBoost', 'Bagging SVM', 'SVC (RBF)', 'SVC (Poly)', 'Logistic Regression', 'Random Forest', 'KNN', 'Linear SVC', 'Naive Bayes', 'Decision Tree'])
Feature Sets for Binary Dataset:
All
Correlation
Mutual Information
dict_keys(['Decision Tree', 'Random Forest', 'XGBoost', 'Linear SVC', 'Logistic Regression'])
PCA, suggested variance threshold is 0.95
Feature Sets for Multiclass Dataset:
All
Correlation
Mutual Information
dict_keys(['Decision Tree', 'Random Forest', 'XGBoost', 'Logistic Regression'])
PCA, suggested variance threshold is 0.95


In [5]:
"""
This is a helper method to place our performance results in a DataFrame for future analysis.
"""
def format_results_multiclass(y_test, predicted_values, fold_index, fitTime):
    # get scores
    accuracy = accuracy_score(y_test,predicted_values)
    recall_pos = recall_score(y_test, predicted_values, average='macro')
    precision_pos = precision_score(y_test,predicted_values, average='macro')
    f1 = f1_score(y_test,predicted_values, average='macro')

    cols = ["Fitting Time", "accuracy", "Precision", "Recall", "F1 Score"]
    results = [fitTime, accuracy, precision_pos, recall_pos, f1]

    outFrame = pd.DataFrame([results], columns=cols, index=[fold_index])

    return outFrame

This is for tuning on the multiclass set

In [6]:
models = {
    #"Decision Tree": DecisionTreeClassifier(random_state=42),
    #"Random Forest": RandomForestClassifier(random_state=42),
    #"XGBoost": xgb.XGBClassifier(random_state=42, num_class=11, objective='multi:softmax'),
    #"Linear SVC": make_pipeline(StandardScaler(), LinearSVC(random_state=42)),
    #"Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(random_state=42)),
    "KNN": KNeighborsClassifier(),
    #"Naive Bayes": GaussianNB(),
}

score_methods = ['accuracy']

feature_set = feature_sets["Multiclass"]

y = multiclass[" Label"].copy()
X = multiclass.drop([" Label"], axis=1)

y_test_f = multiclass[" Label"].copy()
X_test_f = multiclass.drop([" Label"], axis=1)

In [7]:
params = []

# This will hold all of our results.
runFrame = None

for name, model in models.items():
        for feature_key, feature_val in feature_set.items():

            # If we're on the RFE sets, check if we have one for this classifier. If not, skip it.
            if feature_key == "RFE Sets":
                if name in feature_val.keys():
                    feature_val = feature_val[name]
                else:
                    continue
            
            for score_method in score_methods:
                opt = BayesSearchCV(estimator=model,search_spaces=search_spaces[name],n_iter=50,scoring=score_method,cv=5,n_jobs=5)
                kf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

                counter = 0

                # Used to hold data for a single run (performance metric)
                perfFrame = None

                for train_index, test_index in kf.split(X, y):

                    counter += 1

                    # PCA requires different logic to create X.
                    if feature_key != "PCA":
                        current_X = X.loc[:, feature_val]
                        X_train, X_test = current_X.iloc[train_index,:], current_X.iloc[test_index,:]
                        Y_train, Y_test = y.iloc[train_index], y.iloc[test_index]

                        current_X_f = X_test_f.loc[:, feature_val]
                        
                    else:
                        pca_trans = PCA(n_components=feature_val, random_state=42)
                        current_X = X.loc[:, X.columns]
                        X_train, X_test = current_X.iloc[train_index,:], current_X.iloc[test_index,:]
                        Y_train, Y_test = y.iloc[train_index], y.iloc[test_index]

                        # Apply PCA to training set and use it to transform test set.
                        X_train = pca_trans.fit_transform(X_train)
                        X_test = pca_trans.transform(X_test)
                        current_X_f = pca_trans.transform(X_test_f)

                        # Convert back to DataFrames
                        pca_cols = ["PC"+str(i) for i in list(range(1, len(X_train[0])+1))]
                        X_train = pd.DataFrame(data=X_train, columns=pca_cols)
                        X_test = pd.DataFrame(data=X_test, columns=pca_cols)
                        current_X_f = pd.DataFrame(data=current_X_f, columns=pca_cols)

                    startTime = time()

                    opt.fit(X_train,Y_train)

                    endTime = time()
                    fitTime = endTime - startTime

                    predicted_values = opt.predict(X_test)

                    # get metrics for this fold.
                    foldFrame = format_results_multiclass(Y_test, predicted_values, counter, fitTime)

                    # Add them to our lists of metric.
                    if perfFrame is None:
                        perfFrame = foldFrame
                    else:
                        perfFrame = pd.concat([perfFrame, foldFrame])

                    # Print a classification report on the testing results.
                    print("Validation Results: ")
                    print(classification_report(Y_test, predicted_values, target_names=multiclass_labels, digits=6))

                    print("Testing Results: ")
                    # Print a classification report on the testing results.
                    pred_test = opt.predict(current_X_f)
                    print(classification_report(y_test_f, pred_test, target_names=multiclass_labels, digits=6))

                    # Add tuple with the best params as well as the related model/config
                    params.append((f"Multiclass {name} {feature_key} {score_method} Fold {counter}", opt.best_params_))

                # Create a new line in the results table that averages all the folds
                perfFrame.loc["fold average"] = perfFrame.mean()

                # Mark the results table with the chosen classifier and the current performance metric.
                perfFrame['metric'] = [score_method for j in range(0,6)]
                perfFrame['Classifier'] = [name for j in range(0,6)]
                perfFrame['Feature Set'] = [feature_key for j in range(0,6)]
                perfFrame['Dataset'] = ["multiclass" for j in range(0,6)]
                print(f"{name} with {feature_key} and {score_method} completed.")

                # Add this run to the table with all runs.
                if runFrame is None:
                    runFrame = perfFrame
                else:
                    runFrame = pd.concat([runFrame, perfFrame])

# Write output file, best parameters, and best models to be used later.
runFrame.to_csv(f"multiclass_results_KNN_{time()}.csv")
pickle.dump(params, open(f"multiclass_params_KNN_{time()}.pickle", "wb"))



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.681671  0.550806  0.609291      2667
        LDAP   0.615492  0.661417  0.637629      2667
       MSSQL   0.907236  0.917104  0.912143      2666
         NTP   0.913855  0.954989  0.933969      2666
     NetBIOS   0.714777  0.468117  0.565730      2666
     Portmap   0.614432  0.855964  0.715361      2666
        SNMP   0.713879  0.752156  0.732518      2667
        SSDP   0.504340  0.435696  0.467512      2667
         Syn   0.909532  0.561680  0.694483      2667
        TFTP   0.986916  0.989876  0.988394      2667
         UDP   0.499216  0.596550  0.543560      2667
     UDP-lag   0.633275  0.810953  0.711184      2666

    accuracy                       0.712929     31999
   macro avg   0.724552  0.712942  0.709314     31999
weighted avg   0.724547  0.712929  0.709305     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.720956  0.583665  0.64



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.578194  0.636534  0.605963      2666
        LDAP   0.610096  0.475816  0.534653      2667
       MSSQL   0.893452  0.905851  0.899609      2666
         NTP   0.917477  0.954989  0.935857      2666
     NetBIOS   0.715385  0.453488  0.555096      2666
     Portmap   0.605795  0.854518  0.708975      2667
        SNMP   0.726535  0.772028  0.748591      2667
        SSDP   0.509398  0.508061  0.508729      2667
         Syn   0.861780  0.577428  0.691513      2667
        TFTP   0.983551  0.986502  0.985024      2667
         UDP   0.520246  0.539558  0.529726      2667
     UDP-lag   0.626050  0.782446  0.695565      2666

    accuracy                       0.703928     31999
   macro avg   0.712330  0.703935  0.699942     31999
weighted avg   0.712325  0.703928  0.699936     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.613823  0.707418  0.65



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.718733  0.493623  0.585279      2666
        LDAP   0.603987  0.681665  0.640479      2667
       MSSQL   0.903774  0.898013  0.900884      2667
         NTP   0.907361  0.947862  0.927169      2666
     NetBIOS   0.723815  0.435321  0.543667      2667
     Portmap   0.607569  0.878890  0.718467      2667
        SNMP   0.708099  0.803451  0.752768      2666
        SSDP   0.514331  0.484621  0.499034      2666
         Syn   0.928660  0.561305  0.699696      2667
        TFTP   0.981392  0.988751  0.985058      2667
         UDP   0.524252  0.571643  0.546923      2666
     UDP-lag   0.634167  0.836520  0.721423      2667

    accuracy                       0.715147     31999
   macro avg   0.729678  0.715139  0.710071     31999
weighted avg   0.729687  0.715147  0.710078     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.776917  0.549314  0.64



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.598405  0.619048  0.608551      2667
        LDAP   0.626686  0.487997  0.548714      2666
       MSSQL   0.906015  0.903637  0.904824      2667
         NTP   0.917967  0.948256  0.932866      2667
     NetBIOS   0.713707  0.462692  0.561419      2667
     Portmap   0.608801  0.845519  0.707895      2667
        SNMP   0.704724  0.805701  0.751838      2666
        SSDP   0.518248  0.559265  0.537976      2666
         Syn   0.870876  0.574269  0.692134      2666
        TFTP   0.977432  0.990998  0.984168      2666
         UDP   0.534730  0.516879  0.525653      2666
     UDP-lag   0.634845  0.797900  0.707094      2667

    accuracy                       0.709357     31998
   macro avg   0.717703  0.709347  0.705261     31998
weighted avg   0.717705  0.709357  0.705267     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.646373  0.656941  0.65



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.749560  0.479190  0.584629      2667
        LDAP   0.601392  0.680795  0.638635      2666
       MSSQL   0.900743  0.908511  0.904611      2667
         NTP   0.907086  0.940757  0.923615      2667
     NetBIOS   0.720068  0.473566  0.571364      2667
     Portmap   0.614863  0.850338  0.713679      2666
        SNMP   0.707523  0.818455  0.758957      2666
        SSDP   0.500197  0.475244  0.487401      2666
         Syn   0.907784  0.568642  0.699262      2666
        TFTP   0.979198  0.988747  0.983949      2666
         UDP   0.503176  0.564304  0.531990      2667
     UDP-lag   0.639671  0.817398  0.717695      2667

    accuracy                       0.713826     31998
   macro avg   0.727605  0.713829  0.709649     31998
weighted avg   0.727607  0.713826  0.709648     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.794213  0.533188  0.63



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.684482  0.547432  0.608333      2667
        LDAP   0.615040  0.668541  0.640676      2667
       MSSQL   0.907841  0.916354  0.912078      2666
         NTP   0.912432  0.949737  0.930711      2666
     NetBIOS   0.711845  0.468867  0.565355      2666
     Portmap   0.613600  0.852963  0.713748      2666
        SNMP   0.715354  0.752906  0.733650      2667
        SSDP   0.506520  0.466067  0.485452      2667
         Syn   0.869663  0.580427  0.696200      2667
        TFTP   0.985816  0.990251  0.988028      2667
         UDP   0.508911  0.578178  0.541338      2667
     UDP-lag   0.634843  0.783196  0.701259      2666

    accuracy                       0.712897     31999
   macro avg   0.722196  0.712910  0.709736     31999
weighted avg   0.722190  0.712897  0.709727     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.727512  0.579914  0.64



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.719519  0.493623  0.585539      2666
        LDAP   0.604250  0.682415  0.640958      2667
       MSSQL   0.904798  0.898013  0.901393      2667
         NTP   0.907328  0.947487  0.926972      2666
     NetBIOS   0.718636  0.442445  0.547691      2667
     Portmap   0.608479  0.871766  0.716708      2667
        SNMP   0.708099  0.803451  0.752768      2666
        SSDP   0.513236  0.487247  0.499904      2666
         Syn   0.927374  0.560180  0.698457      2667
        TFTP   0.980662  0.988751  0.984690      2667
         UDP   0.524221  0.568267  0.545356      2666
     UDP-lag   0.633599  0.835771  0.720776      2667

    accuracy                       0.714960     31999
   macro avg   0.729183  0.714951  0.710101     31999
weighted avg   0.729192  0.714960  0.710109     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.777082  0.549314  0.64



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.592267  0.631796  0.611393      2667
        LDAP   0.636501  0.474869  0.543931      2666
       MSSQL   0.906356  0.903637  0.904994      2667
         NTP   0.918027  0.949006  0.933260      2667
     NetBIOS   0.713785  0.458193  0.558118      2667
     Portmap   0.607796  0.847769  0.708001      2667
        SNMP   0.704821  0.806077  0.752056      2666
        SSDP   0.520586  0.559640  0.539407      2666
         Syn   0.872000  0.572393  0.691123      2666
        TFTP   0.977441  0.991373  0.984358      2666
         UDP   0.536915  0.521005  0.528841      2666
     UDP-lag   0.633929  0.798650  0.706819      2667

    accuracy                       0.709544     31998
   macro avg   0.718369  0.709534  0.705192     31998
weighted avg   0.718371  0.709544  0.705198     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.641075  0.674492  0.65



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.749560  0.479190  0.584629      2667
        LDAP   0.601392  0.680795  0.638635      2666
       MSSQL   0.900743  0.908511  0.904611      2667
         NTP   0.906758  0.940757  0.923445      2667
     NetBIOS   0.719658  0.473566  0.571235      2667
     Portmap   0.615030  0.850338  0.713791      2666
        SNMP   0.707523  0.818455  0.758957      2666
        SSDP   0.501784  0.474869  0.487955      2666
         Syn   0.910457  0.568267  0.699769      2666
        TFTP   0.979198  0.988747  0.983949      2666
         UDP   0.504165  0.567304  0.533874      2667
     UDP-lag   0.640199  0.819273  0.718750      2667

    accuracy                       0.714170     31998
   macro avg   0.728039  0.714173  0.709967     31998
weighted avg   0.728041  0.714170  0.709966     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.794213  0.533188  0.63



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.684482  0.547432  0.608333      2667
        LDAP   0.615040  0.668541  0.640676      2667
       MSSQL   0.907772  0.915604  0.911671      2666
         NTP   0.912401  0.949362  0.930515      2666
     NetBIOS   0.711440  0.468867  0.565227      2666
     Portmap   0.613600  0.852963  0.713748      2666
        SNMP   0.715456  0.753281  0.733881      2667
        SSDP   0.505112  0.463067  0.483177      2667
         Syn   0.872738  0.578553  0.695829      2667
        TFTP   0.985816  0.990251  0.988028      2667
         UDP   0.507404  0.578178  0.540484      2667
     UDP-lag   0.634767  0.786197  0.702413      2666

    accuracy                       0.712679     31999
   macro avg   0.722169  0.712691  0.709498     31999
weighted avg   0.722164  0.712679  0.709490     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.727512  0.579914  0.64



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.576754  0.638410  0.606017      2666
        LDAP   0.610950  0.472816  0.533080      2667
       MSSQL   0.893704  0.905101  0.899366      2666
         NTP   0.916938  0.952363  0.934315      2666
     NetBIOS   0.744735  0.437734  0.551382      2666
     Portmap   0.607409  0.885264  0.720476      2667
        SNMP   0.726728  0.772778  0.749046      2667
        SSDP   0.510654  0.512186  0.511419      2667
         Syn   0.926329  0.542182  0.684011      2667
        TFTP   0.982450  0.986502  0.984471      2667
         UDP   0.521188  0.539558  0.530214      2667
     UDP-lag   0.623662  0.830458  0.712355      2666

    accuracy                       0.706272     31999
   macro avg   0.720125  0.706279  0.701346     31999
weighted avg   0.720120  0.706272  0.701340     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.613271  0.709818  0.65



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.719519  0.493623  0.585539      2666
        LDAP   0.603786  0.681665  0.640366      2667
       MSSQL   0.903505  0.898763  0.901128      2667
         NTP   0.907361  0.947862  0.927169      2666
     NetBIOS   0.723364  0.435321  0.543539      2667
     Portmap   0.607569  0.878890  0.718467      2667
        SNMP   0.708099  0.803451  0.752768      2666
        SSDP   0.514536  0.484621  0.499131      2666
         Syn   0.929287  0.556805  0.696366      2667
        TFTP   0.981392  0.988751  0.985058      2667
         UDP   0.524252  0.571643  0.546923      2666
     UDP-lag   0.632295  0.836895  0.720349      2667

    accuracy                       0.714866     31999
   macro avg   0.729580  0.714858  0.709734     31999
weighted avg   0.729589  0.714866  0.709741     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.776976  0.549239  0.64



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.598405  0.619048  0.608551      2667
        LDAP   0.626686  0.487997  0.548714      2666
       MSSQL   0.905370  0.904012  0.904690      2667
         NTP   0.917967  0.948256  0.932866      2667
     NetBIOS   0.713707  0.462692  0.561419      2667
     Portmap   0.608801  0.845519  0.707895      2667
        SNMP   0.704956  0.805701  0.751969      2666
        SSDP   0.518248  0.559265  0.537976      2666
         Syn   0.875507  0.567142  0.688368      2666
        TFTP   0.977071  0.990998  0.983985      2666
         UDP   0.534550  0.516504  0.525372      2666
     UDP-lag   0.632653  0.802025  0.707341      2667

    accuracy                       0.709107     31998
   macro avg   0.717827  0.709097  0.704929     31998
weighted avg   0.717829  0.709107  0.704935     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.646325  0.656941  0.65



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.749413  0.478815  0.584306      2667
        LDAP   0.601392  0.680795  0.638635      2666
       MSSQL   0.900074  0.908511  0.904273      2667
         NTP   0.906758  0.940757  0.923445      2667
     NetBIOS   0.720068  0.473566  0.571364      2667
     Portmap   0.614863  0.850338  0.713679      2666
        SNMP   0.707523  0.818455  0.758957      2666
        SSDP   0.500395  0.475244  0.487495      2666
         Syn   0.910412  0.564141  0.696619      2666
        TFTP   0.978834  0.988747  0.983766      2666
         UDP   0.503342  0.564679  0.532250      2667
     UDP-lag   0.638621  0.819648  0.717898      2667

    accuracy                       0.713638     31998
   macro avg   0.727641  0.713641  0.709390     31998
weighted avg   0.727643  0.713638  0.709390     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.794167  0.533038  0.63



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.665860  0.515754  0.581272      2666
        LDAP   0.598986  0.664792  0.630176      2667
       MSSQL   0.884144  0.893098  0.888599      2666
         NTP   0.891196  0.869467  0.880197      2666
     NetBIOS   0.607349  0.669542  0.636931      2666
     Portmap   0.639671  0.613048  0.626077      2667
        SNMP   0.723957  0.773903  0.748097      2667
        SSDP   0.515347  0.629546  0.566751      2667
         Syn   0.873134  0.526434  0.656842      2667
        TFTP   0.974055  0.985377  0.979683      2667
         UDP   0.544946  0.438695  0.486082      2667
     UDP-lag   0.616616  0.837959  0.710447      2666

    accuracy                       0.701459     31999
   macro avg   0.711272  0.701468  0.699263     31999
weighted avg   0.711268  0.701459  0.699257     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.725441  0.588765  0.64



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.715859  0.487622  0.580098      2666
        LDAP   0.603033  0.685789  0.641754      2667
       MSSQL   0.894398  0.886014  0.890186      2667
         NTP   0.884397  0.878095  0.881235      2666
     NetBIOS   0.714110  0.434571  0.540326      2667
     Portmap   0.603154  0.874766  0.714002      2667
        SNMP   0.708224  0.807577  0.754644      2666
        SSDP   0.522157  0.570143  0.545096      2666
         Syn   0.866546  0.538058  0.663891      2667
        TFTP   0.975185  0.987252  0.981181      2667
         UDP   0.545235  0.508627  0.526295      2666
     UDP-lag   0.622763  0.835021  0.713439      2667

    accuracy                       0.707803     31999
   macro avg   0.721255  0.707794  0.702679     31999
weighted avg   0.721262  0.707803  0.702686     31999

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.785753  0.538588  0.63



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.596549  0.609299  0.602857      2667
        LDAP   0.626016  0.490998  0.550347      2666
       MSSQL   0.905718  0.896888  0.901281      2667
         NTP   0.891800  0.880765  0.886248      2667
     NetBIOS   0.690617  0.482940  0.568402      2667
     Portmap   0.607163  0.820022  0.697719      2667
        SNMP   0.703211  0.813203  0.754218      2666
        SSDP   0.538089  0.582896  0.559597      2666
         Syn   0.824875  0.554764  0.663377      2666
        TFTP   0.976279  0.987997  0.982103      2666
         UDP   0.557240  0.524006  0.540112      2666
     UDP-lag   0.622120  0.809899  0.703698      2667

    accuracy                       0.704482     31998
   macro avg   0.711640  0.704473  0.700830     31998
weighted avg   0.711641  0.704482  0.700835     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.649551  0.645166  0.64



Validation Results: 
              precision    recall  f1-score   support

         DNS   0.736750  0.474316  0.577099      2667
        LDAP   0.600463  0.681545  0.638440      2666
       MSSQL   0.886834  0.899138  0.892944      2667
         NTP   0.889566  0.875891  0.882675      2667
     NetBIOS   0.744575  0.450319  0.561215      2667
     Portmap   0.611430  0.878845  0.721145      2666
        SNMP   0.703560  0.815454  0.755386      2666
        SSDP   0.521165  0.503376  0.512116      2666
         Syn   0.860588  0.548762  0.670179      2666
        TFTP   0.972603  0.985371  0.978945      2666
         UDP   0.520263  0.563180  0.540871      2667
     UDP-lag   0.625744  0.827522  0.712625      2667

    accuracy                       0.708638     31998
   macro avg   0.722795  0.708643  0.703637     31998
weighted avg   0.722797  0.708638  0.703635     31998

Testing Results: 
              precision    recall  f1-score   support

         DNS   0.799200  0.524488  0.63

In [9]:
best = pickle.load(open("multiclass_params_KNN_1670628152.649287.pickle", 'rb'))

In [10]:
for i in range(0, 5):
    print(best[i])

('Multiclass KNN All accuracy Fold 1', OrderedDict([('algorithm', 'kd_tree'), ('n_neighbors', 29), ('weights', 'distance')]))
('Multiclass KNN All accuracy Fold 2', OrderedDict([('algorithm', 'kd_tree'), ('n_neighbors', 24), ('weights', 'distance')]))
('Multiclass KNN All accuracy Fold 3', OrderedDict([('algorithm', 'kd_tree'), ('n_neighbors', 47), ('weights', 'distance')]))
('Multiclass KNN All accuracy Fold 4', OrderedDict([('algorithm', 'kd_tree'), ('n_neighbors', 50), ('weights', 'distance')]))
('Multiclass KNN All accuracy Fold 5', OrderedDict([('algorithm', 'ball_tree'), ('n_neighbors', 50), ('weights', 'distance')]))


In [11]:
clf = KNeighborsClassifier(algorithm = 'kd_tree', n_neighbors=47, weights = 'distance')
clf.fit(X, y)
pred_test = clf.predict(X_test_f)
print(classification_report(y_test_f, pred_test, target_names=multiclass_labels, digits=6))

              precision    recall  f1-score   support

         DNS   0.650171  0.684842  0.667056     13333
        LDAP   0.666569  0.511288  0.578693     13333
       MSSQL   0.966134  0.964974  0.965553     13333
         NTP   0.997082  0.999700  0.998389     13332
     NetBIOS   0.740944  0.477087  0.580436     13333
     Portmap   0.620312  0.867172  0.723258     13333
        SNMP   0.733127  0.854710  0.789264     13332
        SSDP   0.801893  0.756301  0.778430     13332
         Syn   0.958987  0.596265  0.735328     13333
        TFTP   0.997816  0.993550  0.995678     13333
         UDP   0.773242  0.824046  0.797836     13333
     UDP-lag   0.679556  0.913448  0.779331     13333

    accuracy                       0.786947    159993
   macro avg   0.798819  0.786948  0.782438    159993
weighted avg   0.798818  0.786947  0.782436    159993

