In [1]:
import os, platform, pprint, sys
import fastai
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import yellowbrick as yb

from fastai.tabular.data import TabularDataLoaders, TabularPandas
from fastai.tabular.all import FillMissing, Categorify, Normalize, tabular_learner, accuracy, ClassificationInterpretation, ShowGraphCallback, RandomSplitter, range_of

from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

from yellowbrick.model_selection import CVScores, LearningCurve, ValidationCurve



# random seed shared by every sampling / splitting step in this notebook
seed: int = 14


# short alias for a narrow, indented pretty-printer used when inspecting data
pretty = pprint.PrettyPrinter(width=30, indent=4).pprint


# directories holding the prepared data (one per experiment type) and the saved models
data_path_1: str = '../data/prepared/baseline/'
data_path_2: str = '../data/prepared/timebased/'
modelPath  : str = './models'


# attack types under study; every attack has a matching '<attack>_vs_all.csv' dataset file
attacks : list = [
    'DNS', 'LDAP', 'MSSQL', 'NetBIOS', 'NTP', 'Portmap',
    'SNMP', 'SSDP', 'Syn', 'TFTP', 'UDP', 'UDPLag',
]
datasets: list = [f'{attack}_vs_all.csv' for attack in attacks]


# integer tags identifying the two experiment types
Baseline : int = 0
Timebased: int = 1


# print library and python versions for reproducibility
# NOTE(review): the saved output of this cell lists a `faiss` entry that this
# code does not print, so the stored output appears to come from an earlier
# revision of the cell -- re-run the cell to refresh it.
print(
    f'''
    python:\t{platform.python_version()}

    \tfastai:\t\t{fastai.__version__}
    \tmatplotlib:\t{mpl.__version__}
    \tnumpy:\t\t{np.__version__}
    \tpandas:\t\t{pd.__version__}
    \tsklearn:\t{sklearn.__version__}
    \tyellowbrick:\t{yb.__version__}
    '''
)


    python:	3.7.10

    	faiss:		1.7.0
    	fastai:		2.4.1
    	matplotlib:	3.3.4
    	numpy:		1.20.3
    	pandas:		1.2.5
    	sklearn:	0.24.2
    	yellowbrick:	1.3.post1
    


In [2]:
def get_file_path(directory: str):
    '''
        Build a path resolver bound to one directory.

        The returned function takes a file name and joins it onto `directory`
        with os.path.join, giving back the resulting path as a string.
    '''

    def join_to_directory(file: str) -> str:
        # `directory` is captured from the enclosing call
        full_path: str = os.path.join(directory, file)
        return full_path

    return join_to_directory


# bind one path resolver to each of the two prepared-data directories
baseline_path  = get_file_path(data_path_1)
timebased_path = get_file_path(data_path_2)


# resolve every dataset file name against both directories
baseline_files : list = [baseline_path(file_name)  for file_name in datasets]
timebased_files: list = [timebased_path(file_name) for file_name in datasets]

In [4]:
def load_data(filePath: str) -> pd.DataFrame:
    '''
        Loads the Dataset from the given filepath and caches it for quick access in the future
        Function will only work when filepath is a .csv file

        The cache is a pickle stored under ../data/cache/. Paths below
        ../data/prepared/ keep their sub-path, so
        ../data/prepared/baseline/X.csv is cached as ../data/cache/baseline/X.csv.pickle.

        Parameters:
            filePath: path to the .csv file to load

        Returns:
            the dataset as a pandas DataFrame, read from the cache when a
            cache entry already exists, otherwise parsed from the csv

        Note: an existing cache entry is used as-is; it is not refreshed when
        the csv changes, so delete the pickle to force a reload.
    '''

    preparedPrefix: str = '../data/prepared/'

    if filePath.startswith(preparedPrefix):
        # strip the prepared-data prefix by its actual length rather than a magic number
        cacheKey: str = filePath[len(preparedPrefix):]
    else:
        # any other path: drop leading '..', '.' and empty segments so that
        # distinct files get distinct cache entries located inside the cache directory
        segments = os.path.normpath(filePath).split(os.sep)
        cacheKey: str = '/'.join(part for part in segments if part not in ('', '.', '..'))

    pickleDump: str = f'../data/cache/{cacheKey}.pickle'

    print(f'Loading Dataset: {filePath}')
    print(f'\tTo Dataset Cache: {pickleDump}\n')

    # check if data already exists within cache
    if os.path.exists(pickleDump):
        # NOTE: unpickling can execute arbitrary code; this is only acceptable
        # because the cache files are produced by this function itself
        df = pd.read_pickle(pickleDump)

    # if not, load data and cache it
    else:
        df = pd.read_csv(filePath, low_memory=True)
        # to_pickle does not create missing directories, so make sure the target folder exists
        os.makedirs(os.path.dirname(pickleDump), exist_ok=True)
        df.to_pickle(pickleDump)

    return df


def run_experiment(df: pd.DataFrame, name: str, sample_size: int = 100000) -> tuple:
    '''
        Run classification using K-Nearest Neighbors on a random sample of the given dataframe

        Parameters:
            df: dataset to run the experiment on; must contain a 'Label' column,
                which is used as the dependent variable
            name: identifier of the experiment, passed through unchanged in the results
            sample_size: maximum number of rows sampled from df before training
                (the whole dataframe is used when it has fewer rows)

        returns the 7-tuple with the following indices:
        results: tuple = (name, model, classes, X_train, y_train, X_test, y_test)

        Raises:
            ValueError: if fewer than two classes are found
    '''

    # Sample from the dataframe that was passed in (not from a notebook-level global),
    # capping the sample size so small datasets do not make DataFrame.sample raise
    sample_df: pd.DataFrame = df.sample(n=min(sample_size, len(df)), random_state=seed)


    # First we split the features into the dependent variable and
    # continuous and categorical features
    dep_var: str = 'Label'
    if 'Protocol' in sample_df.columns:
        categorical_features: list = ['Protocol']
    else:
        categorical_features: list = []
    # keep the dataframe's own column order so the feature layout is the same on every run
    # (building the list from a set would make the order depend on string hashing)
    continuous_features: list = [
        column for column in sample_df.columns
        if column != dep_var and column not in categorical_features
    ]


    # Next, we set up the feature engineering pipeline, namely filling missing values
    # encoding categorical features, and normalizing the continuous features
    # all within a pipeline to prevent the normalization from leaking details
    # about the test sets through the normalized mapping of the training sets
    procs = [FillMissing, Categorify, Normalize]
    splits = RandomSplitter(valid_pct=0.2, seed=seed)(range_of(sample_df))


    # The dataframe is loaded into a fastai datastructure now that
    # the feature engineering pipeline has been set up
    to = TabularPandas(
        sample_df     , y_names=dep_var                ,
        splits=splits , cat_names=categorical_features ,
        procs=procs   , cont_names=continuous_features ,
    )


    # We use fastai to quickly extract the names of the classes as they are mapped to the encodings
    dls = to.dataloaders(bs=64)
    mds = tabular_learner(dls)
    classes : list = list(mds.dls.vocab)


    # We extract the training and test datasets from the dataframe
    X_train = to.train.xs.reset_index(drop=True)
    X_test = to.valid.xs.reset_index(drop=True)
    y_train = to.train.ys.values.ravel()
    y_test = to.valid.ys.values.ravel()


    # Now that we have the train and test datasets, we set up a gridsearch of the K-NN classifier
    # using SciKitLearn and print the results
    params = {"n_neighbors": range(1, 50)}
    model = GridSearchCV(KNeighborsClassifier(), params)
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)
    report = classification_report(y_test, prediction)
    print(report)
    print("Best Parameters found by gridsearch:")
    print(model.best_params_)


    # we add a target_type_ attribute to our model so yellowbrick knows how to make the visualizations
    if len(classes) == 2:
        model.target_type_ = 'binary'
    elif len(classes) > 2:
        model.target_type_ = 'multiclass'
    else:
        print('Must be more than one class to perform classification')
        raise ValueError('Wrong number of classes')


    # Now that the classifier has been created and trained, we pass out our training values
    # so that yellowbrick can use them to create various visualizations
    return (name, model, classes, X_train, y_train, X_test, y_test)



In [5]:
# Build the experiment inputs lazily: `map` does not call load_data until an
# item is requested, and `zip` pulls one baseline frame, one time-based frame
# and one attack name per step. All three objects are single-pass iterators,
# so once `experiments` has been consumed this cell must be re-run to rebuild it.
baseline_dfs : map = map( load_data    , baseline_files  )
timebased_dfs: map = map( load_data    , timebased_files )
experiments  : zip = zip( baseline_dfs , timebased_dfs   , attacks )

In [6]:
def experiment_runner():
    '''
        A generator that handles running the experiments

        Pulls (baseline, timebased, info) triples from the module-level
        `experiments` iterator and, for each triple, runs the baseline and the
        time-based experiment through run_experiment.

        Yields:
            tuple: (baseline_results, timebased_results, info, num) where num is
            the 1-based number of the experiment that was just run, i.e. the same
            number that is printed and that do_experiment returns
    '''
    # enumerate keeps the printed and the yielded experiment number in step;
    # incrementing a counter before the yield reported a number one too high
    for num, (baseline, timebased, info) in enumerate(experiments, start=1):
        print(f'Running experiment #{num}:\t{info}')

        print('Baseline results')
        baseline_results = run_experiment(baseline, f'{info}_vs_all_baseline')

        print('\nTime-based results')
        timebased_results = run_experiment(timebased, f'{info}_vs_all_timebased')

        yield (baseline_results, timebased_results, info, num)


def do_experiment(num: int) -> tuple:
    '''
        A function that runs the specific experiment specified

        Parameters:
            num: 1-based number of the experiment to run; experiment n uses the
                 n-th entry of attacks, baseline_files and timebased_files

        Returns:
            tuple: (baseline_results, timebased_results, info, num)

        Raises:
            IndexError: if num does not refer to an existing experiment
    '''
    # reject out-of-range numbers explicitly: without this check num <= 0 would
    # wrap around through negative indexing and silently run a different experiment
    if not 1 <= num <= len(attacks):
        raise IndexError(f'experiment number must be between 1 and {len(attacks)}, got {num}')

    index = num - 1
    baseline = load_data(baseline_files[index])
    timebased = load_data(timebased_files[index])
    info = attacks[index]

    print(f'Running experiment #{num}:\t{info}')

    print('Baseline results')
    baseline_results = run_experiment(baseline, f'{info}_vs_all_baseline')

    print('\nTime-based results')
    timebased_results = run_experiment(timebased, f'{info}_vs_all_timebased')

    return (baseline_results, timebased_results, info, num)


# Create the generator object; no experiment runs until next() is called on it
experiment = experiment_runner()

## Experiment #1: DNS vs All

In [7]:
# Advance the generator by one experiment (DNS when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/DNS_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/DNS_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/DNS_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/DNS_vs_all.csv.pickle

Running experiment #1:	DNS
Baseline results
              precision    recall  f1-score   support

           0       0.85      0.98      0.91      9955
           1       0.97      0.84      0.90     10045

    accuracy                           0.91     20000
   macro avg       0.91      0.91      0.90     20000
weighted avg       0.91      0.91      0.90     20000

Best Parameters found by gridsearch:
{'n_neighbors': 4}

Time-based results
              precision    recall  f1-score   support

           0       0.86      0.97      0.91     10005
           1       0.97      0.84      0.90      9995

    accuracy                           0.91     20000
   macro avg       0.91      0.91      0.91     20000
weighted avg       0.91      0.91

## Experiment #2: LDAP vs All

In [8]:
# Advance the generator by one experiment (LDAP when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/LDAP_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/LDAP_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/LDAP_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/LDAP_vs_all.csv.pickle

Running experiment #2:	LDAP
Baseline results
              precision    recall  f1-score   support

           0       0.88      0.96      0.92     10018
           1       0.96      0.87      0.91      9982

    accuracy                           0.92     20000
   macro avg       0.92      0.91      0.91     20000
weighted avg       0.92      0.92      0.91     20000

Best Parameters found by gridsearch:
{'n_neighbors': 13}

Time-based results
              precision    recall  f1-score   support

           0       0.88      0.99      0.93     10041
           1       0.99      0.87      0.93      9959

    accuracy                           0.93     20000
   macro avg       0.94      0.93      0.93     20000
weighted avg       0.94    

## Experiment #3: MSSQL vs All

In [9]:
# Advance the generator by one experiment (MSSQL when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/MSSQL_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/MSSQL_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/MSSQL_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/MSSQL_vs_all.csv.pickle

Running experiment #3:	MSSQL
Baseline results
              precision    recall  f1-score   support

           0       0.99      0.97      0.98     10134
           1       0.97      0.99      0.98      9866

    accuracy                           0.98     20000
   macro avg       0.98      0.98      0.98     20000
weighted avg       0.98      0.98      0.98     20000

Best Parameters found by gridsearch:
{'n_neighbors': 7}

Time-based results
              precision    recall  f1-score   support

           0       0.99      0.96      0.97      9995
           1       0.96      0.99      0.97     10005

    accuracy                           0.97     20000
   macro avg       0.97      0.97      0.97     20000
weighted avg       0.97

## Experiment #4: NetBIOS vs All

In [10]:
# Advance the generator by one experiment (NetBIOS when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/NetBIOS_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/NetBIOS_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/NetBIOS_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/NetBIOS_vs_all.csv.pickle

Running experiment #4:	NetBIOS
Baseline results
              precision    recall  f1-score   support

           0       0.99      0.91      0.95      9958
           1       0.92      0.99      0.95     10042

    accuracy                           0.95     20000
   macro avg       0.95      0.95      0.95     20000
weighted avg       0.95      0.95      0.95     20000

Best Parameters found by gridsearch:
{'n_neighbors': 8}

Time-based results
              precision    recall  f1-score   support

           0       0.99      0.90      0.95      9997
           1       0.91      0.99      0.95     10003

    accuracy                           0.95     20000
   macro avg       0.95      0.95      0.95     20000
weighted avg 

## Experiment #5: NTP vs All

In [11]:
# Advance the generator by one experiment (NTP when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/NTP_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/NTP_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/NTP_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/NTP_vs_all.csv.pickle

Running experiment #5:	NTP
Baseline results
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     10012
           1       1.00      1.00      1.00      9988

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Best Parameters found by gridsearch:
{'n_neighbors': 1}

Time-based results
              precision    recall  f1-score   support

           0       1.00      0.99      0.99     10071
           1       0.99      1.00      0.99      9929

    accuracy                           0.99     20000
   macro avg       0.99      0.99      0.99     20000
weighted avg       0.99      0.99

## Experiment #6: Portmap vs All

In [12]:
# Advance the generator by one experiment (Portmap when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/Portmap_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/Portmap_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/Portmap_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/Portmap_vs_all.csv.pickle

Running experiment #6:	Portmap
Baseline results
              precision    recall  f1-score   support

           0       0.99      0.93      0.96     10034
           1       0.93      0.99      0.96      9966

    accuracy                           0.96     20000
   macro avg       0.96      0.96      0.96     20000
weighted avg       0.96      0.96      0.96     20000

Best Parameters found by gridsearch:
{'n_neighbors': 7}

Time-based results
              precision    recall  f1-score   support

           0       0.99      0.92      0.95     10035
           1       0.92      0.99      0.96      9965

    accuracy                           0.96     20000
   macro avg       0.96      0.96      0.96     20000
weighted avg 

## Experiment #7: SNMP vs All

In [13]:
# Advance the generator by one experiment (SNMP when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/SNMP_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/SNMP_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/SNMP_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/SNMP_vs_all.csv.pickle

Running experiment #7:	SNMP
Baseline results
              precision    recall  f1-score   support

           0       0.89      0.95      0.92      9992
           1       0.94      0.88      0.91     10008

    accuracy                           0.91     20000
   macro avg       0.92      0.91      0.91     20000
weighted avg       0.92      0.91      0.91     20000

Best Parameters found by gridsearch:
{'n_neighbors': 24}

Time-based results
              precision    recall  f1-score   support

           0       0.88      0.96      0.92     10064
           1       0.95      0.87      0.91      9936

    accuracy                           0.91     20000
   macro avg       0.92      0.91      0.91     20000
weighted avg       0.92    

## Experiment #8: SSDP vs All

In [14]:
# Advance the generator by one experiment (SSDP when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/SSDP_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/SSDP_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/SSDP_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/SSDP_vs_all.csv.pickle

Running experiment #8:	SSDP
Baseline results
              precision    recall  f1-score   support

           0       0.98      0.90      0.94     10056
           1       0.91      0.98      0.94      9944

    accuracy                           0.94     20000
   macro avg       0.94      0.94      0.94     20000
weighted avg       0.94      0.94      0.94     20000

Best Parameters found by gridsearch:
{'n_neighbors': 14}

Time-based results
              precision    recall  f1-score   support

           0       0.97      0.90      0.93      9916
           1       0.90      0.97      0.94     10084

    accuracy                           0.93     20000
   macro avg       0.94      0.93      0.93     20000
weighted avg       0.94    

## Experiment #9: Syn vs All

In [15]:
# Advance the generator by one experiment (Syn when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/Syn_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/Syn_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/Syn_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/Syn_vs_all.csv.pickle

Running experiment #9:	Syn
Baseline results
              precision    recall  f1-score   support

           0       0.99      0.94      0.97      9900
           1       0.94      0.99      0.97     10100

    accuracy                           0.97     20000
   macro avg       0.97      0.97      0.97     20000
weighted avg       0.97      0.97      0.97     20000

Best Parameters found by gridsearch:
{'n_neighbors': 14}

Time-based results
              precision    recall  f1-score   support

           0       0.99      0.93      0.96     10028
           1       0.94      0.99      0.96      9972

    accuracy                           0.96     20000
   macro avg       0.96      0.96      0.96     20000
weighted avg       0.96      0.9

## Experiment #10: TFTP vs All

In [16]:
# Advance the generator by one experiment (TFTP when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/TFTP_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/TFTP_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/TFTP_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/TFTP_vs_all.csv.pickle

Running experiment #10:	TFTP
Baseline results
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      9968
           1       1.00      0.99      1.00     10032

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Best Parameters found by gridsearch:
{'n_neighbors': 2}

Time-based results
              precision    recall  f1-score   support

           0       0.99      1.00      1.00     10144
           1       1.00      0.99      0.99      9856

    accuracy                           1.00     20000
   macro avg       1.00      0.99      1.00     20000
weighted avg       1.00    

## Experiment #11: UDP vs All

In [17]:
# Advance the generator by one experiment (UDP when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/UDP_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/UDP_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/UDP_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/UDP_vs_all.csv.pickle

Running experiment #11:	UDP
Baseline results
              precision    recall  f1-score   support

           0       0.99      0.91      0.95      9868
           1       0.92      0.99      0.95     10132

    accuracy                           0.95     20000
   macro avg       0.95      0.95      0.95     20000
weighted avg       0.95      0.95      0.95     20000

Best Parameters found by gridsearch:
{'n_neighbors': 16}

Time-based results
              precision    recall  f1-score   support

           0       0.98      0.90      0.94     10032
           1       0.90      0.98      0.94      9968

    accuracy                           0.94     20000
   macro avg       0.94      0.94      0.94     20000
weighted avg       0.94      0.

## Experiment #12: UDPLag vs All

In [18]:
# Advance the generator by one experiment (UDPLag when the cells are run in order);
# results is the tuple (baseline_results, timebased_results, info, num)
results = next(experiment)

Loading Dataset: ../data/prepared/baseline/UDPLag_vs_all.csv
	To Dataset Cache: ../data/cache/baseline/UDPLag_vs_all.csv.pickle

Loading Dataset: ../data/prepared/timebased/UDPLag_vs_all.csv
	To Dataset Cache: ../data/cache/timebased/UDPLag_vs_all.csv.pickle

Running experiment #12:	UDPLag
Baseline results
              precision    recall  f1-score   support

           0       0.93      0.92      0.92     10090
           1       0.92      0.93      0.92      9910

    accuracy                           0.92     20000
   macro avg       0.92      0.92      0.92     20000
weighted avg       0.92      0.92      0.92     20000

Best Parameters found by gridsearch:
{'n_neighbors': 12}

Time-based results
              precision    recall  f1-score   support

           0       0.89      0.92      0.91     10088
           1       0.92      0.88      0.90      9912

    accuracy                           0.90     20000
   macro avg       0.90      0.90      0.90     20000
weighted avg    