In [1]:
import pandas as pd
from benchmarkUtils import Benchmark
# Relax pandas' display limits for the result tables rendered by later cells.
for _option, _value in (
    ("display.width", 10000),
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.max_colwidth", None),
):
    pd.set_option(_option, _value)

# Benchmark helper used by the following cells, fed from the results CSV.
bu = Benchmark()
bu.import_df("./benchmark_results.csv")

In [2]:
def pretty_print(model, datasets=("New attacks", "Similar attacks", "Known attacks")):
    """Show one metrics table per dataset for ``model``, then list its pipelines.

    For every name in ``datasets`` the rows returned by ``bu.display(model=model)``
    are filtered on the "Dataset" column and pivoted into a table indexed by
    "Info" with one column per metric. Each table is rounded to 5 decimals,
    titled ``"<model> - <dataset>"`` and rendered with ``display``.
    Afterwards every distinct ("Model", "Info") pair is printed.

    Parameters
    ----------
    model : str
        Forwarded unchanged to ``bu.display(model=...)``.
    datasets : iterable of str, optional
        Exact "Dataset" values to report, in display order. The default
        reproduces the three tables this function has always shown.

    Returns
    -------
    None
        Output is produced through ``display`` and ``print`` only.
    """
    metric_columns = ["Accuracy", "Precision", "Recall", "F1", "Detection time (ms)"]
    results = bu.display(model=model).rename(
        columns={"Time per data per iter": "Detection time (ms)"}
    )

    for dataset in datasets:
        table = results[results["Dataset"] == dataset].pivot(
            index=["Info"], columns="Dataset", values=metric_columns
        )
        # pivot labels columns as (metric, dataset) pairs; only one dataset is
        # present after filtering, so keep just the metric name.
        table.columns = [metric for metric, _ in table.columns]
        display(table.round(5).rename_axis(f"{model} - {dataset}"))

    # One entry per distinct (Model, Info) pair, in the order the rows appear.
    pipelines = results[["Model", "Info"]].drop_duplicates()
    for model_params, pipeline in pipelines.itertuples(index=False, name=None):
        print("Pipeline: ", pipeline)
        print("Params  : ", model_params)
        print()

In [3]:
# Keyword arguments meant to be unpacked into DataFrame.to_latex(...).
common_latex_args = dict(
    index=False,  # leave the row index out of the table
    escape=True,  # escape LaTeX special characters in cell text
    formatters=dict(
        # Render the correlation filter in the "Info" column as inline math.
        Info=lambda label: label.replace("|correlation| > 0.1", "$|corr| > 0.1$"),
    ),
    float_format="{:.4f}".format,  # four decimal places for floats
)

In [4]:
# Per-dataset metric tables and recorded parameters for the SVM pipelines.
pretty_print(model="SVM")

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
SVM - New attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.76714,0.65625,0.00946,0.01866,2984.30678
All features with 95% PCA,0.76682,0.61966,0.00817,0.01612,3798.19162
|correlation| > 0.1 features scaled,0.76562,0.38255,0.00321,0.00637,2147.51506
|correlation| > 0.1 features with 95% PCA,0.76639,0.58647,0.00439,0.00872,2294.04816


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
SVM - Similar attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.99046,0.99466,0.98113,0.98785,1037.26303
All features with 95% PCA,0.99106,0.99688,0.98046,0.9886,1514.05368
|correlation| > 0.1 features scaled,0.98902,0.99755,0.97462,0.98595,749.06375
|correlation| > 0.1 features with 95% PCA,0.9886,0.99931,0.97184,0.98538,830.62343


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
SVM - Known attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.99757,0.99712,0.9973,0.99721,518.72968
All features with 95% PCA,0.99686,0.9955,0.9973,0.9964,689.6471
|correlation| > 0.1 features scaled,0.99577,0.99711,0.99315,0.99513,362.89114
|correlation| > 0.1 features with 95% PCA,0.99647,0.99855,0.99333,0.99594,428.19019


Pipeline:  All features scaled
Params  :  SVM {'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  SVM {'C': 1000, 'gamma': 'scale', 'kernel': 'rbf'}

Pipeline:  All features with 95% PCA
Params  :  SVM {'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  SVM {'C': 1000, 'gamma': 'auto', 'kernel': 'rbf'}



In [5]:
# Per-dataset metric tables and recorded parameters for the GaussianNB pipelines.
pretty_print(model="GaussianNB")

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
GaussianNB - New attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.69134,0.31335,0.2682,0.28902,50.29495
All features with 95% PCA,0.66436,0.38287,0.71079,0.49767,83.33984
|correlation| > 0.1 features scaled,0.6997,0.31935,0.25084,0.28098,28.35442
|correlation| > 0.1 features with 95% PCA,0.79337,0.72914,0.18561,0.2959,41.31183


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
GaussianNB - Similar attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.79484,0.66106,0.98716,0.79186,19.19476
All features with 95% PCA,0.76001,0.62474,0.98391,0.76423,30.44038
|correlation| > 0.1 features scaled,0.80567,0.67189,0.99368,0.8017,7.98878
|correlation| > 0.1 features with 95% PCA,0.96868,0.94287,0.98017,0.96116,22.16548


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
GaussianNB - Known attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.81738,0.70689,0.99117,0.82523,9.91684
All features with 95% PCA,0.78556,0.67167,0.99189,0.80096,15.76805
|correlation| > 0.1 features scaled,0.82601,0.7159,0.99477,0.8326,4.69666
|correlation| > 0.1 features with 95% PCA,0.93456,0.92272,0.92721,0.92496,12.79858


Pipeline:  All features scaled
Params  :  GaussianNB {'var_smoothing': 1.519911082952933e-07}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  GaussianNB {'var_smoothing': 4.328761281083053e-06}

Pipeline:  All features with 95% PCA
Params  :  GaussianNB {'var_smoothing': 4.328761281083053e-06}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  GaussianNB {'var_smoothing': 0.0005336699231206307}



In [6]:
# Per-dataset metric tables and recorded parameters for the Logistic Regression pipelines.
pretty_print(model="Logistic Regression")

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Logistic Regression - New attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.77448,0.84507,0.04394,0.08353,36.03333
All features with 95% PCA,0.76731,0.53333,0.04191,0.07771,72.39632
|correlation| > 0.1 features scaled,0.78468,0.822,0.10145,0.18061,24.30066
|correlation| > 0.1 features with 95% PCA,0.76736,0.5458,0.03256,0.06145,30.20472


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Logistic Regression - Similar attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.98004,0.99511,0.95421,0.97423,18.70627
All features with 95% PCA,0.98251,0.98343,0.97213,0.97775,22.98899
|correlation| > 0.1 features scaled,0.98413,0.98764,0.97203,0.97977,15.14763
|correlation| > 0.1 features with 95% PCA,0.98175,0.98051,0.97318,0.97683,13.88026


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Logistic Regression - Known attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.99334,0.98996,0.99477,0.99236,6.33683
All features with 95% PCA,0.9797,0.97419,0.97928,0.97673,16.59493
|correlation| > 0.1 features scaled,0.98965,0.98323,0.99315,0.98817,11.0295
|correlation| > 0.1 features with 95% PCA,0.97845,0.97377,0.97676,0.97526,7.6283


Pipeline:  All features scaled
Params  :  Logistic Regression {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  Logistic Regression {'C': 0.01, 'penalty': 'none', 'solver': 'newton-cg'}

Pipeline:  All features with 95% PCA
Params  :  Logistic Regression {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  Logistic Regression {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}



In [7]:
# Per-dataset metric tables and recorded parameters for the XGBClassifier pipelines.
pretty_print(model="XGBClassifier")

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
XGBClassifier - New attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.76233,0.27559,0.00986,0.01904,126.80479
All features with 95% PCA,0.76745,0.60196,0.01729,0.03362,169.95784
|correlation| > 0.1 features scaled,0.77997,0.75095,0.08884,0.15888,122.58658
|correlation| > 0.1 features with 95% PCA,0.76748,0.69203,0.01076,0.02119,138.88963


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
XGBClassifier - Similar attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.98917,0.99931,0.97328,0.98612,51.63573
All features with 95% PCA,0.99148,0.99631,0.98209,0.98915,105.65433
|correlation| > 0.1 features scaled,0.99084,0.99922,0.97759,0.98828,46.50661
|correlation| > 0.1 features with 95% PCA,0.98981,0.99465,0.9795,0.98702,88.79687


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
XGBClassifier - Known attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.99984,0.99982,0.99982,0.99982,39.1346
All features with 95% PCA,0.99577,0.99442,0.99586,0.99514,89.67574
|correlation| > 0.1 features scaled,0.99976,0.99964,0.99982,0.99973,22.74885
|correlation| > 0.1 features with 95% PCA,0.99671,0.99604,0.9964,0.99622,76.1154


Pipeline:  All features scaled
Params  :  XGBClassifier {'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 800, 'scale_pos_weight': 1}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  XGBClassifier {'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 800, 'scale_pos_weight': 5}

Pipeline:  All features with 95% PCA
Params  :  XGBClassifier {'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 200, 'scale_pos_weight': 1}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  XGBClassifier {'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 100, 'scale_pos_weight': 1}



In [8]:
# Per-dataset metric tables and recorded parameters for the Random Forest pipelines.
pretty_print(model="Random Forest")

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Random Forest - New attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.76682,1.0,0.00315,0.00629,622.69861
All features with 95% PCA,0.76682,0.57821,0.01166,0.02286,525.65986
|correlation| > 0.1 features scaled,0.76724,0.92308,0.00541,0.01075,308.59882
|correlation| > 0.1 features with 95% PCA,0.76665,0.62874,0.00591,0.01172,272.3147


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Random Forest - Similar attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.98747,1.0,0.9683,0.98389,242.98107
All features with 95% PCA,0.98917,0.99532,0.9772,0.98618,232.80571
|correlation| > 0.1 features scaled,0.98864,0.9998,0.97146,0.98543,111.21453
|correlation| > 0.1 features with 95% PCA,0.99046,0.99746,0.97835,0.98781,135.49947


Unnamed: 0_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Random Forest - Known attacks,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All features scaled,0.99984,0.99982,0.99982,0.99982,140.3135
All features with 95% PCA,0.9942,0.99174,0.99495,0.99334,157.93166
|correlation| > 0.1 features scaled,0.99984,0.99982,0.99982,0.99982,68.07935
|correlation| > 0.1 features with 95% PCA,0.99686,0.99622,0.99658,0.9964,84.76016


Pipeline:  All features scaled
Params  :  Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

Pipeline:  All features with 95% PCA
Params  :  Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}



In [9]:
# "New" dataset results ordered by "Time per data per iter", smallest first.
bu.display(
    dataset="New",
    sort_by="Time per data per iter",
    ascending=True,
)

Unnamed: 0,Model,Dataset,Info,Data size,Accuracy,Precision,Recall,F1,Time per data per iter
0,"Logistic Regression {'C': 0.01, 'penalty': 'none', 'solver': 'newton-cg'}",New attacks,|correlation| > 0.1 features scaled,75890,0.784675,0.821999,0.101453,0.180615,24.300656
1,GaussianNB {'var_smoothing': 4.328761281083053e-06},New attacks,|correlation| > 0.1 features scaled,75890,0.699697,0.319349,0.250845,0.280982,28.354416
2,"Logistic Regression {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}",New attacks,|correlation| > 0.1 features with 95% PCA,75890,0.767361,0.545798,0.03256,0.061453,30.204715
3,"Logistic Regression {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}",New attacks,All features scaled,75890,0.774476,0.84507,0.043939,0.083534,36.033333
4,GaussianNB {'var_smoothing': 0.0005336699231206307},New attacks,|correlation| > 0.1 features with 95% PCA,75890,0.793372,0.729144,0.185613,0.2959,41.311833
5,GaussianNB {'var_smoothing': 1.519911082952933e-07},New attacks,All features scaled,75890,0.691343,0.313347,0.268195,0.289018,50.294952
6,"Logistic Regression {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",New attacks,All features with 95% PCA,75890,0.767308,0.533333,0.041911,0.077715,72.396318
7,GaussianNB {'var_smoothing': 4.328761281083053e-06},New attacks,All features with 95% PCA,75890,0.664356,0.382874,0.710793,0.497673,83.339836
8,"XGBClassifier {'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 800, 'scale_pos_weight': 5}",New attacks,|correlation| > 0.1 features scaled,75890,0.779971,0.750952,0.088835,0.158876,122.58658
9,"XGBClassifier {'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 800, 'scale_pos_weight': 1}",New attacks,All features scaled,75890,0.762327,0.275591,0.009858,0.019035,126.80479


In [10]:
# Five best "New" dataset results by F1 (descending).
bu.display(
    dataset="New",
    sort_by="F1",
    ascending=False,
    top=5,
)

Unnamed: 0,Model,Dataset,Info,Data size,Accuracy,Precision,Recall,F1,Time per data per iter
0,GaussianNB {'var_smoothing': 4.328761281083053e-06},New attacks,All features with 95% PCA,75890,0.664356,0.382874,0.710793,0.497673,83.339836
1,GaussianNB {'var_smoothing': 0.0005336699231206307},New attacks,|correlation| > 0.1 features with 95% PCA,75890,0.793372,0.729144,0.185613,0.2959,41.311833
2,GaussianNB {'var_smoothing': 1.519911082952933e-07},New attacks,All features scaled,75890,0.691343,0.313347,0.268195,0.289018,50.294952
3,GaussianNB {'var_smoothing': 4.328761281083053e-06},New attacks,|correlation| > 0.1 features scaled,75890,0.699697,0.319349,0.250845,0.280982,28.354416
4,"Logistic Regression {'C': 0.01, 'penalty': 'none', 'solver': 'newton-cg'}",New attacks,|correlation| > 0.1 features scaled,75890,0.784675,0.821999,0.101453,0.180615,24.300656


In [11]:
# Five best "New" dataset results by Recall (descending).
bu.display(
    dataset="New",
    sort_by="Recall",
    ascending=False,
    top=5,
)

Unnamed: 0,Model,Dataset,Info,Data size,Accuracy,Precision,Recall,F1,Time per data per iter
0,GaussianNB {'var_smoothing': 4.328761281083053e-06},New attacks,All features with 95% PCA,75890,0.664356,0.382874,0.710793,0.497673,83.339836
1,GaussianNB {'var_smoothing': 1.519911082952933e-07},New attacks,All features scaled,75890,0.691343,0.313347,0.268195,0.289018,50.294952
2,GaussianNB {'var_smoothing': 4.328761281083053e-06},New attacks,|correlation| > 0.1 features scaled,75890,0.699697,0.319349,0.250845,0.280982,28.354416
3,GaussianNB {'var_smoothing': 0.0005336699231206307},New attacks,|correlation| > 0.1 features with 95% PCA,75890,0.793372,0.729144,0.185613,0.2959,41.311833
4,"Logistic Regression {'C': 0.01, 'penalty': 'none', 'solver': 'newton-cg'}",New attacks,|correlation| > 0.1 features scaled,75890,0.784675,0.821999,0.101453,0.180615,24.300656


In [12]:
# Five best "New" dataset results by Precision (descending).
bu.display(
    dataset="New",
    sort_by="Precision",
    ascending=False,
    top=5,
)

Unnamed: 0,Model,Dataset,Info,Data size,Accuracy,Precision,Recall,F1,Time per data per iter
0,"Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}",New attacks,All features scaled,75890,0.76682,1.0,0.003155,0.006289,622.698613
1,"Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}",New attacks,|correlation| > 0.1 features scaled,75890,0.767242,0.923077,0.005408,0.010753,308.598819
2,"Logistic Regression {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}",New attacks,All features scaled,75890,0.774476,0.84507,0.043939,0.083534,36.033333
3,"Logistic Regression {'C': 0.01, 'penalty': 'none', 'solver': 'newton-cg'}",New attacks,|correlation| > 0.1 features scaled,75890,0.784675,0.821999,0.101453,0.180615,24.300656
4,"XGBClassifier {'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 800, 'scale_pos_weight': 5}",New attacks,|correlation| > 0.1 features scaled,75890,0.779971,0.750952,0.088835,0.158876,122.58658


In [13]:
# Five best "New" dataset results by Accuracy (descending).
bu.display(
    dataset="New",
    sort_by="Accuracy",
    ascending=False,
    top=5,
)

Unnamed: 0,Model,Dataset,Info,Data size,Accuracy,Precision,Recall,F1,Time per data per iter
0,GaussianNB {'var_smoothing': 0.0005336699231206307},New attacks,|correlation| > 0.1 features with 95% PCA,75890,0.793372,0.729144,0.185613,0.2959,41.311833
1,"Logistic Regression {'C': 0.01, 'penalty': 'none', 'solver': 'newton-cg'}",New attacks,|correlation| > 0.1 features scaled,75890,0.784675,0.821999,0.101453,0.180615,24.300656
2,"XGBClassifier {'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 800, 'scale_pos_weight': 5}",New attacks,|correlation| > 0.1 features scaled,75890,0.779971,0.750952,0.088835,0.158876,122.58658
3,"Logistic Regression {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}",New attacks,All features scaled,75890,0.774476,0.84507,0.043939,0.083534,36.033333
4,"XGBClassifier {'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 100, 'scale_pos_weight': 1}",New attacks,|correlation| > 0.1 features with 95% PCA,75890,0.767479,0.692029,0.010759,0.021189,138.889631


In [14]:
# Print the accuracy ranking for the "New" dataset as a LaTeX table, with
# show_model_params=False and the shared to_latex keyword arguments.
print(
    bu.display(
        dataset="New",
        sort_by="Accuracy",
        ascending=False,
        top=5,
        show_model_params=False,
    ).to_latex(**common_latex_args)
)

\begin{tabular}{lllrrrrrr}
\toprule
Model & Dataset & Info & Data size & Accuracy & Precision & Recall & F1 & Time per data per iter \\
\midrule
GaussianNB & New attacks & $|corr| > 0.1$ features with 95\% PCA & 75890 & 0.7934 & 0.7291 & 0.1856 & 0.2959 & 41.3118 \\
Logistic Regression & New attacks & $|corr| > 0.1$ features scaled & 75890 & 0.7847 & 0.8220 & 0.1015 & 0.1806 & 24.3007 \\
XGBClassifier & New attacks & $|corr| > 0.1$ features scaled & 75890 & 0.7800 & 0.7510 & 0.0888 & 0.1589 & 122.5866 \\
Logistic Regression & New attacks & All features scaled & 75890 & 0.7745 & 0.8451 & 0.0439 & 0.0835 & 36.0333 \\
XGBClassifier & New attacks & $|corr| > 0.1$ features with 95\% PCA & 75890 & 0.7675 & 0.6920 & 0.0108 & 0.0212 & 138.8896 \\
\bottomrule
\end{tabular}

