In [1]:
import os
import sys
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import make_scorer, balanced_accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np
import time

print("All Imports Successful")

All Imports Successful


In [2]:
baseline_df = pd.read_csv("cleaned_up_data_random_sample.csv")
baseline_df[baseline_df.select_dtypes(include=[np.number]).ge(0).all(1)]
Y=baseline_df["Label"]
X=baseline_df.drop(["Label"],axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2)

In [21]:
print(Y.value_counts())

SNMP       59884
UDP        59126
SSDP       59040
NTP        58995
LDAP       58906
MSSQL      58317
TFTP       58266
DNS        58109
NetBIOS    58102
Portmap    55476
UDP-lag    53520
Syn        52322
BENIGN      3022
WebDDoS       67
Name: Label, dtype: int64


In [24]:
# Currently a bug with Roc scoring
scoring = {'accuracy' : make_scorer(balanced_accuracy_score), 
           'precision' : make_scorer(precision_score, average='weighted'),
           'recall' : make_scorer(recall_score, pos_label=1, average='weighted'), 
           'f1_weighted' : make_scorer(f1_score, average='weighted')}
           #'roc_auc_weighted' : make_scorer(roc_auc_score, average='weighted')}
    
models = [("Decision Tree", DecisionTreeClassifier()),
          ("Linear Discriminant Analaysis", LinearDiscriminantAnalysis()),
          ("MultinomialNB", MultinomialNB()), 
          ("Random Forest", RandomForestClassifier()), 
          ("AdaBoostClassifier", AdaBoostClassifier()),
          ("GraidentBoostingClassifier", GradientBoostingClassifier()),
          ("Extra Trees", ExtraTreesClassifier()),
          ("SVC", SVC())]

In [90]:
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
results = cross_validate(DecisionTreeClassifier(), X_train, Y_train, cv=cv, scoring=scoring)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))
print(np.mean(results['test_f1_weighted']))

0.6887023842493997
0.7173894427811944
0.70852681849113
0.6973305641162492


In [91]:
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
results = cross_validate(LinearDiscriminantAnalysis(), X_train, Y_train, cv=cv, scoring=scoring)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))
print(np.mean(results['test_f1_weighted']))

0.56435068929332
0.5520087887840779
0.5314875355873591
0.48669443991961225


In [None]:
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
temp_X = X_train.np.todense() 
results = cross_validate(MultinomialNB(), temp_X, Y_train, cv=cv, scoring=scoring)
print(results)

In [94]:
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
results = cross_validate(RandomForestClassifier(), X_train, Y_train, cv=cv, scoring=scoring)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))
print(np.mean(results['test_f1_weighted']))

0.6902242495945494
0.7217009241418368
0.7108549533238284
0.6993620910122607


In [None]:
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
results = cross_validate(AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),n_estimators=600,learning_rate=1), X_train, Y_train, cv=cv, scoring=scoring)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))
print(np.mean(results['test_f1_weighted']))

In [96]:
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
results = cross_validate(GradientBoostingClassifier(), X_train, Y_train, cv=cv, scoring=scoring)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))
print(np.mean(results['test_f1_weighted']))

0.6768842369991317
0.7339495584382127
0.71151858922451
0.6964803709114558


In [97]:
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
results = cross_validate(ExtraTreesClassifier(), X_train, Y_train, cv=cv, scoring=scoring)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))
print(np.mean(results['test_f1_weighted']))

0.6925362316299382
0.7209528593877865
0.7102111542952032
0.6988041721391834


In [None]:
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
results = cross_validate(SVC(), X_train, Y_train, cv=cv, scoring=scoring)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))
print(np.mean(results['test_f1_weighted']))

In [26]:
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
scores = []
for i in range(2):
  for name, model in models:
      print("Currently working on: {}".format(name))
      t0 = time.time()
      if i == 0:
        results = cross_validate(model, X_train, Y_train, cv=cv, scoring=scoring)
      else:
        results = cross_validate(model, X_train, Y_train, cv=cv, scoring=scoring)
      train_time = time.time()-t0
      acc= np.mean(results['test_accuracy'])
      prec = np.mean(results['test_precision'])
      recall = np.mean(results['test_recall'])
      f1 = np.mean(results['test_f1_score'])
      roc_auc = np.mean(results['test_roc_auc'])
      pred_time = np.mean(results['score_time'])
      print("Layer {0}: ML Algo: {1} with: \n Score: acc={2:0.5f}, \n pre={3:0.5f}, \n rec={4:0.5f}, \n f1={5:0.5f} \n roc_auc={6:0.5f} \n train_t = {7:0.5f}, \n pred_t={8:0.5f}\n\n".format(i+1, name, acc, prec, recall, f1, roc_auc, train_time, pred_time))

Currently working on: Decision Tree


Traceback (most recent call last):
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/metrics/_scorer.py", line 88, in __call__
    *args, **kwargs)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/metrics/_scorer.py", line 243, in _score
    **self._kwargs)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/metrics/_classification.py", line 1659, in precision_score
    zero_division=zero_division)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/home/drake/miniconda3/envs/gpu2/li

Traceback (most recent call last):
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 674, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/metrics/_scorer.py", line 88, in __call__
    *args, **kwargs)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/metrics/_scorer.py", line 243, in _score
    **self._kwargs)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/metrics/_classification.py", line 1659, in precision_score
    zero_division=zero_division)
  File "/home/drake/miniconda3/envs/gpu2/lib/python3.6/site-packages/sklearn/utils/validation.py", line 63, in inner_f
    return f(*args, **kwargs)
  File "/home/drake/miniconda3/envs/gpu2/li

KeyboardInterrupt: 