## Importing Libraries

In [1]:
from time import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix

### Loading dataset

In [2]:
data = pd.read_csv('dataset.csv')
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,7,21,30,2,3,29,2,3,57,5,1,0
1,7,32,70,2,4,67,2,4,132,5,1,0
2,7,15,1,2,2,0,2,2,0,5,1,0
3,7,33,77,2,7,72,2,7,142,5,1,0
4,7,55,78,3,7,73,3,7,144,6,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
355886,7,25,46,2,15,45,4,15,90,7,0,8505
355887,7,82,14,7,10,8,0,10,16,0,0,22
355888,7,82,14,7,10,8,0,10,17,1,0,1
355889,7,30,62,2,20,60,3,20,120,3,0,0


### Data Preprocessing


In [3]:
y = data.iloc[:,0].values
x = data.iloc[:,1:].values
x,y

(array([[  21,   30,    2, ...,    5,    1,    0],
        [  32,   70,    2, ...,    5,    1,    0],
        [  15,    1,    2, ...,    5,    1,    0],
        ...,
        [  82,   14,    7, ...,    1,    0,    1],
        [  30,   62,    2, ...,    3,    0,    0],
        [  30,   62,    2, ...,    7,    0, 9156]], dtype=int64),
 array([7, 7, 7, ..., 7, 7, 7], dtype=int64))

### Splitting the dataset into training and testing 

In [4]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25)

### Scaling the dataset 

In [5]:
sc = MinMaxScaler()
x_train_sc = sc.fit_transform(x_train)
x_test_sc = sc.transform(x_test)

# Applying Machine Learning Algorithms

### XGBoost

In [7]:
from xgboost import XGBClassifier
xgbc = XGBClassifier()

In [12]:
xgbc.fit(x_train,y_train)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=4,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

In [15]:
xgbc.score(x_train,y_train)

0.9943166065982811

In [17]:
xgbc.score(x_test,y_test)

0.9945713868252166

## Applying logistic regression

In [6]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(penalty = 'l2', multi_class = 'multinomial')

In [7]:
s = time()
lr.fit(x_train_sc,y_train)
lr_time = time()-s
print("Time elapsed",lr_time)

Time elapsed 39.63400387763977


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Training accuracy

In [8]:
lr.score(x_train_sc,y_train)

0.9867000352168082

### Testing accuracy

In [9]:
lr.score(x_test_sc,y_test)

0.9878727254335585

### Accuracy of the model

In [10]:
y_lr_train_pred = lr.predict(x_train_sc)
lr_train_accuracy = accuracy_score(y_train,y_lr_train_pred)
print("Accuracy of Logistic Regression Training Dataset: ",lr_train_accuracy)

Accuracy of Logistic Regression Training Dataset:  0.9867000352168082


In [11]:
y_lr_test_pred = lr.predict(x_test_sc)
lr_test_accuracy = accuracy_score(y_test,y_lr_test_pred)
print("Accuracy of Logistic Regression Testing Dataset: ",lr_test_accuracy)

Accuracy of Logistic Regression Testing Dataset:  0.9878727254335585


## Applying KNN

In [127]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=9)

In [128]:
s = time()
knn.fit(x_train_sc,y_train)
knn_time = time()-s
print("Time elapsed",knn_time)

  return self._fit(X, y)


Time elapsed 1.409510612487793


### Training accuracy

In [129]:
knn.score(x_train_sc,y_train)

0.9936235098419739

### Testing accuracy

In [130]:
knn.score(x_test_sc,y_test)

0.9932676205140886

### Accuracy of the model

In [16]:
y_knn_train_pred = knn.predict(x_train_sc)
knn_train_accuracy = accuracy_score(y_train,y_knn_train_pred)
print("Accuracy of K Nearest Neigbour Training Dataset: ",knn_train_accuracy)

Accuracy of K Nearest Neigbour Training Dataset:  0.993645988655692


In [17]:
y_knn_test_pred = knn.predict(x_test_sc)
knn_test_accuracy = accuracy_score(y_test,y_knn_test_pred)
print("Accuracy of K Nearest Neigbour Testing Dataset: ",knn_test_accuracy)

Accuracy of K Nearest Neigbour Testing Dataset:  0.9933462960673463


## Applying Decision Tree Classifier

In [18]:
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier()

In [19]:
s = time()
dtc.fit(x_train_sc,y_train)
dtc_time = time()-s
print("Time elapsed",dtc_time)

Time elapsed 1.8945696353912354


### Training accuracy

In [20]:
dtc.score(x_train_sc,y_train)

0.9943465783499053

### Testing accuracy

In [21]:
dtc.score(x_test_sc,y_test)

0.9944477538129545

### Accuracy of the model

In [22]:
y_dtc_train_pred = dtc.predict(x_train_sc)
dtc_train_accuracy = accuracy_score(y_train,y_dtc_train_pred)
print("Accuracy of Decision Tree Classifier Training Dataset: ",dtc_train_accuracy)

Accuracy of Decision Tree Classifier Training Dataset:  0.9943465783499053


In [23]:
y_dtc_test_pred = dtc.predict(x_test_sc)
dtc_test_accuracy = accuracy_score(y_test,y_dtc_test_pred)
print("Accuracy of Decision Tree Classifier Testing Dataset: ",dtc_test_accuracy)

Accuracy of Decision Tree Classifier Testing Dataset:  0.9944477538129545


## Applying Random Forest Classifier

In [24]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()

In [25]:
s = time()
rfc.fit(x_train_sc,y_train)
rfc_time = time()-s
print("Time elapsed",rfc_time)

Time elapsed 86.07441997528076


### Training accuracy

In [26]:
rfc.score(x_train_sc,y_train)

0.9943465783499053

### Testing accuracy

In [27]:
rfc.score(x_test_sc,y_test)

0.9944814719072078

### Accuracy of the model

In [28]:
y_rfc_train_pred = rfc.predict(x_train_sc)
rfc_train_accuracy = accuracy_score(y_train,y_rfc_train_pred)
print("Accuracy of Random Forest Classifier Training Dataset: ",rfc_train_accuracy)

Accuracy of Random Forest Classifier Training Dataset:  0.9943465783499053


In [29]:
y_rfc_test_pred = rfc.predict(x_test_sc)
rfc_test_accuracy = accuracy_score(y_test,y_rfc_test_pred)
print("Accuracy of Random Forest Classifier Testing Dataset: ",rfc_test_accuracy)

Accuracy of Random Forest Classifier Testing Dataset:  0.9944814719072078


## Applying Naive bayes 

In [106]:
from sklearn.naive_bayes import MultinomialNB
nb = MultinomialNB()

In [109]:
s = time()
nb.fit(x_train_sc,y_train)
nb_time = time()-s
print("Time elapsed",nb_time)

Time elapsed 0.0859382152557373


  y = column_or_1d(y, warn=True)


### Training accuracy

In [110]:
nb.score(x_train_sc,y_train)

0.9772889052068425

### Testing accuracy

In [111]:
nb.score(x_test_sc,y_test)

0.9785215739606398

### Accuracy of the model

In [121]:
y_nb_train_pred = nb.predict(x_train_sc)
nb_train_accuracy = accuracy_score(y_train,y_nb_train_pred)
print("Accuracy of Naive Bayes Training Dataset: ",nb_train_accuracy)

Accuracy of Naive Bayes Training Dataset:  0.9772889052068425


In [122]:
y_nb_test_pred = nb.predict(x_test_sc)
nb_test_accuracy = accuracy_score(y_test,y_nb_test_pred)
print("Accuracy of Naive Bayes Testing Dataset: ",nb_test_accuracy)

Accuracy of Naive Bayes Testing Dataset:  0.9785215739606398


## Applying SVM

In [36]:
from sklearn.svm import LinearSVC
svc = LinearSVC()

In [37]:
s = time()
svc.fit(x_train_sc,y_train)
svc_time = time()-s
print("Time elapsed",svc_time)

Time elapsed 78.33316397666931


### Training accuracy

In [38]:
svc.score(x_train_sc,y_train)

0.9831783544009771

### Testing accuracy

In [39]:
svc.score(x_test_sc,y_test)

0.9846245490204893

### Accuracy of the model

In [40]:
y_svc_train_pred = svc.predict(x_train_sc)
svc_train_accuracy = accuracy_score(y_train,y_svc_train_pred)
print("Accuracy of Support Vector Machine Training Dataset: ",svc_train_accuracy)

Accuracy of Support Vector Machine Training Dataset:  0.9831783544009771


In [41]:
y_svc_test_pred = svc.predict(x_test_sc)
svc_test_accuracy = accuracy_score(y_test,y_svc_test_pred)
print("Accuracy of Support Vector Machine Testing Dataset: ",svc_test_accuracy)

Accuracy of Support Vector Machine Testing Dataset:  0.9846245490204893


## Applying Artificial Neural Network using Tensorflow

In [42]:
import tensorflow as tf
from tensorflow.keras import Sequential,layers

In [43]:
y_train.shape,y_test.shape

((266918,), (88973,))

In [44]:
y_train = y_train.reshape(-1,1)

In [45]:
y_test = y_test.reshape(-1,1)

In [46]:
y_train.shape,y_test.shape

((266918, 1), (88973, 1))

In [47]:
x_train.shape,x_train_sc.shape,x_test.shape,x_test_sc.shape

((266918, 11), (266918, 11), (88973, 11), (88973, 11))

In [48]:
ann = Sequential([
    layers.Dense(10,input_shape=(11,),activation='relu'),
    layers.Dense(20,activation='relu'),
    layers.Dense(20,activation='relu'),
    layers.Dense(8,activation='sigmoid'),
    
])

ann.compile(optimizer='adam',
             loss = 'sparse_categorical_crossentropy',
             metrics=['accuracy'])
s = time()
ann.fit(x_train_sc,y_train,epochs= 20)
ann_time = time()-s
print("Time elapsed",ann_time)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Time elapsed 279.6557123661041


## Training Accuracy

In [49]:
loss_train, ann_train_accuracy = ann.evaluate(x_train_sc,y_train)
ann_train_accuracy



0.9942379593849182

In [50]:
loss_test, ann_test_accuracy = ann.evaluate(x_test_sc,y_test)
ann_test_accuracy



0.9943915605545044

## Evaluation Metrics

In [51]:
target_names = ['DoSattack','dataProbing','malitiousControl', 'malitiousOperation','scan','spying', 'wrongSetUp','Normal']

In [52]:
target_names

['DoSattack',
 'dataProbing',
 'malitiousControl',
 'malitiousOperation',
 'scan',
 'spying',
 'wrongSetUp',
 'Normal']

## Classification Report of Logistic Regression

### For Training Dataset

In [53]:
print(classification_report(y_train, y_lr_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.93      0.50      0.65      2995
       dataProbing       1.00      0.82      0.90       186
  malitiousControl       0.94      0.48      0.64       696
malitiousOperation       0.73      0.57      0.64       522
              scan       0.99      0.40      0.57      1167
            spying       0.00      0.00      0.00       403
        wrongSetUp       0.00      0.00      0.00        93
            Normal       0.99      1.00      0.99    260856

          accuracy                           0.99    266918
         macro avg       0.70      0.47      0.55    266918
      weighted avg       0.98      0.99      0.98    266918



### For Testing Dataset

In [54]:
print(classification_report(y_test, y_lr_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.94      0.51      0.66      1005
       dataProbing       1.00      0.86      0.92        56
  malitiousControl       0.97      0.49      0.65       193
malitiousOperation       0.74      0.65      0.69       135
              scan       0.99      0.42      0.59       364
            spying       0.00      0.00      0.00       129
        wrongSetUp       0.00      0.00      0.00        29
            Normal       0.99      1.00      0.99     87062

          accuracy                           0.99     88973
         macro avg       0.70      0.49      0.56     88973
      weighted avg       0.99      0.99      0.99     88973



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Confusion Matrix for Logistic Regression

### For Training dataset

In [55]:
confusion_matrix(y_train, y_lr_train_pred)

array([[  1486,      0,      0,      0,      0,      0,      0,   1509],
       [     0,    152,      0,      0,      0,      0,      0,     34],
       [     0,      0,    335,      0,      0,      0,      0,    361],
       [     0,      0,      0,    299,      0,      0,      0,    223],
       [     0,      0,     17,      0,    470,      0,      9,    671],
       [     0,      0,      0,      0,      0,      0,      0,    403],
       [     0,      0,      0,      0,      0,      0,      0,     93],
       [   110,      0,      3,    109,      5,      3,      0, 260626]],
      dtype=int64)

### For Testing dataset

In [56]:
confusion_matrix(y_test, y_lr_test_pred)

array([[  514,     0,     0,     0,     0,     0,     0,   491],
       [    0,    48,     0,     0,     0,     0,     0,     8],
       [    0,     0,    94,     0,     0,     0,     0,    99],
       [    0,     0,     0,    88,     0,     0,     0,    47],
       [    0,     0,     3,     0,   153,     0,     2,   206],
       [    0,     0,     0,     0,     0,     0,     0,   129],
       [    0,     0,     0,     0,     0,     0,     0,    29],
       [   32,     0,     0,    31,     2,     0,     0, 86997]],
      dtype=int64)

## Classification Report for K Nearest Neighbour

### For Training Dataset

In [57]:
print(classification_report(y_train, y_knn_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.64      1.00      0.78      2995
       dataProbing       1.00      1.00      1.00       186
  malitiousControl       1.00      1.00      1.00       696
malitiousOperation       1.00      1.00      1.00       522
              scan       1.00      1.00      1.00      1167
            spying       1.00      1.00      1.00       403
        wrongSetUp       1.00      1.00      1.00        93
            Normal       1.00      0.99      1.00    260856

          accuracy                           0.99    266918
         macro avg       0.95      1.00      0.97    266918
      weighted avg       1.00      0.99      0.99    266918



### For Testing Dataset

In [58]:
print(classification_report(y_test, y_knn_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.63      1.00      0.77      1005
       dataProbing       1.00      1.00      1.00        56
  malitiousControl       1.00      1.00      1.00       193
malitiousOperation       1.00      1.00      1.00       135
              scan       1.00      1.00      1.00       364
            spying       1.00      1.00      1.00       129
        wrongSetUp       1.00      1.00      1.00        29
            Normal       1.00      0.99      1.00     87062

          accuracy                           0.99     88973
         macro avg       0.95      1.00      0.97     88973
      weighted avg       1.00      0.99      0.99     88973



## Confusion Matrix for K Nearest Neighbour

### For Training dataset

In [59]:
confusion_matrix(y_train, y_knn_train_pred)

array([[  2995,      0,      0,      0,      0,      0,      0,      0],
       [     0,    186,      0,      0,      0,      0,      0,      0],
       [     0,      0,    696,      0,      0,      0,      0,      0],
       [     0,      0,      0,    522,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1167,      0,      0,      0],
       [     0,      0,      0,      0,      0,    403,      0,      0],
       [     0,      0,      0,      0,      0,      0,     93,      0],
       [  1693,      0,      0,      0,      2,      1,      0, 259160]],
      dtype=int64)

### For Testing dataset

In [60]:
confusion_matrix(y_test, y_knn_test_pred)

array([[ 1005,     0,     0,     0,     0,     0,     0,     0],
       [    0,    56,     0,     0,     0,     0,     0,     0],
       [    0,     0,   193,     0,     0,     0,     0,     0],
       [    0,     0,     0,   135,     0,     0,     0,     0],
       [    0,     0,     0,     0,   364,     0,     0,     0],
       [    0,     0,     0,     0,     0,   129,     0,     0],
       [    0,     0,     0,     0,     0,     0,    29,     0],
       [  591,     0,     0,     0,     1,     0,     0, 86470]],
      dtype=int64)

## Classification Report for Decision Tree Classifier

### For Training Dataset

In [61]:
print(classification_report(y_train, y_dtc_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       1.00      0.50      0.66      2995
       dataProbing       1.00      1.00      1.00       186
  malitiousControl       1.00      1.00      1.00       696
malitiousOperation       1.00      1.00      1.00       522
              scan       1.00      1.00      1.00      1167
            spying       1.00      1.00      1.00       403
        wrongSetUp       1.00      1.00      1.00        93
            Normal       0.99      1.00      1.00    260856

          accuracy                           0.99    266918
         macro avg       1.00      0.94      0.96    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing Dataset

In [62]:
print(classification_report(y_test, y_dtc_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       1.00      0.51      0.68      1005
       dataProbing       1.00      1.00      1.00        56
  malitiousControl       0.99      1.00      1.00       193
malitiousOperation       1.00      1.00      1.00       135
              scan       1.00      1.00      1.00       364
            spying       0.98      1.00      0.99       129
        wrongSetUp       1.00      1.00      1.00        29
            Normal       0.99      1.00      1.00     87062

          accuracy                           0.99     88973
         macro avg       1.00      0.94      0.96     88973
      weighted avg       0.99      0.99      0.99     88973



## Confusion Matrix for Decision Tree Classifier

### For Training dataset

In [63]:
confusion_matrix(y_train, y_dtc_train_pred)

array([[  1486,      0,      0,      0,      0,      0,      0,   1509],
       [     0,    186,      0,      0,      0,      0,      0,      0],
       [     0,      0,    696,      0,      0,      0,      0,      0],
       [     0,      0,      0,    522,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1167,      0,      0,      0],
       [     0,      0,      0,      0,      0,    403,      0,      0],
       [     0,      0,      0,      0,      0,      0,     93,      0],
       [     0,      0,      0,      0,      0,      0,      0, 260856]],
      dtype=int64)

### For Testing dataset

In [64]:
confusion_matrix(y_test, y_dtc_test_pred)

array([[  514,     0,     0,     0,     0,     0,     0,   491],
       [    0,    56,     0,     0,     0,     0,     0,     0],
       [    0,     0,   193,     0,     0,     0,     0,     0],
       [    0,     0,     0,   135,     0,     0,     0,     0],
       [    0,     0,     0,     0,   364,     0,     0,     0],
       [    0,     0,     0,     0,     0,   129,     0,     0],
       [    0,     0,     0,     0,     0,     0,    29,     0],
       [    0,     0,     1,     0,     0,     2,     0, 87059]],
      dtype=int64)

## Classification Report for Random Forest Classifier

### For Training Dataset

In [65]:
print(classification_report(y_train, y_rfc_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       1.00      0.50      0.66      2995
       dataProbing       1.00      1.00      1.00       186
  malitiousControl       1.00      1.00      1.00       696
malitiousOperation       1.00      1.00      1.00       522
              scan       1.00      1.00      1.00      1167
            spying       1.00      1.00      1.00       403
        wrongSetUp       1.00      1.00      1.00        93
            Normal       0.99      1.00      1.00    260856

          accuracy                           0.99    266918
         macro avg       1.00      0.94      0.96    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing Dataset

In [66]:
print(classification_report(y_test, y_rfc_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       1.00      0.51      0.68      1005
       dataProbing       1.00      1.00      1.00        56
  malitiousControl       1.00      1.00      1.00       193
malitiousOperation       1.00      1.00      1.00       135
              scan       1.00      1.00      1.00       364
            spying       1.00      1.00      1.00       129
        wrongSetUp       1.00      1.00      1.00        29
            Normal       0.99      1.00      1.00     87062

          accuracy                           0.99     88973
         macro avg       1.00      0.94      0.96     88973
      weighted avg       0.99      0.99      0.99     88973



## Confusion Matrix for Random Forest Classifier

### For Training dataset

In [67]:
confusion_matrix(y_train, y_rfc_train_pred)

array([[  1486,      0,      0,      0,      0,      0,      0,   1509],
       [     0,    186,      0,      0,      0,      0,      0,      0],
       [     0,      0,    696,      0,      0,      0,      0,      0],
       [     0,      0,      0,    522,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1167,      0,      0,      0],
       [     0,      0,      0,      0,      0,    403,      0,      0],
       [     0,      0,      0,      0,      0,      0,     93,      0],
       [     0,      0,      0,      0,      0,      0,      0, 260856]],
      dtype=int64)

### For Testing dataset

In [68]:
confusion_matrix(y_test, y_rfc_test_pred)

array([[  514,     0,     0,     0,     0,     0,     0,   491],
       [    0,    56,     0,     0,     0,     0,     0,     0],
       [    0,     0,   193,     0,     0,     0,     0,     0],
       [    0,     0,     0,   135,     0,     0,     0,     0],
       [    0,     0,     0,     0,   364,     0,     0,     0],
       [    0,     0,     0,     0,     0,   129,     0,     0],
       [    0,     0,     0,     0,     0,     0,    29,     0],
       [    0,     0,     0,     0,     0,     0,     0, 87062]],
      dtype=int64)

## Classification Report for Naive Bayes

### For Training Dataset

In [112]:
print(classification_report(y_train, y_nb_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.34      1.00      0.50      2995
       dataProbing       0.04      0.82      0.07       186
  malitiousControl       0.18      0.92      0.31       696
malitiousOperation       0.67      1.00      0.80       522
              scan       1.00      0.14      0.24      1167
            spying       0.01      0.94      0.01       403
        wrongSetUp       1.00      1.00      1.00        93
            Normal       1.00      0.75      0.86    260856

          accuracy                           0.75    266918
         macro avg       0.53      0.82      0.48    266918
      weighted avg       0.99      0.75      0.85    266918



### For Testing Dataset

In [113]:
print(classification_report(y_test, y_nb_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.34      1.00      0.51      1005
       dataProbing       0.04      0.86      0.07        56
  malitiousControl       0.16      0.94      0.27       193
malitiousOperation       0.64      1.00      0.78       135
              scan       1.00      0.13      0.24       364
            spying       0.01      0.93      0.01       129
        wrongSetUp       1.00      1.00      1.00        29
            Normal       1.00      0.75      0.86     87062

          accuracy                           0.75     88973
         macro avg       0.52      0.83      0.47     88973
      weighted avg       0.99      0.75      0.85     88973



## Confusion Matrix for Naive Bayes

### For Training dataset

In [114]:
confusion_matrix(y_train, y_nb_train_pred)

array([[  2995,      0,      0,      0,      0,      0,      0,      0],
       [     0,    152,      0,      0,      0,      0,      0,     34],
       [     0,      0,    638,      0,      0,      7,      0,     51],
       [     0,      0,      0,    522,      0,      0,      0,      0],
       [    10,      0,      0,      0,    161,    635,      0,    361],
       [     0,      0,     26,      0,      0,    377,      0,      0],
       [     0,      0,      0,      0,      0,      0,     93,      0],
       [  5892,   3844,   2798,    255,      0,  51809,      0, 196258]],
      dtype=int64)

### For Testing dataset

In [115]:
confusion_matrix(y_test, y_nb_test_pred)

array([[ 1005,     0,     0,     0,     0,     0,     0,     0],
       [    0,    48,     0,     0,     0,     0,     0,     8],
       [    0,     0,   182,     0,     0,     2,     0,     9],
       [    0,     0,     0,   135,     0,     0,     0,     0],
       [    1,     0,     0,     0,    49,   197,     0,   117],
       [    0,     0,     9,     0,     0,   120,     0,     0],
       [    0,     0,     0,     0,     0,     0,    29,     0],
       [ 1960,  1273,   963,    77,     0, 17275,     0, 65514]],
      dtype=int64)

## Classification Report for Support Vector Machine Classifier

### For Training Dataset

In [73]:
print(classification_report(y_train, y_svc_train_pred, target_names=target_names))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                    precision    recall  f1-score   support

         DoSattack       0.93      0.50      0.65      2995
       dataProbing       0.00      0.00      0.00       186
  malitiousControl       0.00      0.00      0.00       696
malitiousOperation       0.73      0.57      0.64       522
              scan       0.00      0.00      0.00      1167
            spying       0.00      0.00      0.00       403
        wrongSetUp       0.00      0.00      0.00        93
            Normal       0.98      1.00      0.99    260856

          accuracy                           0.98    266918
         macro avg       0.33      0.26      0.29    266918
      weighted avg       0.97      0.98      0.98    266918



  _warn_prf(average, modifier, msg_start, len(result))


### For Testing Dataset

In [74]:
print(classification_report(y_test, y_svc_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.94      0.51      0.66      1005
       dataProbing       0.00      0.00      0.00        56
  malitiousControl       0.00      0.00      0.00       193
malitiousOperation       0.74      0.65      0.69       135
              scan       0.00      0.00      0.00       364
            spying       0.00      0.00      0.00       129
        wrongSetUp       0.00      0.00      0.00        29
            Normal       0.99      1.00      0.99     87062

          accuracy                           0.98     88973
         macro avg       0.33      0.27      0.29     88973
      weighted avg       0.98      0.98      0.98     88973



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Confusion Matrix for Support Vector Machine Classifier

### For Training dataset

In [75]:
confusion_matrix(y_train, y_svc_train_pred)

array([[  1486,      0,      0,      0,      0,      0,      0,   1509],
       [     0,      0,      0,      0,      0,      0,      0,    186],
       [     0,      0,      0,      0,      0,      0,      0,    696],
       [     0,      0,      0,    299,      0,      0,      0,    223],
       [     6,      0,      8,      0,      0,      0,      0,   1153],
       [     0,      0,      0,      0,      0,      0,      0,    403],
       [     0,      0,      0,      0,      0,      0,      0,     93],
       [    99,      0,      0,    112,      0,      2,      0, 260643]],
      dtype=int64)

### For Testing dataset

In [76]:
confusion_matrix(y_test, y_svc_test_pred)

array([[  514,     0,     0,     0,     0,     0,     0,   491],
       [    0,     0,     0,     0,     0,     0,     0,    56],
       [    0,     0,     0,     0,     0,     0,     0,   193],
       [    0,     0,     0,    88,     0,     0,     0,    47],
       [    5,     0,     2,     0,     0,     0,     0,   357],
       [    0,     0,     0,     0,     0,     0,     0,   129],
       [    0,     0,     0,     0,     0,     0,     0,    29],
       [   28,     0,     0,    31,     0,     0,     0, 87003]],
      dtype=int64)

## Confusion Matrix for Artificial Neural Network

### For Training dataset

In [77]:
y_ann_train_pred = ann.predict(x_train_sc)
y_ann_train_pred

array([[4.9520966e-16, 1.6500573e-35, 2.1900650e-16, ..., 4.0857522e-06,
        6.3212279e-16, 9.9853718e-01],
       [2.5253403e-16, 0.0000000e+00, 1.5039352e-22, ..., 2.6140771e-05,
        3.5364246e-20, 9.9976861e-01],
       [1.7589559e-37, 0.0000000e+00, 0.0000000e+00, ..., 8.2154415e-20,
        0.0000000e+00, 1.0000000e+00],
       ...,
       [9.2619075e-06, 3.0336758e-16, 5.9368234e-14, ..., 3.4253597e-03,
        1.5315234e-13, 9.5111692e-01],
       [4.9712131e-16, 1.6844383e-35, 2.2134138e-16, ..., 4.0361688e-06,
        6.2696675e-16, 9.9853331e-01],
       [3.4666853e-14, 4.5429543e-28, 4.9990421e-13, ..., 8.0381797e-06,
        5.3462531e-14, 9.8849821e-01]], dtype=float32)

In [78]:
y_train.shape

(266918, 1)

In [79]:
y_ann_train_pred.shape

(266918, 8)

In [80]:
y_ann_train_pred_labels = [np.argmax(i) for i in y_ann_train_pred]
y_ann_train_pred_labels[:5]

[7, 7, 7, 7, 7]

In [81]:
print(tf.math.confusion_matrix(labels=y_train, predictions=y_ann_train_pred_labels))

tf.Tensor(
[[  1486      0      0      0      0      0      0   1509]
 [     0    186      0      0      0      0      0      0]
 [     0      0    689      0      7      0      0      0]
 [     0      0      0    522      0      0      0      0]
 [     0      0      0      0   1167      0      0      0]
 [     0      0      0      0      0    403      0      0]
 [     0      0      0      0      0      0     93      0]
 [     5      0      2      0     11      4      0 260834]], shape=(8, 8), dtype=int32)


### For Testing dataset

In [82]:
y_ann_test_pred = ann.predict(x_test_sc)
y_ann_test_pred

array([[3.6851128e-17, 9.9178024e-36, 2.0833830e-16, ..., 5.5361972e-05,
        9.2830254e-15, 9.9903047e-01],
       [1.2463939e-27, 0.0000000e+00, 1.5808135e-14, ..., 8.4415762e-19,
        1.6410281e-23, 9.9999899e-01],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 3.0847943e-28,
        0.0000000e+00, 8.9114815e-02],
       ...,
       [8.0591803e-21, 0.0000000e+00, 2.5427861e-18, ..., 1.8806703e-11,
        6.9359146e-25, 9.9999177e-01],
       [0.0000000e+00, 2.1485711e-26, 5.1018814e-35, ..., 5.4584727e-25,
        0.0000000e+00, 9.1820562e-01],
       [2.4936598e-06, 1.1911538e-16, 2.1314779e-10, ..., 3.2101488e-07,
        4.2536617e-12, 8.7413716e-01]], dtype=float32)

In [83]:
y_test.shape

(88973, 1)

In [84]:
y_ann_test_pred.shape

(88973, 8)

In [85]:
y_ann_test_pred_labels = [np.argmax(i) for i in y_ann_test_pred]
y_ann_test_pred_labels[:5]

[7, 7, 7, 7, 7]

In [86]:
print(tf.math.confusion_matrix(labels=y_test, predictions=y_ann_test_pred_labels))

tf.Tensor(
[[  514     0     0     0     0     0     0   491]
 [    0    56     0     0     0     0     0     0]
 [    0     0   191     0     2     0     0     0]
 [    0     0     0   135     0     0     0     0]
 [    0     0     0     0   364     0     0     0]
 [    0     0     0     0     0   129     0     0]
 [    0     0     0     0     0     0    29     0]
 [    1     1     1     0     3     0     0 87056]], shape=(8, 8), dtype=int32)


## Classification Report for Artificial Neural Network

### For Training dataset

In [87]:
print(classification_report(y_train, y_ann_train_pred_labels, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       1.00      0.50      0.66      2995
       dataProbing       1.00      1.00      1.00       186
  malitiousControl       1.00      0.99      0.99       696
malitiousOperation       1.00      1.00      1.00       522
              scan       0.98      1.00      0.99      1167
            spying       0.99      1.00      1.00       403
        wrongSetUp       1.00      1.00      1.00        93
            Normal       0.99      1.00      1.00    260856

          accuracy                           0.99    266918
         macro avg       1.00      0.94      0.96    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing dataset

In [88]:
print(classification_report(y_test, y_ann_test_pred_labels, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       1.00      0.51      0.68      1005
       dataProbing       0.98      1.00      0.99        56
  malitiousControl       0.99      0.99      0.99       193
malitiousOperation       1.00      1.00      1.00       135
              scan       0.99      1.00      0.99       364
            spying       1.00      1.00      1.00       129
        wrongSetUp       1.00      1.00      1.00        29
            Normal       0.99      1.00      1.00     87062

          accuracy                           0.99     88973
         macro avg       0.99      0.94      0.96     88973
      weighted avg       0.99      0.99      0.99     88973



In [89]:
y_ann_train_labels = np.array(y_ann_train_pred_labels)
y_ann_test_labels = np.array(y_ann_test_pred_labels)
y_ann_train_labels, y_ann_test_labels

(array([7, 7, 7, ..., 7, 7, 7], dtype=int64),
 array([7, 7, 7, ..., 7, 7, 7], dtype=int64))

### Creating dataset

In [123]:
dataset = [[lr_time,lr_train_accuracy,lr_test_accuracy],
           [knn_time,knn_train_accuracy,knn_test_accuracy],
           [dtc_time,dtc_train_accuracy,dtc_test_accuracy],
           [rfc_time,rfc_train_accuracy,rfc_test_accuracy],
           [nb_time,nb_train_accuracy,nb_test_accuracy],
           [svc_time,svc_train_accuracy,svc_test_accuracy],
           [ann_time,ann_train_accuracy,ann_test_accuracy]
          ]

In [124]:
dataset

[[39.63400387763977, 0.9867000352168082, 0.9878727254335585],
 [2.7285642623901367, 0.993645988655692, 0.9933462960673463],
 [1.8945696353912354, 0.9943465783499053, 0.9944477538129545],
 [86.07441997528076, 0.9943465783499053, 0.9944814719072078],
 [0.0859382152557373, 0.9772889052068425, 0.9785215739606398],
 [78.33316397666931, 0.9831783544009771, 0.9846245490204893],
 [279.6557123661041, 0.9942379593849182, 0.9943915605545044]]

In [125]:
comparison = pd.DataFrame(dataset, columns = ['Time_Taken','Training_Accuracy','Testing_Accuracy'], 
                         index=['Logistic Regression','K Nearest Neighbour','Decision Tree','Random Forest','Naive Bayes','Support Vector Machine','Artificial Neural Network',])
comparison

Unnamed: 0,Time_Taken,Training_Accuracy,Testing_Accuracy
Logistic Regression,39.634004,0.9867,0.987873
K Nearest Neighbour,2.728564,0.993646,0.993346
Decision Tree,1.89457,0.994347,0.994448
Random Forest,86.07442,0.994347,0.994481
Naive Bayes,0.085938,0.977289,0.978522
Support Vector Machine,78.333164,0.983178,0.984625
Artificial Neural Network,279.655712,0.994238,0.994392


In [126]:
comparison.to_csv('performance.csv')