# Initialization

In [1]:
import pandas as pd
import numpy as np
import os
import datetime
from enum import Enum
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
import joblib

from sklearn.linear_model import LogisticRegression, SGDClassifier, RidgeClassifier
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier, NearestCentroid
from sklearn.naive_bayes import GaussianNB, CategoricalNB, BernoulliNB
from sklearn.neural_network import BernoulliRBM, MLPClassifier # Unsupervised
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.utils import to_categorical
# import tensorflow_decision_forests as tfdf


In [2]:
# Coarse traffic categories used as classification targets. The position of a
# name in ATTACKS is also its integer class id (ATTACKS_ENUM starts at 0).
ATTACKS = ['DDoS', 'DoS', 'Mirai', 'Recon', 'Spoofing', 'Benign', 'Web', 'BruteForce']
ATTACKS_ENUM = Enum('ATTACKS', ATTACKS, start=0)

# Raw dataset labels grouped by the coarse category they collapse into.
_RAW_LABELS_BY_CATEGORY = {
    'DDoS': [
        'DDoS-RSTFINFlood', 'DDoS-PSHACK_Flood', 'DDoS-SYN_Flood', 'DDoS-UDP_Flood',
        'DDoS-TCP_Flood', 'DDoS-ICMP_Flood', 'DDoS-SynonymousIP_Flood',
        'DDoS-ACK_Fragmentation', 'DDoS-UDP_Fragmentation', 'DDoS-ICMP_Fragmentation',
        'DDoS-SlowLoris', 'DDoS-HTTP_Flood',
    ],
    'DoS': ['DoS-UDP_Flood', 'DoS-SYN_Flood', 'DoS-TCP_Flood', 'DoS-HTTP_Flood'],
    'Mirai': ['Mirai-greeth_flood', 'Mirai-greip_flood', 'Mirai-udpplain'],
    'Recon': [
        'Recon-PingSweep', 'Recon-OSScan', 'Recon-PortScan',
        'VulnerabilityScan', 'Recon-HostDiscovery',
    ],
    'Spoofing': ['DNS_Spoofing', 'MITM-ArpSpoofing'],
    'Benign': ['BenignTraffic'],
    'Web': [
        'BrowserHijacking', 'Backdoor_Malware', 'XSS',
        'Uploading_Attack', 'SqlInjection', 'CommandInjection',
    ],
    'BruteForce': ['DictionaryBruteForce'],
}

# Flat lookup: raw dataset label -> coarse category name.
# (The name says "7classes" but the table maps onto the 8 entries of ATTACKS.)
dict_7classes = {
    raw_label: category
    for category, raw_labels in _RAW_LABELS_BY_CATEGORY.items()
    for raw_label in raw_labels
}

In [3]:
# ===== Generation of CSV data =====
# Adapted from same source as Dataset
# Optional step: turn the raw .pcap captures under 'pcap/' into CSV files.
# Disabled by default; set run_this to True to execute it.
run_this = False
if run_this:
    # Imported lazily so the notebook does not need pcap2csv unless this step is enabled.
    from pcap2csv import Generating_dataset
    PCAP_DIRECTORY = 'pcap/'
    # Only bare file names are collected here (no directory prefix), as in the original cell.
    pcap_files = [entry for entry in os.listdir(PCAP_DIRECTORY) if entry.endswith('.pcap')]
    Generating_dataset.make_csv(pcap_files)


In [4]:
# =====Split Train / Test data======
# Dataset link-> https://www.unb.ca/cic/datasets/iotdataset-2023.html
#E. C. P. Neto, S. Dadkhah, R. Ferreira, A. Zohourian, R. Lu, A. A. Ghorbani. "CICIoT2023: A real-time dataset and benchmark for large-scale attacks in IoT environment," Sensor (2023) – (submitted to Journal of Sensors).

DATASET_DIRECTORY = 'dataset/'

# Every CSV file in the dataset directory, in sorted (deterministic) order.
df_sets = sorted(name for name in os.listdir(DATASET_DIRECTORY) if name.endswith('.csv')) # all files
#df_sets = [k for k in os.listdir(DATASET_DIRECTORY) if k.endswith('1-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv')] # smaller subset for faster testing, 17 files =  10% of whole dataset
#df_sets = [k for k in os.listdir(DATASET_DIRECTORY) if k.endswith('11-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv')] # 2 files = 1%

# File-level 80/20 split: the first 80% of the files (rounded down) train, the rest test.
split_index = int(len(df_sets)*.8)
training_sets, test_sets = df_sets[:split_index], df_sets[split_index:]

In [5]:
#=====Extract Data=====
# Input feature columns, in the order they are read from the CSV files.
X_columns = [
    # flow-level statistics
    'flow_duration', 'Header_Length', 'Protocol Type', 'Duration', 'Rate', 'Srate', 'Drate',
    # TCP flag indicators
    'fin_flag_number', 'syn_flag_number', 'rst_flag_number', 'psh_flag_number',
    'ack_flag_number', 'ece_flag_number', 'cwr_flag_number',
    # flag counters
    'ack_count', 'syn_count', 'fin_count', 'urg_count', 'rst_count',
    # protocol indicators
    'HTTP', 'HTTPS', 'DNS', 'Telnet', 'SMTP', 'SSH', 'IRC', 'TCP',
    'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC',
    # packet-size / timing aggregates ('Magnitue' is spelled as in the dataset header)
    'Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number', 'Magnitue',
    'Radius', 'Covariance', 'Variance', 'Weight',
] #columns 0-45
Y_columns = 'label' #column 46

# Features followed by the label column: the full set of columns loaded from each file.
all_columns = [*X_columns, Y_columns]

# Subset of X_columns holding continuous / count-valued features.
NUMERIC_FEATURE_NAMES = [
    'flow_duration', 'Header_Length', 'Protocol Type', 'Duration', 'Rate', 'Srate', 'Drate',
    'ack_count', 'syn_count', 'fin_count', 'urg_count', 'rst_count',
    'Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number', 'Magnitue',
    'Radius', 'Covariance', 'Variance', 'Weight',
]
# Subset of X_columns holding the flag and protocol indicator features.
CATEGORICAL_FEATURE_NAMES = [
    'fin_flag_number', 'syn_flag_number', 'rst_flag_number', 'psh_flag_number',
    'ack_flag_number', 'ece_flag_number', 'cwr_flag_number',
    'HTTP', 'HTTPS', 'DNS', 'Telnet', 'SMTP', 'SSH', 'IRC', 'TCP',
    'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC',
]


In [6]:
#=====Feature Scaling======
# columnsToScale = ['flow_duration', 'Header_Length', 'Duration', 'Rate', 'Srate', 'Drate', 'ack_count', 'syn_count', 'fin_count', 'urg_count', 'rst_count', 'fin_count']
# scale all
# Learn per-feature min/max over ALL training files, one file at a time.
# partial_fit accumulates the running min/max across calls; calling fit() in the
# loop would reset the scaler each time and keep only the last file's statistics.
scaler = MinMaxScaler(feature_range=(0,1))
for train_set in tqdm(training_sets):
    df = pd.read_csv(DATASET_DIRECTORY + train_set, index_col=None, header=0, delimiter=',')[X_columns]
    scaler.partial_fit(df)
    del df  # release the frame before loading the next file

100%|██████████| 1/1 [00:00<00:00,  1.41it/s]


# Define Model layers

In [7]:
class Model:
    """Bundle an estimator with a display name and a dispatch tag.

    Note: this class rebinds the name ``Model`` that the import cell also
    brings in from ``tensorflow.keras.models``.

    Attributes:
        model: the wrapped estimator object (scikit-learn or Keras).
        name:  label used in reports and in saved-file names.
        type:  one of the TYPES values; selects how the model is trained/tested.
    """

    def __init__(self, model, name, type):
        self.model, self.name, self.type = model, name, type
        #self.batch_size = batch_size

TYPES = {}
TYPES['SK_LR'] = 1
TYPES['SK_RF'] = 2
TYPES['TF'] = 3

verbose, epochs, batch_size = 0, 10, 512
activationFunction='relu'

# def getOtimizedSequentialModel():
#     model = Sequential()
#     model.add(Dense(46, activation=activationFunction))
#     model.add(Dense(30, activation=activationFunction))
#     model.add(Dense(8, activation='softmax'))
#     model.compile(loss=keras.losses.BinaryCrossentropy(),
#                     optimizer=keras.optimizers.Adam(learning_rate=1e-3), 
#                     metrics=[ keras.metrics.BinaryAccuracy(), keras.metrics.FalseNegatives()]
#                     )
#     return model

def getANN():
    """Build the fully connected 8-class network and wrap it as a TF-type ``Model``.

    Architecture: Dense 46 -> 30 -> 20 -> 12 (hidden activation taken from
    ``activationFunction``) followed by an 8-unit softmax output.

    The training cell feeds TF-type models one-hot labels, so the network is
    compiled with categorical cross-entropy and categorical accuracy. The
    previous binary cross-entropy / binary accuracy pair scores each of the 8
    outputs as an independent yes/no decision, which does not match a softmax
    output and inflates the reported accuracy.

    Returns:
        Model: wrapper named "ANN" with type ``TYPES['TF']``.
    """
    model = Sequential()
    model.add(Dense(46, activation=activationFunction))
    model.add(Dense(30, activation=activationFunction))
    model.add(Dense(20, activation=activationFunction))
    model.add(Dense(12, activation=activationFunction))
    model.add(Dense(8, activation='softmax'))
    model.compile(loss=keras.losses.CategoricalCrossentropy(),
                    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                    metrics=[keras.metrics.CategoricalAccuracy(), keras.metrics.FalseNegatives()]
                    )
    return Model(model, "ANN", TYPES['TF'])


def getCNN1():
    """Build a small 1-D convolutional 8-class network and wrap it as a TF-type ``Model``.

    Changes relative to the previous version, which could not be trained by
    this notebook:
      * A ``Reshape((-1, 1))`` layer is prepended, because Conv1D needs
        (batch, steps, channels) input. Assumes the model is fed the 2-D
        (samples, features) arrays that the training cell gets from
        ``scaler.transform``.
      * The convolution kernel size is 3 instead of 46. With 46 input features
        a kernel of 46 leaves a length-1 sequence after the first convolution,
        which the following pooling/convolution layers cannot process.
      * The loss is categorical (not sparse) cross-entropy, because the
        training cell passes one-hot labels to TF-type models.

    Returns:
        Model: wrapper named 'CNN' with type ``TYPES['TF']``.
    """
    model = Sequential()
    # Treat every feature as one time step with a single channel.
    model.add(layers.Reshape((-1, 1)))
    model.add(Conv1D(32, 3, activation=activationFunction))
    model.add(MaxPooling1D(2))
    model.add(Conv1D(64, 3, activation=activationFunction))
    model.add(MaxPooling1D(2))
    model.add(Conv1D(64, 3, activation=activationFunction))
    model.add(Flatten())
    model.add(Dense(64, activation=activationFunction))
    model.add(Dense(8, activation='softmax'))
    model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])
    return Model(model, 'CNN', TYPES['TF'])

def getRFModel():
    """Return a 100-tree, gini, unlimited-depth random forest wrapped as "RF".

    Tagged ``TYPES['SK_RF']``, i.e. the training cell fits it on one-hot labels.
    """
    forest_settings = dict(n_estimators=100, criterion='gini', max_depth=None)
    return Model(RandomForestClassifier(**forest_settings), "RF", TYPES['SK_RF'])

def getLRModel():
    """Return a default LogisticRegression wrapped as "LR" (type SK_LR: integer labels)."""
    return Model(LogisticRegression(), "LR", TYPES['SK_LR'])

def getSVCModel():
    """Return a default (kernel) SVC wrapped as "SVC" (type SK_LR: integer labels).

    The wrapper used to be named "Lin-SVC", the same name getLinSVCModel uses,
    so the two models overwrote each other's report and pickle files.
    """
    model = SVC()
    return Model(model, "SVC", TYPES['SK_LR'])

def getLinSVCModel():
    """Return a LinearSVC with tolerance 1e-5 wrapped as "Lin-SVC" (type SK_LR)."""
    return Model(LinearSVC(tol=1e-5), "Lin-SVC", TYPES['SK_LR'])

def getSGDCModel():
    """Return an SGDClassifier with hinge loss wrapped as "SGDC Hinge" (type SK_LR)."""
    return Model(SGDClassifier(loss='hinge'), "SGDC Hinge", TYPES['SK_LR'])

def getSGDC_LogLossModel():
    """Return an SGDClassifier with log loss wrapped as "SGDC LogLoss" (type SK_LR)."""
    return Model(SGDClassifier(loss='log_loss'), "SGDC LogLoss", TYPES['SK_LR'])

def getSGDC_HuberModel():
    """Return an SGDClassifier with modified-Huber loss wrapped as "SGDC Modified Huber" (type SK_LR)."""
    return Model(SGDClassifier(loss='modified_huber'), "SGDC Modified Huber", TYPES['SK_LR'])

def getKNNCModel():
    """Return a default KNeighborsClassifier wrapped as "KNNC" (type SK_LR)."""
    return Model(KNeighborsClassifier(), "KNNC", TYPES['SK_LR'])

def getRadNNCModel():
    """Return a default RadiusNeighborsClassifier wrapped as "RadNNC" (type SK_LR)."""
    return Model(RadiusNeighborsClassifier(), "RadNNC", TYPES['SK_LR'])

def getNCentModel():
    """Return a default NearestCentroid classifier wrapped as "NCent" (type SK_LR)."""
    return Model(NearestCentroid(), "NCent", TYPES['SK_LR'])

def getRidgeModel():
    """Return a RidgeClassifier using the 'saga' solver, wrapped as "Ridge".

    Tagged ``TYPES['SK_RF']``, i.e. the training cell fits it on one-hot labels
    and the test cell takes the argmax of its predictions.
    """
    return Model(RidgeClassifier(solver='saga'), "Ridge", TYPES['SK_RF'])

# === Test bottom models later ===

def getBRBMModel():
    """Return a default BernoulliRBM wrapped with type SK_RF.

    NOTE(review): the import cell marks BernoulliRBM as unsupervised, while the
    test cell calls ``.predict`` on every model -- confirm this estimator can
    be evaluated that way before enabling it in ML_Models.
    """
    return Model(BernoulliRBM(), "Bernoulli Restricted Bolzman Machine", TYPES['SK_RF'])

def getBNBModel():
    """Return a default BernoulliNB wrapped as "Bernoulli Naive Bayes".

    Tagged ``TYPES['SK_LR']`` so the training cell passes 1-D integer class
    ids. The previous SK_RF tag made it pass one-hot (2-D) labels, which the
    scikit-learn naive Bayes estimators reject.
    """
    model = BernoulliNB()
    return Model(model, "Bernoulli Naive Bayes", TYPES['SK_LR'])

def getCNBModel():
    """Return a default CategoricalNB wrapped as "Categocical Naive Bayes".

    Tagged ``TYPES['SK_LR']`` so the training cell passes 1-D integer class
    ids. The previous SK_RF tag made it pass one-hot (2-D) labels, which the
    scikit-learn naive Bayes estimators reject.

    NOTE(review): CategoricalNB expects categorical (integer-coded) features,
    while the notebook feeds min-max scaled values -- confirm suitability
    before enabling it in ML_Models.
    """
    model = CategoricalNB()
    return Model(model, "Categocical Naive Bayes", TYPES['SK_LR'])

def getGNBModel():
    """Return a default GaussianNB wrapped as "Gausian Naive Bayes".

    Tagged ``TYPES['SK_LR']`` so the training cell passes 1-D integer class
    ids. The previous SK_RF tag made it pass one-hot (2-D) labels, which the
    scikit-learn naive Bayes estimators reject.
    """
    model = GaussianNB()
    return Model(model, "Gausian Naive Bayes", TYPES['SK_LR'])

def getMLPCModel():
    """Return a default MLPClassifier wrapped as "MLPC" (type SK_RF: one-hot labels).

    The wrapper used to be named "Bernoulli" (copied from the naive Bayes
    factory), which mislabelled its report and saved-model files.
    """
    model = MLPClassifier() #MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
    return Model(model, "MLPC", TYPES['SK_RF'])

def getNUSVCModel():
    """Return a default NuSVC wrapped as "NuSVC" (type SK_LR: integer labels)."""
    return Model(NuSVC(), "NuSVC", TYPES['SK_LR'])

# Factories of the models that are trained and evaluated, in run order.
# Trailing remarks are the author's notes from earlier runs.
_ENABLED_MODEL_FACTORIES = (
    getLinSVCModel,        # good
    getSGDCModel,          # good
    getSGDC_LogLossModel,
    getSGDC_HuberModel,
    getKNNCModel,          # good results - long prediction time
    getNCentModel,         # good
    getRidgeModel,         # good
)
# Currently disabled (author's notes):
#   getSVCModel     - bad - infinite training
#   getRadNNCModel  - bad - infinite testing?
#   getNUSVCModel   - bad - error
#   getGNBModel     - good
#   getBRBMModel, getBNBModel, getCNBModel - not run
#   getMLPCModel    - marked "unsupervised" by the author
#                     (NOTE(review): MLPClassifier is a supervised classifier; re-check why it was disabled)

# One freshly constructed, untrained wrapper per enabled factory.
ML_Models = [build_model() for build_model in _ENABLED_MODEL_FACTORIES]


# Train Models

In [8]:
# Fit every model in ML_Models on each training file in turn.
# NOTE(review): scikit-learn's fit() generally retrains from scratch, so with
# more than one training file the sklearn models presumably end up fitted on
# the last file only (Keras fit() continues from the current weights) -- confirm.
print(f"Last ran on {len(ML_Models)} models, with {len(training_sets)} training sets on date: {datetime.datetime.now()}")
for train_set in tqdm(training_sets):
    df = pd.read_csv(DATASET_DIRECTORY + train_set, index_col=None, header=0, delimiter=',')[all_columns]
    x_train = scaler.transform(df[X_columns])
    # Raw label -> coarse category -> integer class id.
    y_train = [ATTACKS_ENUM[dict_7classes[raw_label]].value for raw_label in df[Y_columns]]
    y_train_Cat = to_categorical(y_train, num_classes=8)

    # Which label encoding each scikit-learn type tag is trained on.
    sklearn_targets = {TYPES["SK_LR"]: y_train, TYPES["SK_RF"]: y_train_Cat}

    for model in ML_Models:
        if model.type in sklearn_targets:
            model.model.fit(x_train, sklearn_targets[model.type])
        elif model.type == TYPES["TF"]:
            model.model.fit(x=x_train,
                            y=y_train_Cat,
                            epochs=epochs,
                            verbose=verbose,
                            batch_size=batch_size)

    # Free this file's data before the next one is loaded.
    del sklearn_targets
    del df, x_train, y_train, y_train_Cat



Last ran on 8 models, with 1 training sets on date: 2024-06-15 01:06:24.801329


100%|██████████| 1/1 [02:03<00:00, 123.87s/it]


# Test the resulting trained models

In [9]:
def showResults(test, pred,model_name):
    """Build, save and print the evaluation report for one model.

    The report holds a timestamp, the per-class classification report, the
    overall accuracy, weighted precision and weighted F1, and the confusion
    matrix. It is written as plain text to ``outputs/<model_name>.txt`` (the
    directory is created if missing) and echoed to stdout.

    Args:
        test: true class ids (indices into ATTACKS).
        pred: predicted class ids, same length as ``test``.
        model_name: label used in the report header and in the file name.
    """
    # Pin the label set so the report rows always line up with ATTACKS, even
    # when a rare class is absent from this evaluation split.
    class_ids = list(range(len(ATTACKS)))

    accuracy = accuracy_score(test, pred)
    precision = precision_score(test, pred, average='weighted')
    f1Score = f1_score(test, pred, average='weighted')
    cm = confusion_matrix(test, pred, labels=class_ids)

    output = ''
    output += str(datetime.datetime.now())
    output += f"\n===== {model_name} =====\n"
    output += classification_report(test, pred, labels=class_ids, target_names=ATTACKS)
    output += f"\nAccuracy  : {accuracy}\n"
    output += f"Precision : {precision}\n"
    output += f"f1Score : {f1Score}\n"
    output += str(cm)

    # Write readable text; joblib.dump would pickle the string into a binary file.
    os.makedirs("outputs", exist_ok=True)
    with open(f"outputs/{model_name}.txt", "w", encoding="utf-8") as report_file:
        report_file.write(output)

    print(output)

In [10]:
def testModel(model):
    """Evaluate one wrapped model on every file in ``test_sets`` and report the result.

    Each test file is loaded, scaled with the fitted ``scaler`` and passed to
    the model's ``predict``. True and predicted labels are accumulated over all
    files and handed to ``showResults`` once.

    Args:
        model: a ``Model`` wrapper; ``model.type`` selects how predictions are
            decoded and ``model.name`` labels the report.
    """
    y_test = []
    y_predict = []
    for test_set in tqdm(test_sets):
        df = pd.read_csv(DATASET_DIRECTORY + test_set, index_col=None, header=0, delimiter=',')[all_columns]
        x_test = scaler.transform(df[X_columns])
        # Raw label -> coarse category -> integer class id.
        y_test.extend(ATTACKS_ENUM[dict_7classes[k]].value for k in df[Y_columns])
        if model.type == TYPES['TF']:
            y_predict.extend(model.model.predict(x_test, verbose=0))
        elif model.type in (TYPES['SK_LR'], TYPES['SK_RF']):
            y_predict.extend(model.model.predict(x_test))

        # Free this file's data before the next one is loaded.
        del df, x_test

    y_test = np.array(y_test)
    if model.type in (TYPES['TF'], TYPES['SK_RF']):
        # These models output one score/indicator per class; reduce each row to
        # a class id. (y_test already holds class ids, so it needs no one-hot
        # round trip.)
        # NOTE(review): a row with no positive indicator resolves to class 0
        # under argmax -- confirm this is acceptable for SK_RF models.
        y_predict = np.argmax(y_predict, axis=1)
    showResults(y_test, y_predict, model.name)

In [None]:
# Persist every model under SavedModels/: Keras models in the native .keras
# format, scikit-learn estimators as joblib pickles.
# os.path.join keeps the paths portable (a literal backslash is only a path
# separator on Windows), and the directory is created when it does not exist.
SAVED_MODELS_DIRECTORY = "SavedModels"
os.makedirs(SAVED_MODELS_DIRECTORY, exist_ok=True)
for model in ML_Models:
    if model.type == TYPES["TF"]:
        model.model.save(os.path.join(SAVED_MODELS_DIRECTORY, f"{model.name}.keras"), overwrite=True)
    elif model.type in (TYPES["SK_RF"], TYPES["SK_LR"]):
        joblib.dump(model.model, os.path.join(SAVED_MODELS_DIRECTORY, f"{model.name}.pkl"))

In [11]:
# Evaluate every trained model on the held-out test files, one report per model.
print(f"Last ran on {len(ML_Models)} models, with {len(test_sets)} testing sets on date: {datetime.datetime.now()}")
for trained_model in ML_Models:
    testModel(trained_model)

Last ran on 8 models, with 1 testing sets on date: 2024-06-15 01:08:28.724636


100%|██████████| 1/1 [00:01<00:00,  1.05s/it]


2024-06-15 01:08:29.785715
===== Lin-SVC =====
              precision    recall  f1-score   support

        DDoS       0.82      0.99      0.90    196031
         DoS       0.74      0.10      0.18     46701
       Mirai       0.98      0.99      0.99     15235
       Recon       0.64      0.25      0.36      2034
    Spoofing       0.79      0.26      0.39      2796
      Benign       0.67      0.86      0.76      6240
         Web       0.00      0.00      0.00       143
  BruteForce       1.00      0.12      0.22        73

    accuracy                           0.82    269253
   macro avg       0.71      0.45      0.47    269253
weighted avg       0.81      0.82      0.77    269253

Accuracy  : 0.8217958574277724
Precision : 0.8095918215356802
f1Score : 0.7653423871357131
[[194756    910      3     37     21    304      0      0]
 [ 41690   4803     16     25     20    145      2      0]
 [    22     41  15094      2     15     61      0      0]
 [   429    235     10    508     

100%|██████████| 1/1 [00:01<00:00,  1.05s/it]


2024-06-15 01:08:31.380678
===== SGDC Hinge =====
              precision    recall  f1-score   support

        DDoS       0.80      1.00      0.89    196031
         DoS       0.64      0.00      0.00     46701
       Mirai       0.98      0.99      0.98     15235
       Recon       0.79      0.12      0.21      2034
    Spoofing       0.61      0.12      0.19      2796
      Benign       0.60      0.82      0.69      6240
         Web       0.06      0.01      0.02       143
  BruteForce       1.00      0.07      0.13        73

    accuracy                           0.80    269253
   macro avg       0.68      0.39      0.39    269253
weighted avg       0.77      0.80      0.72    269253

Accuracy  : 0.8027134330908104
Precision : 0.7744334760171304
f1Score : 0.7215576739157472
[[195304     10    139     13      7    553      5      0]
 [ 46298     51      9     16     12    304     11      0]
 [    93      4  15058      0     11     65      4      0]
 [   830      8      1    242  

100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


2024-06-15 01:08:32.996963
===== SGDC LogLoss =====
              precision    recall  f1-score   support

        DDoS       0.80      0.99      0.89    196031
         DoS       0.16      0.01      0.01     46701
       Mirai       0.99      0.99      0.99     15235
       Recon       0.74      0.12      0.20      2034
    Spoofing       0.63      0.18      0.28      2796
      Benign       0.61      0.81      0.70      6240
         Web       0.00      0.00      0.00       143
  BruteForce       0.00      0.00      0.00        73

    accuracy                           0.80    269253
   macro avg       0.49      0.39      0.38    269253
weighted avg       0.69      0.80      0.73    269253

Accuracy  : 0.8025017362852038
Precision : 0.6948977448321642
f1Score : 0.7251007980678462
[[194960    307      3      9    216    536      0      0]
 [ 46119    277      6      8     15    276      0      0]
 [    51     78  15040      0      4     62      0      0]
 [   599    334      2    236

100%|██████████| 1/1 [00:01<00:00,  1.04s/it]


2024-06-15 01:08:34.578131
===== SGDC Modified Huber =====
              precision    recall  f1-score   support

        DDoS       0.81      0.99      0.90    196031
         DoS       0.67      0.08      0.14     46701
       Mirai       0.98      0.99      0.99     15235
       Recon       0.61      0.25      0.35      2034
    Spoofing       0.88      0.17      0.29      2796
      Benign       0.62      0.84      0.71      6240
         Web       0.00      0.00      0.00       143
  BruteForce       1.00      0.12      0.22        73

    accuracy                           0.81    269253
   macro avg       0.70      0.43      0.45    269253
weighted avg       0.79      0.81      0.75    269253

Accuracy  : 0.8149955617950404
Precision : 0.7933893435948004
f1Score : 0.7535251287765302
[[194566    792     54     46     15    558      0      0]
 [ 42768   3579      6     40     12    296      0      0]
 [    28     64  15061      1      4     77      0      0]
 [   459    241      0

100%|██████████| 1/1 [00:58<00:00, 58.05s/it]


2024-06-15 01:09:33.177604
===== KNNC =====
              precision    recall  f1-score   support

        DDoS       0.97      0.98      0.97    196031
         DoS       0.91      0.86      0.88     46701
       Mirai       1.00      0.99      1.00     15235
       Recon       0.60      0.57      0.58      2034
    Spoofing       0.67      0.56      0.61      2796
      Benign       0.75      0.85      0.80      6240
         Web       0.27      0.04      0.07       143
  BruteForce       0.82      0.12      0.21        73

    accuracy                           0.95    269253
   macro avg       0.75      0.62      0.64    269253
weighted avg       0.95      0.95      0.95    269253

Accuracy  : 0.9470015190174297
Precision : 0.9460477403153176
f1Score : 0.9461392532924253
[[191841   4114     19     51      6      0      0      0]
 [  6659  39997     12     31      2      0      0      0]
 [    79     21  15122      9      4      0      0      0]
 [    48     18      7   1161    184 

  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# model = ML_Models[i].model
# for test_set in test_sets:
#     df = pd.read_csv(DATASET_DIRECTORY + test_set, index_col=None, header=0, delimiter=',')[all_columns]
#     x_test = scaler.transform(df[X_columns])
#     y_test = [ATTACKS_ENUM[dict_7classes[k]].value for k in df[Y_columns]]
#     print("predicting")
#     y_predict = model.predict(x_test)
#     print("done")
#     y_test=np.array(y_test)

#     showResults(y_test, y_predict, "TEst")

predicting
done
2024-06-15 00:21:12.827986
===== TEst =====
              precision    recall  f1-score   support

        DDoS       0.97      0.98      0.97    196031
         DoS       0.91      0.86      0.88     46701
       Mirai       1.00      0.99      1.00     15235
       Recon       0.60      0.57      0.58      2034
    Spoofing       0.67      0.56      0.61      2796
      Benign       0.75      0.85      0.80      6240
         Web       0.27      0.04      0.07       143
  BruteForce       0.82      0.12      0.21        73

    accuracy                           0.95    269253
   macro avg       0.75      0.62      0.64    269253
weighted avg       0.95      0.95      0.95    269253

Accuracy  : 0.9470015190174297
Precision : 0.9460477403153176
f1Score : 0.9461392532924253
[[191841   4114     19     51      6      0      0      0]
 [  6659  39997     12     31      2      0      0      0]
 [    79     21  15122      9      4      0      0      0]
 [    48     18      

In [None]:
#====For debug ===
# test_sets = [k for k in os.listdir(DATASET_DIRECTORY) if k.endswith('.csv')] 
#testModel(load_model("SavedModels\\BestANN.keras"), "Best ANN so far")
#testModel(Model(joblib.load("SavedModels/BestRF.pkl"),"debug_DF", TYPES['SK_RF']))

# Save Models

In [None]:
# for i in range(len(ML_Models)):
#     model = ML_Models[i]
#     if model.type==TYPES["TF"]:
#         model.model.save(f"SavedModels\\{model.name}.keras",overwrite=True)
#     elif model.type==TYPES["SK_RF"] or model.type==TYPES["SK_LR"]:
#         joblib.dump(model.model, f"SavedModels/{model.name}.pkl") 

# Train a separate model to detect each attack

In [None]:
# verbose, epochs, batch_size = 1, 100, 512
# activationFunction='relu'

# def getSequentialModel():
#     model = Sequential()
#     model.add(Dense(128, activation=activationFunction))
#     model.add(Dense(64, activation=activationFunction))
#     model.add(Dense(32, activation=activationFunction))
#     model.add(Dense(16, activation=activationFunction))
#     model.add(Dense(8, activation=activationFunction))
#     model.add(Dense(4, activation=activationFunction))
#     model.add(Dense(2, activation='softmax'))
#     model.compile(loss=keras.losses.BinaryCrossentropy(),
#                     optimizer=keras.optimizers.Adam(learning_rate=1e-3), 
#                     metrics=[ keras.metrics.BinaryAccuracy(), keras.metrics.FalseNegatives()]
#                     )
#     return model

# ML_Models = [
#             getSequentialModel(),
#             getSequentialModel(),
#             getSequentialModel(),
#             getSequentialModel(),
#             getSequentialModel(),
#             getSequentialModel(),
#             getSequentialModel(),
#             getSequentialModel()

# ]
# ML_Model_Names = ATTACKS

In [None]:
# print(f"Last ran on {len(ML_Models)} models, with {len(training_sets)} training sets on date: {datetime.datetime.now()}")
# for train_set in tqdm(training_sets):
#     df = pd.read_csv(DATASET_DIRECTORY + train_set, index_col=None, header=0, delimiter=',')[all_columns]
#     x_train = scaler.transform(df[X_columns])

#     for i in range(len(ML_Models)-1):
#             y_train = to_categorical([ATTACKS_ENUM[dict_7classes[k]].value == ATTACKS_ENUM[ATTACKS[i]].value for k in df[Y_columns]], num_classes=2)
#             model = ML_Models[i]
#             model.fit(x=x_train, 
#                         y=y_train, 
#                         epochs=epochs, 
#                         verbose=verbose,
#                         batch_size=batch_size)   
#             del y_train             
#     del df
#     del x_train

In [None]:
# def showResults8Models(test, pred, model_num):
#     print(f"===== {model_num} =====")
#     print(classification_report(test, pred, target_names=["Negative", "Positive"]))
#     accuracy = accuracy_score(test, pred)
#     precision=precision_score(test, pred, average='weighted')
#     f1Score=f1_score(test, pred, average='weighted') 
#     print("Accuracy  : {}".format(accuracy))
#     print("Precision : {}".format(precision))
#     print("f1Score : {}".format(f1Score))
#     cm=confusion_matrix(test, pred)
#     print(cm) 

# print(f"Last ran on {len(ML_Models)} models, with {len(test_sets)} testing sets on date: {datetime.datetime.now()}")
# for i in range(len(ML_Models)):
#     model = ML_Models[i]
#     y_test = []
#     y_predict = []
#     for test_set in tqdm(test_sets):
#         df = pd.read_csv(DATASET_DIRECTORY + test_set, index_col=None, header=0, delimiter=',')[all_columns]
#         x_test = scaler.transform(df[X_columns])
#         for k in df[Y_columns]:
#             y_test.append(ATTACKS_ENUM[dict_7classes[k]].value==ATTACKS[i])
#         y_predict+= list(model.predict(x_test))

#         del df
#         del x_test

#     myarr = np.array([ATTACKS_ENUM[dict_7classes[k]].value == ATTACKS_ENUM[ATTACKS[0]].value for k in ['DDoS-RSTFINFlood','DDoS-PSHACK_Flood','DDoS-SYN_Flood','DoS-SYN_Flood','DoS-TCP_Flood','Mirai-udpplain','Recon-OSScan','DNS_Spoofing','BrowserHijacking','Backdoor_Malware','DictionaryBruteForce']])
#     print(myarr)
#     print(to_categorical(myarr, num_classes=2))
#     y_test=np.array(y_test)
#     print(y_test[0:10])
#     y_test = to_categorical(y_test, num_classes=2)
#     print(y_test[0:10])
#     print("=========")
#     for i in range(10):
#         print(f"{i}: {y_predict[i]} actual {y_test[i]}")    

#     test = np.argmax(y_test, axis=1)
#     predict = np.argmax(y_predict, axis=1)
#     showResults8Models(test, predict, i)

#     del test
#     del predict
#     del y_test
#     del y_predict