# Model deployment
In this laboratory, the objective is to convert a DDoS detection script written for a Jupyter notebook into a stand-alone Python program that can be deployed to the target machine. To use this notebook, first train and save an MLP model with the script of laboratory [03-Hyperparameters](https://github.com/doriguzzi/dl-for-network-security-phd/tree/main/03-Hyperparameters), and then test your model using this script on live traffic or on a network traffic trace locally available on your computer.
If everything works as expected, convert this notebook into a Python script that supports the following command-line arguments:
- Path of the MLP model
- Name of the ingress network interface (alternatively, the path to a network traffic trace)
- Path to the output CSV file where the program writes the classification results.

In [None]:
# Author: Roberto Doriguzzi-Corin
# Project: Lecture on Intrusion Detection with Deep Learning
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import random as rn
import h5py
import glob
import time
import sys
import csv
import os
import logging
import pprint
import nest_asyncio

from sklearn.metrics import f1_score, accuracy_score,confusion_matrix
from traffic_processing import *
import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
tf.get_logger().setLevel(logging.ERROR)

# Reproducibility: feed the same SEED to every random number generator used
# below (TensorFlow, Python's `random` module and NumPy). SEED is not defined
# in this cell; presumably it comes from the `traffic_processing` star-import.
tf.random.set_seed(SEED)
rn.seed(SEED)
np.random.seed(SEED)
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts,
# so setting it here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = str(SEED)
# NOTE(review): `config` is not referenced anywhere else in the visible code.
config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads=1)

# Patch asyncio so that an event loop can be started from inside another one
# (presumably needed to run pyshark captures within Jupyter -- confirm).
nest_asyncio.apply()

In [None]:
def report_results(Y_true, Y_pred, packets, model_name, data_source, prediction_time, writer):
    """Print the classification summary of one batch and append it to the log.

    Args:
        Y_true: Ground-truth labels as a NumPy array, or None when no labels
            are available. When given, it is reshaped to a column vector
            before being compared with ``Y_pred``.
        Y_pred: Predicted labels as a NumPy array; the fraction of truthy
            entries is reported as ``DDOS%``.
        packets: Value stored in the ``Packets`` column.
        model_name: Value stored in the ``Model`` column.
        data_source: Value stored in the ``Source`` column.
        prediction_time: Prediction time in seconds, logged with 3 decimals.
        writer: Object exposing ``writerow(dict)`` (e.g. ``csv.DictWriter``).

    Without labels, every metric column is set to "N/A". With labels, a rate
    whose denominator is zero (the batch contains no negative or no positive
    samples) is also reported as "N/A" instead of dividing by zero.
    """
    def _rate(numerator, denominator):
        # A rate is undefined when its class is absent from the batch.
        return numerator / denominator if denominator else "N/A"

    ddos_rate = '{:04.3f}'.format(sum(Y_pred) / Y_pred.shape[0])

    # Default for the unlabelled case; overwritten below when labels exist.
    metrics = dict.fromkeys(('Accuracy', 'F1Score', 'TPR', 'FPR', 'TNR', 'FNR'), "N/A")

    if Y_true is not None:  # if we have the labels, we can compute the classification accuracy
        Y_true = Y_true.reshape((Y_true.shape[0], 1))
        # labels=[0, 1] forces a 2x2 matrix even if one class is missing,
        # so the four-way unpacking below always succeeds.
        tn, fp, fn, tp = confusion_matrix(Y_true, Y_pred, labels=[0, 1]).ravel()
        metrics = {
            'Accuracy': accuracy_score(Y_true, Y_pred),
            'F1Score': f1_score(Y_true, Y_pred),
            'TPR': _rate(tp, tp + fn),
            'FPR': _rate(fp, fp + tn),
            'TNR': _rate(tn, tn + fp),
            'FNR': _rate(fn, fn + tp),
        }

    # Key order matters: it is the order shown by pprint (sort_dicts=False).
    row = {
        'Model': model_name,
        'Time': '{:04.3f}'.format(prediction_time),
        'Packets': packets,
        'Samples': Y_pred.shape[0],
        'DDOS%': ddos_rate,
        **metrics,
        'Source': data_source,
    }
    pprint.pprint(row, sort_dicts=False)
    writer.writerow(row)

## External files
In the cell below, you need to set the right path to your MLP model. The path to the pcap file is already set. In this script, you can also find the code to collect network traffic from one of the network interfaces of the target computer. In the example below, the name of the interface is ```eth0```; however, it might not be the right name on the target computer.

Note that the stand-alone Python program must take the paths (or the interface name) as command-line arguments; therefore, the code in the following cell must be modified accordingly (e.g., using [argparse](https://docs.python.org/3/library/argparse.html)).

In [None]:
# Load the trained MLP model (replace the placeholder with the real path).
model = load_model("/path/to/the/mlp-model")
# summary() prints the architecture by itself and returns None, so wrapping
# it in print() would only add a stray "None" line to the output.
model.summary()

# Traffic source: packets are read from a pcap trace stored on disk.
pcap_file = "../Datasets/IDS2017/IDS2017-dataset.pcap"
cap = pyshark.FileCapture(pcap_file)
# Only the file name (without directories) is reported as the data source.
data_source = os.path.basename(pcap_file).strip()
# Alternative traffic source: live capture from a network interface.
#cap =  pyshark.LiveCapture(interface="eth0")
#data_source = "eth0"

print ("Traffic source: ",data_source)

## Log file
In the stand-alone Python script, the path to the output log file must be specified as a command-line argument.

In [None]:
# Columns of the results log; report_results() writes one row with exactly
# these keys for every classified batch of samples.
classify_fieldnames = ['Model', 'Time', 'Packets', 'Samples', 'DDOS%', 'Accuracy',
                       'F1Score', 'TPR', 'FPR', 'TNR', 'FNR', 'Source']
# Write mode already starts from an empty file, so there is no need to open
# in append mode and truncate afterwards. newline='' is what the csv module
# expects so that it can control the line endings itself.
predict_file = open('./results.csv', 'w', newline='')
predict_writer = csv.DictWriter(predict_file, fieldnames=classify_fieldnames)
predict_writer.writeheader()
# Flush immediately so the header is on disk before the first result arrives.
predict_file.flush()

In [None]:
 # load the labels, if available
labels = parse_labels("DOS2017")
# Duration of one capture window (passed as `time_window`) and the value
# passed as `max_flow_len` to the pre-processing helpers.
time_window = 10
max_flow_len = 10
mins, maxs = static_min_max(time_window)

# Classify the traffic one time window at a time. The try/finally makes sure
# the results file is closed however the loop ends (end of trace, error or
# keyboard interrupt).
try:
    while True:
        samples = process_live_traffic(cap, labels, max_flow_len, traffic_type="all", time_window=time_window)
        if len(samples) == 0:
            if isinstance(cap, pyshark.FileCapture):
                # A trace file is finite, so an empty window is taken as
                # "file exhausted". NOTE(review): assumes
                # process_live_traffic() only returns no samples for a file
                # once all packets have been read -- confirm against
                # traffic_processing.
                print("No more packets in file ", data_source)
                break
            # Live capture: nothing arrived in this window, keep listening.
            continue

        X, Y_true, keys = dataset_to_list_of_fragments(samples)
        X = np.array(normalize_and_padding(X, mins, maxs, max_flow_len))
        if labels is not None:
            Y_true = np.squeeze(np.array(Y_true))
        else:
            # Keep a real None: np.squeeze(None) would return a 0-d object
            # array, which report_results() would mistake for labels.
            Y_true = None

        X = np.expand_dims(X, axis=3)
        pt0 = time.time()
        Y_pred = np.squeeze(model.predict(X, batch_size=2048) > 0.5, axis=1)
        pt1 = time.time()
        prediction_time = pt1 - pt0

        try:
            [packets] = count_packets_in_dataset([X])
            report_results(Y_true, Y_pred, packets, model.name, data_source, prediction_time, predict_writer)
            predict_file.flush()
        except Exception as error:
            # Best effort: a failing window must not stop the capture, but
            # the cause is shown instead of being silently discarded.
            # KeyboardInterrupt is deliberately not caught here.
            print("No packets received during the last time window.")
            print("Cause: ", repr(error))
finally:
    predict_file.close()