# DDoS Attack Detection With ML & DL and Differential Privacy Concept

### 1. Data Preprocessing

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, '../source/')
from DataProcessor import DataProcessor

In [2]:
def find_common_features(my_A, my_B, my_C):
    """Return the items that occur in all three iterables.

    Each common item appears once, in the order it first occurs in ``my_A``.
    A deterministic order matters here because the result is used to select
    DataFrame columns; relying on set iteration order would let the column
    order change between interpreter runs.

    Args:
        my_A: Iterable of hashable items; defines the order of the result.
        my_B: Iterable of hashable items.
        my_C: Iterable of hashable items.

    Returns:
        A list of the items shared by ``my_A``, ``my_B`` and ``my_C``.
    """
    in_b = set(my_B)
    in_c = set(my_C)
    # dict.fromkeys de-duplicates my_A while keeping first-seen order.
    return [item for item in dict.fromkeys(my_A) if item in in_b and item in in_c]

In [3]:
def process_basics_for_data(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Min-max scale ``dataframe`` and then label-encode the scaled result.

    Both steps are delegated to a ``DataProcessor`` constructed around the
    input frame; the value returned is whatever ``label_encoder`` produces
    for the scaled data.
    """
    processor = DataProcessor(dataframe)
    scaled = processor.min_max_scaler(dataframe)
    return processor.label_encoder(scaled)

In [None]:
# Load the three capture CSVs used throughout the notebook. Paths are
# relative to the notebook's working directory.
mondayDataset: pd.DataFrame = pd.read_csv(
    "../verisetleri/Monday-WorkingHours.pcap_ISCX.csv",
    low_memory=False,
)

fridayDataSet: pd.DataFrame = pd.read_csv(
    "../verisetleri/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv",
    low_memory=False,
)

synDataset: pd.DataFrame = pd.read_csv(
    "../verisetleri/01-12/Syn.csv",
    low_memory=False,
)

In [None]:
# Keep only the columns that exist in all three raw captures so the frames
# share one schema before preprocessing.
commonColumns = find_common_features(
    mondayDataset.columns,
    fridayDataSet.columns,
    synDataset.columns,
)
mondayDataset, fridayDataSet, synDataset = (
    frame[commonColumns]
    for frame in (mondayDataset, fridayDataSet, synDataset)
)

In [None]:
# Apply the same scaling + label-encoding pipeline to each capture, in the
# order Monday, Friday, Syn.
processedMonday, processedFriday, processedSyn = (
    process_basics_for_data(frame)
    for frame in (mondayDataset, fridayDataSet, synDataset)
)

In [None]:
# Re-align the processed frames on their shared columns, then select those
# columns from each frame.
commonColumns = find_common_features(
    processedMonday.columns,
    processedFriday.columns,
    processedSyn.columns,
)
processedMonday, processedFriday, processedSyn = (
    frame[commonColumns]
    for frame in (processedMonday, processedFriday, processedSyn)
)

### 2. Feature Selection For Machine Learning

In [None]:
# Feature selection uses the processed Friday capture only: a DataProcessor is
# built over processedFriday and its chi-square selector is run on that frame.
dataProcessor: DataProcessor = DataProcessor(processedFriday)
bestFeatureFrame = dataProcessor.chi_square_feature_selector(processedFriday)
# `Specs` is used below to index columns of the processed frames, so it is
# presumably the column holding the selected feature names — TODO confirm in
# DataProcessor.chi_square_feature_selector.
bestFeatures = bestFeatureFrame.Specs

In [None]:
# Training frame for the ML models: the selected feature columns of the
# Friday capture with the " Label" column appended, followed by reset_index()
# (which moves the previous index into a column).
corrDataframe = pd.concat(
    [processedFriday[bestFeatures], processedFriday[" Label"]],
    axis=1,
).reset_index()

In [None]:
# Remove the "index" column, then display the resulting frame as the cell
# output.
corrDataframe = corrDataframe.drop(columns=["index"])
corrDataframe

In [None]:
# Hand the selected-features-plus-label frame to the DataProcessor's
# correlation-matrix routine (output format is defined in DataProcessor).
dataProcessor.print_correlation_matrix(corrDataframe)

## Machine Learning Methods 

In [None]:
# The project module is imported under the lowercase alias `mlbuilder`; the
# camelCase `mlBuilder` below is the builder *instance* used by later cells.
import MLMethodBuilder as mlbuilder

# The builder is constructed over corrDataframe (selected features + " Label").
mlBuilder = mlbuilder.MLMethodBuilder(corrDataframe)

In [None]:
# Run the builder's Naive Bayes step.
# NOTE(review): presumably fits the model later requested via
# mlBuilder.predict(..., "naive_bayes") — confirm in MLMethodBuilder.
mlBuilder.NaiveBayes()

In [None]:
# Run the builder's decision-tree step.
# NOTE(review): presumably fits the model later requested via
# mlBuilder.predict(..., "decision_tree") — confirm in MLMethodBuilder.
mlBuilder.DecisionTreeClassifier()

## Using Differential Privacy 

In [None]:
# Run the builder's differentially private Naive Bayes step on the same builder
# instance.
# NOTE(review): verify whether this replaces the model that
# mlBuilder.predict(..., "naive_bayes") uses in the evaluation cells below.
mlBuilder.DifferentialNaiveBayes()

In [None]:
# Run the builder's differentially private decision-tree step on the same
# builder instance.
# NOTE(review): verify whether this replaces the model that
# mlBuilder.predict(..., "decision_tree") uses in the evaluation cells below.
mlBuilder.DifferentialDecisionTreeClassifier()

In [None]:
# Evaluation slices: the first 10000 rows of the Monday and Syn captures,
# restricted to the features selected on the Friday capture, with the matching
# " Label" values as NumPy arrays.
processMondayTest = processedMonday[bestFeatures].head(10000)
MondayTestLabel = np.array(processedMonday[" Label"].head(10000))

processedSynTest = processedSyn[bestFeatures].head(10000)
SynTestLabel = np.array(processedSyn[" Label"].head(10000))

In [None]:
from sklearn.metrics import accuracy_score

# Score both ML models on the Monday evaluation slice.
predictionsMondayNaive = mlBuilder.predict(
    processMondayTest.values, "naive_bayes"
)
predictionsMondayDecisionTree = mlBuilder.predict(
    processMondayTest.values, "decision_tree"
)

# Accuracy is reported as a percentage.
print(
    'Accuracy for Monday GNB: %f'
    % (accuracy_score(predictionsMondayNaive, MondayTestLabel) * 100)
)
print(
    'Accuracy for Monday DecisionTree: %f'
    % (accuracy_score(predictionsMondayDecisionTree, MondayTestLabel) * 100)
)

In [None]:
from sklearn.metrics import accuracy_score

# Score both ML models on the Syn evaluation slice. Both predictions must be
# computed from processedSynTest so that they line up with SynTestLabel; the
# Naive Bayes call previously used the Monday slice by mistake.
predictionsSynNaive = mlBuilder.predict(processedSynTest.values, "naive_bayes")
predictionsSynDecisionTree = mlBuilder.predict(processedSynTest.values, "decision_tree")

# Accuracy is reported as a percentage.
print('Accuracy for Syn GNB: %f' % (accuracy_score(predictionsSynNaive, SynTestLabel)*100))
print('Accuracy for Syn DecisionTree: %f' % (accuracy_score(predictionsSynDecisionTree, SynTestLabel)*100))

## Convolutional Neural Networks (1D) 

In [None]:
from DLMethodBuilder import DLMethodBuilder

In [None]:
# The DL builder receives the whole processed Friday frame (every common
# column), not the chi-square-selected subset used for the ML models.
dlBuilderWithFullData = DLMethodBuilder(processedFriday)
# NOTE(review): presumably defines the 1D CNN architecture inside the builder —
# confirm in DLMethodBuilder.
dlBuilderWithFullData.ConvolutionalNeuralNetwork()

In [None]:
# Called with 10 epochs and a batch size of 256.
# NOTE(review): the epoch/batch arguments suggest this call also trains the
# network, not just compiles it — confirm in DLMethodBuilder.start_to_compile.
dlBuilderWithFullData.start_to_compile(epochNum = 10, batchSize = 256)

In [None]:
# CNN scores for the first 10000 rows of each hold-out capture; the " Label"
# column is removed so only feature columns are passed to the network.
predictionsMonday = dlBuilderWithFullData.predict(
    processedMonday.drop(columns=[" Label"]).head(10000).values
)
predictionsSyn = dlBuilderWithFullData.predict(
    processedSyn.drop(columns=[" Label"]).head(10000).values
)

In [None]:
def labelPredictions(predictionList: np.ndarray, thrRatio: float = 1.65) -> list:
    """Binarize prediction scores with a mean-plus-k-sigma threshold.

    The threshold is ``mean(predictionList) + thrRatio * std(predictionList)``,
    computed over all values. Each element strictly greater than the threshold
    is labelled 1; every other element is labelled 0.

    Args:
        predictionList: Scores to label. The sequence is iterated element by
            element, so the result has one label per top-level element.
        thrRatio: Number of standard deviations above the mean at which the
            threshold is placed. Defaults to 1.65, the value previously
            hard-coded in this function.

    Returns:
        A list of ``0``/``1`` ints, one per element of ``predictionList``.
    """
    # Nothing to label; also avoids NumPy's mean-of-empty-slice warning.
    if np.size(predictionList) == 0:
        return []
    threshold = np.mean(predictionList) + thrRatio * np.std(predictionList)
    return [1 if elem > threshold else 0 for elem in predictionList]

In [None]:
# Turn the raw CNN scores of each capture into 0/1 labels; the threshold is
# computed separately for each capture's own score distribution.
labeledMondayPredictions, labeledSynPredictions = (
    labelPredictions(scores)
    for scores in (predictionsMonday, predictionsSyn)
)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy (as a percentage) of the thresholded CNN labels. The first line
# reports the Monday capture, the second the Syn capture; each set of
# predictions is compared with the labels of its own capture (the Syn
# predictions were previously scored against the Monday labels).
print('Accuracy: %f' % (accuracy_score(labeledMondayPredictions, MondayTestLabel)*100))
print('Accuracy: %f' % (accuracy_score(labeledSynPredictions, SynTestLabel)*100))