In [1]:
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import plot_confusion_matrix, roc_curve, roc_auc_score
from sklearn.metrics import balanced_accuracy_score, matthews_corrcoef
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from collections import Counter
from imblearn.under_sampling import NearMiss,CondensedNearestNeighbour, TomekLinks, EditedNearestNeighbours, OneSidedSelection, NeighbourhoodCleaningRule
from imblearn.over_sampling import SMOTE
from sklearn import svm
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from matplotlib import pyplot
from numpy import where
import pandas as pd 
import os 
import json
import numpy as np
import pylab as plt
import tensorflow as tf
import time
from vecstack import stacking
import seaborn as sns
from math import floor

pd.set_option("display.max_columns", None)

In [2]:
# Load the five per-attack datasets of the first scenario (07:00-09:00).
# Attack type per file, in order:
#   merged1 -> DoS
#   merged2 -> DoS Random
#   merged3 -> DoS Disruptive
#   merged4 -> DoS Random Sybil
#   merged5 -> DoS Disruptive Sybil
merged1, merged2, merged3, merged4, merged5 = (
    pd.read_csv(os.path.join('./0709', csv_name))
    for csv_name in ('merged1.csv', 'merged2.csv', 'merged3.csv', 'merged4.csv', 'merged5.csv')
)

### Define sampling and classification functions

### Results per attack

In [21]:
def sampling_per_attack(sample, veremi, random_state=None):
    """Select the extended feature set, binarise the label and optionally rebalance the classes.

    Parameters
    ----------
    sample : int
        Resampling strategy:
        0  -> keep the class distribution as it is,
        -1 -> under-sample: draw (without replacement) as many rows from each
              class as the smaller class contains,
        1  -> over-sample: draw (with replacement) as many rows from each
              class as the larger class contains.
    veremi : pandas.DataFrame
        Input data; must contain every column listed in ``columns`` below.
        The frame passed in is not modified.
    random_state : int, numpy.random.RandomState or None, optional
        Forwarded to ``DataFrame.sample`` so that resampling can be made
        reproducible. ``None`` (default) keeps the previous unseeded behaviour.

    Returns
    -------
    (X, y) : (pandas.DataFrame, pandas.Series)
        Feature columns and the binary label (0 = label value 0, 1 = any other
        label value), stored as ``uint8``.

    Raises
    ------
    ValueError
        If ``sample`` is not one of 0, -1, 1, or if resampling is requested
        while one of the two classes has no rows.
    """
    columns = ['pos_x_send', 'pos_y_send',
               'spd_x_send', 'spd_y_send', 'acl_x_send', 'acl_y_send', 'hed_x_send', 'hed_y_send', 'label',
               'nb_packets_sent', 'frequency1', 'frequency2', 'time_diff', 'distance', 'difSpeed', 'estAoA']

    if sample not in (0, -1, 1):
        raise ValueError("sample must be 0 (original), -1 (under-sampling) or 1 (over-sampling), got %r" % (sample,))

    # dropna() returns a new frame, so the caller's data is never touched and
    # no SettingWithCopyWarning is triggered.
    data = veremi[columns].dropna()

    # Every non-zero label is treated as the attack class. This does not depend
    # on which label happens to appear first in the file (the previous
    # ``unique()[1]`` lookup did, and failed on single-class data).
    # Only the label is cast to uint8: casting the features as well would
    # truncate/wrap real-valued positions, speeds, etc.
    data = data.assign(label=(data['label'] != 0).astype(np.uint8))

    if sample == 0:
        selected = data
    else:
        benign = data[data['label'] == 0]
        attack = data[data['label'] == 1]
        if benign.empty or attack.empty:
            raise ValueError("resampling requires rows of both classes (label 0 and non-zero label)")

        if sample == -1:
            # Under-sampling: shrink both classes to the minority size.
            n_rows, with_replacement = min(len(benign), len(attack)), False
        else:
            # Over-sampling: grow both classes to the majority size.
            n_rows, with_replacement = max(len(benign), len(attack)), True

        selected = pd.concat(
            [benign.sample(n_rows, replace=with_replacement, random_state=random_state),
             attack.sample(n_rows, replace=with_replacement, random_state=random_state)],
            axis=0,
        )

    y = selected['label']
    X = selected.drop(columns='label')
    return (X, y)




In [22]:
def classification_per_attack(sample, method, veremi):
    """Train one classifier on one attack dataset and report test metrics and timings.

    The data is prepared by ``sampling_per_attack(sample, veremi)`` and then
    split 80/20 (stratified, ``random_state=0``).

    NOTE(review): resampling is applied before the train/test split, so with
    over-sampling (``sample=1``) duplicated rows can end up in both the training
    and the test set, which can inflate the reported scores. Confirm whether the
    test set should be kept at the original distribution.

    Parameters
    ----------
    sample : int
        Resampling strategy forwarded to ``sampling_per_attack`` (0, -1 or 1).
    method : str
        'RF' (random forest), 'Xgboost' (XGBoost) or 'Stacking' (random forest +
        XGBoost stacked under a logistic regression).
    veremi : pandas.DataFrame
        Dataset forwarded to ``sampling_per_attack``.

    Returns
    -------
    tuple
        (accuracy, weighted precision, weighted recall, weighted F1,
        balanced accuracy, Matthews correlation coefficient,
        fit time in seconds, prediction time in seconds).

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    # Build the model first so that an unsupported method fails immediately,
    # before any (potentially expensive) data preparation.
    if method == 'RF':
        model = RandomForestClassifier(n_estimators=50)
    elif method == 'Xgboost':
        model = XGBClassifier()
    elif method == 'Stacking':
        from sklearn.ensemble import StackingClassifier
        from sklearn.linear_model import LogisticRegression

        base_learners = [
            # n_estimators alternatives noted by the author: 50, 300 or 400
            ('rf', RandomForestClassifier(n_estimators=50, criterion='gini')),
            # n_estimators alternatives noted by the author: 3000 or 5000
            ('xgbc', XGBClassifier(eta=1, max_depth=5, sampling_method='uniform')),
        ]
        model = StackingClassifier(estimators=base_learners, final_estimator=LogisticRegression())
    else:
        raise ValueError("method must be 'RF', 'Xgboost' or 'Stacking', got %r" % (method,))

    X, y = sampling_per_attack(sample, veremi)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)

    # Time fitting only. The previous Stacking branch also ran .score() on the
    # test set inside the timed region, which made its TimeFit not comparable
    # with the other two methods.
    start = time.perf_counter()
    model.fit(X_train, y_train)
    timefit = time.perf_counter() - start

    start = time.perf_counter()
    y_pred = model.predict(X_test)
    timepred = time.perf_counter() - start

    report = classification_report(y_test, y_pred, output_dict=True)
    weighted = report['weighted avg']

    return (report['accuracy'], weighted['precision'], weighted['recall'],
            weighted['f1-score'], balanced_accuracy_score(y_test, y_pred),
            matthews_corrcoef(y_test, y_pred), timefit, timepred)


In [5]:
# Empty results table for the runs on the original class distribution:
# one row per (classifier, attack dataset) pair, one column per metric/timing.
results_original_per_attack = pd.DataFrame(
    index=['{}_{}'.format(model_name, attack_no)
           for attack_no in range(1, 6)
           for model_name in ('Xgboost', 'RF', 'Stacking')],
    columns=['Acc', 'Pre', 'Rec', 'F1s', 'BAS', 'MCC', 'TimeFit', 'TimePred'],
)

In [None]:
# Attack dataset 1 (DoS), original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack.loc[method + '_1'] = list(classification_per_attack(0, method, merged1))
    print(results_original_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack.to_excel('results_original_per_attack_1.xlsx')

In [None]:
# Attack dataset 2 (DoS Random), original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack.loc[method + '_2'] = list(classification_per_attack(0, method, merged2))
    print(results_original_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack.to_excel('results_original_per_attack_1.xlsx')

In [None]:
# Attack dataset 3 (DoS Disruptive), original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack.loc[method + '_3'] = list(classification_per_attack(0, method, merged3))
    print(results_original_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack.to_excel('results_original_per_attack_1.xlsx')

In [None]:
# Attack dataset 4 (DoS Random Sybil), original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack.loc[method + '_4'] = list(classification_per_attack(0, method, merged4))
    print(results_original_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack.to_excel('results_original_per_attack_1.xlsx')

In [None]:
# Attack dataset 5 (DoS Disruptive Sybil), original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack.loc[method + '_5'] = list(classification_per_attack(0, method, merged5))
    print(results_original_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack.to_excel('results_original_per_attack_1.xlsx')

In [5]:
# Empty results table for the under-sampled runs:
# one row per (classifier, attack dataset) pair, one column per metric/timing.
results_under_per_attack = pd.DataFrame(
    index=['{}_{}'.format(model_name, attack_no)
           for attack_no in range(1, 6)
           for model_name in ('Xgboost', 'RF', 'Stacking')],
    columns=['Acc', 'Pre', 'Rec', 'F1s', 'BAS', 'MCC', 'TimeFit', 'TimePred'],
)

In [None]:
# Attack dataset 1 (DoS), under-sampled classes (sample=-1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_under_per_attack.loc[method + '_1'] = list(classification_per_attack(-1, method, merged1))
    print(results_under_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_under_per_attack.to_excel('results_under_per_attack_1.xlsx')

In [None]:
# Attack dataset 2 (DoS Random), under-sampled classes (sample=-1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_under_per_attack.loc[method + '_2'] = list(classification_per_attack(-1, method, merged2))
    print(results_under_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_under_per_attack.to_excel('results_under_per_attack_1.xlsx')

In [None]:
# Attack dataset 3 (DoS Disruptive), under-sampled classes (sample=-1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_under_per_attack.loc[method + '_3'] = list(classification_per_attack(-1, method, merged3))
    print(results_under_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_under_per_attack.to_excel('results_under_per_attack_1.xlsx')

In [None]:
# Attack dataset 4 (DoS Random Sybil), under-sampled classes (sample=-1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_under_per_attack.loc[method + '_4'] = list(classification_per_attack(-1, method, merged4))
    print(results_under_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_under_per_attack.to_excel('results_under_per_attack_1.xlsx')

In [None]:
# Attack dataset 5 (DoS Disruptive Sybil), under-sampled classes (sample=-1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_under_per_attack.loc[method + '_5'] = list(classification_per_attack(-1, method, merged5))
    print(results_under_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_under_per_attack.to_excel('results_under_per_attack_1.xlsx')

In [7]:
# Empty results table for the over-sampled runs:
# one row per (classifier, attack dataset) pair, one column per metric/timing.
results_over_per_attack = pd.DataFrame(
    index=['{}_{}'.format(model_name, attack_no)
           for attack_no in range(1, 6)
           for model_name in ('Xgboost', 'RF', 'Stacking')],
    columns=['Acc', 'Pre', 'Rec', 'F1s', 'BAS', 'MCC', 'TimeFit', 'TimePred'],
)

In [None]:
# Attack dataset 1 (DoS), over-sampled classes (sample=1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_over_per_attack.loc[method + '_1'] = list(classification_per_attack(1, method, merged1))
    print(results_over_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_over_per_attack.to_excel('results_over_per_attack_1.xlsx')

In [None]:
# Attack dataset 2 (DoS Random), over-sampled classes (sample=1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_over_per_attack.loc[method + '_2'] = list(classification_per_attack(1, method, merged2))
    print(results_over_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_over_per_attack.to_excel('results_over_per_attack_1.xlsx')

In [None]:
# Attack dataset 3 (DoS Disruptive), over-sampled classes (sample=1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_over_per_attack.loc[method + '_3'] = list(classification_per_attack(1, method, merged3))
    print(results_over_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_over_per_attack.to_excel('results_over_per_attack_1.xlsx')

In [None]:
# Attack dataset 4 (DoS Random Sybil), over-sampled classes (sample=1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_over_per_attack.loc[method + '_4'] = list(classification_per_attack(1, method, merged4))
    print(results_over_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_over_per_attack.to_excel('results_over_per_attack_1.xlsx')

In [None]:
# Attack dataset 5 (DoS Disruptive Sybil), over-sampled classes (sample=1).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_over_per_attack.loc[method + '_5'] = list(classification_per_attack(1, method, merged5))
    print(results_over_per_attack)
# Persist after every attack so finished runs survive a kernel restart.
results_over_per_attack.to_excel('results_over_per_attack_1.xlsx')

In [None]:
# Reload the saved result tables if needed.
# The row labels come back from Excel as an 'Unnamed: 0' column; it is renamed
# to 'method' and restored as the index.
results_original_per_attack = (
    pd.read_excel('results_original_per_attack_1.xlsx')
    .rename(columns={'Unnamed: 0': 'method'})
    .set_index('method')
)

results_under_per_attack = (
    pd.read_excel('results_under_per_attack_1.xlsx')
    .rename(columns={'Unnamed: 0': 'method'})
    .set_index('method')
)

results_over_per_attack = (
    pd.read_excel('results_over_per_attack_1.xlsx')
    .rename(columns={'Unnamed: 0': 'method'})
    .set_index('method')
)

#### Results for the original dataset (without the additional features)

In [26]:
def sampling_per_attack_original(sample, veremi, random_state=None):
    """Select the baseline feature set, binarise the label and optionally rebalance the classes.

    Parameters
    ----------
    sample : int
        Resampling strategy:
        0  -> keep the class distribution as it is,
        -1 -> under-sample: draw (without replacement) as many rows from each
              class as the smaller class contains,
        1  -> over-sample: draw (with replacement) as many rows from each
              class as the larger class contains.
    veremi : pandas.DataFrame
        Input data; must contain every column listed in ``columns`` below.
        The frame passed in is not modified.
    random_state : int, numpy.random.RandomState or None, optional
        Forwarded to ``DataFrame.sample`` so that resampling can be made
        reproducible. ``None`` (default) keeps the previous unseeded behaviour.

    Returns
    -------
    (X, y) : (pandas.DataFrame, pandas.Series)
        Feature columns and the binary label (0 = label value 0, 1 = any other
        label value), stored as ``uint8``.

    Raises
    ------
    ValueError
        If ``sample`` is not one of 0, -1, 1, or if resampling is requested
        while one of the two classes has no rows.
    """
    columns = ['pos_x_send', 'pos_y_send',
               'pos_noise_x', 'pos_noise_y', 'spd_x_send', 'spd_y_send', 'spd_noise_x',
               'spd_noise_y', 'acl_x_send', 'acl_y_send', 'acl_noise_x', 'acl_noise_y',
               'hed_x_send', 'hed_y_send', 'hed_noise_x', 'hed_noise_y', 'label']

    if sample not in (0, -1, 1):
        raise ValueError("sample must be 0 (original), -1 (under-sampling) or 1 (over-sampling), got %r" % (sample,))

    # dropna() returns a new frame, so the caller's data is never touched and
    # no SettingWithCopyWarning is triggered.
    data = veremi[columns].dropna()

    # Every non-zero label is treated as the attack class. This does not depend
    # on which label happens to appear first in the file (the previous
    # ``unique()[1]`` lookup did, and failed on single-class data).
    # Only the label is cast to uint8: casting the features as well would
    # truncate/wrap real-valued positions, speeds, noise terms, etc.
    data = data.assign(label=(data['label'] != 0).astype(np.uint8))

    if sample == 0:
        selected = data
    else:
        benign = data[data['label'] == 0]
        attack = data[data['label'] == 1]
        if benign.empty or attack.empty:
            raise ValueError("resampling requires rows of both classes (label 0 and non-zero label)")

        if sample == -1:
            # Under-sampling: shrink both classes to the minority size.
            n_rows, with_replacement = min(len(benign), len(attack)), False
        else:
            # Over-sampling: grow both classes to the majority size.
            n_rows, with_replacement = max(len(benign), len(attack)), True

        selected = pd.concat(
            [benign.sample(n_rows, replace=with_replacement, random_state=random_state),
             attack.sample(n_rows, replace=with_replacement, random_state=random_state)],
            axis=0,
        )

    y = selected['label']
    X = selected.drop(columns='label')
    return (X, y)




In [27]:
def classification_per_attack_original(sample, method, veremi):
    """Train one classifier on the baseline features of one attack dataset and report metrics and timings.

    The data is prepared by ``sampling_per_attack_original(sample, veremi)`` and
    then split 80/20 (stratified, ``random_state=0``).

    NOTE(review): resampling is applied before the train/test split, so with
    over-sampling (``sample=1``) duplicated rows can end up in both the training
    and the test set, which can inflate the reported scores. Confirm whether the
    test set should be kept at the original distribution.

    Parameters
    ----------
    sample : int
        Resampling strategy forwarded to ``sampling_per_attack_original``
        (0, -1 or 1).
    method : str
        'RF' (random forest), 'Xgboost' (XGBoost) or 'Stacking' (random forest +
        XGBoost stacked under a logistic regression).
    veremi : pandas.DataFrame
        Dataset forwarded to ``sampling_per_attack_original``.

    Returns
    -------
    tuple
        (accuracy, weighted precision, weighted recall, weighted F1,
        balanced accuracy, Matthews correlation coefficient,
        fit time in seconds, prediction time in seconds).

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    # Build the model first so that an unsupported method fails immediately,
    # before any (potentially expensive) data preparation.
    if method == 'RF':
        model = RandomForestClassifier(n_estimators=50)
    elif method == 'Xgboost':
        model = XGBClassifier()
    elif method == 'Stacking':
        from sklearn.ensemble import StackingClassifier
        from sklearn.linear_model import LogisticRegression

        base_learners = [
            # n_estimators alternatives noted by the author: 50, 300 or 400
            ('rf', RandomForestClassifier(n_estimators=50, criterion='gini')),
            # n_estimators alternatives noted by the author: 3000 or 5000
            ('xgbc', XGBClassifier(eta=1, max_depth=5, sampling_method='uniform')),
        ]
        model = StackingClassifier(estimators=base_learners, final_estimator=LogisticRegression())
    else:
        raise ValueError("method must be 'RF', 'Xgboost' or 'Stacking', got %r" % (method,))

    X, y = sampling_per_attack_original(sample, veremi)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)

    # Time fitting only. The previous Stacking branch also ran .score() on the
    # test set inside the timed region, which made its TimeFit not comparable
    # with the other two methods.
    start = time.perf_counter()
    model.fit(X_train, y_train)
    timefit = time.perf_counter() - start

    start = time.perf_counter()
    y_pred = model.predict(X_test)
    timepred = time.perf_counter() - start

    report = classification_report(y_test, y_pred, output_dict=True)
    weighted = report['weighted avg']

    return (report['accuracy'], weighted['precision'], weighted['recall'],
            weighted['f1-score'], balanced_accuracy_score(y_test, y_pred),
            matthews_corrcoef(y_test, y_pred), timefit, timepred)


In [28]:
# Empty results table for the baseline-feature runs on the original class distribution:
# one row per (classifier, attack dataset) pair, one column per metric/timing.
results_original_per_attack_original = pd.DataFrame(
    index=['{}_{}'.format(model_name, attack_no)
           for attack_no in range(1, 6)
           for model_name in ('Xgboost', 'RF', 'Stacking')],
    columns=['Acc', 'Pre', 'Rec', 'F1s', 'BAS', 'MCC', 'TimeFit', 'TimePred'],
)

In [None]:
# Attack dataset 1 (DoS), baseline features, original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack_original.loc[method + '_1'] = list(
        classification_per_attack_original(0, method, merged1))
    print(results_original_per_attack_original)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack_original.to_excel('results_original_per_attack_original_1.xlsx')

In [None]:
# Attack dataset 2 (DoS Random), baseline features, original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack_original.loc[method + '_2'] = list(
        classification_per_attack_original(0, method, merged2))
    print(results_original_per_attack_original)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack_original.to_excel('results_original_per_attack_original_1.xlsx')

In [None]:
# Attack dataset 3 (DoS Disruptive), baseline features, original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack_original.loc[method + '_3'] = list(
        classification_per_attack_original(0, method, merged3))
    print(results_original_per_attack_original)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack_original.to_excel('results_original_per_attack_original_1.xlsx')

In [None]:
# Attack dataset 4 (DoS Random Sybil), baseline features, original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack_original.loc[method + '_4'] = list(
        classification_per_attack_original(0, method, merged4))
    print(results_original_per_attack_original)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack_original.to_excel('results_original_per_attack_original_1.xlsx')

In [None]:
# Attack dataset 5 (DoS Disruptive Sybil), baseline features, original class distribution (sample=0).
# Rows are written with .loc: assigning through the object returned by .xs()
# is chained assignment and is not guaranteed to update the results table.
for method in ('Xgboost', 'RF', 'Stacking'):
    results_original_per_attack_original.loc[method + '_5'] = list(
        classification_per_attack_original(0, method, merged5))
    print(results_original_per_attack_original)
# Persist after every attack so finished runs survive a kernel restart.
results_original_per_attack_original.to_excel('results_original_per_attack_original_1.xlsx')

In [None]:
# Reload the saved baseline-feature result table if needed.
# The row labels come back from Excel as an 'Unnamed: 0' column; it is renamed
# to 'method' and restored as the index.
results_original_per_attack_original = (
    pd.read_excel('results_original_per_attack_original_1.xlsx')
    .rename(columns={'Unnamed: 0': 'method'})
    .set_index('method')
)