# 1. Imports

In [43]:
import pandas as pd
from sklearn import preprocessing
import numpy as np
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.linear_model import LinearRegression

# 2. Importing the Datasets

In [44]:
# Portmap training CSV from the CSE_CIC_IDS2018 folder; the first CSV column becomes the index.
ddos_portmap_2018_df = pd.read_csv(
    "../data/train_test/CSE_CIC_IDS2018/ddos_portmap_2018_train.csv",
    index_col=0,
)

In [45]:
# LDAP training CSV from the CIC_DDoS2019 folder; the first CSV column becomes the index.
ddos_ldap_2019_df = pd.read_csv(
    "../data/train_test/CIC_DDoS2019/ddos_ldap_2019_train.csv",
    index_col=0,
)

In [46]:
# NetBIOS training CSV from the CIC_DDoS2019 folder; the first CSV column becomes the index.
ddos_netbios_2019_df = pd.read_csv(
    "../data/train_test/CIC_DDoS2019/ddos_netbios_2019_train.csv",
    index_col=0,
)

In [47]:
# SYN training CSV from the CIC_DDoS2019 folder; the first CSV column becomes the index.
ddos_syn_2019_df = pd.read_csv(
    "../data/train_test/CIC_DDoS2019/ddos_syn_2019_train.csv",
    index_col=0,
)

In [48]:
# UDP training CSV from the CIC_DDoS2019 folder; the first CSV column becomes the index.
ddos_udp_2019_df = pd.read_csv(
    "../data/train_test/CIC_DDoS2019/ddos_udp_2019_train.csv",
    index_col=0,
)

In [49]:
# Name of the column used as the prediction target; it is dropped from the
# feature matrix before feature selection runs.
target_feature: str = 'Label'

# 3. Feature Selection - Backward Elimination

### Helper Functions

In [50]:
def BackwardElimination_Helper(X_train, y_train, final_num_features):
    """Run sequential backward selection and return the surviving feature names.

    A ``LinearRegression`` estimator is wrapped in mlxtend's
    ``SequentialFeatureSelector`` with ``forward=False`` and scored with
    ``neg_mean_squared_error``; the selector is then fitted on the given data.

    Parameters
    ----------
    X_train
        Feature matrix passed to the selector's ``fit``. Callers in this
        notebook pass a pandas DataFrame.
    y_train
        Target values passed to the selector's ``fit``.
    final_num_features
        Forwarded to the selector as ``k_features``.

    Returns
    -------
    list
        Names of the selected features.
    """
    lreg = LinearRegression()
    sfs1 = sfs(lreg, k_features=final_num_features, forward=False, verbose=1, scoring='neg_mean_squared_error')

    # The selector is fitted on X_train directly; no defensive copy of the
    # (potentially large) frame is made because nothing here mutates it.
    result = sfs1.fit(X_train, y_train)

    feat_names = result.k_feature_names_
    if feat_names is None:
        # NOTE(review): a recorded run of this helper ended in
        # "TypeError: 'NoneType' object is not iterable", which is consistent
        # with k_feature_names_ being None -- confirm against the installed
        # mlxtend version. Fall back to the selected positional indices and
        # map them to column labels when the input exposes them.
        columns = getattr(X_train, 'columns', None)
        if columns is None:
            feat_names = [str(idx) for idx in result.k_feature_idx_]
        else:
            feat_names = [columns[idx] for idx in result.k_feature_idx_]

    return list(feat_names)

In [31]:
def stable_BackwardElimination(train_df, num_splits, final_num_features, verbose):
    """Rank features by how often backward elimination keeps them across splits.

    The rows of ``train_df`` are partitioned into ``num_splits`` consecutive
    chunks. ``BackwardElimination_Helper`` is run on each chunk, and every
    column of ``train_df`` is scored with the fraction of chunks in which it
    was selected.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Training data that contains the ``target_feature`` column.
    num_splits : int or None
        Number of row chunks. ``None`` is treated as a single chunk (the
        whole frame).
    final_num_features
        Number of features each per-chunk selection should keep; forwarded
        to ``BackwardElimination_Helper``.
    verbose
        When truthy, print the selection count of every feature.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature`` and ``rank``, one row per column of ``train_df``.
        The target column is listed too; it is removed from the selector
        input, so it is not expected to be selected.
    """
    # Previously None left df_split unbound (NameError) and broke the division
    # below; a single split over the full frame is the natural reading.
    if num_splits is None:
        num_splits = 1

    # Partition row positions rather than the DataFrame itself: the chunk
    # sizes match np.array_split(train_df, num_splits) without depending on
    # how NumPy handles a DataFrame argument.
    row_chunks = np.array_split(np.arange(len(train_df)), num_splits)

    selectedFeatures = []

    for rows in row_chunks:
        small_df = train_df.iloc[rows]
        X_train = small_df.drop([target_feature], axis=1)
        y_train = small_df[target_feature]

        selectedFeatures = selectedFeatures + BackwardElimination_Helper(X_train, y_train, final_num_features)
        # NOTE TO ARYAN, PRANAV, AND ANISHA: THIS IS THE LINE YOU SHOULD CHANGE, AFTER IMPLEMENTING YOUR FEATURE
        # SELECTION METHOD

    features = train_df.columns.tolist()
    ranks = []

    for feature in features:
        # Count once and reuse for both the log line and the rank.
        count = selectedFeatures.count(feature)
        if verbose:
            print("Feature: "+feature+". Count: "+str(count)+"/"+str(num_splits))
        ranks.append(count/num_splits)

    rank_data = {'feature': features, 'rank': ranks}
    rank_df = pd.DataFrame(rank_data)

    return rank_df

### Investigating a Good Choice of Threshold

In [25]:
# Separate the UDP training frame into the label series and the remaining feature columns.
y_train_udp = ddos_udp_2019_df[target_feature]
X_train_udp = ddos_udp_2019_df.drop(columns=[target_feature])

In [26]:
# Display the column labels of the UDP feature matrix.
X_train_udp.columns

Index(['FlowID', 'SourceIP', 'SourcePort', 'DestinationIP', 'DestinationPort',
       'Protocol', 'FlowDuration', 'TotalFwdPackets', 'TotalBackwardPackets',
       'TotalLengthofFwdPackets', 'TotalLengthofBwdPackets',
       'FwdPacketLengthMax', 'FwdPacketLengthMin', 'FwdPacketLengthMean',
       'FwdPacketLengthStd', 'BwdPacketLengthMax', 'BwdPacketLengthMin',
       'BwdPacketLengthMean', 'BwdPacketLengthStd', 'FlowBytes/s',
       'FlowPackets/s', 'FlowIATMean', 'FlowIATStd', 'FlowIATMax',
       'FlowIATMin', 'FwdIATTotal', 'FwdIATMean', 'FwdIATStd', 'FwdIATMax',
       'FwdIATMin', 'BwdIATTotal', 'BwdIATMean', 'BwdIATStd', 'BwdIATMax',
       'BwdIATMin', 'FwdPSHFlags', 'BwdPSHFlags', 'FwdURGFlags', 'BwdURGFlags',
       'FwdHeaderLength', 'BwdHeaderLength', 'FwdPackets/s', 'BwdPackets/s',
       'MinPacketLength', 'MaxPacketLength', 'PacketLengthMean',
       'PacketLengthStd', 'PacketLengthVariance', 'FINFlagCount',
       'SYNFlagCount', 'RSTFlagCount', 'PSHFlagCount', 'ACKFla

In [27]:
# Trial run on the full UDP training data: keep 50 features and report how many
# names came back together with the names themselves.
features = BackwardElimination_Helper(X_train_udp, y_train_udp, 50)
print("Features: ({}) {}".format(len(features), features))

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.9s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.2s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.2s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.1s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.8s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.6s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.3s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.3s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.1s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.1s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.0s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.0s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    6.7s
Features: 72/50[Parallel(n_

TypeError: 'NoneType' object is not iterable

# 4. Saving the Rankings

In [34]:
# Rank the portmap (2018) features over 10 splits, keeping 50 per split,
# then save the table and preview its first 20 rows.
portmap_ranking = stable_BackwardElimination(
    train_df=ddos_portmap_2018_df,
    num_splits=10,
    final_num_features=50,
    verbose=False,
)
portmap_ranking.to_csv("../ranking/CSE_CIC_IDS2018/ddos_portmap_2018_BackwardElimination.csv")
portmap_ranking.head(20)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   27.7s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.4s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.9s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.3s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.8s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.4s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.0s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.8s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.0s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.5s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.1s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.5s
Features: 66/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.2s
Features: 65/50[Parallel(n_

Features: 54/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.0s
Features: 53/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   15.4s
Features: 52/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   11.7s
Features: 51/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   11.2s
Features: 50/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.8s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.4s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.6s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.1s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.7s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.2s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.8s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.8s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.6s
Features: 69

Features: 58/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.8s
Features: 57/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.6s
Features: 56/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.3s
Features: 55/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.3s
Features: 54/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.0s
Features: 53/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.7s
Features: 52/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.0s
Features: 51/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   12.0s
Features: 50/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   24.0s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   27.1s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.1s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.3s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.9s
Features: 73

Unnamed: 0,feature,rank
0,DestinationPort,1.0
1,FlowDuration,0.8
2,TotalFwdPackets,0.4
3,TotalBackwardPackets,0.9
4,TotalLengthofFwdPackets,0.4
5,TotalLengthofBwdPackets,0.6
6,FwdPacketLengthMax,1.0
7,FwdPacketLengthMin,1.0
8,FwdPacketLengthMean,0.4
9,FwdPacketLengthStd,0.9


In [40]:
# Rank the LDAP (2019) features over 10 splits, keeping 50 per split,
# then save the table and preview its first 20 rows.
ldap_ranking = stable_BackwardElimination(
    train_df=ddos_ldap_2019_df,
    num_splits=10,
    final_num_features=50,
    verbose=False,
)
ldap_ranking.to_csv("../ranking/CIC_DDoS2019/ddos_ldap_2019_BackwardElimination.csv")
ldap_ranking.head(20)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   25.3s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   28.1s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.4s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.0s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   26.1s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.3s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.0s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.4s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.9s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.9s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.6s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.4s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.0s
Features: 72/50[Parallel(n_

Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.8s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.5s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   24.8s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.6s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.1s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.6s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.7s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.0s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.7s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.1s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.5s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.2s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.3s
Features: 69

Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.6s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.0s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.8s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.2s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.7s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.3s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.4s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.0s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.1s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.6s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.3s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.3s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.4s
Features: 66

Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.9s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.4s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.2s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.5s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.1s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.2s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.6s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.1s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.8s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.1s
Features: 66/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.8s
Features: 65/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.4s
Features: 64/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.0s
Features: 63

Unnamed: 0,feature,rank
0,FlowID,0.4
1,SourceIP,0.9
2,SourcePort,0.8
3,DestinationIP,0.8
4,DestinationPort,0.2
5,Protocol,0.9
6,FlowDuration,0.7
7,TotalFwdPackets,0.6
8,TotalBackwardPackets,0.6
9,TotalLengthofFwdPackets,0.8


In [41]:
# Rank the NetBIOS (2019) features over 10 splits, keeping 50 per split,
# then save the table and preview its first 20 rows.
netbios_ranking = stable_BackwardElimination(
    train_df=ddos_netbios_2019_df,
    num_splits=10,
    final_num_features=50,
    verbose=False,
)
netbios_ranking.to_csv("../ranking/CIC_DDoS2019/ddos_netbios_2019_BackwardElimination.csv")
netbios_ranking.head(20)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   24.3s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   25.9s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.3s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.3s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.1s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.6s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.1s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.1s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.4s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.4s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.0s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.4s
Features: 72/50[Parallel(n_

Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.5s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   22.1s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.6s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.6s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.8s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   24.0s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.9s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.7s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.0s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.7s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.0s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.6s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.2s
Features: 69

Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   21.6s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.8s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.5s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.0s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.1s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.3s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.9s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.6s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.6s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.5s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.1s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.7s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.2s
Features: 66

Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.5s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.9s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.1s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.4s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.5s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.2s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.7s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.3s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.0s
Features: 66/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   15.6s
Features: 65/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.3s
Features: 64/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.6s
Features: 63

Unnamed: 0,feature,rank
0,FlowID,0.3
1,SourceIP,0.7
2,SourcePort,0.4
3,DestinationIP,0.9
4,DestinationPort,0.4
5,Protocol,0.9
6,FlowDuration,0.8
7,TotalFwdPackets,0.4
8,TotalBackwardPackets,0.6
9,TotalLengthofFwdPackets,0.6


In [42]:
# Rank the SYN (2019) features over 10 splits, keeping 50 per split,
# then save the table and preview its first 20 rows.
syn_ranking = stable_BackwardElimination(
    train_df=ddos_syn_2019_df,
    num_splits=10,
    final_num_features=50,
    verbose=False,
)
syn_ranking.to_csv("../ranking/CIC_DDoS2019/ddos_syn_2019_BackwardElimination.csv")
syn_ranking.head(20)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   11.3s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   11.0s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.7s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.8s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.3s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.4s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.0s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.0s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.9s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.3s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   11.3s
Features: 72/50[Parallel(n_

Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.7s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.6s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.5s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.5s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.0s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.8s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   11.4s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.0s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.1s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.0s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.8s
Features: 69

Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.2s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.7s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.6s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.3s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.1s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.8s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.7s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.5s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.4s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.5s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.3s
Features: 66

Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.4s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.3s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.0s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.9s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.1s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.5s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.2s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.2s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.3s
Features: 66/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.0s
Features: 65/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.2s
Features: 64/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    7.8s
Features: 63

Unnamed: 0,feature,rank
0,FlowID,0.2
1,SourceIP,0.8
2,SourcePort,0.4
3,DestinationIP,0.9
4,DestinationPort,0.2
5,Protocol,0.7
6,FlowDuration,0.7
7,TotalFwdPackets,0.7
8,TotalBackwardPackets,0.5
9,TotalLengthofFwdPackets,0.8


In [32]:
# Rank the UDP (2019) features over 10 splits, keeping 50 per split,
# then save the table and preview its first 20 rows.
udp_ranking = stable_BackwardElimination(
    train_df=ddos_udp_2019_df,
    num_splits=10,
    final_num_features=50,
    verbose=False,
)
udp_ranking.to_csv("../ranking/CIC_DDoS2019/ddos_udp_2019_BackwardElimination.csv")
udp_ranking.head(20)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.4s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.1s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.1s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.0s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.0s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.9s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.9s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.9s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 72/50[Parallel(n_

Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.0s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.0s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.9s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.9s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.9s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.9s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 69

Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.9s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.5s
Features: 66

Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.5s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.6s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.7s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.8s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.6s
Features: 66/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.5s
Features: 65/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.4s
Features: 64/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    2.4s
Features: 63

Unnamed: 0,feature,rank
0,FlowID,0.8
1,SourceIP,0.6
2,SourcePort,1.0
3,DestinationIP,0.8
4,DestinationPort,0.4
5,Protocol,0.9
6,FlowDuration,0.6
7,TotalFwdPackets,0.3
8,TotalBackwardPackets,0.3
9,TotalLengthofFwdPackets,0.7


# 5. Baseline

In [60]:
def baseline_BackwardElimination(train_df, final_num_features, verbose, num_splits=20):
    """Single-run baseline: backward elimination on the first slice of the data.

    Only the first of ``num_splits`` consecutive row chunks of ``train_df`` is
    used. ``BackwardElimination_Helper`` is run once on that chunk, and every
    column is scored with how many times it appears in the selection.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Training data that contains the ``target_feature`` column.
    final_num_features
        Number of features the selection should keep; forwarded to
        ``BackwardElimination_Helper``.
    verbose
        When truthy, print the selection count of every feature.
    num_splits : int, optional
        Number of chunks the rows are divided into before taking the first
        one. Defaults to 20, the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature`` and ``rank`` (an integer count), one row per
        column of ``train_df``, target column included.
    """
    # Take a smaller piece of the entire dataset. Row positions are split
    # instead of the DataFrame itself; the first chunk has the same size as
    # np.array_split(train_df, num_splits)[0].
    first_rows = np.array_split(np.arange(len(train_df)), num_splits)[0]
    sample_df = train_df.iloc[first_rows]

    X_train = sample_df.drop([target_feature], axis=1)
    y_train = sample_df[target_feature]

    selectedFeatures = BackwardElimination_Helper(X_train, y_train, final_num_features)

    features = sample_df.columns.tolist()
    ranks = []

    for feature in features:
        # Count once and reuse for both the log line and the rank.
        count = selectedFeatures.count(feature)
        if verbose:
            print("Feature: "+feature+". Count: "+str(count))
        ranks.append(count)

    rank_data = {'feature': features, 'rank': ranks}
    rank_df = pd.DataFrame(rank_data)

    return rank_df

In [61]:
# Baseline ranking for the portmap (2018) data, keeping 50 features;
# save the table and preview its first 5 rows.
portmap_ranking_baseline = baseline_BackwardElimination(
    train_df=ddos_portmap_2018_df,
    final_num_features=50,
    verbose=False,
)
portmap_ranking_baseline.to_csv("../baseline_ranking/CSE_CIC_IDS2018/ddos_portmap_2018_BackwardElimination.csv")
portmap_ranking_baseline.head(5)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.3s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   15.6s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   15.2s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.9s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.7s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.4s
Features: 72/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.0s
Features: 71/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.7s
Features: 70/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   15.6s
Features: 69/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.9s
Features: 68/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.9s
Features: 67/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.8s
Features: 66/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.7s
Features: 65/50[Parallel(n_

Unnamed: 0,feature,rank
0,DestinationPort,1
1,FlowDuration,1
2,TotalFwdPackets,1
3,TotalBackwardPackets,1
4,TotalLengthofFwdPackets,0


In [64]:
# Baseline ranking for the LDAP (2019) data, keeping 50 features;
# save the table and preview its first 5 rows.
ldap_ranking_baseline = baseline_BackwardElimination(
    train_df=ddos_ldap_2019_df,
    final_num_features=50,
    verbose=False,
)
ldap_ranking_baseline.to_csv("../baseline_ranking/CIC_DDoS2019/ddos_ldap_2019_BackwardElimination.csv")
ldap_ranking_baseline.head(5)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.5s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.8s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.7s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.9s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.7s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.2s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.2s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.8s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.2s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.8s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.7s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.3s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.4s
Features: 72/50[Parallel(n_

Unnamed: 0,feature,rank
0,FlowID,1
1,SourceIP,0
2,SourcePort,0
3,DestinationIP,1
4,DestinationPort,1


In [63]:
# Baseline ranking for the UDP (2019) data, keeping 50 features;
# save the table and preview its first 5 rows.
udp_ranking_baseline = baseline_BackwardElimination(
    train_df=ddos_udp_2019_df,
    final_num_features=50,
    verbose=False,
)
udp_ranking_baseline.to_csv("../baseline_ranking/CIC_DDoS2019/ddos_udp_2019_BackwardElimination.csv")
udp_ranking_baseline.head(5)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.6s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.9s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.4s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.4s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.4s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.2s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.6s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.1s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.4s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.0s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.0s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.2s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    3.0s
Features: 72/50[Parallel(n_

Unnamed: 0,feature,rank
0,FlowID,1
1,SourceIP,1
2,SourcePort,1
3,DestinationIP,1
4,DestinationPort,0


In [65]:
# Baseline ranking for the NetBIOS (2019) data, keeping 50 features;
# save the table and preview its first 5 rows.
netbios_ranking_baseline = baseline_BackwardElimination(
    train_df=ddos_netbios_2019_df,
    final_num_features=50,
    verbose=False,
)
netbios_ranking_baseline.to_csv("../baseline_ranking/CIC_DDoS2019/ddos_netbios_2019_BackwardElimination.csv")
netbios_ranking_baseline.head(5)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   19.3s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.0s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.3s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.6s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.3s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.6s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   18.6s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   15.2s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   17.2s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   16.7s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   15.1s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.7s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   14.5s
Features: 72/50[Parallel(n_

Unnamed: 0,feature,rank
0,FlowID,0
1,SourceIP,1
2,SourcePort,1
3,DestinationIP,1
4,DestinationPort,0


In [66]:
# Baseline ranking for the SYN (2019) data, keeping 50 features;
# save the table and preview its first 5 rows.
syn_ranking_baseline = baseline_BackwardElimination(
    train_df=ddos_syn_2019_df,
    final_num_features=50,
    verbose=False,
)
syn_ranking_baseline.to_csv("../baseline_ranking/CIC_DDoS2019/ddos_syn_2019_BackwardElimination.csv")
syn_ranking_baseline.head(5)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.1s
Features: 84/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.5s
Features: 83/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.6s
Features: 82/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.0s
Features: 81/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.3s
Features: 80/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   13.7s
Features: 79/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.4s
Features: 78/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   10.1s
Features: 77/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.8s
Features: 76/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.9s
Features: 75/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.8s
Features: 74/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    8.2s
Features: 73/50[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.9s
Features: 72/50[Parallel(n_

Unnamed: 0,feature,rank
0,FlowID,1
1,SourceIP,0
2,SourcePort,1
3,DestinationIP,0
4,DestinationPort,1
