In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

# Reading Data

In [3]:
df = pd.read_csv('kyoto/kyoto-full.csv')

In [4]:
df.head()

Unnamed: 0,Duration,Service,Source_bytes,Destination_bytes,Count,Same_srv_rate,Serror_rate,Srv_serror_rate,Dst_host_count,Dst_host_srv_count,Dst_host_same_src_port_rate,Dst_host_serror_rate,Dst_host_srv_serror_rate,Flag,Label,Source_Port_Number,Destination_Port_Number,protocol_type
0,0.0,other,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,S0,-1,47904,23,tcp
1,0.0,other,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,S0,-1,58974,23,tcp
2,0.0,other,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,S0,-1,37174,23,tcp
3,0.0,other,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,S0,-1,40711,3389,tcp
4,5.2e-05,other,0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,SH,-1,8429,22,tcp


In [5]:
df = df.sample(n=4000)

# Data Preprocessing

In [6]:
dummy_cols = ['Service' , 'Flag' , 'protocol_type']
df = pd.get_dummies( df , columns = dummy_cols)
df.isna().sum()


Duration                       0
Source_bytes                   0
Destination_bytes              0
Count                          0
Same_srv_rate                  0
Serror_rate                    0
Srv_serror_rate                0
Dst_host_count                 0
Dst_host_srv_count             0
Dst_host_same_src_port_rate    0
Dst_host_serror_rate           0
Dst_host_srv_serror_rate       0
Label                          0
Source_Port_Number             0
Destination_Port_Number        0
Service_dns                    0
Service_http                   0
Service_other                  0
Service_rdp                    0
Service_sip                    0
Service_smtp                   0
Service_snmp                   0
Service_ssh                    0
Flag_OTH                       0
Flag_REJ                       0
Flag_RSTO                      0
Flag_RSTOS0                    0
Flag_RSTR                      0
Flag_RSTRH                     0
Flag_S0                        0
Flag_S1   

In [7]:
df = df.dropna()
df.isna().sum()

Duration                       0
Source_bytes                   0
Destination_bytes              0
Count                          0
Same_srv_rate                  0
Serror_rate                    0
Srv_serror_rate                0
Dst_host_count                 0
Dst_host_srv_count             0
Dst_host_same_src_port_rate    0
Dst_host_serror_rate           0
Dst_host_srv_serror_rate       0
Label                          0
Source_Port_Number             0
Destination_Port_Number        0
Service_dns                    0
Service_http                   0
Service_other                  0
Service_rdp                    0
Service_sip                    0
Service_smtp                   0
Service_snmp                   0
Service_ssh                    0
Flag_OTH                       0
Flag_REJ                       0
Flag_RSTO                      0
Flag_RSTOS0                    0
Flag_RSTR                      0
Flag_RSTRH                     0
Flag_S0                        0
Flag_S1   

In [8]:
df.head()

Unnamed: 0,Duration,Source_bytes,Destination_bytes,Count,Same_srv_rate,Serror_rate,Srv_serror_rate,Dst_host_count,Dst_host_srv_count,Dst_host_same_src_port_rate,...,Flag_RSTR,Flag_RSTRH,Flag_S0,Flag_S1,Flag_SF,Flag_SH,Flag_SHR,protocol_type_icmp,protocol_type_tcp,protocol_type_udp
365315,0.00014,0,0,0,0.0,0.0,1.0,1,12,1.0,...,0,0,0,0,0,0,0,0,1,0
104467,0.0,0,0,0,0.0,0.0,0.82,0,0,0.0,...,0,0,1,0,0,0,0,0,0,1
370364,1.597221,520,1745,1,1.0,0.0,0.0,25,25,0.0,...,0,0,0,0,1,0,0,0,1,0
378066,0.000447,44,104,10,1.0,0.0,0.0,81,96,0.0,...,0,0,0,0,1,0,0,0,0,1
280142,0.000544,44,104,25,1.0,0.0,0.0,93,99,0.0,...,0,0,0,0,1,0,0,0,0,1


# Split Data

In [9]:
X = df.drop(['Label'], axis=1)
y = df['Label']

In [10]:
scoring_metrics = {'accuracy' : make_scorer(accuracy_score), 
           'precision' : make_scorer(precision_score, average="micro"),
           'recall' : make_scorer(recall_score, average="micro"), 
           'f1_score' : make_scorer(f1_score, average="micro")}

In [11]:
df.head()

Unnamed: 0,Duration,Source_bytes,Destination_bytes,Count,Same_srv_rate,Serror_rate,Srv_serror_rate,Dst_host_count,Dst_host_srv_count,Dst_host_same_src_port_rate,...,Flag_RSTR,Flag_RSTRH,Flag_S0,Flag_S1,Flag_SF,Flag_SH,Flag_SHR,protocol_type_icmp,protocol_type_tcp,protocol_type_udp
365315,0.00014,0,0,0,0.0,0.0,1.0,1,12,1.0,...,0,0,0,0,0,0,0,0,1,0
104467,0.0,0,0,0,0.0,0.0,0.82,0,0,0.0,...,0,0,1,0,0,0,0,0,0,1
370364,1.597221,520,1745,1,1.0,0.0,0.0,25,25,0.0,...,0,0,0,0,1,0,0,0,1,0
378066,0.000447,44,104,10,1.0,0.0,0.0,81,96,0.0,...,0,0,0,0,1,0,0,0,0,1
280142,0.000544,44,104,25,1.0,0.0,0.0,93,99,0.0,...,0,0,0,0,1,0,0,0,0,1


In [12]:
y.shape

(4000,)

In [13]:
y.head()

365315   -1
104467   -1
370364   -1
378066   -1
280142   -1
Name: Label, dtype: int64

In [14]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

# Chi Squared

In [15]:
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.feature_selection import SelectKBest, chi2

In [16]:
print("before transform:",X)
selector=SelectKBest(score_func=chi2,k=7)
fit = selector.fit(X,y)
features = fit.transform(X)
print("scores_:",fit.scores_)
print("pvalues_:",fit.pvalues_)
print("selected index:",fit.get_support(True))
print("after transform:",fit.transform(X)) 
X = fit.transform(X)

before transform:         Duration  Source_bytes  Destination_bytes  Count  Same_srv_rate  \
365315  0.000140             0                  0      0            0.0   
104467  0.000000             0                  0      0            0.0   
370364  1.597221           520               1745      1            1.0   
378066  0.000447            44                104     10            1.0   
280142  0.000544            44                104     25            1.0   
...          ...           ...                ...    ...            ...   
316485  1.596176           520               1745      1            1.0   
287818  0.000439            44                104     22            1.0   
160364  0.000000             0                  0      0            0.0   
11488   0.000000             0                  0      0            0.0   
179915  0.000000             0                  0      0            0.0   

        Serror_rate  Srv_serror_rate  Dst_host_count  Dst_host_srv_count  \
36531

# (KNN-RF-ADA)_(DT-MLP)_MLP

In [17]:
# Create Learners per layer
layer_one_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1)),
                        ('ada_1', AdaBoostClassifier(random_state=0, learning_rate=0.1, n_estimators=1000))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                       ]

In [18]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=MLPClassifier())

In [19]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision   Recall
0    0.977  0.866071  0.92381


In [20]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(KNN-RF-ADA)_(DT-MLP)_MLP_ChiSquared.csv')

# (KNN-RF)_(DT-MLP)_MLP

In [21]:
# Create Learners per layer
layer_one_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                       ]
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=MLPClassifier())

In [22]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision    Recall
0    0.978  0.880734  0.914286


In [23]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(KNN-RF)_(DT-MLP)_MLP_ChiSquared.csv')

# (RF-MLP)_(DT-KNN)_MLP

In [24]:
# Create Learners per layer
layer_one_estimators = [
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, min_samples_split=0.1)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1))
                       ]
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=MLPClassifier())

In [25]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)



  Accuracy Precision    Recall
0    0.956  0.850575  0.704762


In [26]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(RF-MLP)_(DT-KNN)_MLP_ChiSquared.csv')

# (KNN-RF)_(DT-MLP)_DT

In [27]:
# Create Learners per layer
layer_one_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                       ]

In [28]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=DecisionTreeClassifier())

In [29]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision    Recall
0    0.964   0.80531  0.866667


In [30]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(KNN-RF)_(DT-MLP)_DT_ChiSquared.csv')

# (ADA-MLP)_(DT-RF)_MLP

In [31]:
# Create Learners per layer
layer_one_estimators = [
                        ('ada_1', AdaBoostClassifier(random_state=0, learning_rate=0.1, n_estimators=1000)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=MLPClassifier())

In [32]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision    Recall
0    0.971      0.88  0.838095


In [33]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(ADA-MLP)_(DT-RF)_MLP_ChiSquared.csv')

# (MLP-RF)_(DT-KNN)_DT

In [34]:
# Create Learners per layer
layer_one_estimators = [
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20))
                       ]

layer_two_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10))
                       ]

In [35]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=DecisionTreeClassifier())

In [36]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)



  Accuracy Precision Recall
0     0.96  0.815534    0.8


In [37]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(MLP-RF)_(DT-KNN)_DT_ChiSquaredT.csv')

# (KNN-RF)_(DT-MLP)_KNN

In [38]:
# Create Learners per layer
layer_one_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                       ]

In [39]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=KNeighborsClassifier())

In [40]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision    Recall
0    0.975  0.863636  0.904762


In [41]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(KNN-RF)_(DT-MLP)_KNN_ChiSquared.csv')

# (MLP-RF)_(DT-KNN)_KNN

In [42]:
# Create Learners per layer
layer_one_estimators = [
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1))
                       ]

In [43]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=KNeighborsClassifier())

In [44]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision    Recall
0    0.951  0.841463  0.657143


In [45]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(MLP-RF)_(DT-KNN)_KNN_ChiSquared.csv')

# (KNN-MLP)_(DT-RF)_KNN

In [46]:
# Create Learners per layer
layer_one_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

In [47]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=KNeighborsClassifier())

In [48]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)



  Accuracy Precision     Recall
0      0.9  0.857143  0.0571429


In [49]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(KNN-MLP)_(DT-RF)_KNN_ChiSquared.csv')

# (KNN-RF)_(DT-MLP)_RF

In [50]:
# Create Learners per layer
layer_one_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                       ]

In [51]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=RandomForestClassifier())

In [52]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision   Recall
0    0.971  0.851852  0.87619


In [53]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(KNN-RF)_(DT-MLP)_RF_ChiSquared.csv')

# (MLP-RF)_(DT-KNN)_RF

In [54]:
# Create Learners per layer
layer_one_estimators = [
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1))
                       ]

In [55]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=RandomForestClassifier())

In [56]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)



  Accuracy Precision Recall
0    0.948  0.730435    0.8


In [57]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(MLP-RF)_(DT-KNN)_RF_ChiSquared.csv')

# (KNN-MLP)_(DT-RF)_RF

In [58]:
# Create Learners per layer
layer_one_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

In [59]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=RandomForestClassifier())

In [60]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)



  Accuracy Precision    Recall
0    0.907     0.875  0.133333


In [61]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(KNN-MLP)_(DT-RF)_RF_ChiSquared.csv')

# (KNN-RF)_(DT-MLP)_ADA

In [62]:
# Create Learners per layer
layer_one_estimators = [
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                       ]

In [63]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=AdaBoostClassifier())

In [64]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision    Recall
0    0.974  0.849558  0.914286


In [65]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(KNN-RF)_(DT-MLP)_ADA_ChiSquared.csv')

# (MLP-RF)_(DT-KNN)_ADA

In [66]:
# Create Learners per layer
layer_one_estimators = [
                        ('mlp_2', MLPClassifier(solver='adam', alpha=0.001, hidden_layer_sizes=20)),
                        ('rn_1', RandomForestClassifier(random_state=0, n_jobs=-1, n_estimators=90, max_depth=19, 
                                                        min_samples_split=0.1))
                       ]

layer_two_estimators = [
                        ('dt_2', DecisionTreeClassifier(max_depth=13, min_samples_split=10)),
                        ('knn_1', KNeighborsClassifier(n_neighbors=5, n_jobs=-1))
                       ]

In [67]:
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=AdaBoostClassifier())

In [68]:
# Create Final model by 
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

acurcy = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred)
recal = recall_score(y_test, y_pred)

clf_Score = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall'])
clf_Score.at[0, 'Accuracy'] = acurcy
clf_Score.at[0, 'Precision'] = prec
clf_Score.at[0, 'Recall'] = recal
print(clf_Score)

  Accuracy Precision    Recall
0    0.559  0.191176  0.990476


In [69]:
# clf_Score.to_csv('NSL_DOS_Stacking_level2_(MLP-RF)_(DT-KNN)_ADA_ChiSquared.csv')