In [266]:
import warnings
warnings.filterwarnings("ignore")


import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.under_sampling import CondensedNearestNeighbour, RandomUnderSampler
from imblearn.over_sampling import SMOTE, SMOTENC

In [267]:
data = pd.read_csv("Darknet.CSV", low_memory=False, parse_dates=["Timestamp"], error_bad_lines=False)

def display_all(df): # tip: you can transpose before giving input!
    with pd.option_context("display.max_rows", 1000, "display.max_columns", 1000): 
        print("The shape is: ", data.shape)
        data.info()
        data.dtypes
        display(df)
        

def display_types(df):
    print(df["Level1"].unique())
    print(df["Level2"].unique())
    print(df.groupby("Level1")["Level2"].unique())
    print(df.groupby("Level1")["Level2"].nunique())
    
# TODO drop VPN
data.drop(["Flow ID"],axis = 1,inplace = True)
data.drop(["Src IP"],axis = 1,inplace = True)
data.drop(["Dst IP"],axis = 1,inplace = True)
data.drop(["Src Port"],axis = 1,inplace = True)
data.drop(["Dst Port"],axis = 1,inplace = True)
data.drop(["Flow Duration"],axis = 1,inplace = True)
data.drop(["Timestamp"],axis = 1,inplace = True)
# data.drop_duplicates(subset=None, keep='first', inplace=True, ignore_index=False)
# data = data.loc[:, internet_ds_test.apply(pd.Series.nunique) != 1]
data.isnull().sum()

data.rename(columns = {"Label" : "Level1", "Label.1" : "Level2"}, inplace = True)
# data.rename({'Level2': {"AUDIO-STREAMING" : "Audio-Streaming", "Audio-streaming" : "Audio-Streaming", "Video-streaming" : "Video-Streaming", "File-transfer" : "File-Transfer"}}, inplace = True)
data['Level2'].loc[data['Level2'] == 'AUDIO-STREAMING'] = 'Audio-Streaming'
data['Level2'].loc[data['Level2'] == 'Audio-streaming'] = 'Audio-Streaming'
data['Level2'].loc[data['Level2'] == 'File-transfer'] = 'File-Transfer'
data['Level2'].loc[data['Level2'] == 'Video-streaming'] = 'Video-Streaming'
data["Level2"].unique()
# samples[real_columns] = samples[real_columns].replace([np.inf, -np.inf], np.nan)
# samples[real_columns] = samples[real_columns].dropna()

Skipping line 328: expected 85 fields, saw 125



array(['Audio-Streaming', 'Browsing', 'Chat', 'Email', 'File-Transfer',
       'P2P', 'Video-Streaming', 'VOIP'], dtype=object)

In [268]:
start_mem = data.memory_usage().sum() / 1024**2 # start mem for comparison later

for col in data.columns:
    col_type = data[col].dtypes
    if col_type in ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']:
        c_min = data[col].min()
        c_max = data[col].max()
        if str(col_type)[:3] == 'int': # if it's int
            if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max: # if it can be int8, make it int8
                data[col] = data[col].astype(np.int8)
            elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max: # if it can be int16, make it int16
                data[col] = data[col].astype(np.int16)
            elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max: # so on
                data[col] = data[col].astype(np.int32)
            elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                data[col] = data[col].astype(np.int64)
        else: # if it's float
            if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max: # if it can be float16, make it float16
                data[col] = data[col].astype(np.float16)
            elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max: # so on
                data[col] = data[col].astype(np.float32)
            else:
                data[col] = data[col].astype(np.float64)

end_mem = data.memory_usage().sum() / 1024**2 # end mem for comparison later

print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

Memory usage after optimization is: 31.31 MB
Decreased by 62.8%


# Split

In [269]:
copied = data.copy(deep=True)
copied.replace([np.inf, -np.inf], np.nan, inplace=True)
copied = copied.dropna()
copied.isnull().sum().sum()

0

In [270]:
copied = copied[(copied.Level1 == 'Tor') | (copied.Level1 == 'Non-Tor')]  

In [271]:
Y = copied[["Level1", "Level2"]]
X = copied.drop(["Level1", "Level2"], axis=1)

In [272]:
Y.replace('Tor', 1, inplace=True)               #Converting strings to binary classes 0 or 1
Y.replace('Non-Tor', 0, inplace=True)

In [273]:
from sklearn.preprocessing import LabelEncoder

def categorise(row):
    if row['Level1'] == 1:
        return f"Tor {row['Level2']}"
    else:
        return 'Non-Tor'
    

Y['Level2'] = Y.apply(lambda row: categorise(row), axis = 1)

le = LabelEncoder()
Y['Level2'] = le.fit_transform(Y['Level2'])

In [274]:
#X_train__test, X_application, Y_train_test, Y_application = train_test_split(X, Y, test_size=0.30, random_state=42, stratify=Y['Level2'])
#X_train, X_test, Y_train, Y_test = train_test_split(X_train__test, Y_train_test, test_size=0.30, random_state=42, stratify=Y_train_test['Level2'])

In [275]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.35, random_state=42, stratify=Y['Level2'])


In [276]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Training and Testing Models
## Layer 1

Undersampling minority classes from layer 2

In [277]:
Y_train_l = Y_train['Level1']
Y_test_l = Y_test['Level1']

In [278]:
import copy
from copy import deepcopy
Y_train_2 = Y_train['Level2']
Y_test_2 = Y_test['Level2']
Y_train_c = Y_train_2.copy(deep=True)
Y_test_c = Y_test_2.copy(deep=True)
X_train_c = deepcopy(X_train)
X_test_c = deepcopy(X_test)

In [279]:
#Dropping non-tor values for Critic training
cdf = pd.DataFrame(X_train_c)
cdf.columns = X.columns
cdf['Level2'] = Y_train_c.tolist()
cdf = cdf[(cdf.Level2 != 0)]
cdf.head()

Unnamed: 0,Protocol,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Level2
23,-0.538252,0.348746,1.390101,-0.012513,1.894551,1.667111,-0.34011,0.23087,0.966091,2.486498,...,0.536571,0.0,0.0,0.0,0.0,0.850792,-0.323858,0.784613,0.989751,1
40,-0.538252,0.559123,1.347446,0.040394,1.58289,2.498054,-0.34011,0.541941,1.441745,2.486498,...,0.536571,0.0,0.0,0.0,0.0,0.822694,-0.323858,0.757137,0.963093,8
70,-0.538252,3.887289,3.02132,3.317215,0.655163,2.415578,-0.34011,4.887389,2.987417,2.569592,...,0.536571,0.0,0.0,0.0,0.0,0.850774,-0.323858,0.784595,0.989734,6
151,-0.538252,-0.076333,-0.085633,-0.055094,-0.066777,-0.51231,-0.34011,-0.389681,-0.529642,-0.463337,...,0.536571,0.0,0.0,0.0,0.0,-1.261746,-0.323858,-1.281175,-1.014558,2
266,-0.538252,2.10639,1.786382,1.566167,0.460387,2.415578,-0.34011,4.216316,3.285417,2.569592,...,0.536571,0.0,0.0,0.0,0.0,0.850771,-0.323858,0.784592,0.989731,6


In [280]:
#Preparing test set for critic
ctest = pd.DataFrame(X_test_c)
ctest.columns = X.columns
ctest['Level2'] = Y_test_c.tolist()
ctest = ctest[(ctest.Level2 != 0)]
ctest.head()

Unnamed: 0,Protocol,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,Total Length of Bwd Packet,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Level2
57,-0.538252,-0.038476,-0.034034,-0.043659,-0.043953,2.498054,-0.34011,1.45831,1.882196,2.486498,...,0.536571,0.0,0.0,0.0,0.0,0.822696,-0.323858,0.757139,0.963095,8
63,-1.839147,-0.075793,-0.086321,-0.055094,-0.066777,-0.51231,-0.34011,-0.389681,-0.529642,-0.463337,...,-3.398594,0.0,0.0,0.0,0.0,-1.261746,-0.323858,-1.281175,-1.014558,3
72,-0.538252,0.242747,0.574836,0.001795,0.711927,2.498054,-0.34011,0.714792,2.054119,2.486498,...,0.536571,0.0,0.0,0.0,0.0,0.850894,-0.323858,0.784712,0.989848,2
183,-0.538252,14.842009,6.556889,14.317899,1.262722,2.415578,-0.34011,5.588964,2.371768,2.569592,...,0.536571,0.0,0.0,0.0,0.0,0.850683,-0.323858,0.784507,0.989648,6
185,-0.538252,1.333027,2.237014,0.454516,0.570586,2.498054,-0.34011,1.852311,0.880948,2.486498,...,0.536571,0.0,0.0,0.0,0.0,0.82421,-0.323858,0.758619,0.964531,7


In [281]:
#separating X and y for critic fitting
Y_train_c = cdf['Level2']
Y_test_c = ctest['Level2']
X_train_c = cdf.drop(['Level2'], axis=1)
X_test_c = ctest.drop(['Level2'], axis=1)
Y_train_c = Y_train_c.to_numpy()
Y_test_c = Y_test_c.to_numpy()
X_train_c = X_train_c.to_numpy()
X_test_c = X_test_c.to_numpy()

In [282]:
sampless = {1: 110, 2: 129, 6:108, 7:146}
samples = [1, 2, 6, 7]
undersample = CondensedNearestNeighbour(sampling_strategy = samples, random_state = 42, n_jobs=-1, n_neighbors=3)
#undersample = RandomUnderSampler(sampling_strategy = sampless, random_state = 42)
#X_train, Y_train_2 = undersample.fit_resample(X_train, Y_train_2)

In [283]:
np.unique(Y_train_c, return_counts=True)

(array([1, 2, 3, 4, 5, 6, 7, 8], dtype=int64),
 array([146, 171,  42,   8,  70, 143, 194, 131], dtype=int64))

In [284]:
#Oversampling for Critic
#1: 146, 2: 171,  , 6: 143, 7: 194, 8: 131
sampless = {3: 42,   4: 8,  5: 70}
oversample = SMOTE(sampling_strategy = sampless, random_state = 42)
X_train_c, Y_train_c = oversample.fit_resample(X_train_c, Y_train_c)

In [285]:
#Creating a target Y for binary after undersampling

Y_train_l1 = Y_train_2.copy()
Y_train_l1[Y_train_l1>1] = 1
Y_train_l.equals(Y_train_l1)
(Y_train_l == Y_train_l1).all()

True

In [286]:
#Using only the first classification for the gridsearch
#Y_train_l = Y_train['Level1']
#Y_test_l = Y_test['Level1']

In [287]:
#importing libraries for models
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import xgboost as xgb
from imblearn.pipeline import Pipeline as impip
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold


#Creating classifiers to be passed to the pipeline
clf1 = RandomForestClassifier(random_state=42, criterion = 'entropy')
clf2 = ExtraTreesClassifier(random_state=42, criterion = 'entropy')
clf3 = DecisionTreeClassifier(random_state=42, criterion = 'entropy')
clf4 = GradientBoostingClassifier(random_state=42)
clf5 = AdaBoostClassifier(random_state=42)
clf6 = BaggingClassifier(random_state=42)
clf7 = SVC(random_state=42)

#Initializing parameter dictionary for models

param1 = {}
param1['classifier__n_estimators'] = [10, 50, 100]
param1['classifier__max_depth'] = [5, 10, 20]
param1['classifier__min_samples_split'] = [2,5,10]
param1['classifier'] = [clf1]

param2 = {}
param2['classifier__n_estimators'] = [10, 50, 100]
param2['classifier__max_depth'] = [5, 10, 20]
param2['classifier__min_samples_split'] = [2,5,10]
param2['classifier'] = [clf2]

param3 = {}
param3['classifier__max_depth'] = [5, 10, 20]
param3['classifier__min_samples_split'] = [2,5,10]
param3['classifier'] = [clf3]

param4 = {}
param4['classifier__n_estimators'] = [10, 50, 100]
param4['classifier__learning_rate'] = [0.01, 0.05, 0.1]
param4['classifier__max_depth'] = [5, 10, 20]
param4['classifier__min_samples_split'] = [2,5,10]
param4['classifier'] = [clf4]

param5 = {}
param5['classifier__n_estimators'] = [10, 50, 100]
param5['classifier__learning_rate'] = [0.01, 0.05, 0.1]
param5['classifier'] = [clf5]

param6 = {}
param6['classifier__n_estimators'] = [10, 50, 100]
param6['classifier__max_samples'] = [0.05, 0.1, 0.2, 0.5]
param6['classifier'] = [clf6]

param7 = {}
param7['classifier__C'] = [0.01, 0.1, 1, 5]
param7['classifier__kernel'] = ['linear', 'rbf', 'sigmoid']
param7['classifier'] = [clf7]

params = [param1, param2, param3, param4, param5, param6, param7]

In [288]:
from sklearn.metrics import accuracy_score, f1_score, classification_report,confusion_matrix

In [289]:
#Creating random_state=ne for the models
# pipeline = impip([('scaler', StandardScaler()),('classifier', clf1),])

# #implementing randomized search because gridsearch takes forever
# rs = GridSearchCV(pipeline, params, cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=42), n_jobs=-1, scoring='f1', error_score='raise').fit(X_train, Y_train_l)
# rs.best_params_

In [290]:
'''
#GridSearch for Second Classifier
xgbmodel = xgb.XGBClassifier()
params = {'n_estimators': [10, 50, 100], 'max_depth': [5, 10, 20], 'min_samples_split': [2, 5, 10]}
rs = GridSearchCV(xgbmodel, params, cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=42), n_jobs=-1, scoring='roc_auc_ovr', error_score='raise').fit(X_train, Y_train_2)
rs.best_params_
'''

"\n#GridSearch for Second Classifier\nxgbmodel = xgb.XGBClassifier()\nparams = {'n_estimators': [10, 50, 100], 'max_depth': [5, 10, 20], 'min_samples_split': [2, 5, 10]}\nrs = GridSearchCV(xgbmodel, params, cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=42), n_jobs=-1, scoring='roc_auc_ovr', error_score='raise').fit(X_train, Y_train_2)\nrs.best_params_\n"

In [291]:
# Decision tree worked best
ranForModel = DecisionTreeClassifier(max_depth=10, min_samples_split=2, random_state=42, criterion = "entropy")
ranForModel.fit(X_train, Y_train_l)
prediction_1 = ranForModel.predict(X_test)
Accuracy = accuracy_score(Y_test_l, prediction_1)
print("F1 score",f1_score(Y_test['Level1'], prediction_1, average=None))
print(classification_report(Y_test['Level1'], prediction_1, target_names=['0','1'], digits=4))
print('Confusion Matrix \n' + str(confusion_matrix(Y_test['Level1'], prediction_1)))

F1 score [0.99993876 0.99589322]
              precision    recall  f1-score   support

           0     0.9999    0.9999    0.9999     32659
           1     0.9959    0.9959    0.9959       487

    accuracy                         0.9999     33146
   macro avg     0.9979    0.9979    0.9979     33146
weighted avg     0.9999    0.9999    0.9999     33146

Confusion Matrix 
[[32657     2]
 [    2   485]]


## Layer 2

In [292]:

#decTree = DecisionTreeClassifier(criterion = "entropy", max_depth=10, min_samples_split=2)
#decTree.fit(X_train, Y_train_l)
xgbmodel = xgb.XGBClassifier(n_estimators=50, n_jobs=-1, random_state=42, max_depth=10)
xgbmodel.fit(X_train, Y_train_2)

In [293]:
prediction_2 = xgbmodel.predict(X_test)
print("F1 score",f1_score(Y_test['Level2'], prediction_2, average=None))
print(classification_report(Y_test['Level2'], prediction_2, target_names=['0','1','2','3','4','5','6','7','8'], digits=4))
print('Confusion Matrix \n' + str(confusion_matrix(Y_test['Level2'], prediction_2)))

F1 score [0.99992346 0.92405063 0.88268156 0.86956522 0.28571429 0.8974359
 0.96153846 0.90640394 0.87323944]
              precision    recall  f1-score   support

           0     0.9998    1.0000    0.9999     32659
           1     0.9125    0.9359    0.9241        78
           2     0.9080    0.8587    0.8827        92
           3     0.8696    0.8696    0.8696        23
           4     0.5000    0.2000    0.2857         5
           5     0.8537    0.9459    0.8974        37
           6     0.9494    0.9740    0.9615        77
           7     0.9293    0.8846    0.9064       104
           8     0.8732    0.8732    0.8732        71

    accuracy                         0.9985     33146
   macro avg     0.8662    0.8380    0.8445     33146
weighted avg     0.9984    0.9985    0.9985     33146

Confusion Matrix 
[[32659     0     0     0     0     0     0     0     0]
 [    0    73     3     0     0     0     2     0     0]
 [    3     5    79     2     0     1     0     0    

## Critique Model

In [294]:
from sklearn.ensemble import VotingClassifier

#Creating classifiers to be passed to the pipeline
models = list()
models.append(('rfc', RandomForestClassifier(random_state=42, criterion = 'entropy')))
models.append(('etc', ExtraTreesClassifier(random_state=42, criterion = 'entropy')))
models.append(('dtc', DecisionTreeClassifier(random_state=42, criterion = 'entropy')))
models.append(('gbc', GradientBoostingClassifier(random_state=42)))
#models.append(('abc', AdaBoostClassifier(random_state=42)))
models.append(('bc', BaggingClassifier(random_state=42)))
#models.append(('svm', SVC(random_state=42, probability=True)))

ensemble = VotingClassifier(estimators=models, voting='soft')

In [295]:
#Y_train_2 = Y_train['Level2']
#Y_test_2 = Y_test['Level2']

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Y_train_c = le.fit_transform(Y_train_c)

ensemble.fit(X_train_c, Y_train_c)

In [296]:
predictions_3 = ensemble.predict(X_test_c)
Y_test_c = le.transform(Y_test_c)
print("F1 score",f1_score(Y_test_c, predictions_3, average=None))
print(classification_report(Y_test_c, predictions_3, target_names=['1','2','3','4','5','6','7','8']))
print('Confusion Matrix \n' + str(confusion_matrix(Y_test_c, predictions_3)))

F1 score [0.94936709 0.91803279 0.93333333 0.75       0.97368421 0.98717949
 0.93137255 0.88888889]
              precision    recall  f1-score   support

           1       0.94      0.96      0.95        78
           2       0.92      0.91      0.92        92
           3       0.95      0.91      0.93        23
           4       1.00      0.60      0.75         5
           5       0.95      1.00      0.97        37
           6       0.97      1.00      0.99        77
           7       0.95      0.91      0.93       104
           8       0.88      0.90      0.89        71

    accuracy                           0.94       487
   macro avg       0.95      0.90      0.92       487
weighted avg       0.94      0.94      0.94       487

Confusion Matrix 
[[75  3  0  0  0  0  0  0]
 [ 4 84  1  0  0  1  0  2]
 [ 0  2 21  0  0  0  0  0]
 [ 0  0  0  3  2  0  0  0]
 [ 0  0  0  0 37  0  0  0]
 [ 0  0  0  0  0 77  0  0]
 [ 1  0  0  0  0  1 95  7]
 [ 0  2  0  0  0  0  5 64]]


# Dynamic Implementation

In [297]:
# passing only the TOR entries to the second layer and passing second layer predictions with more than n % confidence to critique
'''
all_preds = []
threshold_conf = 0.6
for index in range(X_application.shape[0]):
    
    obs = scaler.transform([X_application.iloc[index].values])
    obs = obs.flatten()
    final_pred = 0
    #first layer:
    pred1 = ranForModel.predict([obs])[0]
    final_pred = pred1
    
    if pred1 == 1:
        pred_confs = xgbmodel.predict_proba([obs])
        pred2 = np.argmax(pred_confs, axis = 1)[0]
        if max(pred_confs).any() < threshold_conf:
            pred3 = ensemble.predict([obs])[0]
            pred3 += 1
            final_pred = pred3
            print('Critic! ' + str([pred1, pred2, pred_confs[0], pred3]) + ' ' + str(Y_application.iloc[index, 1]))
        else:
            final_pred = pred2
    else:
        pred2 = 'N'
        pred3 = 'N'
            
    all_preds.append([pred1, pred2, pred3, final_pred])
    '''

"\nall_preds = []\nthreshold_conf = 0.6\nfor index in range(X_application.shape[0]):\n    \n    obs = scaler.transform([X_application.iloc[index].values])\n    obs = obs.flatten()\n    final_pred = 0\n    #first layer:\n    pred1 = ranForModel.predict([obs])[0]\n    final_pred = pred1\n    \n    if pred1 == 1:\n        pred_confs = xgbmodel.predict_proba([obs])\n        pred2 = np.argmax(pred_confs, axis = 1)[0]\n        if max(pred_confs).any() < threshold_conf:\n            pred3 = ensemble.predict([obs])[0]\n            pred3 += 1\n            final_pred = pred3\n            print('Critic! ' + str([pred1, pred2, pred_confs[0], pred3]) + ' ' + str(Y_application.iloc[index, 1]))\n        else:\n            final_pred = pred2\n    else:\n        pred2 = 'N'\n        pred3 = 'N'\n            \n    all_preds.append([pred1, pred2, pred3, final_pred])\n    "

In [298]:
'''
print("F1 score",f1_score(Y_application['Level2'], (np.array(all_preds)[:, 3]).astype(int), average=None))
print(classification_report(Y_application['Level2'], (np.array(all_preds)[:, 3]).astype(int), target_names=['0','1','2','3','4','5','6','7','8']))
print('Confusion Matrix \n' + str(confusion_matrix(Y_application['Level2'], (np.array(all_preds)[:, 3].astype(int)))))
'''

'\nprint("F1 score",f1_score(Y_application[\'Level2\'], (np.array(all_preds)[:, 3]).astype(int), average=None))\nprint(classification_report(Y_application[\'Level2\'], (np.array(all_preds)[:, 3]).astype(int), target_names=[\'0\',\'1\',\'2\',\'3\',\'4\',\'5\',\'6\',\'7\',\'8\']))\nprint(\'Confusion Matrix \n\' + str(confusion_matrix(Y_application[\'Level2\'], (np.array(all_preds)[:, 3].astype(int)))))\n'

In [299]:
# passing only the TOR entries to the second layer and passing second layer predictions with more than n % confidence to critique
all_preds = []
threshold_conf = 0.70
for index in range(X_test.shape[0]):
    
    #obs = scaler.transform([X_test[index]])
    #obs = obs.flatten()
    obs = X_test[index].flatten()
    final_pred = 0
    #first layer:
    pred1 = ranForModel.predict([obs])[0]
    final_pred = pred1
    
    if pred1 == 1:
        pred_confs = xgbmodel.predict_proba([obs])
        #print(pred_confs)
        pred2 = np.argmax(pred_confs)
        if pred_confs[0].max() < threshold_conf and pred2 != 0:
            pred3 = ensemble.predict([obs])[0]
            pred3 += 1
            final_pred = pred3
            print('Critic! ' + str([pred1, pred2, pred3]) + ' ' + str(Y_test.iloc[index, 1]))
        else:
            final_pred = pred2
            #if final_pred != Y_test.iloc[index, 1]:
            #    print(pred_confs[0].max())

    else:
        pred2 = 'N'
        pred3 = 'N'
            
    all_preds.append([pred1, pred2, pred3, final_pred])

Critic! [1, 5, 3] 3
Critic! [1, 6, 1] 1
Critic! [1, 3, 2] 2
Critic! [1, 2, 6] 6
Critic! [1, 1, 1] 7
Critic! [1, 5, 4] 4
Critic! [1, 3, 2] 8
Critic! [1, 2, 2] 1
Critic! [1, 6, 6] 7
Critic! [1, 2, 2] 2
Critic! [1, 6, 7] 7
Critic! [1, 2, 2] 8
Critic! [1, 2, 6] 6
Critic! [1, 2, 2] 2
Critic! [1, 2, 2] 2
Critic! [1, 3, 3] 3
Critic! [1, 1, 5] 5
Critic! [1, 8, 7] 7
Critic! [1, 3, 3] 3
Critic! [1, 2, 1] 2
Critic! [1, 7, 8] 8
Critic! [1, 6, 1] 1
Critic! [1, 5, 4] 4
Critic! [1, 5, 5] 5
Critic! [1, 2, 2] 1
Critic! [1, 8, 8] 2


In [300]:
print("F1 score",f1_score(Y_test_2, (np.array(all_preds)[:, 3]).astype(int), average=None))
print(classification_report(Y_test_2, (np.array(all_preds)[:, 3]).astype(int), target_names=['0','1','2','3','4','5','6','7','8']))
print('Confusion Matrix \n' + str(confusion_matrix(Y_test_2, (np.array(all_preds)[:, 3].astype(int)))))

F1 score [0.99992346 0.9375     0.88764045 0.93333333 0.66666667 0.94736842
 0.99354839 0.92156863 0.88732394]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     32659
           1       0.91      0.96      0.94        78
           2       0.92      0.86      0.89        92
           3       0.95      0.91      0.93        23
           4       0.75      0.60      0.67         5
           5       0.92      0.97      0.95        37
           6       0.99      1.00      0.99        77
           7       0.94      0.90      0.92       104
           8       0.89      0.89      0.89        71

    accuracy                           1.00     33146
   macro avg       0.92      0.90      0.91     33146
weighted avg       1.00      1.00      1.00     33146

Confusion Matrix 
[[32659     0     0     0     0     0     0     0     0]
 [    0    75     3     0     0     0     0     0     0]
 [    3     6    79     1     0     1     0     0   