In [1]:
###### importing dependencies #############################################
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
from sklearn.metrics import plot_confusion_matrix

import cryptoaml.datareader as cdr
from cryptoaml.models import LightGbmAlgo

In [10]:
# Fetch the "elliptic" dataset via the project data reader and build the
# train/test split used by the cells below (train_size=0.7, "AF" feature set
# only, inc_meta=False).
# NOTE(review): "AF" presumably stands for "all features" and inc_meta=False
# presumably drops metadata columns -- confirm against cryptoaml.datareader.
data = cdr.get_data("elliptic")
data_sets = data.train_test_split(
    train_size=0.7,
    feat_set=["AF"],
    inc_meta=False,
)

In [21]:
# Registry of trained models, keyed by name; insertion order is preserved.
models = OrderedDict()

# Restore the tuned model persisted by experiment 1.
# Despite the historical variable name `xgb`, this is a LightGbmAlgo instance
# loaded from the "light_boost" directory; the name is kept because later
# cells reference it.
xgb = LightGbmAlgo(
    persist_props=dict(
        method="load",
        load_path="saved_models/_experiment_1/tuned/AF/light_boost",
    )
)

# Register the same instance under its own name and three suffixed aliases.
# NOTE(review): all four entries point at one object -- these look like
# placeholder entries; confirm whether distinct models were intended.
models.update((xgb.model_name_ + suffix, xgb) for suffix in ("", "1", "2", "3"))

# Evaluate the loaded model on the held-out "AF" test split.
tmp_results = xgb.evaluate(
    metrics=["precision", "recall", "f1", "f1_micro", "confusion"],
    X=data_sets["AF"].test_X,
    y=data_sets["AF"].test_y,
)

print(tmp_results)

{'precision': 0.9024390243902439, 'recall': 0.7174515235457064, 'f1': 0.7993827160493828, 'f1_micro': 0.9766046790641871, 'confusion': array([[15503,    84],
       [  306,   777]])}


In [9]:
# Show the logbooks stored on the loaded model's tuner.
# NOTE(review): this reaches through two private attributes (`_tuner._tuner`);
# presumably these are the per-generation tuning statistics (the recorded
# output lists gen/nevals/avg/min/max/std) -- confirm, and prefer a public
# accessor if the library offers one.
display(xgb._tuner._tuner.all_logbooks_)

[[{'gen': 0,
   'nevals': 20,
   'avg': 0.7616987635345079,
   'min': 0.7364636640127208,
   'max': 0.781425924471549,
   'std': 0.01216128053821913},
  {'gen': 1,
   'nevals': 6,
   'avg': 0.7694324422366408,
   'min': 0.7323994719262401,
   'max': 0.7903886192367506,
   'std': 0.013019819620680106},
  {'gen': 2,
   'nevals': 15,
   'avg': 0.771643805414977,
   'min': 0.7427681578926324,
   'max': 0.7903886192367506,
   'std': 0.011239411110411428},
  {'gen': 3,
   'nevals': 13,
   'avg': 0.7724070837630816,
   'min': 0.7257649517297682,
   'max': 0.7930102045311661,
   'std': 0.014675209189726535},
  {'gen': 4,
   'nevals': 14,
   'avg': 0.7705360442228592,
   'min': 0.7220386087765855,
   'max': 0.7930102045311661,
   'std': 0.019818305135468842},
  {'gen': 5,
   'nevals': 14,
   'avg': 0.7706113265937299,
   'min': 0.7307707033463836,
   'max': 0.7930102045311661,
   'std': 0.020406292819090458},
  {'gen': 6,
   'nevals': 17,
   'avg': 0.7854903015768865,
   'min': 0.74276815789263

In [26]:
# Train a fresh LightGbmAlgo with explicit hyper-parameters (DART boosting,
# 400 estimators) on the "AF" training split, then score it on the "AF" test
# split with the same metric list used for the loaded model.
light_test = LightGbmAlgo(
    boosting_type="dart",
    n_estimators=400,
)

# The return value of fit() is bound to `x` but not used in this cell.
x = light_test.fit(
    data_sets["AF"].train_X,
    data_sets["AF"].train_y,
)

# Note: this rebinds `tmp_results`, replacing the loaded model's results.
tmp_results = light_test.evaluate(
    metrics=["precision", "recall", "f1", "f1_micro", "confusion"],
    X=data_sets["AF"].test_X,
    y=data_sets["AF"].test_y,
)

print(tmp_results)

{'precision': 0.9390681003584229, 'recall': 0.7257617728531855, 'f1': 0.81875, 'f1_micro': 0.979124175164967, 'confusion': array([[15536,    51],
       [  297,   786]])}


In [4]:
# # display results 

# def plot_feature_importance(models):
#     for key, value in models.items():
#         value.feature_importances_.plot.bar(figsize=(18,5))
#         plt.xlabel("Feature Importance Score [{0}]".format(key))
#         plt.show()

# def plot_performance(models, test_X, test_y):
#     f1 = []
#     model_names = models.keys()
#     for key, value in models.items():
#         tmp_results = value.evaluate(metrics=["precision", "recall", "f1", "f1_micro", "confusion"],
#                                      X=test_X,
#                                      y=test_y)
        
#         f1.append(tmp_results["f1"])
        
#     y_pos = np.arange(len(model_names))
#     plt.bar(y_pos, f1, align='center', alpha=0.5)
#     plt.xticks(y_pos, model_names)
#     plt.ylabel("F1")
#     plt.title("Results - F1 (Illicit)")

#     plt.show()
    
#     print(f1)
        
# # plot_feature_importance(models)

# plot_performance(models, data_sets["LF"].test_X, data_sets["LF"].test_y)

# # perf_results = xgb.evaluate(metrics=["precision", "recall", "f1", "f1_micro", "confusion"], 
# #                             X=data_sets["LF"].test_X, 
# #                             y=data_sets["LF"].test_y)
# # print(perf_results)