In [None]:
# Reload the Kedro project for this notebook session.
# NOTE(review): `catalog`, used by the cells below, is never defined in the
# notebook itself -- presumably this magic injects it; confirm.
%reload_kedro

In [None]:
from crypto_thesis.data_domains.modeling import logreg_model_fit, lstm_model_fit, xgboost_model_fit
from typing import Dict, List
import itertools
import time
import numpy as np

In [None]:
import warnings
# Suppress every Python warning from here on so they do not clutter cell output.
warnings.filterwarnings("ignore")

import tensorflow as tf
# Raise TensorFlow's logger threshold so only ERROR-level messages are shown.
tf.get_logger().setLevel("ERROR") # only show error messages

## Base

In [None]:
# Master tables read from the Kedro catalog.
# The "multic" train table is passed to the XGBoost and LSTM fits below; the
# "multic" test table is passed to the LSTM fit only.
# NOTE(review): "multic"/"nonmultic" presumably refer to multicollinearity
# handling in the feature set -- confirm against the pipeline definition.
mt_train_multic = catalog.load("master_table_train_multic")
mt_test_multic = catalog.load("master_table_test_multic")

# The "nonmultic" train table is passed to the LogReg fit below.
mt_train_nonmultic = catalog.load("master_table_train_nonmultic")

In [None]:
def build_nbr_combinations(grid: Dict[str, List]) -> List[tuple]:
    """Enumerate every hyperparameter combination of a search grid.

    Args:
        grid: Mapping from parameter name to the list of candidate values
            for that parameter.

    Returns:
        A list with one tuple per combination, i.e. the Cartesian product of
        the value lists taken in the grid's key order. Callers use ``len()``
        of the result to get the number of combinations. An empty grid
        yields a single empty combination (``[()]``); a grid containing an
        empty value list yields no combinations (``[]``).
    """
    # Only the value lists take part in the product; the keys are not needed.
    # Using ``grid.values()`` (instead of unpacking ``zip(*grid.items())``)
    # also avoids a ValueError when the grid is empty.
    return list(itertools.product(*grid.values()))

## XGBoost

In [None]:
def build_xgboost_param_combinations():
    """Return the hyperparameter search grid used for XGBoost.

    Returns:
        A new dict mapping each parameter name to its list of candidate
        values. Four parameters carry more than one candidate
        (``n_estimators``, ``max_depth``, ``reg_lambda``, ``learning_rate``),
        giving 3 * 2 * 3 * 3 = 54 combinations; every other parameter is
        pinned to a single value.
    """
    search_grid = dict(
        eval_metric=['auc'],
        n_estimators=[300, 500, 1000],
        max_depth=[3, 5],
        reg_lambda=[0.05, 0.01, 0.1],
        gamma=[0.01],
        min_child_weight=[2.0],
        learning_rate=[0.01, 0.05, 0.1],
        subsample=[0.7],
        colsample_bytree=[0.5],
        objective=['binary:logistic'],
        sampling_method=['uniform'],
        tree_method=['auto'],
    )
    return search_grid

In [None]:
xgb_model_params = catalog.load("params:xgboost_model_params")
xgb_def_params = catalog.load("params:xgboost_default_params")

# Optimization switch read from the project parameters:
#   truthy -> swap in the search grid defined in this notebook
#   falsy  -> keep the model parameters loaded from the catalog
# (assign True here instead to force the search for an ad-hoc run)
xgb_opt_params = catalog.load("params:xgboost_optimize_params")

print()
if not xgb_opt_params:
    print("--> NOT optimizing hyperparameters")
else:
    # Override the catalog parameters with the notebook grid and report how
    # many combinations that grid expands to.
    xgb_model_params = build_xgboost_param_combinations()
    combinations = build_nbr_combinations(grid=xgb_model_params)
    print(f"--> Optimizing hyperparameters with total combinations of: {len(combinations)}")

In [None]:
%%timeit
# Time the XGBoost fit on the "multic" training table. %%timeit executes the
# cell body repeatedly; both return values are discarded because only the
# run time is of interest here.
_, _ = xgboost_model_fit(master_table_train=mt_train_multic,
                        model_params=xgb_model_params, 
                        xgboost_optimize_params=xgb_opt_params, 
                        xgboost_default_params=xgb_def_params)

## LSTM

In [None]:
lstm_timestamp_seq_length = catalog.load("params:lstm_timestamp_seq_length")

# Optimization switch (True = optimize hyperparameters, False = do not).
# NOTE(review): unlike the XGBoost and LogReg sections, this flag is
# hard-coded rather than read from the catalog, and it is not passed to
# lstm_model_fit in the timing cell -- in this notebook it only selects the
# message printed below.
lstm_opt_params = False

print()
print("--> Optimizing hyperparameters" if lstm_opt_params else "--> NOT optimizing hyperparameters")

In [None]:
%%timeit
# Time the LSTM fit on the "multic" train/test tables with the sequence
# length loaded from the catalog. %%timeit executes the cell body repeatedly;
# both return values are discarded because only the run time is of interest.
_, _ = lstm_model_fit(master_table_train=mt_train_multic, 
                    master_table_test=mt_test_multic, 
                    seq_length=lstm_timestamp_seq_length)

## LogReg

In [None]:
def build_logreg_param_combinations():
    """Return the hyperparameter search grid used for logistic regression.

    Returns:
        A new dict mapping each parameter name to its list of candidate
        values. Four parameters carry more than one candidate (``tol``,
        ``C``, ``max_iter``, ``l1_ratio``), giving 3 * 3 * 2 * 3 = 54
        combinations; every other parameter is pinned to a single value.
    """
    search_grid = dict(
        solver=["saga"],
        penalty=["elasticnet"],
        tol=[0.0001, 0.001, 0.01],
        C=[0.01, 0.1, 1.0],
        max_iter=[100, 200],
        fit_intercept=[True],
        class_weight=["balanced"],
        l1_ratio=[0.01, 0.1, 1.0],
    )
    return search_grid

In [None]:
logreg_model_params = catalog.load("params:logreg_model_params")
logreg_def_params = catalog.load("params:logreg_default_params")

# Optimization switch read from the project parameters:
#   truthy -> swap in the search grid defined in this notebook
#   falsy  -> keep the model parameters loaded from the catalog
# (assign True here instead to force the search for an ad-hoc run)
logreg_opt_params = catalog.load("params:logreg_optimize_params")

print()
if not logreg_opt_params:
    print("--> NOT optimizing hyperparameters")
else:
    # Override the catalog parameters with the notebook grid and report how
    # many combinations that grid expands to.
    logreg_model_params = build_logreg_param_combinations()
    combinations = build_nbr_combinations(grid=logreg_model_params)
    print(f"--> Optimizing hyperparameters with total combinations of: {len(combinations)}")

In [None]:
# Benchmark the LogReg fit: run it several times and record each run's
# elapsed time in seconds.
N_TIMING_RUNS = 5
times = []

for _run_idx in range(N_TIMING_RUNS):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock and can jump
    # if the system time is adjusted mid-run.
    start = time.perf_counter()
    # Both return values are discarded; only the run time matters here.
    _, _ = logreg_model_fit(master_table_train=mt_train_nonmultic,
                            model_params=logreg_model_params,
                            logreg_optimize_params=logreg_opt_params,
                            logreg_default_params=logreg_def_params)
    end = time.perf_counter()

    times.append(end - start)

In [None]:
# Report the mean ("Média") and the sample standard deviation ("Desvio",
# computed with ddof=1) of the run times collected in `times`, in seconds.
print(f"Média (s): {np.mean(times)}\nDesvio (s): {np.std(times, ddof=1)}")