In [None]:
# Kedro IPython line magic. Presumably (re)loads the Kedro project and provides the
# `catalog` object that later cells use without defining it — TODO confirm against
# the project's Kedro/IPython setup.
%reload_kedro

In [None]:
from crypto_thesis.data_domains.modeling import logreg_model_fit, xgboost_model_fit
from crypto_thesis.utils import optimize_params
from pprint import pprint
from sklearn.linear_model import LogisticRegression

In [None]:
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so it also hides solver convergence and
# deprecation warnings raised during the parameter searches below — consider
# narrowing it to specific categories.
warnings.filterwarnings("ignore")

## Base setup: constants and training data

In [None]:
# Target column(s). Kept as a list: it is passed to `drop(columns=...)` and used as a
# column selector, so the selected target is a one-column DataFrame, not a Series.
TARGET_COL = ["label"]
# Column that is promoted to the DataFrame index before features and target are split.
INDEX_COL = "window_nbr"

In [None]:
# Load both training master tables from the catalog: the "multic" table is used for
# the XGBoost search and the "nonmultic" table for the LogReg search.
# NOTE(review): presumably "multic"/"nonmultic" means with/without multicollinear
# features — confirm against the catalog definition.
mt_train_multic = catalog.load("master_table_train_multic")
mt_train_nonmultic = catalog.load("master_table_train_nonmultic")

## XGBoost

In [None]:
def build_xgboost_param_combinations():
    """Return the XGBoost hyper-parameter search grid.

    Returns:
        dict: maps each parameter name to the list of candidate values to try.
        A fresh dict (with fresh lists) is built on every call.

    Notes:
        * ``reg_alpha`` is currently not part of the grid.
        * NOTE(review): the grid spans 3**9 * 2 * 2 = 78,732 combinations; if the
          downstream search is exhaustive this is very expensive — confirm.
        * NOTE(review): per the XGBoost docs ``gradient_based`` sampling is only
          supported by the GPU hist tree method, while ``tree_method`` here is
          ``auto``/``approx`` — confirm these combinations are valid.
    """
    param_grid = dict(
        booster=["gbtree", "gblinear", "dart"],
        eval_metric=["auc", "logloss", "error"],
        n_estimators=[300, 500, 1000],
        max_depth=[3, 5, 8],
        reg_lambda=[0.05, 0.01, 0.1],
        gamma=[0.01, 0.1, 1.0],
        min_child_weight=[0.5, 2.0, 5.0],
        learning_rate=[0.01, 0.05, 0.1],
        objective=["binary:logistic", "reg:logistic", "binary:hinge"],
        sampling_method=["uniform", "gradient_based"],
        tree_method=["auto", "approx"],
        seed=[0],  # single value: fixed for reproducibility, not searched
    )
    return param_grid

In [None]:
# Default XGBoost parameters come from the catalog's "params:" entry; the search
# grid comes from the builder defined in this notebook.
xgb_def_params = catalog.load("params:xgboost_default_params")
xgb_model_params = build_xgboost_param_combinations()

In [None]:
# Run the XGBoost fit with parameter optimisation switched on, using the "multic"
# training table. Only the second returned value (df_params_opt) is kept; the first
# is discarded.
_, df_params_opt = xgboost_model_fit(
    master_table_train=mt_train_multic,
    model_params=xgb_model_params,
    xgboost_optimize_params=True,
    xgboost_default_params=xgb_def_params,
)

In [None]:
# Print the first record of the XGBoost optimisation result as a plain dict
# (presumably the best parameter combination — confirm against xgboost_model_fit).
pprint(df_params_opt.to_dict(orient="records")[0])

## LogReg

In [None]:
def build_logreg_param_combinations(full_grid: bool = False) -> dict:
    """Return the logistic-regression hyper-parameter search grid.

    This function used to be defined twice in the same cell, so the second
    (reduced) definition silently shadowed the first (wider) one. Both grids
    are now available from a single definition; the default is the reduced
    grid, i.e. exactly what a plain call returned before.

    Args:
        full_grid: when ``False`` (default) return the reduced grid. When
            ``True`` return the wider grid, which additionally searches
            ``max_iter=500``, ``fit_intercept=False``, ``class_weight=None``
            and pins ``random_state`` to 0.

    Returns:
        dict: maps each parameter name to the list of candidate values. A
        fresh dict (with fresh lists) is built on every call, so callers may
        mutate the result freely.
    """
    # Reduced grid: the one that was effectively in use (second definition).
    grid = {
        "solver": ["saga"],
        "penalty": ["elasticnet"],
        "tol": [0.0001, 0.001, 0.01],
        "C": [0.01, 0.1, 1.0],
        "max_iter": [100, 200],
        "fit_intercept": [True],
        "class_weight": ["balanced"],
        "l1_ratio": [0.01, 0.1, 1.0],
    }

    if full_grid:
        # Wider grid: the formerly shadowed first definition. `update` keeps the
        # existing key positions and appends `random_state` last, so the key
        # order matches that definition as well.
        grid.update(
            {
                "max_iter": [100, 200, 500],
                "fit_intercept": [True, False],
                "class_weight": ["balanced", None],
                "random_state": [0],
            }
        )

    return grid

In [None]:
# Default LogReg parameters from the catalog, plus the search grid built above.
logreg_def_params = catalog.load("params:logreg_default_params")
logreg_model_params = build_logreg_param_combinations()

# Work on a copy of the "nonmultic" table so the loaded frame stays untouched,
# and index it by INDEX_COL so that column is not treated as a feature.
master_table_train = mt_train_nonmultic.copy().set_index(INDEX_COL)

# Features are everything except the target column(s). TARGET_COL is a list, so
# y_train is a one-column DataFrame rather than a Series.
X_train = master_table_train.drop(columns=TARGET_COL)
y_train = master_table_train[TARGET_COL]

# Base estimator configured with the defaults; the grid is applied by the search.
model = LogisticRegression(**logreg_def_params)

In [None]:
# Search the LogReg grid on the training data using 5 splits.
# NOTE(review): `params_opt` is read via `best_params_` / `best_score_`, so it is
# presumably a fitted scikit-learn search object — confirm in crypto_thesis.utils.
params_opt = optimize_params(
    model=model,
    grid=logreg_model_params,
    X_train=X_train,
    y_train=y_train,
    n_splits=5,
)

# Report the winning parameter combination and its score.
pprint(params_opt.best_params_)
pprint(params_opt.best_score_)

In [None]:
# NOTE(review): `df_params_opt` is the XGBoost optimisation result from the XGBoost
# section, not the LogReg search; this cell repeats the earlier XGBoost printout.
# Confirm it is intentional and not a copy-paste leftover.
pprint(df_params_opt.to_dict(orient="records")[0])