In [None]:
# Kedro IPython line magic: reloads the Kedro project into this kernel.
# NOTE(review): presumably this is what makes the `catalog` object used in the
# cells below available — confirm against the Kedro IPython extension in use.
%reload_kedro

In [None]:
from crypto_thesis.data_domains.modeling import logreg_model_fit, xgboost_model_fit
from crypto_thesis.data_domains.modeling.lstm import _build_lstm_timestamps_seq
from pprint import pprint
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
import numpy as np

In [None]:
# LSTM

import pandas as pd
from keras.engine.sequential import Sequential
from keras.layers import LSTM, BatchNormalization, Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.regularizers import l2
from crypto_thesis.utils import optimize_params

In [None]:
import warnings
# Suppress every warning for the rest of the session (no category/module filter
# is given, so this also hides warnings raised by the model fits below).
warnings.filterwarnings("ignore")

## Base

In [None]:
# Target column name(s); used both to drop the target from the feature table
# and to select it as `y_train` in the LSTM section.
TARGET_COL = ["label"]
# Column set as the index of the training master table before the LSTM
# sequences are built.
INDEX_COL = "window_nbr"

In [None]:
# Training master tables loaded from the Kedro catalog. The "multic" table is
# used by the XGBoost and LSTM sections, the "nonmultic" table by the LogReg
# section.
# NOTE(review): "multic" presumably refers to multicollinearity — confirm.
mt_train_multic = catalog.load("master_table_train_multic")
mt_train_nonmultic = catalog.load("master_table_train_nonmultic")

# Sequence length parameter; passed to the LSTM sequence builder and used as
# the first dimension of the network's input shape.
seq_length = catalog.load("params:lstm_timestamp_seq_length")

## XGBoost

In [None]:
def build_xgboost_param_combinations():
    """Return the candidate values searched for each XGBoost parameter.

    Returns:
        dict: maps a parameter name to the list of values to try for it.
        Parameters with a one-element list have a single candidate only.
    """
    return dict(
        eval_metric=["auc"],
        n_estimators=[300, 500, 1000],
        max_depth=[3, 5],
        reg_lambda=[0.05, 0.01, 0.1],
        gamma=[0.01],
        min_child_weight=[2.0],
        learning_rate=[0.01, 0.05, 0.1],
        objective=["binary:logistic"],
        sampling_method=["uniform"],
        tree_method=["auto"],
    )

In [None]:
# Default XGBoost parameters come from the Kedro parameters; the candidate
# grid is built locally in this notebook. Both are passed to xgboost_model_fit.
xgb_def_params = catalog.load("params:xgboost_default_params")
xgb_model_params = build_xgboost_param_combinations()

In [None]:
# Run the XGBoost fit on the "multic" training table with parameter
# optimisation switched on. Only the second element of the returned pair
# (the optimisation result table) is kept.
_, df_params_opt = xgboost_model_fit(
    master_table_train=mt_train_multic,
    model_params=xgb_model_params,
    xgboost_optimize_params=True,
    xgboost_default_params=xgb_def_params,
)

In [None]:
# Pretty-print the first row of the XGBoost optimisation result as a dict.
# NOTE(review): presumably the first row is the best combination — confirm the
# ordering produced by xgboost_model_fit.
pprint(df_params_opt.to_dict(orient="records")[0])

## LogReg

In [None]:
def build_logreg_param_combinations():
    """Return the candidate values searched for each logistic-regression parameter.

    Returns:
        dict: maps a parameter name to the list of values to try for it.
        Parameters with a one-element list have a single candidate only.
    """
    search_space = {}

    # parameters with a single candidate
    search_space["solver"] = ["saga"]
    search_space["penalty"] = ["elasticnet"]

    # parameters with several candidates
    search_space["tol"] = [0.0001, 0.001, 0.01]
    search_space["C"] = [0.01, 0.1, 1.0]
    search_space["max_iter"] = [100, 200]

    # parameters with a single candidate
    search_space["fit_intercept"] = [True]
    search_space["class_weight"] = ["balanced"]

    # mixing ratio candidates for the elastic-net penalty
    search_space["l1_ratio"] = [0.01, 0.1, 1.0]

    return search_space

In [None]:
# Default logistic-regression parameters come from the Kedro parameters; the
# candidate grid is built locally in this notebook. Both are passed to
# logreg_model_fit.
logreg_def_params = catalog.load("params:logreg_default_params")
logreg_model_params = build_logreg_param_combinations()

In [None]:
# Run the logistic-regression fit on the "nonmultic" training table with
# parameter optimisation switched on. Only the second element of the returned
# pair is kept; note that it rebinds `df_params_opt`, replacing the XGBoost
# result held under the same name.
_, df_params_opt = logreg_model_fit(
    master_table_train=mt_train_nonmultic,
    model_params=logreg_model_params,
    logreg_optimize_params=True,
    logreg_default_params=logreg_def_params,
)

In [None]:
# Pretty-print the first row of the logistic-regression optimisation result as
# a dict.
# NOTE(review): presumably the first row is the best combination — confirm the
# ordering produced by logreg_model_fit.
pprint(df_params_opt.to_dict(orient="records")[0])

## LSTM

In [None]:
def build_lstm_param_combinations():
    """Return the candidate values searched for each LSTM wrapper parameter.

    Only one candidate per parameter is currently active. Wider ranges that
    were tried or planned, kept here for reference:

    * batch_size: 10, 20, 40, 60, 80, 100
    * epochs: 10, 50, 100
    * optimizer: SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam
    * optimizer__learning_rate: 0.001, 0.01, 0.1, 0.2, 0.3
    * optimizer__momentum: 0.0, 0.2, 0.4, 0.6, 0.8, 0.9
    * init_mode: uniform, lecun_uniform, normal, zero, glorot_normal,
      glorot_uniform, he_normal, he_uniform

    Returns:
        dict: maps a parameter name to the list of values to try for it.
    """
    active_grid = dict(
        batch_size=[1000],
        epochs=[100],
        # NOTE(review): the "model__" prefix presumably targets the
        # `optimizer` argument of the model-building function — confirm.
        model__optimizer=["SGD"],
    )
    return active_grid

In [None]:
def _create_lstm_model(X_train_scaled_seq: np.ndarray,
                       seq_length: int,
                       optimizer: str = "adam") -> Sequential:
    """Build and compile the stacked-LSTM binary classifier.

    Architecture: three LSTM layers of 20 units, each followed by batch
    normalisation, then a single-unit sigmoid ``Dense`` output. The first two
    LSTM layers return the full sequence so the next LSTM can consume it; the
    third returns only its last output. All LSTM layers apply the same L2
    penalty to their kernel and recurrent weights and use no dropout.

    Args:
        X_train_scaled_seq: 3-D array-like of training sequences. Only
            ``shape[2]`` (the number of features) is read; the data itself is
            not used to build the network.
        seq_length: number of timesteps per sequence; first dimension of the
            network's input shape.
        optimizer: optimizer handed unchanged to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    # hyper-parameters of the architecture
    layer_units = [20, 20, 20, 1]                # units in the hidden and output layers
    n_features = X_train_scaled_seq.shape[2]     # number of features
    l2_lambda = 0.005                            # lambda of the L2 regularisation
    dropout_rate = 0.0                           # dropout rate
    recurrent_dropout_rate = 0.0                 # recurrent dropout rate

    def _lstm_layer(units, return_sequences, **extra_kwargs):
        # One LSTM layer with the settings shared by the whole stack; fresh
        # regulariser objects are created for every layer.
        return LSTM(
            units=units,
            activation='tanh',
            recurrent_activation='hard_sigmoid',
            kernel_regularizer=l2(l2_lambda),
            recurrent_regularizer=l2(l2_lambda),
            dropout=dropout_rate,
            recurrent_dropout=recurrent_dropout_rate,
            return_sequences=return_sequences,
            return_state=False,
            stateful=False,
            unroll=False,
            **extra_kwargs,
        )

    model = Sequential()

    # Only the first layer declares the input shape.
    model.add(_lstm_layer(layer_units[0],
                          return_sequences=True,
                          input_shape=(seq_length, n_features)))
    model.add(BatchNormalization())

    model.add(_lstm_layer(layer_units[1], return_sequences=True))
    model.add(BatchNormalization())

    # Last recurrent layer collapses the sequence to a single vector.
    model.add(_lstm_layer(layer_units[2], return_sequences=False))
    model.add(BatchNormalization())

    model.add(Dense(units=layer_units[3], activation='sigmoid'))

    # The optimizer is taken from the argument so it can be varied by a
    # parameter search. An earlier variant of this notebook compiled with Adam
    # at a fixed learning rate of 0.0005 instead.
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [None]:
# Fix the global NumPy seed so the run is repeatable.
np.random.seed(0)
# Fix the global TensorFlow seed as well.
tf.random.set_seed(0)

# Turn off Keras' interactive logging output for the fits below.
tf.keras.utils.disable_interactive_logging()

In [None]:
# Index the "multic" training table by INDEX_COL, then separate the feature
# columns from the target column(s).
master_table_train = mt_train_multic.set_index(INDEX_COL)
X_train = master_table_train.drop(columns=TARGET_COL)
y_train = master_table_train[TARGET_COL]

# Build the sequence inputs/targets consumed by the LSTM from the flat tables.
X_train_scaled_seq, y_train_scaled_seq = _build_lstm_timestamps_seq(
    X=X_train,
    y=y_train,
    seq_length=seq_length,
)

In [None]:
lstm_model_params = build_lstm_param_combinations()

# Hand SciKeras the model-building function rather than an already built and
# compiled model. The search grid contains "model__optimizer"; SciKeras routes
# "model__"-prefixed parameters to the arguments of the building function, so
# each candidate optimizer is only applied when the network is (re)built by
# `_create_lstm_model` for every fit. The builder's other two arguments are
# supplied the same way.
model = KerasClassifier(model=_create_lstm_model,
                        model__X_train_scaled_seq=X_train_scaled_seq,
                        model__seq_length=seq_length,
                        loss="binary_crossentropy",
                        verbose=0)

pprint(lstm_model_params)

In [None]:
# Search the LSTM grid on the sequence data using 10 splits.
params_opt = optimize_params(
    model=model,
    grid=lstm_model_params,
    X_train=X_train_scaled_seq,
    y_train=y_train_scaled_seq,
    n_splits=10,
)

In [None]:
# Pretty-print the `best_params_` attribute of the search result.
# NOTE(review): presumably `optimize_params` returns a fitted scikit-learn
# search object — confirm in crypto_thesis.utils.
pprint(params_opt.best_params_)