# Try out Ray Tune with scikit-learn & Keras

In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display

from q2_time import _static_searchspace as ss
from q2_time import _static_trainables as st
from q2_time.config import HOST_ID, MLFLOW_TRACKING_URI, SEED_DATA, SEED_MODEL, TARGET
from q2_time.process_data import load_n_split_data
from q2_time.tune_models import run_trials
from tensorflow.keras.models import load_model
import xgboost as xgb

%matplotlib inline
%load_ext autoreload
%autoreload 2

## Load and split data

In [None]:
# Load the dataset and split it: `train_val` feeds the tuning runs below,
# `test` is not used anywhere in the visible part of this notebook.
train_val, test = load_n_split_data()

## Run all experiments

In [None]:
# Tune the "xgb" experiment with `st.train_xgb` and `ss.xgb_space` on
# `train_val`; results are tracked under MLFLOW_TRACKING_URI.
# NOTE(review): the role of each positional argument is inferred from the
# names passed here — confirm against the signature of `run_trials`.
results_xgb = run_trials(
    MLFLOW_TRACKING_URI,
    "xgb",
    st.train_xgb,
    ss.xgb_space,
    train_val,
    TARGET,
    HOST_ID,
    SEED_DATA,
    SEED_MODEL,
    fully_reproducible=False,
)

In [None]:
# Tune the "nn" experiment with `st.train_nn` and `ss.nn_space` on
# `train_val`; results are tracked under MLFLOW_TRACKING_URI.
# NOTE(review): the role of each positional argument is inferred from the
# names passed here — confirm against the signature of `run_trials`.
results_nn = run_trials(
    MLFLOW_TRACKING_URI,
    "nn",
    st.train_nn,
    ss.nn_space,
    train_val,
    TARGET,
    HOST_ID,
    SEED_DATA,
    SEED_MODEL,
    fully_reproducible=False,
)

In [None]:
# Tune the "linreg" experiment with `st.train_linreg` and `ss.linreg_space`
# on `train_val`; results are tracked under MLFLOW_TRACKING_URI.
# NOTE(review): the role of each positional argument is inferred from the
# names passed here — confirm against the signature of `run_trials`.
results_linreg = run_trials(
    MLFLOW_TRACKING_URI,
    "linreg",
    st.train_linreg,
    ss.linreg_space,
    train_val,
    TARGET,
    HOST_ID,
    SEED_DATA,
    SEED_MODEL,
    fully_reproducible=False,
)

In [None]:
# Tune the "rf" experiment with `st.train_rf` and `ss.rf_space` on
# `train_val`; results are tracked under MLFLOW_TRACKING_URI.
# NOTE(review): the role of each positional argument is inferred from the
# names passed here — confirm against the signature of `run_trials`.
results_rf = run_trials(
    MLFLOW_TRACKING_URI,
    "rf",
    st.train_rf,
    ss.rf_space,
    train_val,
    TARGET,
    HOST_ID,
    SEED_DATA,
    SEED_MODEL,
    fully_reproducible=False,
)

## Starter for getting best model

In [None]:
# keras case: works with changes in trainable stored
def load_best_checkpoint(checkpoint_dir):
    """Load the model stored under ``<checkpoint_dir>/checkpoint``.

    Only the keras loader (``load_model``) is wired in so far, so this works
    for checkpoints written by the keras trainable only.
    """
    # todo: add different loading based on framework used in training
    return load_model(os.path.join(checkpoint_dir, "checkpoint"))


# Get the best trial of the nn experiment
best_trial = results_nn.get_best_result()

# Load the best model: `to_directory()` is used here as returning a directory
# path, inside which `load_best_checkpoint` looks for a "checkpoint" entry
best_checkpoint_dir = best_trial.checkpoint.to_directory()
best_model = load_best_checkpoint(best_checkpoint_dir)
# Print the overview of the loaded keras model
best_model.summary()

In [None]:
# xgb case
# NOTE: this cell rebinds `best_trial`, `best_checkpoint_dir` and `best_model`
# that the keras cell above also assigns.
best_trial = results_xgb.get_best_result()

# Load the best model: build the path of the "checkpoint" entry inside the
# trial's checkpoint directory and hand it to xgboost as `model_file`
best_checkpoint_dir = best_trial.checkpoint.to_directory()
best_checkpoint_path = os.path.join(best_checkpoint_dir, "checkpoint")
best_model = xgb.Booster(model_file=best_checkpoint_path)
# Bare expression as last line of the cell, so the notebook shows the object
best_model

## Compare experiments

In [None]:
# Show the config of the best linreg trial
results_linreg.get_best_result().config

In [None]:
def get_best_model(exp_name, trial_result, metric_ls=None):
    """Return the final metrics and the config of the best trial.

    Args:
        exp_name: Label used as the column name of the returned metrics frame.
        trial_result: Object offering ``get_best_result()``; the returned
            result must expose ``config`` and ``metrics_dataframe``.
        metric_ls: Metric column name(s) to extract, as a single string or
            any iterable of strings. ``None`` (default) selects
            ``["rmse_train", "rmse_val"]``.

    Returns:
        Tuple ``(metrics_df, config)``. ``metrics_df`` is a one-column
        DataFrame (column ``exp_name``, index = metric names) holding the
        values of the last row of ``metrics_dataframe``; ``config`` is the
        ``config`` attribute of the best result.
    """
    # A ``None`` sentinel replaces the former mutable list default.
    if metric_ls is None:
        metric_ls = ["rmse_train", "rmse_val"]
    elif isinstance(metric_ls, str):
        metric_ls = [metric_ls]
    else:
        # pandas treats a tuple as ONE column key; a list selects many columns
        metric_ls = list(metric_ls)

    # Get the result with the metric and mode defined in tune_config before
    best_result = trial_result.get_best_result()

    # todo: find out how to extract the best performing model (pkl)
    # The last row of the metrics frame holds the most recently reported values
    metrics_ser = best_result.metrics_dataframe[metric_ls].iloc[-1]
    return pd.DataFrame({exp_name: metrics_ser}), best_result.config


def calc_best_metrics(dic_trials):
    """Collect best-trial metrics and configs of several experiments.

    Args:
        dic_trials: Mapping of experiment name to its trial results, each
            passed on to ``get_best_model``.

    Returns:
        Tuple ``(df_metrics, df_config)``: a frame indexed by ``rmse_train``
        and ``rmse_val`` with one column per experiment, and a frame built
        from the per-experiment best configs.
    """
    best_configs = {}
    metrics_table = pd.DataFrame(index=["rmse_train", "rmse_val"])

    for exp_name, trial_result in dic_trials.items():
        exp_metrics, exp_config = get_best_model(exp_name, trial_result)
        # Left join on the metric names adds this experiment as a new column
        metrics_table = metrics_table.join(exp_metrics)
        best_configs[exp_name] = exp_config

    return metrics_table, pd.DataFrame(best_configs)


def plot_best_metrics(df_metrics):
    """Show a bar chart comparing train and validation RMSE per model.

    ``df_metrics`` is expected to hold one column per model and the two rows
    ``rmse_train`` and ``rmse_val`` in this order, because the transposed
    columns are relabelled positionally. Models are ordered by increasing
    ``rmse_val``. Note that the matplotlib style and the ``font.size``
    rcParam are changed globally.
    """
    # todo: set default plot settings across package
    fontsizes = {"title": 14, "label": 13, "tick": 12}

    per_model = df_metrics.T.sort_values("rmse_val", ascending=True)
    per_model.columns = ["train", "validation"]

    # plot settings
    plt.style.use("seaborn-v0_8-colorblind")  # ("tableau-colorblind10")
    plt.rcParams.update({"font.size": fontsizes["label"]})

    per_model.plot(kind="bar", figsize=(12, 6))

    plt.title("Metrics comparison", fontsize=fontsizes["title"])
    plt.xlabel(
        "Model type (order: increasing val score)", fontsize=fontsizes["label"]
    )
    plt.ylabel("RMSE", fontsize=fontsizes["label"])
    plt.xticks(fontsize=fontsizes["tick"])
    plt.yticks(fontsize=fontsizes["tick"])
    plt.show()

In [None]:
# Gather the best-trial metrics and configs of all four experiments, then
# show the RMSE bar chart followed by the table of best configs.
metrics_all, best_configs = calc_best_metrics(
    {"xgb": results_xgb, "linreg": results_linreg, "nn": results_nn, "rf": results_rf}
)
plot_best_metrics(metrics_all)
display(best_configs)

## Evaluate over training time (example for xgb model here)

In [None]:
# Get the best xgb result. No metric/mode is passed here, so whatever was
# configured for the tuning run applies (presumably inside `run_trials` —
# TODO confirm). Then plot its train and validation RMSE per training iteration.
best_xgb = results_xgb.get_best_result()
best_xgb.metrics_dataframe.plot("training_iteration", ["rmse_train", "rmse_val"])

In [None]:
# Overlay the validation RMSE curves of all xgb trials on one shared axes.
ax = None
for result in results_xgb:
    # Adjacent f-strings instead of a backslash continuation inside the
    # literal, which leaked the next line's indentation into the legend label.
    label = (
        f"data_transform={result.config['data_transform']}, "
        f"max_depth={result.config['max_depth']}"
    )
    # With ax=None pandas creates a new axes; later trials reuse that axes.
    ax = result.metrics_dataframe.plot(
        "training_iteration", "rmse_val", ax=ax, label=label
    )

# Without any trial no axes was created, so there is nothing to decorate.
if ax is not None:
    ax.legend(bbox_to_anchor=(1.1, 1.05))
    ax.set_title("rmse_val vs. training iteration for all trials")
    ax.set_ylabel("RMSE_val")