# Try out Ray Tune with scikit-learn & Keras

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display

from q2_time.process_data import load_n_split_data
from q2_time.tune_models import run_all_trials
from q2_time.evaluate_models import get_best_model

%matplotlib inline
%load_ext autoreload
%autoreload 2

## Load and split data

In [None]:
# Load the data; load_n_split_data returns two objects, unpacked here as the
# train/validation set and the test set (names as per the variables -- see
# q2_time.process_data.load_n_split_data for how the split is made).
train_val, test = load_n_split_data()

## Run all experiments

In [None]:
# Run the tuning trials on the train/validation data for the four listed model
# types. The results are kept in result_dic, which later cells access per model
# type (e.g. result_dic["xgb"]).
result_dic = run_all_trials(train_val, model_types=["xgb", "nn", "linreg", "rf"])
# Bare expression: shows result_dic as the cell output.
result_dic

## Evaluate best models

In [None]:
# Pick the best model for every model type, in the iteration order of
# result_dic. A dict comprehension replaces the manual fill loop and does not
# leave loop variables behind in the notebook namespace.
# NOTE: a later cell defines its own `get_best_model`, shadowing the one
# imported from q2_time.evaluate_models; re-running this cell after that cell
# calls the local definition instead.
best_model_dic = {
    model_type: get_best_model(model_type, result_grid)
    for model_type, result_grid in result_dic.items()
}

# Bare expression: shows best_model_dic as the cell output.
best_model_dic

## Starter for predictions with the best model

In [None]:
# todo: for prediction you must track the feature engineering performed
# sklearn predict
# best_linreg.predict(train_val[features_ls].values)

## Compare experiments

In [None]:
def get_best_model(exp_name, trial_result, metric_ls=None):
    """Return the last-row metrics and the config of an experiment's best trial.

    NOTE: once this cell has run, this definition shadows the
    `get_best_model` imported from `q2_time.evaluate_models` at the top of
    the notebook.

    Args:
        exp_name: Label of the experiment; used as the column name of the
            returned metrics frame.
        trial_result: Object exposing `get_best_result()`, whose return value
            provides `config` and `metrics_dataframe`.
        metric_ls: Iterable of column names to read from the best result's
            `metrics_dataframe`. Defaults to `["rmse_train", "rmse_val"]`.

    Returns:
        Tuple `(metrics_df, config)`: a one-column DataFrame (column
        `exp_name`, indexed by metric name) holding the last row of the
        selected metrics, and the best result's config.
    """
    # A None sentinel avoids a shared mutable default argument. Copying into a
    # list also lets callers pass a tuple, which pandas would otherwise treat
    # as a single column key.
    if metric_ls is None:
        metric_ls = ["rmse_train", "rmse_val"]
    else:
        metric_ls = list(metric_ls)

    # Get the result with the metric and mode defined in tune_config before
    best_result = trial_result.get_best_result()
    config = best_result.config

    # todo: find out how to extract the best performing model (pkl),
    # e.g. via best_result.best_checkpoints

    # iloc[-1]: keep only the last row of the metrics history.
    metrics_ser = best_result.metrics_dataframe[metric_ls].iloc[-1]
    metrics_df = pd.DataFrame({exp_name: metrics_ser})
    return metrics_df, config


def calc_best_metrics(dic_trials):
    """Gather best-trial metrics and configs for several experiments.

    Args:
        dic_trials: Mapping of experiment name to the trial results object
            that `get_best_model` accepts.

    Returns:
        Tuple `(df_metrics, df_config)`: a DataFrame indexed by
        `rmse_train` / `rmse_val` with one column per experiment, and a
        DataFrame built from the per-experiment config dicts (one column
        per experiment).
    """
    # Evaluate every experiment first, in the mapping's iteration order.
    best_by_exp = {
        exp_name: get_best_model(exp_name, trials)
        for exp_name, trials in dic_trials.items()
    }

    # Start from an empty frame with the metric names as index and attach one
    # column per experiment.
    metrics_table = pd.DataFrame(index=["rmse_train", "rmse_val"])
    config_by_exp = {}
    for exp_name, (exp_metrics, exp_config) in best_by_exp.items():
        metrics_table = metrics_table.join(exp_metrics)
        config_by_exp[exp_name] = exp_config

    return metrics_table, pd.DataFrame(config_by_exp)


def plot_best_metrics(df_metrics):
    """Show a bar chart comparing train and validation RMSE per model.

    Args:
        df_metrics: DataFrame with one column per model and a `rmse_val`
            row to sort by. After transposing, its two metric columns are
            relabelled positionally as "train" and "validation".

    Side effects:
        Switches the global matplotlib style, updates `plt.rcParams` and
        displays the figure via `plt.show()`.
    """
    font_sizes = {"title": 14, "label": 13, "tick": 12}

    # One row per model, lowest validation RMSE first.
    ranked = df_metrics.transpose().sort_values(by="rmse_val")
    ranked.columns = ["train", "validation"]

    # plot settings
    # todo: set default plot settings across package
    plt.style.use("seaborn-v0_8-colorblind")  # ("tableau-colorblind10")
    plt.rcParams.update({"font.size": font_sizes["label"]})

    ranked.plot.bar(figsize=(12, 6))

    plt.title("Metrics comparison", fontsize=font_sizes["title"])
    plt.xlabel(
        "Model type (order: increasing val score)", fontsize=font_sizes["label"]
    )
    plt.ylabel("RMSE", fontsize=font_sizes["label"])
    plt.xticks(fontsize=font_sizes["tick"])
    plt.yticks(fontsize=font_sizes["tick"])
    plt.show()

In [None]:
# Compare the best trial of every model type: metrics as a bar chart, configs
# as a table. The tuple fixes the order in which the experiments are passed on.
metrics_all, best_configs = calc_best_metrics(
    {name: result_dic[name] for name in ("xgb", "linreg", "nn", "rf")}
)
plot_best_metrics(metrics_all)
display(best_configs)

## Evaluate over training time (example for xgb model here)

In [None]:
# Get the best xgb result (get_best_result() is called without arguments, so
# the metric and mode come from the tuning configuration) and plot its train
# and validation RMSE against the training iteration.
best_xgb = result_dic["xgb"].get_best_result()
best_xgb.metrics_dataframe.plot("training_iteration", ["rmse_train", "rmse_val"])

In [None]:
# Overlay the validation RMSE curves of all xgb trials on one set of axes.
ax = None
for result in result_dic["xgb"]:
    # Adjacent f-strings instead of a backslash continuation inside the
    # literal, which leaked the next line's indentation into the legend label.
    label = (
        f"data_transform={result.config['data_transform']}, "
        f"max_depth={result.config['max_depth']}"
    )
    # With ax=None pandas creates fresh axes for the first trial; the returned
    # axes are then reused for every following trial.
    ax = result.metrics_dataframe.plot(
        "training_iteration", "rmse_val", ax=ax, label=label
    )
# Move the legend outside the upper-right corner of the axes.
ax.legend(bbox_to_anchor=(1.1, 1.05))

ax.set_title("rmse_val vs. training iteration for all trials")
ax.set_ylabel("RMSE_val")