# Try out framework with 5-cohort MA data

In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display

from q2_time.process_data import load_n_split_data
from q2_time.tune_models import run_all_trials
from q2_time.evaluate_models import get_best_model, get_best_data_processing
from q2_time.engineer_features import transform_features
from q2_time.config import HOST_ID, TARGET
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import numpy as np

# Average number of days per month (30.437 ~= 365.2425 / 12). Used further
# down to bin the "true" values into months - this assumes those values are
# expressed in days (TODO confirm against the metadata).
DAYS_PER_MONTH = 30.437
%matplotlib inline
%load_ext autoreload
%autoreload 2

## Load and split data

In [None]:
# Both input files live in the same data directory: the processed metadata
# table (.tsv) and the OTU feature table (.qza).
data_dir = "data/220728_monthly"
path2md, path2ft = (
    os.path.join(data_dir, file_name)
    for file_name in ("metadata_proc_v20230824.tsv", "all_otu_table.qza")
)

# load_n_split_data returns two objects: the train/validation portion and the
# test portion. HOST_ID and TARGET are handed over as the third argument.
train_val, test = load_n_split_data(path2md, path2ft, [HOST_ID, TARGET])

## Run all experiments

In [None]:
# Run the trials for all four model types on the train/validation data.
# The result is indexed by model type in the cells below.
result_dic = run_all_trials(
    train_val,
    model_types=["xgb", "nn", "linreg", "rf"],
)
# Bare expression: lets the notebook render the results as the cell output.
result_dic

## Evaluate best models

In [None]:
class TunedModel:
    """Bundle a model with the data-processing config it should be fed with.

    ``model`` is any object with a ``predict`` method; ``data_config`` is a
    mapping that must provide the keys ``"data_transform"`` and
    ``"data_alr_denom_idx"``, which are forwarded to ``transform_features``.
    """

    def __init__(self, model, data_config):
        self.model, self.data_config = model, data_config

    def transform(self, data):
        """Transform ``data`` into the input container the model expects.

        The features are first passed through ``transform_features`` using
        the stored config. An XGBoost ``Booster`` then receives them wrapped
        in a ``DMatrix``; every other model receives the underlying array
        (``.values``).
        """
        config = self.data_config
        features = transform_features(
            data, config["data_transform"], config["data_alr_denom_idx"]
        )
        # Native XGBoost boosters predict on DMatrix objects, not on arrays.
        needs_dmatrix = isinstance(self.model, xgb.core.Booster)
        return xgb.DMatrix(features) if needs_dmatrix else features.values

    def predict(self, data):
        """Return the model's predictions for ``data`` as a flattened array."""
        return self.model.predict(self.transform(data)).flatten()

In [None]:
# Build one TunedModel per model type from that model type's tuning results:
# the best model plus the data-processing config that goes with it.
# NOTE(review): `get_best_model` is expected to be the function imported from
# q2_time.evaluate_models. A later cell of this notebook defines another
# `get_best_model` that returns a (metrics_df, config) tuple; once that cell
# has been executed, re-running this loop would wrap that tuple instead of a
# model. Re-run the import cell first, or rename the later helper.
best_model_dic = {}
for model_type, result_grid in result_dic.items():
    best_model = get_best_model(model_type, result_grid)
    best_data_proc = get_best_data_processing(result_grid)

    best_model_dic[model_type] = TunedModel(best_model, best_data_proc)

# Bare expression: lets the notebook display the mapping.
best_model_dic

In [None]:
# ! Perform predictions with best model
def save_predictions(data, tmodel, target, features, split=None):
    """Collect true values and model predictions for ``data`` in one frame.

    Args:
        data: DataFrame holding the ``target`` column and all ``features``.
        tmodel: Object with a ``predict`` method that accepts
            ``data[features]`` and returns one prediction per row.
        target: Name of the column with the true values.
        features: List of column names passed to the model.
        split: Label written to every row of the "split" column
            (e.g. "train" or "test").

    Returns:
        A new DataFrame sharing the index of ``data`` with the columns
        "true", "pred" and "split". ``data`` itself is left untouched.
    """
    predictions = tmodel.predict(data[features])

    # rename() without inplace already yields a fresh frame, so the caller's
    # data is never modified.
    result = data[[target]].rename(columns={target: "true"})
    result["pred"] = predictions
    result["split"] = split
    return result


# Every column of train_val that is neither the target nor the host id is
# treated as a model feature.
non_features = [TARGET, HOST_ID]
features = [col for col in train_val if col not in non_features]

# model_type -> one frame stacking the train/validation predictions (labelled
# "train") on top of the test predictions (labelled "test")
preds_dic = {}
for model_type, tmodel in best_model_dic.items():
    train_df = save_predictions(train_val, tmodel, TARGET, features, split="train")
    test_pred = save_predictions(test, tmodel, TARGET, features, split="test")
    preds_dic[model_type] = pd.concat([train_df, test_pred])

In [None]:
# plot rmse over all experiments # !(OVERALL case)
def _calculate_rmse(pred_df):
    rmse_scores = {}
    for split in pred_df["split"].unique():
        pred_split = pred_df[pred_df["split"] == split].copy()
        rmse = mean_squared_error(
            pred_split["true"].values, pred_split["pred"].values, squared=False
        )
        rmse_scores[split] = rmse
    return rmse_scores


# model_type -> {split: rmse}, computed over all predictions of that model
rmse_dic = {}
for model_type, pred_df in preds_dic.items():
    # todo: add a bin_by variable (for "over time" and "over study")
    rmse_dic[model_type] = _calculate_rmse(pred_df)

# Transpose so that each model type becomes one row (one bar group) and each
# split one column (one bar per group).
rmse_df = pd.DataFrame(rmse_dic).transpose()
rmse_df.plot(title="Overall", ylabel="RMSE", kind="bar")

In [None]:
# plot rmse over true time bin
# ! (TIME case)
model_type = "rf"
# Work on a copy: the helper "group" column added below must not end up in
# the shared prediction frame stored in preds_dic.
pred_df = preds_dic[model_type].copy()
# Set to a split label (e.g. "train" or "test") to plot that split only;
# None plots all splits side by side.
split = None

# bin true columns by months
pred_df["group"] = np.round(pred_df["true"] / DAYS_PER_MONTH, 0).astype(int)

# Select the columns _calculate_rmse reads explicitly so that the grouping
# column itself is not handed to apply (deprecated since pandas 2.2).
grouped_ser = pred_df.groupby("group")[["true", "pred", "split"]].apply(
    _calculate_rmse
)
# Each element is a {split: rmse} dict; expand it into one column per split.
grouped_df = grouped_ser.apply(pd.Series)
if split is not None:
    grouped_df = grouped_df[[split]].copy()

grouped_df.plot(
    kind="bar", title=f"Model: {model_type}", ylabel="RMSE", figsize=(10, 5)
)

## Compare experiments

In [None]:
def get_best_model(exp_name, trial_result, metric_ls=None):
    """Return the last reported metrics and the config of the best trial.

    NOTE(review): this definition shadows the ``get_best_model`` imported
    from ``q2_time.evaluate_models`` at the top of the notebook and returns
    something different (a metrics frame and a config). Cells that expect the
    imported version must be run before this one; consider renaming.

    Args:
        exp_name: Label used as the column name of the returned metrics frame.
        trial_result: Object exposing ``get_best_result()``; the returned
            result must provide ``config`` and ``metrics_dataframe``.
        metric_ls: Names of the metric columns to extract. Defaults to
            ``["rmse_train", "rmse_val"]``.

    Returns:
        Tuple ``(metrics_df, config)``: ``metrics_df`` has one row per metric
        and a single column named ``exp_name``; ``config`` is the best
        result's config, passed through unchanged.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if metric_ls is None:
        metric_ls = ["rmse_train", "rmse_val"]

    # Get the result with the metric and mode defined in tune_config before
    best_result = trial_result.get_best_result()

    # Last row of the metrics frame = most recently reported values.
    metrics_ser = best_result.metrics_dataframe[metric_ls].iloc[-1]
    metrics_df = pd.DataFrame({exp_name: metrics_ser})
    return metrics_df, best_result.config


def calc_best_metrics(dic_trials):
    """Gather best-trial metrics and configs of several experiments.

    Args:
        dic_trials: Mapping of experiment name -> trial results, each entry
            being passed to ``get_best_model``.

    Returns:
        Tuple ``(df_metrics, df_configs)``: ``df_metrics`` has the rows
        "rmse_train" and "rmse_val" and one column per experiment;
        ``df_configs`` is a DataFrame built from the per-experiment configs
        (one column per experiment).
    """
    metric_frames, configs = [], {}
    for exp_name, trial_result in dic_trials.items():
        best_metrics, configs[exp_name] = get_best_model(exp_name, trial_result)
        metric_frames.append(best_metrics)

    # Start from the two expected metric rows and attach one column per
    # experiment, in the order the experiments were given.
    df_metrics = pd.DataFrame(index=["rmse_train", "rmse_val"])
    for frame in metric_frames:
        df_metrics = df_metrics.join(frame)

    return df_metrics, pd.DataFrame(configs)


def plot_best_metrics(df_metrics):
    """Show a bar chart comparing train and validation RMSE per model type.

    Args:
        df_metrics: DataFrame with the rows "rmse_train" and "rmse_val" (in
            that order) and one column per model type.

    Side effects: switches the global matplotlib style, updates the global
    default font size and shows the figure.
    """
    # One row per model type, best (lowest) validation score first.
    df2plot = df_metrics.T.sort_values(by="rmse_val", ascending=True)
    df2plot.columns = ["train", "validation"]

    # plot settings
    # todo: set default plot settings across package
    font_sizes = {"title": 14, "label": 13, "tick": 12}
    plt.style.use("seaborn-v0_8-colorblind")  # ("tableau-colorblind10")
    plt.rcParams.update({"font.size": font_sizes["label"]})

    df2plot.plot(kind="bar", figsize=(12, 6))

    plt.xticks(fontsize=font_sizes["tick"])
    plt.yticks(fontsize=font_sizes["tick"])
    plt.ylabel("RMSE", fontsize=font_sizes["label"])
    plt.xlabel(
        "Model type (order: increasing val score)", fontsize=font_sizes["label"]
    )
    plt.title("Metrics comparison", fontsize=font_sizes["title"])
    plt.show()

In [None]:
# Compare the best trial of every model type; the tuple fixes the order in
# which the experiments are handed over.
metrics_all, best_configs = calc_best_metrics(
    {name: result_dic[name] for name in ("xgb", "linreg", "nn", "rf")}
)
plot_best_metrics(metrics_all)
display(best_configs)

In [None]:
def _highlight_differing_cols(x):
    """
    Function returning color map of differing columns in x
    Original code used as base:
    https://stackoverflow.com/questions/41654949/pandas-style-function-to-highlight-specific-columns
    """
    # copy df to new - original data is not changed
    df = x.copy()

    # extract list of columns that differ between all models
    ls_col = df.columns[df.nunique(dropna=False) > 1].tolist()

    # select default neutral background
    df.loc[:, :] = "background-color: None"

    # mark columns that differ
    df[ls_col] = "color: red"

    # return colored df
    return df


# axis=None hands the whole frame to the function in a single call.
# NOTE(review): best_configs is built from a dict keyed by model type, i.e. it
# has one column per model. As written this therefore highlights every model
# column that contains more than one distinct value, not the settings that
# differ between models - confirm whether best_configs.T was intended.
best_configs.style.apply(_highlight_differing_cols, axis=None)

## Evaluate over training time (example for xgb model here)

In [None]:
# Pick the best xgb trial as selected by get_best_result() and plot its train
# and validation RMSE against the training iteration.
best_xgb = result_dic["xgb"].get_best_result()
best_xgb.metrics_dataframe.plot("training_iteration", ["rmse_train", "rmse_val"])

In [None]:
# Overlay the validation RMSE curve of every xgb trial on one shared axes.
ax = None
for result in result_dic["xgb"]:
    # Adjacent f-strings are joined at compile time. The previous backslash
    # continuation inside the literal leaked the indentation spaces of the
    # second source line into the legend label.
    label = (
        f"data_transform={result.config['data_transform']}, "
        f"max_depth={result.config['max_depth']}"
    )
    # With ax=None pandas creates a new axes (first trial); the returned axes
    # is then reused for all following trials.
    ax = result.metrics_dataframe.plot(
        "training_iteration", "rmse_val", ax=ax, label=label
    )
# Anchor the legend outside the plotting area so it does not hide the curves.
ax.legend(bbox_to_anchor=(1.1, 1.05))

ax.set_title("rmse_val vs. training iteration for all trials")
ax.set_ylabel("RMSE_val")