# Use case 2: Temperature prediction — evaluate ritme models


This notebook evaluates the experiments launched in the notebook `n2_run_ritme_model.ipynb`. It can be run in the following conda environment:

```shell
mamba create -n ritme_model -c adamova -c qiime2 -c conda-forge -c bioconda -c pytorch -c anaconda ritme ipykernel -y
conda activate ritme_model
```



## Setup

In [None]:
import warnings

import mlflow

from ritme.evaluate_mlflow import (
    barplot_metric,
    parallel_coordinates_plot,
    plot_complexity_vs_metric,
    violinplot_metric,
    plot_metric_history_per_model_type,
    plot_avg_history_per_model_type,
    extract_run_config,
)

# Suppress FutureWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings("ignore", category=FutureWarning)

# Load the autoreload extension and enable mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook.
%matplotlib inline

In [None]:
# Display the installed MLflow version as the cell output.
mlflow.__version__

In [None]:
######## USER INPUTS ########

# Path to the MLflow logs. It is used as the MLflow tracking URI in the cells
# below, so adjust it to the location where your own runs were logged.
log_folder_location = (
    "/cluster/work/bokulich/adamova/ritme_example_runs/u2_all_best_model/mlruns"
)

######## END USER INPUTS #####

## Insights on performance per model type

In [None]:
# Point MLflow at the configured log folder and load the runs of all
# experiments, sorted by ascending validation RMSE.
mlflow.set_tracking_uri(log_folder_location)

all_trials = mlflow.search_runs(
    order_by=["metrics.rmse_val ASC"], search_all_experiments=True
)

# An empty result usually points to a wrong `log_folder_location`. Warn here so
# the cause is obvious before the plotting cells run on an empty table.
if all_trials.empty:
    warnings.warn(
        f"No MLflow runs found under {log_folder_location!r}; "
        "check the path set in the user inputs cell."
    )

print(f"Found {all_trials.shape[0]} trials")

In [None]:
# Validation RMSE ("metrics.rmse_val") of all trials, grouped by model type
# ("params.model").
violinplot_metric(
    all_trials,
    group_col="params.model",
    group_name="Model type",
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
)

In [None]:
# Bar plot of validation RMSE ("metrics.rmse_val"), grouped by model type
# ("params.model").
barplot_metric(
    all_trials,
    group_col="params.model",
    group_name="Model type",
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
    top_n=None,  # set to an integer N to plot only the top N models
    display_trial_name=True,  # True: display the trial name on the x-axis
)

## Model complexity vs. performance

In [None]:
# Validation RMSE ("metrics.rmse_val") against model complexity, grouped by
# model type ("params.model").
plot_complexity_vs_metric(
    all_trials,
    group_col="params.model",
    group_name="Model type",
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
    static=True,  # switch to False for an interactive plotly graph
)

In [None]:
# Number of features ("metrics.nb_features") of all trials, grouped by model
# type ("params.model").
violinplot_metric(
    all_trials,
    group_col="params.model",
    group_name="Model type",
    metric_col="metrics.nb_features",
    metric_name="Number of features",
)

In [None]:
# Parallel-coordinates plot of the trials, with the number of features
# ("metrics.nb_features") passed as the first metric.
first_metric_col, first_metric_name = "metrics.nb_features", "# features"
parallel_coordinates_plot(
    all_trials,
    first_metric_col,
    first_metric_name,
)

## Performance vs. feature engineering

### Effect of data transformation

In [None]:
# Parameter analysed in this subsection. `var` and `var_name` are also read by
# the next two cells, so run this cell before them.
var, var_name = "params.data_transform", "Data transform"

# Bar plot of validation RMSE ("metrics.rmse_val"), grouped by `var`.
barplot_metric(
    all_trials,
    group_col=var,
    group_name=var_name,
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
    top_n=None,  # set to an integer N to plot only the top N models
    display_trial_name=True,  # True: display the trial name on the x-axis
)

In [None]:
# Validation RMSE ("metrics.rmse_val") grouped by the `var` / `var_name` set in
# the data-transform cell above.
violinplot_metric(
    all_trials,
    group_col=var,
    group_name=var_name,
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
)

In [None]:
# Parallel-coordinates plot with the `var` / `var_name` set in the
# data-transform cell above as the first axis.
# NOTE(review): first_metric_cat=True presumably marks that axis as
# categorical — confirm against ritme.evaluate_mlflow.
parallel_coordinates_plot(
    all_trials,
    var,
    var_name,
    first_metric_cat=True,
)

### Effect of data selection

In [None]:
# Parameter analysed in this subsection. `var` and `var_name` are also read by
# the next two cells, so run this cell before them.
var, var_name = "params.data_selection", "Data selection"

# Bar plot of validation RMSE ("metrics.rmse_val"), grouped by `var`.
barplot_metric(
    all_trials,
    group_col=var,
    group_name=var_name,
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
    top_n=None,  # set to an integer N to plot only the top N models
    display_trial_name=True,  # True: display the trial name on the x-axis
)

In [None]:
# Validation RMSE ("metrics.rmse_val") grouped by the `var` / `var_name` set in
# the data-selection cell above.
violinplot_metric(
    all_trials,
    group_col=var,
    group_name=var_name,
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
)

In [None]:
# Parallel-coordinates plot with the `var` / `var_name` set in the
# data-selection cell above as the first axis.
# NOTE(review): first_metric_cat=True presumably marks that axis as
# categorical — confirm against ritme.evaluate_mlflow.
parallel_coordinates_plot(
    all_trials,
    var,
    var_name,
    first_metric_cat=True,
)

### Effect of data aggregation

In [None]:
# Parameter analysed in this subsection. `var` and `var_name` are also read by
# the next two cells, so run this cell before them.
var, var_name = "params.data_aggregation", "Data aggregation"

# Bar plot of validation RMSE ("metrics.rmse_val"), grouped by `var`.
barplot_metric(
    all_trials,
    group_col=var,
    group_name=var_name,
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
    top_n=None,  # set to an integer N to plot only the top N models
    display_trial_name=True,  # True: display the trial name on the x-axis
)

In [None]:
# Validation RMSE ("metrics.rmse_val") grouped by the `var` / `var_name` set in
# the data-aggregation cell above.
violinplot_metric(
    all_trials,
    group_col=var,
    group_name=var_name,
    metric_col="metrics.rmse_val",
    metric_name="RMSE Validation",
)

In [None]:
# Parallel-coordinates plot with the `var` / `var_name` set in the
# data-aggregation cell above as the first axis.
# NOTE(review): first_metric_cat=True presumably marks that axis as
# categorical — confirm against ritme.evaluate_mlflow.
parallel_coordinates_plot(
    all_trials,
    var,
    var_name,
    first_metric_cat=True,
)

## Training over time

In [None]:
# MLflow client bound to the same log folder as the trial search above; the two
# following cells reuse `client`.
client = mlflow.tracking.MlflowClient(tracking_uri=log_folder_location)
metric = "rmse_train"

# History of the training RMSE, per model type.
plot_metric_history_per_model_type(
    metric,
    client,
    all_trials,
)

* If, within a model type, trials launched later achieve a lower training RMSE, the selected search algorithm is working as intended.

In [None]:
# "rmse_train" history via plot_avg_history_per_model_type; needs the `client`
# created in the metric-history cell above.
plot_avg_history_per_model_type(
    "rmse_train",
    client,
    all_trials,
)

In [None]:
# "rmse_val" history via plot_avg_history_per_model_type; needs the `client`
# created in the metric-history cell above.
plot_avg_history_per_model_type(
    "rmse_val",
    client,
    all_trials,
)

## Run configuration overview

In [None]:
# Extract the run configuration from the loaded trials and display it as the
# cell output.
run_config = extract_run_config(all_trials)
run_config