# Use case 1: Age prediction – run model

This notebook can be run in the following conda environment:
```shell

# TODO: remove the `-c local` channel from these instructions once ritme is available on conda
mamba create -n ritme_model -c local -c qiime2 -c conda-forge -c bioconda -c pytorch -c anaconda -c defaults ritme ipykernel -y
conda activate ritme_model
```

## Setup

In [1]:
from ritme.find_best_model_config import (
    _load_experiment_config,
    _load_phylogeny,
    _load_taxonomy,
    find_best_model_config,
)
from ritme.split_train_test import _load_data, split_train_test
from ritme.evaluate_tuned_models import evaluate_tuned_models

# enable the autoreload extension so that edited modules are re-imported
# automatically before each cell is executed
%load_ext autoreload
%autoreload 2

# render matplotlib figures inline in the notebook output
%matplotlib inline

In [2]:
######## USER INPUTS ########
# train size handed to the train-test split below
train_size = 0.8

# input data locations: feature table, metadata, phylogeny, and taxonomy
path_to_ft = "../../data/u1_subramanian14/otu_table_subr14_rar.tsv"
path_to_md = "../../data/u1_subramanian14/md_subr14.tsv"
path_to_phylo = "../../data/u1_subramanian14/fasttree_tree_rooted_subr14.qza"
path_to_tax = "../../data/u1_subramanian14/taxonomy_subr14.qza"

# path to the ritme experiment configuration file
model_config_path = "u1_rf_config.json"
######## END USER INPUTS #####

In [3]:
# load the ritme experiment configuration from the file given in the user inputs;
# later cells read the split settings (stratification column, feature prefix,
# data seed) from it and pass it on to model search and evaluation
config = _load_experiment_config(model_config_path)

## Perform train-test split

In [4]:
# read metadata and feature table, then report their dimensions
md, ft = _load_data(path_to_md, path_to_ft)
print(md.shape, ft.shape)

# split settings: stratification column, feature prefix and seed come from the
# experiment configuration, the train size from the user inputs
split_kwargs = {
    "stratify_by_column": config["stratify_by_column"],
    "feature_prefix": config["feature_prefix"],
    "train_size": train_size,
    "seed": config["seed_data"],
}
train_val, test = split_train_test(md, ft, **split_kwargs)

(448, 38) (448, 850)
Train: (362, 888), Test: (86, 888)




## Find and evaluate optimal feature and model configuration with ritme

In [5]:
# load the phylogeny and taxonomy that accompany the feature table
phylo = _load_phylogeny(path_to_phylo)
tax = _load_taxonomy(path_to_tax)

# search for the best model configuration on the train/validation data;
# model logs are stored under "u1_rf_best_model"
best_model_dict, path_to_exp = find_best_model_config(
    config,
    train_val,
    tax,
    phylo,
    path_store_model_logs="u1_rf_best_model",
)

0,1
Current time:,2024-12-05 09:38:09
Running for:,00:00:23.27
Memory:,10.2/16.0 GiB

Trial name,status,loc,bootstrap,data_aggregation,data_selection,data_selection_t,data_transform,max_depth,max_features,min_impurity_decreas e,min_samples_leaf,min_samples_split,min_weight_fraction_ leaf,model,n_estimators,iter,total time (s),rmse_val,rmse_train,r2_val
train_rf_4f054697,TERMINATED,127.0.0.1:97793,False,,variance_threshold,6.16517e-05,,,,0.118351,0.0162992,0.00188113,0.000369536,rf,120,1,0.63906,3.03335,1.13039,0.768043
train_rf_2c0418a4,TERMINATED,127.0.0.1:97804,False,,variance_threshold,2.23264e-05,,16.0,,0.0165222,0.00466977,0.025424,0.000452001,rf,80,1,0.640354,2.87253,1.10773,0.791987


[36m(train_rf pid=97793)[0m Train: (276, 81), Test: (86, 81)


[36m(_WandbLoggingActor pid=97803)[0m wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[36m(_WandbLoggingActor pid=97803)[0m wandb: Currently logged in as: adamovanja (ritme). Use `wandb login --relogin` to force relogin
[36m(_WandbLoggingActor pid=97803)[0m wandb: Tracking run with wandb version 0.18.7
[36m(_WandbLoggingActor pid=97803)[0m wandb: Run data is saved locally in /private/tmp/ray/session_2024-12-05_09-37-43_101217_97729/artifacts/2024-12-05_09-37-46/rf/driver_artifacts/train_rf_4f054697_1_bootstrap=False,data_aggregation=None,data_selection=variance_threshold,data_selection_t=0.0001,data_transform_2024-12-05_09-37-46/wandb/run-20241205_093759-4f054697
[36m(_WandbLoggingActor pid=97803)[0m wandb: Run `wandb offline` to turn off syncing.
[36m(_WandbLoggingActor pid=97803)[0m wandb: Syncing run train_rf_4f054697
[36m(_WandbLoggingActor pid=97803)[0m wandb: ⭐️ View project at https://wandb.ai/ritme/u1_

[36m(train_rf pid=97804)[0m Train: (276, 112), Test: (86, 112)


2024-12-05 09:38:09,624	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/Users/adamova/Documents/projects/14_LM1/ritme_examples/use_cases/u1_amplicon_age_prediction/u1_rf_best_model/u1_rf_config/rf' in 0.0221s.
[36m(_WandbLoggingActor pid=97833)[0m wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[36m(_WandbLoggingActor pid=97833)[0m wandb: Currently logged in as: adamovanja (ritme). Use `wandb login --relogin` to force relogin
[36m(_WandbLoggingActor pid=97833)[0m wandb: Tracking run with wandb version 0.18.7
[36m(_WandbLoggingActor pid=97833)[0m wandb: Run data is saved locally in /private/tmp/ray/session_2024-12-05_09-37-43_101217_97729/artifacts/2024-12-05_09-37-46/rf/driver_artifacts/train_rf_2c0418a4_2_bootstrap=False,data_aggregation=None,data_selection=variance_threshold,data_selection_t=0.0000,data_transform_2024-12-05_09-37-56/wandb/run-20241205_093810-2c0418a4
[3

## Evaluate feature and model configuration used by original paper

In [6]:
# evaluate the best model(s) returned by the search on the train/validation
# data and on the held-out test data
metrics = evaluate_tuned_models(best_model_dict, config, train_val, test)
# show the resulting metrics table as the cell output
metrics

Unnamed: 0,rmse_train,r2_train,rmse_test,r2_test
rf,1.701717,0.916059,3.724957,0.624255
