# Usecase 1: Age prediction original set-up

This notebook can be run in the following conda environment:

```shell
conda activate ritme_model
```


In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, root_mean_squared_error

In [2]:
# Configuration: a single seed shared by NumPy's global RNG and the forest, so
# the two values cannot drift apart when one of them is edited.
SEED = 123
N_ESTIMATORS = 10000
np.random.seed(SEED)


def to_relative_abundance(counts):
    """Scale every sample (row) so that its feature values sum to 1.

    Parameters
    ----------
    counts : pd.DataFrame
        One row per sample, one column per feature.

    Returns
    -------
    pd.DataFrame
        Frame of the same shape with each row divided by its row total.

    Raises
    ------
    ValueError
        If any row total is zero. Dividing by a zero total would otherwise
        turn that sample's features into NaN without any warning.
    """
    row_totals = counts.sum(axis=1)
    zero_total = row_totals == 0
    if zero_total.any():
        raise ValueError(
            f"{int(zero_total.sum())} sample(s) have a total abundance of zero; "
            "relative abundances are undefined for them."
        )
    return counts.div(row_totals, axis=0)


# Read training and testing data.
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# come from a trusted source.
train_df = pd.read_pickle("data_splits/train_val.pkl")
test_df = pd.read_pickle("data_splits/test.pkl")

# Extract columns starting with 'F' (features) and the target 'age_months'.
# str() keeps the filter from raising on non-string column labels.
predictor_cols = [col for col in train_df.columns if str(col).startswith("F")]
if not predictor_cols:
    raise ValueError(
        "No feature columns starting with 'F' were found in the training data."
    )

train_predictors = train_df[predictor_cols]
train_target = train_df["age_months"]

# The test set is indexed with the training feature list, so a feature that is
# missing from the test data fails here with a KeyError instead of silently
# producing a differently shaped design matrix.
test_predictors = test_df[predictor_cols]
test_target = test_df["age_months"]

# Convert absolute abundances to relative abundances
train_predictors_rel = to_relative_abundance(train_predictors)
test_predictors_rel = to_relative_abundance(test_predictors)

# Determine the number of predictors
p = train_predictors_rel.shape[1]

# Consider roughly one third of the predictors at every split. round(p / 3) is
# 0 when p == 1, which scikit-learn rejects, hence the lower bound of 1.
# NOTE(review): round() can differ by one from floor(p / 3), which is the
# regression default of R's randomForest - confirm which one is intended.
max_features = max(1, round(p / 3))

# Train Random Forest regression model
rf_model = RandomForestRegressor(
    n_estimators=N_ESTIMATORS,
    max_features=max_features,
    random_state=SEED,
    n_jobs=-1,
)
rf_model.fit(train_predictors_rel, train_target)

# Predictions on training data
train_preds = rf_model.predict(train_predictors_rel)

# Predictions on testing data
test_preds = rf_model.predict(test_predictors_rel)

# Calculate R² and RMSE for training data
train_r2 = r2_score(train_target, train_preds)
train_rmse = root_mean_squared_error(train_target, train_preds)

# Calculate R² and RMSE for testing data
test_r2 = r2_score(test_target, test_preds)
test_rmse = root_mean_squared_error(test_target, test_preds)

# Display performance metrics
print("Training R-squared:", train_r2)
print("Training RMSE:", train_rmse)
print("Testing R-squared:", test_r2)
print("Testing RMSE:", test_rmse)

Training R-squared: 0.9704015071082751
Training RMSE: 1.0104973170124203
Testing R-squared: 0.6684570143539466
Testing RMSE: 3.4990035432313156
