# Model evaluation

Kendra Wyant  
June 11, 2025

### Set Up Environment

In [None]:
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(source("https://github.com/jjcurtin/lab_support/blob/main/format_path.R?raw=true"))
suppressPackageStartupMessages(library(tidyposterior))

# Directory paths built with the format_path() helper sourced above.
path_models_lag <- format_path("risk/models/lag")
path_shared     <- format_path("risk/data_processed/shared")
path_processed  <- format_path("risk/data_processed/lag")

# Render NA cells as empty strings in knitr::kable() tables.
options(knitr.kable.NA = "")

In [None]:
# Read the held-out auROCs for one lag model.
#
# lag: lag in hours as it appears in the file name (0, 24, 72, 168, 336).
# Returns a tibble with `outer_split_num` and one metric column named
# "lag<lag>" (e.g., lag24) holding that file's `roc_auc` values.
read_test_auroc <- function(lag) {
  file_name <- str_c("test_auroc_6_x_5_1day_", lag, "_v3_nested_strat_lh.csv")

  read_csv(here::here(path_models_lag, file_name), col_types = cols()) |>
    select(outer_split_num, roc_auc) |>
    set_names(c("outer_split_num", str_c("lag", lag)))
}

test_metrics_0 <- read_test_auroc(0)
test_metrics_24 <- read_test_auroc(24)
test_metrics_72 <- read_test_auroc(72)
test_metrics_168 <- read_test_auroc(168)
test_metrics_336 <- read_test_auroc(336)

# One row per outer split, one column per lag model.
test_metrics_all <- list(test_metrics_0, test_metrics_24, test_metrics_72,
                         test_metrics_168, test_metrics_336) |>
  reduce(\(x, y) left_join(x, y, by = "outer_split_num")) |>
  # fold/repeat labels below are assigned by row position, so make the row
  # order explicit instead of relying on the order of rows in the csv files
  arrange(outer_split_num) |>
  # assumes 30 outer splits numbered as 10 folds within each of 3 repeats
  # -- TODO confirm against the resampling code that wrote these files
  mutate(fold_num = rep(1:10, 3),
         repeat_num = rep(c(1, 2, 3), each = 10)) |>
  select(-outer_split_num) |>
  glimpse()

Rows: 30
Columns: 7
$ lag0       <dbl> 0.8989030, 0.9245294, 0.9297046, 0.9056895, 0.8849340, 0.91…
$ lag24      <dbl> 0.8689003, 0.8988473, 0.9261899, 0.8778537, 0.8780250, 0.90…
$ lag72      <dbl> 0.8358703, 0.8974821, 0.9275120, 0.8653247, 0.8754346, 0.89…
$ lag168     <dbl> 0.8224227, 0.8695022, 0.9108843, 0.8673193, 0.8613850, 0.88…
$ lag336     <dbl> 0.7534889, 0.8629033, 0.9062894, 0.8448594, 0.8349199, 0.85…
$ fold_num   <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1…
$ repeat_num <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…

#### Model evaluation

In [None]:
#| output: false

# Bayesian model of auROC across the lag models (repeated CV).
# Note the renaming used here: id = fold, id2 = repeat. The random-effects
# term (1 | id2/id) therefore gives a random intercept for each repeat and
# for each fold nested within repeat:
#   statistic ~ model + (1 | id2/id)
set.seed(101)
pp <- test_metrics_all |>
  rename(id = fold_num,
         id2 = repeat_num) |>
  perf_mod(
    formula = statistic ~ model + (1 | id2/id),
    transform = tidyposterior::logit_trans,  # for skewed & bounded AUC
    # increased iteration from 2000 to fix divergence issues
    iter = 4000, chains = 4, adapt_delta = .999,
    family = gaussian
  )


SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000793 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 7.93 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 4000 [  0%]  (Warmup)
Chain 1: Iteration:  400 / 4000 [ 10%]  (Warmup)
Chain 1: Iteration:  800 / 4000 [ 20%]  (Warmup)
Chain 1: Iteration: 1200 / 4000 [ 30%]  (Warmup)
Chain 1: Iteration: 1600 / 4000 [ 40%]  (Warmup)
Chain 1: Iteration: 2000 / 4000 [ 50%]  (Warmup)
Chain 1: Iteration: 2001 / 4000 [ 50%]  (Sampling)
Chain 1: Iteration: 2400 / 4000 [ 60%]  (Sampling)
Chain 1: Iteration: 2800 / 4000 [ 70%]  (Sampling)
Chain 1: Iteration: 3200 / 4000 [ 80%]  (Sampling)
Chain 1: Iteration: 3600 / 4000 [ 90%]  (Sampling)
Chain 1: Iteration: 4000 / 4000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 7.623 seconds (Warm-up)
Chain 1:                4.108 seconds (Sampling)
Chain 1:                11.

to find out why this is a problem and how to eliminate them.



In [None]:
# Posterior draws for each lag model (seed fixed for reproducibility).
pp_tidy <- tidy(pp, seed = 123)

# Quantiles reported for each model: lower bound, median, upper bound.
q <- c(.025, .5, .975)

# Display order and labels for the lag models.
lag_levels <- c("lag0", "lag24", "lag72", "lag168", "lag336")
lag_labels <- c("0 lag", "24 lag", "72 lag", "168 lag", "336 lag")

# Median and interval bounds of the posterior for each model.
pp_perf_tibble <- pp_tidy |>
  group_by(model) |>
  summarize(
    pp_median = quantile(posterior, probs = q[2]),
    pp_lower = quantile(posterior, probs = q[1]),
    pp_upper = quantile(posterior, probs = q[3])
  ) |>
  mutate(model = factor(model, levels = lag_levels, labels = lag_labels)) |>
  arrange(model)

# Save the summary table and the full set of posterior draws.
write_csv(pp_perf_tibble, here::here(path_models_lag, "pp_perf_tibble.csv"))
write_csv(pp_tidy, here::here(path_models_lag, "posteriors.csv"))

pp_perf_tibble

# A tibble: 5 × 4
  model   pp_median pp_lower pp_upper
  <fct>       <dbl>    <dbl>    <dbl>
1 0 lag       0.912    0.899    0.923
2 24 lag      0.891    0.876    0.904
3 72 lag      0.882    0.865    0.896
4 168 lag     0.870    0.852    0.885
5 336 lag     0.849    0.830    0.867

### Model Comparisons

#### Baseline Contrasts

In [None]:
# Contrast the no-lag model with each lagged model.
# The posterior differences are computed once (with a fixed seed) and reused,
# so the summary statistics and the median describe the same draws.
diff_baseline <- pp |>
  contrast_models(list("lag0", "lag0", "lag0", "lag0"),
                  list("lag24", "lag72", "lag168", "lag336"),
                  seed = 123)

baseline_labels <- c("0 vs. 24", "0 vs. 72", "0 vs. 168", "0 vs. 336")

# summary() labels the contrasts "a vs b" (no period), so the levels used for
# recoding here differ from those used for the raw differences below.
ci_baseline <- diff_baseline |>
  summary(size = 0) |>
  mutate(contrast = factor(contrast,
                           levels = c("lag0 vs lag24", "lag0 vs lag72",
                                      "lag0 vs lag168", "lag0 vs lag336"),
                           labels = baseline_labels))

# Posterior median of each difference; the raw contrast labels are "a vs. b".
ci_median_baseline <- diff_baseline |>
  group_by(contrast) |>
  summarize(median = quantile(difference, .5)) |>
  mutate(contrast = factor(contrast,
                           levels = c("lag0 vs. lag24", "lag0 vs. lag72",
                                      "lag0 vs. lag168", "lag0 vs. lag336"),
                           labels = baseline_labels))

# Sort rows by lag (factor order) rather than alphabetically, matching the
# adjacent-contrast table.
ci_baseline <- ci_baseline |>
  left_join(ci_median_baseline, by = c("contrast")) |>
  arrange(contrast)

ci_baseline |>
  write_csv(here::here(path_models_lag, "contrast_baseline.csv"))

ci_baseline

# A tibble: 4 × 10
  contrast  probability   mean  lower  upper  size pract_neg pract_equiv
  <fct>           <dbl>  <dbl>  <dbl>  <dbl> <dbl>     <dbl>       <dbl>
1 0 vs. 168           1 0.0426 0.0369 0.0486     0        NA          NA
2 0 vs. 24            1 0.0212 0.0168 0.0256     0        NA          NA
3 0 vs. 336           1 0.0629 0.0557 0.0702     0        NA          NA
4 0 vs. 72            1 0.0303 0.0254 0.0354     0        NA          NA
# ℹ 2 more variables: pract_pos <dbl>, median <dbl>

#### Adjacent Contrasts

In [None]:
# Contrast each lagged model with the next longer lag.
# The posterior differences are computed once (with a fixed seed) and reused,
# so the summary statistics and the median describe the same draws.
diff_lag <- pp |>
  contrast_models(list("lag24", "lag72", "lag168"),
                  list("lag72", "lag168", "lag336"),
                  seed = 123)

adjacent_labels <- c("24 vs. 72", "72 vs. 168", "168 vs. 336")

# summary() labels the contrasts "a vs b" (no period), so the levels used for
# recoding here differ from those used for the raw differences below.
ci_lag <- diff_lag |>
  summary(size = 0) |>
  mutate(contrast = factor(contrast,
                           levels = c("lag24 vs lag72", "lag72 vs lag168",
                                      "lag168 vs lag336"),
                           labels = adjacent_labels))

# Posterior median of each difference; the raw contrast labels are "a vs. b".
ci_median_lag <- diff_lag |>
  group_by(contrast) |>
  summarize(median = quantile(difference, .5)) |>
  mutate(contrast = factor(contrast,
                           levels = c("lag24 vs. lag72", "lag72 vs. lag168",
                                      "lag168 vs. lag336"),
                           labels = adjacent_labels))

# Sort rows by lag (factor order).
ci_lag <- ci_lag |>
  left_join(ci_median_lag, by = c("contrast")) |>
  arrange(contrast)

ci_lag |>
  write_csv(here::here(path_models_lag, "contrast_adjacent.csv"))

ci_lag

# A tibble: 3 × 10
  contrast    probability    mean   lower  upper  size pract_neg pract_equiv
  <fct>             <dbl>   <dbl>   <dbl>  <dbl> <dbl>     <dbl>       <dbl>
1 24 vs. 72          1.00 0.00907 0.00457 0.0135     0        NA          NA
2 72 vs. 168         1    0.0123  0.00752 0.0173     0        NA          NA
3 168 vs. 336        1    0.0202  0.0148  0.0259     0        NA          NA
# ℹ 2 more variables: pract_pos <dbl>, median <dbl>