# Performance Metrics Tables for EMA study

John Curtin

## Overview

## Set up environment

In [None]:
# handle conflicts
options(conflicts.policy = "depends.ok")
# devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/fun_ml.R?raw=true")
# tidymodels_conflictRules()

library(kableExtra, exclude = "group_rows")
# library(patchwork)
# library(ggtext)
# library(consort)
# library(tidyposterior)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

ℹ SHA-1 hash of file is "a58e57da996d1b70bb9a5b58241325d6fd78890f"

## Make metrics df

In [None]:
# Load the test-set performance metrics for each prediction window
# (1 week, 1 day, 1 hour). `col_types = cols()` leaves every column type to
# readr's guessing.
# NOTE(review): `path_models` is not defined in the visible part of this
# file -- presumably it is set by a sourced setup script; confirm before
# rendering.
metrics_week <- file.path(path_models, "test_metrics_1week_0_v5_nested.csv") |>
  read_csv(col_types = cols())

metrics_day <- file.path(path_models, "test_metrics_1day_0_v5_nested.csv") |>
  read_csv(col_types = cols())

metrics_hour <- file.path(path_models, "test_metrics_1hour_0_v5_nested.csv") |>
  read_csv(col_types = cols())

In [None]:
# Stack the per-window test metrics, tagging each row with its prediction
# window, then take the median of `.estimate` for every metric/window pair.
# The result has one row per metric and one column per window.
metrics <- bind_rows(
  metrics_week |> mutate(model = "Week"),
  metrics_day |> mutate(model = "Day"),
  metrics_hour |> mutate(model = "Hour")
) |>
  group_by(.metric, model) |>
  summarize(median = median(.estimate), .groups = "drop") |>
  pivot_wider(names_from = model, values_from = median) |>
  select(.metric, Week, Day, Hour)

# Put the rows in presentation order by metric name. Ordering by name rather
# than by row position keeps the table correct even if the input files gain
# or lose a metric, or if the grouped summary comes back in a different order.
metric_order <- c("roc_auc", "sens", "spec", "bal_accuracy", "ppv", "npv")

metrics <- metrics |>
  filter(.metric %in% metric_order) |>
  arrange(match(.metric, metric_order))

In [None]:
#| label: table-paper
#| tbl-cap: "Areas under the receiver operating characteristic curves (auROCs) summarize the model's sensitivity and specificity over all possible decision thresholds. Sensitivity, specificity, balanced accuracy, positive predictive value, and negative predictive value are performance metrics calculated at a single decision threshold for each model determined with Youden’s index. All metrics represent median values across 30 held-out test sets."

# Table for the paper: swap the short metric codes for readable labels
# (codes without a label become NA), then render all three prediction windows
# with two-decimal rounding, left-aligned columns, and a wide first column.
metrics |>
  mutate(
    .metric = unname(
      c(
        roc_auc = "auROC",
        sens = "sensitivity",
        spec = "specificity",
        bal_accuracy = "balanced accuracy",
        ppv = "positive predictive value",
        npv = "negative predictive value"
      )[as.character(.metric)]
    )
  ) |>
  kbl(
    col.names = c("Metric", "Week", "Day", "Hour"),
    booktabs = TRUE,
    digits = 2,
    align = c("l", "l", "l", "l"),
    linesep = "",
    caption = "Performance Metrics for Full models by Prediction Window"
  ) |>
  kable_styling(position = "left", latex_options = c("HOLD_position")) |>
  column_spec(column = 1, width = "25em")

In [None]:
#| label: table-metrics-week_only
# Week-only version of the metrics table: keep the first four rows, blank out
# the Day and Hour columns, relabel the metric codes (codes without a label
# become NA), and show the Week column in bold red.
metrics |>
  slice_head(n = 4) |>
  mutate(
    Day = "",
    Hour = "",
    .metric = unname(
      c(
        roc_auc = "auROC",
        sens = "sensitivity",
        spec = "specificity",
        bal_accuracy = "balanced accuracy"
      )[as.character(.metric)]
    )
  ) |>
  kbl(
    col.names = c("", "Week", "Day", "Hour"),
    digits = 2,
    align = c("r", "c", "c", "c"),
    linesep = ""
  ) |>
  row_spec(row = 0, align = "c") |>
  kable_styling(full_width = FALSE) |>
  kable_classic("striped") |>
  column_spec(2, color = "red", bold = TRUE)