# Make All Tables for Main Manuscript

Kendra Wyant  
January 27, 2025

In [None]:

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(source("https://github.com/jjcurtin/lab_support/blob/main/format_path.R?raw=true"))
suppressPackageStartupMessages(library(tidyposterior))
library(kableExtra)



Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows

## Data and calculations

Table 1

In [None]:
# Participant disposition file; column types are fixed by the compact spec.
disposition <- file.path(path_processed, "disposition.csv") |>
  read_csv(col_types = "ccDDcccccccccc")

# Screening data restricted to participants flagged analysis == "yes" in
# `disposition`. The eleven DSM-5 items are recoded No/Yes -> 0/1 and summed
# per participant into `dsm5_total` (any missing item makes the total NA).
screen <- file.path(path_shared, "screen.csv") |>
  read_csv(col_types = cols()) |>
  filter(subid %in% pull(filter(disposition, analysis == "yes"), subid)) |>
  mutate(across(dsm5_1:dsm5_11, \(item) recode(item, "No" = 0, "Yes" = 1))) |>
  rowwise() |>
  mutate(dsm5_total = sum(c_across(dsm5_1:dsm5_11))) |>
  ungroup()

# Lapse records, keeping only rows not marked for exclusion.
lapses <- file.path(path_shared, "lapses.csv") |>
  read_csv(col_types = cols()) |>
  filter(exclude == FALSE)

# Calcs to make df for table 1 (demographics and clinical characteristics)
# Size of the analysis sample. Derived from `screen` (already filtered to the
# analysis == "yes" participants) instead of a hard-coded count, so it cannot
# drift out of sync with the data if the disposition file changes.
n_total <- nrow(screen)

# One-row summary of dem_1 for the "Age" line of Table 1. Descriptives are
# stored as character so they can be stacked with the other table sections;
# the count/percent columns do not apply and are left missing.
dem_age <- screen |>
  summarise(
    var = "Age",
    n = NA_real_,
    perc = NA_real_,
    mean = as.character(round(mean(dem_1, na.rm = TRUE), 1)),
    SD = as.character(round(sd(dem_1, na.rm = TRUE), 1)),
    min = as.character(min(dem_1, na.rm = TRUE)),
    max = as.character(max(dem_1, na.rm = TRUE))
  )

# Count and percentage of participants per dem_2 category, preceded by an
# otherwise-empty "Sex" header row.
dem_sex <- screen |>
  count(var = dem_2) |>
  mutate(perc = (n / sum(n)) * 100) |>
  add_row(var = "Sex", .before = 1)

# Count and percentage per dem_3 category, preceded by a "Race" header row.
# The factor levels fix the display order of the categories; values not
# listed in the levels become NA.
dem_race <- screen |>
  count(var = factor(dem_3,
                     levels = c("American Indian/Alaska Native", "Asian",
                                "Black/African American", "White/Caucasian",
                                "Other/Multiracial"))) |>
  mutate(perc = (n / sum(n)) * 100) |>
  add_row(var = "Race", .before = 1)
  
  
# Count and percentage of participants by Hispanic/Latino/Spanish origin
# (dem_4), collapsed to Yes/No and preceded by a header row.
# Any response other than the explicit "No, ..." option is counted as "Yes".
# A missing response is kept as NA (shown as its own row) rather than being
# swept into "Yes" by the catch-all branch.
dem_ethnicity <- screen |>
  mutate(var = case_when(
    is.na(dem_4) ~ NA_character_,
    dem_4 == "No, I am not of Hispanic, Latino, or Spanish origin" ~ "No",
    TRUE ~ "Yes"
  )) |>
  count(var = factor(var, levels = c("Yes", "No"))) |>
  mutate(perc = (n / sum(n)) * 100) |>
  add_row(var = "Hispanic, Latino, or Spanish origin", .before = 1)

# Count and percentage per dem_5 category, preceded by an "Education" header
# row. The factor levels fix the display order (lowest to highest attainment).
dem_education <- screen |>
  count(var = factor(dem_5,
                     levels = c("Less than high school or GED degree",
                                "High school or GED", "Some college",
                                "2-Year degree", "College degree",
                                "Advanced degree"))) |>
  mutate(perc = (n / sum(n)) * 100) |>
  add_row(var = "Education", .before = 1)

# Count and percentage per employment category, preceded by an "Employment"
# header row. dem_6_1 ("Full-time"/"Part-time") takes precedence when set;
# otherwise the category comes from dem_6.
dem_employment <- screen |>
  mutate(var = case_when(
    dem_6_1 == "Full-time" ~ "Employed full-time",
    dem_6_1 == "Part-time" ~ "Employed part-time",
    TRUE ~ dem_6
  )) |>
  count(var = factor(var,
                     levels = c("Employed full-time", "Employed part-time",
                                "Full-time student", "Homemaker", "Disabled",
                                "Retired", "Unemployed",
                                "Temporarily laid off, sick leave, or maternity leave",
                                "Other, not otherwise specified"))) |>
  mutate(perc = (n / sum(n)) * 100) |>
  add_row(var = "Employment", .before = 1)

# One-row summary of dem_7 for the "Personal Income" line of Table 1.
# Values are rounded to whole units and formatted with thousands separators.
# scientific = FALSE is applied to every statistic (not only max) because
# format() otherwise renders round values of 100,000 or more as e.g. "1e+05".
dem_income <- screen |>
  summarise(mean = format(round(mean(dem_7, na.rm = TRUE), 0),
                          scientific = FALSE, big.mark = ","),
            SD = format(round(sd(dem_7, na.rm = TRUE), 0),
                        scientific = FALSE, big.mark = ","),
            min = format(round(min(dem_7, na.rm = TRUE), 0),
                         scientific = FALSE, big.mark = ","),
            max = format(round(max(dem_7, na.rm = TRUE), 0),
                         scientific = FALSE, big.mark = ",")) |>
  mutate(var = "Personal Income",
         n = NA_real_,
         perc = NA_real_,
         mean = str_c("$", mean),
         SD = str_c("$", SD),
         min = str_c("$", min)) |>
  # max deliberately gets no "$": the range column is later built as
  # "min-max", giving a single leading "$".
  # NOTE(review): confirm this is the intended display.
  select(var, n, perc, everything())

# Count and percentage per dem_8 category, preceded by a "Marital Status"
# header row. "Never Married" is first normalised to sentence case so it
# matches the factor level used for ordering.
dem_marital <- screen |>
  mutate(var = if_else(dem_8 == "Never Married", "Never married", dem_8)) |>
  count(var = factor(var,
                     levels = c("Never married", "Married", "Divorced",
                                "Separated", "Widowed"))) |>
  mutate(perc = (n / sum(n)) * 100) |>
  add_row(var = "Marital Status", .before = 1)

# One-row summary of dsm5_total for the "DSM-5 AUD Symptom Count" line of
# Table 1. Descriptives are stored as character; count/percent are missing.
dem_aud <- screen |>
  summarise(
    var = "DSM-5 AUD Symptom Count",
    n = NA_real_,
    perc = NA_real_,
    mean = as.character(round(mean(dsm5_total, na.rm = TRUE), 1)),
    SD = as.character(round(sd(dsm5_total, na.rm = TRUE), 1)),
    min = as.character(min(dsm5_total, na.rm = TRUE)),
    max = as.character(max(dsm5_total, na.rm = TRUE))
  )

# One row per participant in `screen` with their number of lapse records (n)
# and a yes/no flag for having any. Participants absent from `lapses` get
# n = 0 rather than NA.
lapses_per_subid <- screen |>
  select(subid) |>
  left_join(select(janitor::tabyl(lapses, subid), -percent),
            by = "subid") |>
  mutate(n = coalesce(n, 0),
         lapse = if_else(n > 0, "yes", "no"))

# Lapse section of Table 1: a header row, Yes/No counts of participants who
# reported at least one lapse, and a descriptives row for lapses per person.
# The percentage denominator is sum(n), matching every other section of the
# table, rather than the separately maintained n_total constant.
lapse_info <- lapses_per_subid |>
  # Factor levels fix the row order (Yes before No); converted back to
  # character so the rows stack with the text-labelled rows below.
  count(var = factor(lapse, levels = c("yes", "no"), labels = c("Yes", "No"))) |>
  mutate(var = as.character(var),
         perc = (n / sum(n)) * 100,
         mean = NA_character_,
         SD = NA_character_,
         min = NA_character_,
         max = NA_character_) |>
  # Descriptives row: n and perc are absent here and are filled with NA.
  bind_rows(lapses_per_subid |>
              summarise(mean = as.character(round(mean(n), 1)),
                        SD = as.character(round(sd(n), 1)),
                        min = as.character(round(min(n), 1)),
                        max = as.character(round(max(n), 1))) |>
              mutate(var = "Number of reported lapses")) |>
  add_row(var = "Reported 1 or More Lapse During Study Period", .before = 1)

# Stack every Table 1 section in display order, collapse min/max into a single
# "min-max" Range column (NA where either bound is missing), round percentages
# to one decimal, and apply the printed column headers.
table_dem <- bind_rows(dem_age, dem_sex, dem_race, dem_ethnicity,
                       dem_education, dem_employment, dem_income,
                       dem_marital, dem_aud, lapse_info) |>
  mutate(range = str_c(min, "-", max),
         perc = round(perc, 1)) |>
  select(var, N = n, `%` = perc, M = mean, SD, Range = range)


Table 2

In [None]:
# Posterior contrast summaries used to build Table 2.
ci_baseline <- read_csv(here::here(path_models_lag, "contrast_baseline.csv"),
                        col_types = cols())

ci_lag <- read_csv(here::here(path_models_lag, "contrast_adjacent.csv"),
                   col_types = cols())

# The fairness contrasts for all lags live in one file: read it once, then
# split out the two lags reported in Table 2 (0 and 336; the latter is
# labelled "2-week Lag" there).
ci_fairness <- read_csv(here::here(path_models_lag, "pp_dem_contrast_all.csv"),
                        col_types = cols())

ci_fairness_0 <- ci_fairness |>
  filter(lag == 0)

ci_fairness_336 <- ci_fairness |>
  filter(lag == 336)

# Format one section of Table 2.
#
# contrasts:      data frame with columns contrast, median, lower, upper and
#                 probability.
# section_header: text of the header row placed at the top of the section.
# raw_levels:     contrast values as stored in `contrasts`, in display order.
# display_labels: printed labels, parallel to `raw_levels`.
#
# Returns a tibble with columns contrast (factor), median, ci and probability
# (all character): the header row first, then the contrasts in the order
# given by `raw_levels`.
# NOTE(review): as.character(round(x, 3)) drops trailing zeros (e.g. "0.02",
# "[0.025, 0.04]"); switch to a fixed-width format if uniform decimals are
# wanted in the manuscript.
format_contrast_section <- function(contrasts, section_header, raw_levels,
                                    display_labels) {
  contrasts |>
    mutate(ci = str_c("[", round(lower, 3), ", ", round(upper, 3), "]"),
           median = as.character(round(median, 3)),
           probability = as.character(round(probability, 3))) |>
    select(contrast, median, ci, probability) |>
    add_row(contrast = section_header, median = "", ci = "",
            probability = "") |>
    # The header is the first factor level so arrange() sorts it to the top.
    mutate(contrast = factor(contrast,
                             levels = c(section_header, raw_levels),
                             labels = c(section_header, display_labels))) |>
    arrange(contrast)
}

# Table 2: baseline, adjacent-lag and fairness contrasts (no lag and 2-week
# lag), each preceded by its section header, stacked in display order.
table_ci <- rbind(
  format_contrast_section(
    ci_baseline, "Baseline Contrasts",
    raw_levels = c("0 vs. 24", "0 vs. 72", "0 vs. 168", "0 vs. 336"),
    display_labels = c("No lag vs. 1 day", "No lag vs. 3 days",
                       "No lag vs. 1 week", "No lag vs. 2 weeks")
  ),
  format_contrast_section(
    ci_lag, "Adjacent Contrasts",
    raw_levels = c("24 vs. 72", "72 vs. 168", "168 vs. 336"),
    display_labels = c("1 day vs. 3 days", "3 days vs. 1 week",
                       "1 week vs. 2 weeks")
  ),
  format_contrast_section(
    ci_fairness_0, "Fairness Contrasts (No Lag)",
    raw_levels = c("male vs female", "non-hispanic white vs not white",
                   "above poverty vs below poverty"),
    display_labels = c("male vs. female", "non-Hispanic White vs. not White",
                       "above poverty vs. below poverty")
  ),
  format_contrast_section(
    ci_fairness_336, "Fairness Contrasts (2-week Lag)",
    raw_levels = c("male vs female", "non-hispanic white vs not white",
                   "above poverty vs below poverty"),
    display_labels = c("male vs. female", "non-Hispanic White vs. not White",
                       "above poverty vs. below poverty")
  )
) |>
  rename(Contrast = contrast,
         Median = median,
         `Bayesian CI` = ci,
         Probability = probability)


### Table 1: Demographic and Lapse Characteristics

In [None]:

# Render Table 1. Each pair below is a (start_row, end_row) span of category
# rows sitting under a section header row; the spans are grouped one after
# another, in this order. The indices are positional, so they must be updated
# if the number of rows in any section of table_dem changes.
Reduce(
  \(tbl, rows) kableExtra::group_rows(tbl, start_row = rows[1],
                                      end_row = rows[2]),
  list(c(3, 4), c(6, 10), c(12, 13), c(15, 20), c(22, 30), c(33, 37),
       c(40, 41)),
  table_dem |>
    knitr::kable() |>
    kable_classic()
)


var,N,%,M,SD,Range
Age,,,41,11.9,21-72
Sex,,,,,
,,,,,
Female,74.0,49.0,,,
Male,77.0,51.0,,,
Race,,,,,
,,,,,
American Indian/Alaska Native,3.0,2.0,,,
Asian,2.0,1.3,,,
Black/African American,8.0,5.3,,,


### Table 2: Model Comparisons

In [None]:

# Render Table 2. The inner Reduce groups the contrast rows under each section
# header, given as positional (start_row, end_row) spans; the outer Reduce
# then draws a bottom border under rows 5, 9 and 13.
Reduce(
  \(tbl, row) kableExtra::row_spec(tbl, row,
                                   extra_css = "border-bottom: 1px solid"),
  c(5, 9, 13),
  Reduce(
    \(tbl, rows) kableExtra::group_rows(tbl, start_row = rows[1],
                                        end_row = rows[2]),
    list(c(2, 5), c(7, 9), c(11, 13), c(15, 17)),
    table_ci |>
      knitr::kable() |>
      kable_classic()
  )
)


Contrast,Median,Bayesian CI,Probability
Baseline Contrasts,,,
,,,
No lag vs. 1 day,0.02,"[0.013, 0.027]",1.0
No lag vs. 3 days,0.032,"[0.025, 0.04]",1.0
No lag vs. 1 week,0.043,"[0.035, 0.052]",1.0
No lag vs. 2 weeks,0.063,"[0.053, 0.073]",1.0
Adjacent Contrasts,,,
,,,
1 day vs. 3 days,0.012,"[0.005, 0.02]",0.999
3 days vs. 1 week,0.011,"[0.003, 0.018]",0.989
