# Make Tables for Main Manuscript

Lauren Khoury and Kendra Wyant  
October 15, 2025

In [None]:

library(dplyr) |> suppressMessages()
library(skimr)
library(purrr)
library(ggplot2)
library(cowplot)
library(forcats)
library(kableExtra, exclude = ("group_rows"))

# Make ggplot2's classic theme the default for plots created after this call.
theme_classic() |> theme_set()
# Named lookup vectors keyed by method identifier.
# method_colors maps each method to a plotting colour.
method_colors <- c(
  no_covs   = "sienna",
  all_covs  = "goldenrod2",
  p_hacked  = "red2",
  r         = "springgreen3",
  partial_r = "deepskyblue",
  full_lm   = "mediumpurple1",
  lasso     = "hotpink"
)

# method_linetypes uses the same keys in the same order: "no_covs" and
# "all_covs" are solid, every other method is long-dashed.
method_linetypes <- ifelse(
  names(method_colors) %in% c("no_covs", "all_covs"),
  "solid",
  "longdash"
) |>
  setNames(names(method_colors))

# Print numbers with 3 significant digits by default.
options(digits = 3)


In [None]:
# Two-column table (Method, Definition) describing each covariate-selection
# method. Written row-wise so every method name sits directly above the text
# that defines it.
lm_methods <- tribble(
  ~Method, ~Definition,
  "No covariates",
  "Y is regressed on X without any covariates.",
  "All covariates",
  "All available covariates are included in the regression model.",
  "P-hacking",
  "Unsystematically adding covariates based on whether they lower the p-value of the main effect of X on Y.",
  "Bivariate correlation",
  "Pearson correlation coefficient (r) of covariates on Y. Covariates are considered one at a time and included in the final model if they yield a significant effect on Y (p < .05).",
  "Partial correlation",
  "Pearson correlation coefficient (r) of covariates on Y while controlling for X. Covariates are considered one at a time and included in the final model if they yield a significant effect on Y (p < .05).",
  "Full linear model",
  "A full linear model that regresses Y on all available covariates. Covariates that have a statistically significant effect on Y (p < .05) are retained.",
  "Full linear model with X",
  "A full linear model that regresses Y on all available covariates and X. Covariates that have a statistically significant effect on Y (p < .05) when controlling for X are retained.",
  "LASSO",
  "A linear model that regresses Y on all available covariates and applies a penalty to shrink coefficients for less important covariates, potentially dropping them altogether (i.e., coefficient of 0). Covariates with non-zero coefficients were retained.",
  "LASSO with X",
  "A linear model that regresses Y on all available covariates and applies a penalty to shrink coefficients for less important covariates. We assigned a 0 penalty to X to retain it in the model. Covariates with non-zero coefficients when controlling for X were retained."
)


In [None]:

# Render the method definitions as a kableExtra table in the "classic" style.
# escape = FALSE passes the cell text through without escaping special
# characters.
kbl(lm_methods, escape = FALSE) |>
  kable_classic()


Method,Definition
No covariates,Y is regressed on X without any covariates.
All covariates,All available covariates are included in the regression model.
P-hacking,Unsystematically adding covariates based on whether they lower the p-value of the main effect of X on Y.
Bivariate correlation,Pearson correlation coefficient (r) of covariates on Y. Covariates are considered one at a time and included in the final model if they yield a significant effect on Y (p < .05).
Partial correlation,Pearson correlation coefficient (r) of covariates on Y while controlling for X. Covariates are considered one at a time and included in the final model if they yield a significant effect on Y (p < .05).
Full linear model,A full linear model that regresses Y on all available covariates. Covariates that have a statistically significant effect on Y (p < .05) are retained.
Full linear model with X,A full linear model that regresses Y on all available covariates and X. Covariates that have a statistically significant effect on Y (p < .05) when controlling for X are retained.
LASSO,"A linear model that regresses Y on all available covariates and applies a penalty to shrink coefficients for less important covariates, potentially dropping them altogether (i.e., coefficient of 0). Covariates with non-zero coefficients were retained."
LASSO with X,A linear model that regresses Y on all available covariates and applies a penalty to shrink coefficients for less important covariates. We assigned a 0 penalty to X to retain it in the model. Covariates with non-zero coefficients when controlling for X were retained.


In [None]:

# Table of research-setting variables: one row per variable, with a text
# description and the comma-separated list of values it takes.
data_tbl0 <- tribble(
  ~description,                                    ~values,
  "The population parameter for X",                "0, 0.3, 0.5",
  "The number of observations in the sample",      "50, 100, 150, 200, 300, 400",
  "The number of covariates",                      "4, 8, 12, 16, 20",
  'The proportion of "good" covariates',           "0.25, 0.50, 0.75",
  "The correlation between Y and good covariates", "0.3, 0.5"
)

# Render the research-setting table with display column headers, striped/hover
# bootstrap styling, and fixed widths for the two columns.
kbl(
  data_tbl0,
  escape = FALSE,
  col.names = c("Research Setting Variable", "Values")
) |>
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover")
  ) |>
  column_spec(column = 1, width = "15em") |>
  column_spec(column = 2, width = "10em")


Research Setting Variable,Values
The population parameter for X,"0, 0.3, 0.5"
The number of observations in the sample,"50, 100, 150, 200, 300, 400"
The number of covariates,"4, 8, 12, 16, 20"
"The proportion of ""good"" covariates","0.25, 0.50, 0.75"
The correlation between Y and good covariates,"0.3, 0.5"
