# Make Tables for Main Manuscript

Lauren Khoury and Kendra Wyant  
December 3, 2025

In [None]:
library(skimr)
library(cowplot)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter()    masks stats::filter()
✖ dplyr::lag()       masks stats::lag()
✖ lubridate::stamp() masks cowplot::stamp()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

In [None]:
# Lookup table of the nine covariate-selection methods. Written row-wise so
# each method name sits directly above the definition it is paired with.
# Columns: Method (display name), Definition (text shown in the table).
lm_methods <- tribble(
  ~Method, ~Definition,
  "No covariates",
  "Y is regressed on X without any covariates.",
  "All covariates",
  "All available covariates are included in the regression model.",
  "p-hacking",
  "Unsystematically adding covariates based on whether they lower the p-value of the main effect of X on Y.",
  "Single covariate linear model without X",
  "A linear model that regresses Y on a single covariate. Covariates are considered one at a time and included in the final model if they yield a significant effect on Y (p < .05).",
  "Single covariate linear model with X",
  "A linear model that regresses Y on a single covariate and X. Covariates are considered one at a time and included in the final model if they yield a significant effect on Y (p < .05).",
  "All covariates linear model without X",
  "A full linear model that regresses Y on all available covariates. Covariates that have a statistically significant effect on Y (p < .05) are retained.",
  "All covariates linear model with X",
  "A full linear model that regresses Y on all available covariates and X. Covariates that have a statistically significant effect on Y (p < .05) when controlling for X are retained.",
  "All covariates LASSO without X",
  "A linear model that regresses Y on all available covariates and applies a penalty to shrink coefficients for less important covariates, potentially dropping them altogether (i.e., coefficient of 0). Covariates with non-zero coefficients were retained.",
  "All covariates LASSO with X",
  "A linear model that regresses Y on all available covariates and applies a penalty to shrink coefficients for less important covariates. We assigned a 0 penalty to X to retain it in the model. Covariates with non-zero coefficients when controlling for X were retained."
)

In [None]:
#| label: tbl-methods
#| tbl-cap: The nine linear regression models and their definitions for covariate selection.

# Render the method definitions with the "classic" table theme.
# escape = FALSE passes the cell text through without escaping special
# characters.
# NOTE(review): kbl() and kable_classic() are not provided by the packages
# attached in the visible setup chunk (skimr, cowplot, tidyverse) --
# presumably kableExtra is loaded elsewhere; confirm.
kable_classic(kbl(lm_methods, escape = FALSE))

Method,Definition
No covariates,Y is regressed on X without any covariates.
All covariates,All available covariates are included in the regression model.
p-hacking,Unsystematically adding covariates based on whether they lower the p-value of the main effect of X on Y.
Single covariate linear model without X,A linear model that regresses Y on a single covariate. Covariates are considered one at a time and included in the final model if they yield a significant effect on Y (p < .05).
Single covariate linear model with X,A linear model that regresses Y on a single covariate and X. Covariates are considered one at a time and included in the final model if they yield a significant effect on Y (p < .05).
All covariates linear model without X,A full linear model that regresses Y on all available covariates. Covariates that have a statistically significant effect on Y (p < .05) are retained.
All covariates linear model with X,A full linear model that regresses Y on all available covariates and X. Covariates that have a statistically significant effect on Y (p < .05) when controlling for X are retained.
All covariates LASSO without X,"A linear model that regresses Y on all available covariates and applies a penalty to shrink coefficients for less important covariates, potentially dropping them altogether (i.e., coefficient of 0). Covariates with non-zero coefficients were retained."
All covariates LASSO with X,A linear model that regresses Y on all available covariates and applies a penalty to shrink coefficients for less important covariates. We assigned a 0 penalty to X to retain it in the model. Covariates with non-zero coefficients when controlling for X were retained.


In [None]:
#| label: tbl-dictionary
#| tbl-cap: "Research context variables and values"

# Research-context variables manipulated in the study, one row per variable:
# `description` is the human-readable name, `values` the comma-separated
# levels shown in the table.
data_tbl0 <- tribble(
  ~description, ~values,
  "The population parameter for X", "0, 0.3, 0.5",
  "The number of observations in the sample", "50, 100, 150, 200, 300, 400",
  "The number of covariates", "4, 8, 12, 16, 20",
  "The proportion of \"good\" covariates", "0.25, 0.50, 0.75",
  "The correlation between Y and good covariates", "0.3, 0.5"
)

# Two-column table with reader-facing headers, striped/hover styling, and
# fixed column widths so the table does not stretch to full page width.
data_tbl0 |>
  kbl(
    col.names = c("Research Context Variable", "Values"),
    escape = FALSE
  ) |>
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) |>
  column_spec(column = 1, width = "15em") |>
  column_spec(column = 2, width = "10em")

Research Context Variable,Values
The population parameter for X,"0, 0.3, 0.5"
The number of observations in the sample,"50, 100, 150, 200, 300, 400"
The number of covariates,"4, 8, 12, 16, 20"
"The proportion of ""good"" covariates","0.25, 0.50, 0.75"
The correlation between Y and good covariates,"0.3, 0.5"


In [None]:
#| label: tbl-est
#| tbl-cap: "Mean estimates of effect size by method and true effect size"
#| warning: false

# Effect-size estimates table, one row per method.
# NOTE(review): `table` is not defined in the visible cells; it is presumably
# a data frame with columns `method`, `b_x = 0`, `b_x = .3`, `b_x = .5`
# created elsewhere -- confirm.
table |>
  # Reader-facing column headers.
  rename(
    Method = method,
    `No effect` = `b_x = 0`,
    `d = .3` = `b_x = .3`,
    `d = .5` = `b_x = .5`
  ) |>
  mutate(
    # Fix the presentation order of the methods via factor levels ...
    Method = factor(
      Method,
      levels = c(
        "No covariates",
        "All covariates",
        "p-hacking",
        "Single covariate lm",
        "Single covariate lm with X",
        "All covariates lm",
        "All covariates lm with X",
        "All covariates LASSO",
        "All covariates LASSO with X"
      )
    ),
    # ... then relabel the three variants that lack an explicit
    # "without X" suffix; the remaining labels are unchanged.
    Method = fct_recode(
      Method,
      "Single covariate lm without X" = "Single covariate lm",
      "All covariates lm without X" = "All covariates lm",
      "All covariates LASSO without X" = "All covariates LASSO"
    ),
    # Format the null-effect column as text with exactly three decimals.
    `No effect` = sprintf("%.3f", `No effect`)
  ) |>
  # Sorting on the factor puts rows in the level order defined above.
  arrange(Method) |>
  # Insert the two section-heading rows; only Method is supplied for them.
  add_row(Method = "A priori selection", .before = 1) |>
  add_row(Method = "Data-driven selection", .after = 3) |>
  kable() |>
  kable_classic() |>
  # Group the method rows under each heading row (no extra group label).
  pack_rows(group_label = NULL, start_row = 2, end_row = 3) |>
  pack_rows(group_label = NULL, start_row = 5, end_row = 11)

Method,No effect,d = .3,d = .5
A priori selection,,,
,,,
No covariates,0.0,0.3,0.5
All covariates,0.0,0.3,0.5
Data-driven selection,,,
,,,
p-hacking,0.0,0.368,0.563
Single covariate lm without X,0.0,0.297,0.495
Single covariate lm with X,0.0,0.3,0.5
All covariates lm without X,0.0,0.287,0.479
