Skip to content

Commit

Permalink
updated preprint
Browse files Browse the repository at this point in the history
  • Loading branch information
gtuckerkellogg committed Oct 30, 2023
1 parent 082bd50 commit 9232737
Show file tree
Hide file tree
Showing 20 changed files with 221 additions and 38 deletions.
3 changes: 0 additions & 3 deletions README.md
Expand Up @@ -6,9 +6,6 @@
- Download https://s3.sa-east-1.amazonaws.com/ckan.saude.gov.br/SRAG/2022/INFLUD20-26-09-2022.csv and put it into `/data`





## Regenerating the figures and tables

- Table 1 and Figure 1 can be generated by running `src/compare_sus_kerr.R`
Expand Down
Binary file added results/attrition-bias-sim1.pdf
Binary file not shown.
Binary file added results/attrition-bias-sim2.pdf
Binary file not shown.
Binary file added results/attrition-bias-sim3.pdf
Binary file not shown.
Binary file modified results/early_infections.pdf
Binary file not shown.
6 changes: 3 additions & 3 deletions results/early_infections.tex
@@ -1,11 +1,11 @@
\setlength{\LTpost}{0mm}
\begin{longtable}{lccc}
\toprule
& \textbf{non-user}, N = 98\textsuperscript{\textit{1}} & \textbf{user}, N = 80\textsuperscript{\textit{1}} & \textbf{p-value}\textsuperscript{\textit{2}} \\
& \textbf{non-user}, N = 97\textsuperscript{\textit{1}} & \textbf{user}, N = 78\textsuperscript{\textit{1}} & \textbf{p-value}\textsuperscript{\textit{2}} \\
\midrule
All Brazilian Health Ministry-mapped individuals & & & <0.001 \\
    Covid onset on or after 7 July 2020 & 59 (60\%) & 75 (94\%) & \\
    Covid onset before 7 July 2020 & 39 (40\%) & 5 (6.3\%) & \\
    Covid onset on or after 7 July 2020 & 58 (60\%) & 72 (92\%) & \\
    Covid onset before 7 July 2020 & 39 (40\%) & 6 (7.7\%) & \\
\bottomrule
\end{longtable}
\begin{minipage}{\linewidth}
Expand Down
4 changes: 2 additions & 2 deletions results/early_infections_deaths.tex
Expand Up @@ -12,8 +12,8 @@
\multicolumn{4}{l}{Dead} \\
\midrule
& & & <0.001 \\
    Covid onset on or after 7 July 2020 & 46 (60\%) & 56 (97\%) & \\
    Covid onset before 7 July 2020 & 31 (40\%) & 2 (3.4\%) & \\
    Covid onset on or after 7 July 2020 & 45 (59\%) & 53 (95\%) & \\
    Covid onset before 7 July 2020 & 31 (41\%) & 3 (5.4\%) & \\
\bottomrule
\end{longtable}
\begin{minipage}{\linewidth}
Expand Down
Binary file added results/figure_s1.pdf
Binary file not shown.
176 changes: 176 additions & 0 deletions results/kerr_sus_match.tsv

Large diffs are not rendered by default.

Binary file added results/reguser_protection-sim1.pdf
Binary file not shown.
Binary file added results/reguser_protection-sim2.pdf
Binary file not shown.
Binary file added results/reguser_protection-sim3.pdf
Binary file not shown.
18 changes: 10 additions & 8 deletions src/compare_sus_kerr.R
Expand Up @@ -8,8 +8,10 @@
## written by Robin Mills
## Revised by Greg Tucker-Kellogg

suppressPackageStartupMessages({
library(here) # independent of Rstudio, sets here() to the git root
library(tidyverse)
library(janitor)
library(readr)
library(readxl)
library(gt)
Expand All @@ -22,15 +24,16 @@ source(here("src/data/global_params.R"), local = TRUE)
source(here("src/data/keep_results.R"), local = TRUE)
library(gtsummary)
library(patchwork)

options(dplyr.summarise.inform = FALSE)
})
source(here("src/import-and-impute", "INFLUD20.R"), local = TRUE)
source(here("src", "import-and-impute", "kerr_early_infection_match.R"), local = TRUE)
source(here("src/import-and-impute/simulated_enrolment.R"))

write_tsv(kerr_early_infection_tbl,file=here("results/kerr_sus_match.tsv"))

message("Comparing in 2nd half 2020")

message("2020 2H")
onset_in_2h <- citywide_infected |>
filter(ID_MN_RESI=="ITAJAI", ID_RG_RESI=="ITAJAI") |>
filter(date_birth <= paper$start - years(18)) |>
Expand All @@ -47,7 +50,8 @@ deaths_in_2h <- citywide_infected |>
results[['deaths_2H_2020']] <- sum(deaths_in_2h$death)
results[['hosp_2H_2020']] <- sum(deaths_in_2h$hospitalised)

message("study period")
message("Comparing in study period")

onset_in_period <- citywide_infected |>
filter(ID_MN_RESI=="ITAJAI", ID_RG_RESI=="ITAJAI") |>
filter(date_birth <= paper$start - years(18)) |>
Expand All @@ -60,7 +64,6 @@ hospitalised_in_period <- citywide_infected |>
filter(ID_MN_RESI=="ITAJAI", ID_RG_RESI=="ITAJAI") |>
filter(date_birth <= paper$start - years(18)) |>
filter(hospitalised,date_hospitalised %within% paper$study_period)

deaths_in_period <- citywide_infected |>
filter(ID_MN_RESI=="ITAJAI", ID_RG_RESI=="ITAJAI") |>
filter(date_birth <= paper$start - years(18)) |>
Expand All @@ -72,7 +75,6 @@ results[['hosp_after_onset_in_study_period']] <- sum(onset_in_period$hospitalise
results[['deaths_in_study_period']] <- nrow(deaths_in_period)
results[['hosp_in_study_period']] <- nrow(hospitalised_in_period)


missed_deaths <-
anti_join(onset_in_period,kerr_early_infection_tbl,by=c('date_birth', 'sex', 'death')) |>
filter(death)
Expand Down Expand Up @@ -158,7 +160,7 @@ matched_p <- kerr_early_infection_tbl |>
ggplot(aes(x = date)) +
study_rect +
ylab("count") + xlab("Event date") +
geom_histogram(binwidth = 2) + scale_fill_hc() +
geom_histogram(binwidth = 2,na.rm=TRUE) + scale_fill_hc() +
facet_wrap(fct_rev(event) ~ .,nrow=3,scales='free_y',strip.position='right',dir='h') +
scale_y_continuous(breaks= pretty_breaks()) +
cowplot::theme_half_open() +
Expand All @@ -179,7 +181,7 @@ fig_s1 <- missed_deaths |>
study_rect +
annotate("text", x = as.Date("2020-09-19"), y = 12, label = "KC22 study period") +
ylab("count") + xlab("Event date") +
geom_histogram(binwidth = 2) + scale_fill_hc() +
geom_histogram(binwidth = 2,na.rm=TRUE) + scale_fill_hc() +
facet_wrap(event ~ .,nrow=3,scales='free_y',strip.position='left') +
scale_y_continuous(breaks= pretty_breaks()) +
cowplot::theme_half_open() +
Expand All @@ -203,7 +205,7 @@ citywide_p <- citywide_infected |>
annotate("text", x = as.Date("2020-09-19"), y = 36, label = "KC22 study period") +
xlab("Symptom onset date") +
ylab("count") +
geom_histogram(binwidth = 2, alpha = 0.6, fill = .colors[1]) +
geom_histogram(binwidth = 2, na.rm=TRUE,alpha = 0.6, fill = .colors[1]) +
common_xlim +
ggtitle("Symptom onset date for Itajaí cases leading to hospitalisation or death (DATASUS)") +
cowplot::theme_half_open()
Expand Down
6 changes: 3 additions & 3 deletions src/data/keep_results.R
@@ -1,6 +1,6 @@
library(here)

.resultsfile <- here("results/results.RData")
.resultsfile <- here::here("results/results.RData")

if (!exists('results')) {
results <- tryCatch({
Expand All @@ -26,6 +26,6 @@ if (!exists('results')) {
}

.Last <- function() {
source(here("src/data/pgfkeys_params.R"),local=TRUE)

library(here)
source(here::here("src/data/pgfkeys_params.R"),local=TRUE)
}
10 changes: 6 additions & 4 deletions src/data/pgfkeys_params.R
Expand Up @@ -6,16 +6,15 @@ local({
library(purrr)
library(here)
library(stringr)
source(here("src/data/global_params.R"), local = TRUE)
source(here::here("src/data/global_params.R"), local = TRUE)

keysfile <- here("results","keys.tex")
keysfile <- here::here("results","keys.tex")
invisible(suppressWarnings(file.remove(keysfile)))
keysfile <- file(keysfile,"w")

cat(
map_chr(sort(names(as.list(paper))),
function(name) {
print(name)
sprintf("\\pgfkeyssetvalue{/KC22/%s}{%s}\n",name,paper[[name]])
}
),
Expand All @@ -24,11 +23,14 @@ local({
cat(c(sprintf("\\pgfkeyssetvalue{/model/%s}{%s}\n","high_stop",min(model$p_inf_stop)),
sprintf("\\pgfkeyssetvalue{/model/%s}{%s}\n","low_stop",max(model$p_inf_stop))),
file=keysfile)

if (!exists('results')) {
result=list()
}

cat(
map_chr(sort(names(as.list(results))),
function(name) {
print(name)
val = results[[name]]
sprintf("\\pgfkeyssetvalue{/results/%s}{%s}\n",name,val)
}
Expand Down
9 changes: 6 additions & 3 deletions src/import-and-impute/INFLUD20.R
Expand Up @@ -81,15 +81,18 @@ if (!exists("citywide_infected")) {

## For non-residents who joined the program, we also include ID_MUNICIP and IT_MN_INTE, assuming that
## they would have been recorded through the local hospitals or reported by the city itself.

citywide_infected <-
dat_all |>
filter(PCR_SARS2 == 1 | CLASSI_FIN==5) |>
filter(date_onset >= as.Date("2020-01-01"),date_onset <= as.Date("2020-12-31")) |>
filter(if_all(c("ID_RG_RESI","ID_MN_RESI"), ~ . == "ITAJAI") | # True city residents
if_any(c("ID_MUNICIP","ID_MN_INTE"),~ . == "ITAJAI")) # Reported through city
if_any(c("ID_MUNICIP","ID_MN_INTE"),~ . == "ITAJAI")) |> # Reported through city
## No use retaining a hospitalised one for a
## match if we can't get the date of hospitalisation
filter(!(hospitalised & is.na(date_hospitalised)))

citywide_infected
citywide_infected

})
}
8 changes: 4 additions & 4 deletions src/import-and-impute/kerr_early_infection_match.R
Expand Up @@ -40,7 +40,7 @@ kerr_early_infection_tbl <- local({

match_death_universe <-
citywide_infected |>
filter(death) |> select(-hospitalised)
filter(death) |> select(-hospitalised,-race,-type_2_diabetes)

message(sprintf("There are %d records to consider for matching deaths between KC22 and DATASUS",nrow(match_death_universe)))
results[['match_death_universe']] = nrow(match_death_universe)
Expand All @@ -50,8 +50,7 @@ kerr_early_infection_tbl <- local({
multiple = "all") |>
filter(date_onset <= paper$end) |>
arrange(desc(date_onset)) |>
mutate(hospitalised=ifelse(is.na(date_hospitalised),hospitalised,TRUE)) |>
group_by(date_birth, sex, hospitalised, death) |>
group_by(date_birth, sex, death) |>
mutate(n_matches = n()) |>
filter(row_number() == 1) |>
ungroup() |>
Expand All @@ -65,7 +64,7 @@ kerr_early_infection_tbl <- local({

match_hosp_universe <-
citywide_infected |>
filter(hospitalised,!death) |> select(-death)
filter(hospitalised,!death) |> select(-death,-race,-type_2_diabetes)

message(sprintf("There are %d records to consider for matching hospitalised survivors between KC22 and DATASUS",nrow(match_hosp_universe)))
results[['match_hosp_universe']] = nrow(match_hosp_universe)
Expand All @@ -77,6 +76,7 @@ kerr_early_infection_tbl <- local({
filter(date_onset <= paper$end) |>
arrange(desc(date_onset)) |>
group_by(date_birth, sex, hospitalised, death) |>
group_by(date_birth, sex, death) |>
mutate(n_matches = n()) |>
filter(row_number() == 1) |>
ungroup() |>
Expand Down
13 changes: 8 additions & 5 deletions src/main_and_supplemental_tables.R
Expand Up @@ -110,7 +110,7 @@ overview_gt <- function(outcome_df) {
}


overview_gt_to_latex <- function(gt_obj, model_name, probabilistic, infected_only) {
overview_gt_to_latex <- function(gt_obj, model_name, probabilistic, infected_only,label) {
caption <- sprintf(
"\\caption{%s %s %s}",
sprintf("%s model with %s stop on infection.", model_name, ifelse(probabilistic, "probabilistic", "uniform")),
Expand All @@ -136,7 +136,7 @@ overview_gt_to_latex <- function(gt_obj, model_name, probabilistic, infected_onl
fixed("\\begin{tabulary}{l|rrrrrrrr}"),
"\n\n\\begin{table}[h!tb]\n\\footnotesize\n\\begin{tabulary}{\\linewidth}{lRrrRRrrR}"
) |>
paste0(sprintf("%s\n\\end{table}\n\n", caption), collapse = "\n")
paste0(sprintf("%s\n%s\n\\end{table}\n\n", caption,glue("\\label{{tab:{label}}}")), collapse = "\n")
}


Expand Down Expand Up @@ -215,7 +215,8 @@ supplemental_tables <- c(supplemental_tables,
cols_hide(columns=matches("corrected")) |>
overview_gt_to_latex(l$name,
probabilistic = l$stopping == "probabilistic",
infected_only = l$subset == "infected")
infected_only = l$subset == "infected",
label=l$name)
}))


Expand All @@ -224,8 +225,10 @@ cat(as.character(supplemental_tables), file = here("results/supplementary.tex"))


table4 <- overview_gt(overview_dfs[["sim3-all-probabilistic"]])

cat(as.character(overview_gt_to_latex(table4,"i-KC22", probabilistic=TRUE, infected_only=FALSE)),
cat(as.character(overview_gt_to_latex(table4,"i-KC22",
probabilistic=TRUE,
infected_only=FALSE,
label="main_results")),
file=here("results/table4.tex"))


1 change: 0 additions & 1 deletion src/main_simulations.R
Expand Up @@ -18,7 +18,6 @@ gc <- function() {
iterations <- 1000

models <- c("sim1", "sim2", "sim3")
models <- c("sim3")

for (model_name in models) {
print(tic(glue("{model_name}: ({iterations} times)"),quiet=FALSE))
Expand Down
5 changes: 3 additions & 2 deletions src/simulation_models.R
Expand Up @@ -102,9 +102,10 @@ sim3_model <- local({
date_onset = paper$start+days)

## now predict using a loess smoother
mapped$pred <- predict(loess(n ~ days,mapped,span=0.2,degree=0),model=TRUE); plot(mapped$pred)
mapped$pred <- predict(loess(n ~ days,mapped,span=0.1,degree=0),model=TRUE);
plot(mapped$date_onset,mapped$pred)
mapped$.weights <- (mapped$pred - min(mapped$pred))/(max(mapped$pred) - min(mapped$pred))

#plot(mapped$date_onset,mapped$.weights)
overall_imputed <- imputed_infections |> restrict_dates()
mapped_imputed <- mapped |>
slice_sample(n=nrow(imputed_infections),replace=TRUE,weight_by=.weights) |>
Expand Down

0 comments on commit 9232737

Please sign in to comment.