version 0.4.2

cran · Jun 13, 2023 · 10f79ca · 10f79ca
commit 10f79ca
Show file tree

Hide file tree

Showing 57 changed files with 6,800 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,41 @@
+Package: risks
+Title: Estimate Risk Ratios and Risk Differences using Regression
+Version: 0.4.2
+Authors@R: 
+    c(person(given = "Konrad",
+             family = "Stopsack",
+             role = c("aut", "cre"),
+             email = "stopsack@post.harvard.edu",
+             comment = c(ORCID = "0000-0002-0722-1311")),
+      person(given = "Travis",
+             family = "Gerke",
+             role = c("aut"),
+             email = "tgerke@mail.harvard.edu",
+             comment = c(ORCID = "0000-0002-9500-8907")))
+Description: Risk ratios and risk differences are estimated using regression 
+  models that allow for binary, categorical, and continuous exposures and 
+  confounders. Implemented are marginal standardization after fitting logistic 
+  models (g-computation) with delta-method and bootstrap standard errors, 
+  Miettinen's case-duplication approach (Schouten et al. 1993, 
+  <doi:10.1002/sim.4780121808>), log-binomial (Poisson) models with empirical 
+  variance (Zou 2004, <doi:10.1093/aje/kwh090>), binomial models with starting 
+  values from Poisson models (Spiegelman and Hertzmark 2005, 
+  <doi:10.1093/aje/kwi188>), and others.
+License: GPL-3
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 7.2.3
+URL: https://stopsack.github.io/risks/
+BugReports: https://github.com/stopsack/risks/issues
+Suggests: addreg, covr, knitr, logbin, MASS, rmarkdown, testthat
+Imports: boot, bcaboot, broom, dplyr, purrr, rlang, sandwich, stats,
+        tibble, tidyr
+VignetteBuilder: knitr
+Depends: R (>= 3.5.0)
+NeedsCompilation: no
+Packaged: 2023-06-12 12:41:09 UTC; stopsack
+Author: Konrad Stopsack [aut, cre] (<https://orcid.org/0000-0002-0722-1311>),
+  Travis Gerke [aut] (<https://orcid.org/0000-0002-9500-8907>)
+Maintainer: Konrad Stopsack <stopsack@post.harvard.edu>
+Repository: CRAN
+Date/Publication: 2023-06-13 08:10:02 UTC
diff --git a/MD5 b/MD5
@@ -0,0 +1,56 @@
+e1f8887be95017c4fd9f239400c407c8 *DESCRIPTION
+878aa9f2534c5f5da75581f199719bc7 *NAMESPACE
+43444254366457799325b6afd4ed3061 *R/breastcancer.R
+c58c4553c84bcb163c27bbc7f9fdce6d *R/caseduplicate.R
+ad86a729d22e210f9d831a0200cdd348 *R/estimate_risk.R
+7ae3c5979b66868ef9609b0732a26242 *R/glm-utils.R
+5e8e67eb3dcade398d003d685a0505c5 *R/mantel_haenszel.R
+95582a56a96fc22edbb10bcca5f97d2b *R/margstd-utils.R
+233b3788b650f24f51bf99752c04479e *R/margstd_delta.R
+3a96ca0f41d7ea6573b2d22676bc9ef4 *R/margstd_variable.R
+d5387bf969c808cbccdbc76b753dae6f *R/postestimation.R
+7df2ad2b56ce9513c99731cb4c95b9ae *R/risks-package.R
+86d30e8449d88a4264bf3b86df710044 *R/risks.R
+c4dfefa242c6a14a7887ed67a1a344a7 *R/robpoisson-utils.R
+aa8e28ff094b8a83cdbbbdea34d28c61 *R/zestimate_risk-utils.R
+445944d902bd599f788fea3741c15a18 *build/vignette.rds
+bbbb4ff5f2497cf2a2c09c9fe4553991 *data/breastcancer.rda
+8bebd10f116619216483ad29fe62eee3 *inst/doc/margstd.R
+70ea5ca7a5971883a0720cd4a57faeef *inst/doc/margstd.Rmd
+e2dc10fee93f7bbd149b7a8b48597ee9 *inst/doc/margstd.html
+828bf0beee700749e104176654d31563 *inst/doc/models.R
+aaf8335a0f6362163c0b83c981721421 *inst/doc/models.Rmd
+b5345a23bbce134e7674245d3d6a540a *inst/doc/models.html
+b7ef068554d3ca65c10f64889b1a793f *inst/doc/risks.R
+fe753999cdf472e0e09d10be75ad5257 *inst/doc/risks.Rmd
+0f5fabbecf2f35c0369e4a92475e32f4 *inst/doc/risks.html
+360d194655b4fd1273c30f13dea75036 *man/breastcancer.Rd
+87df2b95c5d26fc28f72e2370cbe3b6f *man/confint.duplicate.Rd
+e3313345d5ab6f4d5bfc88aba8525fac *man/confint.margstd_boot.Rd
+c6f22543488e55af586f2dca14fd5055 *man/confint.margstd_delta.Rd
+731c73e8e330e3250a10f52a46414cb9 *man/confint.robpoisson.Rd
+98c8fc9634a37bbb53295c78d977ca5e *man/conv.test.Rd
+078de1c3f7d5166605e366be2293ba40 *man/figures/logo.png
+389d6d5b9468ad38ec5a785dfa222200 *man/figures/risks-hex_full-size.png
+d8cc791081763dd03fb65e935257bf4f *man/print.risks.Rd
+a3c8c365b91dadc64615b37c87a8a8ef *man/print.summary.risks.Rd
+9ed7f8c21b815dc38f3eb8c6619373cf *man/riskratio.Rd
+3f14a7904b69f42502a55ba57c90ee61 *man/risks.Rd
+67a76afb9d936c979df2b129584f8afd *man/rr_rd_mantel_haenszel.Rd
+e1145048dd26572f5649325ba01b3bdb *man/summary.duplicate.Rd
+8d922bdb884697606f14f08ecb03b27a *man/summary.margstd_boot.Rd
+455d068f1d86e12845fb517269a53cb1 *man/summary.margstd_delta.Rd
+6bfe8ba5703a9c81367f17429c39475a *man/summary.risks.Rd
+b3794c56cb4dcbc983079384102de1a6 *man/summary.robpoisson.Rd
+89a035e7a15de898337c95245c121371 *man/tidy.risks.Rd
+46e6e8e8e9d1f6826ac8ed1acb9872ef *tests/testthat.R
+191c7796d49824b9175093278641638d *tests/testthat/test-bootci.R
+e102c33c6e227b01b0600f601255418b *tests/testthat/test-coefficients.R
+f920d4b9f7faf7f303dfa5f5d5b10751 *tests/testthat/test-exceptions.R
+e73aa0a87bdea6c031a84dcd22099350 *tests/testthat/test-interactions.R
+fbde9ba06e4a2f6a227bcfcf27bcc98b *tests/testthat/test-print.R
+6f1a8d0fe54fb59d9f57e9614a6bd11d *tests/testthat/test-stderr.R
+687b3ea4889f754cc6d7f59d5ac74a41 *tests/testthat/test-tidy.R
+70ea5ca7a5971883a0720cd4a57faeef *vignettes/margstd.Rmd
+aaf8335a0f6362163c0b83c981721421 *vignettes/models.Rmd
+fe753999cdf472e0e09d10be75ad5257 *vignettes/risks.Rmd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,23 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(confint,duplicate)
+S3method(confint,margstd_boot)
+S3method(confint,margstd_delta)
+S3method(confint,robpoisson)
+S3method(print,risks)
+S3method(print,summary.risks)
+S3method(summary,duplicate)
+S3method(summary,margstd_boot)
+S3method(summary,margstd_delta)
+S3method(summary,risks)
+S3method(summary,robpoisson)
+S3method(tidy,risks)
+export(conv.test)
+export(riskdiff)
+export(riskratio)
+export(rr_rd_mantel_haenszel)
+import(broom)
+import(stats)
+importFrom(dplyr,"%>%")
+importFrom(rlang,":=")
+importFrom(rlang,.data)
diff --git a/R/breastcancer.R b/R/breastcancer.R
@@ -0,0 +1,18 @@
+#' Breast Cancer Data
+#'
+#' A cohort of women with breast cancer and complete follow-up, as used by
+#' Spiegelman and Hertzmark (Am J Epidemiol 2005) and
+#' Greenland (Am J Epidemiol 2004).
+#'
+#' @format ## `breastcancer`
+#' A tibble with 192 rows and 3 columns:
+#' \describe{
+#'   \item{death}{Death, binary: 0, 1}
+#'   \item{stage}{Cancer stage, 3 categories}
+#'   \item{receptor}{Hormone receptor status, binary: "High", "Low"}
+#'   ...
+#' }
+#' @source
+#' Newman SC. Biostatistical methods in epidemiology.
+#' New York, NY: Wiley, 2001, table 5.3
+"breastcancer"
diff --git a/R/caseduplicate.R b/R/caseduplicate.R
@@ -0,0 +1,164 @@
+# Helper functions for estimate_risk(approach = "duplicate") and
+# the use of case duplication in approach = "auto" or approach = "all"
+
+#' @import stats
+
+# estimate_duplicate: internal, fitting a logistic model with case duplication
+# (Miettinen/Schouten approach to directly estimating relative risks)
+estimate_duplicate <- function(formula, data, ...) {
+  yvar <- as.character(all.vars(formula)[1])
+  data <- data %>%
+    dplyr::mutate(.clusterid = dplyr::row_number())
+  data <- dplyr::bind_rows(data,
+                           data %>%
+                             dplyr::rename(outc = dplyr::one_of(!!yvar)) %>%
+                             dplyr::filter(.data$outc == 1) %>%
+                             dplyr::mutate(outc = 0) %>%
+                             dplyr::rename(!!yvar := "outc"))
+  fit <- eval(substitute(stats::glm(formula = formula,
+                                    family = binomial(link = "logit"),
+                                    data = data)))
+  class(fit) <- c("duplicate", class(fit))
+  fit <- estimate_maxprob(fit = fit, formula = formula, data = data,
+                          link = "logit")
+  return(fit)
+}
+
+
+#' Clustering-corrected confidence intervals for case duplication model
+#'
+#' Estimate confidence intervals for the case duplication model
+#' with robust/sandwich/empirical covariance structure.
+#'
+#' @param object Fitted model
+#' @param parm Not used
+#' @param level Confidence level, defaults to 0.95
+#' @param ... Additional arguments, not used
+#' @return Matrix: First column, lower bound; second column, upper bound.
+#' @export
+confint.duplicate <- function(object, parm = NULL, level = 0.95, ...) {
+  # modified after stats:::confint.default()
+  cf <- coef(object)
+  pnames <- names(cf)
+  #if (missing(parm))
+  parm <- pnames
+  #else if (is.numeric(parm))
+  #  parm <- pnames[parm]
+  a <- (1 - level)/2
+  a <- c(a, 1 - a)
+  pct <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3),
+               "%")
+  fac <- qnorm(a)
+  ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm, pct))
+  # Robust covariance accounting for clustering
+  obj_sandwich <- object
+  class(obj_sandwich) <- "glm"
+  ses <- sqrt(diag(sandwich::sandwich(
+    x = obj_sandwich,
+    bread. = sandwich::bread(obj_sandwich),
+    meat. = sandwich::meatCL(obj_sandwich,
+                             type = "HC0",
+                             cluster = object$data$.clusterid))))
+  ci[] <- cf[parm] + ses %o% fac
+  return(ci)
+}
+
+
+#' Summary for logistic model with case duplication and cluster-robust covariance
+#'
+#' Summarize results from fitting a logistic model with case duplication and
+#' cluster-robust covariance.
+#' The output is the same as for a regular \code{summary(glm(...))},
+#' except for using cluster-robust standard errors.
+#'
+#' @param object Model
+#' @param dispersion Not used
+#' @param correlation Not used
+#' @param symbolic.cor Not used
+#' @param ... Other arguments, not used
+#'
+#' @return Model summary (list)
+#' @export
+summary.duplicate <- function(object, dispersion = NULL, correlation = FALSE,
+                              symbolic.cor = FALSE, ...) {
+  # a modification of summary.glm()
+  est.disp <- FALSE
+  df.r <- object$df.residual
+  if (is.null(dispersion))
+    dispersion <- if (object$family$family %in% c("poisson", "binomial"))
+      1
+  else if (df.r > 0) {
+    est.disp <- TRUE
+    if (any(object$weights == 0))
+      warning("observations with zero weight not used for calculating dispersion")
+    sum((object$weights * object$residuals^2)[object$weights > 0])/df.r
+  }
+  else {
+    est.disp <- TRUE
+    NaN
+  }
+  aliased <- is.na(coef(object))
+  p <- object$rank
+  if (p > 0) {
+    p1 <- 1L:p
+    Qr <- object$qr
+    if(is.null(Qr))
+      stop("lm object does not have a proper 'qr' component.\n Rank zero or should not have used lm(.., qr=FALSE).")
+    coef.p <- object$coefficients[Qr$pivot[p1]]
+    ###### changes here #######
+    # covmat.unscaled <- sandwich::vcovCL(object, type = "HC0",
+    #                                     cluster = object$data$.clusterid)
+    obj_sandwich <- object
+    class(obj_sandwich) <- "glm"
+    covmat.unscaled <- sandwich::sandwich(
+      x = obj_sandwich,
+      bread. = sandwich::bread(x = obj_sandwich),
+      meat. = sandwich::meatCL(x = obj_sandwich,
+                               type = "HC0",
+                               cluster = object$data$.clusterid))
+    ###### end changes ########
+    covmat <- dispersion * covmat.unscaled
+    var.cf <- diag(covmat)
+    s.err <- sqrt(var.cf)
+    tvalue <- coef.p/s.err
+    dn <- c("Estimate", "Std. Error")
+    if (!est.disp) {
+      pvalue <- 2 * pnorm(-abs(tvalue))
+      coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
+      dimnames(coef.table) <- list(names(coef.p), c(dn,
+                                                    "z value", "Pr(>|z|)"))
+    }
+    else if (df.r > 0) {
+      pvalue <- 2 * pt(-abs(tvalue), df.r)
+      coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
+      dimnames(coef.table) <- list(names(coef.p), c(dn,
+                                                    "t value", "Pr(>|t|)"))
+    }
+    else {
+      coef.table <- cbind(coef.p, NaN, NaN, NaN)
+      dimnames(coef.table) <- list(names(coef.p), c(dn,
+                                                    "t value", "Pr(>|t|)"))
+    }
+    df.f <- NCOL(Qr$qr)
+  }
+  else {
+    coef.table <- matrix(, 0L, 4L)
+    dimnames(coef.table) <- list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
+    covmat.unscaled <- covmat <- matrix(, 0L, 0L)
+    df.f <- length(aliased)
+  }
+  keep <- match(c("call", "terms", "family", "deviance", "aic",
+                  "contrasts", "df.residual", "null.deviance", "df.null",
+                  "iter", "na.action"), names(object), 0L)
+  ans <- c(object[keep], list(deviance.resid = residuals(object, type = "deviance"),
+                              coefficients = coef.table, aliased = aliased,
+                              dispersion = dispersion, df = c(object$rank, df.r, df.f),
+                              cov.unscaled = covmat.unscaled, cov.scaled = covmat))
+  if (correlation && p > 0) {
+    dd <- sqrt(diag(covmat.unscaled))
+    ans$correlation <- covmat.unscaled/outer(dd, dd)
+    ans$symbolic.cor <- symbolic.cor
+  }
+  class(ans) <- "summary.glm"
+  return(ans)
+}