version 0.1.0

cran · Sep 6, 2023 · 3902357 · 3902357
commit 3902357
Show file tree

Hide file tree

Showing 51 changed files with 4,418 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,29 @@
+Package: pmcalibration
+Type: Package
+Title: Calibration Curves for Clinical Prediction Models
+Version: 0.1.0
+Authors@R: person(given = "Stephen", family = "Rhodes", email= "steverho89@gmail.com", role=c("aut", "cre", "cph"))
+Maintainer: Stephen Rhodes <steverho89@gmail.com>
+Description: Fit calibrations curves for clinical prediction models and calculate several associated 
+  metrics (Eavg, E50, E90, Emax). Ideally predicted probabilities from a prediction model 
+  should align with observed probabilities. Calibration curves relate predicted probabilities 
+  (or a transformation thereof) to observed outcomes via a flexible non-linear smoothing function. 
+  'pmcalibration' allows users to choose between several smoothers (regression splines, generalized 
+  additive models/GAMs, lowess, loess). Both binary and time-to-event outcomes are supported. 
+  See Van Calster et al. (2016) <doi:10.1016/j.jclinepi.2015.12.005>; 
+  Austin and Steyerberg (2019) <doi:10.1002/sim.8281>; 
+  Austin et al. (2020) <doi:10.1002/sim.8570>.
+License: GPL-3
+Encoding: UTF-8
+RoxygenNote: 7.2.3
+URL: https://github.com/stephenrho/pmcalibration
+BugReports: https://github.com/stephenrho/pmcalibration/issues
+Imports: Hmisc, MASS, checkmate, chk, mgcv, splines, graphics, stats,
+        methods, survival, pbapply, parallel
+Suggests: knitr, rmarkdown, data.table, ggplot2, rms, simsurv
+VignetteBuilder: knitr
+NeedsCompilation: no
+Packaged: 2023-09-06 00:19:48 UTC; stephenrhodes
+Author: Stephen Rhodes [aut, cre, cph]
+Repository: CRAN
+Date/Publication: 2023-09-06 17:50:02 UTC
diff --git a/MD5 b/MD5
@@ -0,0 +1,50 @@
+160f6f22d69e6bc0c7b957dc7aaff786 *DESCRIPTION
+af906c553074a9c812cec11b34d5b470 *NAMESPACE
+670200d48723b53bb3f4670d58bc9cc6 *NEWS.md
+c62cae6d5c238ef18f1714aa0c07d33f *R/boots.R
+57dc29751cea05fdecd1cebe137653b7 *R/cal_metrics.R
+09cef37b31e564a77148ee910b14c0ee *R/gam_cal.R
+28b65549b8fd303d5cc9bed607cad736 *R/glm_cal.R
+5494dbf06642b6c953e8fd922ca026e6 *R/loess_cal.R
+53768e9d0e917e76719acd89bbc93b54 *R/lowess_cal.R
+2f9527fd444cc1ae5adc937292eecfd1 *R/pmcalibration-package.R
+b6b11f72bc558d338b022f1bf7482aa4 *R/pmcalibration.R
+95ef99c07f5e3d25a61cfa1abd9430f8 *R/reg_spline_X.R
+5c77ad462a80d03dbf30d456f926f90d *R/simb.R
+f0ec4790527fa5e72311f49abee4ec71 *R/utils.R
+8b4cc880f5e95bd33660143895dfa864 *README.md
+7f2094c7b4d5faa522b624cffc621f8b *build/partial.rdb
+188b4c8d72b32f84e9f3705068380101 *build/vignette.rds
+1a6dd0fd18f7279750ef1b139770ad63 *inst/CITATION
+ac03fa6f1f2968abb55ae4d271e928df *inst/doc/external-validation.R
+804ce8807369bb07c0112bcce0b8ee33 *inst/doc/external-validation.Rmd
+b2ebb9e8e648a85fb4d64fef02d87cb9 *inst/doc/external-validation.html
+4cb8200ebd060462fb41e38dbbfc974e *inst/doc/internal-validation.R
+ede590738ba260e78aefc4466058cb33 *inst/doc/internal-validation.Rmd
+f5d1722f8d75445470276e6492500563 *inst/doc/internal-validation.html
+26dbfc04a3d51905944d4b2c0dc649f9 *man/boot.Rd
+5cc461197c6dd4758249b2e91dbb4e31 *man/cal_metrics.Rd
+94466ae9691188acec1fe52643939626 *man/gam_cal.Rd
+0e3f807238518f90c102489733f5b3cb *man/get_cc.Rd
+c1de95c0e12545146beec0df1e072cf3 *man/glm_cal.Rd
+41d1a3d4cbc1d34fe30dd1acd306633e *man/invlogit.Rd
+c028a37bb9d30ea398fe354f871bc627 *man/loess_cal.Rd
+fea49aff72a1c8db64fedfe1aeb24160 *man/logistic_cal.Rd
+3d81e6fc6d6fa839831460f759f9cad5 *man/logit.Rd
+683553d1e51e9b9c79b2c46a6cd4eda6 *man/lowess_cal.Rd
+e4b22c2bc1294064b2ce2eb665e3c237 *man/plot.pmcalibration.Rd
+062c657fbcf787d9b592a231c151d6fd *man/pmcalibration-package.Rd
+87fa4c448f237492a71923dbdfc5b3e1 *man/pmcalibration.Rd
+5cdbd8180736a0d2532b297ccdf021e3 *man/predict_lowess.Rd
+923b8fafffdf7a7118f8e2e6842d6686 *man/print.logistic_cal.Rd
+711f158c5a4185e63758164a2bb29b24 *man/print.logistic_calsummary.Rd
+b0aa7f7d5b505a0993907762cc4687cb *man/print.pmcalibration.Rd
+6028edbc05dd7febde17b7f30655dbad *man/print.pmcalibrationsummary.Rd
+fa588867ba8ec97a1da91442e00e960e *man/reg_spline_X.Rd
+fdf1223c5ca67eae1facfba3c230e20b *man/run_boots.Rd
+37809ac9ccaf25d6dde120ff744ba38c *man/sim_dat.Rd
+fb4dcdb42bb013ac6bb0556e665a35a5 *man/simb.Rd
+2e22574c11673991b54d0672ce4868da *man/summary.logistic_cal.Rd
+9f2d0c9ffcea2658cc859da586bd61b3 *man/summary.pmcalibration.Rd
+804ce8807369bb07c0112bcce0b8ee33 *vignettes/external-validation.Rmd
+ede590738ba260e78aefc4466058cb33 *vignettes/internal-validation.Rmd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,52 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(boot,gam_cal)
+S3method(boot,glm_cal)
+S3method(boot,loess_cal)
+S3method(boot,lowess_cal)
+S3method(plot,pmcalibration)
+S3method(print,logistic_cal)
+S3method(print,logistic_calsummary)
+S3method(print,pmcalibration)
+S3method(print,pmcalibrationsummary)
+S3method(simb,gam_cal)
+S3method(simb,glm_cal)
+S3method(simb,loess_cal)
+S3method(simb,lowess_cal)
+S3method(summary,logistic_cal)
+S3method(summary,pmcalibration)
+export(boot)
+export(cal_metrics)
+export(gam_cal)
+export(get_cc)
+export(glm_cal)
+export(invlogit)
+export(loess_cal)
+export(logistic_cal)
+export(logit)
+export(lowess_cal)
+export(pmcalibration)
+export(predict_lowess)
+export(reg_spline_X)
+export(run_boots)
+export(sim_dat)
+export(simb)
+importFrom(graphics,abline)
+importFrom(graphics,lines)
+importFrom(methods,is)
+importFrom(stats,approx)
+importFrom(stats,binomial)
+importFrom(stats,coef)
+importFrom(stats,confint)
+importFrom(stats,glm)
+importFrom(stats,loess)
+importFrom(stats,lowess)
+importFrom(stats,median)
+importFrom(stats,pnorm)
+importFrom(stats,predict)
+importFrom(stats,qnorm)
+importFrom(stats,quantile)
+importFrom(stats,rbinom)
+importFrom(stats,rnorm)
+importFrom(stats,summary.glm)
+importFrom(stats,vcov)
diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,3 @@
+# pmcalibration 0.1.0
+
+* Initial CRAN submission.
diff --git a/R/boots.R b/R/boots.R
@@ -0,0 +1,100 @@
+#' Bootstrap resample a calibration curve object
+#'
+#' @param cal an object created using one of the \code{cal} functions
+#'
+#' @return bootstrap resamples of calibration metrics and values for plotting
+#' @keywords internal
+#' @export
+boot <- function(cal){
+  UseMethod("boot", cal)
+}
+
+#' @rdname boot
+#' @export
+boot.glm_cal <- function(cal){
+  # y <- cal$y; p <- cal$p; X <- cal$X; Xp <- cal$Xp
+  #
+  # i <- sample(1:length(y), replace = TRUE)
+  #
+  # b <- glm_cal(y = y[i], p = p[i], X = X[i, ], Xp = Xp, save_data=FALSE, save_mod = FALSE)
+
+  y <- cal$y; p <- cal$p; x <- cal$x; xp <- cal$xp; time <- cal$time
+
+  i <- sample(1:length(y), replace = TRUE)
+
+  args <- list(
+    y = y[i], p = p[i], x = x[i], xp = xp, time=time, save_data=FALSE, save_mod=FALSE
+  )
+
+  args <- c(args, cal$smooth_args)
+
+  b <- do.call(glm_cal, args)
+
+  return(b)
+}
+
+#' @rdname boot
+#' @export
+boot.gam_cal <- function(cal){
+  y <- cal$y; p <- cal$p; x <- cal$x; xp <- cal$xp; time <- cal$time
+
+  i <- sample(1:length(y), replace = TRUE)
+
+  args <- list(
+    y = y[i], p = p[i], x = x[i], xp = xp, time=time, save_data=FALSE, save_mod=FALSE
+  )
+
+  args <- c(args, cal$smooth_args)
+
+  b <- do.call(gam_cal, args)
+
+  return(b)
+}
+
+#' @rdname boot
+#' @export
+boot.lowess_cal <- function(cal){
+  y <- cal$y; p <- cal$p; x <- cal$x; xp <- cal$xp
+
+  i <- sample(1:length(y), replace = TRUE)
+
+  b <- lowess_cal(y = y[i], p = p[i], x = x[i], xp = xp, save_data=FALSE)
+
+  return(b)
+}
+
+#' @rdname boot
+#' @export
+boot.loess_cal <- function(cal){
+  y <- cal$y; p <- cal$p; x <- cal$x; xp <- cal$xp
+
+  i <- sample(1:length(y), replace = TRUE)
+
+  b <- loess_cal(y = y[i], p = p[i], x = x[i], xp = xp, save_data = FALSE, save_mod = FALSE)
+
+  return(b)
+}
+
+
+#' Wrapper to run bootstrap resamples using \code{parallel}
+#'
+#' @param cal an object created with one of the \code{_cal} functions
+#' @param R number of resamples (default = 1000)
+#' @param cores number of cores (for \code{parallel})
+#'
+#' @return a list created by one of the \code{boot.} functions
+#' @keywords internal
+#' @export
+run_boots <- function(cal, R = 1000, cores=1){
+
+  cl <- parallel::makeCluster(cores)
+  # replace with pbapply?
+  parallel::clusterExport(cl, varlist = c("cal", "R"),
+                          envir = environment())
+  # out <- parallel::parLapply(cl, 1:R, function(i) boot(cal = cal))
+  out <- pbapply::pblapply(seq(R), function(i) boot(cal = cal), cl = cl)
+  parallel::stopCluster(cl)
+  closeAllConnections()
+
+  return(out)
+}
diff --git a/R/cal_metrics.R b/R/cal_metrics.R
@@ -0,0 +1,49 @@
+#' Calculate calibration metrics from calibration curve
+#'
+#' @description
+#' Calculates metrics used for summarizing calibration curves. See Austin and Steyerberg (2019)
+#'
+#' @param p predicted probabilities
+#' @param p_c probabilities from the calibration curve
+#'
+#' @return a named vector of metrics based on absolute difference between predicted and calibration curve implied probabilities \code{d = abs(p - p_c)}
+#' \itemize{
+#'  \item{Eavg - average absolute difference (aka integrated calibration index or ICI)}
+#'  \item{E50 - median absolute difference}
+#'  \item{E90 - 90th percentile absolute difference}
+#'  \item{Emax - maximum absolute difference}
+#'  \item{ECI - average squared difference. Estimated calibration index (Van Hoorde et al. 2015)}
+#' }
+#'
+#' @references Austin PC, Steyerberg EW. (2019) The Integrated Calibration Index (ICI) and related metrics for quantifying the calibration of logistic regression models. \emph{Statistics in Medicine}. 38, pp. 1–15. https://doi.org/10.1002/sim.8281
+#' @references Van Hoorde, K., Van Huffel, S., Timmerman, D., Bourne, T., Van Calster, B. (2015). A spline-based tool to assess and visualize the calibration of multiclass risk predictions. \emph{Journal of Biomedical Informatics}, 54, pp. 283-93
+#' @references Van Calster, B., Nieboer, D., Vergouwe, Y., De Cock, B., Pencina M., Steyerberg E.W. (2016). A calibration hierarchy for risk models was defined: from utopia to empirical data. \emph{Journal of Clinical Epidemiology}, 74, pp. 167-176
+#' @export
+#' @examples
+#' library(pmcalibration)
+#'
+#' LP <- rnorm(100) # linear predictor
+#' p_c <- invlogit(LP) # actual probabilities
+#' p <- invlogit(LP*1.3) # predicted probabilities that are miscalibrated
+#'
+#' cal_metrics(p = p, p_c = p_c)
+cal_metrics <- function(p, p_c){
+  # code modified from the Austin and Steyerberg (2019, SiM) paper
+  stopifnot(length(p) == length(p_c))
+
+  if (any(is.na(p_c) | is.infinite(p_c))){
+    d <- NA
+    message("some p_c's are missing or Inf")
+  } else{
+    d <- abs(p - p_c)
+  }
+
+  m <- c('Eavg' = mean(d),
+        'E50' = median(d),
+        'E90' = if (any(is.na(d))) NA else unname(quantile(d, .9)),
+        'Emax' = max(d),
+        'ECI' = mean(d^2)*100
+        )
+
+  return(m)
+}
diff --git a/R/gam_cal.R b/R/gam_cal.R
@@ -0,0 +1,107 @@
+#' fits a calibration curve via gam
+#'
+#' @param y binary or a time-to-event (\code{Surv}) outcome. For the former \code{family = binomial(link="logit")} and for the latter \code{family = mgcv::cox.ph()}.
+#' @param p predicted probabilities
+#' @param x predictor (could be transformation of \code{p})
+#' @param xp values for plotting (same scale as \code{x})
+#' @param time time to calculate survival probabilities at (only relevant if \code{y} is a \code{Surv} object)
+#' @param save_data whether to save the data elements in the returned object
+#' @param save_mod whether to save the model in the returned object
+#' @param pw save pointwise standard errors for plotting
+#' @param ... additional arguments for \code{mgcv::gam} and \code{mgcv::s}
+#'
+#' @returns list of class \code{gam_cal}
+#' @keywords internal
+#' @export
+#' @examples
+#' library(pmcalibration)
+#' # simulate some data
+#' n <- 500
+#' dat <- sim_dat(N = n, a1 = .5, a3 = .2)
+#' head(dat)
+#' # predictions
+#' p <- with(dat, invlogit(.5 + x1 + x2 + x1*x2*.1))
+#'
+#' gam_cal(y = dat$y, p = p, x = p, xp = NULL, k = 20, method="REML")
+gam_cal <- function(y, p, x, xp, time=NULL, save_data = TRUE, save_mod = TRUE, pw = FALSE, ...){
+
+  dots <- list(...)
+  if ("bs" %in% names(dots)) bs <- dots[['bs']] else bs <- "tp"
+  if ("k" %in% names(dots)) k <- dots[['k']] else k <- -1
+  if ("fx" %in% names(dots)) fx <- dots[['fx']] else fx <- FALSE
+  if ("method" %in% names(dots)) method <- dots[['method']] else method <- "GCV.Cp"
+
+  surv <- is(y, "Surv")
+
+  # fit the calibration curve model
+  if (surv){
+    times <- y[, 1]
+    events <- y[, 2]
+    d <- data.frame(times, events, x)
+
+    mod <- mgcv::gam(times ~ s(x, k = k, fx = fx, bs = bs), data = d,
+                     family = mgcv::cox.ph(),
+                     weights = events,
+                     method = method)
+
+    p_c = 1 - as.vector(predict(mod, type = "response", newdata = data.frame(times = time, x=x)))
+  } else{
+    d <- data.frame(y, x)
+
+    mod <- mgcv::gam(y ~ s(x, k = k, fx = fx, bs = bs), data = d,
+                     family = binomial(link="logit"),
+                     method = method)
+
+    p_c <- as.vector(predict(mod, type = "response"))
+  }
+
+  if (!is.null(xp)){
+    if (pw){
+      if (surv){
+        p_c_p <- predict(mod, type = "response", newdata = data.frame(times = time, x = xp), se.fit=TRUE)
+        p_c_plot <- 1 - as.vector(p_c_p$fit)
+      } else{
+        p_c_p <- predict(mod, newdata = data.frame(x = xp), type = "response", se.fit = TRUE)
+        p_c_plot <- as.vector(p_c_p$fit)
+      }
+      p_c_plot_se <- as.vector(p_c_p$se)
+
+    } else{
+      if (surv){
+        p_c_plot <- 1 - as.vector(predict(mod, type = "response",
+                                       newdata = data.frame(times = time, x = xp)))
+      } else{
+        p_c_plot <- as.vector(predict(mod, newdata = data.frame(x = xp), type = "response"))
+      }
+      p_c_plot_se <- NULL
+    }
+  } else{
+    p_c_plot <- NULL
+    p_c_plot_se <- NULL
+  }
+
+  out <- list(
+    y = if (save_data) y else NULL,
+    p = if (save_data) p else NULL,
+    x = if (save_data) x else NULL,
+    xp = if (save_data) xp else NULL,
+    p_c = p_c,
+    metrics = cal_metrics(p, p_c),
+    p_c_plot = p_c_plot,
+    p_c_plot_se = p_c_plot_se,
+    model = if (save_mod) mod else NULL,
+    smooth_args = list(
+      smooth = "gam",
+      bs = bs,
+      k = k,
+      fx = fx,
+      method = method
+    ),
+    time = time,
+    outcome = ifelse(surv, "tte", "binary")
+  )
+
+  class(out) <- "gam_cal"
+  return(out)
+}
+