Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
stephenrho authored and cran-robot committed Sep 6, 2023
0 parents commit 3902357
Show file tree
Hide file tree
Showing 51 changed files with 4,418 additions and 0 deletions.
29 changes: 29 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Package: pmcalibration
Type: Package
Title: Calibration Curves for Clinical Prediction Models
Version: 0.1.0
Authors@R: person(given = "Stephen", family = "Rhodes", email= "steverho89@gmail.com", role=c("aut", "cre", "cph"))
Maintainer: Stephen Rhodes <steverho89@gmail.com>
Description: Fit calibrations curves for clinical prediction models and calculate several associated
metrics (Eavg, E50, E90, Emax). Ideally predicted probabilities from a prediction model
should align with observed probabilities. Calibration curves relate predicted probabilities
(or a transformation thereof) to observed outcomes via a flexible non-linear smoothing function.
'pmcalibration' allows users to choose between several smoothers (regression splines, generalized
additive models/GAMs, lowess, loess). Both binary and time-to-event outcomes are supported.
See Van Calster et al. (2016) <doi:10.1016/j.jclinepi.2015.12.005>;
Austin and Steyerberg (2019) <doi:10.1002/sim.8281>;
Austin et al. (2020) <doi:10.1002/sim.8570>.
License: GPL-3
Encoding: UTF-8
RoxygenNote: 7.2.3
URL: https://github.com/stephenrho/pmcalibration
BugReports: https://github.com/stephenrho/pmcalibration/issues
Imports: Hmisc, MASS, checkmate, chk, mgcv, splines, graphics, stats,
methods, survival, pbapply, parallel
Suggests: knitr, rmarkdown, data.table, ggplot2, rms, simsurv
VignetteBuilder: knitr
NeedsCompilation: no
Packaged: 2023-09-06 00:19:48 UTC; stephenrhodes
Author: Stephen Rhodes [aut, cre, cph]
Repository: CRAN
Date/Publication: 2023-09-06 17:50:02 UTC
50 changes: 50 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
160f6f22d69e6bc0c7b957dc7aaff786 *DESCRIPTION
af906c553074a9c812cec11b34d5b470 *NAMESPACE
670200d48723b53bb3f4670d58bc9cc6 *NEWS.md
c62cae6d5c238ef18f1714aa0c07d33f *R/boots.R
57dc29751cea05fdecd1cebe137653b7 *R/cal_metrics.R
09cef37b31e564a77148ee910b14c0ee *R/gam_cal.R
28b65549b8fd303d5cc9bed607cad736 *R/glm_cal.R
5494dbf06642b6c953e8fd922ca026e6 *R/loess_cal.R
53768e9d0e917e76719acd89bbc93b54 *R/lowess_cal.R
2f9527fd444cc1ae5adc937292eecfd1 *R/pmcalibration-package.R
b6b11f72bc558d338b022f1bf7482aa4 *R/pmcalibration.R
95ef99c07f5e3d25a61cfa1abd9430f8 *R/reg_spline_X.R
5c77ad462a80d03dbf30d456f926f90d *R/simb.R
f0ec4790527fa5e72311f49abee4ec71 *R/utils.R
8b4cc880f5e95bd33660143895dfa864 *README.md
7f2094c7b4d5faa522b624cffc621f8b *build/partial.rdb
188b4c8d72b32f84e9f3705068380101 *build/vignette.rds
1a6dd0fd18f7279750ef1b139770ad63 *inst/CITATION
ac03fa6f1f2968abb55ae4d271e928df *inst/doc/external-validation.R
804ce8807369bb07c0112bcce0b8ee33 *inst/doc/external-validation.Rmd
b2ebb9e8e648a85fb4d64fef02d87cb9 *inst/doc/external-validation.html
4cb8200ebd060462fb41e38dbbfc974e *inst/doc/internal-validation.R
ede590738ba260e78aefc4466058cb33 *inst/doc/internal-validation.Rmd
f5d1722f8d75445470276e6492500563 *inst/doc/internal-validation.html
26dbfc04a3d51905944d4b2c0dc649f9 *man/boot.Rd
5cc461197c6dd4758249b2e91dbb4e31 *man/cal_metrics.Rd
94466ae9691188acec1fe52643939626 *man/gam_cal.Rd
0e3f807238518f90c102489733f5b3cb *man/get_cc.Rd
c1de95c0e12545146beec0df1e072cf3 *man/glm_cal.Rd
41d1a3d4cbc1d34fe30dd1acd306633e *man/invlogit.Rd
c028a37bb9d30ea398fe354f871bc627 *man/loess_cal.Rd
fea49aff72a1c8db64fedfe1aeb24160 *man/logistic_cal.Rd
3d81e6fc6d6fa839831460f759f9cad5 *man/logit.Rd
683553d1e51e9b9c79b2c46a6cd4eda6 *man/lowess_cal.Rd
e4b22c2bc1294064b2ce2eb665e3c237 *man/plot.pmcalibration.Rd
062c657fbcf787d9b592a231c151d6fd *man/pmcalibration-package.Rd
87fa4c448f237492a71923dbdfc5b3e1 *man/pmcalibration.Rd
5cdbd8180736a0d2532b297ccdf021e3 *man/predict_lowess.Rd
923b8fafffdf7a7118f8e2e6842d6686 *man/print.logistic_cal.Rd
711f158c5a4185e63758164a2bb29b24 *man/print.logistic_calsummary.Rd
b0aa7f7d5b505a0993907762cc4687cb *man/print.pmcalibration.Rd
6028edbc05dd7febde17b7f30655dbad *man/print.pmcalibrationsummary.Rd
fa588867ba8ec97a1da91442e00e960e *man/reg_spline_X.Rd
fdf1223c5ca67eae1facfba3c230e20b *man/run_boots.Rd
37809ac9ccaf25d6dde120ff744ba38c *man/sim_dat.Rd
fb4dcdb42bb013ac6bb0556e665a35a5 *man/simb.Rd
2e22574c11673991b54d0672ce4868da *man/summary.logistic_cal.Rd
9f2d0c9ffcea2658cc859da586bd61b3 *man/summary.pmcalibration.Rd
804ce8807369bb07c0112bcce0b8ee33 *vignettes/external-validation.Rmd
ede590738ba260e78aefc4466058cb33 *vignettes/internal-validation.Rmd
52 changes: 52 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Generated by roxygen2: do not edit by hand

S3method(boot,gam_cal)
S3method(boot,glm_cal)
S3method(boot,loess_cal)
S3method(boot,lowess_cal)
S3method(plot,pmcalibration)
S3method(print,logistic_cal)
S3method(print,logistic_calsummary)
S3method(print,pmcalibration)
S3method(print,pmcalibrationsummary)
S3method(simb,gam_cal)
S3method(simb,glm_cal)
S3method(simb,loess_cal)
S3method(simb,lowess_cal)
S3method(summary,logistic_cal)
S3method(summary,pmcalibration)
export(boot)
export(cal_metrics)
export(gam_cal)
export(get_cc)
export(glm_cal)
export(invlogit)
export(loess_cal)
export(logistic_cal)
export(logit)
export(lowess_cal)
export(pmcalibration)
export(predict_lowess)
export(reg_spline_X)
export(run_boots)
export(sim_dat)
export(simb)
importFrom(graphics,abline)
importFrom(graphics,lines)
importFrom(methods,is)
importFrom(stats,approx)
importFrom(stats,binomial)
importFrom(stats,coef)
importFrom(stats,confint)
importFrom(stats,glm)
importFrom(stats,loess)
importFrom(stats,lowess)
importFrom(stats,median)
importFrom(stats,pnorm)
importFrom(stats,predict)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,rbinom)
importFrom(stats,rnorm)
importFrom(stats,summary.glm)
importFrom(stats,vcov)
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# pmcalibration 0.1.0

* Initial CRAN submission.
100 changes: 100 additions & 0 deletions R/boots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#' Bootstrap resample a calibration curve object
#'
#' @param cal an object created using one of the \code{cal} functions
#'
#' @return bootstrap resamples of calibration metrics and values for plotting
#' @keywords internal
#' @export
boot <- function(cal){
UseMethod("boot", cal)
}

#' @rdname boot
#' @export
boot.glm_cal <- function(cal){
# y <- cal$y; p <- cal$p; X <- cal$X; Xp <- cal$Xp
#
# i <- sample(1:length(y), replace = TRUE)
#
# b <- glm_cal(y = y[i], p = p[i], X = X[i, ], Xp = Xp, save_data=FALSE, save_mod = FALSE)

y <- cal$y; p <- cal$p; x <- cal$x; xp <- cal$xp; time <- cal$time

i <- sample(1:length(y), replace = TRUE)

args <- list(
y = y[i], p = p[i], x = x[i], xp = xp, time=time, save_data=FALSE, save_mod=FALSE
)

args <- c(args, cal$smooth_args)

b <- do.call(glm_cal, args)

return(b)
}

#' @rdname boot
#' @export
boot.gam_cal <- function(cal){
y <- cal$y; p <- cal$p; x <- cal$x; xp <- cal$xp; time <- cal$time

i <- sample(1:length(y), replace = TRUE)

args <- list(
y = y[i], p = p[i], x = x[i], xp = xp, time=time, save_data=FALSE, save_mod=FALSE
)

args <- c(args, cal$smooth_args)

b <- do.call(gam_cal, args)

return(b)
}

#' @rdname boot
#' @export
boot.lowess_cal <- function(cal){
y <- cal$y; p <- cal$p; x <- cal$x; xp <- cal$xp

i <- sample(1:length(y), replace = TRUE)

b <- lowess_cal(y = y[i], p = p[i], x = x[i], xp = xp, save_data=FALSE)

return(b)
}

#' @rdname boot
#' @export
boot.loess_cal <- function(cal){
y <- cal$y; p <- cal$p; x <- cal$x; xp <- cal$xp

i <- sample(1:length(y), replace = TRUE)

b <- loess_cal(y = y[i], p = p[i], x = x[i], xp = xp, save_data = FALSE, save_mod = FALSE)

return(b)
}


#' Wrapper to run bootstrap resamples using \code{parallel}
#'
#' @param cal an object created with one of the \code{_cal} functions
#' @param R number of resamples (default = 1000)
#' @param cores number of cores (for \code{parallel})
#'
#' @return a list created by one of the \code{boot.} functions
#' @keywords internal
#' @export
run_boots <- function(cal, R = 1000, cores=1){

cl <- parallel::makeCluster(cores)
# replace with pbapply?
parallel::clusterExport(cl, varlist = c("cal", "R"),
envir = environment())
# out <- parallel::parLapply(cl, 1:R, function(i) boot(cal = cal))
out <- pbapply::pblapply(seq(R), function(i) boot(cal = cal), cl = cl)
parallel::stopCluster(cl)
closeAllConnections()

return(out)
}
49 changes: 49 additions & 0 deletions R/cal_metrics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#' Calculate calibration metrics from calibration curve
#'
#' @description
#' Calculates metrics used for summarizing calibration curves. See Austin and Steyerberg (2019)
#'
#' @param p predicted probabilities
#' @param p_c probabilities from the calibration curve
#'
#' @return a named vector of metrics based on absolute difference between predicted and calibration curve implied probabilities \code{d = abs(p - p_c)}
#' \itemize{
#' \item{Eavg - average absolute difference (aka integrated calibration index or ICI)}
#' \item{E50 - median absolute difference}
#' \item{E90 - 90th percentile absolute difference}
#' \item{Emax - maximum absolute difference}
#' \item{ECI - average squared difference. Estimated calibration index (Van Hoorde et al. 2015)}
#' }
#'
#' @references Austin PC, Steyerberg EW. (2019) The Integrated Calibration Index (ICI) and related metrics for quantifying the calibration of logistic regression models. \emph{Statistics in Medicine}. 38, pp. 1–15. https://doi.org/10.1002/sim.8281
#' @references Van Hoorde, K., Van Huffel, S., Timmerman, D., Bourne, T., Van Calster, B. (2015). A spline-based tool to assess and visualize the calibration of multiclass risk predictions. \emph{Journal of Biomedical Informatics}, 54, pp. 283-93
#' @references Van Calster, B., Nieboer, D., Vergouwe, Y., De Cock, B., Pencina M., Steyerberg E.W. (2016). A calibration hierarchy for risk models was defined: from utopia to empirical data. \emph{Journal of Clinical Epidemiology}, 74, pp. 167-176
#' @export
#' @examples
#' library(pmcalibration)
#'
#' LP <- rnorm(100) # linear predictor
#' p_c <- invlogit(LP) # actual probabilities
#' p <- invlogit(LP*1.3) # predicted probabilities that are miscalibrated
#'
#' cal_metrics(p = p, p_c = p_c)
cal_metrics <- function(p, p_c){
# code modified from the Austin and Steyerberg (2019, SiM) paper
stopifnot(length(p) == length(p_c))

if (any(is.na(p_c) | is.infinite(p_c))){
d <- NA
message("some p_c's are missing or Inf")
} else{
d <- abs(p - p_c)
}

m <- c('Eavg' = mean(d),
'E50' = median(d),
'E90' = if (any(is.na(d))) NA else unname(quantile(d, .9)),
'Emax' = max(d),
'ECI' = mean(d^2)*100
)

return(m)
}
107 changes: 107 additions & 0 deletions R/gam_cal.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#' fits a calibration curve via gam
#'
#' @param y binary or a time-to-event (\code{Surv}) outcome. For the former \code{family = binomial(link="logit")} and for the latter \code{family = mgcv::cox.ph()}.
#' @param p predicted probabilities
#' @param x predictor (could be transformation of \code{p})
#' @param xp values for plotting (same scale as \code{x})
#' @param time time to calculate survival probabilities at (only relevant if \code{y} is a \code{Surv} object)
#' @param save_data whether to save the data elements in the returned object
#' @param save_mod whether to save the model in the returned object
#' @param pw save pointwise standard errors for plotting
#' @param ... additional arguments for \code{mgcv::gam} and \code{mgcv::s}
#'
#' @returns list of class \code{gam_cal}
#' @keywords internal
#' @export
#' @examples
#' library(pmcalibration)
#' # simulate some data
#' n <- 500
#' dat <- sim_dat(N = n, a1 = .5, a3 = .2)
#' head(dat)
#' # predictions
#' p <- with(dat, invlogit(.5 + x1 + x2 + x1*x2*.1))
#'
#' gam_cal(y = dat$y, p = p, x = p, xp = NULL, k = 20, method="REML")
gam_cal <- function(y, p, x, xp, time=NULL, save_data = TRUE, save_mod = TRUE, pw = FALSE, ...){

dots <- list(...)
if ("bs" %in% names(dots)) bs <- dots[['bs']] else bs <- "tp"
if ("k" %in% names(dots)) k <- dots[['k']] else k <- -1
if ("fx" %in% names(dots)) fx <- dots[['fx']] else fx <- FALSE
if ("method" %in% names(dots)) method <- dots[['method']] else method <- "GCV.Cp"

surv <- is(y, "Surv")

# fit the calibration curve model
if (surv){
times <- y[, 1]
events <- y[, 2]
d <- data.frame(times, events, x)

mod <- mgcv::gam(times ~ s(x, k = k, fx = fx, bs = bs), data = d,
family = mgcv::cox.ph(),
weights = events,
method = method)

p_c = 1 - as.vector(predict(mod, type = "response", newdata = data.frame(times = time, x=x)))
} else{
d <- data.frame(y, x)

mod <- mgcv::gam(y ~ s(x, k = k, fx = fx, bs = bs), data = d,
family = binomial(link="logit"),
method = method)

p_c <- as.vector(predict(mod, type = "response"))
}

if (!is.null(xp)){
if (pw){
if (surv){
p_c_p <- predict(mod, type = "response", newdata = data.frame(times = time, x = xp), se.fit=TRUE)
p_c_plot <- 1 - as.vector(p_c_p$fit)
} else{
p_c_p <- predict(mod, newdata = data.frame(x = xp), type = "response", se.fit = TRUE)
p_c_plot <- as.vector(p_c_p$fit)
}
p_c_plot_se <- as.vector(p_c_p$se)

} else{
if (surv){
p_c_plot <- 1 - as.vector(predict(mod, type = "response",
newdata = data.frame(times = time, x = xp)))
} else{
p_c_plot <- as.vector(predict(mod, newdata = data.frame(x = xp), type = "response"))
}
p_c_plot_se <- NULL
}
} else{
p_c_plot <- NULL
p_c_plot_se <- NULL
}

out <- list(
y = if (save_data) y else NULL,
p = if (save_data) p else NULL,
x = if (save_data) x else NULL,
xp = if (save_data) xp else NULL,
p_c = p_c,
metrics = cal_metrics(p, p_c),
p_c_plot = p_c_plot,
p_c_plot_se = p_c_plot_se,
model = if (save_mod) mod else NULL,
smooth_args = list(
smooth = "gam",
bs = bs,
k = k,
fx = fx,
method = method
),
time = time,
outcome = ifelse(surv, "tte", "binary")
)

class(out) <- "gam_cal"
return(out)
}

0 comments on commit 3902357

Please sign in to comment.