Skip to content

Commit

Permalink
version 1.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
cwolock authored and cran-robot committed Mar 18, 2024
1 parent 9770bba commit d7ebb24
Show file tree
Hide file tree
Showing 30 changed files with 1,808 additions and 1,666 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: survML
Title: Flexible Estimation of Conditional Survival Functions Using
Machine Learning
Version: 1.0.0
Version: 1.1.0
Authors@R:
person(given = "Charles",
family = "Wolock",
Expand All @@ -11,21 +11,22 @@ Authors@R:
Description: Tools for flexible estimation of conditional survival
functions using off-the-shelf machine learning tools. Implements both
global and local survival stacking. See Wolock CJ, Gilbert PB,
Simon N, and Carone M (2022+) <arXiv:2211.03031>.
Simon N, and Carone M (2024) <doi:10.1080/10618600.2024.2304070>.
License: GPL (>= 3)
Encoding: UTF-8
RoxygenNote: 7.2.3
Depends: SuperLearner (>= 2.0.28),
Imports: Iso (>= 0.0.18.1)
Suggests: knitr, rmarkdown, testthat (>= 3.0.0), ggplot2 (>= 3.4.0)
Suggests: knitr, rmarkdown, testthat (>= 3.0.0), ggplot2 (>= 3.4.0),
gam (>= 1.22.0)
Config/testthat/edition: 3
VignetteBuilder: knitr
URL: https://github.com/cwolock/survML
BugReports: https://github.com/cwolock/survML/issues
NeedsCompilation: no
Packaged: 2023-07-08 03:44:06 UTC; cwolo
Packaged: 2024-03-17 01:51:19 UTC; cwolock
Author: Charles Wolock [aut, cre, cph]
(<https://orcid.org/0000-0003-3527-1102>)
Maintainer: Charles Wolock <cwolock@gmail.com>
Repository: CRAN
Date/Publication: 2023-07-08 10:00:03 UTC
Date/Publication: 2024-03-17 05:30:02 UTC
57 changes: 29 additions & 28 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
aeff46b5e4942b2dd645ca3ddf35b5ac *DESCRIPTION
d39ad963a5e1c570e808509b5c80e3e4 *NAMESPACE
de25bc578afd9862e67d21e1ab6b8d02 *NEWS.md
54b026c9cd55a89e540676ebab216347 *R/compute_exponential.R
0097b169aa4f3fea293a70d5e28b314f *R/compute_prodint.R
0f892603dd645000247c30ba4ef4f30f *R/f_w_algorithms.R
cec78aef17eaae98749accf49fc3c73c *R/f_w_stack.R
70951adaebc9bdd077817e3dc36fb496 *R/f_y_algorithms.R
243fb2fb5f09c3e0bf3e7b4d93aac283 *R/f_y_stack.R
2052a2513c0d213530fee018d79f1684 *DESCRIPTION
2586a2c9c655f54f5593fcb264f6e397 *NAMESPACE
fb80267d6795697c10442c478071ef1f *NEWS.md
903b9f8bdaa3e09eb0eeb0eeb7b47cb5 *R/compute_exponential.R
01398ba4798ae48a5c4c9ee4ccd67fa8 *R/compute_prodint.R
168d203319ae88cb642176802a5c27d3 *R/f_w_algorithms.R
8a9defc4d3a638aa3f08b7b1304c819f *R/f_w_stack.R
0cd8a4c8489e93682e1418bd1fd489a4 *R/f_y_algorithms.R
0b3a4e3dd130ebab1b997af2a314fe01 *R/f_y_stack.R
8cf292f37cf7cd8ab9171885c7d93e6a *R/p_delta.R
b26b3f5e7e63ddc7ffb29d2659915a8b *R/p_delta_algorithms.R
d454895b92cb2fe59cc45d1636343541 *R/stackG.R
12a123bb248c84ccb584cd983691ef78 *R/stackL.R
dd57200c375a6bdc8c6f64098df64967 *R/stack_utils.R
64e5a94366c18effd2447b56565194c9 *R/survML-package.R
838384e28a205ec68bb86f110c783b4a *README.md
58d1f4a857b195daeda0cc3a888fb14e *build/vignette.rds
d062c8083254adb9135dcc5bfc408846 *inst/doc/basic-usage.R
550ac35b81de5013b44878e4af404056 *inst/doc/basic-usage.Rmd
2a8aa5ce1f334c92a6d0ffb59f5a137c *inst/doc/basic-usage.html
59931622ce6db2589489f6173b41e0d7 *man/figures/README-plot_stackG_example-1.png
716a866567c74507b118d4759e8852b4 *man/figures/README-plot_stackG_example_cens-1.png
a347897dc1f2670115fa571318719f73 *man/figures/README-plot_stackL_example-1.png
51d2f7dea4b4a55df03acfb6ded10dff *man/stackG.Rd
3485c75a57f68abe5134edbfb1b33481 *man/stackL.Rd
156366dd8615a91d1dfdc2aed0b47239 *man/survML-package.Rd
73f67fbeaaabe941df28192ecc0fc31b *tests/testthat.R
132be4d2f1e9aa9648849042e64f5029 *tests/testthat/test-test-stackG.R
cefe3880a80221d843986b4f61a99a66 *tests/testthat/test-test-stackL.R
550ac35b81de5013b44878e4af404056 *vignettes/basic-usage.Rmd
bfe5c54b71ba6fa0f038be3efbe9cdbc *R/stackG.R
f6f9d52da3883d68efaa93c4b3fc28fd *R/stackL.R
94e5b1527e501b83ebd91a44e5c59b0d *R/stack_utils.R
e15545709dc5c7353d5dd6e8690d76eb *R/survML-package.R
1ab5cae4d4b000f3dd425da7cb045fa1 *README.md
8faf250d78a120ba4c7ea2186efddd24 *build/partial.rdb
611756181ea4b004b3ecddaefca9f579 *build/vignette.rds
6dbfdb33c122d98ab0a38f51ee6ed758 *inst/doc/basic-usage.R
8b6abf6a187879862f1a4ac7aa8e2624 *inst/doc/basic-usage.Rmd
cdad9f88514aa186e9b9fb860e430cd3 *inst/doc/basic-usage.html
13b0f17b5a2110a879af8d37e8cc09be *man/figures/README-plot_stackG_example-1.png
0ffa0573da00d663dc62fb48c31eb615 *man/figures/README-plot_stackG_example_cens-1.png
5db83c9953582a5d7d1d2e067014f33d *man/figures/README-plot_stackL_example-1.png
a5c8d270a155f892e4d189c709d58172 *man/stackG.Rd
48596b087162b64e027fd12b904a7d7e *man/stackL.Rd
6fd5182c3aa78106da4c158e73b7180e *man/survML-package.Rd
dd2e0738cb4bd538970a254ab5f2eed6 *tests/testthat.R
f1ccce52ca681de696eb02af109d7ac5 *tests/testthat/test-test-stackG.R
88198a235a81bc0bafd333998d9b22d3 *tests/testthat/test-test-stackL.R
8b6abf6a187879862f1a4ac7aa8e2624 *vignettes/basic-usage.Rmd
14 changes: 7 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Generated by roxygen2: do not edit by hand
S3method(predict,stackG)
S3method(predict,stackL)
export(stackG)
export(stackL)
importFrom(SuperLearner,SuperLearner)
# Generated by roxygen2: do not edit by hand

S3method(predict,stackG)
S3method(predict,stackL)
export(stackG)
export(stackL)
importFrom(SuperLearner,SuperLearner)
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# survML 1.1.0

* Added `gam` to `SUGGESTS` in order to allow `SuperLearner` package to make corresponding change without breaking vignettes.
* Added `time_grid_fit` option to main `stackG` function in order to allow more flexibility in choosing time grids.
* Minor bug fixes.

# survML 1.0.0

* Initial CRAN submission.
70 changes: 22 additions & 48 deletions R/compute_exponential.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,13 @@
#'
#' @noRd
compute_exponential <- function(cdf_uncens,
cdf_cens = NA,
cdf_marg = NA,
entry_uncens = NA,
entry_cens = NA,
entry_marg = NA,
p_uncens,
newtimes,
time_grid,
denom_method = "stratified",
truncation = TRUE){
cdf_cens = NA,
entry_uncens = NA,
entry_cens = NA,
p_uncens,
newtimes,
time_grid,
truncation = TRUE){

if (time_grid[1] == 0){ # 013123 added this to try to get better predictions at time 0
time_grid <- time_grid[-1]
Expand All @@ -37,49 +34,26 @@ compute_exponential <- function(cdf_uncens,
S_Y_1_pred_left <- c(1, 1-S_Y_1_curr[-length(S_Y_1_curr)])# probability of being "at risk" at time t
### CHECK TO MAKE SURE THIS IS CORRECT WITH THE DISCRETIZATION OF TIME

if (!truncation){ # truncation
if (denom_method != "stratified"){# marginal
S_Y_curr <- cdf_marg[1:curr_length]
S_Y_pred_left <- c(1, 1-S_Y_curr[-length(S_Y_curr)])
low <- S_Y_pred_left
} else{
S_Y_0_curr <- cdf_cens[1:curr_length]
S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
low_right <- S_Y_0_pred_left * (1 - p_uncens)
low_left <- S_Y_1_pred_left * p_uncens
}

if (!truncation){ # no truncation
S_Y_0_curr <- cdf_cens[1:curr_length]
S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
low_right <- S_Y_0_pred_left * (1 - p_uncens)
low_left <- S_Y_1_pred_left * p_uncens

# product form
if (denom_method == "stratified"){
#print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/(low_left + low_right)))
} else{
S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/low))
}
#print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/(low_left + low_right)))
} else{ # if there is truncation
if (denom_method != "stratified"){
S_Y_curr <- cdf_marg[1:curr_length]
F_W_curr <- entry_marg[1:curr_length]
S_Y_pred_left <- c(1, 1-S_Y_curr[-length(S_Y_curr)])
low <- S_Y_pred_left * F_W_curr
} else{ # marginal
S_Y_0_curr <- cdf_cens[1:curr_length]
S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
F_W_1_curr <- entry_uncens[1:curr_length]
F_W_0_curr <-entry_cens[1:curr_length]
low_right <- F_W_0_curr * S_Y_0_pred_left * (1 - p_uncens)
low_left <- F_W_1_curr * S_Y_1_pred_left * p_uncens
}

S_Y_0_curr <- cdf_cens[1:curr_length]
S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
F_W_1_curr <- entry_uncens[1:curr_length]
F_W_0_curr <-entry_cens[1:curr_length]
low_right <- F_W_0_curr * S_Y_0_pred_left * (1 - p_uncens)
low_left <- F_W_1_curr * S_Y_1_pred_left * p_uncens

# product form
if (denom_method == "stratified"){
#print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/(low_left + low_right)))
} else{
S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/low))
}
#print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/(low_left + low_right)))
}

if (curr_length == 0){
Expand Down
51 changes: 13 additions & 38 deletions R/compute_prodint.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,17 @@
#' @param p_uncens Prediction of the probability of being uncensored
#' @param newtimes Times at which to make the prediction
#' @param time_grid Grid of time points over which to discretize the product integral
#' @param denom_method Method of computing the denominator
#'
#' @return A vector of estimates of the survival function over \code{time_grid}
#'
#' @noRd
compute_prodint <- function(cdf_uncens,
cdf_cens = NA,
cdf_marg = NA,
entry_uncens = NA,
entry_cens = NA,
entry_marg = NA,
p_uncens,
newtimes,
time_grid,
denom_method = "stratified",
truncation = TRUE){


Expand All @@ -40,46 +36,25 @@ compute_prodint <- function(cdf_uncens,
### CHECK TO MAKE SURE THIS IS CORRECT WITH THE DISCRETIZATION OF TIME

if (!truncation){ # truncation
if (denom_method != "stratified"){# marginal
S_Y_curr <- cdf_marg[1:curr_length]
S_Y_pred_left <- c(1, 1-S_Y_curr[-length(S_Y_curr)])
low <- S_Y_pred_left
} else{
S_Y_0_curr <- cdf_cens[1:curr_length]
S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
low_right <- S_Y_0_pred_left * (1 - p_uncens)
low_left <- S_Y_1_pred_left * p_uncens
}
S_Y_0_curr <- cdf_cens[1:curr_length]
S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
low_right <- S_Y_0_pred_left * (1 - p_uncens)
low_left <- S_Y_1_pred_left * p_uncens

# product form
if (denom_method == "stratified"){
S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
} else{
S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/low)
}
S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
} else{ # if there is truncation
if (denom_method != "stratified"){
S_Y_curr <- cdf_marg[1:curr_length]
F_W_curr <- entry_marg[1:curr_length]
S_Y_pred_left <- c(1, 1-S_Y_curr[-length(S_Y_curr)])
low <- S_Y_pred_left * F_W_curr
} else{ # marginal
S_Y_0_curr <- cdf_cens[1:curr_length]
S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
F_W_1_curr <- entry_uncens[1:curr_length]
F_W_0_curr <-entry_cens[1:curr_length]
low_right <- F_W_0_curr * S_Y_0_pred_left * (1 - p_uncens)
low_left <- F_W_1_curr * S_Y_1_pred_left * p_uncens
}
S_Y_0_curr <- cdf_cens[1:curr_length]
S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
F_W_1_curr <- entry_uncens[1:curr_length]
F_W_0_curr <-entry_cens[1:curr_length]
low_right <- F_W_0_curr * S_Y_0_pred_left * (1 - p_uncens)
low_left <- F_W_1_curr * S_Y_1_pred_left * p_uncens


# product form
if (denom_method == "stratified"){
#print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
} else{
S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/low)
}
#print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
}

if (curr_length == 0){
Expand Down
31 changes: 16 additions & 15 deletions R/f_w_algorithms.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ f_w_stack_SuperLearner <- function(time,
entry,
X,
censored,
time_grid,
bin_size,
SL_control,
time_basis,
Expand All @@ -39,25 +40,26 @@ f_w_stack_SuperLearner <- function(time,
obsWeights <- SL_control$obsWeights
}

if (is.null(time_grid)){
bin_variable <- time
if (!is.null(bin_size)){
#time_grid <- quantile(dat$time, probs = seq(0, 1, by = bin_size))
time_grid <- sort(unique(stats::quantile(bin_variable, type = 1, probs = seq(0, 1, by = bin_size))))
time_grid <- c(0, time_grid) # 013123 changed this to try to get better predictions at time 0
#time_grid[1] <- 0 # manually set first point to 0, instead of first observed time
} else{
time_grid <- sort(unique(bin_variable))
time_grid <- c(0, time_grid)
}
}

cv_folds <- split(sample(1:length(time)), rep(1:SL_control$V, length = length(time)))

X <- as.matrix(X)
time <- as.matrix(time)
entry <- as.matrix(entry)
dat <- data.frame(X, time, entry)


if (!is.null(bin_size)){
#time_grid <- quantile(dat$time, probs = seq(0, 1, by = bin_size))
time_grid <- sort(unique(stats::quantile(time, probs = seq(0, 1, by = bin_size))))
time_grid <- c(0, time_grid) # 013123 changed this to try to get better predictions at time 0
#time_grid[1] <- 0 # manually set first point to 0, instead of first observed time
} else{
time_grid <- sort(unique(time))
time_grid <- c(0, time_grid)

}

ids <- seq(1:length(time))

if (!is.null(obsWeights)){
Expand All @@ -72,14 +74,13 @@ f_w_stack_SuperLearner <- function(time,
stacked <- stack_entry(time = time,
entry = entry,
X = stackX,
time_grid = time_grid,
time_basis = "continuous")
time_grid = time_grid)

# change t to dummy variable
if (time_basis == "dummy"){
stacked$t <- factor(stacked$t)
dummy_mat <- stats::model.matrix(~-1 + t, data=stacked)
risk_set_names <- paste0("risk_set_", seq(1, (length(time_grid))))
risk_set_names <- paste0("risk_set_", seq(1, (length(time_grid)-1)))
colnames(dummy_mat) <- risk_set_names
stacked$t <- NULL
stacked <- cbind(dummy_mat, stacked)
Expand Down
2 changes: 2 additions & 0 deletions R/f_w_stack.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ f_w_stack <- function(time,
X,
entry,
censored,
time_grid,
bin_size,
learner = "SuperLearner",
SL_control,
Expand All @@ -48,6 +49,7 @@ f_w_stack <- function(time,
event = event,
X = X,
censored = censored,
time_grid = time_grid,
bin_size = bin_size,
SL_control = SL_control,
time_basis = time_basis,
Expand Down

0 comments on commit d7ebb24

Please sign in to comment.