version 1.1.0

cran · Mar 18, 2024 · d7ebb24 · d7ebb24
1 parent 9770bba
commit d7ebb24
Show file tree

Hide file tree

Showing 30 changed files with 1,808 additions and 1,666 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: survML
 Title: Flexible Estimation of Conditional Survival Functions Using
         Machine Learning
-Version: 1.0.0
+Version: 1.1.0
 Authors@R: 
     person(given = "Charles",
            family = "Wolock",
@@ -11,21 +11,22 @@ Authors@R:
 Description: Tools for flexible estimation of conditional survival 
               functions using off-the-shelf machine learning tools. Implements both 
               global and local survival stacking. See Wolock CJ, Gilbert PB,
-              Simon N, and Carone M (2022+) <arXiv:2211.03031>. 
+              Simon N, and Carone M (2024) <doi:10.1080/10618600.2024.2304070>. 
 License: GPL (>= 3)
 Encoding: UTF-8
 RoxygenNote: 7.2.3
 Depends: SuperLearner (>= 2.0.28),
 Imports: Iso (>= 0.0.18.1)
-Suggests: knitr, rmarkdown, testthat (>= 3.0.0), ggplot2 (>= 3.4.0)
+Suggests: knitr, rmarkdown, testthat (>= 3.0.0), ggplot2 (>= 3.4.0),
+        gam (>= 1.22.0)
 Config/testthat/edition: 3
 VignetteBuilder: knitr
 URL: https://github.com/cwolock/survML
 BugReports: https://github.com/cwolock/survML/issues
 NeedsCompilation: no
-Packaged: 2023-07-08 03:44:06 UTC; cwolo
+Packaged: 2024-03-17 01:51:19 UTC; cwolock
 Author: Charles Wolock [aut, cre, cph]
     (<https://orcid.org/0000-0003-3527-1102>)
 Maintainer: Charles Wolock <cwolock@gmail.com>
 Repository: CRAN
-Date/Publication: 2023-07-08 10:00:03 UTC
+Date/Publication: 2024-03-17 05:30:02 UTC
diff --git a/MD5 b/MD5
@@ -1,30 +1,31 @@
-aeff46b5e4942b2dd645ca3ddf35b5ac *DESCRIPTION
-d39ad963a5e1c570e808509b5c80e3e4 *NAMESPACE
-de25bc578afd9862e67d21e1ab6b8d02 *NEWS.md
-54b026c9cd55a89e540676ebab216347 *R/compute_exponential.R
-0097b169aa4f3fea293a70d5e28b314f *R/compute_prodint.R
-0f892603dd645000247c30ba4ef4f30f *R/f_w_algorithms.R
-cec78aef17eaae98749accf49fc3c73c *R/f_w_stack.R
-70951adaebc9bdd077817e3dc36fb496 *R/f_y_algorithms.R
-243fb2fb5f09c3e0bf3e7b4d93aac283 *R/f_y_stack.R
+2052a2513c0d213530fee018d79f1684 *DESCRIPTION
+2586a2c9c655f54f5593fcb264f6e397 *NAMESPACE
+fb80267d6795697c10442c478071ef1f *NEWS.md
+903b9f8bdaa3e09eb0eeb0eeb7b47cb5 *R/compute_exponential.R
+01398ba4798ae48a5c4c9ee4ccd67fa8 *R/compute_prodint.R
+168d203319ae88cb642176802a5c27d3 *R/f_w_algorithms.R
+8a9defc4d3a638aa3f08b7b1304c819f *R/f_w_stack.R
+0cd8a4c8489e93682e1418bd1fd489a4 *R/f_y_algorithms.R
+0b3a4e3dd130ebab1b997af2a314fe01 *R/f_y_stack.R
 8cf292f37cf7cd8ab9171885c7d93e6a *R/p_delta.R
 b26b3f5e7e63ddc7ffb29d2659915a8b *R/p_delta_algorithms.R
-d454895b92cb2fe59cc45d1636343541 *R/stackG.R
-12a123bb248c84ccb584cd983691ef78 *R/stackL.R
-dd57200c375a6bdc8c6f64098df64967 *R/stack_utils.R
-64e5a94366c18effd2447b56565194c9 *R/survML-package.R
-838384e28a205ec68bb86f110c783b4a *README.md
-58d1f4a857b195daeda0cc3a888fb14e *build/vignette.rds
-d062c8083254adb9135dcc5bfc408846 *inst/doc/basic-usage.R
-550ac35b81de5013b44878e4af404056 *inst/doc/basic-usage.Rmd
-2a8aa5ce1f334c92a6d0ffb59f5a137c *inst/doc/basic-usage.html
-59931622ce6db2589489f6173b41e0d7 *man/figures/README-plot_stackG_example-1.png
-716a866567c74507b118d4759e8852b4 *man/figures/README-plot_stackG_example_cens-1.png
-a347897dc1f2670115fa571318719f73 *man/figures/README-plot_stackL_example-1.png
-51d2f7dea4b4a55df03acfb6ded10dff *man/stackG.Rd
-3485c75a57f68abe5134edbfb1b33481 *man/stackL.Rd
-156366dd8615a91d1dfdc2aed0b47239 *man/survML-package.Rd
-73f67fbeaaabe941df28192ecc0fc31b *tests/testthat.R
-132be4d2f1e9aa9648849042e64f5029 *tests/testthat/test-test-stackG.R
-cefe3880a80221d843986b4f61a99a66 *tests/testthat/test-test-stackL.R
-550ac35b81de5013b44878e4af404056 *vignettes/basic-usage.Rmd
+bfe5c54b71ba6fa0f038be3efbe9cdbc *R/stackG.R
+f6f9d52da3883d68efaa93c4b3fc28fd *R/stackL.R
+94e5b1527e501b83ebd91a44e5c59b0d *R/stack_utils.R
+e15545709dc5c7353d5dd6e8690d76eb *R/survML-package.R
+1ab5cae4d4b000f3dd425da7cb045fa1 *README.md
+8faf250d78a120ba4c7ea2186efddd24 *build/partial.rdb
+611756181ea4b004b3ecddaefca9f579 *build/vignette.rds
+6dbfdb33c122d98ab0a38f51ee6ed758 *inst/doc/basic-usage.R
+8b6abf6a187879862f1a4ac7aa8e2624 *inst/doc/basic-usage.Rmd
+cdad9f88514aa186e9b9fb860e430cd3 *inst/doc/basic-usage.html
+13b0f17b5a2110a879af8d37e8cc09be *man/figures/README-plot_stackG_example-1.png
+0ffa0573da00d663dc62fb48c31eb615 *man/figures/README-plot_stackG_example_cens-1.png
+5db83c9953582a5d7d1d2e067014f33d *man/figures/README-plot_stackL_example-1.png
+a5c8d270a155f892e4d189c709d58172 *man/stackG.Rd
+48596b087162b64e027fd12b904a7d7e *man/stackL.Rd
+6fd5182c3aa78106da4c158e73b7180e *man/survML-package.Rd
+dd2e0738cb4bd538970a254ab5f2eed6 *tests/testthat.R
+f1ccce52ca681de696eb02af109d7ac5 *tests/testthat/test-test-stackG.R
+88198a235a81bc0bafd333998d9b22d3 *tests/testthat/test-test-stackL.R
+8b6abf6a187879862f1a4ac7aa8e2624 *vignettes/basic-usage.Rmd
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,7 @@
-# Generated by roxygen2: do not edit by hand
-
-S3method(predict,stackG)
-S3method(predict,stackL)
-export(stackG)
-export(stackL)
-importFrom(SuperLearner,SuperLearner)
+# Generated by roxygen2: do not edit by hand
+
+S3method(predict,stackG)
+S3method(predict,stackL)
+export(stackG)
+export(stackL)
+importFrom(SuperLearner,SuperLearner)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# survML 1.1.0
+
+* Added `gam` to `SUGGESTS` in order to allow `SuperLearner` package to make corresponding change without breaking vignettes. 
+* Added `time_grid_fit` option to main `stackG` function in order to allow more flexibility in choosing time grids. 
+* Minor bug fixes. 
+
 # survML 1.0.0
 
 * Initial CRAN submission.
diff --git a/R/compute_exponential.R b/R/compute_exponential.R
@@ -11,16 +11,13 @@
 #'
 #' @noRd
 compute_exponential <- function(cdf_uncens,
-                            cdf_cens = NA,
-                            cdf_marg = NA,
-                            entry_uncens = NA,
-                            entry_cens = NA,
-                            entry_marg = NA,
-                            p_uncens,
-                            newtimes,
-                            time_grid,
-                            denom_method = "stratified",
-                            truncation = TRUE){
+                                cdf_cens = NA,
+                                entry_uncens = NA,
+                                entry_cens = NA,
+                                p_uncens,
+                                newtimes,
+                                time_grid,
+                                truncation = TRUE){
 
   if (time_grid[1] == 0){ # 013123 added this to try to get better predictions at time 0
     time_grid <- time_grid[-1]
@@ -37,49 +34,26 @@ compute_exponential <- function(cdf_uncens,
     S_Y_1_pred_left <- c(1, 1-S_Y_1_curr[-length(S_Y_1_curr)])# probability of being "at risk" at time t
     ### CHECK TO MAKE SURE THIS IS CORRECT WITH THE DISCRETIZATION OF TIME
 
-    if (!truncation){ # truncation
-      if (denom_method != "stratified"){# marginal
-        S_Y_curr <- cdf_marg[1:curr_length]
-        S_Y_pred_left <- c(1, 1-S_Y_curr[-length(S_Y_curr)])
-        low <- S_Y_pred_left
-      } else{
-        S_Y_0_curr <- cdf_cens[1:curr_length]
-        S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
-        low_right <- S_Y_0_pred_left * (1 - p_uncens)
-        low_left <- S_Y_1_pred_left * p_uncens
-      }
-
+    if (!truncation){ # no truncation
+      S_Y_0_curr <- cdf_cens[1:curr_length]
+      S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
+      low_right <- S_Y_0_pred_left * (1 - p_uncens)
+      low_left <- S_Y_1_pred_left * p_uncens
 
       # product form
-      if (denom_method == "stratified"){
-        #print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
-        S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/(low_left + low_right)))
-      } else{
-        S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/low))
-      }
+      #print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
+      S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/(low_left + low_right)))
     } else{ # if there is truncation
-      if (denom_method != "stratified"){
-        S_Y_curr <- cdf_marg[1:curr_length]
-        F_W_curr <- entry_marg[1:curr_length]
-        S_Y_pred_left <- c(1, 1-S_Y_curr[-length(S_Y_curr)])
-        low <- S_Y_pred_left * F_W_curr
-      } else{ # marginal
-        S_Y_0_curr <- cdf_cens[1:curr_length]
-        S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
-        F_W_1_curr <- entry_uncens[1:curr_length]
-        F_W_0_curr <-entry_cens[1:curr_length]
-        low_right <- F_W_0_curr * S_Y_0_pred_left * (1 - p_uncens)
-        low_left <- F_W_1_curr * S_Y_1_pred_left * p_uncens
-      }
-
+      S_Y_0_curr <- cdf_cens[1:curr_length]
+      S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
+      F_W_1_curr <- entry_uncens[1:curr_length]
+      F_W_0_curr <-entry_cens[1:curr_length]
+      low_right <- F_W_0_curr * S_Y_0_pred_left * (1 - p_uncens)
+      low_left <- F_W_1_curr * S_Y_1_pred_left * p_uncens
 
       # product form
-      if (denom_method == "stratified"){
-        #print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
-        S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/(low_left + low_right)))
-      } else{
-        S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/low))
-      }
+      #print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
+      S_T_est <- exp(-sum(p_uncens * dF_Y_1_pred/(low_left + low_right)))
     }
 
     if (curr_length == 0){

diff --git a/R/compute_prodint.R b/R/compute_prodint.R
@@ -5,21 +5,17 @@
 #' @param p_uncens Prediction of the probability of being uncensored
 #' @param newtimes Times at which to make the prediction
 #' @param time_grid Grid of time points over which to discretize the product integral
-#' @param denom_method Method of computing the denominator
 #'
 #' @return A vector of estimates of the survival function over \code{time_grid}
 #'
 #' @noRd
 compute_prodint <- function(cdf_uncens,
                             cdf_cens = NA,
-                            cdf_marg = NA,
                             entry_uncens = NA,
                             entry_cens = NA,
-                            entry_marg = NA,
                             p_uncens,
                             newtimes,
                             time_grid,
-                            denom_method = "stratified",
                             truncation = TRUE){
 
 
@@ -40,46 +36,25 @@ compute_prodint <- function(cdf_uncens,
     ### CHECK TO MAKE SURE THIS IS CORRECT WITH THE DISCRETIZATION OF TIME
 
     if (!truncation){ # truncation
-      if (denom_method != "stratified"){# marginal
-        S_Y_curr <- cdf_marg[1:curr_length]
-        S_Y_pred_left <- c(1, 1-S_Y_curr[-length(S_Y_curr)])
-        low <- S_Y_pred_left
-      } else{
-        S_Y_0_curr <- cdf_cens[1:curr_length]
-        S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
-        low_right <- S_Y_0_pred_left * (1 - p_uncens)
-        low_left <- S_Y_1_pred_left * p_uncens
-      }
+      S_Y_0_curr <- cdf_cens[1:curr_length]
+      S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
+      low_right <- S_Y_0_pred_left * (1 - p_uncens)
+      low_left <- S_Y_1_pred_left * p_uncens
 
       # product form
-      if (denom_method == "stratified"){
-        S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
-      } else{
-        S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/low)
-      }
+      S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
     } else{ # if there is truncation
-      if (denom_method != "stratified"){
-        S_Y_curr <- cdf_marg[1:curr_length]
-        F_W_curr <- entry_marg[1:curr_length]
-        S_Y_pred_left <- c(1, 1-S_Y_curr[-length(S_Y_curr)])
-        low <- S_Y_pred_left * F_W_curr
-      } else{ # marginal
-        S_Y_0_curr <- cdf_cens[1:curr_length]
-        S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
-        F_W_1_curr <- entry_uncens[1:curr_length]
-        F_W_0_curr <-entry_cens[1:curr_length]
-        low_right <- F_W_0_curr * S_Y_0_pred_left * (1 - p_uncens)
-        low_left <- F_W_1_curr * S_Y_1_pred_left * p_uncens
-      }
+      S_Y_0_curr <- cdf_cens[1:curr_length]
+      S_Y_0_pred_left <- c(1, 1-S_Y_0_curr[-length(S_Y_0_curr)])# probability of being "at risk" at time t
+      F_W_1_curr <- entry_uncens[1:curr_length]
+      F_W_0_curr <-entry_cens[1:curr_length]
+      low_right <- F_W_0_curr * S_Y_0_pred_left * (1 - p_uncens)
+      low_left <- F_W_1_curr * S_Y_1_pred_left * p_uncens
 
 
       # product form
-      if (denom_method == "stratified"){
-        #print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
-        S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
-      } else{
-        S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/low)
-      }
+      #print(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
+      S_T_est <- prod(1 - p_uncens * dF_Y_1_pred/(low_left + low_right))
     }
 
     if (curr_length == 0){

diff --git a/R/f_w_algorithms.R b/R/f_w_algorithms.R
@@ -15,6 +15,7 @@ f_w_stack_SuperLearner <- function(time,
                                    entry,
                                    X,
                                    censored,
+                                   time_grid,
                                    bin_size,
                                    SL_control,
                                    time_basis,
@@ -39,25 +40,26 @@ f_w_stack_SuperLearner <- function(time,
     obsWeights <- SL_control$obsWeights
   }
 
+  if (is.null(time_grid)){
+    bin_variable <- time
+    if (!is.null(bin_size)){
+      #time_grid <- quantile(dat$time, probs = seq(0, 1, by = bin_size))
+      time_grid <- sort(unique(stats::quantile(bin_variable, type = 1, probs = seq(0, 1, by = bin_size))))
+      time_grid <- c(0, time_grid) # 013123 changed this to try to get better predictions at time 0
+      #time_grid[1] <- 0 # manually set first point to 0, instead of first observed time
+    } else{
+      time_grid <- sort(unique(bin_variable))
+      time_grid <- c(0, time_grid)
+    }
+  }
+
   cv_folds <- split(sample(1:length(time)), rep(1:SL_control$V, length = length(time)))
 
   X <- as.matrix(X)
   time <- as.matrix(time)
   entry <- as.matrix(entry)
   dat <- data.frame(X, time, entry)
 
-
-  if (!is.null(bin_size)){
-    #time_grid <- quantile(dat$time, probs = seq(0, 1, by = bin_size))
-    time_grid <- sort(unique(stats::quantile(time, probs = seq(0, 1, by = bin_size))))
-    time_grid <- c(0, time_grid) # 013123 changed this to try to get better predictions at time 0
-    #time_grid[1] <- 0 # manually set first point to 0, instead of first observed time
-  } else{
-    time_grid <- sort(unique(time))
-    time_grid <- c(0, time_grid)
-
-  }
-
   ids <- seq(1:length(time))
 
   if (!is.null(obsWeights)){
@@ -72,14 +74,13 @@ f_w_stack_SuperLearner <- function(time,
   stacked <- stack_entry(time = time,
                          entry = entry,
                          X = stackX,
-                         time_grid = time_grid,
-                         time_basis = "continuous")
+                         time_grid = time_grid)
 
   # change t to dummy variable
   if (time_basis == "dummy"){
     stacked$t <- factor(stacked$t)
     dummy_mat <- stats::model.matrix(~-1 + t, data=stacked)
-    risk_set_names <- paste0("risk_set_", seq(1, (length(time_grid))))
+    risk_set_names <- paste0("risk_set_", seq(1, (length(time_grid)-1)))
     colnames(dummy_mat) <- risk_set_names
     stacked$t <- NULL
     stacked <- cbind(dummy_mat, stacked)

diff --git a/R/f_w_stack.R b/R/f_w_stack.R
@@ -37,6 +37,7 @@ f_w_stack <- function(time,
                       X,
                       entry,
                       censored,
+                      time_grid,
                       bin_size,
                       learner = "SuperLearner",
                       SL_control,
@@ -48,6 +49,7 @@ f_w_stack <- function(time,
                                   event = event,
                                   X = X,
                                   censored = censored,
+                                  time_grid = time_grid,
                                   bin_size = bin_size,
                                   SL_control = SL_control,
                                   time_basis = time_basis,