Merge branch 'master' of https://github.com/boost-R/mboost

boost-R · Dec 1, 2017 · bc52c9a · bc52c9a
2 parents 363fd5c + a5311a3
commit bc52c9a
Show file tree

Hide file tree

Showing 4 changed files with 22 additions and 54 deletions.
diff --git a/R/crossvalidation.R b/R/crossvalidation.R
@@ -9,7 +9,7 @@ cvrisk <- function(object, ...)
 
 cvrisk.mboost <- function (object, folds = cv(model.weights(object)),
                            grid = 0:mstop(object), papply = mclapply,
-                           fun = NULL, corrected = TRUE, mc.preschedule = FALSE,
+                           fun = NULL, mc.preschedule = FALSE,
                            ...) {
 
     papply <- match.fun(papply)
@@ -29,44 +29,14 @@ cvrisk.mboost <- function (object, folds = cv(model.weights(object)),
     fam_name <- object$family@name
     call <- deparse(object$call)
     if (is.null(fun)) {
-        if (fam_name != "Cox Partial Likelihood" || !corrected) {
-            dummyfct <- function(weights, oobweights) {
-                mod <- fitfct(weights = weights, oobweights = oobweights)
-                mstop(mod) <- max(grid)
-                ## return all risk values in grid (+ 1 as 0 is included)
-                risk(mod)[grid + 1]
-            }
-        } else {
-            ## If family = CoxPH(), cross-validation needs to be computed as in
-            ## Verweij, van Houwelingen (1993), Cross-validation in survival
-            ## analysis, Statistics in Medicine, 12:2305-2314.
-            plloss <- environment(object$family@risk)[["plloss"]]
-
-            if (is.null(fun)) {
-                if (0 %in% grid) {
-                    warning("All values in ", sQuote("grid"), " must be greater 0 if ",
-                            'family = "CoxPH", hence 0 is dropped from grid')
-                    grid <- grid[grid != 0]
-                }
-                dummyfct <- function(weights, oobweights) {
-                    ## <FIXME> Should the risk be computed on the inbag
-                    ## (currently done) or on the oobag observations?
-                    mod <- fitfct(weights = weights, oobweights = oobweights,
-                                  risk = "inbag")
-                    mstop(mod) <- max(grid)
-
-                    pr <- predict(mod, aggregate = "cumsum")
-                    ## <FIXME> are the weights w really equal to 1? Shouldn't it
-                    ## be equal to the original fitting weights? Is this
-                    ## computed on ALL observations (currently done) or only on
-                    ## the OOBAG observations?
-                    lplk <- apply(pr[, grid], 2, function(f)
-                        sum(plloss(y = object$response, f = f, w = 1)))
-                    ## return negative "cvl"
-                    - mod$risk()[grid] - lplk
-                }
-            }
+        dummyfct <- function(weights, oobweights) {
+            mod <- fitfct(weights = weights, oobweights = oobweights)
+            mstop(mod) <- max(grid)
+            ## return all risk values in grid (+ 1 as 0 is included)
+            risk(mod)[grid + 1]
         }
+        if (fam_name == "Cox Partial Likelihood" && all(rowSums(folds == 0) == 1))
+            stop("Leave-one-out cross-validation cannot be used with ", sQuote("family = CoxPH()"))
     } else { ## !is.null(fun)
         dummyfct <- function(weights, oobweights) {
             mod <- fitfct(weights = weights, oobweights = oobweights)

diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
@@ -1,6 +1,15 @@
 \name{NEWS}
 \title{News for Package 'mboost'}
 
+\section{Changes in mboost version 2.8-2 (2017-xx-yy)}{
+  \subsection{User-visible changes}{
+    \itemize{
+      \item Removed corrected cross-validation for Cox models (Verweij and van Houwelingen, 1993)
+        as it was not working. Closes \href{https://github.com/boost-R/mboost/issues/85}{#85}.
+    }
+  }
+}
+
 \section{Changes in mboost version 2.8-1 (2017-07-19)}{
   \subsection{User-visible changes}{
     \itemize{

diff --git a/man/cvrisk.Rd b/man/cvrisk.Rd
@@ -13,7 +13,7 @@
 \method{cvrisk}{mboost}(object, folds = cv(model.weights(object)),
        grid = 0:mstop(object),
        papply = mclapply,
-       fun = NULL, corrected = TRUE, mc.preschedule = FALSE, ...)
+       fun = NULL, mc.preschedule = FALSE, ...)
 cv(weights, type = c("bootstrap", "kfold", "subsampling"),
    B = ifelse(type == "kfold", 10, 25), prob = 0.5, strata = NULL)
 
@@ -40,11 +40,6 @@ cv(weights, type = c("bootstrap", "kfold", "subsampling"),
   \item{fun}{ if \code{fun} is NULL, the out-of-sample risk is returned. \code{fun},
               as a function of \code{object}, may extract any other characteristic
               of the cross-validated models. These are returned as is.}
-  \item{corrected}{ if \code{TRUE}, the corrected cross-validation
-    scheme of Verweij and van Houwelingen (1993) is used in case of Cox
-    models. Otherwise, the naive standard cross-validation scheme is
-    used.
-  }
   \item{mc.preschedule}{
     preschedule tasks if are parallelized using \code{\link{mclapply}}
     (default: \code{FALSE})? For details see \code{\link{mclapply}}.
@@ -112,9 +107,6 @@ cv(weights, type = c("bootstrap", "kfold", "subsampling"),
     Medicine}, \bold{51}, 178--186. \cr
   DOI: \url{http://dx.doi.org/10.3414/ME11-02-0030}
 
-  Verweij and van Houwelingen (1993). Cross-validation in survival
-  analysis. \emph{Statistics in Medicine}, \bold{12}:2305--2314.
-
 }
 \seealso{\code{\link{AIC.mboost}} for
   \code{AIC} based selection of the stopping iteration. Use \code{mstop}

diff --git a/tests/testthat/test-cvrisk.R b/tests/testthat/test-cvrisk.R
@@ -74,14 +74,11 @@ if (require("survival")) {
     fm <- Surv(futime,fustat) ~ age + resid.ds + rx + ecog.ps
     fit <- glmboost(fm, data = ovarian, family = CoxPH())
 
-    test_that("corrected crossvalidation works for CoxPH models", {
-        expect_warning(cvrisk(fit, corrected = TRUE), "All values in .*grid.* must be greater 0 if family = .*CoxPH.*, hence 0 is dropped from grid")
+    test_that("crossvalidation works for CoxPH models", {
+        expect_error(cvrisk(fit, folds = cv(weights = model.weights(fit), type = "kfold", B = nrow(ovarian)),
+                            grid = 0:10), "Leave-one-out cross-validation cannot be used with .*family = CoxPH().*")
 
-        expect_silent(cvr <- cvrisk(fit, grid = seq(1, 101, by = 2), corrected = TRUE))
-        expect_equal(dim(cvr), c(25, 51))
-        ## expect_gt(mstop(cvr), 1) ## currently broken
-
-        expect_silent(cvr_uncor <- cvrisk(fit, grid = seq(0, 10, by = 2), corrected = FALSE))
+        expect_silent(cvr_uncor <- cvrisk(fit, grid = seq(0, 10, by = 2)))
         expect_equal(dim(cvr_uncor), c(25, 6))
         expect_gt(mstop(cvr_uncor), 0)
     })