Implement RCG family and downstream tests; closes #86

boost-R · May 15, 2018 · 45f6b05 · 45f6b05
1 parent fa0d6fe
commit 45f6b05
Show file tree

Hide file tree

Showing 5 changed files with 176 additions and 7 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -22,7 +22,7 @@ export(glmboost,
        boost_control, mstop, Family,
        GaussReg, Gaussian, GaussClass, Laplace, Binomial, Poisson, GammaReg, QuantReg,
        ExpectReg, NBinomial, PropOdds, Weibull, Loglog, Lognormal, AUC, mboost_fit,
-       Huber, AdaExp, Gehan, Cindex, CoxPH, Hurdle, Multinomial, FP, IPCweights,
+       Huber, AdaExp, Gehan, Cindex, CoxPH, Hurdle, Multinomial, RCG, FP, IPCweights,
        cvrisk, cv, 
        bbs, bols, bspatial, brandom, btree, bss, bns, brad, bkernel, bmono, bmrf, buser,
        survFit, selected, selected.mboost,
@@ -31,6 +31,7 @@ export(glmboost,
        predict.mboost, predict.glmboost,
        stabsel.mboost, stabsel_parameters.mboost,
        confint.mboost, confint.glmboost, plot.mboost.ci, lines.mboost.ci, print.glmboost.ci,
+       downstream.test,
        varimp, plot.varimp, as.data.frame.varimp,
        mboost_intern)
        ###, basesel, fitsel)

diff --git a/R/family.R b/R/family.R
@@ -1140,3 +1140,90 @@ Cindex <- function (sigma = 0.1, ipcw = 1) {
 }
 
 
+# RCG model for bounded response variables, log link
+RCG <- function (nuirange = c(0, 1), offrange = c(-5, 5)) {
+    sigma <- c(1, 0.2)
+    plloss <- function(sigma, y, f, w = 1) {
+        alpha <- sigma[1]
+        rho <- sigma[2]
+        if (length(f) == 1)
+            f <- rep(f, length(y))
+        my <- 1 + (exp(f) - 1) * y
+        dmy <- my ^ 2 - 4 * rho * exp(f) * y * (1 - y)
+        fy <- lgamma(2 * alpha) - 2 * lgamma(alpha) + alpha * f + 
+            alpha * log(1 - rho) + log(my) + (alpha - 1) * log(y * (1 - y)) - 
+            (alpha + 0.5) * log(dmy)
+        return(-fy)
+    }
+    riskS <- function(sigma, y, fit, w = 1)
+        sum(w * plloss(y = y, f = fit, sigma = sigma))
+    risk <- function(y, f, w = 1)
+        sum(w * plloss(y = y, f = f, sigma = sigma))
+    ngradient <- function(y, f, w = 1) {
+        sigma <<- optim(par = sigma, fn = riskS, y = y, fit = f,
+            lower = c(0.001, 0.001), upper = c(100, 0.99),
+            w = w, method = "L-BFGS-B")$par
+        if (length(f) == 1)
+            f <- rep(f, length(y))
+        alpha <- sigma[1]
+        rho <- sigma[2]
+        my <- 1 + (exp(f) - 1) * y
+        dmy <- my ^ 2 - 4 * rho * exp(f) * y * (1 - y)
+        return(alpha + (y * exp(f)) / my - (2 * alpha + 1) * exp(f) *
+                   y * (my - 2 * rho * (1 - y)) / dmy)
+    }
+    offset <- function(y, w = 1)
+        0
+    dRCG <- function(f, sigma) {
+        alpha <- sigma[1]
+        rho <- sigma[2]
+        densw <- function(w, alpha, rho, f)
+            w * gamma(2 * alpha) / (gamma(alpha)) ^ 2 * (1 - rho) ^ alpha *
+            (exp(f))^alpha * ((exp(f) - 1) * w + 1) * (w * (1 - w))^(alpha - 1) /
+            ((((exp(f) - 1) * w + 1) ^ 2 - 4 * rho * exp(f) * w * (1 - w)) ^ (alpha + 0.5))
+        res <- rep(0, length(f))
+        for (k in 1:length(f))
+            res[k] <- integrate(densw, lower = 0, upper = 1, alpha, rho, f[k])$value
+        return(res)
+    }
+    response <- function(f, sigma)
+        dRCG(f, sigma)
+    Fisher <- function(y, alpha, rho, X, coefs) {
+        X <- data.frame(X)
+        f.i <- matrix(0, ncol = length(coefs), nrow = length(coefs))
+        for (i in 1:length(y)) {
+            wobs <- as.numeric(y[i])
+            x <- as.matrix(X[i,])
+            xt <- t(x)
+            xtx <- xt %*% x
+            exb <- as.numeric(exp(x %*% coefs))
+            D1 <- (exb - 1) * wobs + 1
+            D2 <- D1 ^ 2 - 4 * rho * exb * wobs * (1 - wobs)
+            f.i <- f.i + (D1 * wobs * exb - (wobs * exb) ^ 2) * xtx / (D1 ^ 2) -
+                (alpha + 0.5) / (D2 ^ 2) * 
+                (2 * wobs * exb * xtx * (2 * wobs * exb - wobs + 1 - 2 * rho * (1 - wobs)) 
+                 * D2 - xtx * (2 * wobs * exb * (D1 - 2 * rho * (1 - wobs)))^2)
+        }
+        Fisher.information <- -f.i
+        return(Fisher.information)
+    }
+
+    Family(
+        ngradient = ngradient,
+        risk = risk,
+        offset = offset,
+        # Fisher = Fisher,
+        check_y = function(y) {
+            if(!all(y < 1 & y > 0))
+                stop("y must be < 1 and > 0")
+            y
+        },
+        nuisance = function()
+            return(sigma),
+        response = function(f)
+            response(f, sigma = sigma),
+        rclass = function(f)
+            f,
+        name = "Ratio of Correlated Gammas (RCG)"
+    )
+}
diff --git a/R/methods.R b/R/methods.R
@@ -641,3 +641,25 @@ risk <- function(object, ...)
 risk.mboost <- function(object, ...) {
     object$risk()
 }
+
+downstream.test <- function(object, ...) {
+    if (object$family@name != "Ratio of Correlated Gammas (RCG)")
+        stop("downstream tests currently only implemented for RCG family")
+    if (!inherits(object, "glmboost"))
+        stop("downstream tests currently only implemented for linear models, i.e., glmboost")
+
+    # calculate Fisher information matrix
+    alpha <- nuisance(object)[1]
+    rho <- nuisance(object)[2]
+    coefs <- coef(object, which = "")
+    y <- object$response
+    ## do we need the original design matrix (X1) or the centered design (X) matrix?
+    X <- as.data.frame(extract(object, what = "design"))
+    X1 <- data.frame(model.matrix(~ x1 + x2))
+
+    Fisher <- environment(object$family@ngradient)[["Fisher"]](y, alpha, rho, X1, coefs)
+
+    # downstream hypothesis tests
+    p_value <- sapply(1:length(coefs), function(i) (1 - pnorm(abs(coefs[i]) / sqrt(solve(Fisher)[i, i]))) * 2)
+    return(p_value)
+}
diff --git a/man/Family.Rd b/man/Family.Rd
@@ -22,6 +22,8 @@
 \alias{Hurdle}
 \alias{Multinomial}
 \alias{Cindex}
+\alias{RCG}
+
 \title{ Gradient Boosting Families }
 \description{
     \code{boost_family} objects provide a convenient way to specify loss functions
@@ -62,6 +64,8 @@ Gehan()
 Hurdle(nuirange = c(0, 100))
 Multinomial()
 Cindex(sigma = 0.1, ipcw = 1)
+RCG(nuirange = c(0, 1), offrange = c(-5, 5))
+
 }
 \arguments{
   \item{ngradient}{ a function with arguments \code{y}, \code{f} and \code{w} implementing the
@@ -98,7 +102,7 @@ Cindex(sigma = 0.1, ipcw = 1)
           searched for the minimum risk w.r.t. the nuisance parameter.
 	  In case of \code{PropOdds}, the starting values for
 	  the nuisance parameters. }
-  \item{offrange}{ interval to search for offset in.}
+  \item{offrange}{ interval to search in for offset.}
   \item{sigma}{smoothness parameter for sigmoid functions inside \code{Cindex}.}
   \item{ipcw}{vector containing inverse probability of censoring weights for all observations. If omitted, it is estimated inside \code{Cindex} family.}
   \item{\dots}{ additional arguments to link functions.}
@@ -212,7 +216,17 @@ distribution approximation used in Ma and Huang (2005).
   (see \code{\link{Surv}} for details). For details on \code{Gehan()} see
   Johnson and Long (2011).
 
-  \code{Cindex()} optimizes the concordance-index for survival data (often denoted     as Harrell's C or C-index). The concordance index evaluates the rank-based           concordance probability between the model and the outcome. The C-index measures      whether large values of the model are associated with short survival times  and      vice versa. The interpretation is similar to the AUC: A C-index of 1 represents a    perfect discrimination while a C-index of 0.5 will be achieved by a completely       non-informative marker. The \code{Cindex()} family is based on an estimator by       Uno et al. (2011), which incorporates inverse probability of censoring weighting     \code{ipcw}. To make the estimator differentiable, sigmoid functions are applied;    the corresponding smoothness can be controlled via \code{sigma}. For details on      \code{Cindex()} see Mayr and Schmid (2014).
+  \code{Cindex()} optimizes the concordance-index for survival data (often denoted
+  as Harrell's C or C-index). The concordance index evaluates the rank-based
+  concordance probability between the model and the outcome. The C-index measures
+  whether large values of the model are associated with short survival times  and
+  vice versa. The interpretation is similar to the AUC: A C-index of 1 represents a
+  perfect discrimination while a C-index of 0.5 will be achieved by a completely
+  non-informative marker. The \code{Cindex()} family is based on an estimator by
+  Uno et al. (2011), which incorporates inverse probability of censoring weighting
+  \code{ipcw}. To make the estimator differentiable, sigmoid functions are applied;
+  the corresponding smoothness can be controlled via \code{sigma}. For details on
+  \code{Cindex()} see Mayr and Schmid (2014).
     
   
   Hurdle models for zero-inflated count data can be fitted by using a combination
@@ -230,7 +244,9 @@ distribution approximation used in Ma and Huang (2005).
   moment. The class corresponding to the last level of the factor coding
   of the response is used as reference class.
   
-  
+  \code{RCG()} implements the ratio of correlated gammas (RCG) model proposed
+  by Weinhold et al. (2016).
+
 }
 \section{Warning}{
   The coefficients resulting from boosting with family
@@ -302,7 +318,10 @@ distribution approximation used in Ma and Huang (2005).
     \emph{Annals of Applied Statistics}, \bold{5}, 1081--1101.
     
 
-  }
+    Weinhold, L., S. Pechlivanis, S. Wahl, P. Hoffmann and M. Schmid (2016) A Statistical Model for the
+    Analysis of Bounded Response Variables in DNA Methylation Studies.
+    \emph{BMC Bioinformatics}. 2016; 17: 480. \url{http://dx.doi.org/10.1186/s12859-016-1347-4}
+}
 \seealso{\code{\link{mboost}} for the usage of \code{Family}s. See
     \code{\link{boost_family-class}} for objects resulting from a call to \code{Family}. }
 \author{
@@ -369,6 +388,40 @@ pred2 <- predict(mlm, type = "response", newdata = newdata)
 pred[1, ]
 pred2
 
+### Example for RCG model
+## generate covariate values
+set.seed(12345)
+x1 <- rnorm(500)
+x2 <- rnorm(500)
+## generate linear predictors
+zetaM <- 0.1 + 0.3 * x1 - 0.5 * x2 
+zetaU <- 0.1 - 0.1 * x1 + 0.2 * x2
+## generate beta values
+M <- rgamma(500, shape = 2, rate = exp(zetaM))
+U <- rgamma(500, shape = 2, rate = exp(zetaU))
+y <- M / (M + U)
+
+## fit RCG model
+data <- data.frame(y, x1, x2)
+RCGmodel <- glmboost(y ~ x1 + x2, data = data, family = RCG(),
+                     control = boost_control(mstop = 2000,
+                     trace = TRUE, nu = 0.01))
+## true coefficients: gamma = (0.0, 0.4, -0.7),
+##                    alpha (= shape) = 2,
+##                    rho = 0
+## compare to coefficient estimates
+coef(RCGmodel)
+nuisance(RCGmodel)
+
+## compute downstream tests 
+## (only suitable without early stopping, i.e., if likelihood based model converged)
+downstream.test(RCGmodel)
+
+## compute conditional expectations
+predictions <- predict(RCGmodel, type = "response")
+plot(predictions, y)
+abline(0,1)
+
 \donttest{############################################################
 ## Do not run and check these examples automatically as
 ## they take some time

diff --git a/man/methods.Rd b/man/methods.Rd
@@ -51,8 +51,6 @@
 \alias{nuisance}
 \alias{nuisance.mboost}
 
-
-
 \title{ Methods for Gradient Boosting Objects }
 \description{
   Methods for models fitted by boosting algorithms.
@@ -114,6 +112,8 @@ mstop(x) <- value
 \method{risk}{mboost}(object, ...)
 
 \method{nuisance}{mboost}(object)
+
+downstream.test(object, ...)
 }
 \arguments{
   \item{object}{ objects of class \code{glmboost}, \code{gamboost},
@@ -302,6 +302,12 @@ mstop(x) <- value
   Note that \code{logLik} and \code{AIC} only make sense when the
   corresponding \code{\link{Family}} implements the appropriate loss
   function.
+  
+  \code{downstream.test} computes tests for linear models fitted via \code{\link{glmboost}}
+  with a likelihood based loss function and only suitable without early stopping, i.e., 
+  if likelihood based model converged. In order to work, the Fisher matrix must
+  be implemented in the \code{\link{Family}}; currently this is only the case for 
+  family \code{\link{RCG}}.
 
 }
 \note{