gamlss.dist::BB, gamlss.dist::BI don't work #12

fabian-s · 2016-04-08T09:09:52Z

These are supposed to be called with a 2-column response giving successes & failures, but that doesn't seem to have been implemented yet here.

I have an ugly fix for cases where # of trials is constant for all the data (see below), but that's unsatisfactory for most practical applications, probably.

> options(error=traceback, deparse.max.lines = 10)
> library("gamboostLSS")
> library("gamlss")
> 
> # y ~ Binomial(p = invlogit(x + z), n= 60)
> n <- 100
> x <- rnorm(n)
> z <- rnorm(n)
> data <- data.frame(y = rbinom(n, p = plogis(x + z), size = 60), x = x, z= z)
> data$ymat <- with(data, cbind(success = data$y, fail = 60 - data$y))
> (m_gamlss <- gamlss(ymat ~ x + z, data = data, family = BB))
GAMLSS-RS iteration 1: Global Deviance = 700.266 
GAMLSS-RS iteration 2: Global Deviance = 553.8168 
GAMLSS-RS iteration 3: Global Deviance = 532.8859 
GAMLSS-RS iteration 4: Global Deviance = 532.8656 
GAMLSS-RS iteration 5: Global Deviance = 532.8656 

Family:  c("BB", "Beta Binomial") 
Fitting method: RS() 

Call:  gamlss(formula = ymat ~ x + z, family = BB, data = data) 

Mu Coefficients:
(Intercept)            x            z  
  -0.005859     1.028064     0.994731  
Sigma Coefficients:
(Intercept)  
     -5.265  

 Degrees of Freedom for the fit: 4 Residual Deg. of Freedom   96 
Global Deviance:     532.866 
            AIC:     540.866 
            SBC:     551.286 
> # gamlss(ymat ~ x + z, data = data, family = BI) # samesame
> 
> Betabin <- as.families(fname="BB")
> gamboostLSS(ymat ~ x + z, data = data, families = Betabin)
Error in eval(expr, envir, enclos) : object 'bd' not found
No traceback available 
> # bd is the "binomial denominator" (i.e., # of trials), not being handed over!
> 
> #-------------------------------------------------------------------------------
> # ugly, hacky fix: set bd-defaults to 60 wherever it's needed
> Betabin60 <- Betabin
> add_bd_60 <- function(f){
+   frmls <- formals(f)
+   frmls$bd <- 60
+   formals(f) <- frmls
+   f
+ }
> with(environment(Betabin60[[1]]@ngradient), {
+   #all slots share same env, so enough to do this for one slot
+   FAM$dldm <- add_bd_60(FAM$dldm)
+   FAM$d2ldm2  <- add_bd_60(FAM$d2ldm2)
+   FAM$d2ldm2  <- add_bd_60(FAM$d2ldm2)
+   FAM$dldd  <- add_bd_60(FAM$dldd)
+   FAM$d2ldd2  <- add_bd_60(FAM$d2ldd2)
+   FAM$G.dev.incr <- add_bd_60(FAM$G.dev.incr)
+   FAM$mu.initial <- quote(mu <- (y + 0.5)/(60 + 1))
+   pdf <- add_bd_60(pdf)
+ })
> with(environment(Betabin60[[2]]@ngradient), {
+   FAM$dldm <- add_bd_60(FAM$dldm)
+   FAM$d2ldm2  <- add_bd_60(FAM$d2ldm2)
+   FAM$d2ldm2  <- add_bd_60(FAM$d2ldm2)
+   FAM$dldd  <- add_bd_60(FAM$dldd)
+   FAM$d2ldd2  <- add_bd_60(FAM$d2ldd2)
+   FAM$G.dev.incr <- add_bd_60(FAM$G.dev.incr)
+   FAM$mu.initial <- quote(mu <- (y + 0.5)/(60 + 1))
+   pdf <- add_bd_60(pdf)
+ })
> #-------------------------------------------------------------------------------
> data$ones <- rep(1, n)
> # matrix-response like gamlss produces multivariate model for success and failure
> # probabilities (I guess?!?), not model for sucess probabilities: 
> coef(gamboostLSS(list(
+   mu = ymat ~ bols(x) + bols(z), 
+   sigma = ymat ~ bols(ones, intercept=FALSE)),
+   data = data, families = Betabin60))
$mu
$mu$`bols(x)`
(Intercept)           x        <NA>        <NA> 
-0.20896247  0.97583503  0.02081331 -0.97472539 

$mu$`bols(z)`
  (Intercept)             z          <NA>          <NA> 
 0.0001546734  0.9402122029 -0.2000348943 -0.9390756321 

attr(,"offset")
[1] 0.1940493

$sigma
$sigma$`bols(ones, intercept = FALSE)`
     ones      <NA> 
-3.221820 -3.210209 

attr(,"offset")
[1] 0
>
> coef(gamboostLSS(list(
+   mu = y ~ bols(x) + bols(z),
+   sigma = y ~ bols(ones, intercept=FALSE)),
+   data = data, families = Betabin60))
$mu
$mu$`bols(x)`
(Intercept)           x 
 -0.2089625   0.9758350 

$mu$`bols(z)`
 (Intercept)            z 
0.0001546734 0.9402122029 

attr(,"offset")
[1] 0.1940493

$sigma
$sigma$`bols(ones, intercept = FALSE)`
    ones 
-3.22182 

attr(,"offset")
[1] 0

codefile is here: https://gist.github.com/fabian-s/b952063213f22c31ab9ea99f681a56d7

The text was updated successfully, but these errors were encountered:

hofnerb · 2016-07-28T06:57:04Z

@mayrandy can you have a look at this issue?

Do we have any opportunity to work with matrix calued outcomes within the families? Actually this should be possible as we can also use complex outcomes such as Surv(t, delta). I think we need to have specialized families here. Is this worth it?

If we do not have a proper fix we should at least issue a warning or an error if someone tries to use these families.

mayrandy · 2016-08-23T17:28:04Z

As far as I can see, this should not be a big issue. We only need to change as.families() so that it checks if this beta denominator exists as an argument, i.e, something like "bd" %in% names(formals(pdf)).

If that is the case, we'll have to do
bd <- y[,1] +y[,2]
y <- y[,1]
the rest should be playing around with the arguments so that bd is included when needed in ngradient, loss, mu.initial, etc.

fixing issue #12 -> makes families "BB" and "BI" work by including beta denominator 'bd'

mayrandy · 2016-08-24T12:17:22Z

OK, this should work now with this commit to devel:

library("gamboostLSS")
library("gamlss")

set.seed(123)
n <- 100
x <- rnorm(n)
z <- rnorm(n)
data <- data.frame(y = rbinom(n, p = plogis(x + z), size = 60), x = x, z= z)
data$ymat <- with(data, cbind(success = data$y, fail = 60 - data$y))

m_gamlss <- gamlss(ymat ~ x + z, data = data, family = BB)

Betabin <- as.families(fname="BB")
g1 <- glmboostLSS(ymat ~ x + z, data = data, families = Betabin)

coef(m_gamlss)
#(Intercept)           x           z 
#-0.01365737  1.00916892  0.96522587 

coef(g1[1000], off2int = TRUE)$mu
#$mu
#(Intercept)           x           z 
#-0.01324596  1.00947447  0.96550888 

# now BI via mboost 
g2 <- glmboost(ymat ~x + z, data = data, family = as.families("BI"))
# ignore warning
coef(g2, off2int = TRUE)
#(Intercept)           x           z 
#-0.01368482  1.00917328  0.96521239

hofnerb · 2016-08-24T12:43:20Z

Thx.

fabian-s mentioned this issue Apr 8, 2016

Family for Binomial(n, p) instead of Binomial(1, p) boost-R/mboost#34

Closed

hofnerb assigned hofnerb and mayrandy Jul 28, 2016

mayrandy added a commit that referenced this issue Aug 24, 2016

Update as.families.R

109daf3

fixing issue #12 -> makes families "BB" and "BI" work by including beta denominator 'bd'

hofnerb added a commit that referenced this issue Aug 24, 2016

Added test and close #12

2321180

hofnerb closed this as completed Aug 24, 2016

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

gamlss.dist::BB, gamlss.dist::BI don't work #12

gamlss.dist::BB, gamlss.dist::BI don't work #12

fabian-s commented Apr 8, 2016

hofnerb commented Jul 28, 2016

mayrandy commented Aug 23, 2016

mayrandy commented Aug 24, 2016

hofnerb commented Aug 24, 2016

gamlss.dist::BB, gamlss.dist::BI don't work #12

gamlss.dist::BB, gamlss.dist::BI don't work #12

Comments

fabian-s commented Apr 8, 2016

hofnerb commented Jul 28, 2016

mayrandy commented Aug 23, 2016

mayrandy commented Aug 24, 2016

hofnerb commented Aug 24, 2016