adding gompertz and generalized gamma distributions

fndemarqui · Mar 20, 2024 · 1bfed84 · 1bfed84
1 parent 0a7efc6
commit 1bfed84
Show file tree

Hide file tree

Showing 34 changed files with 2,127 additions and 588 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,12 +1,12 @@
 Package: survstan
 Title: Fitting Survival Regression Models via 'Stan' 
-Version: 0.0.6.1
+Version: 0.0.7
 Authors@R: 
     c(
     person(given = "Fabio", family = "Demarqui", role = c("aut", "cre", "cph"), email = "fndemarqui@est.ufmg.br", comment = c(ORCID = "0000-0001-9236-1986")),
     person(given = "Andrew", family = "Johnson", role = "ctb")
     )
-Description: Parametric survival regression models under the maximum likelihood approach via 'Stan'. Implemented regression models include accelerated failure time models, proportional hazards models, proportional odds models, accelerated hazard models, Yang and Prentice models, and extended hazard models. Available baseline survival distributions include exponential, Weibull, log-normal, log-logistic, gamma, rayleigh and fatigue (Birnbaum-Saunders) distributions. References: Lawless (2002) <ISBN:9780471372158>; Bennett (1982) <doi:10.1002/sim.4780020223>; Chen and Wang(2000) <doi:10.1080/01621459.2000.10474236>; Demarqui and Mayrink (2021) <doi:10.1214/20-BJPS471>.
+Description: Parametric survival regression models under the maximum likelihood approach via 'Stan'. Implemented regression models include accelerated failure time models, proportional hazards models, proportional odds models, accelerated hazard models, Yang and Prentice models, and extended hazard models. Available baseline survival distributions include exponential, Weibull, log-normal, log-logistic, gamma, generalized gamma, rayleigh, Gompertz and fatigue (Birnbaum-Saunders) distributions. References: Lawless (2002) <ISBN:9780471372158>; Bennett (1982) <doi:10.1002/sim.4780020223>; Chen and Wang(2000) <doi:10.1080/01621459.2000.10474236>; Demarqui and Mayrink (2021) <doi:10.1214/20-BJPS471>.
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)

diff --git a/NAMESPACE b/NAMESPACE
@@ -19,12 +19,24 @@ S3method(vcov,survstan)
 export(aftreg)
 export(ahreg)
 export(cross_time)
+export(dggprentice)
+export(dggstacy)
+export(dgompertz)
 export(ehreg)
 export(estimates)
 export(ggresiduals)
+export(pggprentice)
+export(pggstacy)
+export(pgompertz)
 export(phreg)
 export(poreg)
+export(qggprentice)
+export(qggstacy)
+export(qgompertz)
 export(rank_models)
+export(rggprentice)
+export(rggstacy)
+export(rgompertz)
 export(se)
 export(tidy)
 export(ypreg)

diff --git a/NEWS.md b/NEWS.md
@@ -48,3 +48,9 @@
 # survstan 0.0.6.1
 
 - Correcting parametrization of survival functions for AH and EH models.
+
+
+# survstan 0.0.7
+
+- All implemented models now can accommodate a offset variable in the linear predictors.
+- Implementation of Gompertz, generalized gamma (original Stacy's parametrization and alternative Prentice's parametrization) distributions.
diff --git a/R/aftreg.R b/R/aftreg.R
@@ -40,15 +40,21 @@ aftreg <- function(formula, data, baseline = "weibull", dist = NULL, init = 0, .
   p <- ncol(X)
   tau <- max(time)
   y <- time/tau
+  offset <- stats::model.offset(mf)
+  if(is.null(offset)){
+    offset <- rep(0, n)
+  }
 
-
-  output <- list(call = Call, formula = stats::formula(mt),
+  output <- list(call = Call, formula = stats::formula(mt), offset = offset,
                  terms = mt, mf = mf, baseline = baseline, survreg = "aft",
                  n = n, p = p, tau = tau, labels = labels)
 
+  if(init == 0 & baseline == "ggprentice"){
+    init <- inits("aft", p)
+  }
   baseline <- set_baseline(baseline)
 
-  stan_data <- list(time=y, event=event, X=X, n=n, p=p,
+  stan_data <- list(time=y, event=event, X=X, n=n, p=p, offset = offset,
                     baseline=baseline, survreg = 1, tau = tau)
   fit <- rstan::optimizing(stanmodels$survreg, data = stan_data, hessian = TRUE, init = init, ...)
   res <- reparametrization(fit, survreg = "aft", output$baseline, labels, tau, p)
@@ -59,10 +65,12 @@ aftreg <- function(formula, data, baseline = "weibull", dist = NULL, init = 0, .
 
 
   pars <- output$estimates
-  if(p>0){
-    lp <- as.numeric(X%*%pars[1:p])
-    time <- time*exp(-lp)
+  if(p==0){
+    lp <- 0 + offset
+  }else{
+    lp <- as.numeric(X%*%pars[1:p]) + offset
   }
+  time <- time*exp(-lp)
 
   output$residuals <- cumhaz(time, pars, baseline, p)
   output$event <- event

diff --git a/R/ahreg.R b/R/ahreg.R
@@ -41,15 +41,21 @@ ahreg <- function(formula, data, baseline = "weibull", dist = NULL, init = 0, ..
   p <- ncol(X)
   tau <- max(time)
   y <- time/tau
+  offset <- stats::model.offset(mf)
+  if(is.null(offset)){
+    offset <- rep(0, n)
+  }
 
-
-  output <- list(call = Call, formula = stats::formula(mt),
+  output <- list(call = Call, formula = stats::formula(mt), offset = offset,
                  terms = mt, mf = mf, baseline = baseline, survreg = "ah",
                  n = n, p = p, tau = tau, labels = labels)
 
+  if(init == 0 & baseline == "ggprentice"){
+    init <- inits("ah", p)
+  }
   baseline <- set_baseline(baseline)
 
-  stan_data <- list(time=y, event=event, X=X, n=n, p=p,
+  stan_data <- list(time=y, event=event, X=X, n=n, p=p, offset = offset,
                     baseline=baseline, survreg = 4, tau = tau)
   fit <- rstan::optimizing(stanmodels$survreg, data = stan_data, hessian = TRUE, init = init, ...)
   res <- reparametrization(fit, survreg = "ah", output$baseline, labels, tau, p)
@@ -60,9 +66,9 @@ ahreg <- function(formula, data, baseline = "weibull", dist = NULL, init = 0, ..
 
   pars <- output$estimates
   if(p==0){
-    lp <- 0
+    lp <- 0 + offset
   }else{
-    lp <- as.numeric(X%*%pars[1:p])
+    lp <- as.numeric(X%*%pars[1:p]) + offset
     time <- time*exp(-lp)
   }
   H0 <- cumhaz(time, pars, baseline, p)

diff --git a/R/bootstrap.R b/R/bootstrap.R
@@ -1,4 +1,13 @@
 
+rename_mf <- function(mf){
+  vars <- names(mf)
+  off <- vars[grep("offset", vars)]
+  substr(off, start = 8, stop=nchar(off)-1)
+  vars[grep("offset", vars)] <- substr(off, start = 8, stop=nchar(off)-1)
+  names(mf) <- vars
+  return(mf)
+}
+
 bootstrap <- function(object, nboot, cores, ...){
 
   # cl <- parallel::makeCluster(cores)
@@ -13,11 +22,18 @@ bootstrap <- function(object, nboot, cores, ...){
   formula <- stats::update(formula, survival::Surv(time, status) ~ .)
   mf <- object$mf
   resp <- stats::model.response(mf)
+
   time <- resp[,1]
   status <- resp[,2]
 
+  offset <- stats::model.offset(mf)
+  if(!is.null(offset)){
+    mf <- rename_mf(mf)
+  }
+
   mf <- mf %>%
     dplyr::select(-dplyr::starts_with("Surv("))
+
   data <- data.frame(
     time = time,
     status = status
@@ -28,6 +44,7 @@ bootstrap <- function(object, nboot, cores, ...){
   p <- object$p
   tau <- object$tau
 
+
   index <- 1:n
   index1 <- which(status==1)
   index2 <- which(status==0)

diff --git a/R/cross_time.R b/R/cross_time.R
@@ -1,10 +1,10 @@
 
-diffSurv <- function(time, X1, X2, pars, baseline, survreg, p){
-    lp11 <- as.numeric(X1%*%pars[1:p])
-    lp12 <- as.numeric(X2%*%pars[1:p])
+diffSurv <- function(time, X1, X2, offset1, offset2, pars, baseline, survreg, p){
+    lp11 <- as.numeric(X1%*%pars[1:p]) + offset1
+    lp12 <- as.numeric(X2%*%pars[1:p]) + offset2
     if(survreg == "yp" | survreg == "eh"){
-      lp21 <- as.numeric(X1%*%pars[(p+1):(2*p)])
-      lp22 <- as.numeric(X2%*%pars[(p+1):(2*p)])
+      lp21 <- as.numeric(X1%*%pars[(p+1):(2*p)]) + offset1
+      lp22 <- as.numeric(X2%*%pars[(p+1):(2*p)]) + offset2
     }else{
       lp11 <- 0
       lp22 <- 0
@@ -22,9 +22,9 @@ diffSurv <- function(time, X1, X2, pars, baseline, survreg, p){
   return(St1-St2)
 }
 
-crossing_time <- function(X1, X2, tau0=tau0, tau=tau, pars, baseline, survreg, p){
+crossing_time <- function(X1, X2, offset1, offset2, tau0=tau0, tau=tau, pars, baseline, survreg, p){
   I <- c(tau0, 1.5*tau)
-  t <- try(stats::uniroot(diffSurv, interval=I, X1=X1, X2=X2, pars=pars, baseline=baseline, survreg=survreg, p=p)$root, TRUE)
+  t <- try(stats::uniroot(diffSurv, interval=I, X1=X1, X2=X2, offset1=offset1, offset2=offset2, pars=pars, baseline=baseline, survreg=survreg, p=p)$root, TRUE)
   if(is(t, "try-error")){
     return(NA)
   }else{
@@ -98,10 +98,15 @@ cross_time.survstan <- function(object, newdata1, newdata2,
   beta <- pars[1:p]
   phi <- pars[(p+1):(2*p)]
 
-  lp_short1 <- as.numeric(X1%*%beta)
-  lp_short2 <- as.numeric(X2%*%beta)
-  lp_long1 <- as.numeric(X1%*%phi)
-  lp_long2 <- as.numeric(X2%*%phi)
+  mf1 <- stats::model.frame(Terms, data = newdata1)
+  mf2 <- stats::model.frame(Terms, data = newdata2)
+  n <- nrow(newdata1)
+  offset1 <- stats::model.offset(mf1)
+  offset2 <- stats::model.offset(mf2)
+  if(is.null(offset1)){
+    offset1 <- rep(0, n)
+    offset2 <- rep(0, n)
+  }
 
   tau0 <- min(time)
   tau <- object$tau
@@ -112,7 +117,7 @@ cross_time.survstan <- function(object, newdata1, newdata2,
   t <- c()
 
   for(i in 1:nrow(newdata1)){
-    t[i] <- crossing_time(X1=X1[i,], X2=X2[i,], tau0=tau0, tau=tau, pars=pars, baseline=baseline, survreg=survreg, p)
+    t[i] <- crossing_time(X1=X1[i,], X2=X2[i,], offset1=offset1, offset2=offset2, tau0=tau0, tau=tau, pars=pars, baseline=baseline, survreg=survreg, p)
   }
   pars <- bootstrap(object, nboot=nboot, cores = cores)
 

diff --git a/R/dists.R b/R/dists.R
@@ -1,4 +1,5 @@
 
 
 survstan_distributions <- c("exponential", "weibull", "lognormal", "loglogistic",
-                            "fatigue", "gamma", "rayleigh")
+                            "fatigue", "gamma", "rayleigh", "gompertz",
+                            "ggstacy", "ggprentice")
diff --git a/R/ehreg.R b/R/ehreg.R
@@ -41,14 +41,21 @@ ehreg <- function(formula, data, baseline = "weibull", dist = NULL, init = 0, ..
   p <- ncol(X)
   tau <- max(time)
   y <- time/tau
+  offset <- stats::model.offset(mf)
+  if(is.null(offset)){
+    offset <- rep(0, n)
+  }
 
-  output <- list(call = Call, formula = stats::formula(mt),
+  output <- list(call = Call, formula = stats::formula(mt), offset = offset,
                  terms = mt, mf = mf, baseline = baseline, survreg = "eh",
                  n = n, p = p, tau = tau, labels = labels)
 
+  if(init == 0 & baseline == "ggprentice"){
+    init <- inits("eh", p)
+  }
   baseline <- set_baseline(baseline)
 
-  stan_data <- list(time=y, event=event, X=X, n=n, p=p,
+  stan_data <- list(time=y, event=event, X=X, n=n, p=p, offset = offset,
                     baseline=baseline, survreg = 6, tau = tau)
   fit <- rstan::optimizing(stanmodels$survreg, data = stan_data, hessian = TRUE, init = init, ...)
   res <- reparametrization(fit, survreg = "eh", output$baseline, labels, tau, p)
@@ -59,11 +66,11 @@ ehreg <- function(formula, data, baseline = "weibull", dist = NULL, init = 0, ..
 
   pars <- output$estimates
   if(p==0){
-    lp1 <- 0
-    lp2 <- 0
+    lp1 <- 0 + offset
+    lp2 <- 0 + offset
   }else{
-    lp1 <- as.numeric(X%*%pars[1:p])
-    lp2 <- as.numeric(X%*%pars[(p+1):(2*p)])
+    lp1 <- as.numeric(X%*%pars[1:p]) + offset
+    lp2 <- as.numeric(X%*%pars[(p+1):(2*p)]) + offset
   }
 
   p <- 2*p

diff --git a/R/emmeans.R b/R/emmeans.R
@@ -70,3 +70,40 @@ emm_basis.ypreg = function(object, trms, xlev, grid, term = c("short", "long"),
 }
 
 
+#' @rdname emmeans-survstan-helpers
+#' @param object An object of the same class as is supported by a new method.
+#' @param term character specifying whether AF or RH term regression coefficients are to be used.
+#' @param ... Additional parameters that may be supported by the method.
+recover_data.ehreg <- function (object, term = c("AF", "RH"), ...){
+  term <- match.arg(term)
+  frame <- object$model
+  fcall = object$call
+  if (term %in% c("AF", "RH"))
+    trms = stats::delete.response(stats::terms(object, model = term))
+  emmeans::recover_data(fcall, trms, object$na.action,
+                        frame = frame, pwts = stats::weights(object), ...)
+}
+
+emm_basis.ehreg = function(object, trms, xlev, grid, term = c("AF", "RH"), ...){
+  term <- match.arg(term)
+  m = stats::model.frame(trms, grid, na.action = stats::na.pass, xlev = xlev)
+  X = stats::model.matrix(trms, m, contrasts.arg = object$contrasts)[, -1, drop = FALSE]
+  bhat = stats::coef(object)
+  p <- length(bhat)/2
+  if(term=="AF"){
+    bhat <- bhat[1:p]
+    V <- vcov(object)[1:p, 1:p]
+  }else{
+    bhat <- bhat[(p+1):(2*p)]
+    V <- vcov(object)[(p+1):(2*p), (p+1):(2*p)]
+  }
+
+  Xmat = stats::model.matrix(trms, data=object$mf)[, -1, drop = FALSE]
+  nbasis = matrix(NA)
+  dfargs = list(df = Inf)
+  dffun = function(k, dfargs) dfargs$df
+  list(X = X, bhat = bhat, nbasis = nbasis, V = V,
+       dffun = dffun, dfargs = dfargs)
+}
+
+