version 1.2-3

cran · Feb 24, 2021 · 5f8901f · 5f8901f
1 parent 1b1b085
commit 5f8901f
Show file tree

Hide file tree

Showing 9 changed files with 231 additions and 156 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mlt
 Title: Most Likely Transformations
-Version: 1.2-2
-Date: 2021-02-12
+Version: 1.2-3
+Date: 2021-02-22
 Authors@R: person("Torsten", "Hothorn", role = c("aut", "cre"),
                   email = "Torsten.Hothorn@R-project.org", comment = c(ORCID = "0000-0001-8301-0471"))  
 Description: Likelihood-based estimation of conditional transformation
@@ -15,8 +15,8 @@ URL: http://ctm.R-forge.R-project.org
 License: GPL-2
 Encoding: UTF-8
 NeedsCompilation: no
-Packaged: 2021-02-12 13:53:40 UTC; hothorn
+Packaged: 2021-02-24 07:48:00 UTC; hothorn
 Author: Torsten Hothorn [aut, cre] (<https://orcid.org/0000-0001-8301-0471>)
 Maintainer: Torsten Hothorn <Torsten.Hothorn@R-project.org>
 Repository: CRAN
-Date/Publication: 2021-02-13 00:10:08 UTC
+Date/Publication: 2021-02-24 08:20:02 UTC
diff --git a/MD5 b/MD5
@@ -1,10 +1,10 @@
-f68d95fd59214a1b1490c6d4ed7d614c *DESCRIPTION
+1784d82db47ba762c4da71f25ef65fa9 *DESCRIPTION
 44dc29e1096ee97227a727914c71efe0 *NAMESPACE
 226446b622fa6ccff2633ccd5af47b75 *R/R.R
 d2cd3551b75d248b1630149d9a11f48f *R/confband.R
 85bfdf07717f5a022e84c7ce5ff891f8 *R/ctm.R
 098ce237d6af53626d8d2cd138e47443 *R/distr.R
-a3272914744fa0f5d6565cb9eafaa9db *R/dpq_etc.R
+9223c303cd72fedb43084863ce2f2df7 *R/dpq_etc.R
 70f18f8a82d9b63d3b1e7cb33201ffc0 *R/loglik.R
 399714763b4bd3bd99e576a58057884c *R/methods.R
 70e68a7d0cd22a234f4eb74b06add404 *R/mlt.R
@@ -13,24 +13,23 @@ a3272914744fa0f5d6565cb9eafaa9db *R/dpq_etc.R
 879724557cf8f27b085fccc3579e8c0f *R/predict.R
 40c693617393d342c4b957c725770273 *R/sample.R
 514ae5ccf7ba2841bda0c0b2e358b2ce *R/utils.R
-d8473edb4dc4bed14b3518e2701e827d *build/partial.rdb
+6e0e58a92378d5a2a75ca3c602dcebb6 *build/partial.rdb
 1c9695dee7162d6e72883b552906e753 *cleanup
 903952ead54ce0029d7ceb59c44645c3 *inst/CITATION
-7e856cfbcc785b785274c76f8be7b136 *inst/NEWS.Rd
-9e7da8008d4c41f098d9ffba0e00756a *man/R.Rd
+34357ef16dcff7ef04ba4634f7e51657 *inst/NEWS.Rd
+2fd36e671344c1f894b4f06235156ce6 *man/R.Rd
 4141097f19682037945cf253537240c9 *man/confband.Rd
 b211532488e33a74047c6cbdce2e3246 *man/ctm-methods.Rd
 dbe2d83228567a26ae5f2af6d8ab9203 *man/ctm.Rd
 30d6369b93ba5ebb221d6a47b851f6ce *man/mlt-methods.Rd
-3338817cb685f7ed177fc68d2b6ad4b5 *man/mlt-package.Rd
+d4dc0f0568a35ea8c0c0a3ee14a20233 *man/mlt-package.Rd
 c5add13dbdb8903c05a351768268d940 *man/mlt.Rd
 a4990dc3e06fe47fc8fdc6deb4712968 *man/mltoptim.Rd
-7a2d1b4432a506806eef86dc4ce095d0 *man/predict.Rd
+18e45384a6a7af7be1a801b23d9419ff *man/predict.Rd
 db0bcd162bd3125c10bd2a365fa92a3b *tests/2sample.R
 5a9f502537a95ce85d5627eff0a7b610 *tests/2sample.Rout.save
 1761c0ee2fa1a622b95d41dae932a559 *tests/Cox-Ex.R
 be7cf5845dbab7aaec2bf4b7518842d9 *tests/Cox-Ex.Rout.save
-1ad2ce6b21829a5ec4991c0a079b6a10 *tests/Examples/mlt-Ex.Rout.save
 431aabe6cb130b2cc8eafbcaf6a21b15 *tests/bugfixes.R
 bc3008e0de096dd16c88f119f434f983 *tests/bugfixes.Rout.save
 cab1777631a72c7a1dd8b7138cbdfd6f *tests/distr-Ex.R

diff --git a/R/dpq_etc.R b/R/dpq_etc.R
@@ -276,8 +276,8 @@ qmlt <- function(object, newdata = NULL, q = NULL, prob = .5, n = 50,
                  nrow = nrow(trm), ncol = length(prob), byrow = TRUE)
 
     ret <- .invf(object, f = trm, q = q, z = qu)
-    i <- as.vector(matrix(1:length(ret), ncol = nrow(trm), byrow = TRUE))
-    ret <- if (is.matrix(ret)) ret[i,] else ret[i]
+    i <- as.vector(matrix(1:NROW(ret), ncol = nrow(trm), byrow = TRUE))
+    ret <- if (is.data.frame(ret)) ret[i,] else ret[i]
 
     ### arrays of factors are not allowed
     if (is.factor(q)) return(ret)

diff --git a/build/partial.rdb b/build/partial.rdb
diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd
@@ -2,7 +2,15 @@
 \name{NEWS}
 \title{NEWS file for the mlt package}
 
-\section{Changes in Version 1.2-2 (2020-02-12)}{
+\section{Changes in Version 1.2-3 (2021-02-22)}{
+  \subsection{New features}{
+    \itemize{
+      \item{Improve documentation.}
+    }
+  }
+}
+
+\section{Changes in Version 1.2-2 (2021-02-12)}{
   \subsection{Bugfixes}{
     \itemize{
       \item{Sampling from unconditional models did not pay attention to
@@ -11,7 +19,7 @@
   }
 }
 
-\section{Changes in Version 1.2-1 (2020-02-03)}{
+\section{Changes in Version 1.2-1 (2021-02-03)}{
   \subsection{New features}{
     \itemize{
       \item{Quantiles and thus simulations are now computationally more exact and more robust.

diff --git a/man/R.Rd b/man/R.Rd
@@ -10,7 +10,7 @@
 \alias{as.Surv}
 \alias{as.Surv.response}
 \title{
-    Response Variable
+    Response Variables
 }
 \description{
     Represent a possibly censored or truncated response variable
@@ -49,13 +49,77 @@ as.Surv(object)
 \details{
   \code{R} is basically an extention of \code{\link[survival]{Surv}} for the
   representation of arbitrarily censored or truncated measurements at any scale.
+  The \code{storage.mode} of \code{object} determines if models are fitted
+  by the discrete likelihood (integers or factors) or the continuous
+  likelihood (log-density for numeric \code{object}s). Interval-censoring
+  is given by intervals (\code{cleft}, \code{cright}], also for integers and
+  factors (see example below). Left- and right-truncation can be defined
+  by the \code{tleft} and \code{tright} arguments. Existing \code{Surv}
+  objects can be converted using \code{R(Surv(...))}$ and, in some cases, an
+  \code{as.Surv()} method exists for representing \code{response} objects as
+  \code{Surv} objects.
 
   \code{R} applied to a list calls \code{R} for each of the list elements
   and returns a joint object.
+
+  More examples can be found in Hothorn (2018) and in 
+  \code{vignette("tram", package = "tram")}.
+
+}
+\references{
+
+  Torsten Hothorn (2020), Most Likely Transformations: The mlt Package,
+  \emph{Journal of Statistical Software}, \bold{92}(1), 1--68,
+  \doi{10.18637/jss.v092.i01}
+
 }
 \examples{
 
+ library("survival")
+
+ ### randomly right-censored continuous observations
+ time <- as.double(1:9)
+ event <- rep(c(FALSE, TRUE), length = length(time))
+
+ Surv(time, event)
+ R(Surv(time, event))
+
+ ### right-censoring, left-truncation
+ ltm <- 1:9 / 10
+ Surv(ltm, time, event)
+ R(Surv(ltm, time, event))
+
+ ### interval-censoring
+ Surv(ltm, time, type = "interval2")
+ R(Surv(ltm, time, type = "interval2"))
+
+ ### interval-censoring, left/right-truncation
+ lc <- as.double(1:4)
+ lt <- c(NA, NA, 7, 8)
+ rt <- c(NA, 9, NA, 10)
+ x <- c(3, NA, NA, NA)
+ rc <- as.double(11:14)
+ R(x, cleft = lt, cright = rt)
+ as.Surv(R(x, cleft = lt, cright = rt))
+ R(x, tleft = 1, cleft = lt, cright = rt)
+ R(x, tleft = 1, cleft = lt, cright = rt, tright = 15)
+ R(x, tleft = lc, cleft = lt, cright = rt, tright = rc)
+
+ ### discrete observations: counts
+ x <- 0:9
+ R(x)
+ ### partially interval-censored counts
+ rx <- c(rep(NA, 6), rep(15L, 4))
+ R(x, cright = rx)
+
  ### ordered factor
- R(gl(3, 3, labels = LETTERS[1:3]))
+ x <- gl(5, 2, labels = LETTERS[1:5], ordered = TRUE)
+ R(x)
+ ### interval-censoring (ie, observations can have multiple levels)
+ lx <- ordered(c("A", "A", "B", "C", "D", "E"), 
+               levels = LETTERS[1:5], labels = LETTERS[1:5])
+ rx <- ordered(c("B", "D", "E", "D", "D", "E"), 
+               levels = LETTERS[1:5], labels = LETTERS[1:5])
+ R(rx, cleft = lx, cright = rx)
 
 }
diff --git a/man/mlt-package.Rd b/man/mlt-package.Rd
@@ -4,24 +4,27 @@
 \title{General Information on the \pkg{mlt} Package}
 \description{
   The \pkg{mlt} package implements maximum likelihood estimation in 
-  conditional transformation models as introduced by Hothorn et al. (2018).
+  conditional transformation models as introduced by Hothorn et al. (2020).
 
   An introduction to the package is available in the \code{mlt} package
-  vignette from package \code{mlt.docreg} (Hothorn, 2018).
+  vignette from package \code{mlt.docreg} (Hothorn, 2020).
 
   A short talk on most likely transformations is available from
   \url{https://channel9.msdn.com/Events/useR-international-R-User-conference/useR2016/Most-Likely-Transformations}.
 
+  Novice users might find the high(er) level interfaces offered by package
+  \pkg{tram} more convenient.
+
 }
 \references{
 
   Torsten Hothorn, Lisa Moest, Peter Buehlmann (2018), Most Likely
   Transformations, \emph{Scandinavian Journal of Statistics}, \bold{45}(1),
   110--134, \doi{10.1111/sjos.12291}.
 
-  Torsten Hothorn (2018), Most Likely Transformations: The mlt Package,
-  \emph{Journal of Statistical Software}, forthcoming. URL: 
-  \url{https://cran.r-project.org/package=mlt.docreg}.
+  Torsten Hothorn (2020), Most Likely Transformations: The mlt Package,
+  \emph{Journal of Statistical Software}, \bold{92}(1), 1--68,
+  \doi{10.18637/jss.v092.i01}
 
 }
 \author{

diff --git a/man/predict.Rd b/man/predict.Rd
@@ -59,7 +59,21 @@
   for all observations in \code{newdata} and plots these functions (according to 
   \code{type}). \code{predict} evaluates the transformation function over a grid
   of \code{q} values for all observations in \code{newdata} and returns the
-  result as a matrix (where _columns_ correspond to _rows_ in \code{newdata}).
+  result as a matrix (where _columns_ correspond to _rows_ in
+  \code{newdata}, see examples). Lack of \code{type = "mean"} is a feature
+  and not a bug.
+
+  Argument \code{type} defines the scale of the plots or predictions:
+  \code{type = "distribution"} means the cumulative distribution function, 
+  \code{type = "survivor"} is the survivor function (one minus distribution
+  function), \code{type = "density"} the absolute continuous or discrete
+  density (depending on the response), \code{type = "hazard"}, \code{type =
+  "cumhazard"}, and \code{type = "odds"} refers to the hazard (absolute
+  continuous or discrete), cumulative hazard (defined as minus log-survivor
+  function in both the absolute continuous and discrete cases), and odds
+  (distribution divided by survivor) functions. The quantile function can be
+  evaluated for probabilities \code{prob} by \code{type = "quantile"}.
+
   Note that the \code{predict} method for \code{ctm} objects requires all
   model coefficients to be specified in this unfitted model.
   \code{simulate} draws samples from \code{object} by numerical inversion of the
@@ -69,4 +83,123 @@
   want the methods to pay attention to offsets, specify them as a variable
   in the model with fixed regression coefficient using the \code{fixed}
   argument in \code{\link{mlt}}.
+
+  More examples can be found in Hothorn (2018).
+
+}
+\references{
+
+  Torsten Hothorn (2020), Most Likely Transformations: The mlt Package,
+  \emph{Journal of Statistical Software}, \bold{92}(1), 1--68,
+  \doi{10.18637/jss.v092.i01}
+
+}
+\examples{
+
+  library("survival")
+  op <- options(digits = 2)
+
+  ### GBSG2 dataset
+  data("GBSG2", package = "TH.data")
+
+  ### right-censored response
+  GBSG2$y <- with(GBSG2, Surv(time, cens))
+
+  ### define Bernstein(log(time)) parameterisation
+  ### of transformation function. The response
+  ### is bounded (log(0) doesn't work, so we use log(1))
+  ### support defines the support of the Bernstein polynomial
+  ### and add can be used to make the grid wider (see below)
+  rvar <- numeric_var("y", bounds = c(0, Inf), 
+                      support = c(100, 2000))
+  rb <- Bernstein_basis(rvar, order = 6, ui = "increasing")
+  ### dummy coding of menopausal status
+  hb <- as.basis(~ 0 + menostat, data = GBSG2)
+  ### treatment contrast of hormonal treatment
+  xb <- as.basis(~ horTh, data = GBSG2, remove_intercept = TRUE)
+
+  ### set-up and fit Cox model, stratified by menopausal status
+  m <- ctm(rb, interacting = hb, shifting = xb, todistr = "MinExtrVal")
+  fm <- mlt(m, data = GBSG2)
+
+  ### generate grid for all three variables
+  ### note that the response grid ranges between 1 (bounds[1])
+  ### and 2000 (support[2])
+  (d <- mkgrid(m, n = 10))
+  ### data.frame of menopausal status and treatment
+  nd <- do.call("expand.grid", d[-1])
+
+  ### plot model on different scales, for all four combinations
+  ### of menopausal status and hormonal treatment
+  typ <- c("distribution", "survivor", "density", "hazard", 
+           "cumhazard", "odds")
+  layout(matrix(1:6, nrow = 2))
+  nl <- sapply(typ, function(tp) 
+      ### K = 500 makes densities and hazards smooth
+      plot(fm, newdata = nd, type = tp, col = 1:nrow(nd), K = 500))
+  legend("topleft", lty = 1, col = 1:nrow(nd), 
+         legend = do.call("paste", nd), bty = "n")
+
+  ### plot calls predict, which generates a grid with K = 50
+  ### response values
+  ### note that a K x nrow(newdata) matrix is returned
+  ### (for reasons explained in the next example)
+  predict(fm, newdata = nd, type = "survivor")
+
+  ### newdata can take a list, and evaluates the survivor
+  ### function on the grid defined by newdata 
+  ### using a linear array model formulation and is 
+  ### extremely efficient (wrt computing time and memory)
+  ### d[1] (the response grid) varies fastest
+  ### => the first dimension of predict() is always the response,
+  ### not the dimension of the predictor variables (like one 
+  ### might expect)
+  predict(fm, newdata = d, type = "survivor")
+
+  ### owing to this structure, the result can be quickly stored in 
+  ### a data frame as follows
+  cd <- do.call("expand.grid", d)
+  cd$surv <- c(S <- predict(fm, newdata = d, type = "survivor"))
+
+  ### works for distribution functions
+  all.equal(1 - S, predict(fm, newdata = d, type = "distribution"))
+  ### cumulative hazard functions
+  all.equal(-log(S), predict(fm, newdata = d, type = "cumhazard"))
+  ### log-cumulative hazard functions (= trafo, for Cox models)
+  all.equal(log(-log(S)), predict(fm, newdata = d, type = "logcumhazard"))
+  all.equal(log(-log(S)), predict(fm, newdata = d, type = "trafo"))
+  ### densities, hazards, or odds functions
+  predict(fm, newdata = d, type = "density")
+  predict(fm, newdata = d, type = "hazard")
+  predict(fm, newdata = d, type = "odds")
+  ### and quantiles (10 and 20%)
+  predict(fm, newdata = d[-1], type = "quantile", prob = 1:2 / 10)
+
+  ### note that some quantiles are only defined as intervals
+  ### (> 2000, in this case). Intervals are returned as an "response" 
+  ### object, see ?R. Unfortunately, these can't be stored as array, so
+  ### a data.frame is returned where the quantile varies first
+  p <- c(list(prob = 1:9/10), d[-1])
+  np <- do.call("expand.grid", p)
+  (Q <- predict(fm, newdata = d[-1], type = "quantile", prob = 1:9 / 10))
+  np$Q <- Q
+  np
+
+  ### simulating from the model works by inverting the distribution 
+  ### function; some obs are right-censored at 2000
+  (s <- simulate(fm, newdata = nd, nsim = 3))
+  ### convert to Surv
+  sapply(s, as.Surv)
+
+  ### generate 3 parametric bootstrap samples from the model
+  tmp <- GBSG2[, c("menostat", "horTh")]
+  s <- simulate(fm, newdata = tmp, nsim = 3)
+  ### refit the model using the simulated response
+  lapply(s, function(y) {
+    tmp$y <- y
+    coef(mlt(m, data = tmp))
+  })
+
+  options(op)
+
 }