diff --git a/DESCRIPTION b/DESCRIPTION index 9dba051..bec7889 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mlt Title: Most Likely Transformations -Version: 1.2-2 -Date: 2021-02-12 +Version: 1.2-3 +Date: 2021-02-22 Authors@R: person("Torsten", "Hothorn", role = c("aut", "cre"), email = "Torsten.Hothorn@R-project.org", comment = c(ORCID = "0000-0001-8301-0471")) Description: Likelihood-based estimation of conditional transformation @@ -15,8 +15,8 @@ URL: http://ctm.R-forge.R-project.org License: GPL-2 Encoding: UTF-8 NeedsCompilation: no -Packaged: 2021-02-12 13:53:40 UTC; hothorn +Packaged: 2021-02-24 07:48:00 UTC; hothorn Author: Torsten Hothorn [aut, cre] () Maintainer: Torsten Hothorn Repository: CRAN -Date/Publication: 2021-02-13 00:10:08 UTC +Date/Publication: 2021-02-24 08:20:02 UTC diff --git a/MD5 b/MD5 index 85acc44..07b1bea 100644 --- a/MD5 +++ b/MD5 @@ -1,10 +1,10 @@ -f68d95fd59214a1b1490c6d4ed7d614c *DESCRIPTION +1784d82db47ba762c4da71f25ef65fa9 *DESCRIPTION 44dc29e1096ee97227a727914c71efe0 *NAMESPACE 226446b622fa6ccff2633ccd5af47b75 *R/R.R d2cd3551b75d248b1630149d9a11f48f *R/confband.R 85bfdf07717f5a022e84c7ce5ff891f8 *R/ctm.R 098ce237d6af53626d8d2cd138e47443 *R/distr.R -a3272914744fa0f5d6565cb9eafaa9db *R/dpq_etc.R +9223c303cd72fedb43084863ce2f2df7 *R/dpq_etc.R 70f18f8a82d9b63d3b1e7cb33201ffc0 *R/loglik.R 399714763b4bd3bd99e576a58057884c *R/methods.R 70e68a7d0cd22a234f4eb74b06add404 *R/mlt.R @@ -13,24 +13,23 @@ a3272914744fa0f5d6565cb9eafaa9db *R/dpq_etc.R 879724557cf8f27b085fccc3579e8c0f *R/predict.R 40c693617393d342c4b957c725770273 *R/sample.R 514ae5ccf7ba2841bda0c0b2e358b2ce *R/utils.R -d8473edb4dc4bed14b3518e2701e827d *build/partial.rdb +6e0e58a92378d5a2a75ca3c602dcebb6 *build/partial.rdb 1c9695dee7162d6e72883b552906e753 *cleanup 903952ead54ce0029d7ceb59c44645c3 *inst/CITATION -7e856cfbcc785b785274c76f8be7b136 *inst/NEWS.Rd -9e7da8008d4c41f098d9ffba0e00756a *man/R.Rd +34357ef16dcff7ef04ba4634f7e51657 *inst/NEWS.Rd +2fd36e671344c1f894b4f06235156ce6 *man/R.Rd 4141097f19682037945cf253537240c9 *man/confband.Rd b211532488e33a74047c6cbdce2e3246 *man/ctm-methods.Rd dbe2d83228567a26ae5f2af6d8ab9203 *man/ctm.Rd 30d6369b93ba5ebb221d6a47b851f6ce *man/mlt-methods.Rd -3338817cb685f7ed177fc68d2b6ad4b5 *man/mlt-package.Rd +d4dc0f0568a35ea8c0c0a3ee14a20233 *man/mlt-package.Rd c5add13dbdb8903c05a351768268d940 *man/mlt.Rd a4990dc3e06fe47fc8fdc6deb4712968 *man/mltoptim.Rd -7a2d1b4432a506806eef86dc4ce095d0 *man/predict.Rd +18e45384a6a7af7be1a801b23d9419ff *man/predict.Rd db0bcd162bd3125c10bd2a365fa92a3b *tests/2sample.R 5a9f502537a95ce85d5627eff0a7b610 *tests/2sample.Rout.save 1761c0ee2fa1a622b95d41dae932a559 *tests/Cox-Ex.R be7cf5845dbab7aaec2bf4b7518842d9 *tests/Cox-Ex.Rout.save -1ad2ce6b21829a5ec4991c0a079b6a10 *tests/Examples/mlt-Ex.Rout.save 431aabe6cb130b2cc8eafbcaf6a21b15 *tests/bugfixes.R bc3008e0de096dd16c88f119f434f983 *tests/bugfixes.Rout.save cab1777631a72c7a1dd8b7138cbdfd6f *tests/distr-Ex.R diff --git a/R/dpq_etc.R b/R/dpq_etc.R index 5af0724..d1bc801 100644 --- a/R/dpq_etc.R +++ b/R/dpq_etc.R @@ -276,8 +276,8 @@ qmlt <- function(object, newdata = NULL, q = NULL, prob = .5, n = 50, nrow = nrow(trm), ncol = length(prob), byrow = TRUE) ret <- .invf(object, f = trm, q = q, z = qu) - i <- as.vector(matrix(1:length(ret), ncol = nrow(trm), byrow = TRUE)) - ret <- if (is.matrix(ret)) ret[i,] else ret[i] + i <- as.vector(matrix(1:NROW(ret), ncol = nrow(trm), byrow = TRUE)) + ret <- if (is.data.frame(ret)) ret[i,] else ret[i] ### arrays of factors are not allowed if (is.factor(q)) return(ret) diff --git a/build/partial.rdb b/build/partial.rdb index 09c9a55..65014af 100644 Binary files a/build/partial.rdb and b/build/partial.rdb differ diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index 06a4553..0de0335 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -2,7 +2,15 @@ \name{NEWS} \title{NEWS file for the mlt package} -\section{Changes in Version 1.2-2 (2020-02-12)}{ +\section{Changes in Version 1.2-3 (2021-02-22)}{ + \subsection{New features}{ + \itemize{ + \item{Improve documentation.} + } + } +} + +\section{Changes in Version 1.2-2 (2021-02-12)}{ \subsection{Bugfixes}{ \itemize{ \item{Sampling from unconditional models did not pay attention to @@ -11,7 +19,7 @@ } } -\section{Changes in Version 1.2-1 (2020-02-03)}{ +\section{Changes in Version 1.2-1 (2021-02-03)}{ \subsection{New features}{ \itemize{ \item{Quantiles and thus simulations are now computationally more exact and more robust. diff --git a/man/R.Rd b/man/R.Rd index ea98bab..2956798 100644 --- a/man/R.Rd +++ b/man/R.Rd @@ -10,7 +10,7 @@ \alias{as.Surv} \alias{as.Surv.response} \title{ - Response Variable + Response Variables } \description{ Represent a possibly censored or truncated response variable @@ -49,13 +49,77 @@ as.Surv(object) \details{ \code{R} is basically an extention of \code{\link[survival]{Surv}} for the representation of arbitrarily censored or truncated measurements at any scale. + The \code{storage.mode} of \code{object} determines if models are fitted + by the discrete likelihood (integers or factors) or the continuous + likelihood (log-density for numeric \code{object}s). Interval-censoring + is given by intervals (\code{cleft}, \code{cright}], also for integers and + factors (see example below). Left- and right-truncation can be defined + by the \code{tleft} and \code{tright} arguments. Existing \code{Surv} + objects can be converted using \code{R(Surv(...))}$ and, in some cases, an + \code{as.Surv()} method exists for representing \code{response} objects as + \code{Surv} objects. \code{R} applied to a list calls \code{R} for each of the list elements and returns a joint object. + + More examples can be found in Hothorn (2018) and in + \code{vignette("tram", package = "tram")}. + +} +\references{ + + Torsten Hothorn (2020), Most Likely Transformations: The mlt Package, + \emph{Journal of Statistical Software}, \bold{92}(1), 1--68, + \doi{10.18637/jss.v092.i01} + } \examples{ + library("survival") + + ### randomly right-censored continuous observations + time <- as.double(1:9) + event <- rep(c(FALSE, TRUE), length = length(time)) + + Surv(time, event) + R(Surv(time, event)) + + ### right-censoring, left-truncation + ltm <- 1:9 / 10 + Surv(ltm, time, event) + R(Surv(ltm, time, event)) + + ### interval-censoring + Surv(ltm, time, type = "interval2") + R(Surv(ltm, time, type = "interval2")) + + ### interval-censoring, left/right-truncation + lc <- as.double(1:4) + lt <- c(NA, NA, 7, 8) + rt <- c(NA, 9, NA, 10) + x <- c(3, NA, NA, NA) + rc <- as.double(11:14) + R(x, cleft = lt, cright = rt) + as.Surv(R(x, cleft = lt, cright = rt)) + R(x, tleft = 1, cleft = lt, cright = rt) + R(x, tleft = 1, cleft = lt, cright = rt, tright = 15) + R(x, tleft = lc, cleft = lt, cright = rt, tright = rc) + + ### discrete observations: counts + x <- 0:9 + R(x) + ### partially interval-censored counts + rx <- c(rep(NA, 6), rep(15L, 4)) + R(x, cright = rx) + ### ordered factor - R(gl(3, 3, labels = LETTERS[1:3])) + x <- gl(5, 2, labels = LETTERS[1:5], ordered = TRUE) + R(x) + ### interval-censoring (ie, observations can have multiple levels) + lx <- ordered(c("A", "A", "B", "C", "D", "E"), + levels = LETTERS[1:5], labels = LETTERS[1:5]) + rx <- ordered(c("B", "D", "E", "D", "D", "E"), + levels = LETTERS[1:5], labels = LETTERS[1:5]) + R(rx, cleft = lx, cright = rx) } diff --git a/man/mlt-package.Rd b/man/mlt-package.Rd index 5263f41..97ff0f3 100644 --- a/man/mlt-package.Rd +++ b/man/mlt-package.Rd @@ -4,14 +4,17 @@ \title{General Information on the \pkg{mlt} Package} \description{ The \pkg{mlt} package implements maximum likelihood estimation in - conditional transformation models as introduced by Hothorn et al. (2018). + conditional transformation models as introduced by Hothorn et al. (2020). An introduction to the package is available in the \code{mlt} package - vignette from package \code{mlt.docreg} (Hothorn, 2018). + vignette from package \code{mlt.docreg} (Hothorn, 2020). A short talk on most likely transformations is available from \url{https://channel9.msdn.com/Events/useR-international-R-User-conference/useR2016/Most-Likely-Transformations}. + Novice users might find the high(er) level interfaces offered by package + \pkg{tram} more convenient. + } \references{ @@ -19,9 +22,9 @@ Transformations, \emph{Scandinavian Journal of Statistics}, \bold{45}(1), 110--134, \doi{10.1111/sjos.12291}. - Torsten Hothorn (2018), Most Likely Transformations: The mlt Package, - \emph{Journal of Statistical Software}, forthcoming. URL: - \url{https://cran.r-project.org/package=mlt.docreg}. + Torsten Hothorn (2020), Most Likely Transformations: The mlt Package, + \emph{Journal of Statistical Software}, \bold{92}(1), 1--68, + \doi{10.18637/jss.v092.i01} } \author{ diff --git a/man/predict.Rd b/man/predict.Rd index 45e3d07..16a2d5c 100644 --- a/man/predict.Rd +++ b/man/predict.Rd @@ -59,7 +59,21 @@ for all observations in \code{newdata} and plots these functions (according to \code{type}). \code{predict} evaluates the transformation function over a grid of \code{q} values for all observations in \code{newdata} and returns the - result as a matrix (where _columns_ correspond to _rows_ in \code{newdata}). + result as a matrix (where _columns_ correspond to _rows_ in + \code{newdata}, see examples). Lack of \code{type = "mean"} is a feature + and not a bug. + + Argument \code{type} defines the scale of the plots or predictions: + \code{type = "distribution"} means the cumulative distribution function, + \code{type = "survivor"} is the survivor function (one minus distribution + function), \code{type = "density"} the absolute continuous or discrete + density (depending on the response), \code{type = "hazard"}, \code{type = + "cumhazard"}, and \code{type = "odds"} refers to the hazard (absolute + continuous or discrete), cumulative hazard (defined as minus log-survivor + function in both the absolute continuous and discrete cases), and odds + (distribution divided by survivor) functions. The quantile function can be + evaluated for probabilities \code{prob} by \code{type = "quantile"}. + Note that the \code{predict} method for \code{ctm} objects requires all model coefficients to be specified in this unfitted model. \code{simulate} draws samples from \code{object} by numerical inversion of the @@ -69,4 +83,123 @@ want the methods to pay attention to offsets, specify them as a variable in the model with fixed regression coefficient using the \code{fixed} argument in \code{\link{mlt}}. + + More examples can be found in Hothorn (2018). + +} +\references{ + + Torsten Hothorn (2020), Most Likely Transformations: The mlt Package, + \emph{Journal of Statistical Software}, \bold{92}(1), 1--68, + \doi{10.18637/jss.v092.i01} + +} +\examples{ + + library("survival") + op <- options(digits = 2) + + ### GBSG2 dataset + data("GBSG2", package = "TH.data") + + ### right-censored response + GBSG2$y <- with(GBSG2, Surv(time, cens)) + + ### define Bernstein(log(time)) parameterisation + ### of transformation function. The response + ### is bounded (log(0) doesn't work, so we use log(1)) + ### support defines the support of the Bernstein polynomial + ### and add can be used to make the grid wider (see below) + rvar <- numeric_var("y", bounds = c(0, Inf), + support = c(100, 2000)) + rb <- Bernstein_basis(rvar, order = 6, ui = "increasing") + ### dummy coding of menopausal status + hb <- as.basis(~ 0 + menostat, data = GBSG2) + ### treatment contrast of hormonal treatment + xb <- as.basis(~ horTh, data = GBSG2, remove_intercept = TRUE) + + ### set-up and fit Cox model, stratified by menopausal status + m <- ctm(rb, interacting = hb, shifting = xb, todistr = "MinExtrVal") + fm <- mlt(m, data = GBSG2) + + ### generate grid for all three variables + ### note that the response grid ranges between 1 (bounds[1]) + ### and 2000 (support[2]) + (d <- mkgrid(m, n = 10)) + ### data.frame of menopausal status and treatment + nd <- do.call("expand.grid", d[-1]) + + ### plot model on different scales, for all four combinations + ### of menopausal status and hormonal treatment + typ <- c("distribution", "survivor", "density", "hazard", + "cumhazard", "odds") + layout(matrix(1:6, nrow = 2)) + nl <- sapply(typ, function(tp) + ### K = 500 makes densities and hazards smooth + plot(fm, newdata = nd, type = tp, col = 1:nrow(nd), K = 500)) + legend("topleft", lty = 1, col = 1:nrow(nd), + legend = do.call("paste", nd), bty = "n") + + ### plot calls predict, which generates a grid with K = 50 + ### response values + ### note that a K x nrow(newdata) matrix is returned + ### (for reasons explained in the next example) + predict(fm, newdata = nd, type = "survivor") + + ### newdata can take a list, and evaluates the survivor + ### function on the grid defined by newdata + ### using a linear array model formulation and is + ### extremely efficient (wrt computing time and memory) + ### d[1] (the response grid) varies fastest + ### => the first dimension of predict() is always the response, + ### not the dimension of the predictor variables (like one + ### might expect) + predict(fm, newdata = d, type = "survivor") + + ### owing to this structure, the result can be quickly stored in + ### a data frame as follows + cd <- do.call("expand.grid", d) + cd$surv <- c(S <- predict(fm, newdata = d, type = "survivor")) + + ### works for distribution functions + all.equal(1 - S, predict(fm, newdata = d, type = "distribution")) + ### cumulative hazard functions + all.equal(-log(S), predict(fm, newdata = d, type = "cumhazard")) + ### log-cumulative hazard functions (= trafo, for Cox models) + all.equal(log(-log(S)), predict(fm, newdata = d, type = "logcumhazard")) + all.equal(log(-log(S)), predict(fm, newdata = d, type = "trafo")) + ### densities, hazards, or odds functions + predict(fm, newdata = d, type = "density") + predict(fm, newdata = d, type = "hazard") + predict(fm, newdata = d, type = "odds") + ### and quantiles (10 and 20%) + predict(fm, newdata = d[-1], type = "quantile", prob = 1:2 / 10) + + ### note that some quantiles are only defined as intervals + ### (> 2000, in this case). Intervals are returned as an "response" + ### object, see ?R. Unfortunately, these can't be stored as array, so + ### a data.frame is returned where the quantile varies first + p <- c(list(prob = 1:9/10), d[-1]) + np <- do.call("expand.grid", p) + (Q <- predict(fm, newdata = d[-1], type = "quantile", prob = 1:9 / 10)) + np$Q <- Q + np + + ### simulating from the model works by inverting the distribution + ### function; some obs are right-censored at 2000 + (s <- simulate(fm, newdata = nd, nsim = 3)) + ### convert to Surv + sapply(s, as.Surv) + + ### generate 3 parametric bootstrap samples from the model + tmp <- GBSG2[, c("menostat", "horTh")] + s <- simulate(fm, newdata = tmp, nsim = 3) + ### refit the model using the simulated response + lapply(s, function(y) { + tmp$y <- y + coef(mlt(m, data = tmp)) + }) + + options(op) + } diff --git a/tests/Examples/mlt-Ex.Rout.save b/tests/Examples/mlt-Ex.Rout.save deleted file mode 100644 index 010dd09..0000000 --- a/tests/Examples/mlt-Ex.Rout.save +++ /dev/null @@ -1,132 +0,0 @@ - -R version 4.0.3 (2020-10-10) -- "Bunny-Wunnies Freak Out" -Copyright (C) 2020 The R Foundation for Statistical Computing -Platform: x86_64-pc-linux-gnu (64-bit) - -R is free software and comes with ABSOLUTELY NO WARRANTY. -You are welcome to redistribute it under certain conditions. -Type 'license()' or 'licence()' for distribution details. - - Natural language support but running in an English locale - -R is a collaborative project with many contributors. -Type 'contributors()' for more information and -'citation()' on how to cite R or R packages in publications. - -Type 'demo()' for some demos, 'help()' for on-line help, or -'help.start()' for an HTML browser interface to help. -Type 'q()' to quit R. - -> pkgname <- "mlt" -> source(file.path(R.home("share"), "R", "examples-header.R")) -> options(warn = 1) -> library('mlt') -Loading required package: basefun -Loading required package: variables -> -> base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') -> base::assign(".old_wd", base::getwd(), pos = 'CheckExEnv') -> cleanEx() -> nameEx("R") -> ### * R -> -> flush(stderr()); flush(stdout()) -> -> ### Name: R -> ### Title: Response Variable -> ### Aliases: R R.Surv R.factor R.ordered R.numeric R.integer R.list -> ### R.response as.Surv as.Surv.response -> -> ### ** Examples -> -> -> ### ordered factor -> R(gl(3, 3, labels = LETTERS[1:3])) -Warning in R.factor(gl(3, 3, labels = LETTERS[1:3])) : - response is unordered factor; - results may depend on order of levels -[1] (NA, A] (NA, A] (NA, A] (A, B] (A, B] (A, B] (B, NA] (B, NA] (B, NA] -> -> -> -> -> cleanEx() -> nameEx("mlt") -> ### * mlt -> -> flush(stderr()); flush(stdout()) -> -> ### Name: mlt -> ### Title: Most Likely Transformations -> ### Aliases: mlt -> -> ### ** Examples -> -> -> ### set-up conditional transformation model for conditional -> ### distribution of dist given speed -> dist <- numeric_var("dist", support = c(2.0, 100), bounds = c(0, Inf)) -> speed <- numeric_var("speed", support = c(5.0, 23), bounds = c(0, Inf)) -> ctmm <- ctm(response = Bernstein_basis(dist, order = 4, ui = "increasing"), -+ interacting = Bernstein_basis(speed, order = 3)) -> -> ### fit model -> mltm <- mlt(ctmm, data = cars) -> -> ### plot data -> plot(cars) -> ### predict quantiles and overlay data with model via a "quantile sheet" -> q <- predict(mltm, newdata = data.frame(speed = 0:24), type = "quantile", -+ p = 2:8 / 10, K = 500) -> tmp <- apply(q, 1, function(x) lines(0:24, x, type = "l")) -> -> -> -> -> cleanEx() -> nameEx("mltoptim") -> ### * mltoptim -> -> flush(stderr()); flush(stdout()) -> -> ### Name: mltoptim -> ### Title: Control Optimisation -> ### Aliases: mltoptim -> ### Keywords: list -> -> ### ** Examples -> -> -> ### set-up linear transformation model for conditional -> ### distribution of dist given speed -> dist <- numeric_var("dist", support = c(2.0, 100), bounds = c(0, Inf)) -> ctmm <- ctm(response = Bernstein_basis(dist, order = 4, ui = "increasing"), -+ shifting = ~ speed, data = cars) -> -> ### use auglag with kkt2.check = TRUE => the numerically determined -> ### hessian is returned as "optim_hessian" slot -> op <- mltoptim(auglag = list(maxtry = 5, kkt2.check = TRUE))[1] -> mltm <- mlt(ctmm, data = cars, scale = FALSE, optim = op) -> -> ### compare analytical and numerical hessian -> all.equal(c(Hessian(mltm)), c(mltm$optim_hessian), tol = 1e-4) -[1] TRUE -> -> -> -> -> ### *