-
Notifications
You must be signed in to change notification settings - Fork 16
/
efpt.R
170 lines (168 loc) · 8.33 KB
/
efpt.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Could also get CDF simply by making tostate absorbing and calculating pmatrix.
# TODO time-dependent covariates?. Unclear if the expectation has a solution for piecewise-constant rate.
#' Expected first passage time
#'
#' Expected time until first reaching a particular state or set of states in a
#' Markov model.
#'
#' The expected first passage times from each of a set of states
#' \eqn{\mathbf{i}}{i} to to the remaining set of states
#' \eqn{\overline{\mathbf{i}}}{ibar} in the state space, for a model with
#' transition intensity matrix \eqn{Q}, are
#'
#' \deqn{-Q_{\mathbf{i},\mathbf{i}}^{-1} \mathbf{1}}{-Q_{i,i}^{-1} 1}
#'
#' where \eqn{\mathbf{1}}{1} is a vector of ones, and
#' \eqn{Q_{\mathbf{i},\mathbf{i}}}{Q_{i,i}} is the square subset of \eqn{Q}
#' pertaining to states \eqn{\mathbf{i}}{i}.
#'
#' It is equal to the sum of mean sojourn times for all states between the
#' "from" and "to" states in a unidirectional model. If there is non-zero
#' chance of reaching an absorbing state before reaching \code{tostate}, then
#' it is infinite. It is trivially zero if the "from" state equals
#' \code{tostate}.
#'
#' This function currently only handles time-homogeneous Markov models. For
#' time-inhomogeneous models it will assume that \eqn{Q} equals the average
#' intensity matrix over all times and observed covariates. Simulation might
#' be used to handle time dependence.
#'
#' Note this is the \emph{expectation} of first passage time, and the
#' confidence intervals are CIs for this mean, not predictive intervals for the
#' first passage time. The full distribution of the first passage time to a
#' set of states can be obtained by setting the rows of the intensity matrix
#' \eqn{Q} corresponding to that set of states to zero to make a model where
#' those states are absorbing. The corresponding transition probability matrix
#' \eqn{Exp(Qt)} then gives the probabilities of having hit or passed that
#' state by a time \eqn{t} (see the example below). This is implemented in
#' \code{\link{ppass.msm}}.
#'
#' @param x A fitted multi-state model, as returned by \code{\link{msm}}.
#' @param qmatrix Instead of \code{x}, you can simply supply a transition
#' intensity matrix in \code{qmatrix}.
#' @param tostate State, or set of states supplied as a vector, for which to
#' estimate the first passage time into. Can be integer, or character matched
#' to the row names of the Q matrix.
#' @param start Starting state (integer). By default (\code{start="all"}),
#' this will return a vector of expected passage times from each state in turn.
#'
#' Alternatively, this can be used to obtain the expected first passage time
#' from a \emph{set} of states, rather than single states. To achieve this,
#' \code{state} is set to a vector of weights, with length equal to the number
#' of states in the model. These weights should be proportional to the
#' probability of starting in each of the states in the desired set, so that
#' weights of zero are supplied for other states. The function will calculate
#' the weighted average of the expected passage times from each of the
#' corresponding states.
#' @param covariates Covariate values defining the intensity matrix for the
#' fitted model \code{x}, as supplied to \code{\link{qmatrix.msm}}.
#' @param ci If \code{"normal"}, then calculate a confidence interval by
#' simulating \code{B} random vectors from the asymptotic multivariate normal
#' distribution implied by the maximum likelihood estimates (and covariance
#' matrix) of the log transition intensities and covariate effects.
#'
#' If \code{"bootstrap"} then calculate a confidence interval by non-parametric
#' bootstrap refitting. This is 1-2 orders of magnitude slower than the
#' \code{"normal"} method, but is expected to be more accurate. See
#' \code{\link{boot.msm}} for more details of bootstrapping in \pkg{msm}.
#'
#' If \code{"none"} (the default) then no confidence interval is calculated.
#' @param cl Width of the symmetric confidence interval, relative to 1.
#' @param B Number of bootstrap replicates.
#' @param cores Number of cores to use for bootstrapping using parallel
#' processing. See \code{\link{boot.msm}} for more details.
#' @param ... Arguments to pass to \code{\link{MatrixExp}}.
#' @return A vector of expected first passage times, or "hitting times", from
#' each state to the desired state.
#' @author C. H. Jackson \email{chris.jackson@@mrc-bsu.cam.ac.uk}
#' @seealso \code{\link{sojourn.msm}}, \code{\link{totlos.msm}},
#' \code{\link{boot.msm}}.
#' @references Norris, J. R. (1997) Markov Chains. Cambridge University Press.
#' @keywords models
#' @examples
#'
#' twoway4.q <- rbind(c(-0.5, 0.25, 0, 0.25), c(0.166, -0.498, 0.166, 0.166),
#' c(0, 0.25, -0.5, 0.25), c(0, 0, 0, 0))
#' efpt.msm(qmatrix=twoway4.q, tostate=3)
#' # given in state 1, expected time to reaching state 3 is infinite
#' # since may die (state 4) before entering state 3
#'
#' # If we remove the death state from the model, EFPTs become finite
#' Q <- twoway4.q[1:3,1:3]; diag(Q) <- 0; diag(Q) <- -rowSums(Q)
#' efpt.msm(qmatrix=Q, tostate=3)
#'
#' # Suppose we cannot die or regress while in state 2, can only go to state 3
#' Q <- twoway4.q; Q[2,4] <- Q[2,1] <- 0; diag(Q) <- 0; diag(Q) <- -rowSums(Q)
#' efpt.msm(qmatrix=Q, tostate=3)
#' # The expected time from 2 to 3 now equals the mean sojourn time in 2.
#' -1/Q[2,2]
#'
#' # Calculate cumulative distribution of the first passage time
#' # into state 3 for the following three-state model
#' Q <- twoway4.q[1:3,1:3]; diag(Q) <- 0; diag(Q) <- -rowSums(Q)
#' # Firstly form a model where the desired hitting state is absorbing
#' Q[3,] <- 0
#' MatrixExp(Q, t=10)[,3]
#' ppass.msm(qmatrix=Q, tot=10)
#' # Given in state 1 at time 0, P(hit 3 by time 10) = 0.479
#' MatrixExp(Q, t=50)[,3] # P(hit 3 by time 50) = 0.98
#' ppass.msm(qmatrix=Q, tot=50)
#'
#'
#' @export efpt.msm
efpt.msm <- function(x=NULL, qmatrix=NULL, tostate, start="all", covariates="mean",
ci=c("none","normal","bootstrap"), cl = 0.95, B = 1000, cores=NULL, ...)
{
ci <- match.arg(ci)
if (!is.null(x)) {
if (!inherits(x, "msm")) stop("expected x to be a msm model")
qmatrix <- qmatrix.msm(x, covariates=covariates, ci="none")
}
else if (!is.null(qmatrix)) {
if (!is.matrix(qmatrix) || (nrow(qmatrix) != ncol(qmatrix)))
stop("expected qmatrix to be a square matrix")
if (ci != "none") {warning("No fitted model supplied: not calculating confidence intervals."); ci <- "none"}
}
if (is.character(tostate)) {
if (!tostate %in% rownames(qmatrix)) stop(sprintf("state \"%s\" unknown", tostate))
tostate <- match(tostate, rownames(qmatrix))
}
est <- rep(NA, nrow(qmatrix))
## EFPT is zero if we're already in tostate
est[tostate] <- 0
abstate <- absorbing.msm(qmatrix=qmatrix)
## EFPT is infinite for other absorbing states
est[setdiff(abstate,tostate)] <- Inf
fromstate <- setdiff(1:nrow(qmatrix), union(abstate,tostate))
## EFPT is infinite if any chance of absorbing elsewhere before
## hitting tostate. To calculate this, form Q matrix with tostate
## made absorbing, and look at P matrix in unit time.
Qred <- qmatrix; Qred[tostate,] <- 0
Pmat <- MatrixExp(Qred, ...)
Pmat[Pmat < 1e-16] <- 0
p.abs <- rowSums(Pmat[fromstate,setdiff(abstate,tostate),drop=FALSE])
est[fromstate][p.abs>0] <- Inf
## Any states left from which EFPT to tostate is nonzero and finite.
## Use standard linear equation solution
## see, e.g. equation (3) of Harrison and Knottenbelt (2001)
if (any(is.na(est))){
fromstate <- which(is.na(est))
Q <- as.matrix(qmatrix[fromstate, fromstate])
est[fromstate] <- solve(-Q, rep(1,nrow(Q)))
}
if (!is.character(start)) {
if (!is.numeric(start) || (length(start)!=nrow(qmatrix)))
stop("Expected \"start\" to be \"all\" or a numeric vector of length ", nrow(qmatrix))
start <- start / sum(start)
est <- est %*% start
}
else if (any(start!="all"))
stop("Expected \"start\" to be \"all\" or a numeric vector of length ", nrow(qmatrix))
e.ci <- switch(ci,
bootstrap = efpt.ci.msm(x=x, qmatrix=qmatrix, tostate=tostate, start=start, covariates=covariates, cl=cl, B=B, cores=cores),
normal = efpt.normci.msm(x=x, qmatrix=qmatrix, tostate=tostate, start=start, covariates=covariates, cl=cl, B=B),
none = NULL)
res <- if (ci=="none") est else rbind(est, e.ci)
class(res) <- "msm.estbystate"
res
}