/
standardise.R
161 lines (156 loc) · 6.15 KB
/
standardise.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#' Constructor for a standardising population used for survextrap
#' outputs
#'
#' Standardised outputs are outputs from models with covariates, that
#' are defined by marginalising (averaging) over covariate values in a
#' given population, rather than being conditional on a given
#' covariate value.
#'
#' @details These are produced by generating a Monte Carlo sample from
#' the joint distribution of parameters \eqn{\theta} and covariate
#' values \eqn{X}, \eqn{p(X,\theta) = p(\theta|X)p(X)}, where
#' \eqn{p(X)} is defined by the empirical distribution of covariates
#' in the standard population.
#'
#' Hence applying a vectorised output function \eqn{g()} (such as the
#' RMST or survival probability) to this sample produces a sample from
#' the posterior of \eqn{\int g(\theta|X) dX}: the average RMST (say)
#' for a heterogeneous population.
#'
#' @aliases standardize_to
#'
#' @details See the Examples vignette for some examples and notes on
#' computation.
#'
#' @param newdata Data frame describing a population.
#'
#' @param random By default this is \code{FALSE}, indicating that
#' standardised samples should be obtained by concatenating the
#' posterior samples for each covariate value in the standard
#' population. The sample from the standardised posterior of
#' parameters then has size \code{niter} times the number of rows in
#' \code{newdata}, where \code{niter} is the number of MCMC
#' iterations used in the original \code{survextrap} fit. Computing
#' the resulting output function (e.g. RMST which uses numerical
#' integration) can then be computationally intensive if this sample
#' size is large.
#'
#' A quicker alternative is to sample a random row of the standard
#' population for each MCMC iteration. The standardised sample from
#' the posterior then has size \code{niter}. This is specified by
#' using \code{random=TRUE}. If this is used, then the result
#' depends on the random number seed, and it should be checked that
#' the results are stable to within the required number of
#' significant figures. If not, run \code{survextrap} with more
#' MCMC iterations or increase \code{nstd} here.
#'
#' @param nstd Number of draws from the population distribution used
#' per MCMC sample from the parameters when \code{random=TRUE}.
#' With the default of 1, the value of the covariate vector \eqn{X}
#' is essentially treated as if it were an additional parameter in
#' the Bayesian model, drawn by Monte Carlo independently of the
#' remaining parameters.
#'
#' @return A copy of \code{newdata}, but with attributes added to
#' indicate that this should be used as a standard population. When
#' this \code{newdata} is passed to \code{survextrap}'s output
#' functions, the outputs will then be presented as an average over
#' the empirical distribution of covariate values described by
#' \code{newdata}, rather than as one output per row of
#' \code{newdata} (distinct covariate values).
#'
#' @examples
#' rxph_mod <- survextrap(Surv(years, status) ~ rx, data=colons, fit_method="opt")
#' ref_pop <- data.frame(rx = c("Obs","Lev+5FU"))
#'
#' # covariate-specific outputs
#' survival(rxph_mod, t = c(5,10), newdata = ref_pop)
#'
#' # standardised outputs
#' survival(rxph_mod, t = c(5,10), newdata = standardise_to(ref_pop))
#'
#' @export
standardise_to <- function(newdata, nstd=1, random=FALSE){
attr(newdata, "std") <- TRUE
if (!is.numeric(nstd)) stop("`nstd` must be numeric")
if (nstd < 1) stop("`nstd` must be 1 or more")
attr(newdata, "nstd") <- nstd
attr(newdata, "random") <- random
newdata
}
#' @rdname standardise_to
#' @export
standardize_to <- standardise_to
#' Marginalise output of get_pars over a standard population
#'
#' Without marginalisation, \code{get_pars} returns, for each
#' parameter \eqn{\theta} and covariate value \eqn{X}, samples from
#' the posterior of \eqn{\theta|X}. For each parameter, \code{nvals}
#' sample sets of size \code{niter} are produced, one for each set of
#' covariate values. \code{niter} is the number of MCMC samples
#' requested, and \code{nvals} is the number of rows of
#' \code{newdata}.
#'
#' With marginalisation, \code{get_pars} returns a single set of
#' parameter values, sampled from the joint distribution of
#' \eqn{\theta|X} and \eqn{X}.
#'
#' Hence applying a vectorised function \eqn{g()} to the marginalised
#' sample produces a sample from the posterior of \eqn{\int
#' g(\theta|X) dX}.
#'
#' The distribution of \eqn{X} is defined by the empirical
#' distribution of values in the \code{newdata} supplied to
#' \code{get_pars}.
#'
#' @inheritParams standardise_to
#'
#' @param pars output of \code{get_pars}
#'
#' @return A list with components:
#'
#' \code{alpha} Log hazard scale. Matrix with \code{niter} rows and
#' one column.
#'
#' \code{coefs} Spline basis coefficients. Array with dimensions
#' \code{niter} x \code{1} x \code{nvars}, where \code{nvars} is the
#' basis dimension.
#'
#' \code{pcure} Cure probability in cure models. Matrix with
#' \code{niter} rows and one column.
#'
#' @noRd
marginalise_pars <- function(pars, nstd, random=FALSE){
niter <- pars$niter
nvals <- ncol(pars$alpha)
nvars <- dim(pars$coefs)[3]
if (random){
alpha <- numeric(niter*nstd)
coefs <- matrix(nrow=niter*nstd, ncol=nvars)
for (i in 1:nstd){
popind <- sample(1:nvals, niter, replace=TRUE)
matind <- niter*(i-1) + 1:niter
alpha[matind] <- pars$alpha[cbind(1:niter, popind)]
inds3 <- cbind(rep(matind, nvars), popind, rep(1:nvars, each=niter))
coefs[matind,] <- pars$coefs[inds3]
}
alpha_std <- matrix(alpha, ncol=1)
coefs_std <- array(coefs, dim = c(niter, 1, nvars))
if (!is.null(pars$pcure)){
pcure <- numeric(niter*nstd)
for (i in 1:nstd){
pcure[matind] <- pars$pcure[cbind(1:niter, popind)]
}
pcure_std <- matrix(pcure, ncol=1)
} else pcure_std <- NULL
} else {
niter <- niter*nvals
alpha_std <- matrix(pars$alpha, ncol=1)
pcure_std <- if (!is.null(pars$pcure)) matrix(as.vector(pars$pcure), ncol=1)
coefs_std <- array(pars$coefs, dim=c(niter, 1, nvars))
}
list(alpha = alpha_std,
coefs = coefs_std,
pcure = pcure_std,
niter = niter)
}