-
Notifications
You must be signed in to change notification settings - Fork 21
/
model_prediction.R
400 lines (352 loc) · 17.8 KB
/
model_prediction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
#' Model predictions
#'
#' Function generates a data frame of model predictions for the typical value in the population,
#' individual predictions and data predictions. The function can also be used to generate datasets
#' without predictions using the design specified in the arguments.
#'
#' @param poped.db A PopED database created by \code{\link{create.poped.database}}.
#' @param models_to_use Which model numbers should we use?
#' Model numbers are defined in \code{design} below using \code{model_switch}. For an explanation see \code{\link{create_design}}.
#' @param model_num_points How many extra observation rows should be created in the data frame for each group or individual
#' per model. If used then the points are placed evenly between \code{model_minxt} and \code{model_maxxt}. This option
#' is used by \code{\link{plot_model_prediction}} to simulate the response of the model on a finer grid then the defined design.
#' If \code{NULL} then only the input design is used. Can be a single value or a vector the same length as the number of models.
#' @param model_minxt The minimum time value for extra observation rows indicated by \code{model_num_points}.
#' A vector the same length as the number of models
#' @param model_maxxt The minimum time value for extra observation rows indicated by \code{model_num_points}.
#' A vector the same length as the number of models
#' @param include_sample_times Should observations rows in the output data frame include the times indicated in the input design?
#' @param IPRED Should we simulate individual predictions?
#' @param DV should we simulate observations?
#' @param include_a Should we include the continuous design variables in the output?
#' @param include_x Should we include the discrete design variables in the output?
#' @param groups_to_use Which groups should we include in the output data frame?Allowed values are \code{"all"} or
#' a vector of numbers indicating the groups to include, e.g. \code{c(1,3,6)}.
#' @param filename A filename that the data frame should be written to in comma separate value (csv) format.
#' @param dosing A list of lists that adds dosing records to the data frame (Each inner list corresponding to a group in the design).
#' @param design A list that is passed as arguments to the function \code{\link{create_design}} to create a design object.
#' @param model A list containing the model elements to use for the predictions
#' @param parameters A list of parameters to use in the model predictions.
#' @param predictions Should the resulting data frame have predictions? Either \code{TRUE} or \code{FALSE}
#' or \code{NULL} in which case the function decides based on other arguments.
#' @param manipulation A list of one or more \code{\link[base]{expression}} arguments. Each expression is
#' evaluated using the code \code{for(i in 1:length(manipulation)){df <- within(df,{eval(manipulation[[i]])})}}.
#' Can be used to transform
#' or create new columns in the resulting data frame. Note that these transformations are created after any model predictions occur,
#' so transformations in columns having to do with input to model predictions will not affect the predictions.
#' @param PI Compute prediction intervals for the data given the model. Predictions are based on first-order approximations to
#' the model variance and a log-normality assumption of that variance (by default), if all predictions are positive, otherwise a
#' normal distribution is assumed.
#' @param PI_conf_level The confidence level for the prediction interval computed.
#' @param PI_ln_dist Should the PI calculation be done assuming log-normal or a normal distribution. TRUE is the default and
#' indicates a log-normal distribution. If any of the PRED values from the model are negative then a normal distribution is
#' assumed.
#'
#' @return A dataframe containing a design and (potentially) simulated data with some dense grid of samples and/or
#' based on the input design.
#'
#' @family evaluate_design
#' @family Simulation
#'
#' @example tests/testthat/examples_fcn_doc/examples_model_prediction.R
#'
#' @export
# @importFrom mvtnorm rmvnorm
# @importFrom dplyr rbind_list
model_prediction <- function(poped.db=NULL,
design=list( ## passed to create_design
xt=poped.db$design[["xt"]], ## -- Matrix defining the initial sampling schedule row major, can also be a list of vectors--
groupsize=poped.db$design$groupsize, ## -- Vector defining the size of the different groups (num individuals in each group) --
m=poped.db$design[["m"]], ## -- Number of groups, computed from xt if not defined --
x=poped.db$design[["x"]], ## -- Matrix defining the initial discrete values --
a=poped.db$design[["a"]], ## -- Vector defining the initial covariate values --
ni=poped.db$design$ni, ## -- Vector defining the number of samples for each group, computed as all elements of xt by default --
model_switch=poped.db$design$model_switch),
model = list(
fg_pointer=poped.db$model$fg_pointer,
ff_pointer=poped.db$model$ff_pointer,
ferror_pointer= poped.db$model$ferror_pointer),
parameters=list(
docc=poped.db$parameters$docc,
d = poped.db$parameters$d,
bpop = poped.db$parameters$bpop,
covd = poped.db$parameters$covd,
covdocc = poped.db$parameters$covdocc,
sigma = poped.db$parameters$sigma),
IPRED=FALSE,
DV=FALSE,
dosing=NULL, # otherwise a list of lists with "Time" and dosing columns needed for each group
predictions=NULL,
filename=NULL,
models_to_use="all",
model_num_points=NULL,
model_minxt=NULL,
model_maxxt=NULL,
include_sample_times=T,
groups_to_use="all",
include_a = TRUE,
include_x = TRUE,
manipulation=NULL,
PI = FALSE,
PI_conf_level = 0.95,
PI_ln_dist = TRUE)
{
if(is.null(predictions)){
predictions=FALSE
if(!is.null(poped.db) || (!is.null(unlist(parameters))) && !is.null(unlist(model)) && !is.null(unlist(design))) predictions=TRUE
}
if(is.null(poped.db) && is.null(unlist(design))) stop("Either 'poped.db' or 'design' need to be defined")
design <- do.call(create_design,design)
NumOcc=poped.db$parameters$NumOcc ## this should be removed and put into design
if(DV) IPRED=T
with(design,{
maxxt=poped.choose(poped.db$design_space$maxxt,xt) ## -- Matrix defining the max value for each sample --
minxt=poped.choose(poped.db$design_space$minxt,xt) ## -- Matrix defining the min value for each sample --
#--- size checking --------
if(!is.null(dosing)){
if(!length(dosing)==m){
if(length(dosing) == 1) {
dosing <- rep(dosing,m)
} else {
stop("dosing argument does not have the right dimensions.
Must be 1 list or a list of lists the size of the number of groups")
}
}
}
if(predictions){
docc_size = 0
if((!isempty(parameters$docc[,2]))){
docc_size = size(parameters$docc[,2,drop=F],1)
}
d_size = 0
if((!isempty(parameters$d[,2]))){
d_size = size(parameters$d[,2,drop=F],1)
}
}
used_times <- zeros(size(xt))
for(i in 1:size(xt,1)) used_times[i,1:ni[i]] <- 1
if(all(groups_to_use=="all")){
groups_to_use = 1:size(xt,1)
}
if(all(models_to_use=="all")){
#models_to_use = unique(as.vector(model_switch))
models_to_use = unique(as.vector(model_switch[used_times==1]))
}
df <- data.frame()
id_num_start <- 1
for(i in 1:length(groups_to_use)){
if(!exists("a")){
a_i = zeros(0,1)
} else if((isempty(a))){
a_i = zeros(0,1)
} else {
a_i = a[groups_to_use[i],,drop=F]
}
if(!exists("x")){
x_i = zeros(0,1)
} else if((isempty(x))){
x_i = zeros(0,1)
} else {
x_i = x[groups_to_use[i],,drop=F]
}
num_ids = groupsize[groups_to_use[i]]
if(all(is.null(model_num_points))){
xt_i = xt[groups_to_use[i],1:ni[groups_to_use[i]]]
model_switch_i = model_switch[groups_to_use[i],1:ni[groups_to_use[i]]]
if(!all(models_to_use == unique(as.vector(model_switch[used_times==1])))){ ## needs testing
xt_i = xt_i[model_switch_i %in% models_to_use]
model_switch_i = model_switch_i[model_switch_i %in% models_to_use]
}
} else {
xt_i <- c()
model_switch_i <- c()
if(length(models_to_use)>1 && length(model_num_points)==1) model_num_points <- rep(model_num_points,length(models_to_use))
for(j in models_to_use){
if(is.null(model_minxt)){
minv <- min(as.vector(minxt[model_switch==j]),na.rm = TRUE)
} else {
if(length(models_to_use)>1 && length(model_minxt)==1) model_minxt <- rep(model_minxt,length(models_to_use))
minv = model_minxt[j]
}
if(is.null(model_maxxt)){
maxv <- max(as.vector(maxxt[model_switch==j]),na.rm = TRUE)
} else {
if(length(models_to_use)>1 && length(model_maxxt)==1) model_maxxt <- rep(model_maxxt,length(models_to_use))
maxv = model_maxxt[j]
} #xt = t(seq(minv,maxv,length.out=model_num_points[i]))
tmp_num_pts <- model_num_points[j]
if(length(model_num_points)<j) tmp_num_pts <- model_num_points[1]
xt_i= c(xt_i,seq(minv,maxv,length.out=tmp_num_pts))
#model.pred <- rbind(xt)
#model.pred <- data.frame(Time=xt)
#model.pred <- c(model.pred,foo=xt)
#browser()
model_switch_i = c(model_switch_i,j*matrix(1,1,tmp_num_pts))
}
if(include_sample_times){
xt_i_extra = xt[groups_to_use[i],1:ni[groups_to_use[i]]]
model_switch_i_extra = model_switch[groups_to_use[i],1:ni[groups_to_use[i]]]
if(!all(models_to_use == unique(as.vector(model_switch[used_times==1])))){ ## needs testing
xt_i_extra = xt_i_extra[model_switch_i_extra %in% models_to_use]
model_switch_i_extra = model_switch_i_extra[model_switch_i_extra %in% models_to_use]
}
tmp.include <- !(xt_i_extra %in% xt_i)
xt_i <- c(xt_i,xt_i_extra[tmp.include])
model_switch_i <- c(model_switch_i,model_switch_i_extra[tmp.include])
tmp.order <- order(xt_i)
xt_i <- xt_i[tmp.order]
model_switch_i <- model_switch_i[tmp.order]
}
}
pred <- NA
group.df <- data.frame(Time=xt_i,PRED=pred,Group=groups_to_use[i],Model=model_switch_i)
if(predictions){
bpop_val <- parameters$bpop[,2,drop=F]
b_ind = zeros(1,d_size)
bocc_ind = zeros(docc_size,NumOcc)
g0 = feval(model$fg_pointer,x_i,a_i,bpop_val,b_ind,bocc_ind)
pred_list <- feval(model$ff_pointer,model_switch_i,xt_i,g0,poped.db)
pred_list$poped.db <- NULL
pred <- drop(pred_list[[1]])
group.df["PRED"] <- pred
# TODO: add this to both IPRED and DV and chnage extra columns names to replct where
# prediction comes from.
if(length(pred_list)>1){
# pred_list <- pred_list[sapply(pred_list, is.numeric)]
extra_df <- data.frame(pred_list[-1])
group.df <- cbind(group.df,extra_df)
}
if(PI){
sigma_full = parameters$sigma
d_full = getfulld(parameters$d[,2],parameters$covd)
docc_full = getfulld(parameters$docc[,2,drop=F],parameters$covdocc)
cov <- v(as.matrix(model_switch_i),as.matrix(xt_i),
t(x_i),t(a_i),bpop_val,t(b_ind),bocc_ind,d_full,
sigma_full,docc_full,poped.db)[[1]]
PI_alpha <- 1-PI_conf_level
z_val <- qnorm(1-PI_alpha/2)
#z_val <- qt(1-PI_alpha/2,df=(sum(design$groupsize)-5))
se_val <- sqrt(diag(cov))
PI_u <- pred + z_val*se_val
PI_l <- pred - z_val*se_val
#PI_ln_dist <- TRUE
if(PI_ln_dist && all(pred>0)){
# using method of moments (Stein)
# assuming univariate distributions
se_val_log <- sqrt(log(diag(cov)/(pred^2)+1))
mean_val_log <- log(pred^2/sqrt(diag(cov) + pred^2))
PI_u <- exp(mean_val_log + z_val*se_val_log)
PI_l <- exp(mean_val_log - z_val*se_val_log)
}
group.df <- data.frame(group.df,PI_l=PI_l,PI_u=PI_u)
}
}
# group.df <- data.frame(Time=xt_i,PRED=drop(pred[[1]]),Group=groups_to_use[i],
# ##paste("Group",i,sep="_"),
# Model=model_switch_i)
#
if(include_a && !isempty(a_i)){
rownames(a_i) <- NULL
group.df <- data.frame(group.df,a_i)
}
if(include_x && !isempty(x_i)){
rownames(x_i) <- NULL
group.df <- data.frame(group.df,x_i)
}
# if(include_id){
# #parameters$groupsize[groups_to_use[i]]
# id_num_end <- id_num_start + num_ids - 1
# group.df <- merge(group.df,data.frame(ID=id_num_start:id_num_end))
# id_num_start <- id_num_end + 1
# group.df <- group.df[c(length(group.df),1:(length(group.df)-1))] #reorder columns
# }
if(IPRED){
group.df.ipred <- data.frame()
bocc_start= 1
id_num_end <- id_num_start + num_ids - 1
id_vals <- id_num_start:id_num_end
if(predictions){
fulld = getfulld(parameters$d[,2],parameters$covd)
fulldocc = getfulld(parameters$docc[,2,drop=F],parameters$covdocc)
if(any(size(fulld)==0)){
b_sim_matrix = zeros(num_ids,0)
} else {
b_sim_matrix = mvtnorm::rmvnorm(num_ids,sigma=fulld)
}
bocc_sim_matrix = zeros(num_ids*NumOcc,length(parameters$docc[,2,drop=F]))
if(nrow(fulldocc)!=0) bocc_sim_matrix = mvtnorm::rmvnorm(num_ids*NumOcc,sigma=fulldocc)
}
for(j in 1:num_ids){
tmp.df <- group.df
if(predictions){
bocc_stop=bocc_start + NumOcc - 1
if(nrow(fulldocc)==0){
bocc_start=0
bocc_stop=0
}
fg_sim = feval(model$fg_pointer,x_i,a_i,parameters$bpop[,2,drop=F],b_sim_matrix[j,],t(bocc_sim_matrix[bocc_start:bocc_stop,]))
bocc_start = bocc_stop + 1
ipred <- feval(model$ff_pointer,model_switch_i,xt_i,fg_sim,poped.db)
ipred <- drop(ipred[[1]])
} else {
ipred <- xt_i*NA
}
#ID <- (i-1)*num_ids+j
ID <- id_vals[j]
tmp.df["ID"] <- ID
tmp.df["IPRED"] <- ipred
if(DV){
if(predictions){
eps_sim = mvtnorm::rmvnorm(length(xt_i),sigma=parameters$sigma)
dv <- feval(model$ferror_pointer,model_switch_i,xt_i,fg_sim,eps_sim,poped.db)
dv <- drop(dv[[1]])
} else {
dv <- xt_i*NA
}
tmp.df["DV"] <- dv
}
if(!is.null(dosing)){
dose.df <- data.frame(dosing[groups_to_use[i]])
# add constant values within a group to dosing records
for(nam in names(tmp.df[!(names(tmp.df) %in% c("IPRED","PRED","DV","Time"))])){
#if(length(unique(tmp.df[nam]))==1) dose.df <- data.frame(dose.df,nam=tmp.df[1,nam])
if(length(unique(tmp.df[nam]))==1) dose.df[nam] <- tmp.df[1,nam]
}
dose.df$dose_record_tmp <- 1
#if(packageVersion("dplyr") >= "0.5.0"){
tmp.df <- dplyr::bind_rows(dose.df,tmp.df)
#} else {
# tmp.df <- dplyr::rbind_list(dose.df,tmp.df)
#}
tmp.df <- tmp.df[order(tmp.df$Time,tmp.df$dose_record_tmp),]
tmp.df$dose_record_tmp <- NULL
}
group.df.ipred <- rbind(group.df.ipred,tmp.df)
}
id_num_start <- id_num_end + 1
group.df <- group.df.ipred
}
df <- rbind(df,group.df)
#model.pred <- rbind(model.pred,i=returnArgs[[1]])
#model.pred[paste("Group",i,sep="_")] <- returnArgs[[1]]
}
#reorder columns
first_names <- c("ID","Time","DV","IPRED","PRED")
first_names <- first_names[first_names %in% names(df)]
other_names <- names(df[!(names(df) %in% first_names)])
df <- df[c(first_names,other_names)]
df$Group <- as.factor(df$Group)
df$Model <- as.factor(df$Model)
if(IPRED) df$ID <- as.factor(df$ID)
row.names(df) <- NULL
if(!is.null(manipulation)){
for(i in 1:length(manipulation)){
df <- within(df,{
eval(manipulation[[i]])
})
}
}
if(!is.null(filename)) write.table(x=df, filename, row.names=FALSE, quote=FALSE, na=".",sep=",")
return( df )
}) # end with(design,{})
}