-
Notifications
You must be signed in to change notification settings - Fork 14
/
ctStanFit.R
816 lines (716 loc) · 37.1 KB
/
ctStanFit.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
#' Update a ctStanFit object
#'
#' Either to include different data, or because you have upgraded ctsem and the internal data structure has changed.
#'
#' @param oldfit fit object to be upgraded
#' @param data replacement long format data object
#' @param recompile whether to force a recompile -- safer but slower and usually unnecessary.
#' @param refit if TRUE, refits the model using the old estimates as a starting point. Only applicable for
#' optimized fits, not sampling.
#' @param ... extra arguments to pass to ctStanFit
#'
#' @return updated ctStanFit object.
#' @export
#'
#' @examples
#' newfit <- ctStanFitUpdate(ctstantestfit,refit=FALSE)
ctStanFitUpdate <- function(oldfit, data=NA, recompile=FALSE,refit=FALSE,...){
if(!refit) message('Trying to do a quick update -- if there are problems, try with refit=TRUE for more robustness')
dots <- list(...)
args <- as.list(oldfit$args)
for(n in names(dots)){
args[[n]] <- dots[[n]]
}
if(length(oldfit$stanfit$stanfit@sim) > 0) refit=FALSE
args$fit <- refit
args$inits <- oldfit$stanfit$rawest
args$ctstanmodel <- oldfit$ctstanmodelbase
newargs <- as.list(args(ctStanFit))
for(argi in names(args)){
if(argi %in% names(args)) newargs[[argi]] <- args[[argi]] else message(argi, ' is no longer a valid argument, dropping...')
}
if(length(data==1)) args$datalong <- standatatolong(oldfit$standata,origstructure = TRUE,ctm=oldfit$ctstanmodelbase)
if(length(data) > 1) args$datalong <- data
newfit <- do.call(ctStanFit,args)
if(!refit){
oldfit$standata <- newfit$standata
if(oldfit$ctstanmodel$recompile || recompile) oldfit$stanmodel <- rstan::stan_model(model_code = newfit$stanmodeltext) else
oldfit$stanmodel <- stanmodels$ctsm
}
if(refit) oldfit <- newfit
return(oldfit)
}
T0VARredundancies <- function(ctm) {
whichT0VAR_T0MEANSindvarying <- ctm$pars$matrix %in% 'T0VAR' &
is.na(ctm$pars$value) &
(ctm$pars$row %in% ctm$pars$row[ctm$pars$matrix %in% 'T0MEANS' & ctm$pars$indvarying] |
ctm$pars$col %in% ctm$pars$row[ctm$pars$matrix %in% 'T0MEANS' & ctm$pars$indvarying])
if(any(whichT0VAR_T0MEANSindvarying)){
message('Free T0VAR parameters as well as indvarying T0MEANS -- fixing T0VAR pars to diag matrix of 1e-3')
ctm$pars$value[whichT0VAR_T0MEANSindvarying & ctm$pars$col == ctm$pars$row ] <- 1e-3
ctm$pars$value[whichT0VAR_T0MEANSindvarying & ctm$pars$col != ctm$pars$row ] <- 0
ctm$pars$param[whichT0VAR_T0MEANSindvarying] <- NA
ctm$pars$transform[whichT0VAR_T0MEANSindvarying] <- NA
ctm$pars$indvarying[whichT0VAR_T0MEANSindvarying] <- FALSE
}
return(ctm)
}
# verbosify<-function(sf,verbose=2){
# sm <- sf$stanmodel
# sd <- sf$standata
# sd$verbose=as.integer(verbose)
#
# sfr <- stan_reinitsf(sm,sd)
# log_prob(sfr,sf$stanfit$rawest)
# }
#' ctStanFit
#'
#' Fits a ctsem model specified via \code{\link{ctModel}} with type either 'stanct' or 'standt'.
#'
#' @param datalong long format data containing columns for subject id (numeric values, 1 to max subjects), manifest variables,
#' any time dependent (i.e. varying within subject) predictors,
#' and any time independent (not varying within subject) predictors.
#' @param ctstanmodel model object as generated by \code{\link{ctModel}} with type='stanct' or 'standt', for continuous or discrete time
#' models respectively.
#' @param stanmodeltext already specified Stan model character string, generally leave NA unless modifying Stan model directly.
#' (Possible after modification of output from fit=FALSE)
#' @param intoverstates logical indicating whether or not to integrate over latent states using a Kalman filter.
#' Generally recommended to set TRUE unless using non-gaussian measurement model.
#' @param binomial Deprecated. Logical indicating the use of binary rather than Gaussian data, as with IRT analyses.
#' This now sets \code{intoverstates = FALSE} and the \code{manifesttype} of every indicator to 1, for binary.
#' @param fit If TRUE, fit specified model using Stan, if FALSE, return stan model object without fitting.
#' @param intoverpop if 'auto', set to TRUE if optimizing and FALSE if using hmc.
#' if TRUE, integrates over population distribution of parameters rather than full sampling.
#' Allows for optimization of non-linearities and random effects.
#' @param sameInitialTimes if TRUE, include an empty observation for every subject that has no observation
#' at the earliest observation time of the dataset. This ensures that the T0MEANS occurs for every subject at the same time,
#' rather than just at the earliest observation for that subject. Important when modelling trends over time, age, etc.
#' @param plot if TRUE, for sampling, a Shiny program is launched upon fitting to interactively plot samples.
#' May struggle with many (e.g., > 5000) parameters. For optimizing, various optimization details are plotted -- in development.
#' @param derrind deprecated, latents involved in dynamic error calculations are determined automatically now.
#' @param optimize if TRUE, use \code{\link{stanoptimis}} function for maximum a posteriori / importance sampling estimates,
#' otherwise use the HMC sampler from Stan, which is (much) slower, but generally more robust, accurate, and informative.
#' @param optimcontrol list of parameters sent to \code{\link{stanoptimis}} governing optimization / importance sampling.
#' @param nopriors deprecated, use priors argument. logical. If TRUE, any priors are disabled -- sometimes desirable for optimization.
#' @param priors if TRUE, priors are included in computations, otherwise specified priors are ignored.
#' @param iter used when \code{optimize=FALSE}. number of iterations, half of which will be devoted to warmup by default when sampling.
#' When optimizing, this is the maximum number of iterations to allow -- convergence hopefully occurs before this!
#' @param inits either character string 'optimize, NULL, or vector of (unconstrained)
#' parameter start values, as returned by the rstan function \code{rstan::unconstrain_pars}, or the parameter values
#' found in a ctsem fit object \code{myfit$stanfit$rawest} (or \code{$rawposterior}) for instance.
#' @param chains used when \code{optimize=FALSE}. Number of chains to sample, during HMC or post-optimization importance sampling. Unless the cores
#' argument is also set, the number of chains determines the number of cpu cores used, up to
#' the maximum available minus one. Irrelevant when \code{optimize=TRUE}.
#' @param cores number of cpu cores to use. Either 'maxneeded' to use as many as available minus one,
#' up to the number of chains, or a positive integer. If \code{optimize=TRUE}, more cores are generally faster.
#' @param control Used when \code{optimize=FALSE}. List of arguments sent to \code{\link[rstan]{stan}} control argument,
#' regarding warmup / sampling behaviour. Unless specified, values used are:
#' list(adapt_delta = .8, adapt_window=2, max_treedepth=10, adapt_init_buffer=2, stepsize = .001)
#' @param nlcontrol List of non-linear control parameters.
#' \code{maxtimestep} must be a positive numeric, specifying the largest time
#' span covered by the numerical integration. The large default ensures that for each observation time interval,
#' only a single step of exponential integration is used. When \code{maxtimestep} is smaller than the observation time interval,
#' the integration is nested within an Euler like loop.
#' Smaller values may offer greater accuracy, but are slower and not always necessary. Given the exponential integration,
#' linear model elements are fit exactly with only a single step.
#' @param verbose Integer from 0 to 2. Higher values print more information during model fit -- for debugging.
#' @param stationary Logical. If TRUE, T0VAR and T0MEANS input matrices are ignored,
#' the parameters are instead fixed to long run expectations. More control over this can be achieved
#' by instead setting parameter names of T0MEANS and T0VAR matrices in the input model to 'stationary', for
#' elements that should be fixed to stationarity.
#' @param forcerecompile logical. For development purposes.
#' If TRUE, stan model is recompiled, regardless of apparent need for compilation.
#' @param saveCompile if TRUE and compilation is needed / requested, writes the stan model to
#' the parent frame as ctsem.compiled (unless that object already exists and is not from ctsem), to avoid unnecessary recompilation.
#' @param savescores Logical. If TRUE, output from the Kalman filter is saved in output. For datasets with many variables
#' or time points, will increase file size substantially.
#' @param savesubjectmatrices Logical. If TRUE, subject specific matrices are saved --
#' only relevant when either time dependent predictors or individual differences are
#' used. Can increase memory usage dramatically in large models, and can be computed after fitting using ctExtract
#' or ctStanSubjectPars .
#' @param saveComplexPars Logical. If TRUE, also save rowwise output of any complex parameters specified,
#' i.e. combinations of parameters, functions and states.
#' @param gendata Logical -- If TRUE, uses provided data for only covariates and a time and missingness structure, and
#' generates random data according to the specified model / priors.
#' Generated data is in the $Ygen subobject after running \code{extract} on the fit object.
#' For datasets with many manifest variables or time points, file size may be large.
#' To generate data based on the posterior of a fitted model, see \code{\link{ctStanGenerateFromFit}}.
#' @param vb Logical. Use variational Bayes algorithm from stan? Only kind of working, not recommended.
#' @param ... additional arguments to pass to \code{\link[rstan]{stan}} function.
#' @export
#' @examples
#' \donttest{
#'
#' #generate a ctStanModel relying heavily on defaults
#' model<-ctModel(type='stanct',
#' latentNames=c('eta1','eta2'),
#' manifestNames=c('Y1','Y2'),
#' MANIFESTVAR=diag(.1,2),
#' TDpredNames='TD1',
#' TIpredNames=c('TI1','TI2','TI3'),
#' LAMBDA=diag(2))
#'
#' fit<-ctStanFit(ctstantestdat, model,priors=TRUE)
#'
#' summary(fit)
#'
#' plot(fit,wait=FALSE)
#'
#' #### extended examples
#'
#' library(ctsem)
#' set.seed(3)
#'
#' # Data generation (run this, but no need to understand!) -----------------
#'
#' Tpoints <- 20
#' nmanifest <- 4
#' nlatent <- 2
#' nsubjects<-20
#'
#' #random effects
#' age <- rnorm(nsubjects) #standardised
#' cint1<-rnorm(nsubjects,2,.3)+age*.5
#' cint2 <- cint1*.5+rnorm(nsubjects,1,.2)+age*.5
#' tdpredeffect <- rnorm(nsubjects,5,.3)+age*.5
#'
#' for(i in 1:nsubjects){
#' #generating model
#' gm<-ctModel(Tpoints=Tpoints,n.manifest = nmanifest,n.latent = nlatent,n.TDpred = 1,
#' LAMBDA = matrix(c(1,0,0,0, 0,1,.8,1.3),nrow=nmanifest,ncol=nlatent),
#' DRIFT=matrix(c(-.3, .2, 0, -.5),nlatent,nlatent),
#' TDPREDMEANS=matrix(c(rep(0,Tpoints-10),1,rep(0,9)),ncol=1),
#' TDPREDEFFECT=matrix(c(tdpredeffect[i],0),nrow=nlatent),
#' DIFFUSION = matrix(c(1, 0, 0, .5),2,2),
#' CINT = matrix(c(cint1[i],cint2[i]),ncol=1),
#' T0VAR=diag(2,nlatent,nlatent),
#' MANIFESTVAR = diag(.5, nmanifest))
#'
#' #generate data
#' newdat <- ctGenerate(ctmodelobj = gm,n.subjects = 1,burnin = 2,
#' dtmat<-rbind(c(rep(.5,8),3,rep(.5,Tpoints-9))))
#' newdat[,'id'] <- i #set id for each subject
#' newdat <- cbind(newdat,age[i]) #include time independent predictor
#' if(i ==1) {
#' dat <- newdat[1:(Tpoints-10),] #pre intervention data
#' dat2 <- newdat #including post intervention data
#' }
#' if(i > 1) {
#' dat <- rbind(dat, newdat[1:(Tpoints-10),])
#' dat2 <- rbind(dat2,newdat)
#' }
#' }
#' colnames(dat)[ncol(dat)] <- 'age'
#' colnames(dat2)[ncol(dat)] <- 'age'
#'
#'
#' #plot generated data for sanity
#' plot(age)
#' matplot(dat[,gm$manifestNames],type='l',pch=1)
#' plotvar <- 'Y1'
#' plot(dat[dat[,'id']==1,'time'],dat[dat[,'id']==1,plotvar],type='l',
#' ylim=range(dat[,plotvar],na.rm=TRUE))
#' for(i in 2:nsubjects){
#' points(dat[dat[,'id']==i,'time'],dat[dat[,'id']==i,plotvar],type='l',col=i)
#' }
#'
#'
#' dat2[,gm$manifestNames][sample(1:length(dat2[,gm$manifestNames]),size = 100)] <- NA
#'
#'
#' #data structure
#' head(dat2)
#'
#'
#' # Model fitting -----------------------------------------------------------
#'
#' ##simple univariate default model
#'
#' m <- ctModel(type = 'stanct', manifestNames = c('Y1'), LAMBDA = diag(1))
#' ctModelLatex(m)
#'
#' #Specify univariate linear growth curve
#'
#' m1 <- ctModel(type = 'stanct',
#' manifestNames = c('Y1'), latentNames=c('eta1'),
#' DRIFT=matrix(-.0001,nrow=1,ncol=1),
#' DIFFUSION=matrix(0,nrow=1,ncol=1),
#' T0VAR=matrix(0,nrow=1,ncol=1),
#' CINT=matrix(c('cint1'),ncol=1),
#' T0MEANS=matrix(c('t0m1'),ncol=1),
#' LAMBDA = diag(1),
#' MANIFESTMEANS=matrix(0,ncol=1),
#' MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1))
#'
#' ctModelLatex(m1)
#'
#' #fit
#' f1 <- ctStanFit(datalong = dat2, ctstanmodel = m1, optimize=TRUE, priors=FALSE)
#'
#' summary(f1)
#'
#' #plots of individual subject models v data
#' ctKalman(f1,plot=TRUE,subjects=1,kalmanvec=c('y','yprior'),timestep=.01)
#' ctKalman(f1,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),timestep=.01,errorvec=NA)
#'
#' ctStanPostPredict(f1, wait=FALSE) #compare randomly generated data from posterior to observed data
#'
#' cf<-ctCheckFit(f1) #compare mean and covariance of randomly generated data to observed cov
#' plot(cf,wait=FALSE)
#'
#' ### Further example models
#'
#' #Include intervention
#' m2 <- ctModel(type = 'stanct',
#' manifestNames = c('Y1'), latentNames=c('eta1'),
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect
#' DRIFT=matrix(-1e-5,nrow=1,ncol=1),
#' DIFFUSION=matrix(0,nrow=1,ncol=1),
#' CINT=matrix(c('cint1'),ncol=1),
#' T0MEANS=matrix(c('t0m1'),ncol=1),
#' T0VAR=matrix(0,nrow=1,ncol=1),
#' LAMBDA = diag(1),
#' MANIFESTMEANS=matrix(0,ncol=1),
#' MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1))
#'
#'
#'
#' #Individual differences in intervention, Bayesian estimation, covariates
#' m2i <- ctModel(type = 'stanct',
#' manifestNames = c('Y1'), latentNames=c('eta1'),
#' TIpredNames = 'age',
#' TDpredNames = 'TD1', #this line includes the intervention
#' TDPREDEFFECT=matrix(c('tdpredeffect||TRUE'),nrow=1,ncol=1), #intervention effect
#' DRIFT=matrix(-1e-5,nrow=1,ncol=1),
#' DIFFUSION=matrix(0,nrow=1,ncol=1),
#' CINT=matrix(c('cint1'),ncol=1),
#' T0MEANS=matrix(c('t0m1'),ncol=1),
#' T0VAR=matrix(0,nrow=1,ncol=1),
#' LAMBDA = diag(1),
#' MANIFESTMEANS=matrix(0,ncol=1),
#' MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1))
#'
#'
#' #Including covariate effects
#' m2ic <- ctModel(type = 'stanct',
#' manifestNames = c('Y1'), latentNames=c('eta1'),
#' n.TIpred = 1, TIpredNames = 'age',
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect
#' DRIFT=matrix(-1e-5,nrow=1,ncol=1),
#' DIFFUSION=matrix(0,nrow=1,ncol=1),
#' CINT=matrix(c('cint1'),ncol=1),
#' T0MEANS=matrix(c('t0m1'),ncol=1),
#' T0VAR=matrix(0,nrow=1,ncol=1),
#' LAMBDA = diag(1),
#' MANIFESTMEANS=matrix(0,ncol=1),
#' MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1))
#'
#' m2ic$pars$indvarying[m2ic$pars$matrix %in% 'TDPREDEFFECT'] <- TRUE
#'
#'
#' #Include deterministic dynamics
#' m3 <- ctModel(type = 'stanct',
#' manifestNames = c('Y1'), latentNames=c('eta1'),
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect
#' DRIFT=matrix('drift11',nrow=1,ncol=1),
#' DIFFUSION=matrix(0,nrow=1,ncol=1),
#' CINT=matrix(c('cint1'),ncol=1),
#' T0MEANS=matrix(c('t0m1'),ncol=1),
#' T0VAR=matrix('t0var11',nrow=1,ncol=1),
#' LAMBDA = diag(1),
#' MANIFESTMEANS=matrix(0,ncol=1),
#' MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1))
#'
#'
#'
#'
#'
#' #Add system noise to allow for fluctuations that persist in time
#' m3n <- ctModel(type = 'stanct',
#' manifestNames = c('Y1'), latentNames=c('eta1'),
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect
#' DRIFT=matrix('drift11',nrow=1,ncol=1),
#' DIFFUSION=matrix('diffusion',nrow=1,ncol=1),
#' CINT=matrix(c('cint1'),ncol=1),
#' T0MEANS=matrix(c('t0m1'),ncol=1),
#' T0VAR=matrix('t0var11',nrow=1,ncol=1),
#' LAMBDA = diag(1),
#' MANIFESTMEANS=matrix(0,ncol=1),
#' MANIFESTVAR=matrix(c(0),nrow=1,ncol=1))
#'
#'
#'
#' #include 2nd latent process
#'
#' m4 <- ctModel(n.manifest = 2,n.latent = 2, type = 'stanct',
#' manifestNames = c('Y1','Y2'), latentNames=c('L1','L2'),
#' n.TDpred=1,TDpredNames = 'TD1',
#' TDPREDEFFECT=matrix(c('tdpredeffect1','tdpredeffect2'),nrow=2,ncol=1),
#' DRIFT=matrix(c('drift11','drift21','drift12','drift22'),nrow=2,ncol=2),
#' DIFFUSION=matrix(c('diffusion11','diffusion21',0,'diffusion22'),nrow=2,ncol=2),
#' CINT=matrix(c('cint1','cint2'),nrow=2,ncol=1),
#' T0MEANS=matrix(c('t0m1','t0m2'),nrow=2,ncol=1),
#' T0VAR=matrix(c('t0var11','t0var21',0,'t0var22'),nrow=2,ncol=2),
#' LAMBDA = matrix(c(1,0,0,1),nrow=2,ncol=2),
#' MANIFESTMEANS=matrix(c(0,0),nrow=2,ncol=1),
#' MANIFESTVAR=matrix(c('merror1',0,0,'merror2'),nrow=2,ncol=2))
#'
#' #dynamic factor model -- fixing CINT to 0 and freeing indicator level intercepts
#'
#' m3df <- ctModel(type = 'stanct',
#' manifestNames = c('Y2','Y3'), latentNames=c('eta1'),
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect
#' DRIFT=matrix('drift11',nrow=1,ncol=1),
#' DIFFUSION=matrix('diffusion',nrow=1,ncol=1),
#' CINT=matrix(c(0),ncol=1),
#' T0MEANS=matrix(c('t0m1'),ncol=1),
#' T0VAR=matrix('t0var11',nrow=1,ncol=1),
#' LAMBDA = matrix(c(1,'Y3loading'),nrow=2,ncol=1),
#' MANIFESTMEANS=matrix(c('Y2_int','Y3_int'),nrow=2,ncol=1),
#' MANIFESTVAR=matrix(c('Y2residual',0,0,'Y3residual'),nrow=2,ncol=2))
#'
#' }
ctStanFit<-function(datalong, ctstanmodel, stanmodeltext=NA, iter=1000, intoverstates=TRUE, binomial=FALSE,
fit=TRUE, intoverpop='auto', sameInitialTimes=FALSE, stationary=FALSE,plot=FALSE, derrind=NA,
optimize=TRUE, optimcontrol=list(),
nlcontrol = list(), nopriors=NA, priors=FALSE, chains=2,
cores=ifelse(optimize,getOption("mc.cores", 2L),'maxneeded'),
inits=NULL,
forcerecompile=FALSE,saveCompile=TRUE,savescores=FALSE,
savesubjectmatrices=FALSE, saveComplexPars=FALSE,
gendata=FALSE,
control=list(),verbose=0,vb=FALSE,...){
if(!is.na(nopriors)){
warning('nopriors argument is deprecated, use priors argument in future')
priors <- !nopriors
}
if(any(!is.na(derrind))) warning('derrind argment is deprecated, computed automatically now')
datalong <- data.frame(datalong)
if(!ctstanmodel$timeName %in% colnames(datalong) && !ctstanmodel$continuoustime) {
dtable <- data.table(datalong)
dtable[,.ObsCount:=1:.N,by=ctstanmodel$id]
datalong[[ctstanmodel$timeName]] <- dtable[['.ObsCount']]
rm(dtable)
}
datalong <- datalong[order(datalong[[ctstanmodel$subjectIDname]],datalong[[ctstanmodel$timeName]]),] #sort by subject, time.
datavars <- c(ctstanmodel$timeName,ctstanmodel$subjectIDname, ctstanmodel$manifestNames,ctstanmodel$TDpredNames,ctstanmodel$TIpredNames)
sapply(datavars,function(x){
if(!x %in% colnames(datalong)) stop(paste0(x,' column not found in data!'))
if(!x %in% ctstanmodel$subjectIDname){ #if not an id column
if(any(!is.numeric(as.numeric(datalong[!is.na(datalong[,x]),x])))) stop(x ,' column contains non-numeric data!')
}
})
if(!'ctStanModel' %in% class(ctstanmodel)) stop('not a ctStanModel object')
#set nlcontrol defaults
if(is.null(nlcontrol$maxtimestep)) nlcontrol$maxtimestep = 999999
if(is.null(nlcontrol$Jstep)) nlcontrol$Jstep = 1e-6
args=c(as.list(environment()), list(...)) #as.list((match.call(expand.dots=FALSE)))
args$datalong <- NULL
args$ctstanmodel <- NULL
ctm <- ctstanmodel
if(!is.null(ctm$TIpredAuto) && ctm$TIpredAuto %in% c(1L,TRUE)){ #if auto tipred, set all effects to true
for(tip in ctm$TIpredNames){
ctm$pars[[paste0(tip,'_effect')]] <- TRUE
}
}
if(optimize && !priors) message("Maximum likelihood estimation requested")
if(optimize && priors && (is.null(optimcontrol$is) || optimcontrol$is %in% FALSE)) message("Maximum a posteriori estimation requested")
if(optimize && priors && (!is.null(optimcontrol$is) && optimcontrol$is %in% TRUE)) message("Bayesian estimation via optimization and importance sampling requested")
if(!optimize) message("Bayesian estimation via Stan's NUTS sampler requested")
###stationarity
if(stationary) {
stop('Stationary option temporarily unavailable -- reductions needed to pass all CRAN checks')
ctm$pars$param[ctm$pars$matrix %in% c('T0VAR','T0MEANS')] <- 'stationary'
ctm$pars$value[ctm$pars$matrix %in% c('T0VAR','T0MEANS')] <- NA
ctm$pars$indvarying[ctm$pars$matrix %in% c('T0VAR','T0MEANS')] <- FALSE
}
#collect individual stationary elements and update ctm$pars
if(any(ctm$pars$param %in% 'stationary')) stop('Stationary option temporarily unavailable -- reductions needed to pass all CRAN checks')
ctm$t0varstationary <- as.matrix(rbind(ctm$pars[which(ctm$pars$param %in% 'stationary' & ctm$pars$matrix %in% 'T0VAR'),c('row','col')]))
if(nrow(ctm$t0varstationary) > 0){ #ensure upper tri is consistent with lower
for(i in 1:nrow(ctm$t0varstationary)){
if(ctm$t0varstationary[i,1] != ctm$t0varstationary[i,2]) ctm$t0varstationary <- rbind(ctm$t0varstationary,ctm$t0varstationary[i,c(2,1)])
}}
ctm$t0varstationary = unique(ctm$t0varstationary) #remove any duplicated rows
ctm$t0meansstationary <- as.matrix(rbind(ctm$pars[which(ctm$pars$param[ctm$pars$matrix %in% 'T0MEANS'] %in% 'stationary'),c('row','col')]))
ctm$pars$value[ctm$pars$param %in% 'stationary'] <- -99 #does this get inserted?
ctm$pars$indvarying[ctm$pars$param %in% 'stationary'] <- FALSE
ctm$pars$transform[ctm$pars$param %in% 'stationary'] <- NA
ctm$pars$param[ctm$pars$param %in% 'stationary'] <- NA
if(length(unique(datalong[,ctm$subjectIDname]))==1 && any(ctm$pars$indvarying[is.na(ctm$pars$value)]==TRUE)){
# is.null(ctm$fixedrawpopmeans) && is.null(ctm$fixedsubpars) & is.null(ctm$forcemultisubject)) {
ctm$pars$indvarying <- FALSE
message('Individual variation not possible as only 1 subject! indvarying set to FALSE on all parameters')
}
if(length(unique(datalong[,ctm$subjectIDname]))==1 & any(is.na(ctm$pars$value[ctm$pars$matrix %in% 'T0VAR']))){
# is.null(ctm$fixedrawpopmeans) & is.null(ctm$fixedsubpars) & is.null(ctm$forcemultisubject)) {
for(ri in 1:nrow(ctm$pars)){
if(is.na(ctm$pars$value[ri]) && ctm$pars$matrix[ri] %in% 'T0VAR'){
ctm$pars$value[ri] <- ifelse(ctm$pars$row[ri] == ctm$pars$col[ri], 1, 0)
}
}
message('Free T0VAR parameters fixed to diagonal matrix of 1 as only 1 subject - consider appropriateness!')
}
if(any(duplicated(ctm$pars$param[ctm$pars$matrix %in% 'T0MEANS']))) stop(paste0(
'Unfortunately, duplicate T0MEANS parameters must be specified via inclusion of additional PARS matrix in ctModel: e.g.,
ctModel(... #regular model code
PARS=c("t0mPar||TRUE"), #specify an additional parameter called t0mPar, with random effects
T0MEANS=c("t0mPar","t0mPar"), #insert this parameter into the T0MEANS matrix as many times as needed.
... #regular model code)
'))
if(binomial){
message('Binomial argument deprecated -- in future set manifesttype in the model object to 1 for binary indicators')
intoverstates <- FALSE
ctm$manifesttype[] <- 1
}
recompile <- FALSE
if(!optimize && !priors){
message('HMC sampling requested, but priors disabled -- are you sure? consider setting priors=TRUE')
# !priors <- FALSE
}
if(optimize && !intoverstates) warning('intoverstates=TRUE required for sensible optimization! Proceed onwards to weird output at own risk!')
if(intoverpop == 'auto') intoverpop <-
ifelse(optimize && any(ctm$pars$indvarying[is.na(ctm$pars$value)]),TRUE,FALSE)
# if(optimize && !intoverpop && any(ctm$pars$indvarying[is.na(ctm$pars$value)]) &&
# is.null(ctm$fixedrawpopchol) && is.null(ctm$fixedsubpars)){
# intoverpop <- TRUE
# message('Setting intoverpop=TRUE to enable optimization of random effects...')
# }
# if(intoverpop==TRUE && !any(ctm$pars$indvarying[is.na(ctm$pars$value)])) {
# # message('No individual variation -- disabling intoverpop switch');
# intoverpop <- FALSE
# }
ctm <- ctModel0DRIFT(ctm, ctm$continuoustime) #offset 0 drift
ctm$pars <- ctModelStatesAndPARS(ctm$pars,statenames = ctm$latentNames,tdprednames=ctm$TDpredNames) #replace latent states and PARS with state and PAR[] refs, need this early because we rely on [] detection
if(intoverpop) ctm <- ctStanModelIntOverPop(ctm) #extend system matrices for individual differences
#jacobian addition
ctm$jacobian <- try(ctJacobian(ctm))
if('try-error' %in% class(ctm$jacobian)) ctm$jacobian <- ctJacobian(ctm,simplify=FALSE)
# ctm$jacobian <- unfoldmats( #replaces matrix references with base parameter
# c(listOfMatrices(ctm$pars),ctm$jacobian))
ctm$jacobian <- ctm$jacobian[names(ctStanMatricesList()$jacobian)]
jl <- ctModelUnlist(ctm$jacobian,names(ctm$jacobian))
jl <- jl[apply(jl,1,function(x) any(!is.na(x))),] #clean up messy leftovers of NA's
jl2 <- as.data.frame(rbind(data.table(ctm$pars[1,]),data.table(jl),fill=TRUE))[-1,]
for(i in 1:nrow(jl2)){ #copy base parameter transforms etc to jacobian when needed
if(!is.na(jl2$param[i]) && jl2$param[i] %in% ctm$pars$param){
jl2[i, !colnames(jl2) %in% list('matrix','row','col')] <-
ctm$pars[which(ctm$pars$param %in% jl2$param[i])[1], !colnames(ctm$pars) %in% list('matrix','row','col')]
}
}
ctm$pars <- rbind(ctm$pars,jl2)
ctm$pars <- ctModelStatesAndPARS(ctm$pars,statenames = ctm$latentNames,tdprednames=ctm$TDpredNames) #replace any new state and par refs with square bracket refs
ctm <- ctModelTransformsToNum(ctm)
ctm$pars <- ctStanModelCleanctspec(ctm$pars)
ctm <- T0VARredundancies(ctm)
if(!all(ctm$pars$transform[!is.na(suppressWarnings(as.integer(ctm$pars$transform)))] %in% c(0,1,2,3,4))) stop('Unknown transform specified -- integers should be 0 to 4')
#fix binary manifestvariance
if(any(ctm$manifesttype > 0)){ #if any non continuous variables, (with free parameters)...
errfix <- which(ctm$pars$matrix %in% 'MANIFESTVAR' &
(ctm$pars$row %in% which(ctm$manifesttype==1) |
ctm$pars$col %in% which(ctm$manifesttype==1)) &
is.na(suppressWarnings(as.numeric(
ctm$pars$value))))
if(length(errfix) > 0){
message('Fixing any free MANIFESTVAR parameters for binary indicators to deterministic calculation')
ctm$pars$value[errfix] <- 1e-5
ctm$pars[errfix,c('param','transform','multiplier','offset','meanscale','inneroffset','sdscale')] <- NA
ctm$pars$indvarying[errfix] <- FALSE
}}
ctm$modelmats <- ctStanModelMatrices(ctm)
ctm <- ctStanCalcsList(ctm,save=saveComplexPars) #get extra calculations and adjust model spec as needed???
#store values in ctm
ctm$intoverpop <- as.integer(intoverpop)
ctm$nlatentpop <- as.integer(ifelse(ctm$intoverpop ==1, max(ctm$pars$row[ctm$pars$matrix %in% 'T0MEANS']), ctm$n.latent))
ctm$intoverstates <- as.integer(intoverstates)
ctm$priors <- as.integer(priors)
ctm$stationary <- as.integer(stationary)
ctm$nlcontrol <- nlcontrol
#recompile checks
if(forcerecompile) recompile <- TRUE
if(naf(!is.na(ctm$rawpopsdbaselowerbound))) recompile <- TRUE
if(ctm$rawpopsdbase != 'normal(0,1)') recompile <- TRUE
if(ctm$rawpopsdtransform != 'log1p_exp(2*rawpopsdbase-1) .* sdscale') ctm$recompile <- TRUE
if(any(ctm$modelmats$matsetup[,'transform'] < -10)) recompile <- TRUE #if custom transforms needed
ncalcsNoJ<- length(unlist(ctm$modelmats$calcs)[!grepl('JAx[',unlist(ctm$modelmats$calcs),fixed=TRUE)])
if(ncalcsNoJ > 0) recompile <- TRUE
if(!recompile &&
length(unlist(ctm$modelmats$calcs)[!grepl('JAx[',unlist(ctm$modelmats$calcs),fixed=TRUE)])>0)
message('Finite difference jacobian used to avoid recompiling -- use forcerecompile=TRUE for analytic jacobians')
#further model adjustments conditional on recompile
if(!recompile){ #then use finite diffs for some elements
#collect row and column of complicated jacobian elements into vector
ctm$JAxfinite <- array(as.integer(unique(
unlist(ctm$modelmats$matsetup[ctm$modelmats$matsetup$matrix %in% 52 &
ctm$modelmats$matsetup$when == -999, 'col']))))# &
# ctm$modelmats$matsetup$copyrow < 1,c('row','col')]))))
ctm$Jyfinite <- array(as.integer(unique(
unlist(ctm$modelmats$matsetup[ctm$modelmats$matsetup$matrix %in% 54 &
ctm$modelmats$matsetup$when == -999, 'col']))))
}
if(recompile) ctm$JAxfinite <- ctm$Jyfinite <- array(as.integer(c()))
ctm$recompile <- recompile
standata <- ctStanData(ctm,datalong,optimize=optimize, sameInitialTimes=sameInitialTimes)
standata$verbose=as.integer(verbose)
standata$savesubjectmatrices=as.integer(savesubjectmatrices)
standata$gendata=as.integer(gendata)
if(standata$savesubjectmatrices==1L) savescores = TRUE
standata$savescores=as.integer(savescores)
# print(standata$savesubjectmatrices)
#####post model / data checks
if(cores=='maxneeded') cores=max(1,min(c(chains,parallel::detectCores()-1)))
if(is.logical(stanmodeltext)) {
stanmodeltext<- ctStanModelWriter(ctm, gendata, ctm$modelmats$extratforms,ctm$modelmats$matsetup)
}
if(fit){
# if(gendata && stanmodels$ctsmgen@model_code != stanmodeltext) recompile <- TRUE
# if(!gendata && paste0(stanmodels$ctsm@model_code) != paste0(stanmodeltext)) recompile <- TRUE
# STAN_NUM_THREADS <- Sys.getenv('STAN_NUM_THREADS',unset=NA)
# Sys.setenv(STAN_NUM_THREADS=cores)
if(recompile || forcerecompile) {
message('Compiling model...')
#r4.2 / windows / old rstan check
if(.Platform$OS.type=="windows" && R.version$major %in% 4 && as.numeric(R.version$minor) >= 2 &&
unlist(utils::packageVersion('rstan'))[2] < 25){
stop('
*****
Compiling not possible with R version 4.2+ on Windows with Rstan version < 2.26
To upgrade rstan, close and restart all R sessions then run:
install.packages("StanHeaders", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
install.packages("rstan", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
*****')
# a=readline('Attempt to upgrade Rstan from Stan repository? y/n')
# if(a %in% c('yes','y','Y','Yes','YES')){
# message(
# install.packages("StanHeaders", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
# install.packages("rstan", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
# }
}
sm <- stan_model(model_name='ctsem', model_code = c(stanmodeltext),auto_write=TRUE)
# ,allow_undefined = TRUE,verbose=TRUE,
# includes = paste0(
# '\n#include "', file.path(getwd(), 'syl2.hpp'),'"',
# '\n')
if(saveCompile){
if(exists(x = 'ctsem.compiled',envir= parent.frame())
&& !'stanmodel' %in% class(get('ctsem.compiled',envir = parent.frame()))){
warning('ctsem.compiled object already exists, not saving compile')
} else assign(x = 'ctsem.compiled',sm,envir = parent.frame())
}
}
if(!recompile && !forcerecompile) {
if(!gendata) sm <- stanmodels$ctsm else sm <- stanmodels$ctsmgen
}
# configure inits ---------------------------------------------------------
initOptim <- (length(inits)==1 && (inits=='optimize' || inits=='Optimize' || inits=='optimise' || inits=='Optimise'))
if(optimize || initOptim){ #then set optimcontrol list
optimcontrol$cores <- cores
optimcontrol$verbose <- verbose
optimcontrol$priors <- as.logical(priors)
optimcontrol$standata=standata
optimcontrol$sm=sm
optimcontrol$init=inits
optimcontrol$plot=plot
optimcontrol$matsetup <- data.frame(ctm$modelmats$matsetup)
}
if(!optimize && !is.null(inits)){
if('list' %in% class(inits)){
staninits=inits
} else {
if(initOptim){ #then first optimize to get inits
if(!intoverpop && length(unique(datalong[[ctm$subjectIDname]]) > 1) && any(ctm$pars$indvarying)) stop('Cannot optimize to get inits unless intoverpop=TRUE')
optimcontrol$init <- NULL
optimcontrol$tol=1e-7
if(!intoverpop & ! intoverstates) stop('Cannot initialize with optimization unless intoverpop and intoverstates are set to TRUE')
inits <- do.call(stanoptimis,optimcontrol)$rawest
}
sf <- stan_reinitsf(sm,standata)
staninits <- list()
if(chains > 1){ #set all chains to same inits
for(i in 1:chains){
staninits[[i]]<-constrain_pars(sf,inits+rnorm(length(inits),0,.01))
}
}
}
}
if(!optimize){
#control arguments for rstan
# if(is.null(control$adapt_term_buffer)) control$adapt_term_buffer <- min(c(iter/10,max(iter-20,75)))
if(is.null(control$adapt_delta)) control$adapt_delta <- .8
if(is.null(control$adapt_window)) control$adapt_window <- 5
if(is.null(control$max_treedepth)) control$max_treedepth <- 10
if(is.null(control$adapt_init_buffer)) control$adapt_init_buffer=2
if(is.null(control$stepsize)) control$stepsize=.001
if(is.null(control$metric)) control$metric='diag_e'
message('Sampling...')
#
stanargs <- list(object = sm,
init_r=.03,
save_warmup=as.logical(plot),
refresh=20,
iter=iter,
data = standata, chains = chains, control=control,
cores=cores,
...)
if(!is.null(inits)) stanargs$init=staninits
if(vb){
if(!intoverpop && standata$nindvarying > 0) warning('Poor results are expected with variational inference and sampling individual differences! Suggest disabling vb or enabling intoverpop.')
stanfit <- list(stanfit=rstan::vb(object = sm,data=standata, importance_resampling=TRUE,tol_rel_obj=1e-3))
} else{ #if not vi
if(plot==TRUE) stanfit <- suppressWarnings(list(stanfit=do.call(stanWplot,stanargs))) else stanfit <- suppressWarnings(list(stanfit=do.call(sampling,stanargs)))
#find the median sample and compute kalman scores etc for this
# browser()
e=rstan::extract(stanfit$stanfit)
# middle <- which(abs(e$ll-quantile(e$ll,.5)) == min(abs(e$ll-quantile(e$ll,.5) )))
# middle <- which(e$ll==max(e$ll))
stanfit$rawposterior <- t(stan_unconstrainsamples(fit = stanfit$stanfit,standata = standata))
stanfit$rawest <- apply(stanfit$rawposterior,2,median)
}
}
if(optimize==TRUE) {
stanfit <- do.call(stanoptimis,optimcontrol) #eval(parse(text=opcall))
#update data that may have changed during optimization
for(ni in names(stanfit$standata)){
standata[[ni]] <- stanfit$standata[[ni]]
}
if(ctm$n.TIpred>0){
ctm$modelmats$TIPREDEFFECTsetup <- stanfit$standata$TIPREDEFFECTsetup
ms <- ctm$modelmats$matsetup
ms$tipred <- 0L
parswithtipreds <- sort(unique(ms$param[ms$param >0 & ms$when %in% c(0,-1) & ms$copyrow < 1]))
parswithtipreds<-parswithtipreds[apply(stanfit$standata$TIPREDEFFECTsetup,1,sum)>0]
ms$tipred[ms$param >0 & ms$when %in% c(0,-1) & ms$copyrow < 1 & ms$param %in% parswithtipreds] <- 1L
ctm$modelmats$matsetup <- ms
}
}
# if(is.na(STAN_NUM_THREADS)) Sys.unsetenv('STAN_NUM_THREADS') else Sys.setenv(STAN_NUM_THREADS = STAN_NUM_THREADS) #reset sys env
} # end if fit==TRUE
#convert missings back to NA's for data output
standataout<-standata
standataout$Y[standataout$Y==99999] <- NA
standataout$tipreds[standataout$tipreds==99999] <- NA
# standataout <- utils::relist((standataout),skeleton=standata)
setup=list(recompile=recompile,idmap=standata$idmap,matsetup=ctm$modelmats$matsetup,matvalues=ctm$modelmats$matvalues,
popsetup=ctm$modelmats$matsetup[ctm$modelmats$matsetup$when %in% c(0,-1) & ctm$modelmats$matsetup$param > 0,],
popvalues=ctm$modelmats$matvalues[ctm$modelmats$matsetup$when %in% c(0,-1) & ctm$modelmats$matsetup$param > 0,],
extratforms=ctm$modelmats$extratforms)
if(fit) {
stanfit$transformedparsfull <- suppressMessages(stan_constrainsamples(sm = sm,standata = standata,
savesubjectmatrices = TRUE, samples = matrix(stanfit$rawest,1),cores=1,savescores=TRUE,pcovn=5000))
out <- list(args=args,
setup=setup,
stanmodeltext=stanmodeltext, data=standataout, ctdatastruct=datalong[c(1,nrow(datalong)),],standata=standata,
ctstanmodelbase=ctstanmodel, ctstanmodel=ctm,stanmodel=sm, stanfit=stanfit)
class(out) <- 'ctStanFit'
out$stanfit$kalman<-suppressMessages(ctStanKalman(out,pointest = TRUE))
}
if(!fit) out=list(args=args,setup=setup,
stanmodeltext=stanmodeltext,data=standataout, ctdatastruct=datalong[c(1,nrow(datalong)),],standata=standata,
ctstanmodelbase=ctstanmodel, ctstanmodel=ctm)
return(out)
}