/
siman_zipplot.ado
492 lines (420 loc) · 15.7 KB
/
siman_zipplot.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
*! version 1.8.12 25oct2023
* version 1.8.12 25oct2023 IW Added true value to note
* version 1.8.11 16oct2023 EMZ minor update to warning message (# graphs each of # panels)
* version 1.8.10 03oct2023 EMZ update to warning message when if/by conditions used
* version 1.8.9 02oct2023 EMZ bug fix so works with dgm == x when dgm defined >1 variable
* version 1.8.8 05sep2023 EMZ specified name used if true >1 value, named graph with suffix _x
* version 1.8.7 15aug2023 EMZ minor bug fix in name for when multiple graphs being printed out
* version 1.8.6 21july2023 IW suppress unwanted "obs dropped" message
* version 1.8.5 04july2023 EMZ major re-write for graphs when dgm is defined by > 1 variable, all combinations displayed on 1 graph. lpoint/rpoint not
* hard coded.
* version 1.8.4 16may2023 EMZ bug fix for multiple estimands with multiple targets, formatting to title
* version 1.8.3 27mar2023 EMZ minor bug fix for when missing method
* version 1.8.2 06mar2023 EMZ minor bug fix for when method is string
* version 1.8.2 02mar2023 EMZ fixed bug, now if dgm and method are numeric labelled string, the label values will be used in the graphs
* version 1.8.1 30jan2023 IW removed rows() and xsize() so they can be user-specified (in bygr() and outside, respectively)
* version 1.8 07nov2022 EMZ added to code so now allows graphs split out by every dgm variable and level if multiple dgm variables declared.
* version 1.7 05sep2022 EMZ added additional error message
* version 1.6 14july2022 EMZ fixed bug so name() allowed in call
* version 1.5 30june2022 EMZ fixed bug where axis crosses
* version 1.4 24mar2022 EMZ changes (suppress DGM=1 if no DGM/only 1 DGM)
* version 1.3 02mar2022 EMZ changes from IW further testing
* version 1.2 06jan2021 EMZ updates from IW testing (bug fixes)
* version 1.1 25Jan2021 Ella Marley-Zagar, MRC Clinical Trials Unit at UCL. Based on Tim Morris' simulation tutorial do file.
* File to produce the zip plot
*******************************************************************************************************************************************************
capture program drop siman_zipplot
program define siman_zipplot, rclass
version 15
syntax [if][in] [, * BY(varlist) ///
NONCOVeroptions(string) COVeroptions(string) SCAtteroptions(string) ///
TRUEGRaphoptions(string) BYGRaphoptions(string) SCHeme(string) ymin(integer 0) ]
foreach thing in `_dta[siman_allthings]' {
local `thing' : char _dta[siman_`thing']
}
if "`setuprun'"!="1" {
di as error "siman_setup needs to be run first."
exit 498
}
* if estimate or se are missing, give error message as program requires them for the graph(s)
capture confirm _lci _uci
if _rc {
if mi("`estimate'") | mi("`se'") {
di as error "siman zipplot requires either lower & upper CI" _n "or estimate & se (optionally df)"
exit 498
}
}
* if true is missing, produce an error message
if "`true'"=="" {
di as error "The variable 'true' is missing so siman zipplot can not be created. Please create a variable in your dataset called true containing the true value(s) re-run siman setup with true() option specified."
exit 498
}
tempfile origdata
qui save `origdata'
* If data is not in long-long format, then reshape
if `nformat'!=1 {
qui siman reshape, longlong
foreach thing in `_dta[siman_allthings]' {
local `thing' : char _dta[siman_`thing']
}
}
* if the user has not specified 'if' in the siman zipplot syntax, but there is one from siman setup then use that 'if'
if ("`if'"=="" & "`ifsetup'"!="") local ifzipplot = `"`ifsetup'"'
else local ifzipplot = `"`if'"'
qui tempvar touseif
qui generate `touseif' = 0
qui replace `touseif' = 1 `ifzipplot'
preserve
qui sort `dgm' `target' `method' `touseif'
* The 'if' condition will only apply to dgm, target and method. The 'if' condition is not allowed to be used on rep and an error message will be issued if the user tries to do so
capture by `dgm' `target' `method': assert `touseif'==`touseif'[_n-1] if _n>1
if _rc == 9 {
di as error "The 'if' condition can not be applied to 'rep' in siman zipplot. If you have not specified an 'if' in siman zipplot, but you specified one in siman setup, then that 'if' will have been applied to siman zipplot."
exit 498
}
restore
qui keep if `touseif'
* if the user has not specified 'in' in the siman zipplot syntax, but there is one from siman setup then use that 'in'
if ("`in'"=="" & "`insetup'"!="") local inzipplot = `"`insetup'"'
else local inzipplot = `"`in'"'
qui tempvar tousein
qui generate `tousein' = 0
qui replace `tousein' = 1 `inzipplot'
qui keep if `tousein'
preserve
* keep estimates data only
qui drop if `rep'<0
* take out underscores at the end of variable names if there are any
foreach u of var * {
if substr("`u'",strlen("`u'"),1)=="_" {
local U = substr("`u'", 1, index("`u'","_") - 1)
if "`U'" != "" {
capture rename `u' `U'
if _rc di as txt "problem with `u'"
}
}
}
if substr("`estimate'",strlen("`estimate'"),1)=="_" local estimate = substr("`estimate'", 1, index("`estimate'","_") - 1)
if substr("`se'",strlen("`se'"),1)=="_" local se = substr("`se'", 1, index("`se'","_") - 1)
* define 'by'
if !mi("`by'") {
local byvar = "`by'"
}
else if mi("`by'") {
if "`dgmcreated'" == "1" & "`methodcreated'" == "1" local byvar "`target'"
else if "`dgmcreated'" == "1" & "`methodcreated'" == "0" local byvar "`target' `method'"
else if "`dgmcreated'" == "0" & "`methodcreated'" == "1" local byvar "`dgm' `target'"
else local byvar = "`dgm' `target' `method'"
}
* Zip plot of confidence intervals
capture confirm variable _lci
if _rc {
capture confirm variable df
if !_rc {
qui gen float _lci = `estimate' + (`se'*invttail(`df', .025))
local lci _lci
qui gen float _uci = `estimate' + (`se'*invttail(`df', .975))
local uci _uci
}
else {
qui gen float _lci = `estimate' + (`se'*invnorm(.025))
local lci _lci
qui gen float _uci = `estimate' + (`se'*invnorm(.975))
local uci _uci
}
}
if "`method'"!="" {
* for value labels of method
qui tab `method'
local nmethodlabels = `r(r)'
qui levels `method', local(levels)
tokenize `"`levels'"'
forvalues m = 1/`nmethodlabels' {
local methodlabel`m' "``m''"
}
}
if "`target'"!="" {
* for value labels of target
qui tab `target'
local ntargetlabels = `r(r)'
qui levels `target', local(levels)
tokenize `"`levels'"'
forvalues k = 1/`ntargetlabels' {
local targetlabel`k' "``k''"
}
}
capture confirm number `true'
if _rc {
qui gen `true'calc = .
if "`true'"!="" {
qui tab `true'
local ntrue = `r(r)'
if `r(r)'==1 {
qui levelsof `true', local(levels)
local `true'value = `r(levels)'
local `true'value1 = `r(levels)'
local `true'label1 = `r(levels)'
*local `true'number`truevalue' `truevalue'
qui replace `true'calc = ``true'value'
local truevalue ``true'value'
}
else if `r(r)'>1 {
* Get true label values
cap qui labelsof `true'
cap qui ret list
if `"`r(labels)'"'!="" {
local 0 = `"`r(labels)'"'
forvalues t = 1/`ntrue' {
gettoken `true'label`t' 0 : 0, parse(" ")
* local `true'number`t' `t'
local `true'value`t' `t'
qui replace `true'calc = ``true'value`t'' if `true' == float(`t')
local truelabels = 1
}
}
else {
local truelabels = 0
qui tab `true'
local ntrue = `r(r)'
qui levelsof `true', local(levels)
tokenize `"`levels'"'
forvalues t = 1/`ntrue' {
local `true'label`t' ``t''
local `true'value`t' `t'
qui replace `true'calc = ``true'value`t'' if `true' == float(``t'')
}
}
/*
qui levels `true', local(truelevels)
tokenize `"`truelevels'"'
forvalues j = 1/`ntrue' {
local truevalue`j' "``j''"
}
*/
}
}
}
else {
local ntrue = 1
local truevalue = `true'
local truevalue1 = `true'
local truelabel1 = `true'
local truenumber1 1
qui gen truecalc = `true'
local true "true"
}
local dgmorig = "`dgm'"
local numberdgms: word count `dgm'
if `numberdgms'==1 {
qui tab `dgm'
local ndgmlabels = `r(r)'
}
if `numberdgms'!=1 {
local ndgmlabels = `numberdgms'
local dgmexcludetrue: list dgm - true
local dgm `dgmexcludetrue'
}
foreach dgmvar in `dgm' {
local dgmlabels = 0
qui tab `dgmvar'
local ndgmvar = `r(r)'
* Get dgm label values
cap qui labelsof `dgmvar'
cap qui ret list
if `"`r(labels)'"'!="" {
local 0 = `"`r(labels)'"'
forvalues i = 1/`ndgmvar' {
gettoken `dgmvar'dlabel`i' 0 : 0, parse(": ")
local dgmlabels = 1
}
}
else {
local dgmlabels = 0
qui levels `dgmvar', local(levels)
tokenize `"`levels'"'
forvalues i = 1/`ndgmvar' {
local `dgmvar'dlabel`i' `i'
}
}
qui tab `dgmvar'
local n`dgmvar'labels = `r(r)'
}
* For coverage (or type I error), use true θ for null value
* so p<=.05 is a non-covering interval
* make sure using actual true value and not the label value (e.g. using 0.5, 0.67 and not 1, 2 etc)
* if true is just one value, then sorting on it won't make a difference so can use the below code for all cases of true
capture confirm variable _p`estimate'
if _rc {
qui bysort `true': gen float _p`estimate' = 1-normal(abs(`estimate'-`true'calc)/`se') // if sim outputs df, use ttail and remove 1-'
}
capture confirm variable _covers
if _rc {
qui bysort `true' : gen byte _covers = _p`estimate' > .025 // binary indicator of whether ci covers true estimate
}
sort `byvar' `true' _p`estimate'
capture confirm variable _p`estimate'rank
if _rc {
qui bysort `byvar' `true': gen double _p`estimate'rank = 100 - (_n/(_N/100)) // scale from 0-100. This will be vertical axis.
}
* Create MC conf. int. for coverage
capture confirm variable _covlb
if _rc {
qui gen float _covlb = .
}
capture confirm variable _covub
if _rc {
qui gen float _covub = .
}
* Need to know what format method is in (string or numeric)
local methodstringindi = 0
capture confirm string variable `method'
if !_rc local methodstringindi = 1
* to get lb and ub of CIs per dgm/method/target/true combinations, create groups to map these on to
sort `byvar' `true'
qui egen group = group(`byvar' `true')
tempfile masterdata
qui save `masterdata'
qui statsby, by(group) clear: ci proportions _covers
tempfile statsbydata
qui save `statsbydata'
qui use `masterdata', clear
qui merge m:1 group using `statsbydata', keepusing(lb ub)
* check
* ci proportions _covers if method == 1 & estimand == 1 & true == 0.5
qui replace _covlb = 100*(lb)
qui replace _covub = 100*(ub)
qui drop _merge lb ub
qui bysort `byvar': replace _covlb = . if _n>1
qui bysort `byvar': replace _covub = . if _n>1
//capture confirm variable _lpoint
// if _rc {
qui egen _lpoint = min(_lci) , by(`byvar' `true')
//}
//capture confirm variable _rpoint
// if _rc {
qui egen _rpoint = max(_uci) , by(`byvar' `true')
//}
* Can't tokenize/substr as many "" in the string
if !mi(`"`options'"') {
tempvar _namestring
qui gen `_namestring' = `"`options'"'
qui split `_namestring', parse(`"name"')
local options = `_namestring'1
cap confirm var `_namestring'2
if !_rc {
local namestring = `_namestring'2
local name = `"name`namestring'"'
local options: list options - name
* strip out the actual name out of the command
local namelastpart = subinstr(`"`name'"',"name("," ",1)
local namefirstpart = strtrim(subinstr(`"`namelastpart'"',", replace)"," ",1))
*di `"`namefirstpart'"'
local namestub = substr(`namefirstpart',1,.)
}
}
* check if many graphs will be created - if so warn the user
local dgmcount: word count `dgm'
qui tokenize `dgm'
if `dgmcreated' == 0 {
forvalues j = 1/`dgmcount' {
qui tab ``j''
local nlevels = r(r)
local dgmvarsandlevels `"`dgmvarsandlevels'"' `"``j''"' `" (`nlevels') "'
if `j' == 1 local totaldgmnum = `nlevels'
else local totaldgmnum = `totaldgmnum'*`nlevels'
}
}
if "`numtarget'" == "N/A" local numtargetcheck = 1
else {
* need to re-count in case there is an 'if' statement. Data is in long-long format from reshape above
qui tab `target'
local numtargetcheck = `r(r)'
}
if "`nummethod'" == "N/A" local nummethodcheck = 1
else {
* need to re-count in case there is an 'if' statement. Data is in long-long format from reshape above
qui tab `method'
local nummethodcheck = `r(r)'
}
if "`totaldgmnum'" == "" local totaldgmnum = 1
if "`by'" == "`method'" {
local totaldgmnum = 1
local numtargetcheck = 1
}
if !mi("`by'") & strpos("`dgm'", "`by'")>0 {
local nummethodcheck = 1
cap qui tab `by'
local totaldgmnum = `r(r)'
}
local numtruecheck = 1
if !mi("`ntrue'") local numtruecheck `ntrue'
local graphnumcheck = (`totaldgmnum' * `nummethodcheck' * `numtargetcheck')/`numtruecheck'
if `graphnumcheck' > 15 {
di as smcl as text "{p 0 2}Warning: `numtruecheck' graphs each of `graphnumcheck' panels will be created: consider using 'if' condition or 'by' option as detailed in {help siman_zipplot:siman zipplot}{p_end}"
}
if `numtruecheck' > 1 di as txt "Drawing `numtruecheck' graphs (1 per true value)"
* Plot of confidence interval coverage:
* First two rspike plots: Monte Carlo confidence interval for percent coverage
* second two rspike plots: confidence intervals for individual reps
* blue intervals cover, purple do not
* scatter plot (white dots) are point estimates - probably unnecessary
tempfile graphdata
qui save `graphdata'
if `ntrue' == 1 {
#delimit ;
twoway
(rspike `lci' `uci' _p`estimate'rank if !_covers & _p`estimate'rank>=`ymin', hor lw(medium) pstyle(p2) lcol(%30) `noncoveroptions')
(rspike `lci' `uci' _p`estimate'rank if _covers & _p`estimate'rank>=`ymin', hor lw(medium) pstyle(p1) lcol(%30) `coveroptions')
(scatter _p`estimate'rank `estimate' if _p`estimate'rank>=`ymin', msym(p) mcol(white%30) `scatteroptions') // plots point estimates in white
(pci `ymin' `truevalue' 100 `truevalue', pstyle(p5) lw(thin) `truegraphoptions')
(rspike _lpoint _rpoint _covlb, hor lw(thin) pstyle(p5)) // MC
(rspike _lpoint _rpoint _covub, hor lw(thin) pstyle(p5))
,
xtitle("95% confidence intervals")
ytitle("Centile")
ylab(5 50 95)
by(`byvar', ixaxes noxrescale iscale(*.9) `bygraphoptions')
legend(order(1 "Non-coverers" 2 "Coverers"))
`scheme'
`options'
`name'
;
#delimit cr
}
else if `ntrue'>1 {
* note have to use true_`j' in name to get true_1 etc, not value as will error out if e.g. have 0.25 in the name
forvalues k = 1/`ntrue' {
qui keep if `true'calc == `k'
* have to create local noname for the loop (to re-set name later, so that each graph is named)
local noname 0
if mi("`name'") local noname = 1
if `noname'==1 local name "name(simanzip_true_`k', replace)"
else local name "name(`namestub'_`k', replace)"
#delimit ;
twoway
(rspike `lci' `uci' _p`estimate'rank if !_covers & `true'calc == `k' & _p`estimate'rank>=`ymin', hor lw(medium) pstyle(p2) lcol(%30) `noncoveroptions')
(rspike `lci' `uci' _p`estimate'rank if _covers & `true'calc == `k' & _p`estimate'rank>=`ymin', hor lw(medium) pstyle(p1) lcol(%30) `coveroptions')
(scatter _p`estimate'rank `estimate' if `true'calc == `k' & _p`estimate'rank>=`ymin', msym(p) mcol(white%30) `scatteroptions') // plots point estimates in white
(pci `ymin' ``true'label`k'' 100 ``true'label`k'', pstyle(p5) lw(thin) `truegraphoptions')
(rspike _lpoint _rpoint _covlb, hor lw(thin) pstyle(p5)) // MC
(rspike _lpoint _rpoint _covub, hor lw(thin) pstyle(p5))
,
xtitle("95% confidence intervals")
ytitle("Centile")
ylab(5 50 95)
by(`byvar', ixaxes noxrescale iscale(*.9) note("Graphs by `byvar', true=``true'label`k''") `bygraphoptions' )
legend(order(1 "Non-coverers" 2 "Coverers"))
`scheme'
`options'
`name'
;
#delimit cr
use `graphdata', clear
* have to re-set otherwise name will not be updated
if `noname' == 1 local name ""
}
}
restore
local dgm = "`dgmorig'"
qui use `origdata', clear
end