/
PAR_simulation_220117b.do
729 lines (564 loc) · 23.9 KB
/
PAR_simulation_220117b.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
* This is a Stata do file (compatible with Stata 17.0) that implements the
* simulation in Malani, Rosenbaum, Archie, Alberts (2022) from scratch.
********************
********************
* Preliminaries
********************
********************
// Continuous scrolling, clear data, set memory, set seed for random numbers
set more 1
drop _all
set max_memory 128g
set seed 32679 // Happy bday, Stacy!
// Log file
cap log close
cap log using sim_210629, replace
// Libraries
local datalib "/afs/crc.nd.edu/user/a/amalani/PAR"
local homelib "/afs/crc.nd.edu/user/a/amalani/PAR"
local parlib "/afs/crc.nd.edu/user/a/amalani/PAR"
*local datalib "/scratch/midway2/amalani"
*local homelib "/home/amalani/"
*local parlib "/home/amalani/rosenbaum/par"
*local datalib "\\midwaysmb.rcc.uchicago.edu\homes\rosenbaum\parcap
//Change directory
cap cd "`datalib'"
// Notes to self
*Q: Do we want to aggregate groups of monkeys?
// Toggles --> allows you to run parts of code at a time
// Toggle macros to 1 to run, 0 to not run
// Toggle 1 sim at a time so you don't generate excess files and violate quota
local gtoolkit 0
local model 0 // permanently 0 so we can collapse text
local func_strat 0 // permanently 0
local func_ninp 0 // permanently 0
local indvar 0 // generate unique covariate vectors
local make_coef_10 0
local test_coef_10x16 0
local test_coef_10x16_merge 0
local screen_coef 0
local sim_data 0 // generate sim data sets
local test_regs 0 // test for DC and PAR with sim data sets
// Load gtoolkit to work with large data sets more quickly
if `gtoolkit'==1{
* Load necessary software
cap ssc install gtools
gtools, upgrade
}
********************
********************
* Simulation primitives
********************
********************
// We generate realities. Then we generate a data set for each reality to test for PAR, DC in that reality.
// Each data set has `regressor' covariate vectors x `u' error terms. Total `ressors'*`u' obs.
local p = 3 // number of powers for Taylor series expansion of for truth in each reality = 3
local regressors 100 // building number of covariate vectors for a reality
local u 20 // multiples of each caovariate vector, i.e., generate u errors for each covariate vector
********************
********************
* INDEPENDENT VARS
********************
********************
if `indvar'==1{
/*
The goal is to create a data set of `regressors' obs on var for purpose of simulating.
Later we can add obs to make the distribution of errors look nicer.
*/
// Create data set with `regressors' empty obs
clear *
set obs `regressors' // sim size --> keep low to reduce size of data sets for Stata
// Create e0 (initial rank): uniformly distributed from 0 to 1
gen e0 = (_n-1)/(_N-1) // higher is better
la var e0 "Childhood rank"
// Create \Delta e or echange (change in rank): normally distributed with mean and SD from baboon data
gen echange = rnormal(-0.03,0.21) // drawn from su of momsubjectrankdiff
// i.e., (rank of baboon as adult) MINUS (rank that baboon's mother held at time infant was born)
la var echange "Random change in rank"
// Create final rank: is sum of e0 and echange
// This is NOT an important step for the reality that we create
// because our reality is y_1 = f(e0,echange). e1 plays no role.
gen e1 = e0 + echange // calculate final rank as sum of e0 and echange
la var e1 "Adult rank"
/* DO NOT FLATTEN OUT e1 AND MODIFY echange
// Do not run the following code unless you want to flatten e1 and care about e1 more than echange
gsort +e1 // sort the final rank
gen e1_order = _n
replace e1 = (e1_order - 1)/(_N-1) // create final rank based on
drop e1_order
// Generate change in rank if we use the flattened e1.
// Calculate the mean and SD to see if it aligns with baboon data after flattening.
gen echange_final = e1 - e0
la var echange_final "Final change in rank (after making e1 uniform)"
su echange echange_final
*/
// Calculate absolute value of change using flattened e1
gen ed = ((e1 - e0)^2)^0.5
la var ed "|e1 - e0|"
// Generate obs number variable for each covariate vector
sort e0
gen obsID = _n // sorted by e0
la var obsID "Regressor combination ID"
// Generate more regressors --> combinations of regressors for null, DC, par
// 2nd order
gen e02 = e0^2
la var e02 "e0^2"
gen e12 = e1^2
la var e12 "e1^2"
gen ed2 = ed^2
la var ed2 "|e1 - e0|^2"
gen e0_e1 = e0 * e1
la var e0_e1 "e1 x e0"
gen e0_ed = e0 * ed
la var e0_ed "e0 x ed"
gen e1_ed = e1 * ed
la var e1_ed "e1 x ed"
// 3rd order
gen e03 = e0^3
la var e03 "e0^3"
gen e13 = e1^3
la var e13 "e1^3"
gen ed3 = ed^3
la var ed3 "|e1 - e0|^3"
gen e0_e12 = e0 * (e1^2)
la var e0_e12 "e0 x (e1^2)"
gen e02_e1 = (e0^2) * e1
la var e02_e1 "(e0^2) x e1"
gen e0_ed2 = e0 * (ed^2)
la var e0_ed2 "e0 * (|e1 - e0|^2)"
gen e02_ed = (e0^2) * ed
la var e02_ed "(e0^2) * |e1 - e0|"
gen e1_ed2 = e1 * (ed^2)
la var e1_ed2 "e1 * (|e1 - e0|^2)"
gen e12_ed = (e1^2) * ed
la var e12_ed "(e1^2) * |e1 - e0|"
su e*
* Save data with unique covariate vectors
save vars_indep, replace
} // End toggle; checks out
********************
********************
* REALITIES
********************
********************
********************
* Generate coef files
********************
if `make_coef_10'==1 {
* Primitives
// Clear all variables
clear *
// Set range and increments of each coefficient
local mincoef = -1 // min coefficient in power function
local maxcoef = 1 // max coefficient in power function
local increm = 0.5 // Increment for coefficients in power function
local steps `=((`maxcoef'-`mincoef')/`increm')+1'
// Specify number of and create counter for coefficeients
local k 10 // 10 coef for 2 input & 3 powers or for 3 inputs and 2 powers
local s = `k'-1 // useful when starting index at 0
local o `=`steps'^`k''
di "steps = `steps', k=`k', s=`s', o=`o'"
* Generate coefficients
// Initialise coefs (i.e., the vars representing coefs) at missing
set obs `steps'
forval m = 0/`s' {
gen a`m' = . // set equal to missing to start
la var a`m' "Coef on x^`p'" // coefficient values
}
// Generate variation in coefs
// This is done in a sequance of expand commands; be sure to sort before each one!
gen check = .
gen obsID0 = _n
forval i=1/`steps' {
replace a0 = `mincoef'+((`i'-1)*`increm') if obsID0==`i'
}
forval j = 1/`s' {
local j_1 = `j'-1
qui: expand `steps'
hashsort obsID`j_1'
qui: gen obsID`j' = _n
qui: replace check = (obsID`j'-((obsID`j_1'-1)*`steps'))
forval i=1/`steps' {
qui: replace a`j' = `mincoef'+((`i'-1)*`increm') if (obsID`j'-((obsID`j_1'-1)*`steps'))==`i'
}
}
su // check result
* CLean up and save
// Eliminate excess vars; add ID variable
drop obsID* check
gen coefID = _n
la var coefID "ID for coef vector"
// Compress to save space, then save
compress
save coef_`k', replace
} // end toggle; checks out: 9.765m obs (i.e., coef vectors or possible realities)
*stop
********************
* To screen realities (coefficient vectors) for PAR/DC, create variation in covariates e0, ed
********************
// Generate 4 values of each of 2 covariates (total 16)
// These covariates will be equally spaced because this is to test for PAR/DC in each reality
// The goal is not (yet) to generate a realistic data set that examines whether IR, QR regression work
if `test_coef_10x16' == 1{
// Load coefs file
use coef_10, clear
* I have to create combinations of e0 and ed, so need 4*4 (if increment by 0.3333)
// Code to reduce space for debugging
*keep a0 // just for testing code, keep subset of vars
*cap drop *flag* *ID
*keep if _n < 1001
// Keep just the coef id to save space; you can merge in coefs later
keep coefID
// Create 1st variable, call it "x", with 4 stop (0,.3,.6,1)
timer on 1 // track time since these steps take time
expand 4
hashsort coefID // hashsort is faster (it's from gtools, so make sure that's installed and updated)
gen long xID = _n
gen byte x = ((xID - ((coefID-1)*4))-1) // x goes from 0(0.334)1 for each coef vector
timer off 1
// Create 2nd variable, call it "z", with 4 stops
timer on 2
expand 4
hashsort xID
gen long zID = _n
gen byte z = ((zID - ((xID-1)*4))-1) // z goes from 0(0.334)1 for each coef vector
timer off 2
// Check times for each step
timer list
// We worked in integers to save space and mem; now we divide by 3 to get fractions, i.e., change 1,...,4 --> 0,.3,.6,1
recast float x z // change type to allow fractions
gen e0 = x/3
gen ed = z/3
drop x z
rename xID e0ID
rename zID edID
// Summarize, and drop unnecessary vars
su coefID e0ID edID e0 ed
drop e0ID edID
// Save file of e0 and ed w/ coefID's, but not coefs themselves
compress
hashsort coefID
save coef_10x16, replace
} // end toggle; checks out: 156m obs = 5^10 x 16
*stop
********************
* Merge coefs and coariates for testing
********************
if `test_coef_10x16_merge' == 1 {
// Load coef data
use coef_10, clear
// Merge in e0, ed from coef_10x16
timer on 1
hashsort coefID
*gen w = runiform() // use these 2 lines to take a subsample fo data for debugging
*keep if w < 0.01
merge 1:m coefID using coef_10x16, keep(3) // NB: coef_10x16 sorted before saving
// Conpress and save data
compress
save coef_10x16_merged, replace // we save file in case program crashes
timer off 1
// Check times for each step
timer list
} // End toggle; Checks out 156m obs w inputs
*stop
********************
* Screen out infeasible realities, test realities for PAR, DC
********************
if `screen_coef==1' {
// Load data
use coef_10x16_merged, clear
// Gen flags for range, monotonicity
cap gen byte range_flagy = 0
cap gen byte monot_flage0 = 0
cap gen byte monot_flaged = 0
// Clear timers
timer clear
// Generate outcome, test range // works for binary or fraction/share/percent outcome vars
timer on 1 // start timer for feasible y check
gen float y = a0 + (a1*e0) + (a2*ed) + (a3*e0*e0) + (a4*ed*ed) + (a5*e0*ed) + (a6*(e0^3)) + (a7*(ed^3)) + (a8*(e0*ed*ed)) + (a9*(e0*e0*ed))
// This simulates y (fertility)
replace range_flagy = 1 if (y<0 | y>1) // flag any y that is outside acceptable range [0,1]
hashsort coefID // use gtools version of sort bec faster with big data
by coefID: gegen byte range_flag = max(range_flagy)
// flag any coef vector if any e0 or ed causes out of range error for coef vector
su e0 ed if range_flag==0
drop range_flagy
timer off 1
drop if range_flag==1 // save time on subsequent calcs
// drop any coef vector if any e0 or ed causes out of range error for coef vector
// Drop 154 million obs (before drop, total # obs = # coef vectors x 16 covariate)!
// Test for DC, PAR
timer on 2 // start timer for DC and PAR flags
gen float yde0 = a1 + (2*a3*e0) + (a5*ed) + (3*a6*e0*e0) + (a8*ed*ed) + (2*a9*e0*ed) // generate derivative wrt e0
gen float yded = a2 + (2*a4*ed) + (a5*e0) + (3*a7*ed*ed) + (2*a8*e0*ed) + (a9*e0*e0) // generate derivative wrt ed
replace monot_flage0 = 1 if sign(yde0) > 0 // flag any funcs that are monotone increasing in e0 (DC)
replace monot_flaged = 1 if sign(yded) < 0 // flag any funcs that are monotone decreasing in ed (PAR)
hashsort coefID
gegen byte dc_flag_meantrue = mean(monot_flage0), by(coefID)
gegen byte par_flag_meantrue = mean(monot_flaged), by(coefID)
gegen byte dc_flag_true = min(monot_flage0), by(coefID) // always true
gegen byte par_flag_true = min(monot_flaged), by(coefID)
gegen byte dc_flag_false = max(monot_flage0), by(coefID)
gegen byte par_flag_false = max(monot_flaged), by(coefID)
replace dc_flag_false = 1 - dc_flag_false // always false
// Have to do this in 2 steps because gegen does not allow this in 1 step
replace par_flag_false = 1 - par_flag_false
la var dc_flag_meantrue "DC model is true (on ave.)"
la var par_flag_meantrue "PAR model is true (on ave.)"
la var dc_flag_true "DC model is true (all regressor values)"
la var par_flag_true "PAR model is true (all regressor values)"
la var dc_flag_false "DC model is false (all regressor values)"
la var par_flag_false "PAR model is false (all regressor values)"
su e0 ed yde0 yded if dc_flag_meantrue==1
su e0 ed yde0 yded if par_flag_meantrue==1
drop monot_flage0 monot_flaged
timer off 2
// Check times for each step
timer list
// Keep coefs and DC PAR flags only
local keepvars "a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 dc_flag* par_flag*"
keep `keepvars'
// Drop duplicates. This drops all the covariate variation used to test range and PAR/DC.
gduplicates report `keepvars'
gduplicates drop `keepvars', force // drops 1.95 m obs
gen long coefID=_n
// Check variables you keep, check how many realities have DC, PAR
su `keepvars'
tab dc_flag_meantrue par_flag_meantrue
// Save coefs
save coef_flagged, replace
*save /home/amalani/rosenbaum/par/coef_flagged, replace
*cap rm coef_10x16_merged
} // end toggle
// 130,201 survive out of range restriction
// 2% pass DC on ave, 6% fail DC on ave
// 2% pass PAR on ave, 6% fail PAR on ave
// They are symmetric because regression is symmetric in e0 and ed and we span e0 and ed symmetrically
// But it's not the same set of coeff that pass each one:
/*
. tab dc_flag_meantrue par_flag_meantrue
DC model | PAR model is true (on
is true | ave.)
(on ave.) | 0 1 | Total
-----------+----------------------+----------
0 | 124,749 2,697 | 127,446
1 | 2,697 58 | 2,755
-----------+----------------------+----------
Total | 127,446 2,755 | 130,201
*/
*stop
********************
********************
* SIMULATE IR REGS
********************
********************
********************
* Flesh out data set with error terms
********************
if `sim_data'==1 {
// Load screened coefs (ie realities)
use "`datalib'/coef_flagged.dta", clear // 130k obs
// Create space to merge in (realistic) indep variables for data set.
recast int coefID
sort coefID // recall this is a bew coefID that counts at coef vectors that meet tests
*su coefID
expand `regressors' // create space for all possible e0's -> 130k x 20 = 2.6m
hashsort coefID // sort in coefID because expand tacks on end
gen int obsID = _n
replace obsID = (_n - ((coefID-1)*`regressors')) // create variable to m:1 merge on
*su // obsID should be from 1-`regressors'
// Merge in indep vars for simulated regs
merge m:1 obsID using "`datalib'/vars_indep.dta"
tab _merge
drop _merge
drop e03 e13 ed3 e0_e12 e02_e1 e0_ed2 e02_ed e1_ed2 e12_ed echange*
compress
*su coefID obsID
// Save data for use later
hashsort coefID obsID // use to check data editor
save "`datalib'/simdatafortest_noerror.dta", replace
// Create space for more obs for each x; we will add in errors later
hashsort coefID obsID // use to check data editor
expand `u' // so this makes 2000 = 100x20 obs per coef vector
// now at 52m obs
// Take stock
hashsort coefID obsID
*su coefID obsID
*su
// Generate hypothetical y's
gen float y = a0 + (a1*e0) + (a2*ed) + (a3*e0*e0) + (a4*ed*ed) + (a5*e0*ed) + (a6*(e0^3)) + (a7*(ed^3)) + (a8*(e0*ed*ed)) + (a9*(e0*e0*ed))
// deterministic
// Generate draws on an error term for each coef vector
// We take a average organism, who starts at rank 0 and sees no change. What is it's outcome?
// We can calculate the variance of the error from that mean outcome.
// We draw normal errors with mean 0 and the variance just calculated.
// Why normal? Number of positve draws from n binomials with mean p converges to normal with mean np and variance np(1-p).
gen A = a0 + (a1*0.5) + (a3*0.25) + (a6*0.125) // calculate the mean at the average value of x
gen s2 = A*(1-A) // calculate the binomial varance
su s2
gen u = rnormal(0,s2) // generate a random variable with mean 0 and variance equal to the bonomial variance
/* We do not do this:
* Use variance of residual from reg of fertility on rank 1 to add randomness
* Mean (SD) of residual is 0 (.3524) if repstats_state=1
*/
// Calculate outcomes with this variance
replace y = y + u
la var y "Fertility"
// Save data
save "`datalib'/simdatafortest.dta", replace
}
*stop
********************
* Test interaction and quadratic regression models
********************
if `test_regs'==1 { //
*******************
* Run regressions *
*******************
// Load sim data
use "`datalib'/simdatafortest.dta", clear // n = 260,402,000
// Run IR reg, save results
*regressby y e0 e1 e0_e1, by(coefID) save(ir_results)
// Do QR regression, save results
*regressby y e0 ed e02 ed2 e0_ed, by(coefID) save(qr_results)
// Run visualization test regardless of whether IR regression yield sig coef on interaction term
// We'll screen on sig coef on interaction later
// Check how outcomes change with adult environment
// First, for those who start in low quality environments
*keep if e0 < 0.5 // regressby does not allow if statements
*regressby y e1, by(coefID) save(vislow_results)
//Second, for thsoe who start in high quality environments
*suse "`datalib'/simdatafortest.dta", clear // n = 260,402,000
keep if e0 > 0.5
regressby y e1, by(coefID) save(vishigh_results)
*stop
***********************
* Analyze regressions *
***********************
// Interaction regression
// Load IR results
use "`datalib'/ir_results.dta", clear
// Conventional test for PAR with IR
// STEP ONE: Is interaction sig?
gen ir_par_inter_flag = abs(_b_e0_e1 / _se_e0_e1) > 1.96 // flag for interaction term being sig
la var ir_par_inter_flag "Positive test - sig interaction term with IR reg"
// STEP TWO comes later
// Conventional test for DC with IR
gen ir_dc_conv_flag = abs(_b_e0 / _se_e0) > 1.96 // flag for coef on e0 term being sig
la var ir_dc_conv_flag "Positive test - sig coef on e0 with IR reg"
// Definition aligned test for PAR with IR reg:
// Is derivative of IR reg wrt ed (|echange|) negative at mean values of e0 and ed?
gen ir_par_def_mean = (_b_e1 + (_b_e0_e1 * 0.5)) * (-1)
// multiply by -1 because mean value of echange is -0.03
gen ir_par_def_var = (_se_e1^2) + ((_se_e0_e1 * 0.5)^2) + 2*_cov_e0_e1_e1*0.5
gen ir_par_def_se = sqrt(ir_par_def_var)
gen ir_par_def_flag = ir_par_def_mean / ir_par_def_se < -1.96
// negative and sig
la var ir_par_def_flag "Positive test - definition aligned PAR test with IR reg"
//Definition aligned test for DC with IR
gen ir_dc_def_mean = _b_e0 + (_b_e0_e1 * 0.47) // 0.47 because mean of echange is -0.03
gen ir_dc_def_var = (_se_e0^2) + ((_se_e0_e1 * 0.47)^2) + 2*_cov_e0_e1_e1*0.47
gen ir_dc_def_se = sqrt(ir_dc_def_var)
gen ir_dc_def_flag = ir_dc_def_mean / ir_dc_def_se > 1.96
// positive and sig
la var ir_dc_def_flag "Positive test - definition aligned DC test with IR reg"
// Save flags only in results file
keep coefID ir_*
sort coefID
save "`datalib'/test_results.dta", replace
// Visualization test
// Load lower visualization datalib, rename coefs to mark them as lower
use "`datalib'/vislow_results.dta", clear
rename _b_cons _b_cons_low
rename _b_e1 _b_e1_low
// Merge higher visualization datalib, rename coefs to mark them as higher
sort coefID
merge 1:1 coefID using "`datalib'/vishigh_results.dta"
tab _merge
drop _merge
rename _b_cons _b_cons_high
rename _b_e1 _b_e1_high
// Flag whether high-birth organisms starts lower and ends higher
gen vis_startlower_flag = _b_cons_high < _b_cons_low
gen vis_endhigher_flag = _b_cons_high + _b_e1_high > _b_cons_low + _b_e1_low
gen vis_flag = (vis_startlower_flag == 1 & vis_endhigher_flag == 1)
la var vis_flag "Positive test - visualization test"
// Flag whether high (low) birth organisms have positive (negative) slope
gen vis_strict_flag = (_b_e1_high > 0 & _b_e1_low < 0)
la var vis_strict_flag "Positive test - strict visualization test"
// Flag whether slope of 4th quartile is higher (relaxed visualization test)
gen vis_relaxed_flag = (_b_e1_high > _b_e1_low)
la var vis_relaxed_flag "Positive test - relaxed visualization test"
// Merge results with results file
keep coefID vis_flag vis_strict_flag vis_relaxed_flag
sort coefID
merge 1:1 coefID using "`datalib'/test_results.dta"
table _m
drop _merge
save "`datalib'/test_results.dta", replace
// Generate conventional PAR test for IR reg
gen ir_par_conv_flag = (ir_par_inter_flag == 1 & vis_flag == 1)
la var ir_par_conv_flag "Positive test - conventional PAR test with IR reg"
// Generate strict conventional PAR test for IR reg
gen ir_par_conv_strict_flag = (ir_par_inter_flag == 1 & vis_strict_flag == 1)
la var ir_par_conv_strict_flag "Positive test - strict conventional PAR test with IR reg"
// Generate relaxed conventional PAR test for IR reg
gen ir_par_conv_relaxed_flag = (ir_par_inter_flag == 1 & vis_relaxed_flag == 1)
la var ir_par_conv_relaxed_flag "Positive test - relaxed conventional PAR test with IR reg"
// Save results from IR tests
save "`datalib'/test_results_temp.dta", replace
// Quadratic regression
// Load QR results
use "`datalib'/qr_results.dta", clear
// Definition aligned test for PAR with QR
gen qr_par_def_mean = _b_ed + (2 * _b_ed2 * 0.03) + (_b_e0_ed * 0.5)
gen qr_par_def_var = (_se_ed^2) + ((2 * _se_ed2 * 0.03)^2) ///
+ ((_se_e0_ed * 0.5)^2) ///
+ (2 * _cov_ed2_ed * 0.03) ///
+ (2 * _cov_e0_ed_ed2 * 0.5) ///
+ (2 * _cov_e0_ed_ed2 * 0.03 * 0.5)
gen qr_par_def_se = sqrt(qr_par_def_var)
gen qr_par_def_flag = (qr_par_def_mean / qr_par_def_se < -1.96)
// negative and sig
la var qr_par_def_flag "Positive test - def aligned PAR test with QR reg"
// Def aligned test for DC with QR
gen qr_dc_def_mean = _b_e0 + (2 * _b_e02 * 0.5) + (_b_e0_ed * 0.03)
gen qr_dc_def_var = (_se_e0^2) + ((2 * _se_e02 * 0.5)^2) ///
+ ((_se_e0_ed * 0.03)^2) ///
+ (2 * _cov_e02_e0 * 0.5) ///
+ (2 * _cov_e0_ed_e0 * 0.03) ///
+ (2 * _cov_e0_ed_e02 * 0.5 * 0.03)
gen qr_dc_def_se = sqrt(qr_dc_def_var)
gen qr_dc_def_flag = (qr_dc_def_mean / qr_dc_def_se > 1.96)
// positive and sig
la var qr_dc_def_flag "Positive test - def aligned DC test with QR reg"
// Save flags only
keep coefID qr_*
// Merge in IR test results
sort coefID
merge 1:1 coefID using "`datalib'/test_results_temp.dta"
table _m
drop _merge
// Save test results with both IR and QR tests
rm "`datalib'/test_results_temp.dta"
save test_results_temp, replace
// Coin flip
// Generate coin-flip test
gen coinflip_flag = rbinomial(1,0.5)
la var coinflip_flag "Positive test - from coin flip"
***********************
* Sensitivity and specificity
***********************
// Merge in truth, keeping only the DC and PAR true flags (at mean)
merge 1:1 coefID using "`datalib'/coef_flagged.dta", keepusing(dc_flag_meantrue par_flag_meantrue)
tab _m
drop _merge
// Tabulate true and false positives
gen total = 1 // counter
table par_flag_meantrue, stat(fvpercent total coinflip_flag ir_par_conv_strict_flag )
table par_flag_meantrue, stat(fvpercent total ir_par_conv_flag ir_par_conv_relaxed_flag )
table par_flag_meantrue, stat(fvpercent total ir_par_def_flag qr_par_def_flag)
table dc_flag_meantrue, stat(fvpercent total coinflip_flag ir_dc_conv_flag)
table dc_flag_meantrue, stat(fvpercent total ir_dc_def_flag qr_dc_def_flag)
}
*stop