/
ORIV runs Design 5.do
321 lines (270 loc) · 13.6 KB
/
ORIV runs Design 5.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
*DO-FILE TO CHECK THE DATA GENERATED BY RONALD
*DESIGN 5 - ONLY AM NO GN
*HANS, 9 APRIL 2022
*cd "C:\Users\41153jvk\Dropbox (Erasmus Universiteit Rotterdam)\GEIGHEI\projects\Measurement error\"
*cd "C:\Users\Hans\Dropbox (Erasmus Universiteit Rotterdam)\GEIGHEI\projects\Measurement error\"
cd "C:\Users\41153jvk\Dropbox (Erasmus Universiteit Rotterdam)\Genoeconomics\Measurement error PGS\Runs"
capture log close
clear matrix
set seed 1
local repl = 100
mat am = (0.00 \ 0.25 \ 0.50 \ 0.75 \ 1.00)
local rowsam = rowsof(am)
mat samplesizesPRED = (16000)
local rowsPRED = rowsof(samplesizesPRED)
*INITIALIZE MATRICES
mat h2 = J(`rowsPRED',`repl',0)
mat h2se = J(`rowsPRED',`repl',0)
mat coefmeta = J(`rowsPRED',`repl',0)
mat coeforiv = J(`rowsPRED',`repl',0)
mat coefbecker = J(`rowsPRED',`repl',0)
mat semeta = J(`rowsPRED',`repl',0)
mat seoriv = J(`rowsPRED',`repl',0)
mat sebecker = J(`rowsPRED',`repl',0)
mat segreml = J(`rowsPRED',`repl',0)
mat biasmeta = J(`rowsPRED',`repl',0)
mat biasoriv = J(`rowsPRED',`repl',0)
mat biasbecker = J(`rowsPRED',`repl',0)
mat rmsemeta = J(`rowsPRED',`repl',0)
mat rmseoriv = J(`rowsPRED',`repl',0)
mat rmsebecker = J(`rowsPRED',`repl',0)
mat rmsegreml = J(`rowsPRED',`repl',0)
mat F = J(`rowsPRED',`repl',0)
mat coefmetafe = J(`rowsPRED',`repl',0)
mat coeforivfe = J(`rowsPRED',`repl',0)
mat semetafe = J(`rowsPRED',`repl',0)
mat seorivfe = J(`rowsPRED',`repl',0)
mat biasmetafe = J(`rowsPRED',`repl',0)
mat biasorivfe = J(`rowsPRED',`repl',0)
mat rmsemetafe = J(`rowsPRED',`repl',0)
mat rmseorivfe = J(`rowsPRED',`repl',0)
mat Ffe = J(`rowsPRED',`repl',0)
mat avg_coefmeta = J(`rowsPRED',1,0)
mat avg_coeforiv = J(`rowsPRED',1,0)
mat avg_coefbecker = J(`rowsPRED',1,0)
mat se_coefmeta = J(`rowsPRED',1,0)
mat se_coeforiv = J(`rowsPRED',1,0)
mat se_coefbecker = J(`rowsPRED',1,0)
mat se_coefgreml = J(`rowsPRED',1,0)
mat avg_biasmeta = J(`rowsPRED',1,0)
mat avg_biasoriv = J(`rowsPRED',1,0)
mat avg_biasbecker = J(`rowsPRED',1,0)
mat avg_rmsemeta = J(`rowsPRED',1,0)
mat avg_rmseoriv = J(`rowsPRED',1,0)
mat avg_rmsebecker = J(`rowsPRED',1,0)
mat avg_rmsegreml = J(`rowsPRED',1,0)
mat avg_F = J(`rowsPRED',1,0)
mat avg_h2 = J(`rowsPRED',1,0)
mat avg_seh2 = J(`rowsPRED',1,0)
mat avg_coefmetafe = J(`rowsPRED',1,0)
mat avg_coeforivfe = J(`rowsPRED',1,0)
mat se_coeftruefe = J(`rowsPRED',1,0)
mat se_coefmetafe = J(`rowsPRED',1,0)
mat se_coeforivfe = J(`rowsPRED',1,0)
mat avg_biasmetafe = J(`rowsPRED',1,0)
mat avg_biasorivfe = J(`rowsPRED',1,0)
mat avg_rmsemetafe = J(`rowsPRED',1,0)
mat avg_rmseorivfe = J(`rowsPRED',1,0)
mat avg_Ffe = J(`rowsPRED',1,0)
local beta_G = sqrt(0.25)
local beta_G_wf = sqrt(0.2)
forvalues j = 1/`rowsam' {
if `j' == 1 {
local s = "0.00"
}
if `j'==2 {
local s = "0.25"
}
if `j'==3 {
local s = "0.50"
}
if `j'==4 {
local s = "0.75"
}
if `j'==5 {
local s = "1.00"
}
di "Assortative mating " `s'
forvalues k = 1/`rowsPRED' {
di "Prediction sample size " samplesizesPRED[`k',1]
forvalues i = 1/`repl' {
if mod(`i',25)==0{ //show only every 25 iterations
di "Doing iteration `i' out of `repl'"
}
qui import delimited "DESIGN.5.RHO_AM.`s'.RUN.`i'.HSq.out", clear
qui sum heritability
mat h2[`k',`i'] = r(mean)
qui sum standarderror
mat h2se[`k',`i'] = r(mean)
*LOAD DATA
qui import delimited "DESIGN.5.RHO_AM.`s'.RUN.`i'.pgi", clear
*******************************************************************************
* BETWEEN-FAMILY
*******************************************************************************
*THIS IS USING THE META-ANALYSIS PGI
qui reg y pgigwaspooled, cluster(pid)
estimates store e_3
mat temp = r(table)
mat coefmeta[`k',`i'] = temp[1,1]
mat semeta[`k',`i'] = temp[2,1]
mat biasmeta[`k',`i'] = (temp[1,1]-`beta_G')/`beta_G'
mat rmsemeta[`k',`i'] = sqrt((temp[1,1]-`beta_G')^2)
*PGI-RC
sca rhobecker = sqrt(h2[`k',`i']/e(r2))
mat coefbecker[`k',`i']=rhobecker*temp[1,1]
mat sebecker[`k',`i']=rhobecker*temp[2,1]
mat biasbecker[`k',`i'] = (coefbecker[`k',`i']-`beta_G')/`beta_G'
mat rmsebecker[`k',`i'] = sqrt((coefbecker[`k',`i']-`beta_G')^2)
*INCORPORATING GREML UNCERTAINTY
mat segreml[`k',`i']=h2se[`k',`i']/(2*sqrt(h2[`k',`i']))
mat rmsegreml[`k',`i']=sqrt(biasbecker[`k',`i']^2+segreml[`k',`i']^2)
*COMPUTE THE CORRELATION BETWEEN THE SCORES TO DETERMINE THE SCALING FACTOR
qui reg pgigwas1 pgigwas2, cluster(pid)
sca corrscores = _b[pgigwas2]
*THIS IS APPLYING ORIV WITH USING THE SCORES AS IVS FOR EACH OTHER
preserve
qui replace pgigwas1 = pgigwas1/sqrt(corrscores) if corrscores > 0
qui replace pgigwas2 = pgigwas2/sqrt(corrscores) if corrscores > 0
quietly {
expand 2, generate(replicant)
}
qui gen mainVar = pgigwas1 if replicant == 0
qui replace mainVar = pgigwas2 if replicant == 1
qui gen instrument = pgigwas2 if replicant == 0
qui replace instrument = pgigwas1 if replicant == 1
qui ivreghdfe y (mainVar=instrument), absorb(replicant) cluster(pid iid)
mat coeforiv[`k',`i'] = _b[mainVar]
mat seoriv[`k',`i'] = _se[mainVar]
mat biasoriv[`k',`i'] = (_b[mainVar]-`beta_G')/`beta_G'
mat rmseoriv[`k',`i'] = sqrt((_b[mainVar]-`beta_G')^2)
mat F[`k',`i'] = e(widstat)
restore
*******************************************************************************
* WITHIN-FAMILY
*******************************************************************************
qui xtset pid
*THIS IS USING THE META-ANALYSIS PGI, WITH FIXED EFFECTS
qui xtreg y pgigwaspooled, fe cluster(pid)
estimates store e_3
mat temp = r(table)
mat coefmetafe[`k',`i'] = temp[1,1]
mat semetafe[`k',`i'] = temp[2,1]
mat biasmetafe[`k',`i'] = (temp[1,1]-`beta_G_wf')/`beta_G_wf'
mat rmsemetafe[`k',`i'] = sqrt((temp[1,1]-`beta_G_wf')^2)
*ORIV WITH FIXED EFFECTS
preserve
qui replace pgigwas1 = pgigwas1/sqrt(corrscores) if corrscores > 0
qui replace pgigwas2 = pgigwas2/sqrt(corrscores) if corrscores > 0
quietly {
expand 2, generate(replicant)
}
egen newid = concat(pid replicant)
gen long newidreal = real(newid)
qui xtset newidreal
qui gen mainVar = pgigwas1 if replicant == 0
qui replace mainVar = pgigwas2 if replicant == 1
qui gen instrument = pgigwas2 if replicant == 0
qui replace instrument = pgigwas1 if replicant == 1
qui ivreghdfe y (mainVar=instrument), absorb(newidreal) cluster(pid iid)
mat coeforivfe[`k',`i'] = _b[mainVar]
mat seorivfe[`k',`i'] = _se[mainVar]
mat biasorivfe[`k',`i'] = (_b[mainVar]-`beta_G_wf')/`beta_G_wf'
mat rmseorivfe[`k',`i'] = sqrt((_b[mainVar]-`beta_G_wf')^2)
mat Ffe[`k',`i'] = e(widstat)
restore
}
}
mata: st_matrix("avg_coef`j'meta", rowsum(st_matrix("coefmeta")/`repl'))
mata: st_matrix("avg_coef`j'oriv", rowsum(st_matrix("coeforiv")/`repl'))
mata: st_matrix("avg_coef`j'becker", rowsum(st_matrix("coefbecker")/`repl'))
mata: st_matrix("se_coef`j'meta", rowsum(st_matrix("semeta")/`repl'))
mata: st_matrix("se_coef`j'oriv", rowsum(st_matrix("seoriv")/`repl'))
mata: st_matrix("se_coef`j'becker", rowsum(st_matrix("sebecker")/`repl'))
mata: st_matrix("se_coef`j'greml", rowsum(st_matrix("segreml")/`repl'))
mata: st_matrix("avg_bias`j'meta", rowsum(st_matrix("biasmeta")/`repl'))
mata: st_matrix("avg_bias`j'oriv", rowsum(st_matrix("biasoriv")/`repl'))
mata: st_matrix("avg_bias`j'becker", rowsum(st_matrix("biasbecker")/`repl'))
mata: st_matrix("avg_rmse`j'meta", rowsum(st_matrix("rmsemeta")/`repl'))
mata: st_matrix("avg_rmse`j'oriv", rowsum(st_matrix("rmseoriv")/`repl'))
mata: st_matrix("avg_rmse`j'becker", rowsum(st_matrix("rmsebecker")/`repl'))
mata: st_matrix("avg_rmse`j'greml", rowsum(st_matrix("rmsegreml")/`repl'))
mata: st_matrix("avg`j'_F", rowsum(st_matrix("F")/`repl'))
mata: st_matrix("avg_coef`j'metafe", rowsum(st_matrix("coefmetafe")/`repl'))
mata: st_matrix("avg_coef`j'orivfe", rowsum(st_matrix("coeforivfe")/`repl'))
mata: st_matrix("se_coef`j'metafe", rowsum(st_matrix("semetafe")/`repl'))
mata: st_matrix("se_coef`j'orivfe", rowsum(st_matrix("seorivfe")/`repl'))
mata: st_matrix("avg_bias`j'metafe", rowsum(st_matrix("biasmetafe")/`repl'))
mata: st_matrix("avg_bias`j'orivfe", rowsum(st_matrix("biasorivfe")/`repl'))
mata: st_matrix("avg_rmse`j'metafe", rowsum(st_matrix("rmsemetafe")/`repl'))
mata: st_matrix("avg_rmse`j'orivfe", rowsum(st_matrix("rmseorivfe")/`repl'))
mata: st_matrix("avg`j'_Ffe", rowsum(st_matrix("Ffe")/`repl'))
mata: st_matrix("avg_h2`j'", rowsum(st_matrix("h2")/`repl'))
mata: st_matrix("avg_seh2`j'", rowsum(st_matrix("h2se")/`repl'))
*STORE RESULTS IN A MATRIX AND PLOT THE RESULTS
mat repl`j' = J(`rowsPRED',1,`repl')
mat AM`j' = J(`rowsPRED',1,am[`j',1])
mat bf`j' = (AM`j', samplesizesPRED, avg_coef`j'meta, se_coef`j'meta, avg_coef`j'meta - 1.96*se_coef`j'meta, avg_coef`j'meta + 1.96*se_coef`j'meta, /*
*/ avg_coef`j'oriv, se_coef`j'oriv, avg_coef`j'oriv - 1.96*se_coef`j'oriv, avg_coef`j'oriv + 1.96 * se_coef`j'oriv, /*
*/ avg_coef`j'becker, se_coef`j'becker, avg_coef`j'becker - 1.96*se_coef`j'becker, avg_coef`j'becker + 1.96*se_coef`j'becker, /*
*/ avg_coef`j'becker, se_coef`j'greml, avg_coef`j'becker - 1.96*se_coef`j'greml, avg_coef`j'becker + 1.96*se_coef`j'greml, repl`j')
matrix colnames bf`j' = AM sample_size coef_meta se_meta lb_meta ub_meta coef_oriv se_oriv lb_oriv ub_oriv coef_becker se_becker lb_becker ub_becker /*
*/ coef_becker se_greml lb_greml ub_greml replications
mat overall_rmse`j' = (AM`j', samplesizesPRED, avg_rmse`j'meta, avg_rmse`j'oriv, avg_rmse`j'becker, avg_rmse`j'greml, repl`j')
matrix colnames overall_rmse`j' = AM sample_size rmse_meta rmse_oriv rmse_becker rmse_greml replications
mat wf`j' = (AM`j', samplesizesPRED, avg_coef`j'metafe, se_coef`j'metafe, avg_coef`j'metafe - 1.96*se_coef`j'metafe, avg_coef`j'metafe + 1.96*se_coef`j'metafe, /*
*/ avg_coef`j'orivfe, se_coef`j'orivfe, avg_coef`j'orivfe - 1.96*se_coef`j'orivfe, avg_coef`j'orivfe + 1.96 * se_coef`j'orivfe, repl`j')
matrix colnames wf`j' = AM sample_size coef_meta se_meta lb_meta ub_meta coef_oriv se_oriv lb_oriv ub_oriv replications
mat overall_rmse`j'fe = (AM`j', samplesizesPRED, avg_rmse`j'metafe, avg_rmse`j'orivfe, repl`j')
matrix colnames overall_rmse`j'fe = AM sample_size rmse_meta rmse_oriv replications
mat overall_F`j' = (AM`j', samplesizesPRED, avg`j'_F)
matrix colnames overall_F`j' = AM sample_size Fstat
mat overall_Ffe`j' = (AM`j', samplesizesPRED, avg`j'_Ffe)
matrix colnames overall_Ffe`j' = AM sample_size Fstat
}
mat bf_AM = (bf1 \ bf2 \ bf3 \ bf4 \ bf5)
putexcel set "sim_results.xls", sheet("bf_design5") modify
putexcel A1=matrix(bf_AM), colnames
mat wf_AM = (wf1 \ wf2 \ wf3 \ wf4 \ wf5)
putexcel set "sim_results.xls", sheet("wf_design5") modify
putexcel A1=matrix(wf_AM), colnames
mat rmse_AM = (overall_rmse1 \ overall_rmse2 \ overall_rmse3 \ overall_rmse4 \ overall_rmse5)
putexcel set "sim_results.xls", sheet("bf_rmse_design5") modify
putexcel A1=matrix(rmse_AM), colnames
mat rmse_AM_fe = (overall_rmse1fe \ overall_rmse2fe \ overall_rmse3fe \ overall_rmse4fe \ overall_rmse5fe)
putexcel set "sim_results.xls", sheet("wf_rmse_design5") modify
putexcel A1=matrix(rmse_AM_fe), colnames
mat F_AM = (overall_F1 \ overall_F2 \ overall_F3 \ overall_F4 \ overall_F5)
putexcel set "sim_results.xls", sheet("bf_F_design5") modify
putexcel A1=matrix(F_AM), colnames
mat F_AM_fe = (overall_Ffe1 \ overall_Ffe2 \ overall_Ffe3 \ overall_Ffe4 \ overall_Ffe5)
putexcel set "sim_results.xls", sheet("wf_F_design5") modify
putexcel A1=matrix(F_AM_fe), colnames
*MAKE PLOTS
local beta_G = sqrt(0.25)
import excel "sim_results.xls", sheet("bf_design5") firstrow clear
replace AM = AM*100
mkmat coef_meta lb_meta ub_meta, matrix(meta)
mat rownames meta = 0 0.25 0.5 0.75 1
mkmat coef_becker lb_becker ub_becker, matrix(becker)
mat rownames becker = 0 0.25 0.5 0.75 1
mkmat coef_oriv lb_oriv ub_oriv, matrix(oriv)
mat rownames oriv = 0 0.25 0.5 0.75 1
mkmat coef_becker lb_greml ub_greml, matrix(becker_greml)
mat rownames becker_greml = 0 0.25 0.5 0.75 1
matrix transpose_meta = meta'
matrix transpose_becker = becker'
matrix transpose_oriv = oriv'
matrix transpose_greml = becker_greml'
coefplot matrix(transpose_meta) matrix(transpose_oriv) matrix(transpose_becker) matrix(transpose_greml), vertical grid(between) ci((2 3)) ylabel(0.3(0.1)0.6) yline(`beta_G', lpattern(dash)) scheme(s1mono) ytitle("Estimated coefficient") xtitle("Assortative mating") legend(rows(1) lab(2 "Meta-analysis") lab(4 "ORIV") lab(6 "PGI-RC (default)") lab(8 "PGI-RC (GREML unc.)"))
graph export "C:\Users\41153jvk\Dropbox (Erasmus Universiteit Rotterdam)\GEIGHEI\projects\Measurement error\Figures revision\design5-bf-greml.pdf", replace
local beta_G_wf = sqrt(0.2)
import excel "sim_results.xls", sheet("wf_design5") firstrow clear
replace AM = AM*100
mkmat coef_meta lb_meta ub_meta, matrix(meta)
mat rownames meta = 0 0.25 0.5 0.75 1
mkmat coef_oriv lb_oriv ub_oriv, matrix(oriv)
mat rownames oriv = 0 0.25 0.5 0.75 1
matrix transpose_meta = meta'
matrix transpose_oriv = oriv'
coefplot matrix(transpose_meta) matrix(transpose_oriv), vertical grid(between) ci((2 3)) scheme(s1mono) ytitle("Estimated coefficient") yline(0.5, lpattern(dash)) ylabel(0.3(0.1)0.6) xtitle("Assortative mating") legend(rows(1) lab(2 "Meta-analysis") lab(4 "ORIV"))
graph export "C:\Users\41153jvk\Dropbox (Erasmus Universiteit Rotterdam)\GEIGHEI\projects\Measurement error\Figures revision\design5-wf-greml.pdf", replace