-
Notifications
You must be signed in to change notification settings - Fork 7
/
st.do
53 lines (47 loc) · 2.1 KB
/
st.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// extract GEN file for each LD region in EPIC-Omics HRC imputed data
set more off
local F /gen_omics/data/EPIC-Norfolk/HRC
local T /scratch/tempjhz22/LDcalc/HRC
gzuse if chr!=25 using `F'/SNPinfo.dta.gz, clear
sort chr pos rsid
gen maf=cond(FreqA2<=0.5, FreqA2, 1-FreqA2)
gen MAC=2*21044*maf
rename FreqA2 exp_freq_a1
order rsid pos exp_freq_a1 info type RSnum
tempfile f0
!rm -f `T'/Extract.sh
!touch `T'/Extract.sh
forval k=1/22 {
preserve
keep if chr==`k'
save `f0', replace
import delimited using st.bed, asdouble delim(" ") clear
keep chr start end
drop if end<=start
destring chr, replace
keep if chr==`k'
drop chr
sort start
count
local nclus=r(N)
merge 1:1 _n using `f0', nogen
forval j=1/`nclus' {
local lowr=start[`j']
local uppr=end[`j']
local f="chr`k'_`lowr'_`uppr'"
local qctool="/genetics/bin/qctool-1.4"
local samples="/gen_omics/data/EPIC-Norfolk/HRC/EPIC-Norfolk.sample"
local excl_samples="/genetics/data/gwas/1-11-17/exclude.dat"
outsheet rsid pos exp_freq_a1 info type RSnum if pos>=`lowr' & pos<=`uppr' & MAC>=3 & info>=0.4 using `T'/`f'.txt, names noquote replace nolab delim(" ")
!echo -e "sge \"`qctool' -g `F'/chr`k'.gen.gz -og `f'.gen -s `samples' -excl-samples `excl_samples' -incl-range `lowr'-`uppr' -omit-chromosome -snp-missing-call-rate 0.100001 -snp-missing-rate 0.05 -maf 0.000072 1 -info 0.4 1 -sort; cut -d' ' -f1 `f'.gen > `f'.snpid; grep -w -f `f'.snpid `f'.txt > `f'.info; gzip -f `f'.gen\"" >> `T'/Extract.sh
}
restore
}
cd `T'
!chmod u+x Extract.sh
!./Extract.sh
// origina scripts
// order rsid pos exp_freq_a1 info type RSnum
// outsheet rsid if pos>=`lowr' & pos<=`uppr' & (MAC<3 | info<0.4) using `T'/exc`f'.txt, nonames noquote replace nolab
// outsheet rsid pos exp_freq_a1 info type RSnum if pos>=`lowr' & pos<=`uppr' & MAC>=3 & info>=0.4 using `T'/chr`f'.info, names noquote replace nolab delim(" ")
// !echo -e "sge \"/genetics/bin/qctool -g `F'/chr`k'.gen.gz -og chr`f'.gen -incl-range `lowr'-`uppr' -omit-chromosome -excl-rsids exc`f'.txt -sort; gzip -f chr`f'.gen\"" >> `T'/Extract.sh