-
Notifications
You must be signed in to change notification settings - Fork 160
/
dbsnp.yaml
39 lines (39 loc) · 1.57 KB
/
dbsnp.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# recipe_outfiles: move symlinks first, not otherwise
---
# GGD recipe: downloads the dbSNP build 156 VCF, drops NW_ records, renames
# contigs using an NCBI->UCSC mapping table, then sorts and indexes the result.
attributes:
  name: dbsnp
  # Quoted so every YAML loader keeps it as a string.
  version: '156-20230320'
recipe:
  full:
    recipe_type: bash
    recipe_cmds:
      - |
        build=156
        version=GCF_000001405.25
        url=https://ftp.ncbi.nih.gov/snp/archive/b$build/VCF/$version.gz
        mkdir -p variation
        cd variation
        # Fetch the original VCF and its index (-c resumes a partial download).
        wget -c -O dbsnp-${build}-orig.vcf.gz $url
        wget -c -O dbsnp-${build}-orig.vcf.gz.tbi $url.tbi
        remap_url=https://raw.githubusercontent.com/dpryan79/ChromosomeMappings/master/GRCh37_NCBI2UCSC.txt
        # Build the rename table: strip carriage returns and keep only rows
        # with exactly two fields, i.e. contigs that have a mapped name.
        # NOTE(review): --no-check-certificate disables TLS verification for
        # this download; confirm it is still required.
        wget --no-check-certificate -qO- $remap_url | sed 's/\r//' | awk '{if(NF==2) print $0}'> remap.tsv
        # Presumably so that tools write their temporary files here -- confirm.
        export TMPDIR=$(pwd)
        # Remove NW_ contigs absent in hg19. The pattern is unanchored, so any
        # line containing "NW_" (header or record) is dropped.
        gunzip -c dbsnp-${build}-orig.vcf.gz | grep -v "NW_" | bgzip -c > dbsnp-${build}.noNW.vcf.gz
        # -f overwrites an index left by a previous run instead of failing,
        # matching the index calls for the final file below.
        tabix -f -p vcf dbsnp-${build}.noNW.vcf.gz
        # Rename contigs, then sort with temporary files in this directory.
        bcftools annotate -Ou --rename-chrs remap.tsv dbsnp-${build}.noNW.vcf.gz | \
          bcftools sort -m 1G -Oz -T . -o dbsnp-${build}.vcf.gz
        # Write both index flavours: .tbi first, then .csi (-C).
        tabix -f -p vcf dbsnp-${build}.vcf.gz
        tabix -f -p vcf -C dbsnp-${build}.vcf.gz
        # Version-independent names pointing at the build-specific files.
        ln -sf dbsnp-${build}.vcf.gz dbsnp.vcf.gz
        ln -sf dbsnp-${build}.vcf.gz.tbi dbsnp.vcf.gz.tbi
        ln -sf dbsnp-${build}.vcf.gz.csi dbsnp.vcf.gz.csi
        cd ..
    # The symlinks are listed before the files they point to. The "156" in the
    # file names must match `build` in the script above.
    recipe_outfiles:
      - variation/dbsnp.vcf.gz
      - variation/dbsnp.vcf.gz.csi
      - variation/dbsnp.vcf.gz.tbi
      - variation/dbsnp-156.vcf.gz
      - variation/dbsnp-156.vcf.gz.csi
      - variation/dbsnp-156.vcf.gz.tbi