-
Notifications
You must be signed in to change notification settings - Fork 155
Expand file tree
/
Copy pathseq.yaml
More file actions
36 lines (36 loc) · 1.21 KB
/
seq.yaml
File metadata and controls
36 lines (36 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Full hg38/GRCh38 reference genome as distributed by the 1000 Genomes Project.
# Derived from the NCBI analysis set, with decoy sequences and HLA alternative alleles added.
# ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/
---
# Recipe metadata: the recipe name and the version of the data it installs.
attributes:
  name: seq
  version: 1000g-20150219_1
recipe:
  full:
    # The commands below are a single bash script.
    recipe_type: bash
    recipe_cmds:
      - |
        # Abort on the first failed command, unset variable or failed pipeline
        # stage, so a broken download is never reported as a finished recipe.
        set -euo pipefail

        url=ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome
        base=GRCh38_full_analysis_set_plus_decoy_hla
        urlgz=https://s3.amazonaws.com/biodata/hg38_bundle
        new=hg38
        mkdir -p seq

        # fetch DEST SRC [WGET_OPTION...]
        # Download SRC to DEST unless DEST already exists and is non-empty.
        # The transfer goes to DEST.part and is renamed only after wget
        # succeeds, so DEST is never a partial file; a leftover DEST.part is
        # resumed by "wget -c" on the next run.
        fetch() {
          local dest=$1 src=$2
          shift 2
          if [[ -s "$dest" ]]; then
            return 0
          fi
          wget "$@" -c -O "$dest.part" "$src"
          mv -f -- "$dest.part" "$dest"
        }

        # Sequence dictionary and FASTA index from the 1000 Genomes FTP site,
        # stored under the shorter hg38 name.
        for suffix in .dict .fa.fai
        do
          fetch "seq/$new$suffix" "$url/$base$suffix"
        done

        # gzipped references for use with ensembl-vep HGVS
        # NOTE(review): --no-check-certificate disables TLS certificate
        # verification for this download; kept for backward compatibility,
        # consider dropping it once the hosts running this have current CAs.
        for suffix in .fa.gz .fa.gz.fai .fa.gz.gzi
        do
          fetch "seq/$new$suffix" "$urlgz/$new$suffix" --no-check-certificate
        done

        # Decompress through a temporary file so an interrupted gunzip cannot
        # leave a truncated FASTA at the final path.
        gunzip -c "seq/$new.fa.gz" > "seq/$new.fa.tmp"
        mv -f -- "seq/$new.fa.tmp" "seq/$new.fa"

        # Refresh the index and dictionary timestamps so they are not older
        # than the FASTA written just above.
        touch "seq/$new.fa.fai" "seq/$new.dict"
    # Files listed as outputs of this recipe (presumably checked by the
    # installer to decide whether the recipe has already run -- confirm).
    recipe_outfiles:
      - seq/hg38.fa
      - seq/hg38.fa.fai
      - seq/hg38.fa.gz
      - seq/hg38.fa.gz.fai
      - seq/hg38.fa.gz.gzi
      - seq/hg38.dict