---
# Configuration file defining biological data to retrieve and install.
# These are stored in an Amazon S3 bucket:
# https://s3.amazonaws.com/biodata
# and retrieved using the data_fabfile Fabric script.
# ## Genome data -- Next generation sequencing and Galaxy
# Details about the genomes you want to include.
# Required genome fields (corresponding to Galaxy's tool_data_table_conf.xml columns):
# dbkey - globally unique identifier for the genome (e.g., hg19)
# name - descriptive name for the given genome (to be displayed in Galaxy, e.g., Hsapiens)
# Optional genome fields (corresponding to Galaxy's tool_data_table_conf.xml columns):
# formats, species, dbkey1, dbkey2, value, path, index
# Additional genome fields specific to data deployment:
# indexes - list of tool indexes specific to the associated genome (overrides the global 'genome_indexes' list)
genomes:
  # One list item per genome. Every item carries `dbkey` and `name`;
  # `indexes`, `annotations`, `annotations_available` and `validation`
  # appear only on some entries. Keys are written in that fixed order.
  - dbkey: phix
    name: phiX174

  # Human builds
  - dbkey: hg19
    name: Human (hg19)
    indexes: [seq, twobit]
    annotations: [GA4GH_problem_regions, capture_regions, rmsk,
      MIG, prioritize, dbsnp, hapmap, 1000g_omni_snps, 1000g_snps,
      mills_indels, cosmic, ancestral, clinvar, qsignature, ACMG56_genes,
      transcripts, RADAR, mirbase, genesplicer, effects_transcripts, vcfanno,
      viral, exac, gnomad_exome, esp, 1000g, varpon]
    annotations_available: [battenberg, dbnsfp, dbscsnv, ericscript, gnomad]
    validation: [giab-NA12878, platinum-genome-NA12878, giab-NA24385,
      giab-NA24631, giab-NA24143, giab-NA24149]
  - dbkey: GRCh37
    name: Human (GRCh37)
    indexes: [seq, twobit]
    annotations: [GA4GH_problem_regions, capture_regions,
      MIG, prioritize, dbsnp, hapmap, 1000g_omni_snps, 1000g_snps,
      mills_indels, cosmic, ancestral, clinvar, qsignature, ACMG56_genes,
      transcripts, RADAR, mirbase, genesplicer, effects_transcripts, vcfanno,
      viral, exac, gnomad_exome, esp, 1000g, varpon]
    annotations_available: [battenberg, dbnsfp, dbscsnv, ericscript, gnomad]
    validation: [giab-NA12878, giab-NA24385, giab-NA24631, dream-syn3, dream-syn4,
      giab-NA12878-NA24385-somatic, giab-NA24143, giab-NA24149]
  - dbkey: hg38
    name: Human (hg38) full
    indexes: [seq, twobit, bwa, hisat2]
    annotations: [ccds, coverage, capture_regions, rmsk, prioritize, dbsnp, hapmap_snps,
      1000g_omni_snps, 1000g_snps, 1000g_indels,
      mills_indels, clinvar, qsignature, ACMG56_genes, transcripts,
      genesplicer, effects_transcripts, vcfanno, esp, exac, gnomad_exome, viral,
      RADAR, mirbase, varpon]
    annotations_available: [dbnsfp, dbscsnv, ericscript, gnomad]
    validation: [giab-NA12878, giab-NA24385, giab-NA24631,
      platinum-genome-NA12878, giab-NA12878-remap, giab-NA12878-crossmap,
      dream-syn4-crossmap, dream-syn3-crossmap, giab-NA12878-NA24385-somatic,
      giab-NA24143, giab-NA24149]
  - dbkey: hg38-noalt
    name: Human (hg38) without alternative alleles
    annotations: [coverage, dbsnp, hapmap_snps, 1000g_omni_snps, 1000g_snps,
      1000g_indels, mills_indels, clinvar, transcripts, mirbase]
    annotations_available: [dbnsfp, dbscsnv]

  # Other organisms
  - dbkey: mm9
    name: Mouse (mm9)
  - dbkey: mm10
    name: Mouse (mm10)
    indexes: [twobit]
    annotations: [problem_regions, dbsnp, transcripts, mirbase, rmsk, vcfanno]
  - dbkey: rn5
    name: Rat (rn5)
  - dbkey: rn6
    name: Rat (rn6)
    indexes: [seq, twobit]
    annotations: [transcripts, mirbase]
  - dbkey: canFam3
    name: Dog (canFam3)
    indexes: [twobit]
    annotations: [dbsnp, transcripts, mirbase]
  - dbkey: galGal4
    name: Chicken (galGal4)
  - dbkey: Sscrofa11.1
    name: Pig (Sscrofa11.1)
    indexes: [seq, twobit]
    annotations: [transcripts]
  - dbkey: dm3
    name: D melanogaster (dm3)
  - dbkey: TAIR10
    name: Arabidopsis thaliana (TAIR10)
    annotations: [mirbase]
  - dbkey: xenTro3
    name: X tropicalis (xenTro3)
  - dbkey: GRCz11
    name: Zebrafish (GRCz11)
    indexes: [seq, twobit]
    annotations: [transcripts]
  - dbkey: GRCz10
    name: Zebrafish (GRCz10)
  - dbkey: Zv9
    name: Zebrafish (Zv9)
  - dbkey: sacCer3
    name: S cerevisiae (sacCer3)
    indexes: [seq]
    annotations: [transcripts]
  - dbkey: WBcel235
    name: C elegans (WBcel235)
  - dbkey: pseudomonas_aeruginosa_ucbpp_pa14
    name: Pseudomonas aeruginosa UCBPP-PA14
# High-level annotation targets: each group name maps to a list of
# annotation names.
annotation_groups:
  variation: [ccds, problem_regions, GA4GH_problem_regions, capture_regions,
    MIG, coverage, prioritize, dbsnp, hapmap, hapmap_snps, 1000g_omni_snps,
    ACMG56_genes, 1000g_snps, mills_indels, 1000g_indels, clinvar, cosmic,
    ancestral, qsignature, genesplicer, effects_transcripts, varpon, vcfanno,
    viral]
  rnaseq: [transcripts, RADAR, rmsk]
  smallrna: [mirbase]
  gemini: [esp, exac, gnomad_exome, 1000g]
# Indexes to include for every genome (the global default list).
# Valid values are enumerated in GENOMES_INDEXES_SUPPORTED, defined in
# cloudbio/biodata/genomes.py.
genome_indexes:
  - bwa
  - twobit
# Additional data targets
# Both flags are booleans and both are disabled in this configuration.
# NOTE(review): presumably these toggle installation of liftOver and UniRef
# data sets — confirm against the script that consumes this file.
install_liftover: false
install_uniref: false