Permalink
Newer
100644
105 lines (99 sloc)
4.3 KB
|
|
||
| 1 | --- | |
| 2 | # Configuration file defining biological data to retrieve and install. | |
| 3 | # These are stored in an Amazon S3 bucket: | |
| 4 | # https://s3.amazonaws.com/biodata | |
| 5 | # and retrieved using the data_fabfile Fabric script. | |
| 6 | ||
| 7 | # ## Genome data -- Next generation sequencing and Galaxy | |
| 8 | ||
| 9 | # Details about the genomes you want to include. | |
| 10 | # Required genome fields (corresponding to Galaxy's tool_data_table_conf.xml columns): | |
| 11 | # dbkey - globally unique identifier for the genome (e.g., hg19) | |
| 12 | # name - descriptive name for the given genome (to be displayed in Galaxy, e.g., Hsapiens) | |
| 13 | # Optional genome fields (corresponding to Galaxy's tool_data_table_conf.xml columns): | |
| 14 | # formats, species, dbkey1, dbkey2, value, path, index | |
|
|
||
| 15 | # Additional genome fields specific to data deployment: | |
|
|
||
| 16 | # indexes - list of tool indexes specific to the associated genome (overrides global 'genome_indexes') | |
| 17 | genomes: | |
| 18 | - dbkey: phix | |
| 19 | name: phiX174 | |
| 20 | - dbkey: hg19 | |
| 21 | name: Human (hg19) | |
|
|
||
| 22 | indexes: [seq, twobit] | |
|
|
||
| 23 | annotations: [GA4GH_problem_regions, capture_regions, | |
| 24 | MIG, prioritize, dbsnp, hapmap, 1000g_omni_snps, 1000g_snps, | |
|
|
||
| 25 | mills_indels, cosmic, ancestral, clinvar, qsignature, ACMG56_genes, transcripts, RADAR, mirbase, | |
|
|
||
| 26 | genesplicer, effects_transcripts, vcfanno, viral] | |
|
|
||
| 27 | annotations_available: [battenberg, dbnsfp, dbscsnv] | |
|
|
||
| 28 | validation: [giab-NA12878, platinum-genome-NA12878, giab-NA24385, giab-NA24631] | |
|
|
||
| 29 | - dbkey: GRCh37 | |
| 30 | name: Human (GRCh37) | |
|
|
||
| 31 | indexes: [seq, twobit] | |
|
|
||
| 32 | annotations: [GA4GH_problem_regions, capture_regions, | |
| 33 | MIG, prioritize, dbsnp, hapmap, 1000g_omni_snps, 1000g_snps, | |
|
|
||
| 34 | mills_indels, cosmic, ancestral, clinvar, qsignature, ACMG56_genes, transcripts, RADAR, mirbase, | |
|
|
||
| 35 | genesplicer, effects_transcripts, vcfanno, viral] | |
|
|
||
| 36 | annotations_available: [battenberg, dbnsfp, dbscsnv] | |
|
|
||
| 37 | validation: [giab-NA12878, giab-NA24385, giab-NA24631, dream-syn3, dream-syn4] | |
|
|
||
| 38 | - dbkey: hg38 | |
| 39 | name: Human (hg38) full | |
|
|
||
| 40 | indexes: [seq, twobit, bwa, hisat2] | |
|
|
||
| 41 | annotations: [coverage, prioritize, dbsnp, hapmap_snps, 1000g_omni_snps, 1000g_snps, | |
|
|
||
| 42 | 1000g_indels, mills_indels, clinvar, qsignature, ACMG56_genes, transcripts, | |
|
|
||
| 43 | genesplicer, effects_transcripts, vcfanno, esp, exac, viral] | |
|
|
||
| 44 | annotations_available: [dbnsfp, dbscsnv] | |
|
|
||
| 45 | validation: [giab-NA12878, giab-NA24385, giab-NA24631, | |
| 46 | platinum-genome-NA12878, giab-NA12878-remap, giab-NA12878-crossmap, | |
|
|
||
| 47 | dream-syn4-crossmap, dream-syn3-crossmap] | |
|
|
||
| 48 | - dbkey: hg38-noalt | |
| 49 | name: Human (hg38) without alternative alleles | |
|
|
||
| 50 | annotations: [coverage, dbsnp, hapmap_snps, 1000g_omni_snps, 1000g_snps, | |
| 51 | 1000g_indels, mills_indels, clinvar, transcripts] | |
|
|
||
| 52 | annotations_available: [dbnsfp, dbscsnv] | |
|
|
||
| 53 | - dbkey: mm9 | |
| 54 | name: Mouse (mm9) | |
|
|
||
| 55 | - dbkey: mm10 | |
| 56 | name: Mouse (mm10) | |
|
|
||
| 57 | annotations: [problem_regions, dbsnp, transcripts, mirbase] | |
|
|
||
| 58 | - dbkey: rn5 | |
| 59 | name: Rat (rn5) | |
|
|
||
| 60 | - dbkey: rn6 | |
| 61 | name: Rat (rn6) | |
|
|
||
| 62 | annotations: [transcripts, mirbase] | |
|
|
||
| 63 | - dbkey: canFam3 | |
| 64 | name: Dog (canFam3) | |
|
|
||
| 65 | annotations: [dbsnp, transcripts] | |
|
|
||
| 66 | - dbkey: galGal4 | |
| 67 | name: Chicken (galGal4) | |
|
|
||
| 68 | - dbkey: dm3 | |
| 69 | name: D melanogaster (dm3) | |
|
|
||
| 70 | - dbkey: TAIR10 | |
| 71 | name: Arabidopsis thaliana (TAIR10) | |
|
|
||
| 72 | annotations: [mirbase] | |
|
|
||
| 73 | - dbkey: xenTro3 | |
| 74 | name: X tropicalis (xenTro3) | |
|
|
||
| 75 | - dbkey: GRCz10 | |
| 76 | name: Zebrafish (GRCz10) | |
|
|
||
| 77 | - dbkey: Zv9 | |
| 78 | name: Zebrafish (Zv9) | |
|
|
||
| 79 | - dbkey: sacCer3 | |
| 80 | name: S cerevisiae (sacCer3) | |
|
|
||
| 81 | - dbkey: WBcel235 | |
| 82 | name: C elegans (WBcel235) | |
|
|
||
| 83 | - dbkey: pseudomonas_aeruginosa_ucbpp_pa14 | |
| 84 | name: Pseudomonas aeruginosa UCBPP-PA14 | |
|
|
||
| 85 | ||
|
|
||
| 86 | # High level targets for specifying annotations | |
| 87 | annotation_groups: | |
|
|
||
| 88 | variation: [problem_regions, GA4GH_problem_regions, capture_regions, MIG, coverage, prioritize, dbsnp, | |
|
|
||
| 89 | hapmap, hapmap_snps, 1000g_omni_snps, ACMG56_genes, | |
|
|
||
| 90 | 1000g_snps, mills_indels, 1000g_indels, clinvar, cosmic, ancestral, qsignature, | |
|
|
||
| 91 | genesplicer, effects_transcripts, vcfanno, viral] | |
|
|
||
| 92 | rnaseq: [transcripts, RADAR] | |
|
|
||
| 93 | smallrna: [mirbase] | |
|
|
||
| 94 | gemini: [esp, exac] | |
|
|
||
| 95 | ||
|
|
||
| 96 | # Global set of indexes to include for each genome. | |
| 97 | # Available choices are in GENOMES_INDEXES_SUPPORTED in cloudbio/biodata/genomes.py | |
| 98 | genome_indexes: | |
| 99 | - bwa | |
|
|
||
| 100 | - twobit | |
|
|
||
| 101 | ||
| 102 | # Additional data targets | |
|
|
||
| 103 | install_liftover: false | |
|
|
||
| 104 | install_uniref: false |