/
germline_detect_variants.cwl
214 lines (213 loc) · 6.6 KB
/
germline_detect_variants.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/usr/bin/env cwl-runner

# Germline variant detection workflow: document header and the runner
# features the rest of the document relies on.
cwlVersion: v1.0
class: Workflow
# NOTE(review): the label mentions alignment, but this workflow takes an
# already-aligned `bam` input and contains no alignment step — confirm whether
# the label should be narrowed.
label: "exome alignment and germline variant detection"
requirements:
  # Needed when a step's `run` target is itself a Workflow; presumably
  # gatk_haplotypecaller_iterator.cwl and germline_filter_vcf.cwl — confirm.
  - class: SubworkflowFeatureRequirement
  # Brings the custom record type used by the `vep_custom_annotations` input
  # into scope.
  - class: SchemaDefRequirement
    types:
      - $import: ../types/vep_custom_annotation.yml
  # Needed for `valueFrom` on step inputs (gnomad_field_name and the two
  # staged_rename `name` inputs).
  - class: StepInputExpressionRequirement
  # Needed for the `${ ... }` JavaScript body that derives gnomad_field_name.
  - class: InlineJavascriptRequirement
# Workflow inputs. Parameter names, types and defaults are the public
# interface of this workflow and are unchanged; only nesting is restored.
inputs:
  # Reference genome, accepted either as a path string or as a File with its
  # .fai index and sequence dictionary alongside. Passed to the calling,
  # annotation, filtering and table-export steps.
  reference:
    type:
      - string
      - File
    secondaryFiles: [.fai, ^.dict]
  # Input alignments; the index is expected as <basename>.bai.
  bam:
    type: File
    secondaryFiles: [^.bai]

  # --- haplotype_caller parameters (forwarded unchanged) ---
  gvcf_gq_bands:
    type: string[]
  # A list of lists of strings.
  # NOTE(review): presumably one inner list per HaplotypeCaller invocation —
  # confirm against gatk_haplotypecaller_iterator.cwl.
  intervals:
    type:
      type: array
      items:
        type: array
        items: string
  contamination_fraction:
    type: string?
  ploidy:
    type: int?

  # --- VEP annotation parameters (forwarded to annotate_variants) ---
  vep_cache_dir:
    type:
      - string
      - Directory
  vep_ensembl_assembly:
    type: string
    doc: "genome assembly to use in vep. Examples: GRCh38 or GRCm38"
  vep_ensembl_version:
    type: string
    doc: "ensembl version - Must be present in the cache directory. Example: 95"
  vep_ensembl_species:
    type: string
    doc: "ensembl species - Must be present in the cache directory. Examples: homo_sapiens or mus_musculus"
  vep_plugins:
    type: string[]
    default: [Frameshift, Wildtype]
  synonyms_file:
    type: File?
  annotate_coding_only:
    type: boolean?
  # Also read by the filter_vcf step to pick the gnomAD allele-frequency field
  # name (first entry whose annotation.gnomad_filter is truthy).
  vep_custom_annotations:
    type: ../types/vep_custom_annotation.yml#vep_custom_annotation[]
    doc: "custom type, check types directory for input format"

  # --- filtering parameters (forwarded to filter_vcf) ---
  limit_variant_intervals:
    type: File
  filter_gnomAD_maximum_population_allele_frequency:
    type: float
    default: 0.05

  # --- table export parameters ---
  # Used by both the filtered and the final variants_to_table steps.
  variants_to_table_fields:
    type: string[]?
    default: ['CHROM', 'POS', 'ID', 'REF', 'ALT']
  variants_to_table_genotype_fields:
    type: string[]?
  vep_to_table_fields:
    type: string[]?
  # Passed as `prefix` to both add_vep_fields_to_table steps; the resulting
  # files are subsequently renamed by the staged_rename steps.
  final_tsv_prefix:
    type: string?
    default: 'variants'
# Workflow outputs. Each entry names the step output it is collected from.
outputs:
  # Merged HaplotypeCaller calls, before annotation and filtering.
  raw_vcf:
    type: File
    outputSource: merge_vcfs/merged_vcf
    secondaryFiles: [.tbi]
  # The two VCFs produced by the germline_filter_vcf.cwl step.
  final_vcf:
    type: File
    outputSource: filter_vcf/final_vcf
    secondaryFiles: [.tbi]
  filtered_vcf:
    type: File
    outputSource: filter_vcf/filtered_vcf
    secondaryFiles: [.tbi]
  vep_summary:
    type: File
    outputSource: annotate_variants/vep_summary
  # Table built from final_vcf, renamed to 'annotated.filtered.final.tsv'.
  final_tsv:
    type: File
    outputSource: set_final_tsv_name/replacement
  # Table built from filtered_vcf, renamed to 'annotated.filtered.tsv'.
  filtered_tsv:
    type: File
    outputSource: set_filtered_tsv_name/replacement
# Step graph:
#   haplotype_caller -> merge_vcfs -> annotate_variants -> filter_vcf
#   filter_vcf/filtered_vcf -> variants_to_table -> add_vep_fields -> rename
#   filter_vcf/final_vcf    -> variants_to_table -> add_vep_fields -> rename
steps:
  # Call variants over the supplied intervals; emit_reference_confidence is
  # pinned to "NONE" rather than exposed as a workflow input.
  haplotype_caller:
    run: gatk_haplotypecaller_iterator.cwl
    in:
      reference: reference
      bam: bam
      emit_reference_confidence:
        default: "NONE"
      gvcf_gq_bands: gvcf_gq_bands
      intervals: intervals
      contamination_fraction: contamination_fraction
      ploidy: ploidy
    out: [gvcf]

  # Combine the haplotype_caller output files into a single VCF (also exposed
  # as the raw_vcf workflow output).
  merge_vcfs:
    run: ../tools/picard_merge_vcfs.cwl
    in:
      vcfs: haplotype_caller/gvcf
    out: [merged_vcf]

  annotate_variants:
    run: ../tools/vep.cwl
    in:
      vcf: merge_vcfs/merged_vcf
      cache_dir: vep_cache_dir
      ensembl_assembly: vep_ensembl_assembly
      ensembl_version: vep_ensembl_version
      ensembl_species: vep_ensembl_species
      synonyms_file: synonyms_file
      coding_only: annotate_coding_only
      reference: reference
      custom_annotations: vep_custom_annotations
      plugins: vep_plugins
    out: [annotated_vcf, vep_summary]

  # NOTE(review): the outputs of bgzip_annotated_vcf and index_annotated_vcf
  # are not consumed by any other step or by any workflow output in this
  # document (filter_vcf reads annotate_variants/annotated_vcf directly).
  # Confirm whether these two steps are still needed or whether an output or
  # downstream input was meant to reference index_annotated_vcf/indexed_vcf.
  bgzip_annotated_vcf:
    run: ../tools/bgzip.cwl
    in:
      file: annotate_variants/annotated_vcf
    out: [bgzipped_file]

  index_annotated_vcf:
    run: ../tools/index_vcf.cwl
    in:
      vcf: bgzip_annotated_vcf/bgzipped_file
    out: [indexed_vcf]

  filter_vcf:
    run: germline_filter_vcf.cwl
    in:
      annotated_vcf: annotate_variants/annotated_vcf
      filter_gnomAD_maximum_population_allele_frequency: filter_gnomAD_maximum_population_allele_frequency
      # Derive the allele-frequency field name from the custom annotations:
      # the first entry with a truthy annotation.gnomad_filter yields
      # "<annotation.name>_AF"; if none matches, fall back to 'gnomAD_AF'.
      gnomad_field_name:
        source: vep_custom_annotations
        valueFrom: |
          ${
            if(self){
              for(var i=0; i<self.length; i++){
                if(self[i].annotation.gnomad_filter){
                  return(self[i].annotation.name + '_AF');
                }
              }
            }
            return('gnomAD_AF');
          }
      limit_variant_intervals: limit_variant_intervals
      reference: reference
    out: [filtered_vcf, final_vcf]

  # --- TSV export of filter_vcf/filtered_vcf ---
  filtered_variants_to_table:
    run: ../tools/variants_to_table.cwl
    in:
      reference: reference
      vcf: filter_vcf/filtered_vcf
      fields: variants_to_table_fields
      genotype_fields: variants_to_table_genotype_fields
    out: [variants_tsv]

  filtered_add_vep_fields_to_table:
    run: ../tools/add_vep_fields_to_table.cwl
    in:
      vcf: filter_vcf/filtered_vcf
      vep_fields: vep_to_table_fields
      tsv: filtered_variants_to_table/variants_tsv
      prefix: final_tsv_prefix
    out: [annotated_variants_tsv]

  # Both table branches share final_tsv_prefix, so each result is given a
  # fixed, distinct file name before being exposed as a workflow output.
  set_filtered_tsv_name:
    run: ../tools/staged_rename.cwl
    in:
      original: filtered_add_vep_fields_to_table/annotated_variants_tsv
      name:
        valueFrom: 'annotated.filtered.tsv'
    out: [replacement]

  # --- TSV export of filter_vcf/final_vcf ---
  final_variants_to_table:
    run: ../tools/variants_to_table.cwl
    in:
      reference: reference
      vcf: filter_vcf/final_vcf
      fields: variants_to_table_fields
      genotype_fields: variants_to_table_genotype_fields
    out: [variants_tsv]

  final_add_vep_fields_to_table:
    run: ../tools/add_vep_fields_to_table.cwl
    in:
      vcf: filter_vcf/final_vcf
      vep_fields: vep_to_table_fields
      tsv: final_variants_to_table/variants_tsv
      prefix: final_tsv_prefix
    out: [annotated_variants_tsv]

  set_final_tsv_name:
    run: ../tools/staged_rename.cwl
    in:
      original: final_add_vep_fields_to_table/annotated_variants_tsv
      name:
        valueFrom: 'annotated.filtered.final.tsv'
    out: [replacement]