#!/usr/bin/env cwl-runner
# exomeseq-gatk4.cwl
#
# Top-level whole-exome sequencing workflow. Data flows through four steps:
#   prepare_reference_data -> preprocessing (scattered over read_pairs)
#   -> variant_discovery, while organize_directories gathers the
#   per-read-pair preprocessing outputs into Directory outputs.
cwlVersion: v1.0
class: Workflow
label: exomeseq-gatk4/v2.2.0
doc: Whole Exome Sequence analysis using GATK4 - v2.2.0
requirements:
  # The preprocessing step uses `scatter` (one run per read pair).
  - class: ScatterFeatureRequirement
  # Every step runs a CWL document under subworkflows/ (presumably
  # Workflow-class documents -- confirm against those files).
  - class: SubworkflowFeatureRequirement
  # Registers the custom types referenced by the `study_type` and
  # `read_pairs` inputs. `types` belongs to this requirement entry.
  - class: SchemaDefRequirement
    types:
      - $import: types/ExomeseqStudyType.yml
      - $import: types/FASTQReadPairType.yml
inputs:
  # Custom type from types/ExomeseqStudyType.yml; consumed only by the
  # variant_discovery step.
  study_type:
    type: types/ExomeseqStudyType.yml#ExomeseqStudyType
  # Intervals should come from capture kit (target intervals) bed format.
  # Optional. Forwarded to prepare_reference_data, and as `intervals` to
  # preprocessing and variant_discovery.
  target_intervals: File[]?
  # Intervals should come from capture kit (bait intervals) bed format.
  # Optional. Forwarded to prepare_reference_data only.
  bait_intervals: File[]?
  # Optional padding applied around the intervals (presumably in bases --
  # confirm in the subworkflows). Forwarded to preprocessing and
  # variant_discovery.
  interval_padding: int?
  # Named read pairs in FASTQ format. The preprocessing step scatters over
  # this array, so it runs once per element.
  read_pairs:
    type:
      type: array
      items: types/FASTQReadPairType.yml#FASTQReadPairType
  # Reference genome, FASTA, plus the index files that must sit next to it.
  reference_genome:
    type: File
    secondaryFiles:
      # Suffixes appended to the FASTA file name (presumably the BWA index).
      - .amb
      - .ann
      - .bwt
      - .pac
      - .sa
      # Presumably the FASTA index.
      - .fai
      # Leading caret: the FASTA extension is removed before ".dict" is
      # appended (e.g. genome.fa -> genome.dict).
      - ^.dict
  # Number of threads to use (forwarded to preprocessing).
  threads: int
  # Read Group annotation.
  # Can be the project name. Also passed as `name` to variant_discovery.
  library: string
  # Read Group platform, e.g. Illumina.
  platform: string
  # VCF files of known sites, each with its .idx index alongside.
  known_sites:
    type: File[]  # vcf files of known sites, with indexing
    secondaryFiles:
      - .idx
  # Variant Recalibration - SNPs
  snp_resource_hapmap:
    type: File
    secondaryFiles:
      - .idx
  snp_resource_omni:
    type: File
    secondaryFiles:
      - .idx
  snp_resource_1kg:
    type: File
    secondaryFiles:
      - .idx
  # Variant Recalibration - Common
  # (forwarded to both preprocessing and variant_discovery)
  resource_dbsnp:
    type: File
    secondaryFiles:
      - .idx
  # Variant Recalibration - Indels
  indel_resource_mills:
    type: File
    secondaryFiles:
      - .idx
  # Annotation names handed to variant_discovery for indel recalibration;
  # the default list is used when the job file omits this input.
  variant_recalibration_annotations_indels:
    type: string[]
    default: ["FS", "ReadPosRankSum", "MQRankSum", "QD", "SOR"]
  # Annotation names handed to variant_discovery for SNP recalibration;
  # the default list is used when the job file omits this input.
  variant_recalibration_annotations_snps:
    type: string[]
    default: ["QD", "MQRankSum", "ReadPosRankSum", "FS", "MQ", "SOR"]
outputs:
  # --- Directories assembled by the organize_directories step ---
  # Each one gathers the matching per-read-pair output of preprocessing.
  fastp_html_reports_dir:
    type: Directory
    outputSource: organize_directories/fastp_html_reports_dir
  fastp_json_reports_dir:
    type: Directory
    outputSource: organize_directories/fastp_json_reports_dir
  raw_variants_dir:
    type: Directory
    outputSource: organize_directories/raw_variants_dir
  bams_markduplicates_dir:
    type: Directory
    outputSource: organize_directories/bams_markduplicates_dir
    doc: "BAM and bai files from markduplicates"
  # Note the name differs from its source (metrics_markduplicates_dir).
  markduplicates_metrics_dir:
    type: Directory
    outputSource: organize_directories/metrics_markduplicates_dir
    doc: "metrics files from markduplicates"
  bams_recalibrated_dir:
    type: Directory
    outputSource: organize_directories/bams_recalibrated_dir
    doc: "BAM files containing recalibrated reads"
  # --- Single files produced by the variant_discovery step ---
  joint_raw_variants:
    type: File
    outputSource: variant_discovery/joint_raw_variants
    doc: "GVCF file from joint genotyping calling"
  # Exposes variant_discovery's variant_recalibration_combined_vcf output
  # under a friendlier name.
  filtered_recalibrated_variants:
    type: File
    outputSource: variant_discovery/variant_recalibration_combined_vcf
    doc: "The output filtered and recalibrated VCF file in which each variant is annotated with its VQSLOD value"
  variant_calling_detail_metrics:
    type: File
    outputSource: variant_discovery/detail_metrics
  variant_calling_summary_metrics:
    type: File
    outputSource: variant_discovery/summary_metrics
steps:
  # Step 00: takes the optional target/bait interval files plus the
  # reference genome and emits one interval list for each.
  prepare_reference_data:
    run: subworkflows/exomeseq-gatk4-00-prepare-reference-data.cwl
    in:
      target_intervals: target_intervals
      bait_intervals: bait_intervals
      reference_genome: reference_genome
    out:
      - target_interval_list
      - bait_interval_list
  # Step 01: runs once per element of `read_pairs` (scattered on the
  # step's `read_pair` input), so every output listed under `out` becomes
  # an array at this level.
  preprocessing:
    run: subworkflows/exomeseq-gatk4-01-preprocessing.cwl
    scatter: read_pair
    in:
      intervals: target_intervals
      interval_padding: interval_padding
      target_interval_list: prepare_reference_data/target_interval_list
      bait_interval_list: prepare_reference_data/bait_interval_list
      read_pair: read_pairs
      reference_genome: reference_genome
      threads: threads
      library: library
      platform: platform
      known_sites: known_sites
      resource_dbsnp: resource_dbsnp
    out:
      - fastp_html_report
      - fastp_json_report
      - markduplicates_bam
      - markduplicates_metrics
      # recalibration_table and haplotypes_bam are declared here but are
      # not wired to any workflow output or downstream step in this file.
      - recalibration_table
      - recalibrated_reads
      - raw_variants
      - haplotypes_bam
  # Step 02: not scattered -- receives the whole array of raw_variants
  # from preprocessing in a single invocation.
  variant_discovery:
    run: subworkflows/exomeseq-gatk4-02-variantdiscovery.cwl
    in:
      study_type: study_type
      # The workflow-level `library` input doubles as the run name here.
      name: library
      intervals: target_intervals
      interval_padding: interval_padding
      target_interval_list: prepare_reference_data/target_interval_list
      raw_variants: preprocessing/raw_variants
      reference_genome: reference_genome
      snp_resource_hapmap: snp_resource_hapmap
      snp_resource_omni: snp_resource_omni
      snp_resource_1kg: snp_resource_1kg
      resource_dbsnp: resource_dbsnp
      indel_resource_mills: indel_resource_mills
      variant_recalibration_annotations_indels: variant_recalibration_annotations_indels
      variant_recalibration_annotations_snps: variant_recalibration_annotations_snps
    out:
      # Only joint_raw_variants, variant_recalibration_combined_vcf,
      # detail_metrics and summary_metrics are exposed as workflow outputs.
      - joint_raw_variants
      - variant_recalibration_snps_tranches
      - variant_recalibration_snps_recalibration
      - variant_recalibration_combined_vcf
      - variant_recalibration_indels_tranches
      - variant_recalibration_snps_indels_recalibration
      - variant_recalibration_indels_vcf
      - detail_metrics
      - summary_metrics
  # Step 03: collects the scattered preprocessing outputs and returns one
  # Directory-typed workflow output per category.
  organize_directories:
    run: subworkflows/exomeseq-gatk4-03-organizedirectories.cwl
    in:
      fastp_html_reports: preprocessing/fastp_html_report
      fastp_json_reports: preprocessing/fastp_json_report
      bams_markduplicates: preprocessing/markduplicates_bam
      metrics_markduplicates: preprocessing/markduplicates_metrics
      raw_variants: preprocessing/raw_variants
      bams_recalibrated: preprocessing/recalibrated_reads
    out:
      - fastp_html_reports_dir
      - fastp_json_reports_dir
      - bams_markduplicates_dir
      - metrics_markduplicates_dir
      - raw_variants_dir
      - bams_recalibrated_dir