-
Notifications
You must be signed in to change notification settings - Fork 0
/
bwamem-gatk-report-wf.cwl
149 lines (142 loc) · 3.4 KB
/
bwamem-gatk-report-wf.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# Top-level description of a single-sample WGS workflow (CWL v1.1).
cwlVersion: v1.1
class: Workflow
label: WGS processing workflow for single sample

# Lets a step's `run` target be a Workflow itself
# (presumably the *-wf.cwl steps in this file; confirm).
requirements:
  - class: SubworkflowFeatureRequirement
# Workflow inputs. `format` values are ids under the `edam:` prefix; the
# trailing comment on each names the file type that id stands for.
inputs:
  # --- Paired-end reads -------------------------------------------------
  fastq1:
    label: One of set of pair-end FASTQs (R1)
    type: File
    format: edam:format_1930  # FASTQ
  fastq2:
    label: One of set of pair-end FASTQs (R2)
    type: File
    format: edam:format_1930  # FASTQ

  # --- Reference genome and its companion files -------------------------
  reference:
    label: Reference genome
    type: File
    format: edam:format_1929  # FASTA
    # Companion files expected next to the FASTA (presumably aligner
    # index, FASTA index and sequence dictionary; confirm with the tools).
    secondaryFiles:
      - .amb
      - .ann
      - .bwt
      - .pac
      - .sa
      - .fai
      # Leading ^ drops the primary file's extension before appending.
      - ^.dict

  # --- Run parameters ---------------------------------------------------
  fullintervallist:
    label: Full list of intervals to operate over
    type: File
  sample:
    label: Sample Name
    type: string
  scattercount:
    label: Desired split for variant calling
    # NOTE(review): declared as a string even though the label describes
    # a count; check what the haplotypecaller subworkflow expects.
    type: string

  # --- Known-sites resources for BQSR -----------------------------------
  knownsites1:
    label: VCF of known SNPS sites for BQSR
    type: File
    format: edam:format_3016  # VCF
    secondaryFiles:
      - .idx
  knownsites2:
    label: VCF of known indel sites for BQSR
    type: File
    format: edam:format_3016  # VCF
    secondaryFiles:
      - .tbi

  # --- Report generation ------------------------------------------------
  clinvarvcf:
    label: Reference VCF for ClinVar
    type: File
    format: edam:format_3016  # VCF
  reportfunc:
    label: Function used to create HTML report
    type: File
  headhtml:
    label: Header for HTML report
    type: File
    format: edam:format_2331  # HTML
  tailhtml:
    label: Footer for HTML report
    type: File
    format: edam:format_2331  # HTML
# Workflow outputs; each `outputSource` is written as <step>/<step output>.
outputs:
  # Both QC outputs come from the fastqc step and are arrays of files.
  qc-html:
    label: FASTQ QC reports
    type: File[]
    format: edam:format_2331  # HTML
    outputSource: fastqc/out-html
  qc-zip:
    label: Zip files of FASTQ QC report and associated data
    type: File[]
    outputSource: fastqc/out-zip

  # Gathered variant calls from the haplotypecaller step.
  gvcf:
    label: GVCF generated from GATK Haplotype Caller
    type: File
    format: edam:format_3016  # GVCF, tagged with the VCF format id
    outputSource: haplotypecaller/gatheredgvcf

  # Final HTML report from the generate-report step.
  report:
    label: ClinVar variant report
    type: File
    format: edam:format_2331  # HTML
    outputSource: generate-report/report
# Processing steps. In each `in` map the key is the step's own input name
# and the value is either a workflow input or <step>/<output> of an
# earlier step.
steps:
  # Consumes both FASTQs; its outputs feed the qc-html / qc-zip outputs.
  fastqc:
    run: fastqc.cwl
    in:
      fastq1: fastq1
      fastq2: fastq2
    out:
      - out-html
      - out-zip

  # Takes both FASTQs, the reference and the sample name; emits `bam`.
  bwamem-samtools-view:
    run: bwamem-samtools-view.cwl
    in:
      fastq1: fastq1
      fastq2: fastq2
      reference: reference
      sample: sample
    out:
      - bam

  # Consumes the BAM from bwamem-samtools-view; emits `sortedbam`.
  samtools-sort:
    run: samtools-sort.cwl
    in:
      bam: bwamem-samtools-view/bam
      sample: sample
    out:
      - sortedbam

  # Consumes the sorted BAM. `dupmetrics` is declared here but is not
  # referenced by any later step or workflow output in this file.
  mark-duplicates:
    run: mark-duplicates.cwl
    in:
      bam: samtools-sort/sortedbam
    out:
      - dupbam
      - dupmetrics

  # Consumes the BAM from mark-duplicates; emits `indexedbam`.
  samtools-index:
    run: samtools-index.cwl
    in:
      bam: mark-duplicates/dupbam
    out:
      - indexedbam

  # Runs scatter-gatk-wf-with-interval.cwl on the indexed BAM with the
  # reference, interval list, scatter count and both known-sites VCFs;
  # emits `gatheredgvcf`.
  haplotypecaller:
    run: scatter-gatk-wf-with-interval.cwl
    in:
      reference: reference
      fullintervallist: fullintervallist
      bam: samtools-index/indexedbam
      sample: sample
      scattercount: scattercount
      knownsites1: knownsites1
      knownsites2: knownsites2
    out:
      - gatheredgvcf

  # Runs report-wf.cwl on the gathered GVCF with the ClinVar VCF, the
  # report function and the HTML header/footer; emits `report`.
  generate-report:
    run: report-wf.cwl
    in:
      gvcf: haplotypecaller/gatheredgvcf
      sample: sample
      clinvarvcf: clinvarvcf
      reportfunc: reportfunc
      headhtml: headhtml
      tailhtml: tailhtml
    out:
      - report
# Prefix declarations: `s:` is used by the metadata keys below and
# `edam:` by the `format` ids elsewhere in this file.
$namespaces:
  s: https://schema.org/
  edam: http://edamontology.org/

# Metadata recorded under the `s:` prefix.
s:codeRepository: https://github.com/arvados/arvados-tutorial
s:license: https://www.gnu.org/licenses/agpl-3.0.en.html

# Schema documents are currently commented out.
# NOTE(review): the reason is not recorded here; confirm before enabling.
# $schemas:
#   - https://schema.org/version/latest/schema.rdf
#   - http://edamontology.org/EDAM_1.18.owl