/
rnaseq.cwl
157 lines (156 loc) · 4.71 KB
/
rnaseq.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env cwl-runner
# Document header: CWL version, document class, display label, and the
# optional CWL features this workflow asks the runner to support.
cwlVersion: v1.0
class: Workflow
label: 'RNA-Seq alignment and transcript/gene abundance workflow'
requirements:
  # Permits a step input to list more than one inbound source.
  - class: MultipleInputFeatureRequirement
  # Needed because at least one step `run`s another workflow document
  # (see the file under ../subworkflows/ in `steps`).
  - class: SubworkflowFeatureRequirement
  # Needed for the `scatter` / `scatterMethod` fields used in `steps`.
  - class: ScatterFeatureRequirement
  # Permits JavaScript expressions; none appear in this document itself.
  - class: InlineJavascriptRequirement
# Workflow-level input parameters. Each key is an input id; nested keys give
# its CWL type and, where present, secondary files or a `doc` string.
inputs:
  # Alignment index. The primary File is the index basename (per the original
  # author's note, an extra file with that basename must exist); the eight
  # `.N.ht2` index shards travel with it as secondary files.
  reference_index:
    type: File  # this requires an extra file with the basename
    secondaryFiles:
      - ".1.ht2"
      - ".2.ht2"
      - ".3.ht2"
      - ".4.ht2"
      - ".5.ht2"
      - ".6.ht2"
      - ".7.ht2"
      - ".8.ht2"
  # Annotation file passed to the `stringtie` step.
  reference_annotation:
    type: File
  # The next three arrays are scattered together with `dotproduct` in the
  # first step, so element i of each describes the same unit of input and
  # all three must have equal length.
  instrument_data_bams:
    type: File[]
  read_group_id:
    type: string[]
  # Array of string arrays: one inner list per scattered element.
  read_group_fields:
    type:
      type: array
      items:
        type: array
        items: string
  # Passed to the `stringtie` step.
  sample_name:
    type: string
  # trimming_* inputs are forwarded to the scatter step as `adapters`,
  # `adapter_trim_end`, `adapter_min_overlap`, `max_uncalled` and
  # `min_readlength` respectively.
  trimming_adapters:
    type: File
  trimming_adapter_trim_end:
    type: string
  trimming_adapter_min_overlap:
    type: int
  trimming_max_uncalled:
    type: int
  trimming_min_readlength:
    type: int
  # Index consumed by the `kallisto` step.
  kallisto_index:
    type: File
  # Lookup table consumed by the `transcript_to_gene` step.
  gene_transcript_lookup_table:
    type: File
  # Optional library strandedness; one of the enum symbols, or null when
  # omitted. Forwarded to the alignment, kallisto, stringtie and QC steps.
  strand:
    type:
      - "null"
      - type: enum
        symbols: ["first", "second", "unstranded"]
  # refFlat and ribosomal_intervals feed `generate_qc_metrics`;
  # ribosomal_intervals is optional.
  refFlat:
    type: File
  ribosomal_intervals:
    type: File?
  # NOTE(review): `species` and `assembly` are declared here but are not
  # wired into any step of this workflow -- confirm whether they are kept
  # only as descriptive metadata.
  species:
    type: string
    doc: 'the species being analyzed, such as homo_sapiens or mus_musculus'
  assembly:
    type: string
    doc: 'the assembly used, such as GRCh37/38, GRCm37/38'
# Workflow-level outputs. Each entry names the step output (`step/output_id`)
# it is collected from.
outputs:
  # BAM produced by `mark_dup`, published together with its `.bai` secondary
  # file.
  final_bam:
    type: File
    outputSource: mark_dup/sorted_bam
    secondaryFiles: [".bai"]
  # Outputs of the `stringtie` step.
  stringtie_transcript_gtf:
    type: File
    outputSource: stringtie/transcript_gtf
  stringtie_gene_expression_tsv:
    type: File
    outputSource: stringtie/gene_expression_tsv
  # Transcript-level outputs of the `kallisto` step.
  transcript_abundance_tsv:
    type: File
    outputSource: kallisto/expression_transcript_table
  transcript_abundance_h5:
    type: File
    outputSource: kallisto/expression_transcript_h5
  # Output of the `transcript_to_gene` step.
  gene_abundance:
    type: File
    outputSource: transcript_to_gene/gene_abundance
  # Outputs of `generate_qc_metrics`; `chart` is optional (File?) and may be
  # absent.
  metrics:
    type: File
    outputSource: generate_qc_metrics/metrics
  chart:
    type: File?
    outputSource: generate_qc_metrics/chart
  # Also emitted by the `kallisto` step.
  fusion_evidence:
    type: File
    outputSource: kallisto/fusion_evidence
# Workflow steps. In each `in` mapping the key is the step's own input id and
# the value is either a workflow input or `other_step/output_id`.
steps:
  # Runs the sub-workflow once per scattered element. `dotproduct` walks the
  # three scattered arrays in lockstep, so they must be the same length;
  # every other input is passed unchanged to each invocation. Both outputs
  # therefore come back as arrays, one entry per invocation.
  bam_to_trimmed_fastq_and_hisat_alignments:
    run: ../subworkflows/bam_to_trimmed_fastq_and_hisat_alignments.cwl
    scatter: [bam, read_group_id, read_group_fields]
    scatterMethod: dotproduct
    in:
      bam: instrument_data_bams
      read_group_id: read_group_id
      read_group_fields: read_group_fields
      adapters: trimming_adapters
      adapter_trim_end: trimming_adapter_trim_end
      adapter_min_overlap: trimming_adapter_min_overlap
      max_uncalled: trimming_max_uncalled
      min_readlength: trimming_min_readlength
      reference_index: reference_index
      strand: strand
    out: [fastqs, aligned_bam]

  # Consumes the FASTQs gathered from every scatter invocation.
  kallisto:
    run: ../tools/kallisto.cwl
    in:
      kallisto_index: kallisto_index
      strand: strand
      fastqs: bam_to_trimmed_fastq_and_hisat_alignments/fastqs
    out:
      - expression_transcript_table
      - expression_transcript_h5
      - fusion_evidence

  # Takes kallisto's h5 output plus the lookup table and emits
  # `gene_abundance`.
  transcript_to_gene:
    run: ../tools/transcript_to_gene.cwl
    in:
      transcript_table_h5: kallisto/expression_transcript_h5
      gene_transcript_lookup_table: gene_transcript_lookup_table
    out: [gene_abundance]

  # Takes the array of aligned BAMs from the scatter step and emits a single
  # `merged_bam`.
  merge:
    run: ../tools/merge_bams.cwl
    in:
      bams: bam_to_trimmed_fastq_and_hisat_alignments/aligned_bam
    out: [merged_bam]

  # NOTE(review): this step reads merge/merged_bam (not mark_dup/sorted_bam),
  # and its output is what generate_qc_metrics consumes, so QC metrics are
  # computed on the BAM before the mark_dup step -- confirm this is intended.
  index_bam:
    run: ../tools/index_bam.cwl
    in:
      bam: merge/merged_bam
    out: [indexed_bam]

  # Reads the merged BAM; `input_sort_order` has no source and is fixed to
  # the literal default "coordinate". `metrics_file` is produced but not
  # exposed as a workflow output.
  mark_dup:
    run: ../tools/mark_duplicates_and_sort.cwl
    in:
      bam: merge/merged_bam
      input_sort_order:
        default: "coordinate"
    out: [sorted_bam, metrics_file]

  # Runs on the BAM produced by `mark_dup`.
  stringtie:
    run: ../tools/stringtie.cwl
    in:
      bam: mark_dup/sorted_bam
      reference_annotation: reference_annotation
      sample_name: sample_name
      strand: strand
    out: [transcript_gtf, gene_expression_tsv]

  # Runs on the BAM produced by `index_bam` (see the review note there).
  generate_qc_metrics:
    run: ../tools/generate_qc_metrics.cwl
    in:
      refFlat: refFlat
      ribosomal_intervals: ribosomal_intervals
      strand: strand
      bam: index_bam/indexed_bam
    out: [metrics, chart]