/
BroadInternalRNAWithUMIs.wdl
205 lines (184 loc) · 11.8 KB
/
BroadInternalRNAWithUMIs.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
version 1.0
import "../../../../pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl" as RNAWithUMIs
import "../../../../pipelines/broad/qc/CheckFingerprint.wdl" as FP
import "../../../../tasks/broad/RNAWithUMIsTasks.wdl" as tasks
import "../../../../tasks/broad/Utilities.wdl" as utils
# Wrapper workflow around RNAWithUMIsPipeline.
#
# Steps, in the order they appear below:
#   1. Pick the reference / annotation resources for the requested build.
#   2. Fail with a message if reference_build is neither "hg19" nor "hg38".
#   3. Run RNAWithUMIsPipeline on the paired FASTQs.
#   4. Run CheckFingerprint on the resulting BAM (read_fingerprint_from_mercury = true).
#   5. Merge the individual metrics files into one unified metrics file.
#   6. When both TDR inputs are supplied, format the outputs as JSON and hand
#      them to updateOutputsInTDR.
workflow BroadInternalRNAWithUMIs {

  String pipeline_version = "1.0.29"

  input {
    # input needs to be either "hg19" or "hg38"
    String reference_build

    String sample_lsid

    # RNAWithUMIs inputs
    File r1_fastq
    File r2_fastq
    String read1Structure
    String read2Structure
    String output_basename

    String platform
    String library_name
    String platform_unit
    String read_group_name
    String sequencing_center = "BI"

    # Terra Data Repo dataset information
    String? tdr_dataset_uuid
    String? tdr_sample_id

    String environment
    File vault_token_path
  }

  # Single source of truth for the build selection used by every resource
  # below. Any value other than "hg19" selects the hg38 resources; invalid
  # values are rejected separately by ErrorMessageIncorrectInput.
  Boolean is_hg19 = (reference_build == "hg19")

  # Reference genome
  File ref = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.fasta"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.fasta"
  File refIndex = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.fasta.fai"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.fasta.fai"
  File refDict = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.dict"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.dict"

  # Fingerprinting resource
  File haplotype_database_file = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.haplotype_database.txt"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.haplotype_database.txt"

  # Annotation and alignment resources
  File refFlat = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/annotation/Homo_sapiens_assembly19.refFlat.txt"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/hg38_GENCODE_v34_refFlat.txt"
  File starIndex = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/star/STAR2.7.10a_genome_hg19_noALT_noHLA_noDecoy_v19_oh145.tar.gz"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/star/STAR2.7.10a_genome_GRCh38_noALT_noHLA_noDecoy_v34_oh145.tar.gz"
  File gtf = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/annotation/gencode.v19.genes.v7.collapsed_only.patched_contigs.gtf"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode.v34.annotation_collapsed_only.gtf"
  File ribosomalIntervals = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/annotation/Homo_sapiens_assembly19.rRNA.interval_list"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode_v34_rRNA.interval_list"
  File exonBedFile = if is_hg19
    then "gs://gcp-public-data--broad-references/hg19/v0/annotation/gencode.v19.hg19.insert_size_intervals_geq1000bp.bed"
    else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode.v34.GRCh38.insert_size_intervals_geq1000bp.bed"

  # Population VCF passed through to the pipeline together with its index
  File population_vcf = if is_hg19
    then "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf"
    else "gs://gatk-best-practices/somatic-hg38/small_exac_common_3.hg38.vcf.gz"
  File population_vcf_index = if is_hg19
    then "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf.idx"
    else "gs://gatk-best-practices/somatic-hg38/small_exac_common_3.hg38.vcf.gz.tbi"

  parameter_meta {
    reference_build: "String used to define the reference genome build; should be set to 'hg19' or 'hg38'"
    sample_lsid: "The sample lsid (an identifier used to retrieve fingerprints from Mercury)"
    r1_fastq: "Read 1 FASTQ file"
    r2_fastq: "Read 2 FASTQ file"
    read1Structure: "String describing how the bases in a sequencing run should be allocated into logical reads for read 1"
    read2Structure: "String describing how the bases in a sequencing run should be allocated into logical reads for read 2"
    output_basename: "String used as a prefix in workflow output files"
    platform: "String used to describe the sequencing platform"
    library_name: "String used to describe the library"
    platform_unit: "String used to describe the platform unit"
    read_group_name: "String used to describe the read group name"
    sequencing_center: "String used to describe the sequencing center; default is set to 'BI'"
    environment: "The environment (dev or prod) used for determining which service to use to retrieve Mercury fingerprints"
    vault_token_path: "The path to the vault token used for accessing the Mercury Fingerprint Store"
    tdr_dataset_uuid: "Optional string used to define the Terra Data Repo (TDR) dataset to which outputs will be ingested"
    tdr_sample_id: "Optional string used to identify the sample being processed; this must be the primary key in the TDR dataset"
  }

  # make sure either hg19 or hg38 is supplied as reference_build input
  # NOTE(review): none of the calls below consume anything from this block, so
  # the workflow graph does not force the pipeline to wait for it -- confirm
  # that the engine's failure mode stops the run promptly on bad input.
  if (!is_hg19 && (reference_build != "hg38")) {
    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
      input:
        message = "reference_build must be supplied with either 'hg19' or 'hg38'."
    }
  }

  # Core RNA-with-UMIs processing using the build-specific resources chosen above.
  call RNAWithUMIs.RNAWithUMIsPipeline as RNAWithUMIs {
    input:
      r1_fastq = r1_fastq,
      r2_fastq = r2_fastq,
      read1Structure = read1Structure,
      read2Structure = read2Structure,
      starIndex = starIndex,
      output_basename = output_basename,
      gtf = gtf,
      platform = platform,
      library_name = library_name,
      platform_unit = platform_unit,
      read_group_name = read_group_name,
      sequencing_center = sequencing_center,
      ref = ref,
      refIndex = refIndex,
      refDict = refDict,
      refFlat = refFlat,
      ribosomalIntervals = ribosomalIntervals,
      exonBedFile = exonBedFile,
      population_vcf = population_vcf,
      population_vcf_index = population_vcf_index
  }

  # Fingerprint check on the pipeline's output BAM against the same reference.
  call FP.CheckFingerprint as CheckFingerprint {
    input:
      input_bam = RNAWithUMIs.output_bam,
      input_bam_index = RNAWithUMIs.output_bam_index,
      sample_alias = RNAWithUMIs.sample_name,
      sample_lsid = sample_lsid,
      output_basename = output_basename,
      ref_fasta = ref,
      ref_fasta_index = refIndex,
      ref_dict = refDict,
      read_fingerprint_from_mercury = true,
      haplotype_database_file = haplotype_database_file,
      environment = environment,
      vault_token_path = vault_token_path,
      allow_lod_zero = true
  }

  # Combine pipeline metrics and the fingerprint summary into unified_metrics.
  # The merged file is named after the pipeline's sample_name output rather
  # than the output_basename input.
  call tasks.MergeMetrics {
    input:
      alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics,
      insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics,
      picard_rna_metrics = RNAWithUMIs.picard_rna_metrics,
      duplicate_metrics = RNAWithUMIs.duplicate_metrics,
      rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics,
      fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file,
      output_basename = RNAWithUMIs.sample_name
  }

  # TDR hand-off runs only when BOTH optional TDR inputs are provided.
  if (defined(tdr_dataset_uuid) && defined(tdr_sample_id)) {
    call tasks.formatPipelineOutputs {
      input:
        # select_first only unwraps the optional here; the "" fallback is
        # unreachable because of the defined() guard above.
        sample_id = select_first([tdr_sample_id, ""]),
        transcriptome_bam = RNAWithUMIs.transcriptome_bam,
        transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics,
        output_bam = RNAWithUMIs.output_bam,
        output_bam_index = RNAWithUMIs.output_bam_index,
        duplicate_metrics = RNAWithUMIs.duplicate_metrics,
        rnaseqc2_gene_tpm = RNAWithUMIs.rnaseqc2_gene_tpm,
        rnaseqc2_gene_counts = RNAWithUMIs.rnaseqc2_gene_counts,
        rnaseqc2_exon_counts = RNAWithUMIs.rnaseqc2_exon_counts,
        rnaseqc2_fragment_size_histogram = RNAWithUMIs.rnaseqc2_fragment_size_histogram,
        rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics,
        picard_rna_metrics = RNAWithUMIs.picard_rna_metrics,
        picard_alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics,
        picard_insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics,
        picard_insert_size_histogram = RNAWithUMIs.picard_insert_size_histogram,
        picard_base_distribution_by_cycle_metrics = RNAWithUMIs.picard_base_distribution_by_cycle_metrics,
        picard_base_distribution_by_cycle_pdf = RNAWithUMIs.picard_base_distribution_by_cycle_pdf,
        picard_quality_by_cycle_metrics = RNAWithUMIs.picard_quality_by_cycle_metrics,
        picard_quality_by_cycle_pdf = RNAWithUMIs.picard_quality_by_cycle_pdf,
        picard_quality_distribution_metrics = RNAWithUMIs.picard_quality_distribution_metrics,
        picard_quality_distribution_pdf = RNAWithUMIs.picard_quality_distribution_pdf,
        picard_fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file,
        picard_fingerprint_detail_metrics = CheckFingerprint.fingerprint_detail_metrics_file,
        unified_metrics = MergeMetrics.unified_metrics,
        contamination = RNAWithUMIs.contamination,
        contamination_error = RNAWithUMIs.contamination_error,
        fastqc_html_report = RNAWithUMIs.fastqc_html_report,
        fastqc_percent_reads_with_adapter = RNAWithUMIs.fastqc_percent_reads_with_adapter
    }

    call tasks.updateOutputsInTDR {
      input:
        # Same unwrap-only use of select_first as for sample_id above.
        tdr_dataset_uuid = select_first([tdr_dataset_uuid, ""]),
        outputs_json = formatPipelineOutputs.pipeline_outputs_json
    }
  }

  output {
    # Alignments
    File transcriptome_bam = RNAWithUMIs.transcriptome_bam
    File output_bam = RNAWithUMIs.output_bam
    File output_bam_index = RNAWithUMIs.output_bam_index

    # Duplicate metrics
    File duplicate_metrics = RNAWithUMIs.duplicate_metrics
    File transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics

    # RNA-SeQC2 outputs
    File rnaseqc2_gene_tpm = RNAWithUMIs.rnaseqc2_gene_tpm
    File rnaseqc2_gene_counts = RNAWithUMIs.rnaseqc2_gene_counts
    File rnaseqc2_exon_counts = RNAWithUMIs.rnaseqc2_exon_counts
    File rnaseqc2_fragment_size_histogram = RNAWithUMIs.rnaseqc2_fragment_size_histogram
    File rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics

    # Picard metrics
    File picard_rna_metrics = RNAWithUMIs.picard_rna_metrics
    File picard_alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics
    File picard_insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics
    File picard_insert_size_histogram = RNAWithUMIs.picard_insert_size_histogram
    File picard_base_distribution_by_cycle_metrics = RNAWithUMIs.picard_base_distribution_by_cycle_metrics
    File picard_base_distribution_by_cycle_pdf = RNAWithUMIs.picard_base_distribution_by_cycle_pdf
    File picard_quality_by_cycle_metrics = RNAWithUMIs.picard_quality_by_cycle_metrics
    File picard_quality_by_cycle_pdf = RNAWithUMIs.picard_quality_by_cycle_pdf
    File picard_quality_distribution_metrics = RNAWithUMIs.picard_quality_distribution_metrics
    File picard_quality_distribution_pdf = RNAWithUMIs.picard_quality_distribution_pdf

    # Fingerprint metrics are declared optional in this workflow's outputs.
    File? picard_fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file
    File? picard_fingerprint_detail_metrics = CheckFingerprint.fingerprint_detail_metrics_file

    File unified_metrics = MergeMetrics.unified_metrics

    Float contamination = RNAWithUMIs.contamination
    Float contamination_error = RNAWithUMIs.contamination_error

    File fastqc_html_report = RNAWithUMIs.fastqc_html_report
    Float fastqc_percent_reads_with_adapter = RNAWithUMIs.fastqc_percent_reads_with_adapter
  }

  meta {
    allowNestedInputs: true
  }
}