/
cnv_germline_cohort_workflow.wdl
571 lines (521 loc) · 26.4 KB
/
cnv_germline_cohort_workflow.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
# Workflow for creating a GATK GermlineCNVCaller denoising model and generating calls given a list of normal samples. Supports both WGS and WES.
#
# Notes:
#
# - The intervals argument is required for both WGS and WES workflows and accepts formats compatible with the
# GATK -L argument (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists).
# These intervals will be padded on both sides by the amount specified by padding (default 250)
# and split into bins of length specified by bin_length (default 1000; specify 0 to skip binning,
# e.g., for WES). For WGS, the intervals should simply cover the chromosomes of interest.
#
# - Intervals can be blacklisted from coverage collection and all downstream steps by using the blacklist_intervals
# argument, which accepts formats compatible with the GATK -XL argument
# (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists).
# This may be useful for excluding centromeric regions, etc. from analysis. Alternatively, these regions may
# be manually filtered from the final callset.
#
# - Example invocation:
#
# java -jar cromwell.jar run cnv_germline_cohort_workflow.wdl -i my_parameters.json
#
#############
import "cnv_common_tasks.wdl" as CNVTasks
workflow CNVGermlineCohortWorkflow {
##################################
#### required basic arguments ####
##################################
File intervals
File? blacklist_intervals
Array[String]+ normal_bams
Array[String]+ normal_bais
String cohort_entity_id
File contig_ploidy_priors
Int num_intervals_per_scatter
File ref_fasta_dict
File ref_fasta_fai
File ref_fasta
String gatk_docker
##################################
#### optional basic arguments ####
##################################
# If true, AnnotateIntervals will be run to create GC annotations and explicit
# GC correction will be performed by the model generated by
Boolean? do_explicit_gc_correction
File? gatk4_jar_override
Int? preemptible_attempts
####################################################
#### optional arguments for PreprocessIntervals ####
####################################################
Int? padding
Int? bin_length
##################################################
#### optional arguments for AnnotateIntervals ####
##################################################
File? mappability_track_bed
File? mappability_track_bed_idx
File? segmental_duplication_track_bed
File? segmental_duplication_track_bed_idx
Int? feature_query_lookahead
Int? mem_gb_for_annotate_intervals
#################################################
#### optional arguments for FilterIntervals ####
################################################
File? blacklist_intervals_for_filter_intervals
Float? minimum_gc_content
Float? maximum_gc_content
Float? minimum_mappability
Float? maximum_mappability
Float? minimum_segmental_duplication_content
Float? maximum_segmental_duplication_content
Int? low_count_filter_count_threshold
Float? low_count_filter_percentage_of_samples
Float? extreme_count_filter_minimum_percentile
Float? extreme_count_filter_maximum_percentile
Float? extreme_count_filter_percentage_of_samples
Int? mem_gb_for_filter_intervals
##############################################
#### optional arguments for CollectCounts ####
##############################################
String? collect_counts_format
Int? mem_gb_for_collect_counts
########################################################################
#### optional arguments for DetermineGermlineContigPloidyCohortMode ####
########################################################################
Float? ploidy_mean_bias_standard_deviation
Float? ploidy_mapping_error_rate
Float? ploidy_global_psi_scale
Float? ploidy_sample_psi_scale
Int? mem_gb_for_determine_germline_contig_ploidy
Int? cpu_for_determine_germline_contig_ploidy
############################################################
#### optional arguments for GermlineCNVCallerCohortMode ####
############################################################
Float? gcnv_p_alt
Float? gcnv_p_active
Float? gcnv_cnv_coherence_length
Float? gcnv_class_coherence_length
Int? gcnv_max_copy_number
Int? mem_gb_for_germline_cnv_caller
Int? cpu_for_germline_cnv_caller
# optional arguments for germline CNV denoising model
Int? gcnv_max_bias_factors
Float? gcnv_mapping_error_rate
Float? gcnv_interval_psi_scale
Float? gcnv_sample_psi_scale
Float? gcnv_depth_correction_tau
Float? gcnv_log_mean_bias_standard_deviation
Float? gcnv_init_ard_rel_unexplained_variance
Int? gcnv_num_gc_bins
Float? gcnv_gc_curve_standard_deviation
String? gcnv_copy_number_posterior_expectation_mode
Boolean? gcnv_enable_bias_factors
Int? gcnv_active_class_padding_hybrid_mode
# optional arguments for Hybrid ADVI
Float? gcnv_learning_rate
Float? gcnv_adamax_beta_1
Float? gcnv_adamax_beta_2
Int? gcnv_log_emission_samples_per_round
Float? gcnv_log_emission_sampling_median_rel_error
Int? gcnv_log_emission_sampling_rounds
Int? gcnv_max_advi_iter_first_epoch
Int? gcnv_max_advi_iter_subsequent_epochs
Int? gcnv_min_training_epochs
Int? gcnv_max_training_epochs
Float? gcnv_initial_temperature
Int? gcnv_num_thermal_advi_iters
Int? gcnv_convergence_snr_averaging_window
Float? gcnv_convergence_snr_trigger_threshold
Int? gcnv_convergence_snr_countdown_window
Int? gcnv_max_calling_iters
Float? gcnv_caller_update_convergence_threshold
Float? gcnv_caller_internal_admixing_rate
Float? gcnv_caller_external_admixing_rate
Boolean? gcnv_disable_annealing
###################################################
#### arguments for PostprocessGermlineCNVCalls ####
###################################################
Int ref_copy_number_autosomal_contigs
Array[String]? allosomal_contigs
Array[Pair[String, String]] normal_bams_and_bais = zip(normal_bams, normal_bais)
call CNVTasks.PreprocessIntervals {
input:
intervals = intervals,
blacklist_intervals = blacklist_intervals,
ref_fasta = ref_fasta,
ref_fasta_fai = ref_fasta_fai,
ref_fasta_dict = ref_fasta_dict,
padding = padding,
bin_length = bin_length,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
preemptible_attempts = preemptible_attempts
}
if (select_first([do_explicit_gc_correction, false])) {
call CNVTasks.AnnotateIntervals {
input:
intervals = PreprocessIntervals.preprocessed_intervals,
ref_fasta = ref_fasta,
ref_fasta_fai = ref_fasta_fai,
ref_fasta_dict = ref_fasta_dict,
mappability_track_bed = mappability_track_bed,
mappability_track_bed_idx = mappability_track_bed_idx,
segmental_duplication_track_bed = segmental_duplication_track_bed,
segmental_duplication_track_bed_idx = segmental_duplication_track_bed_idx,
feature_query_lookahead = feature_query_lookahead,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem_gb = mem_gb_for_annotate_intervals,
preemptible_attempts = preemptible_attempts
}
}
scatter (normal_bam_and_bai in normal_bams_and_bais) {
call CNVTasks.CollectCounts {
input:
intervals = PreprocessIntervals.preprocessed_intervals,
bam = normal_bam_and_bai.left,
bam_idx = normal_bam_and_bai.right,
ref_fasta = ref_fasta,
ref_fasta_fai = ref_fasta_fai,
ref_fasta_dict = ref_fasta_dict,
format = collect_counts_format,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem_gb = mem_gb_for_collect_counts,
preemptible_attempts = preemptible_attempts
}
}
call CNVTasks.FilterIntervals {
input:
intervals = PreprocessIntervals.preprocessed_intervals,
blacklist_intervals = blacklist_intervals_for_filter_intervals,
annotated_intervals = AnnotateIntervals.annotated_intervals,
read_count_files = CollectCounts.counts,
minimum_gc_content = minimum_gc_content,
maximum_gc_content = maximum_gc_content,
minimum_mappability = minimum_mappability,
maximum_mappability = maximum_mappability,
minimum_segmental_duplication_content = minimum_segmental_duplication_content,
maximum_segmental_duplication_content = maximum_segmental_duplication_content,
low_count_filter_count_threshold = low_count_filter_count_threshold,
low_count_filter_percentage_of_samples = low_count_filter_percentage_of_samples,
extreme_count_filter_minimum_percentile = extreme_count_filter_minimum_percentile,
extreme_count_filter_maximum_percentile = extreme_count_filter_maximum_percentile,
extreme_count_filter_percentage_of_samples = extreme_count_filter_percentage_of_samples,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem_gb = mem_gb_for_filter_intervals,
preemptible_attempts = preemptible_attempts
}
call DetermineGermlineContigPloidyCohortMode {
input:
cohort_entity_id = cohort_entity_id,
intervals = FilterIntervals.filtered_intervals,
read_count_files = CollectCounts.counts,
contig_ploidy_priors = contig_ploidy_priors,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem_gb = mem_gb_for_determine_germline_contig_ploidy,
cpu = cpu_for_determine_germline_contig_ploidy,
mean_bias_standard_deviation = ploidy_mean_bias_standard_deviation,
mapping_error_rate = ploidy_mapping_error_rate,
global_psi_scale = ploidy_global_psi_scale,
sample_psi_scale = ploidy_sample_psi_scale,
preemptible_attempts = preemptible_attempts
}
call CNVTasks.ScatterIntervals {
input:
interval_list = FilterIntervals.filtered_intervals,
num_intervals_per_scatter = num_intervals_per_scatter,
gatk_docker = gatk_docker,
preemptible_attempts = preemptible_attempts
}
scatter (scatter_index in range(length(ScatterIntervals.scattered_interval_lists))) {
call GermlineCNVCallerCohortMode {
input:
scatter_index = scatter_index,
cohort_entity_id = cohort_entity_id,
read_count_files = CollectCounts.counts,
contig_ploidy_calls_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_calls_tar,
intervals = ScatterIntervals.scattered_interval_lists[scatter_index],
annotated_intervals = AnnotateIntervals.annotated_intervals,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem_gb = mem_gb_for_germline_cnv_caller,
cpu = cpu_for_germline_cnv_caller,
p_alt = gcnv_p_alt,
p_active = gcnv_p_active,
cnv_coherence_length = gcnv_cnv_coherence_length,
class_coherence_length = gcnv_class_coherence_length,
max_copy_number = gcnv_max_copy_number,
max_bias_factors = gcnv_max_bias_factors,
mapping_error_rate = gcnv_mapping_error_rate,
interval_psi_scale = gcnv_interval_psi_scale,
sample_psi_scale = gcnv_sample_psi_scale,
depth_correction_tau = gcnv_depth_correction_tau,
log_mean_bias_standard_deviation = gcnv_log_mean_bias_standard_deviation,
init_ard_rel_unexplained_variance = gcnv_init_ard_rel_unexplained_variance,
num_gc_bins = gcnv_num_gc_bins,
gc_curve_standard_deviation = gcnv_gc_curve_standard_deviation,
copy_number_posterior_expectation_mode = gcnv_copy_number_posterior_expectation_mode,
enable_bias_factors = gcnv_enable_bias_factors,
active_class_padding_hybrid_mode = gcnv_active_class_padding_hybrid_mode,
learning_rate = gcnv_learning_rate,
adamax_beta_1 = gcnv_adamax_beta_1,
adamax_beta_2 = gcnv_adamax_beta_2,
log_emission_samples_per_round = gcnv_log_emission_samples_per_round,
log_emission_sampling_median_rel_error = gcnv_log_emission_sampling_median_rel_error,
log_emission_sampling_rounds = gcnv_log_emission_sampling_rounds,
max_advi_iter_first_epoch = gcnv_max_advi_iter_first_epoch,
max_advi_iter_subsequent_epochs = gcnv_max_advi_iter_subsequent_epochs,
min_training_epochs = gcnv_min_training_epochs,
max_training_epochs = gcnv_max_training_epochs,
initial_temperature = gcnv_initial_temperature,
num_thermal_advi_iters = gcnv_num_thermal_advi_iters,
convergence_snr_averaging_window = gcnv_convergence_snr_averaging_window,
convergence_snr_trigger_threshold = gcnv_convergence_snr_trigger_threshold,
convergence_snr_countdown_window = gcnv_convergence_snr_countdown_window,
max_calling_iters = gcnv_max_calling_iters,
caller_update_convergence_threshold = gcnv_caller_update_convergence_threshold,
caller_internal_admixing_rate = gcnv_caller_internal_admixing_rate,
caller_external_admixing_rate = gcnv_caller_external_admixing_rate,
disable_annealing = gcnv_disable_annealing,
preemptible_attempts = preemptible_attempts
}
}
Array[Array[File]] call_tars_sample_by_shard = transpose(GermlineCNVCallerCohortMode.gcnv_call_tars)
scatter (sample_index in range(length(CollectCounts.entity_id))) {
call CNVTasks.PostprocessGermlineCNVCalls {
input:
entity_id = CollectCounts.entity_id[sample_index],
gcnv_calls_tars = call_tars_sample_by_shard[sample_index],
gcnv_model_tars = GermlineCNVCallerCohortMode.gcnv_model_tar,
calling_configs = GermlineCNVCallerCohortMode.calling_config_json,
denoising_configs = GermlineCNVCallerCohortMode.denoising_config_json,
gcnvkernel_version = GermlineCNVCallerCohortMode.gcnvkernel_version_json,
sharded_interval_lists = GermlineCNVCallerCohortMode.sharded_interval_list,
contig_ploidy_calls_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_calls_tar,
allosomal_contigs = allosomal_contigs,
ref_copy_number_autosomal_contigs = ref_copy_number_autosomal_contigs,
sample_index = sample_index,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
preemptible_attempts = preemptible_attempts
}
}
output {
File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals
Array[File] read_counts_entity_ids = CollectCounts.entity_id
Array[File] read_counts = CollectCounts.counts
File? annotated_intervals = AnnotateIntervals.annotated_intervals
File filtered_intervals = FilterIntervals.filtered_intervals
File contig_ploidy_model_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_model_tar
File contig_ploidy_calls_tar = DetermineGermlineContigPloidyCohortMode.contig_ploidy_calls_tar
Array[File] gcnv_model_tars = GermlineCNVCallerCohortMode.gcnv_model_tar
Array[Array[File]] gcnv_calls_tars = GermlineCNVCallerCohortMode.gcnv_call_tars
Array[File] gcnv_tracking_tars = GermlineCNVCallerCohortMode.gcnv_tracking_tar
Array[File] genotyped_intervals_vcfs = PostprocessGermlineCNVCalls.genotyped_intervals_vcf
Array[File] genotyped_segments_vcfs = PostprocessGermlineCNVCalls.genotyped_segments_vcf
}
}
task DetermineGermlineContigPloidyCohortMode {
String cohort_entity_id
File? intervals
Array[File] read_count_files
File contig_ploidy_priors
String? output_dir
File? gatk4_jar_override
# Runtime parameters
String gatk_docker
Int? mem_gb
Int? disk_space_gb
Boolean use_ssd = false
Int? cpu
Int? preemptible_attempts
# Model parameters
Float? mean_bias_standard_deviation
Float? mapping_error_rate
Float? global_psi_scale
Float? sample_psi_scale
# We do not expose Hybrid ADVI parameters -- the default values are decent
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
Int command_mem_mb = machine_mem_mb - 500
# If optional output_dir not specified, use "out"
String output_dir_ = select_first([output_dir, "out"])
command <<<
set -e
mkdir ${output_dir_}
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
export MKL_NUM_THREADS=${default=8 cpu}
export OMP_NUM_THREADS=${default=8 cpu}
gatk --java-options "-Xmx${command_mem_mb}m" DetermineGermlineContigPloidy \
${"-L " + intervals} \
--input ${sep=" --input " read_count_files} \
--contig-ploidy-priors ${contig_ploidy_priors} \
--interval-merging-rule OVERLAPPING_ONLY \
--output ${output_dir_} \
--output-prefix ${cohort_entity_id} \
--verbosity DEBUG \
--mean-bias-standard-deviation ${default="0.01" mean_bias_standard_deviation} \
--mapping-error-rate ${default="0.01" mapping_error_rate} \
--global-psi-scale ${default="0.001" global_psi_scale} \
--sample-psi-scale ${default="0.0001" sample_psi_scale}
tar czf ${cohort_entity_id}-contig-ploidy-model.tar.gz -C ${output_dir_}/${cohort_entity_id}-model .
tar czf ${cohort_entity_id}-contig-ploidy-calls.tar.gz -C ${output_dir_}/${cohort_entity_id}-calls .
>>>
runtime {
docker: "${gatk_docker}"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, 150]) + if use_ssd then " SSD" else " HDD"
cpu: select_first([cpu, 8])
preemptible: select_first([preemptible_attempts, 2])
}
output {
File contig_ploidy_model_tar = "${cohort_entity_id}-contig-ploidy-model.tar.gz"
File contig_ploidy_calls_tar = "${cohort_entity_id}-contig-ploidy-calls.tar.gz"
}
}
task GermlineCNVCallerCohortMode {
Int scatter_index
String cohort_entity_id
Array[File] read_count_files
File contig_ploidy_calls_tar
File intervals
File? annotated_intervals
String? output_dir
File? gatk4_jar_override
# Runtime parameters
String gatk_docker
Int? mem_gb
Int? disk_space_gb
Boolean use_ssd = false
Int? cpu
Int? preemptible_attempts
# Caller parameters
Float? p_alt
Float? p_active
Float? cnv_coherence_length
Float? class_coherence_length
Int? max_copy_number
# Denoising model parameters
Int? max_bias_factors
Float? mapping_error_rate
Float? interval_psi_scale
Float? sample_psi_scale
Float? depth_correction_tau
Float? log_mean_bias_standard_deviation
Float? init_ard_rel_unexplained_variance
Int? num_gc_bins
Float? gc_curve_standard_deviation
String? copy_number_posterior_expectation_mode
Boolean? enable_bias_factors
Int? active_class_padding_hybrid_mode
# Hybrid ADVI parameters
Float? learning_rate
Float? adamax_beta_1
Float? adamax_beta_2
Int? log_emission_samples_per_round
Float? log_emission_sampling_median_rel_error
Int? log_emission_sampling_rounds
Int? max_advi_iter_first_epoch
Int? max_advi_iter_subsequent_epochs
Int? min_training_epochs
Int? max_training_epochs
Float? initial_temperature
Int? num_thermal_advi_iters
Int? convergence_snr_averaging_window
Float? convergence_snr_trigger_threshold
Int? convergence_snr_countdown_window
Int? max_calling_iters
Float? caller_update_convergence_threshold
Float? caller_internal_admixing_rate
Float? caller_external_admixing_rate
Boolean? disable_annealing
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
Int command_mem_mb = machine_mem_mb - 500
# If optional output_dir not specified, use "out"
String output_dir_ = select_first([output_dir, "out"])
Int num_samples = length(read_count_files)
String dollar = "$" #WDL workaround, see https://github.com/broadinstitute/cromwell/issues/1819
command <<<
set -e
mkdir ${output_dir_}
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
export MKL_NUM_THREADS=${default=8 cpu}
export OMP_NUM_THREADS=${default=8 cpu}
mkdir contig-ploidy-calls-dir
tar xzf ${contig_ploidy_calls_tar} -C contig-ploidy-calls-dir
gatk --java-options "-Xmx${command_mem_mb}m" GermlineCNVCaller \
--run-mode COHORT \
-L ${intervals} \
--input ${sep=" --input " read_count_files} \
--contig-ploidy-calls contig-ploidy-calls-dir \
${"--annotated-intervals " + annotated_intervals} \
--interval-merging-rule OVERLAPPING_ONLY \
--output ${output_dir_} \
--output-prefix ${cohort_entity_id} \
--verbosity DEBUG \
--p-alt ${default="1e-6" p_alt} \
--p-active ${default="1e-2" p_active} \
--cnv-coherence-length ${default="10000.0" cnv_coherence_length} \
--class-coherence-length ${default="10000.0" class_coherence_length} \
--max-copy-number ${default="5" max_copy_number} \
--max-bias-factors ${default="5" max_bias_factors} \
--mapping-error-rate ${default="0.01" mapping_error_rate} \
--interval-psi-scale ${default="0.001" interval_psi_scale} \
--sample-psi-scale ${default="0.0001" sample_psi_scale} \
--depth-correction-tau ${default="10000.0" depth_correction_tau} \
--log-mean-bias-standard-deviation ${default="0.1" log_mean_bias_standard_deviation} \
--init-ard-rel-unexplained-variance ${default="0.1" init_ard_rel_unexplained_variance} \
--num-gc-bins ${default="20" num_gc_bins} \
--gc-curve-standard-deviation ${default="1.0" gc_curve_standard_deviation} \
--copy-number-posterior-expectation-mode ${default="HYBRID" copy_number_posterior_expectation_mode} \
--enable-bias-factors ${default="true" enable_bias_factors} \
--active-class-padding-hybrid-mode ${default="50000" active_class_padding_hybrid_mode} \
--learning-rate ${default="0.05" learning_rate} \
--adamax-beta-1 ${default="0.9" adamax_beta_1} \
--adamax-beta-2 ${default="0.99" adamax_beta_2} \
--log-emission-samples-per-round ${default="50" log_emission_samples_per_round} \
--log-emission-sampling-median-rel-error ${default="0.005" log_emission_sampling_median_rel_error} \
--log-emission-sampling-rounds ${default="10" log_emission_sampling_rounds} \
--max-advi-iter-first-epoch ${default="5000" max_advi_iter_first_epoch} \
--max-advi-iter-subsequent-epochs ${default="100" max_advi_iter_subsequent_epochs} \
--min-training-epochs ${default="10" min_training_epochs} \
--max-training-epochs ${default="100" max_training_epochs} \
--initial-temperature ${default="2.0" initial_temperature} \
--num-thermal-advi-iters ${default="2500" num_thermal_advi_iters} \
--convergence-snr-averaging-window ${default="500" convergence_snr_averaging_window} \
--convergence-snr-trigger-threshold ${default="0.1" convergence_snr_trigger_threshold} \
--convergence-snr-countdown-window ${default="10" convergence_snr_countdown_window} \
--max-calling-iters ${default="10" max_calling_iters} \
--caller-update-convergence-threshold ${default="0.001" caller_update_convergence_threshold} \
--caller-internal-admixing-rate ${default="0.75" caller_internal_admixing_rate} \
--caller-external-admixing-rate ${default="1.00" caller_external_admixing_rate} \
--disable-annealing ${default="false" disable_annealing}
tar czf ${cohort_entity_id}-gcnv-model-shard-${scatter_index}.tar.gz -C ${output_dir_}/${cohort_entity_id}-model .
tar czf ${cohort_entity_id}-gcnv-tracking-shard-${scatter_index}.tar.gz -C ${output_dir_}/${cohort_entity_id}-tracking .
CURRENT_SAMPLE=0
NUM_SAMPLES=${num_samples}
NUM_DIGITS=${dollar}{#NUM_SAMPLES}
while [ $CURRENT_SAMPLE -lt $NUM_SAMPLES ]; do
CURRENT_SAMPLE_WITH_LEADING_ZEROS=$(printf "%0${dollar}{NUM_DIGITS}d" $CURRENT_SAMPLE)
tar czf ${cohort_entity_id}-gcnv-calls-shard-${scatter_index}-sample-$CURRENT_SAMPLE_WITH_LEADING_ZEROS.tar.gz -C ${output_dir_}/${cohort_entity_id}-calls/SAMPLE_$CURRENT_SAMPLE .
let CURRENT_SAMPLE=CURRENT_SAMPLE+1
done
>>>
runtime {
docker: "${gatk_docker}"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, 150]) + if use_ssd then " SSD" else " HDD"
cpu: select_first([cpu, 8])
preemptible: select_first([preemptible_attempts, 2])
}
output {
File gcnv_model_tar = "${cohort_entity_id}-gcnv-model-shard-${scatter_index}.tar.gz"
Array[File] gcnv_call_tars = glob("${cohort_entity_id}-gcnv-calls-shard-${scatter_index}-sample-*.tar.gz")
File gcnv_tracking_tar = "${cohort_entity_id}-gcnv-tracking-shard-${scatter_index}.tar.gz"
File calling_config_json = "${output_dir_}/${cohort_entity_id}-calls/calling_config.json"
File denoising_config_json = "${output_dir_}/${cohort_entity_id}-calls/denoising_config.json"
File gcnvkernel_version_json = "${output_dir_}/${cohort_entity_id}-calls/gcnvkernel_version.json"
File sharded_interval_list = "${output_dir_}/${cohort_entity_id}-calls/interval_list.tsv"
}
}