Permalink
Please sign in to comment.
Browse files
Merge pull request #809 from broadinstitute/eb_aggreagate_variant_metrics
Added a new tool to merge a collection of variant calling metrics
- Loading branch information...
Showing 9 changed files with 414 additions and 19 deletions.
- +134 −0 src/main/java/picard/vcf/AccumulateVariantCallingMetrics.java
- +56 −19 src/main/java/picard/vcf/CollectVariantCallingMetrics.java
- +170 −0 src/test/java/picard/vcf/AccumulateVariantCallingMetricsTest.java
- +9 −0 testdata/picard/vcf/mergeTest.emptyShard.variant_calling_detail_metrics
- +9 −0 testdata/picard/vcf/mergeTest.emptyShard.variant_calling_summary_metrics
- +9 −0 testdata/picard/vcf/mergeTest.shard1.variant_calling_detail_metrics
- +9 −0 testdata/picard/vcf/mergeTest.shard1.variant_calling_summary_metrics
- +9 −0 testdata/picard/vcf/mergeTest.shard2.variant_calling_detail_metrics
- +9 −0 testdata/picard/vcf/mergeTest.shard2.variant_calling_summary_metrics
| @@ -0,0 +1,134 @@ | ||
| +/* | ||
| + * The MIT License | ||
| + * | ||
| + * Copyright (c) 2017 The Broad Institute | ||
| + * | ||
| + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| + * of this software and associated documentation files (the "Software"), to deal | ||
| + * in the Software without restriction, including without limitation the rights | ||
| + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| + * copies of the Software, and to permit persons to whom the Software is | ||
| + * furnished to do so, subject to the following conditions: | ||
| + * | ||
| + * The above copyright notice and this permission notice shall be included in | ||
| + * all copies or substantial portions of the Software. | ||
| + * | ||
| + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
| + * THE SOFTWARE. | ||
| + */ | ||
| +package picard.vcf; | ||
| + | ||
| +import htsjdk.samtools.metrics.MetricsFile; | ||
| +import htsjdk.samtools.util.*; | ||
| +import picard.PicardException; | ||
| +import picard.cmdline.CommandLineProgram; | ||
| +import picard.cmdline.CommandLineProgramProperties; | ||
| +import picard.cmdline.Option; | ||
| +import picard.cmdline.StandardOptionDefinitions; | ||
| +import picard.cmdline.programgroups.Metrics; | ||
| + | ||
| +import java.io.File; | ||
| +import java.io.FileReader; | ||
| +import java.io.IOException; | ||
| +import java.util.*; | ||
| + | ||
| +/** | ||
| + * Combines multiple Variant Calling Metrics files into a single file. | ||
| + * @author Eric Banks | ||
| + */ | ||
| +@CommandLineProgramProperties( | ||
| + usage = "Combines multiple Variant Calling Metrics files into a single file. This tool is used in cases where the metrics are calculated" + | ||
| + " separately for different (genomic) shards of the same callset and we want to combine them into a single result over the entire callset." + | ||
| + " The shards are expected to contain the same samples (although it will not fail if they do not) and to not have been run over overlapping genomic positions.", | ||
| + usageShort = "Combines multiple Variant Calling Metrics files into a single file", | ||
| + programGroup = Metrics.class | ||
| +) | ||
| +public class AccumulateVariantCallingMetrics extends CommandLineProgram { | ||
| + | ||
| + @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Paths (except for the file extensions) of Variant Calling Metrics files to read and merge.", minElements=1) | ||
| + public List<File> INPUT; | ||
| + | ||
| + @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Path (except for the file extension) of output metrics files to write.") | ||
| + public File OUTPUT; | ||
| + | ||
| + @Override | ||
| + protected int doWork() { | ||
| + | ||
| + final String outputPrefix = OUTPUT.getAbsolutePath() + "."; | ||
| + final File detailOutputFile = new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()); | ||
| + final File summaryOutputFile = new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()); | ||
| + IOUtil.assertFileIsWritable(detailOutputFile); | ||
| + IOUtil.assertFileIsWritable(summaryOutputFile); | ||
| + | ||
| + // set up the collectors | ||
| + final Map<String, Collection<CollectVariantCallingMetrics.VariantCallingDetailMetrics>> sampleDetailsMap = new HashMap<>(); | ||
| + final Collection<CollectVariantCallingMetrics.VariantCallingSummaryMetrics> summaries = new ArrayList<>(); | ||
| + | ||
| + for (final File file : INPUT) { | ||
| + final String inputPrefix = file.getAbsolutePath() + "."; | ||
| + | ||
| + try { | ||
| + // read in the detailed metrics file | ||
| + final File detail = new File(inputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()); | ||
| + IOUtil.assertFileIsReadable(detail); | ||
| + MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, ?> detailedMetricsFile = getMetricsFile(); | ||
| + detailedMetricsFile.read(new FileReader(detail)); | ||
| + | ||
| + // for each sample in the detailed metrics... | ||
| + long totalHetDepth = 0L; | ||
| + for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics detailedMetrics : detailedMetricsFile.getMetrics()) { | ||
| + // re-calculate internal fields from derived fields | ||
| + detailedMetrics.calculateFromDerivedFields(); | ||
| + totalHetDepth += detailedMetrics.TOTAL_HET_DEPTH; | ||
| + | ||
| + // add it to the list of metrics for that sample so that we can merge them later | ||
| + sampleDetailsMap.computeIfAbsent(detailedMetrics.SAMPLE_ALIAS, f -> new ArrayList<>()).add(detailedMetrics); | ||
| + } | ||
| + | ||
| + // next, read in the summary metrics | ||
| + final File summary = new File(inputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()); | ||
| + IOUtil.assertFileIsReadable(summary); | ||
| + MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, ?> summaryMetricsFile = getMetricsFile(); | ||
| + summaryMetricsFile.read(new FileReader(summary)); | ||
| + if (summaryMetricsFile.getMetrics().size() != 1) { | ||
| + throw new PicardException(String.format("Expected 1 row in the summary metrics file but saw %d", summaryMetricsFile.getMetrics().size())); | ||
| + } | ||
| + | ||
| + // re-calculate internal fields from derived fields and add it to the list of summary metrics | ||
| + final CollectVariantCallingMetrics.VariantCallingSummaryMetrics summaryMetrics = summaryMetricsFile.getMetrics().get(0); | ||
| + summaryMetrics.calculateFromDerivedFields(totalHetDepth); | ||
| + summaries.add(summaryMetrics); | ||
| + } catch (IOException e) { | ||
| + throw new PicardException(String.format("Cannot read from metrics files with prefix %s", inputPrefix)); | ||
| + } | ||
| + } | ||
| + | ||
| + // now merge all of the accumulated metrics | ||
| + final Collection<CollectVariantCallingMetrics.VariantCallingDetailMetrics> collapsedDetails = new ArrayList<>(); | ||
| + sampleDetailsMap.values().forEach(sampleDetails -> { | ||
| + final CollectVariantCallingMetrics.VariantCallingDetailMetrics collapsed = new CollectVariantCallingMetrics.VariantCallingDetailMetrics(); | ||
| + CollectVariantCallingMetrics.VariantCallingDetailMetrics.foldInto(collapsed, sampleDetails); | ||
| + collapsed.calculateDerivedFields(); | ||
| + collapsedDetails.add(collapsed); | ||
| + }); | ||
| + final CollectVariantCallingMetrics.VariantCallingSummaryMetrics collapsedSummary = new CollectVariantCallingMetrics.VariantCallingSummaryMetrics(); | ||
| + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.foldInto(collapsedSummary, summaries); | ||
| + collapsedSummary.calculateDerivedFields(); | ||
| + | ||
| + // prepare and write the finalized merged metrics | ||
| + final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Integer> detail = getMetricsFile(); | ||
| + final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Integer> summary = getMetricsFile(); | ||
| + summary.addMetric(collapsedSummary); | ||
| + collapsedDetails.forEach(detail::addMetric); | ||
| + | ||
| + detail.write(detailOutputFile); | ||
| + summary.write(summaryOutputFile); | ||
| + | ||
| + return 0; | ||
| + } | ||
| +} |
Oops, something went wrong.
0 comments on commit
bb529af