Permalink
Please sign in to comment.
Browse files
Include tumor-aware results when results have been rolled-up (sample or library) (#642)
* new CLP CrosscheckFingerprints can crosscheck bams and vcfs
* added option to crosscheck by input file
* added choice of output format: Matrix or Metric File. Metric File output contains the LOD score and also the tumor-aware LOD scores.
* includes tumor-aware results when results are emitted as a metric file
* new CLP ClusterCrosscheckMetrics that will take the metric output of CrosscheckFingerprints and find clusters of groups that connect with a high LOD
* tests refactored
* general purpose graph-clustering algorithm added
* removed deprecated GenotypeReader class
* added a deprecated wrapper named CrosscheckReadgroupFingerprints for backwards continuity
* crosscheck now allows missing RG tags in bam if VALIDATION_STRINGENCY is not STRICT.
- Loading branch information...
Showing
with
7,591 additions
and 415 deletions.
- +1 −1 build.gradle
- +1 −1 src/main/java/picard/fingerprint/CheckFingerprint.java
- +135 −0 src/main/java/picard/fingerprint/ClusterCrosscheckMetrics.java
- +52 −0 src/main/java/picard/fingerprint/ClusteredCrosscheckMetric.java
- +396 −0 src/main/java/picard/fingerprint/CrosscheckFingerprints.java
- +110 −0 src/main/java/picard/fingerprint/CrosscheckMetric.java
- +40 −251 src/main/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java
- +286 −106 src/main/java/picard/fingerprint/FingerprintChecker.java
- +135 −0 src/main/java/picard/fingerprint/FingerprintIdDetails.java
- +14 −53 src/main/java/picard/sam/markduplicates/UmiGraph.java
- +1 −1 src/main/java/picard/util/BaitDesigner.java
- +113 −0 src/main/java/picard/util/GraphUtils.java
- +49 −0 src/main/java/picard/util/ReflectionUtil.java
- +4 −1 src/test/java/picard/analysis/CollectGcBiasMetricsTest.java
- +3 −0 src/test/java/picard/analysis/CollectWgsMetricsTest.java
- +342 −0 src/test/java/picard/fingerprint/CrosscheckFingerprintsTest.java
- +271 −0 src/test/java/picard/fingerprint/CrosscheckReadGroupFingerprintsTest.java
- +54 −0 src/test/java/picard/fingerprint/FingerprintCheckerTest.java
- +2 −0 src/test/java/picard/sam/FilterSamReadsTest.java
- +1 −1 src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java
- +76 −0 src/test/java/picard/util/GraphUtilsTest.java
- +33 −0 src/test/java/picard/vcf/SamTestUtils.java
- +937 −0 testdata/picard/fingerprint/NA12891.over.fingerprints.noRgTag.sam
- +937 −0 testdata/picard/fingerprint/NA12891.over.fingerprints.r1.sam
- +872 −0 testdata/picard/fingerprint/NA12891.over.fingerprints.r2.sam
- +964 −0 testdata/picard/fingerprint/NA12891_named_NA12892.over.fingerprints.r1.sam
- +923 −0 testdata/picard/fingerprint/NA12892.over.fingerprints.r1.sam
- +743 −0 testdata/picard/fingerprint/NA12892.over.fingerprints.r2.sam
- +96 −0 testdata/picard/fingerprint/aligned_queryname_sorted.sam
| @@ -0,0 +1,135 @@ | |||
| + | |||
| +/* | |||
| + * The MIT License | |||
| + * | |||
| + * Copyright (c) 2017 The Broad Institute | |||
| + * | |||
| + * Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| + * of this software and associated documentation files (the "Software"), to deal | |||
| + * in the Software without restriction, including without limitation the rights | |||
| + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| + * copies of the Software, and to permit persons to whom the Software is | |||
| + * furnished to do so, subject to the following conditions: | |||
| + * | |||
| + * The above copyright notice and this permission notice shall be included in | |||
| + * all copies or substantial portions of the Software. | |||
| + * | |||
| + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |||
| + * THE SOFTWARE. | |||
| + */ | |||
| + | |||
| +package picard.fingerprint; | |||
| + | |||
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.Fingerprinting;
import picard.util.GraphUtils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
| + | |||
| +/** | |||
| + * Program to check that all (read-)groups within the set of input files appear to come from the same | |||
| + * individual. Can be used to cross-check libraries, samples, or files. | |||
| + * | |||
| + * @author Yossi Farjoun | |||
| + */ | |||
| +@CommandLineProgramProperties( | |||
| + usage = "Clusters the results from a CrosscheckFingerprints into groups that are connected according " + | |||
| + "to a large enough LOD score.", | |||
| + usageShort = "Clusters the results of a CrosscheckFingerprints run by LOD score.", | |||
| + programGroup = Fingerprinting.class | |||
| +) | |||
| +public class ClusterCrosscheckMetrics extends CommandLineProgram { | |||
| + | |||
| + @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, | |||
| + doc = "The cross-check metrics file to be clustered") | |||
| + public File INPUT; | |||
| + | |||
| + @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional = true, | |||
| + doc = "Optional output file to write metrics to. Default is to write to stdout.") | |||
| + public File OUTPUT; | |||
| + | |||
| + @Option(shortName = "LOD", | |||
| + doc = "LOD score to be used as the threshold for clustering.") | |||
| + public double LOD_THRESHOLD = 0; | |||
| + | |||
| + @Override | |||
| + protected int doWork() { | |||
| + IOUtil.assertFileIsReadable(INPUT); | |||
| + if(OUTPUT != null) IOUtil.assertFileIsWritable(OUTPUT); | |||
| + | |||
| + final MetricsFile<CrosscheckMetric, ?> metricsFile = getMetricsFile(); | |||
| + | |||
| + try { | |||
| + metricsFile.read(new FileReader(INPUT)); | |||
| + } catch (FileNotFoundException e) { | |||
| + e.printStackTrace(); | |||
| + return 1; | |||
| + } | |||
| + | |||
| + clusterMetrics(metricsFile.getMetrics()).write(OUTPUT); | |||
| + | |||
| + return 0; | |||
| + } | |||
| + | |||
| + private MetricsFile<ClusteredCrosscheckMetric, ?> clusterMetrics(final List<CrosscheckMetric> metrics) { | |||
| + final GraphUtils.Graph<String> graph = new GraphUtils.Graph<>(); | |||
| + metrics.stream() | |||
| + .filter(metric -> metric.LOD_SCORE > LOD_THRESHOLD) | |||
| + .forEach(metric -> { | |||
| + final String lhsBy = metric.LEFT_GROUP_VALUE; | |||
| + final String rhsBy = metric.RIGHT_GROUP_VALUE; | |||
| + | |||
| + graph.addEdge(lhsBy, rhsBy); | |||
| + }); | |||
| + | |||
| + final Map<String, Integer> clusters = graph.cluster(); | |||
| + | |||
| + // invert map...get map from group integer to list of group_value | |||
| + final Map<Integer, Set<String>> collection = clusters.entrySet().stream() | |||
| + .collect(Collectors.groupingBy(Map.Entry::getValue)) | |||
| + .entrySet() | |||
| + .stream() | |||
| + .collect(Collectors | |||
| + .toMap(Map.Entry::getKey, entry -> entry.getValue() | |||
| + .stream() | |||
| + .map(Map.Entry::getKey) | |||
| + .collect(Collectors.toSet()))); | |||
| + | |||
| + final MetricsFile<ClusteredCrosscheckMetric, ?> clusteredMetrics = getMetricsFile(); | |||
| + // for each cluster, find the metrics that compare groups that are both from the cluster | |||
| + // and add them to the metrics file | |||
| + for (final Map.Entry<Integer, Set<String>> cluster : collection.entrySet()) { | |||
| + | |||
| + clusteredMetrics.addAllMetrics( | |||
| + metrics.stream() | |||
| + .filter(metric -> | |||
| + cluster.getValue().contains(metric.LEFT_GROUP_VALUE) && | |||
| + cluster.getValue().contains(metric.RIGHT_GROUP_VALUE)) | |||
| + .map(metric -> { | |||
| + final ClusteredCrosscheckMetric clusteredCrosscheckMetric = new ClusteredCrosscheckMetric(metric); | |||
| + clusteredCrosscheckMetric.CLUSTER = cluster.getKey(); | |||
| + clusteredCrosscheckMetric.CLUSTER_SIZE = cluster.getValue().size(); | |||
| + | |||
| + return clusteredCrosscheckMetric; | |||
| + }) | |||
| + .collect(Collectors.toSet())); | |||
| + } | |||
| + return clusteredMetrics; | |||
| + } | |||
| +} | |||
| @@ -0,0 +1,52 @@ | |||
| +/* | |||
| + * The MIT License | |||
| + * | |||
| + * Copyright (c) 2015 The Broad Institute | |||
| + * | |||
| + * Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| + * of this software and associated documentation files (the "Software"), to deal | |||
| + * in the Software without restriction, including without limitation the rights | |||
| + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| + * copies of the Software, and to permit persons to whom the Software is | |||
| + * furnished to do so, subject to the following conditions: | |||
| + * | |||
| + * The above copyright notice and this permission notice shall be included in | |||
| + * all copies or substantial portions of the Software. | |||
| + * | |||
| + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |||
| + * THE SOFTWARE. | |||
| + */ | |||
| + | |||
| +package picard.fingerprint; | |||
| + | |||
| +import picard.util.ReflectionUtil; | |||
| + | |||
| +/** | |||
| + * A Metric class to hold the result of clustered crosschecking fingerprints. | |||
| + * The same metric will be used for crosschecking Readgroups, libraries, samples, or files. | |||
| + * | |||
| + * @author Yossi Farjoun | |||
| + */ | |||
| + | |||
| +public class ClusteredCrosscheckMetric extends CrosscheckMetric { | |||
| + // Number indicating the cluster to which the groups within this metric belong. | |||
| + // Metric-lines involving groups that are not in the same cluster should either | |||
| + // be excluded, or given an error-indicating CLUSTER value (perhaps MIN_VALUE?). | |||
| + public Integer CLUSTER; | |||
| + | |||
| + public Integer CLUSTER_SIZE; | |||
| + | |||
| + public ClusteredCrosscheckMetric() { | |||
| + super(); | |||
| + } | |||
| + | |||
| + public ClusteredCrosscheckMetric(CrosscheckMetric metric) { | |||
| + super(); | |||
| + ReflectionUtil.copyFromBaseClass(metric, this); | |||
| + } | |||
| +} | |||
Oops, something went wrong.
0 comments on commit
2409b8f