Permalink
Browse files

Include tumor-aware results when results have been rolled-up (sample …

…or library) (#642)


* new CLP CrosscheckFingerprints can crosscheck bams and vcfs
* added option to crosscheck by input file
* added choice of output format: Matrix or Metric File. The Metric File output contains the LOD score as well as the tumor-aware LOD scores.
* Includes tumor-aware results when results are emitted as a metric file
* new CLP ClusterCrosscheckMetrics that will take the metric output of CrosscheckFingerprints and find clusters of groups that connect with a high LOD
* tests refactored
* general purpose graph-clustering algorithm added
* removed deprecated GenotypeReader class
* added a deprecated wrapper named CrosscheckReadGroupFingerprints for backward compatibility
* crosscheck now allows missing RG tags in bam if VALIDATION_STRINGENCY is not STRICT.
  • Loading branch information...
1 parent d9e5eb9 commit 2409b8f72c8bab125569cd20bfc74e25ca2df08b @yfarjoun yfarjoun committed on GitHub Jun 27, 2017
Showing with 7,591 additions and 415 deletions.
  1. +1 −1 build.gradle
  2. +1 −1 src/main/java/picard/fingerprint/CheckFingerprint.java
  3. +135 −0 src/main/java/picard/fingerprint/ClusterCrosscheckMetrics.java
  4. +52 −0 src/main/java/picard/fingerprint/ClusteredCrosscheckMetric.java
  5. +396 −0 src/main/java/picard/fingerprint/CrosscheckFingerprints.java
  6. +110 −0 src/main/java/picard/fingerprint/CrosscheckMetric.java
  7. +40 −251 src/main/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java
  8. +286 −106 src/main/java/picard/fingerprint/FingerprintChecker.java
  9. +135 −0 src/main/java/picard/fingerprint/FingerprintIdDetails.java
  10. +14 −53 src/main/java/picard/sam/markduplicates/UmiGraph.java
  11. +1 −1 src/main/java/picard/util/BaitDesigner.java
  12. +113 −0 src/main/java/picard/util/GraphUtils.java
  13. +49 −0 src/main/java/picard/util/ReflectionUtil.java
  14. +4 −1 src/test/java/picard/analysis/CollectGcBiasMetricsTest.java
  15. +3 −0 src/test/java/picard/analysis/CollectWgsMetricsTest.java
  16. +342 −0 src/test/java/picard/fingerprint/CrosscheckFingerprintsTest.java
  17. +271 −0 src/test/java/picard/fingerprint/CrosscheckReadGroupFingerprintsTest.java
  18. +54 −0 src/test/java/picard/fingerprint/FingerprintCheckerTest.java
  19. +2 −0 src/test/java/picard/sam/FilterSamReadsTest.java
  20. +1 −1 src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java
  21. +76 −0 src/test/java/picard/util/GraphUtilsTest.java
  22. +33 −0 src/test/java/picard/vcf/SamTestUtils.java
  23. +937 −0 testdata/picard/fingerprint/NA12891.over.fingerprints.noRgTag.sam
  24. +937 −0 testdata/picard/fingerprint/NA12891.over.fingerprints.r1.sam
  25. +872 −0 testdata/picard/fingerprint/NA12891.over.fingerprints.r2.sam
  26. +964 −0 testdata/picard/fingerprint/NA12891_named_NA12892.over.fingerprints.r1.sam
  27. +923 −0 testdata/picard/fingerprint/NA12892.over.fingerprints.r1.sam
  28. +743 −0 testdata/picard/fingerprint/NA12892.over.fingerprints.r2.sam
  29. +96 −0 testdata/picard/fingerprint/aligned_queryname_sorted.sam
View
@@ -160,7 +160,7 @@ ext.commandClasses = ["picard.sam.AddCommentsToBam", "picard.sam.AddOrReplaceRea
"picard.analysis.CollectWgsMetricsWithNonZeroCoverage", "picard.analysis.CompareMetrics", "picard.sam.CompareSAMs",
"picard.analysis.artifacts.ConvertSequencingArtifactToOxoG", "picard.sam.CreateSequenceDictionary", "picard.sam.DownsampleSam",
"picard.illumina.ExtractIlluminaBarcodes", "picard.sam.markduplicates.EstimateLibraryComplexity", "picard.sam.FastqToSam", "picard.util.FifoBuffer",
- "picard.vcf.MendelianViolations.FindMendelianViolations",
+ "picard.vcf.MendelianViolations.FindMendelianViolations","picard.fingerprint.CrosscheckFingerprints", "picard.fingerprint.ClusterCrosscheckMetrics", "picard.fingerprint.CheckFingerprint",
"picard.sam.FilterSamReads", "picard.vcf.filter.FilterVcf", "picard.sam.FixMateInformation", "picard.sam.GatherBamFiles", "picard.vcf.GatherVcfs",
"picard.vcf.GenotypeConcordance", "picard.illumina.IlluminaBasecallsToFastq", "picard.illumina.IlluminaBasecallsToSam", "picard.illumina.CheckIlluminaDirectory",
"picard.sam.CheckTerminatorBlock", "picard.util.IntervalListTools", "picard.util.LiftOverIntervalList", "picard.vcf.LiftoverVcf", "picard.vcf.MakeSitesOnlyVcf",
@@ -292,7 +292,7 @@ else if (!observedSampleAlias.equals(rec.getSample())) {
return super.customCommandLineValidation();
}
/** Returns true if {@code f} is accepted by {@code BamFileIoUtils.isBamFile} or its name ends with {@code IOUtil.SAM_FILE_EXTENSION}. */
- private boolean isBamOrSamFile(final File f) {
+ static boolean isBamOrSamFile(final File f) {
return (BamFileIoUtils.isBamFile(f) || f.getName().endsWith(IOUtil.SAM_FILE_EXTENSION));
}
}
@@ -0,0 +1,135 @@
+
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2017 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
+import picard.cmdline.CommandLineProgram;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.Option;
+import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.Fingerprinting;
+import picard.util.GraphUtils;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Program to check that all (read-)groups within the set of input files appear to come from the same
+ * individual. Can be used to cross-check libraries, samples, or files.
+ *
+ * @author Yossi Farjoun
+ */
+@CommandLineProgramProperties(
+ usage = "Clusters the results from a CrosscheckFingerprints into groups that are connected according " +
+ "to a large enough LOD score.",
+ usageShort = "Clusters the results of a CrosscheckFingerprints run by LOD score.",
+ programGroup = Fingerprinting.class
+)
+public class ClusterCrosscheckMetrics extends CommandLineProgram {
+
+ @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
+ doc = "The cross-check metrics file to be clustered")
+ public File INPUT;
+
+ @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional = true,
+ doc = "Optional output file to write metrics to. Default is to write to stdout.")
+ public File OUTPUT;
+
+ @Option(shortName = "LOD",
+ doc = "LOD score to be used as the threshold for clustering.")
+ public double LOD_THRESHOLD = 0;
+
+ @Override
+ protected int doWork() {
+ IOUtil.assertFileIsReadable(INPUT);
+ if(OUTPUT != null) IOUtil.assertFileIsWritable(OUTPUT);
+
+ final MetricsFile<CrosscheckMetric, ?> metricsFile = getMetricsFile();
+
+ try {
+ metricsFile.read(new FileReader(INPUT));
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ return 1;
+ }
+
+ clusterMetrics(metricsFile.getMetrics()).write(OUTPUT);
+
+ return 0;
+ }
+
+ private MetricsFile<ClusteredCrosscheckMetric, ?> clusterMetrics(final List<CrosscheckMetric> metrics) {
+ final GraphUtils.Graph<String> graph = new GraphUtils.Graph<>();
+ metrics.stream()
+ .filter(metric -> metric.LOD_SCORE > LOD_THRESHOLD)
+ .forEach(metric -> {
+ final String lhsBy = metric.LEFT_GROUP_VALUE;
+ final String rhsBy = metric.RIGHT_GROUP_VALUE;
+
+ graph.addEdge(lhsBy, rhsBy);
+ });
+
+ final Map<String, Integer> clusters = graph.cluster();
+
+ // invert map...get map from group integer to list of group_value
+ final Map<Integer, Set<String>> collection = clusters.entrySet().stream()
+ .collect(Collectors.groupingBy(Map.Entry::getValue))
+ .entrySet()
+ .stream()
+ .collect(Collectors
+ .toMap(Map.Entry::getKey, entry -> entry.getValue()
+ .stream()
+ .map(Map.Entry::getKey)
+ .collect(Collectors.toSet())));
+
+ final MetricsFile<ClusteredCrosscheckMetric, ?> clusteredMetrics = getMetricsFile();
+ // for each cluster, find the metrics that compare groups that are both from the cluster
+ // and add them to the metrics file
+ for (final Map.Entry<Integer, Set<String>> cluster : collection.entrySet()) {
+
+ clusteredMetrics.addAllMetrics(
+ metrics.stream()
+ .filter(metric ->
+ cluster.getValue().contains(metric.LEFT_GROUP_VALUE) &&
+ cluster.getValue().contains(metric.RIGHT_GROUP_VALUE))
+ .map(metric -> {
+ final ClusteredCrosscheckMetric clusteredCrosscheckMetric = new ClusteredCrosscheckMetric(metric);
+ clusteredCrosscheckMetric.CLUSTER = cluster.getKey();
+ clusteredCrosscheckMetric.CLUSTER_SIZE = cluster.getValue().size();
+
+ return clusteredCrosscheckMetric;
+ })
+ .collect(Collectors.toSet()));
+ }
+ return clusteredMetrics;
+ }
+}
@@ -0,0 +1,52 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import picard.util.ReflectionUtil;
+
+/**
+ * Metric row describing one crosscheck comparison together with the cluster it was assigned to.
+ * As with the base metric, the compared groups may be read groups, libraries, samples, or files.
+ *
+ * @author Yossi Farjoun
+ */
+public class ClusteredCrosscheckMetric extends CrosscheckMetric {
+
+    /**
+     * Identifier of the cluster that both groups in this metric belong to.
+     * Metric lines whose groups are not in the same cluster should either be excluded
+     * or given an error-indicating CLUSTER value (perhaps MIN_VALUE?).
+     */
+    public Integer CLUSTER;
+
+    /** Size of the cluster identified by {@link #CLUSTER}; assigned by the code that performs the clustering. */
+    public Integer CLUSTER_SIZE;
+
+    /** Creates a metric with no cluster information set. */
+    public ClusteredCrosscheckMetric() {
+    }
+
+    /**
+     * Creates a clustered metric seeded from an existing crosscheck metric.
+     * The copy is delegated to {@code ReflectionUtil.copyFromBaseClass}; the cluster fields are left unset.
+     *
+     * @param metric the crosscheck metric to copy from
+     */
+    public ClusteredCrosscheckMetric(final CrosscheckMetric metric) {
+        ReflectionUtil.copyFromBaseClass(metric, this);
+    }
+}
Oops, something went wrong.

0 comments on commit 2409b8f

Please sign in to comment.