Skip to content

Commit

Permalink
More informative error messages when dictionaries don't match (#1870)
Browse files Browse the repository at this point in the history
  • Loading branch information
kachulis committed May 11, 2023
1 parent 78892bf commit e67fe94
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 31 deletions.
9 changes: 6 additions & 3 deletions src/main/java/picard/analysis/CollectOxoGMetrics.java
Expand Up @@ -40,12 +40,12 @@
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import htsjdk.samtools.util.SequenceUtil;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.DbSnpBitSetUtil;
import picard.util.help.HelpConstants;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.*;
Expand Down Expand Up @@ -239,8 +239,11 @@ protected int doWork() {
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(),
refWalker.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
in.getFileHeader().getSequenceDictionary(),
INPUT.getAbsolutePath(),
refWalker.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath());
}

final Set<String> samples = new HashSet<>();
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/picard/analysis/CollectWgsMetrics.java
Expand Up @@ -48,6 +48,7 @@
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.argumentcollections.IntervalArgumentCollection;
Expand All @@ -57,6 +58,7 @@
import picard.filter.CountingFilter;
import picard.filter.CountingMapQFilter;
import picard.filter.CountingPairedFilter;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.ArrayList;
Expand Down Expand Up @@ -211,7 +213,11 @@ protected int doWork() {

// Verify the sequence dictionaries match
if (!this.header.getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(this.header.getSequenceDictionary(), refWalker.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
this.header.getSequenceDictionary(),
INPUT.getAbsolutePath(),
refWalker.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath());
}

final List<SamRecordFilter> filters = new ArrayList<>();
Expand Down
9 changes: 6 additions & 3 deletions src/main/java/picard/analysis/SinglePassSamProgram.java
Expand Up @@ -35,14 +35,14 @@
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.argumentcollections.OutputArgumentCollection;
import picard.cmdline.argumentcollections.RequiredOutputArgumentCollection;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.Arrays;
Expand Down Expand Up @@ -114,8 +114,11 @@ public static void makeItSo(final File input,
walker = new ReferenceSequenceFileWalker(referenceSequence);

if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(),
walker.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
in.getFileHeader().getSequenceDictionary(),
input.getAbsolutePath(),
walker.getSequenceDictionary(),
referenceSequence.getAbsolutePath());
}
}

Expand Down
24 changes: 15 additions & 9 deletions src/main/java/picard/analysis/directed/CollectTargetedMetrics.java
Expand Up @@ -15,12 +15,14 @@
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import org.broadinstitute.barclay.argparser.Argument;
import picard.PicardException;
import picard.analysis.MetricAccumulationLevel;
import picard.analysis.TheoreticalSensitivity;
import picard.analysis.TheoreticalSensitivityMetrics;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.metrics.MultilevelMetrics;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.*;
Expand Down Expand Up @@ -121,22 +123,26 @@ protected int doWork() {
final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);

// Validate that the targets and baits have the same references as the reads file
SequenceUtil.assertSequenceDictionariesEqual(
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
targetIntervals.getHeader().getSequenceDictionary());
SequenceUtil.assertSequenceDictionariesEqual(
INPUT.getAbsolutePath(),
targetIntervals.getHeader().getSequenceDictionary(),
"target intervals");
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
getProbeIntervals().getHeader().getSequenceDictionary()
);
INPUT.getAbsolutePath(),
getProbeIntervals().getHeader().getSequenceDictionary(),
"probe intervals");

ReferenceSequenceFile ref = null;
if (REFERENCE_SEQUENCE != null) {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(),
INPUT, REFERENCE_SEQUENCE
);
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
INPUT.getAbsolutePath(),
ref.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath());
}

final COLLECTOR collector = makeCollector(
Expand Down
Expand Up @@ -45,7 +45,6 @@
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.utils.ValidationUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
Expand All @@ -67,6 +66,7 @@
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.filter.CountingPairedFilter;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.HashMap;
Expand Down Expand Up @@ -207,8 +207,11 @@ protected int doWork() {
final VCFFileReader vcf = new VCFFileReader(VCF, false);
final VCFHeader vcfFileHeader = vcf.getFileHeader();


SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), vcfFileHeader.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
in.getFileHeader().getSequenceDictionary(),
INPUT.getAbsolutePath(),
vcfFileHeader.getSequenceDictionary(),
VCF.getAbsolutePath());

final List<String> samples = vcfFileHeader.getSampleNamesInOrder();

Expand Down
13 changes: 10 additions & 3 deletions src/main/java/picard/fingerprint/CheckFingerprint.java
Expand Up @@ -248,9 +248,16 @@ protected int doWork() {
final FingerprintChecker checker = new FingerprintChecker(HAPLOTYPE_MAP);
checker.setReferenceFasta(REFERENCE_SEQUENCE);

SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(inputPath), SAMSequenceDictionaryExtractor.extractDictionary(genotypesPath), true);
SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(inputPath), checker.getHeader().getSequenceDictionary(), true);

try {
SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(inputPath), SAMSequenceDictionaryExtractor.extractDictionary(genotypesPath), true);
} catch (final SequenceUtil.SequenceListsDifferException e) {
throw new PicardException("Dictionary in " + inputPath + " does not match dictionary in " + genotypesPath, e);
}
try {
SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(inputPath), checker.getHeader().getSequenceDictionary(), true);
} catch (final SequenceUtil.SequenceListsDifferException e) {
throw new PicardException("Dictionary in " + inputPath + " does not match dictionary in " + HAPLOTYPE_MAP, e);
}


final String observedSampleAlias = extractObservedSampleName(inputPath, OBSERVED_SAMPLE_ALIAS);
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/picard/fingerprint/FingerprintChecker.java
Expand Up @@ -223,9 +223,13 @@ private void checkDictionaryGoodForFingerprinting(final SAMSequenceDictionary se
final SAMSequenceDictionary activeDictionary = getActiveDictionary(haplotypes);

if (sequenceDictionaryToCheck.getSequences().size() < activeDictionary.size()) {
throw new IllegalArgumentException("Dictionary on fingerprinted file smaller than that on Haplotype Database!");
throw new SequenceUtil.SequenceListsDifferException("Dictionary on fingerprinted file smaller than that on Haplotype Database!");
}
try {
SequenceUtil.assertSequenceDictionariesEqual(activeDictionary, sequenceDictionaryToCheck, true);
} catch (final SequenceUtil.SequenceListsDifferException e) {
throw new PicardException("Dictionary on fingerprinted file does not match dictionary in Haplotype Database.", e);
}
SequenceUtil.assertSequenceDictionariesEqual(activeDictionary, sequenceDictionaryToCheck, true);
}

private static SAMSequenceDictionary getActiveDictionary(final HaplotypeMap haplotypes) {
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/picard/reference/ExtractSequences.java
Expand Up @@ -39,6 +39,7 @@
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import picard.cmdline.programgroups.ReferenceProgramGroup;
import picard.cmdline.StandardOptionDefinitions;
import picard.util.SequenceDictionaryUtils;

import java.io.BufferedWriter;
import java.io.File;
Expand Down Expand Up @@ -93,7 +94,11 @@ protected int doWork() {

final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
intervals.getHeader().getSequenceDictionary(),
INTERVAL_LIST.getAbsolutePath(),
ref.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath());

final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

Expand Down
7 changes: 5 additions & 2 deletions src/main/java/picard/util/BaitDesigner.java
Expand Up @@ -450,8 +450,11 @@ protected int doWork() {
final ReferenceSequenceFileWalker referenceWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);

// Check that the reference and the target list have matching headers
SequenceUtil.assertSequenceDictionariesEqual(referenceWalker.getSequenceDictionary(),
targets.getHeader().getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
referenceWalker.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath(),
targets.getHeader().getSequenceDictionary(),
TARGETS.getAbsolutePath());

// Design the baits!
int discardedBaits = 0;
Expand Down
26 changes: 26 additions & 0 deletions src/main/java/picard/util/SequenceDictionaryUtils.java
Expand Up @@ -23,6 +23,7 @@
*/
package picard.util;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceDictionaryCodec;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.ReferenceSequence;
Expand All @@ -32,6 +33,7 @@
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;

Expand Down Expand Up @@ -192,6 +194,30 @@ public static SortingCollection<String> makeSortingCollection() {
);
}

/**
* Throw an exception if the two provided sequence dictionaries are not equal.
*
* @param firstDict first dictionary to compare
* @param firstDictSource a user-recognizable message identifying the source of the first dictionary, preferably a file path
* @param secondDict second dictionary to compare
* @param secondDictSource a user-recognizable message identifying the source of the second dictionary, preferably a file path
*/
public static void assertSequenceDictionariesEqual(
final SAMSequenceDictionary firstDict,
final String firstDictSource,
final SAMSequenceDictionary secondDict,
final String secondDictSource) {
try {
SequenceUtil.assertSequenceDictionariesEqual(firstDict, secondDict);
} catch (final SequenceUtil.SequenceListsDifferException e) {
throw new PicardException(
String.format("Sequence dictionary for (%s) does not match sequence dictionary for (%s)",
firstDictSource,
secondDictSource),
e);
}
}

private static class StringCodec implements SortingCollection.Codec<String> {
private DataInputStream dis;
private DataOutputStream dos;
Expand Down
14 changes: 11 additions & 3 deletions src/main/java/picard/vcf/GenotypeConcordance.java
Expand Up @@ -31,7 +31,6 @@
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.tribble.Tribble;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
Expand All @@ -56,6 +55,7 @@
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.VariantEvaluationProgramGroup;
import picard.nio.PicardHtsPath;
import picard.util.SequenceDictionaryUtils;
import picard.vcf.GenotypeConcordanceStates.CallState;
import picard.vcf.GenotypeConcordanceStates.ContingencyState;
import picard.vcf.GenotypeConcordanceStates.TruthAndCallStates;
Expand Down Expand Up @@ -381,13 +381,21 @@ private boolean indexExists(final Path vcf) {
}

// Verify that both VCFs have the same Sequence Dictionary
SequenceUtil.assertSequenceDictionariesEqual(truthReader.getFileHeader().getSequenceDictionary(), callReader.getFileHeader().getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
truthReader.getFileHeader().getSequenceDictionary(),
TRUTH_VCF.getRawInputString(),
callReader.getFileHeader().getSequenceDictionary(),
CALL_VCF.getRawInputString());

final Optional<VariantContextWriter> writer = getVariantContextWriter(truthReader, callReader);

if (usingIntervals) {
// If using intervals, verify that the sequence dictionaries agree with those of the VCFs
SequenceUtil.assertSequenceDictionariesEqual(intervalsSamSequenceDictionary, truthReader.getFileHeader().getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
intervalsSamSequenceDictionary,
"provided intervals",
truthReader.getFileHeader().getSequenceDictionary(),
TRUTH_VCF.getRawInputString());
}

// Build the pair of iterators over the regions of interest
Expand Down
3 changes: 2 additions & 1 deletion src/test/java/picard/analysis/CollectWgsMetricsTest.java
Expand Up @@ -40,6 +40,7 @@
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.PicardException;
import picard.cmdline.CommandLineProgramTest;
import picard.sam.SortSam;
import picard.util.TestNGUtil;
Expand Down Expand Up @@ -530,7 +531,7 @@ public void testAdapterReads(final String useFastAlgorithm) throws IOException {
}
}

@Test(expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
@Test(expectedExceptions = PicardException.class)
public void testFailDifferentSequenceDictionaries() throws IOException {
final File input = new File(TEST_DIR, "forMetrics.sam");
final File outfile = getTempOutputFile("test", ".wgs_metrics");
Expand Down
48 changes: 48 additions & 0 deletions src/test/java/picard/util/SequenceDictionaryUtilsTest.java
@@ -0,0 +1,48 @@
package picard.util;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.PicardException;

import java.util.List;

public class SequenceDictionaryUtilsTest {

@Test
public void testSequenceDictionaryPositive() {
final SAMSequenceDictionary s1 = new SAMSequenceDictionary(
List.of(
new SAMSequenceRecord("chr1", 10),
new SAMSequenceRecord("chr2", 15),
new SAMSequenceRecord("chr3", 20)
)
);
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
s1,
"s1",
new SAMSequenceDictionary(s1.getSequences()),
"s2");
}

@DataProvider
public Object[][] sequenceDictionaryNegativeProvider() {
return new Object[][]{
{
new SAMSequenceDictionary(List.of(new SAMSequenceRecord("chr1", 100))),
new SAMSequenceDictionary(List.of(new SAMSequenceRecord("chr1", 200)))
},
{
new SAMSequenceDictionary(List.of(new SAMSequenceRecord("chr1", 10))),
new SAMSequenceDictionary(List.of(new SAMSequenceRecord("chr2", 10)))
},
};
}

@Test(dataProvider = "sequenceDictionaryNegativeProvider", expectedExceptions = PicardException.class)
public void testSequenceDictionaryNegative(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2 ) {
SequenceDictionaryUtils.assertSequenceDictionariesEqual(s1, "s1", s2, "s2");
}

}

0 comments on commit e67fe94

Please sign in to comment.