Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More informative error messages when dictionaries don't match #1870

Merged
merged 4 commits into from May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/main/java/picard/analysis/CollectOxoGMetrics.java
Expand Up @@ -40,12 +40,12 @@
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import htsjdk.samtools.util.SequenceUtil;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.DbSnpBitSetUtil;
import picard.util.help.HelpConstants;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.*;
Expand Down Expand Up @@ -239,8 +239,11 @@ protected int doWork() {
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(),
refWalker.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
in.getFileHeader().getSequenceDictionary(),
INPUT.getAbsolutePath(),
refWalker.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath());
}

final Set<String> samples = new HashSet<>();
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/picard/analysis/CollectWgsMetrics.java
Expand Up @@ -48,6 +48,7 @@
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.argumentcollections.IntervalArgumentCollection;
Expand All @@ -57,6 +58,7 @@
import picard.filter.CountingFilter;
import picard.filter.CountingMapQFilter;
import picard.filter.CountingPairedFilter;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.ArrayList;
Expand Down Expand Up @@ -211,7 +213,11 @@ protected int doWork() {

// Verify the sequence dictionaries match
if (!this.header.getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(this.header.getSequenceDictionary(), refWalker.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
this.header.getSequenceDictionary(),
INPUT.getAbsolutePath(),
refWalker.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath());
}

final List<SamRecordFilter> filters = new ArrayList<>();
Expand Down
9 changes: 6 additions & 3 deletions src/main/java/picard/analysis/SinglePassSamProgram.java
Expand Up @@ -35,14 +35,14 @@
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.argumentcollections.OutputArgumentCollection;
import picard.cmdline.argumentcollections.RequiredOutputArgumentCollection;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.Arrays;
Expand Down Expand Up @@ -114,8 +114,11 @@ public static void makeItSo(final File input,
walker = new ReferenceSequenceFileWalker(referenceSequence);

if (!in.getFileHeader().getSequenceDictionary().isEmpty()) {
SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(),
walker.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
in.getFileHeader().getSequenceDictionary(),
input.getAbsolutePath(),
walker.getSequenceDictionary(),
referenceSequence.getAbsolutePath());
}
}

Expand Down
24 changes: 15 additions & 9 deletions src/main/java/picard/analysis/directed/CollectTargetedMetrics.java
Expand Up @@ -15,12 +15,14 @@
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import org.broadinstitute.barclay.argparser.Argument;
import picard.PicardException;
import picard.analysis.MetricAccumulationLevel;
import picard.analysis.TheoreticalSensitivity;
import picard.analysis.TheoreticalSensitivityMetrics;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.metrics.MultilevelMetrics;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.*;
Expand Down Expand Up @@ -121,22 +123,26 @@ protected int doWork() {
final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);

// Validate that the targets and baits have the same references as the reads file
SequenceUtil.assertSequenceDictionariesEqual(
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
targetIntervals.getHeader().getSequenceDictionary());
SequenceUtil.assertSequenceDictionariesEqual(
INPUT.getAbsolutePath(),
targetIntervals.getHeader().getSequenceDictionary(),
"target intervals");
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
getProbeIntervals().getHeader().getSequenceDictionary()
);
INPUT.getAbsolutePath(),
getProbeIntervals().getHeader().getSequenceDictionary(),
"probe intervals");

ReferenceSequenceFile ref = null;
if (REFERENCE_SEQUENCE != null) {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(),
INPUT, REFERENCE_SEQUENCE
);
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
INPUT.getAbsolutePath(),
ref.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath());
}

final COLLECTOR collector = makeCollector(
Expand Down
Expand Up @@ -45,7 +45,6 @@
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.utils.ValidationUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
Expand All @@ -67,6 +66,7 @@
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.filter.CountingPairedFilter;
import picard.util.SequenceDictionaryUtils;

import java.io.File;
import java.util.HashMap;
Expand Down Expand Up @@ -207,8 +207,11 @@ protected int doWork() {
final VCFFileReader vcf = new VCFFileReader(VCF, false);
final VCFHeader vcfFileHeader = vcf.getFileHeader();


SequenceUtil.assertSequenceDictionariesEqual(in.getFileHeader().getSequenceDictionary(), vcfFileHeader.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
in.getFileHeader().getSequenceDictionary(),
INPUT.getAbsolutePath(),
vcfFileHeader.getSequenceDictionary(),
VCF.getAbsolutePath());

final List<String> samples = vcfFileHeader.getSampleNamesInOrder();

Expand Down
13 changes: 10 additions & 3 deletions src/main/java/picard/fingerprint/CheckFingerprint.java
Expand Up @@ -248,9 +248,16 @@ protected int doWork() {
final FingerprintChecker checker = new FingerprintChecker(HAPLOTYPE_MAP);
checker.setReferenceFasta(REFERENCE_SEQUENCE);

SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(inputPath), SAMSequenceDictionaryExtractor.extractDictionary(genotypesPath), true);
SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(inputPath), checker.getHeader().getSequenceDictionary(), true);

try {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be able to switch to SequenceDictionaryUtils.assertSequenceDictionariesEqual as well, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't change these because they use the extra checkPrefixOnly argument, which the others don't. So rather than create another overload in Picard, I left these as direct calls to htsjdk. If you think its worth it though I could add a second method for those cases though.

Copy link
Contributor Author

@kachulis kachulis May 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh, I see. No I think that's fine in that case.

SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(inputPath), SAMSequenceDictionaryExtractor.extractDictionary(genotypesPath), true);
} catch (final SequenceUtil.SequenceListsDifferException e) {
throw new PicardException("Dictionary in " + inputPath + " does not match dictionary in " + genotypesPath, e);
}
try {
SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(inputPath), checker.getHeader().getSequenceDictionary(), true);
} catch (final SequenceUtil.SequenceListsDifferException e) {
throw new PicardException("Dictionary in " + inputPath + " does not match dictionary in " + HAPLOTYPE_MAP, e);
}


final String observedSampleAlias = extractObservedSampleName(inputPath, OBSERVED_SAMPLE_ALIAS);
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/picard/fingerprint/FingerprintChecker.java
Expand Up @@ -223,9 +223,13 @@ private void checkDictionaryGoodForFingerprinting(final SAMSequenceDictionary se
final SAMSequenceDictionary activeDictionary = getActiveDictionary(haplotypes);

if (sequenceDictionaryToCheck.getSequences().size() < activeDictionary.size()) {
throw new IllegalArgumentException("Dictionary on fingerprinted file smaller than that on Haplotype Database!");
throw new SequenceUtil.SequenceListsDifferException("Dictionary on fingerprinted file smaller than that on Haplotype Database!");
}
try {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and also this?

SequenceUtil.assertSequenceDictionariesEqual(activeDictionary, sequenceDictionaryToCheck, true);
} catch (final SequenceUtil.SequenceListsDifferException e) {
throw new PicardException("Dictionary on fingerprinted file does not match dictionary in Haplotype Database.", e);
}
SequenceUtil.assertSequenceDictionariesEqual(activeDictionary, sequenceDictionaryToCheck, true);
}

private static SAMSequenceDictionary getActiveDictionary(final HaplotypeMap haplotypes) {
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/picard/reference/ExtractSequences.java
Expand Up @@ -39,6 +39,7 @@
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import picard.cmdline.programgroups.ReferenceProgramGroup;
import picard.cmdline.StandardOptionDefinitions;
import picard.util.SequenceDictionaryUtils;

import java.io.BufferedWriter;
import java.io.File;
Expand Down Expand Up @@ -93,7 +94,11 @@ protected int doWork() {

final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
intervals.getHeader().getSequenceDictionary(),
INTERVAL_LIST.getAbsolutePath(),
ref.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath());

final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

Expand Down
7 changes: 5 additions & 2 deletions src/main/java/picard/util/BaitDesigner.java
Expand Up @@ -450,8 +450,11 @@ protected int doWork() {
final ReferenceSequenceFileWalker referenceWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);

// Check that the reference and the target list have matching headers
SequenceUtil.assertSequenceDictionariesEqual(referenceWalker.getSequenceDictionary(),
targets.getHeader().getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
referenceWalker.getSequenceDictionary(),
REFERENCE_SEQUENCE.getAbsolutePath(),
targets.getHeader().getSequenceDictionary(),
TARGETS.getAbsolutePath());

// Design the baits!
int discardedBaits = 0;
Expand Down
26 changes: 26 additions & 0 deletions src/main/java/picard/util/SequenceDictionaryUtils.java
Expand Up @@ -23,6 +23,7 @@
*/
package picard.util;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceDictionaryCodec;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.ReferenceSequence;
Expand All @@ -32,6 +33,7 @@
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;

Expand Down Expand Up @@ -192,6 +194,30 @@ public static SortingCollection<String> makeSortingCollection() {
);
}

/**
* Throw an exception if the two provided sequence dictionaries are not equal.
*
* @param firstDict first dictionary to compare
* @param firstDictSource a user-recognizable message identifying the source of the first dictionary, preferably a file path
* @param secondDict second dictionary to compare
* @param secondDictSource a user-recognizable message identifying the source of the second dictionary, preferably a file path
*/
public static void assertSequenceDictionariesEqual(
final SAMSequenceDictionary firstDict,
final String firstDictSource,
final SAMSequenceDictionary secondDict,
final String secondDictSource) {
try {
SequenceUtil.assertSequenceDictionariesEqual(firstDict, secondDict);
} catch (final SequenceUtil.SequenceListsDifferException e) {
throw new PicardException(
String.format("Sequence dictionary for (%s) does not match sequence dictionary for (%s)",
firstDictSource,
secondDictSource),
e);
}
}

private static class StringCodec implements SortingCollection.Codec<String> {
private DataInputStream dis;
private DataOutputStream dos;
Expand Down
14 changes: 11 additions & 3 deletions src/main/java/picard/vcf/GenotypeConcordance.java
Expand Up @@ -31,7 +31,6 @@
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.tribble.Tribble;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
Expand All @@ -56,6 +55,7 @@
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.VariantEvaluationProgramGroup;
import picard.nio.PicardHtsPath;
import picard.util.SequenceDictionaryUtils;
import picard.vcf.GenotypeConcordanceStates.CallState;
import picard.vcf.GenotypeConcordanceStates.ContingencyState;
import picard.vcf.GenotypeConcordanceStates.TruthAndCallStates;
Expand Down Expand Up @@ -381,13 +381,21 @@ private boolean indexExists(final Path vcf) {
}

// Verify that both VCFs have the same Sequence Dictionary
SequenceUtil.assertSequenceDictionariesEqual(truthReader.getFileHeader().getSequenceDictionary(), callReader.getFileHeader().getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
truthReader.getFileHeader().getSequenceDictionary(),
TRUTH_VCF.getRawInputString(),
callReader.getFileHeader().getSequenceDictionary(),
CALL_VCF.getRawInputString());

final Optional<VariantContextWriter> writer = getVariantContextWriter(truthReader, callReader);

if (usingIntervals) {
// If using intervals, verify that the sequence dictionaries agree with those of the VCFs
SequenceUtil.assertSequenceDictionariesEqual(intervalsSamSequenceDictionary, truthReader.getFileHeader().getSequenceDictionary());
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
intervalsSamSequenceDictionary,
"provided intervals",
truthReader.getFileHeader().getSequenceDictionary(),
TRUTH_VCF.getRawInputString());
}

// Build the pair of iterators over the regions of interest
Expand Down
3 changes: 2 additions & 1 deletion src/test/java/picard/analysis/CollectWgsMetricsTest.java
Expand Up @@ -40,6 +40,7 @@
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.PicardException;
import picard.cmdline.CommandLineProgramTest;
import picard.sam.SortSam;
import picard.util.TestNGUtil;
Expand Down Expand Up @@ -530,7 +531,7 @@ public void testAdapterReads(final String useFastAlgorithm) throws IOException {
}
}

@Test(expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
@Test(expectedExceptions = PicardException.class)
public void testFailDifferentSequenceDictionaries() throws IOException {
final File input = new File(TEST_DIR, "forMetrics.sam");
final File outfile = getTempOutputFile("test", ".wgs_metrics");
Expand Down
48 changes: 48 additions & 0 deletions src/test/java/picard/util/SequenceDictionaryUtilsTest.java
@@ -0,0 +1,48 @@
package picard.util;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.PicardException;

import java.util.List;

public class SequenceDictionaryUtilsTest {

@Test
public void testSequenceDictionaryPositive() {
final SAMSequenceDictionary s1 = new SAMSequenceDictionary(
List.of(
new SAMSequenceRecord("chr1", 10),
new SAMSequenceRecord("chr2", 15),
new SAMSequenceRecord("chr3", 20)
)
);
SequenceDictionaryUtils.assertSequenceDictionariesEqual(
s1,
"s1",
new SAMSequenceDictionary(s1.getSequences()),
"s2");
}

@DataProvider
public Object[][] sequenceDictionaryNegativeProvider() {
return new Object[][]{
{
new SAMSequenceDictionary(List.of(new SAMSequenceRecord("chr1", 100))),
new SAMSequenceDictionary(List.of(new SAMSequenceRecord("chr1", 200)))
},
{
new SAMSequenceDictionary(List.of(new SAMSequenceRecord("chr1", 10))),
new SAMSequenceDictionary(List.of(new SAMSequenceRecord("chr2", 10)))
},
};
}

@Test(dataProvider = "sequenceDictionaryNegativeProvider", expectedExceptions = PicardException.class)
public void testSequenceDictionaryNegative(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2 ) {
SequenceDictionaryUtils.assertSequenceDictionariesEqual(s1, "s1", s2, "s2");
}

}