Skip to content

Commit

Permalink
fix DI for queryname sorted, tests, refactor tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kachulis committed Nov 16, 2022
1 parent 9009129 commit 428254f
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 229 deletions.
4 changes: 3 additions & 1 deletion src/main/java/picard/sam/markduplicates/MarkDuplicates.java
Expand Up @@ -861,7 +861,9 @@ private void addRepresentativeReadIndex(final List<ReadEndsForMarkDuplicates> li
// for read name (for representative read name), add the last of the pair that was examined
for (final ReadEndsForMarkDuplicates end : list) {
addRepresentativeReadOfDuplicateSet(best.read1IndexInFile, list.size(), end.read1IndexInFile);
// addRepresentativeReadOfDuplicateSet(best.read1IndexInFile, list.size(), end.read2IndexInFile);
if (end.read1IndexInFile != end.read2IndexInFile) {
addRepresentativeReadOfDuplicateSet(best.read1IndexInFile, list.size(), end.read2IndexInFile);
}
}
}

Expand Down
Expand Up @@ -29,6 +29,7 @@
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.FormatUtil;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import picard.cmdline.CommandLineProgram;
Expand All @@ -40,7 +41,11 @@
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
Expand All @@ -53,6 +58,7 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
final DuplicationMetrics expectedMetrics;

boolean testOpticalDuplicateDTTag = false;
final public Map<List<String>, Double> expectedSetSizeMap = new HashMap<>();

public AbstractMarkDuplicatesCommandLineProgramTester(final ScoringStrategy duplicateScoringStrategy, SAMFileHeader.SortOrder sortOrder) {
this(duplicateScoringStrategy, sortOrder, true);
Expand Down Expand Up @@ -139,26 +145,44 @@ public void setExpectedOpticalDuplicate(final int expectedOpticalDuplicatePairs)
expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES = expectedOpticalDuplicatePairs;
}

@Override
public void test() throws IOException {
testMetrics();

/**
* This method is called before iterating through output records in test method.
* Should be used to update any expectations to be used in implementation
* specific tests.
**/
void updateExpectationsHook() {
}

/**
* Runs test and returns metrics
* @return Duplication metrics
* @throws IOException
* This method is called for each record in the output. Should be used
* to perform any additional implementation specific tests not included
* in default test.
**/
void testRecordHook(final SAMRecord record){
}

/**
* This method is called after iterating through output records.
* Should be used to test any implementation specific methods not
* included in default test
*/
public MetricsFile<DuplicationMetrics, Double> testMetrics() throws IOException {
void testOutputsHook() {
}

@Override
public void test() throws IOException {
try {
updateExpectedDuplicationMetrics();
updateExpectationsHook();

// Read the output and check the duplicate flag
int outputRecords = 0;
final Set<String> sequencingDTErrorsSeen = new HashSet<>();
try(final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(fastaFiles.get(samRecordSetBuilder.getHeader())).open(getOutput())) {
for (final SAMRecord record : reader) {
outputRecords++;
testRecordHook(record);
final String key = samRecordToDuplicatesFlagsKey(record);
Assert.assertTrue(this.duplicateFlags.containsKey(key), "DOES NOT CONTAIN KEY: " + key);
final boolean value = this.duplicateFlags.get(key);
Expand Down Expand Up @@ -197,7 +221,29 @@ public MetricsFile<DuplicationMetrics, Double> testMetrics() throws IOException
Assert.assertEquals(sequencingDTErrorsSeen.size(), expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match duplicate groups observed in the file");
Assert.assertEquals(sequencingDTErrorsSeen.size(), observedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match duplicate groups observed in the file");
}
return metricsOutput;

if (!expectedSetSizeMap.isEmpty()) {
boolean checked = false;
for (final Histogram<Double> histo : metricsOutput.getAllHistograms()) {
final String label = histo.getValueLabel();
for (final Double bin : histo.keySet()) {
final String binStr = String.valueOf(bin);
final List<String> labelBinStr = Arrays.asList(label, binStr);
if (expectedSetSizeMap.containsKey(labelBinStr)) {
checked = true;
Histogram.Bin<Double> binValue = histo.get(bin);
final double actual = binValue.getValue();
final double expected = expectedSetSizeMap.get(labelBinStr);
Assert.assertEquals(actual, expected);
}
}
}
if (!checked) {
Assert.fail("Could not not find matching entry for expectedSetSizeMap in metrics.");
}
}

testOutputsHook();
} finally {
IOUtil.recursiveDelete(getOutputDir().toPath());
}
Expand Down
Expand Up @@ -84,9 +84,8 @@ public CollectDuplicateMetricsTester( final SAMFileHeader.SortOrder sortOrder) {
public void recordOpticalDuplicatesMarked() {}

@Override
public void test() throws IOException {
void updateExpectationsHook() {
// CollectDuplicateMetrics cannot identify Optical duplicates....
setExpectedOpticalDuplicate(0);
super.test();
}
}
Expand Up @@ -35,16 +35,16 @@
*/
public class MarkDuplicatesSetSizeHistogramTest extends AbstractMarkDuplicatesCommandLineProgramTest {
@Override
protected MarkDuplicatesSetSizeHistogramTester getTester() {
return new MarkDuplicatesSetSizeHistogramTester();
protected MarkDuplicatesTester getTester() {
return new MarkDuplicatesTester();
}

@Test
public void TestSingleSet() {
// This tests checks that if I add two read pairs with the same alignment start, a duplicate
// set size entry of 2 is put into the histogram. The pair is an optical dup, so a 2 entry for
// the 'optical_sets' should also be marked
final MarkDuplicatesSetSizeHistogramTester tester = getTester();
final MarkDuplicatesTester tester = getTester();
tester.setExpectedOpticalDuplicate(1);
String representativeReadName = "RUNID:1:1:16020:13352";
tester.addMatePair(representativeReadName, 1, 485253, 485253, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
Expand All @@ -59,7 +59,7 @@ public void TestSingleSet() {
public void testOpticalAndNonOpticalSet() {
// This tests checks that if I have two optical dups and one non-optical in the same dup set that the correct histogram entries
// are specified
final MarkDuplicatesSetSizeHistogramTester tester = getTester();
final MarkDuplicatesTester tester = getTester();
tester.setExpectedOpticalDuplicate(2);
String representativeReadName = "RUNID:1:1:16020:13352";
tester.addMatePair(representativeReadName, 1, 485253, 485253, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
Expand All @@ -75,7 +75,7 @@ public void testOpticalAndNonOpticalSet() {
@Test
public void testSingleton() {
// This tests checks if having a read pair that is not duplicated, the correct histogram entry is updated
final MarkDuplicatesSetSizeHistogramTester tester = getTester();
final MarkDuplicatesTester tester = getTester();
tester.setExpectedOpticalDuplicate(0);
// Add non-duplicate read
tester.addMatePair("RUNID:1:1:15993:13375", 1, 485255, 485255, false, false, false, false, "43M2S", "43M2S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
Expand All @@ -86,7 +86,7 @@ public void testSingleton() {
@Test
public void testMultiRepresentativeReadTags() {
// This tests checks multiple different duplicate sets of varying sizes
final MarkDuplicatesSetSizeHistogramTester tester = getTester();
final MarkDuplicatesTester tester = getTester();
tester.setExpectedOpticalDuplicate(3);
// Duplicate set: size 2 - all optical
String representativeReadName1 = "RUNID:1:1:16020:13352";
Expand Down

This file was deleted.

0 comments on commit 428254f

Please sign in to comment.