Skip to content

Commit

Permalink
Merge pull request #930 from broadinstitute/rhl_bad_edge_pileup
Browse files Browse the repository at this point in the history
Bypass edge alignment reads while making pileup
  • Loading branch information
eitanbanks committed Apr 21, 2015
2 parents 718e780 + a1ff4f5 commit 9c33132
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
/**
* Filter out reads with wonky cigar strings.
*
* - No reads with a different length and cigar length
* - No reads with Hard/Soft clips in the middle of the cigar
* - No reads starting with deletions (with or without preceding clips)
* - No reads ending in deletions (with or without follow-up clips)
Expand All @@ -57,6 +58,11 @@ public boolean filterOut(final SAMRecord rec) {
return false;
}

// Read and it's CIGAR not the same length
if ( rec.getReadLength() != c.getReadLength() ) {
return true;
}

Iterator<CigarElement> elementIterator = c.getCigarElements().iterator();

CigarOperator firstOp = CigarOperator.H;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
import org.broadinstitute.gatk.engine.filters.BadCigarFilter;
import org.broadinstitute.gatk.engine.filters.MalformedReadFilter;
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
import org.broadinstitute.gatk.engine.samples.Sample;
Expand All @@ -49,7 +50,7 @@
* Time: 1:53:31 PM
* To change this template use File | Settings | File Templates.
*/
@ReadFilters(MalformedReadFilter.class)
@ReadFilters({MalformedReadFilter.class,BadCigarFilter.class})
@PartitionBy(PartitionType.NONE)
@Downsample(by = DownsampleType.NONE)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,24 @@ public void init() {
}

@Test(enabled = true)
public void testWonkyCigars () {
public void testWonkyCigars() {
for (String cigarString : BAD_CIGAR_LIST) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigarString);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigarString, 0);
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
}
}

@Test(enabled = true)
public void testReadCigarLengthMismatch() {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("4M", 1);
Assert.assertTrue(filter.filterOut(read), read.getCigarString());
}

@Test(enabled = true)
public void testGoodCigars() {
List<Cigar> cigarList = ReadClipperTestUtils.generateCigarList(10);
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
Assert.assertFalse(filter.filterOut(read), read.getCigarString());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ public String toString() {
@DataProvider(name = "PRTest")
public Object[][] createPrintReadsTestData() {
return new Object[][]{
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, "", "fa9c66f66299fe5405512ac36ec9d0f2")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -compress 0", "488eb22abc31c6af7cbb1a3d41da1507")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -simplifyBAM", "1510dc4429f3ed49caf96da41e8ed396")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, "", "5aee1c592f7b0505430df4d4452b8000")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -compress 0", "62a542230502c9e54124ebd46242e252")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -simplifyBAM", "a054a6618ffa8cd2d1113b005335922b")},
{new PRTest(hg18Reference, new String[]{"HiSeq.1mb.bam"}, " -n 10", "0e3d1748ad1cb523e3295cab9d09d8fc")},
// See: GATKBAMIndex.getStartOfLastLinearBin(), BAMScheduler.advance(), IntervalOverlapFilteringIterator.advance()
{new PRTest(b37KGReference, new String[]{"unmappedFlagReadsInLastLinearBin.bam"}, "", "d7f23fd77d7dc7cb50d3397f644c6d8a")},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
"currentCigarElementOffset <= nCigarElements"
})
public class AlignmentStateMachine {

public static final String MAKE_PILEUP_EDGE_ERROR = "Cannot make a pileup element from an edge alignment state";
/**
* Our read
*/
Expand Down Expand Up @@ -359,7 +361,7 @@ public CigarOperator stepForwardOnGenome() {
@Ensures("result != null")
public final PileupElement makePileupElement() {
if ( isLeftEdge() || isRightEdge() )
throw new IllegalStateException("Cannot make a pileup element from an edge alignment state");
throw new IllegalStateException(MAKE_PILEUP_EDGE_ERROR);
return new PileupElement(read,
getReadOffset(),
getCurrentCigarElement(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
* There are a few constraints on required and ensured by LIBS:
*
* -- Requires the Iterator<GATKSAMRecord> to returns reads in coordinate sorted order, consistent with the ordering
* defined by the SAM file format. That that for performance reasons this constraint isn't actually enforced.
* defined by the SAM file format. That for performance reasons this constraint isn't actually enforced.
* The behavior of LIBS is undefined in the case where the reads are badly ordered.
* -- The reads in the ReadBackedPileup are themselves in the order of appearance of the reads from the iterator.
* That is, the pileup is ordered in a way consistent with the SAM coordinate ordering
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,31 @@ public class ReadClipperTestUtils {
new CigarElement(1, CigarOperator.DELETION),
new CigarElement(1, CigarOperator.MATCH_OR_MISMATCH)};

/**
* Make a read fom the CIGAR
*
* @param cigar the CIGAR
* @param lengthChange change in read length relative the CIGAR length
* @return artificial read
*/
public static GATKSAMRecord makeReadFromCigar(Cigar cigar, int lengthChange) {
int readLength = cigar.getReadLength();
if ( readLength >= -lengthChange ) {
readLength += lengthChange;
}

public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, readLength), Utils.arrayFromArrayWithLength(QUALS, readLength), cigar.toString());
}

public static GATKSAMRecord makeReadFromCigar(String cigarString) {
return makeReadFromCigar(CigarUtils.cigarFromString(cigarString));
/**
* Make a read from the CIGAR string
*
* @param cigarString string used to create a CIGAR
* @param lengthChange change in read length relative the CIGAR length
* @return artificial read
*/
public static GATKSAMRecord makeReadFromCigar(String cigarString, int lengthChange) {
return makeReadFromCigar(CigarUtils.cigarFromString(cigarString), lengthChange);
}

public static List<Cigar> generateCigarList(int maximumLength) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public void init() {
@Test(enabled = !DEBUG)
public void testHardClipBothEndsByReferenceCoordinates() {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
int readLength = alnStart - alnEnd;
Expand All @@ -76,7 +76,7 @@ public void testHardClipBothEndsByReferenceCoordinates() {
@Test(enabled = !DEBUG)
public void testHardClipByReadCoordinates() {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
int readLength = read.getReadLength();
for (int i = 0; i < readLength; i++) {
GATKSAMRecord clipLeft = ReadClipper.hardClipByReadCoordinates(read, 0, i);
Expand Down Expand Up @@ -105,7 +105,7 @@ public Object[][] makeClippedReadLengthData() {

@Test(dataProvider = "ClippedReadLengthData", enabled = !DEBUG)
public void testHardClipReadLengthIsRight(final int originalReadLength, final int nToClip) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(originalReadLength + "M");
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(originalReadLength + "M", 0);
read.getReadLength(); // provoke the caching of the read length
final int expectedReadLength = originalReadLength - nToClip;
GATKSAMRecord clipped = ReadClipper.hardClipByReadCoordinates(read, 0, nToClip - 1);
Expand All @@ -117,7 +117,7 @@ public void testHardClipReadLengthIsRight(final int originalReadLength, final in
@Test(enabled = !DEBUG)
public void testHardClipByReferenceCoordinates() {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
int start = read.getSoftStart();
int stop = read.getSoftEnd();

Expand All @@ -140,7 +140,7 @@ public void testHardClipByReferenceCoordinates() {
@Test(enabled = !DEBUG)
public void testHardClipByReferenceCoordinatesLeftTail() {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
Expand All @@ -159,7 +159,7 @@ public void testHardClipByReferenceCoordinatesLeftTail() {
@Test(enabled = !DEBUG)
public void testHardClipByReferenceCoordinatesRightTail() {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
Expand All @@ -181,7 +181,7 @@ public void testHardClipLowQualEnds() {

/** create a read for every cigar permutation */
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
int readLength = read.getReadLength();
byte[] quals = new byte[readLength];

Expand Down Expand Up @@ -221,7 +221,7 @@ public void testHardClipLowQualEnds() {
@Test(enabled = !DEBUG)
public void testHardClipSoftClippedBases() {
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read);
CigarCounter original = new CigarCounter(read);
CigarCounter clipped = new CigarCounter(clippedRead);
Expand All @@ -235,7 +235,7 @@ public void testHardClipSoftClippedBases() {
public void testHardClipLeadingInsertions() {
for (Cigar cigar : cigarList) {
if (startsWithInsertion(cigar)) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read);

assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed
Expand All @@ -259,7 +259,7 @@ public void testRevertSoftClippedBases() {
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
final int tailSoftClips = leadingCigarElementLength(CigarUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);

final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);

assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
Expand All @@ -281,7 +281,7 @@ public void testRevertSoftClippedBasesWithThreshold() {
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
final int tailSoftClips = leadingCigarElementLength(CigarUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);

final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);

assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
Expand Down Expand Up @@ -313,7 +313,7 @@ public void testRevertSoftClippedBasesBeforeStartOfContig(final int softStart, f
final int nMatches = 10;
final int nSoft = -1 * (softStart - alignmentStart);
final String cigar = nSoft + "S" + nMatches + "M";
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0);
read.setAlignmentStart(alignmentStart);

Assert.assertEquals(read.getSoftStart(), softStart);
Expand Down Expand Up @@ -413,7 +413,7 @@ public boolean assertHardClippingSoftClips(CigarCounter clipped) {

@Test(enabled = !DEBUG)
public void testRevertEntirelySoftclippedReads() {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("2H1S3H");
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("2H1S3H", 0);
GATKSAMRecord clippedRead = ReadClipper.revertSoftClippedBases(read);
Assert.assertEquals(clippedRead.getAlignmentStart(), read.getSoftStart());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ public void testAlignmentStateMachineTest(LIBSTest params) {

// TODO -- more tests about test state machine state before first step?
Assert.assertTrue(state.isLeftEdge());
try {
state.makePileupElement();
Assert.fail("makePileupElement should've thrown an exception");
} catch (IllegalStateException e) {
Assert.assertTrue(e.getMessage().indexOf(state.MAKE_PILEUP_EDGE_ERROR) != -1);
}
Assert.assertNull(state.getCigarOperator());
Assert.assertNotNull(state.toString());
Assert.assertEquals(state.getReadOffset(), -1);
Expand Down

0 comments on commit 9c33132

Please sign in to comment.