Skip to content

Commit

Permalink
GoodCigarFilter + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
akiezun committed Apr 27, 2015
1 parent f1eefec commit 175fd37
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ private ReadFilterLibrary(){ /*no instance*/ }
public static final ReadFilter PASSES_VENDOR_QUALITY_CHECK = read -> !read.getReadFailsVendorQualityCheckFlag();
public static final ReadFilter MAPPING_QUALITY_AVAILABLE = read -> read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE;
public static final ReadFilter MAPPING_QUALITY_NOT_ZERO = read -> read.getMappingQuality() != 0;
public static final ReadFilter READLENGTH_EQUALS_CIGARLENGTH = read -> read.getReadLength() == read.getCigar().getReadLength();
public static final ReadFilter GOOD_CIGAR = read -> CigarUtils.isGood(read.getCigar());

public static final ReadFilter VALID_ALIGNMENT_START = read -> {
if (read.getReadUnmappedFlag()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
package org.broadinstitute.hellbender.utils.read;

import htsjdk.samtools.*;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
import org.broadinstitute.hellbender.exceptions.GATKException;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;

public class CigarUtils {
Expand Down Expand Up @@ -107,7 +113,7 @@ public static boolean isCigarValid(Cigar cigar) {
return false;
}

public static final int countRefBasesBasedOnCigar(final SAMRecord read, final int cigarStartIndex, final int cigarEndIndex){
public static int countRefBasesBasedOnCigar(final SAMRecord read, final int cigarStartIndex, final int cigarEndIndex){
int result = 0;
for(int i = cigarStartIndex; i<cigarEndIndex;i++){
final CigarElement cigarElement = read.getCigar().getCigarElement(i);
Expand Down Expand Up @@ -169,4 +175,64 @@ public static boolean containsNOperator(final Cigar cigar) {
}
return false;
}

/**
* A good Cigar object obeys the following rules:
* - is valid as per SAM spec {@link Cigar#isValid(String, long)}.
* - has no consecutive I/D elements
* - does not start or end with deletions (with or without preceding clips).
*/
public static boolean isGood(final Cigar c) {
if (c == null){
throw new IllegalArgumentException("null cigar");
}
//Note: the string comes from the SAMRecord so it must be a wellformed CIGAR (that is, in "\*|([0-9]+[MIDNSHPX=])+" as per SAM spec).
//We don't have to check that
if (c.isValid(null, -1) != null){ //if it's invalid, then it's not good
return false;
}
final List<CigarElement> elems = c.getCigarElements();
if (hasConsecutiveIndels(elems)){
return false;
}
if (startsWithDeletionIgnoringClips(elems)){
return false;
}
//revert the list and check deletions at the end
final List<CigarElement> elemsRev = new ArrayList<>(elems);
Collections.reverse(elemsRev);
return !startsWithDeletionIgnoringClips(elemsRev);
}

/**
* Checks if cigar has consecutive I/D elements.
*/
private static boolean hasConsecutiveIndels(final List<CigarElement> elems) {
boolean prevIndel = false;
for (final CigarElement elem : elems) {
final CigarOperator op = elem.getOperator();
final boolean isIndel = (op == CigarOperator.INSERTION || op == CigarOperator.DELETION);
if (prevIndel && isIndel) {
return true;
}
prevIndel = isIndel;
}
return false;
}

/**
* Checks if cigar starts with a deletion (ignoring any clips at the beginning).
*/
private static boolean startsWithDeletionIgnoringClips(final List<CigarElement> elems) {
final Iterator<CigarElement> iter = elems.iterator();
boolean isClip = true;
CigarOperator op = null;
while(iter.hasNext() && isClip) { //consume clips at the beginning
final CigarElement elem = iter.next();
op = elem.getOperator();
isClip = (op == CigarOperator.HARD_CLIP || op == CigarOperator.SOFT_CLIP);
}
//once all clips are consumed, is it a deletion or not?
return op == CigarOperator.DELETION;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import htsjdk.samtools.*;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.clipping.ReadClipperTestUtils;
import org.broadinstitute.hellbender.utils.read.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
Expand Down Expand Up @@ -196,7 +197,7 @@ public void failsALIGNMENT_AGREES_WITH_HEADER_case1() {
public void failsALIGNMENT_AGREES_WITH_HEADER_case2() {
SAMRecord read = simpleGoodRead();
final int length = read.getHeader().getSequence(0).getSequenceLength();
read.setAlignmentStart(length+10);
read.setAlignmentStart(length + 10);
Assert.assertFalse(ALIGNMENT_AGREES_WITH_HEADER.test(read), read.toString());
}

Expand All @@ -210,7 +211,7 @@ public void failsHAS_READ_GROUP() {
@Test
public void failsHAS_MATCHING_BASES_AND_QUALS() {
SAMRecord read = simpleGoodRead();
read.setBaseQualities(new byte[]{1,2,3});
read.setBaseQualities(new byte[]{1, 2, 3});
Assert.assertFalse(HAS_MATCHING_BASES_AND_QUALS.test(read), read.toString());
}

Expand All @@ -228,4 +229,89 @@ public void failsCIGAR_IS_SUPPORTED() {
Assert.assertFalse(CIGAR_IS_SUPPORTED.test(read), read.toString());
}

@DataProvider(name = "badCigars")
public Object[][] badCigars() {
return new Object[][]{
{"2D4M"}, // starting with multiple deletions
{"4M2D"}, // ending with multiple deletions
{"3M1I1D"}, // adjacent indels AND ends in deletion
{"1M1I1D2M"}, // adjacent indels I->D
{"1M1D2I1M"}, // adjacent indels D->I
{"1M1I2M1D"}, // ends in single deletion with insertion in the middle
{"4M1D"}, // ends in single deletion
{"1D4M"}, // starts with single deletion
{"2M1D1D2M"}, // adjacent D's
{"1M1I1I1M"}, // adjacent I's
{"1H1D4M"}, // starting with deletion after H
{"1S1D3M"}, // starting with deletion after S
{"1H1S1D3M"}, // starting with deletion after HS
{"4M1D1H"}, // ending with deletion before H
{"3M1D1S"}, // ending with deletion before S
{"3M1D1S1H"}, // ending with deletion before HS
{"1H1S1H1M"}, // H in the middle, after S
{"1M1H1S1M"}, // S in the middle, after H
{"10M2H10M"}, // H in the middle
{"10M2S10M"}, // S in the middle
{"1S1H"}, // only clipping
{"1S1S"}, // only clipping
{"1H1S"}, // only clipping
{"1H1H"}, // only clipping
{"1S1H10M"}, // H in the middle
{"1H1M1S1M"}, // H in the middle
{"1M1H1S"}, // H in the middle
{"1H1S10M2S10M1S1H"}, // deceiving S in the middle: HSMSMSH
{"1H1S10M2H10M1S1H"}, // deceiving H in the middle
{"1H1H2M"}, // (invalid according to htsjdk)
{"1S20S10M"}, // (invalid according to htsjdk)
{"1S1S1S1M"}, // (invalid according to htsjdk)
{"1H1S10M10S1S30H"}, // (invalid according to htsjdk)
{"1H1S10M10S1S30H"}, // (invalid according to htsjdk)
{"1H20H10M"}, // (invalid according to htsjdk)
{"1H1H10M10H30H"}, // (invalid according to htsjdk)
{"1H1H10M10H1H30H"}, // (invalid according to htsjdk)
{"1M1H2H"}, // (invalid according to htsjdk)
};
}
@Test(dataProvider = "badCigars")
public void testWonkyCigars (String cigarString) {
SAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigarString);
Assert.assertFalse(GOOD_CIGAR.test(read), read.getCigarString());
}

@Test
public void testReadCigarLengthMismatch() {
SAMRecord read = ReadClipperTestUtils.makeReadFromCigar("4M", 1);
Assert.assertFalse(READLENGTH_EQUALS_CIGARLENGTH.test(read), read.getCigarString());
}

@Test
public void testEmptyCigar(){
SAMRecord read = ReadClipperTestUtils.makeReadFromCigar("");
Assert.assertTrue(GOOD_CIGAR.test(read), read.getCigarString());
}

@DataProvider(name = "goodCigars")
public Object[][] goodCigars() {
return new Object[][]{
{"1H1S10M10S30H"},
{"1I9H"},
{"1I1S8H"},
{"1S1I1S7H"}
};
}

@Test(dataProvider = "goodCigars")
public void testGoodCigars (String cigarString) {
SAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigarString);
Assert.assertTrue(GOOD_CIGAR.test(read), read.getCigarString());
}
@Test
public void testGoodCigarsUpToSize() {
//Note: not using data providers here because it's super slow to print (many minutes vs few seconds).
List<Cigar> cigarList = ReadClipperTestUtils.generateCigarList(10);
for (Cigar cigar : cigarList) {
SAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
Assert.assertTrue(GOOD_CIGAR.test(read), read.getCigarString());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,49 @@ public class ReadClipperTestUtils {
new CigarElement(1, CigarOperator.MATCH_OR_MISMATCH)};


/**
* Make a read from the CIGAR string
*
* @param cigarString string used to create a CIGAR
* @return artificial read
*/
public static SAMRecord makeReadFromCigar(String cigarString) {
return makeReadFromCigar(TextCigarCodec.decode(cigarString));
}

/**
* Make a read from the CIGAR.
*
* @param cigar
* @return artificial read
*/
public static SAMRecord makeReadFromCigar(Cigar cigar) {
return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
return makeReadFromCigar(cigar, 0);
}

public static SAMRecord makeReadFromCigar(String cigarString) {
return makeReadFromCigar(TextCigarCodec.decode(cigarString));
private static SAMRecord makeReadFromCigar(Cigar cigar, int lengthChange) {
int readLength = cigar.getReadLength();
if (readLength >= -lengthChange) {
readLength += lengthChange;
}
return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, readLength), Utils.arrayFromArrayWithLength(QUALS, readLength), cigar.toString());
}

/**
* Make a read from the CIGAR
*
* @param cigarString string used to create a CIGAR
* @param lengthChange change in read length relative the CIGAR length
* @return artificial read
*/
public static SAMRecord makeReadFromCigar(String cigarString, int lengthChange) {
return makeReadFromCigar(TextCigarCodec.decode(cigarString), lengthChange);
}

/**
* This function generates every valid permutation of cigar strings (with a given set of cigarElement) with a given length.
* See {@link ReadClipperTestUtils#generateCigarList(int, CigarElement[]) for a full description.}
*/
public static List<Cigar> generateCigarList(int maximumLength) {
return generateCigarList(maximumLength, cigarElements);
}
Expand All @@ -56,7 +91,7 @@ public static List<Cigar> generateCigarList(int maximumLength, CigarElement[] ci
while (true) {
Cigar cigar = createCigarFromCombination(cigarCombination, cigarElements); // create the cigar
cigar = CigarUtils.combineAdjacentCigarElements(cigar); // combine adjacent elements
if (CigarUtils.isCigarValid(cigar)) { // check if it's valid
if (CigarUtils.isGood(cigar)) { // check if it's valid
cigarList.add(cigar); // add it
}

Expand Down Expand Up @@ -88,5 +123,4 @@ private static Cigar createCigarFromCombination(byte[] cigarCombination, CigarEl
}
return cigar;
}

}

0 comments on commit 175fd37

Please sign in to comment.