Skip to content

Commit

Permalink
Merge pull request #439 from broadinstitute/ak_readClipperUnitTests
Browse files Browse the repository at this point in the history
port of read clipper unit tests
  • Loading branch information
akiezun committed Apr 24, 2015
2 parents 2fa2a8d + 75f78bc commit a75b706
Show file tree
Hide file tree
Showing 6 changed files with 469 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,18 @@ private SAMRecord clipLowQualEnds(ClippingRepresentation algorithm, byte lowQual
return this.clipRead(algorithm);
}

/**
 * Clips the low-quality ends of this clipper's read using the
 * {@link ClippingRepresentation#HARDCLIP_BASES} representation.
 * Simply forwards to {@code clipLowQualEnds(ClippingRepresentation, byte)}.
 *
 * @param lowQual quality threshold, passed through unchanged to the underlying clipLowQualEnds call
 * @return whatever the underlying clipLowQualEnds call returns for the hard-clip representation
 */
private SAMRecord hardClipLowQualEnds(byte lowQual) {
    final ClippingRepresentation hardClip = ClippingRepresentation.HARDCLIP_BASES;
    return clipLowQualEnds(hardClip, lowQual);
}

/**
 * Static convenience entry point: wraps the read in a fresh {@code ReadClipper} and
 * forwards to the instance-level {@code clipLowQualEnds(ClippingRepresentation, byte)}.
 *
 * @param read      the read to clip
 * @param lowQual   quality threshold, passed through unchanged to the instance method
 * @param algorithm the clipping representation to apply
 * @return the read returned by the instance-level clipLowQualEnds call
 */
public static SAMRecord clipLowQualEnds(SAMRecord read, byte lowQual, ClippingRepresentation algorithm) {
    final ReadClipper clipper = new ReadClipper(read);
    return clipper.clipLowQualEnds(algorithm, lowQual);
}

/**
 * Static convenience entry point: wraps the read in a fresh {@code ReadClipper} and
 * forwards to the instance-level {@code hardClipLowQualEnds(byte)}.
 *
 * @param read    the read to clip
 * @param lowQual quality threshold, passed through unchanged to the instance method
 * @return the read returned by the instance-level hardClipLowQualEnds call
 */
public static SAMRecord hardClipLowQualEnds(SAMRecord read, byte lowQual) {
    final ReadClipper clipper = new ReadClipper(read);
    return clipper.hardClipLowQualEnds(lowQual);
}

/**
* Will hard clip every soft-clipped base in the read.
*
Expand Down Expand Up @@ -309,7 +317,7 @@ public static SAMRecord hardClipSoftClippedBases (SAMRecord read) {
public static SAMRecord hardClipToRegionIncludingClippedBases( final SAMRecord read, final int refStart, final int refStop ) {
// Computes the read's own span from its unclipped start and hands it, together with
// the requested refStart/refStop, to the private hardClipToRegion overload.
// Span start: the read's unclipped start.
final int start = read.getUnclippedStart();
// Span stop: start plus the count returned by countRefBasesBasedOnCigar for cigar
// indices 0..getCigarLength(), minus one (presumably an inclusive end coordinate
// covering the whole cigar -- TODO confirm against CigarUtils).
final int stop = start + CigarUtils.countRefBasesBasedOnCigar(read, 0, read.getCigarLength()) - 1;
// NOTE(review): the two return lines below appear to be the before/after sides of a
// whitespace-only diff change; only one of them is expected in the real file.
return hardClipToRegion(read, refStart, refStop,start,stop);
return hardClipToRegion(read, refStart, refStop, start, stop);
}

private static SAMRecord hardClipToRegion( final SAMRecord read, final int refStart, final int refStop, final int alignmentStart, final int alignmentStop){
Expand Down Expand Up @@ -346,6 +354,32 @@ public static SAMRecord hardClipAdaptorSequence (SAMRecord read) {
return (new ReadClipper(read)).hardClipAdaptorSequence();
}

/**
 * Hard clips insertions found at the start of the read. The cigar is scanned from its
 * first element; hard clips and soft clips are stepped over, each insertion encountered
 * queues a clipping op covering read positions 0 through (insertion length - 1), and the
 * scan stops at the first element of any other kind. The end of the read is never examined.
 *
 * @return the read itself if it is empty, otherwise the result of applying the queued
 *         ops with the HARDCLIP_BASES representation
 */
private SAMRecord hardClipLeadingInsertions() {
    if (ReadUtils.isEmpty(read)) {
        return read;
    }

    for (final CigarElement element : read.getCigar().getCigarElements()) {
        final CigarOperator operator = element.getOperator();
        if (operator == CigarOperator.INSERTION) {
            this.addOp(new ClippingOp(0, element.getLength() - 1));
        } else if (operator != CigarOperator.HARD_CLIP && operator != CigarOperator.SOFT_CLIP) {
            // First element that is neither a clip nor an insertion: the leading run is over.
            break;
        }
    }
    return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}

/**
 * Static convenience entry point: wraps the read in a fresh {@code ReadClipper} and
 * forwards to the instance-level {@code hardClipLeadingInsertions()}.
 *
 * @param read the read to clip
 * @return the read returned by the instance-level hardClipLeadingInsertions call
 */
public static SAMRecord hardClipLeadingInsertions(SAMRecord read) {
    final ReadClipper clipper = new ReadClipper(read);
    return clipper.hardClipLeadingInsertions();
}

/**
* Turns soft clipped bases into matches
* @return a new read with every soft clip turned into a match
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,33 @@ public static Cigar combineAdjacentCigarElements(Cigar rawCigar) {
return combinedCigar;
}

/**
 * Tells whether the read's cigar contains at least one element whose operator is
 * neither a soft clip (S) nor a hard clip (H).
 *
 * @param read the read whose cigar is inspected
 * @return true as soon as an element with an operator other than S or H is found;
 *         false if every element is a clip or the cigar has no elements
 */
public static boolean readHasNonClippedBases(SAMRecord read) {
    for (final CigarElement element : read.getCigar().getCigarElements()) {
        final CigarOperator operator = element.getOperator();
        final boolean isClip = operator == CigarOperator.SOFT_CLIP || operator == CigarOperator.HARD_CLIP;
        if (!isClip) {
            return true;
        }
    }
    return false;
}

/**
 * Builds a new cigar holding the elements of the given cigar in reverse order.
 * The input cigar is only read, never modified; the element objects themselves
 * are reused in the returned cigar.
 *
 * @param cigar the cigar to invert
 * @return a new Cigar with the same elements, ordered last to first
 */
public static Cigar invertCigar (Cigar cigar) {
    final Cigar invertedCigar = new Cigar();
    // Walk the elements back to front directly instead of copying them through a
    // legacy (synchronized) java.util.Stack just to reverse their order.
    for (int i = cigar.numCigarElements() - 1; i >= 0; i--) {
        invertedCigar.add(cigar.getCigarElement(i));
    }
    return invertedCigar;
}

/**
* A valid cigar object obeys the following rules:
* - No Hard/Soft clips in the middle of the read
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.utils.GenomeLocParser;
import org.broadinstitute.hellbender.utils.ReadClipperTestUtils;
import org.broadinstitute.hellbender.utils.clipping.ReadClipperTestUtils;
import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.hellbender.utils.test.BaseTest;
import org.testng.Assert;
Expand Down Expand Up @@ -61,7 +61,7 @@ public TestManager() {
@Test
public void splitReadAtN() {
final int cigarStringLength = 10;
final List<Cigar> cigarList = ReadClipperTestUtils.generateCigarList(cigarStringLength,cigarElements);
final List<Cigar> cigarList = ReadClipperTestUtils.generateCigarList(cigarStringLength, cigarElements);

// For Debugging use those lines (instead of above cigarList) to create specific read:
//------------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
import org.broadinstitute.hellbender.utils.ReadClipperTestUtils;
import org.broadinstitute.hellbender.utils.clipping.ReadClipperTestUtils;
import org.broadinstitute.hellbender.utils.test.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
package org.broadinstitute.hellbender.utils;
package org.broadinstitute.hellbender.utils.clipping;

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.*;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.ArtificialSAMUtils;
import org.broadinstitute.hellbender.utils.read.CigarUtils;

Expand All @@ -28,6 +26,14 @@ public static SAMRecord makeReadFromCigar(Cigar cigar) {
return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
}

/**
 * Convenience overload: decodes the textual cigar with {@code TextCigarCodec.decode}
 * and forwards the result to {@code makeReadFromCigar(Cigar)}.
 *
 * @param cigarString the cigar in text form
 * @return the read produced by makeReadFromCigar(Cigar) for the decoded cigar
 */
public static SAMRecord makeReadFromCigar(String cigarString) {
    final Cigar decodedCigar = TextCigarCodec.decode(cigarString);
    return makeReadFromCigar(decodedCigar);
}

/**
 * Convenience overload of {@code generateCigarList(int, CigarElement[])} that supplies
 * this class's own {@code cigarElements} as the element set.
 *
 * @param maximumLength the length argument passed through to the two-argument overload
 * @return the list returned by the two-argument overload
 */
public static List<Cigar> generateCigarList(int maximumLength) {
    final List<Cigar> cigars = generateCigarList(maximumLength, cigarElements);
    return cigars;
}

/**
* This function generates every valid permutation of cigar strings (with a given set of cigarElement) with a given length.
* <p>
Expand All @@ -42,7 +48,7 @@ public static SAMRecord makeReadFromCigar(Cigar cigar) {
*/
public static List<Cigar> generateCigarList(int maximumLength, CigarElement[] cigarElements) {
int numCigarElements = cigarElements.length;
LinkedList<Cigar> cigarList = new LinkedList<Cigar>();
LinkedList<Cigar> cigarList = new LinkedList<>();
byte[] cigarCombination = new byte[maximumLength];

Utils.fillArrayWithByte(cigarCombination, (byte) 0); // we start off with all 0's in the combination array.
Expand Down
Loading

0 comments on commit a75b706

Please sign in to comment.