Skip to content

Commit

Permalink
Fix for indel calling with UG in presence of reduced reads: When a re…
Browse files Browse the repository at this point in the history
…ad is long enough so that there's no reference context available, the reads gets clipped so that it falls again within the reference context range. However, the clipping is incorrect, as it makes the read end precisely at the end of the reference context coordinates. This might lead to a case where a read might span beyond the haplotype if one of the candidate haplotypes is shorter than the reference context (As in the case e.g. with deletions). In this case, the HMM will not work properly and the likelihood will be bad, since "insertions" at end of reads when haplotype is done will be penalized and likelihood will be much lower than it should.

-- Added check to see if read spans beyond reference window MINUS padding and event length. This guarantees that read will always be contained in haplotype.
-- Changed md5's that happen when long reads from old 454 data have their likelihoods changed because of the extra base clipping.
  • Loading branch information
Guillermo del Angel committed Apr 29, 2013
1 parent c5701a9 commit 20d3137
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 11 deletions.
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -245,8 +245,13 @@ public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final Read
} }
} }
else { else {
final int refWindowStart = ref.getWindow().getStart(); // extra padding on candidate haplotypes to make sure reads are always strictly contained
final int refWindowStop = ref.getWindow().getStop(); // in them - a value of 1 will in theory do but we use a slightly higher one just for safety sake, mostly
// in case bases at edge of reads have lower quality.
final int trailingBases = 3;
final int extraOffset = Math.abs(eventLength);
final int refWindowStart = ref.getWindow().getStart()+(trailingBases+extraOffset);
final int refWindowStop = ref.getWindow().getStop()-(trailingBases+extraOffset);


if (DEBUG) { if (DEBUG) {
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString()); System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
Expand All @@ -255,10 +260,10 @@ public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final Read
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());


if (!read.isEmpty() && (read.getSoftEnd() > refWindowStop && read.getSoftStart() < refWindowStop)) if (!read.isEmpty() && (read.getSoftEnd() > refWindowStop && read.getSoftStart() < refWindowStop))
read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, ref.getWindow().getStop()); read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, refWindowStop);


if (!read.isEmpty() && (read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart)) if (!read.isEmpty() && (read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart))
read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, ref.getWindow().getStart()); read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, refWindowStart);


if (read.isEmpty()) if (read.isEmpty())
continue; continue;
Expand All @@ -270,7 +275,6 @@ public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final Read
continue; continue;


// get bases of candidate haplotypes that overlap with reads // get bases of candidate haplotypes that overlap with reads
final int trailingBases = 3;
final long readStart = read.getSoftStart(); final long readStart = read.getSoftStart();
final long readEnd = read.getSoftEnd(); final long readEnd = read.getSoftEnd();


Expand All @@ -286,7 +290,6 @@ public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final Read
final int numEndSoftClippedBases = softClips ? read.getSoftEnd()- read.getAlignmentEnd() : 0 ; final int numEndSoftClippedBases = softClips ? read.getSoftEnd()- read.getAlignmentEnd() : 0 ;
final byte [] unclippedReadBases = read.getReadBases(); final byte [] unclippedReadBases = read.getReadBases();
final byte [] unclippedReadQuals = read.getBaseQualities(); final byte [] unclippedReadQuals = read.getBaseQualities();
final int extraOffset = Math.abs(eventLength);


/** /**
* Compute genomic locations that candidate haplotypes will span. * Compute genomic locations that candidate haplotypes will span.
Expand All @@ -313,6 +316,7 @@ else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) {
startLocationInRefForHaplotypes = ref.getWindow().getStop(); // read starts after haplotype: read will have to be clipped completely; startLocationInRefForHaplotypes = ref.getWindow().getStop(); // read starts after haplotype: read will have to be clipped completely;
} }


// candidate haplotype cannot go beyond reference context
if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) { if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
stopLocationInRefForHaplotypes = ref.getWindow().getStop(); // check also if end of read will go beyond reference context stopLocationInRefForHaplotypes = ref.getWindow().getStop(); // check also if end of read will go beyond reference context
} }
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public void testMultiTechnologyIndels() {
" -o %s" + " -o %s" +
" -L 1:10,000,000-10,500,000", " -L 1:10,000,000-10,500,000",
1, 1,
Arrays.asList("54e13f696f56eb742bf449ad11d0dc5f")); Arrays.asList("8d9b8f8a1479322961c840e461b6dba8"));


executeTest(String.format("test indel calling, multiple technologies"), spec); executeTest(String.format("test indel calling, multiple technologies"), spec);
} }
Expand Down Expand Up @@ -136,7 +136,7 @@ public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
Arrays.asList("e87f5c76661527ef7aa44e528fe19573")); Arrays.asList("3d4d66cc253eac55f16e5b0a36f17d8d"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
} }


Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ public void testMultiTechnologies() {
" -o %s" + " -o %s" +
" -L 1:10,000,000-10,100,000", " -L 1:10,000,000-10,100,000",
1, 1,
Arrays.asList("d995b76adc3766889f7c2c88da14055c")); Arrays.asList("31be725b2a7c15e9769391ad940c0587"));


executeTest(String.format("test multiple technologies"), spec); executeTest(String.format("test multiple technologies"), spec);
} }
Expand All @@ -259,7 +259,7 @@ public void testCallingWithBAQ() {
" -L 1:10,000,000-10,100,000" + " -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY", " -baq CALCULATE_AS_NECESSARY",
1, 1,
Arrays.asList("9669e1643d22d5ad047b3941aeefd6db")); Arrays.asList("dcc5cec42730567982def16da4a7f286"));


executeTest(String.format("test calling with BAQ"), spec); executeTest(String.format("test calling with BAQ"), spec);
} }
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultiSamplePilot1() { public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("e3efd1917192ea743ac1e9958aa0a98f")); Arrays.asList("a6c224235c21b4af816b1512eb0624df"));
executeTest("test MultiSample Pilot1", spec); executeTest("test MultiSample Pilot1", spec);
} }


Expand Down

0 comments on commit 20d3137

Please sign in to comment.