Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Several fixes and improvements to Pool caller with ancillary test fun…

…ctions (not done yet):

a) Utility class called Probability Vector that holds a log-probability vector and has the ability to clip ends that deviate largely from max value.
b) Used this class to hold site error model, since likelihoods of error model away from peak are so far down that it's not worth computing with them and just wastes time.
c) Expand unit tests and add an exhaustive test for ErrorModel class.
d) Corrected major math bug in ErrorModel uncovered by exhaustive test: log(e^x) is NOT x if log's base = 10.
e) Refactored utility functions that created artificial pileups for testing into separate class ArtificialPileupTestProvider. Right now functionality is limited (one artificial contig of 10 bp), can only specify pileups in one position with a given number of matches and mismatches to ref) but functionality will be expanded in future to cover more test cases.
f) Use this utility class for IndelGenotypeLikelihoods unit test and for PoolGenotypeLikelihoods unit test (the latter testing functionality still not done).
g) Linearized implementation of biallelic exact model (very simple approach, similar to diploid exact model, just abort if we're past the max value of AC distribution and below a threshold). Still need to add unit tests for this and to expand to multiallelic model.
h) Update integration test md5's due to minor differences stemming from linearized exact model and better error model math
  • Loading branch information...
commit 730208133ba0fd824287694c5ee24f4e4606683b 1 parent 1bbb156
Guillermo del Angel authored
View
2  public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -243,7 +243,7 @@ public static double log10sumLog10(double[] log10p, int start, int finish) {
double maxValue = arrayMax(log10p, finish);
if(maxValue == Double.NEGATIVE_INFINITY)
- return sum;
+ return maxValue;
for (int i = start; i < finish; i++) {
sum += Math.pow(10.0, log10p[i] - maxValue);
View
203 ...est/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2010.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMReadGroupRecord;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
+import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
+
+import java.util.*;
+
+
+public class ArtificialReadPileupTestProvider {
+ final int contigStart = 1;
+ final int contigStop = 10;
+ final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
+// final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
+ final String artificialContig = "chr1";
+ // final int artificialContigIndex = 0;
+ final String artificialReadName = "synth";
+ final int artificialRefStart = 1;
+ final int artificialMappingQuality = 60;
+ Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
+ List<SAMReadGroupRecord> sampleRGs;
+
+ final String refBases = "AGGATACTGT";
+ List<String> sampleNames = new ArrayList<String>();
+ private String sampleName(int i) { return sampleNames.get(i); }
+ private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
+ public final int offset = 5;
+ public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
+ public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
+ public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
+ public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
+
+
+ public ArtificialReadPileupTestProvider(int numSamples, final String SAMPLE_PREFIX) {
+ sampleRGs = new ArrayList<SAMReadGroupRecord>();
+
+ for ( int i = 0; i < numSamples; i++ ) {
+ sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
+ SAMReadGroupRecord rg = createRG(sampleName(i));
+ sampleRGs.add(rg);
+ sample2RG.put(sampleName(i), rg);
+ }
+
+ }
+
+ public List<String> getSampleNames() {
+ return sampleNames;
+ }
+ public byte getRefByte() {
+ return refBases.substring(offset,offset+1).getBytes()[0];
+ }
+
+ public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
+ // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
+
+
+ ArrayList vcAlleles = new ArrayList<Allele>();
+ Allele refAllele, altAllele;
+ if (eventLength == 0) {// SNP case
+ refAllele =Allele.create(refBases.substring(offset,offset+1),true);
+ altAllele = Allele.create(altBases.substring(0,1), false);
+
+ } else if (eventLength>0){
+ // insertion
+ refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
+ altAllele = Allele.create(altBases.substring(0,eventLength), false);
+ }
+ else {
+ // deletion
+ refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
+ altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
+ }
+ int stop = loc.getStart();
+ vcAlleles.add(refAllele);
+ vcAlleles.add(altAllele);
+
+ final VariantContextBuilder builder = new VariantContextBuilder().source("");
+ builder.loc(loc.getContig(), loc.getStart(), stop);
+ builder.alleles(vcAlleles);
+ builder.referenceBaseForIndel(referenceContext.getBase());
+ builder.noGenotypes();
+
+ final VariantContext vc = builder.make();
+
+ Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
+
+ for (String sample: sampleNames) {
+ AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
+ contexts.put(sample,context);
+
+ }
+
+ return contexts;
+ }
+
+ private SAMReadGroupRecord createRG(String name) {
+ SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
+ rg.setPlatform("ILLUMINA");
+ rg.setSample(name);
+ return rg;
+ }
+ private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
+ int[] numReadsPerAllele, String sample) {
+ List<PileupElement> pileupElements = new ArrayList<PileupElement>();
+ int readStart = contigStart;
+ int offset = (contigStop-contigStart+1)/2;
+ int refAlleleLength = 0;
+ int readCounter = 0;
+ int alleleCounter = 0;
+ for (Allele allele: vc.getAlleles()) {
+ if (allele.isReference())
+ refAlleleLength = allele.getBases().length;
+
+ int alleleLength = allele.getBases().length;
+
+ for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
+ byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
+ byte[] readQuals = new byte[readBases.length];
+ Arrays.fill(readQuals, (byte) 50);
+
+ GATKSAMRecord read = new GATKSAMRecord(header);
+ read.setBaseQualities(readQuals);
+ read.setReadBases(readBases);
+ read.setReadName(artificialReadName+readCounter++);
+
+ boolean isBeforeDeletion = false, isBeforeInsertion = false;
+ if (allele.isReference())
+ read.setCigarString(readBases.length + "M");
+ else {
+ isBeforeDeletion = alleleLength<refAlleleLength;
+ isBeforeInsertion = alleleLength>refAlleleLength;
+ if (isBeforeDeletion || isBeforeInsertion)
+ read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
+ (readBases.length-offset)+"M");
+ else // SNP case
+ read.setCigarString(readBases.length+"M");
+ }
+
+ int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
+ read.setReadPairedFlag(false);
+ read.setAlignmentStart(readStart);
+ read.setMappingQuality(artificialMappingQuality);
+ read.setReferenceName(loc.getContig());
+ read.setReadNegativeStrandFlag(false);
+ read.setAttribute("RG", sampleRG(sample).getReadGroupId());
+
+
+ pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
+ }
+ alleleCounter++;
+ }
+
+ return new ReadBackedPileupImpl(loc,pileupElements);
+ }
+
+ private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
+ // create haplotype based on a particular allele
+ String prefix = refBases.substring(offset);
+ String alleleBases = new String(allele.getBases());
+ String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
+
+ return (prefix+alleleBases+postfix).getBytes();
+
+
+
+ }
+
+}
View
154 ...est/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java
@@ -45,51 +45,23 @@
*/
public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
- final int contigStart = 1;
- final int contigStop = 10;
- final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop-contigStart+1);
- final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
- final String artificialContig = "chr1";
- final int artificialContigIndex = 0;
- final String artificialReadName = "synth";
- final int artificialRefStart = 1;
- final int artificialMappingQuality = 60;
- Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
-
final String refBases = "AGGATACTGT";
- final String SAMPLE_PREFIX = "sample";
-
- List<String> sampleNames = new ArrayList<String>();
final int nSamples = 1;
- final int numReadsPerAllele = 10;
-
- List<SAMReadGroupRecord> sampleRGs;
-
- private String sampleName(int i) { return sampleNames.get(i); }
- private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
+ final int[] numReadsPerAllele = new int[]{10,10};
+ final String SAMPLE_PREFIX = "sample";
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class);
- final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
final IndelGenotypeLikelihoodsCalculationModel model = new IndelGenotypeLikelihoodsCalculationModel(UAC,logger);
- final int offset = 5;
- final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
- final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
- final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
- @BeforeSuite
- public void before() {
- sampleRGs = new ArrayList<SAMReadGroupRecord>();
-
- for ( int i = 0; i < nSamples; i++ ) {
- sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
- SAMReadGroupRecord rg = createRG(sampleName(i));
- sampleRGs.add(rg);
- sample2RG.put(sampleName(i), rg);
- }
+ ArtificialReadPileupTestProvider pileupProvider;
+ @BeforeSuite
+ public void before() {
+ pileupProvider = new ArtificialReadPileupTestProvider(nSamples, SAMPLE_PREFIX);
}
+
@Test
public void testBasicConsensusCounts() {
// 4 inserted bases, min cnt = 10
@@ -107,7 +79,7 @@ public void testBasicConsensusCounts() {
eventLength = 3;
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2);
- Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(offset,offset+eventLength));
+ Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
// same with min Reads = 11
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
@@ -121,112 +93,10 @@ public void testBasicConsensusCounts() {
}
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {
- final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(genomeLocParser, true, minCnt, minFraction);
- return counter.computeConsensusAlleles(referenceContext,getContextFromAlleles(eventLength, isInsertion, altBases), AlignmentContextUtils.ReadOrientation.COMPLETE);
-
- }
- private Map<String,AlignmentContext> getContextFromAlleles(int eventLength, boolean isInsertion, String altBases) {
- // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
-
-
- ArrayList vcAlleles = new ArrayList<Allele>();
- Allele refAllele, altAllele;
- if (isInsertion) {
- refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
- altAllele = Allele.create(altBases.substring(0,eventLength), false);
- }
- else {
- refAllele =Allele.create(refBases.substring(offset,offset+eventLength),true);
- altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
- }
-
- int stop = loc.getStart();
- vcAlleles.add(refAllele);
- vcAlleles.add(altAllele);
-
- final VariantContextBuilder builder = new VariantContextBuilder().source("");
- builder.loc(loc.getContig(), loc.getStart(), stop);
- builder.alleles(vcAlleles);
- builder.referenceBaseForIndel(referenceContext.getBase());
- builder.noGenotypes();
-
- final VariantContext vc = builder.make();
-
- Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
-
- for (String sample: sampleNames) {
- AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
- contexts.put(sample,context);
-
- }
-
- return contexts;
- }
-
- private SAMReadGroupRecord createRG(String name) {
- SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
- rg.setPlatform("ILLUMINA");
- rg.setSample(name);
- return rg;
- }
- private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
- int numReads, String sample) {
- List<PileupElement> pileupElements = new ArrayList<PileupElement>();
- int readStart = contigStart;
- int offset = (contigStop-contigStart+1)/2;
- int refAlleleLength = 0;
- int readCounter = 0;
- for (Allele allele: vc.getAlleles()) {
- if (allele.isReference())
- refAlleleLength = allele.getBases().length;
-
- int alleleLength = allele.getBases().length;
-
- for ( int d = 0; d < numReads; d++ ) {
- byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
- byte[] readQuals = new byte[readBases.length];
- Arrays.fill(readQuals,(byte)50);
-
- GATKSAMRecord read = new GATKSAMRecord(header);
- read.setBaseQualities(readQuals);
- read.setReadBases(readBases);
- read.setReadName(artificialReadName+readCounter++);
-
- boolean isBeforeDeletion = false, isBeforeInsertion = false;
- if (allele.isReference())
- read.setCigarString(readBases.length + "M");
- else {
- isBeforeDeletion = alleleLength<refAlleleLength;
- isBeforeInsertion = alleleLength>refAlleleLength;
- read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
- (readBases.length-offset)+"M");
- }
-
- int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
- read.setReadPairedFlag(false);
- read.setAlignmentStart(readStart);
- read.setMappingQuality(artificialMappingQuality);
- read.setReferenceName(loc.getContig());
- read.setReadNegativeStrandFlag(false);
- read.setAttribute("RG", sampleRG(sample).getReadGroupId());
-
-
- pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
- }
- }
-
- return new ReadBackedPileupImpl(loc,pileupElements);
- }
-
- byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
- // create haplotype based on a particular allele
- String prefix = refBases.substring(offset);
- String alleleBases = new String(allele.getBases());
- String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
-
- return (prefix+alleleBases+postfix).getBytes();
-
-
+ final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(pileupProvider.genomeLocParser, true, minCnt, minFraction);
+ return counter.computeConsensusAlleles(pileupProvider.referenceContext,
+ pileupProvider.getAlignmentContextFromAlleles(isInsertion?eventLength:-eventLength,altBases,numReadsPerAllele),
+ AlignmentContextUtils.ReadOrientation.COMPLETE);
}
}
Please sign in to comment.
Something went wrong with that request. Please try again.