Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #439 from broadinstitute/vrr_graphLikelihoods2

Adding Graph-based likelihoods calculation
  • Loading branch information...
commit 5fad1c10bf20c2fdbe0693f35d005101e63819e8 2 parents 1cfacdc + 1318043
@vruano vruano authored
Showing with 339 additions and 59 deletions.
  1. +2 −0  public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
  2. +42 −0 public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
  3. +0 −2  public/java/src/org/broadinstitute/sting/utils/MathUtils.java
  4. +1 −0  public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
  5. +0 −1  public/java/src/org/broadinstitute/sting/utils/Utils.java
  6. +15 −3 public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
  7. +1 −1  public/java/src/org/broadinstitute/sting/utils/genotyper/MostLikelyAllele.java
  8. +4 −6 public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
  9. +30 −1 public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java
  10. +51 −41 public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java
  11. +7 −0 public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java
  12. +1 −1  public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java
  13. +182 −0 public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMReadyHaplotypes.java
  14. +1 −0  public/java/test/org/broadinstitute/sting/BaseTest.java
  15. +1 −1  public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java
  16. +0 −1  public/java/test/org/broadinstitute/sting/WalkerTest.java
  17. +1 −1  public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
View
2  public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@@ -180,4 +180,6 @@ public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet interv
}
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, IntervalMergingRule.ALL);
}
+
+
}
View
42 public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@@ -138,6 +138,48 @@ static public boolean basesAreEqual(byte base1, byte base2) {
return simpleBaseToBaseIndex(base1) == simpleBaseToBaseIndex(base2);
}
+ /**
+ * Checks whether to bases are the same in fact ignore ambiguous 'N' bases.
+ *
+ * @param base1 first base to compare.
+ * @param base2 second base to compare.
+ * @return true if {@code base1 == base2} or either is an 'N', false otherwise.
+ */
+ static public boolean basesAreEqualIgnoreAmbiguous(final byte base1, final byte base2) {
+ if (base1 == base2) return true;
+ else if (base1 == 'n' || base1 == 'N' || base2 == 'N' || base2 == 'n') return true;
+ else return false;
+ }
+
+ /**
+ * Compare to base arrays ranges checking whether they contain the same bases.
+ *
+ * <p>
+ * By default two array have equal bases, i.e. {@code length == 0} results results in {@code true}.
+ * </p>
+ *
+ * @param bases1 first base array to compare.
+ * @param offset1 position of the first base in bases1 to compare.
+ * @param bases2 second base array to compare.
+ * @param offset2 position of the first base in bases2 to compare.
+ * @param length number of bases to compare.
+ *
+ * @throws NullPointerException if {@code bases1} or {@code bases2} is {@code null}.
+ * @throws ArrayIndexOutOfBoundsException if:
+ * <ul>
+ * <li>{@code offset1} is not within the range [0,{@code bases1.length}) or</li>
+ * <li>{@code offset2} is not within the range [0,{@code bases2.length}) or</li>
+ * <li>{@code offset1 + length} is not within the range [0,{@code bases1.length}) or </li>
+ * <li>{@code offset2 + length} is not within the range [0,{@code bases2.length})</li>
+ * </ul>
+ * @return
+ */
+ static public boolean basesAreEqualIgnoreAmbiguous(final byte[] bases1, final int offset1, final byte[] bases2, final int offset2, final int length) {
+ for (int i = 0; i < length; i++)
+ if (!basesAreEqualIgnoreAmbiguous(bases1[offset1 + i],bases2[offset2 + i])) return false;
+ return true;
+ }
+
static public boolean extendedBasesAreEqual(byte base1, byte base2) {
return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
}
View
2  public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -30,7 +30,6 @@
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import javax.annotation.Nullable;
import java.math.BigDecimal;
import java.util.*;
@@ -425,7 +424,6 @@ public static double log10BinomialProbability(final int n, final int k) {
* happen when: any value is negative or larger than a short. This method is optimized for speed; it is not intended to serve as a
* utility function.
*/
- @Nullable
static Long fastGenerateUniqueHashFromThreeIntegers(final int one, final int two, final int three) {
if (one < 0 || two < 0 || three < 0 || Short.MAX_VALUE < one || Short.MAX_VALUE < two || Short.MAX_VALUE < three) {
return null;
View
1  public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@@ -63,6 +63,7 @@
private static double qualToErrorProbCache[] = new double[256];
private static double qualToProbLog10Cache[] = new double[256];
+
static {
for (int i = 0; i < 256; i++) {
qualToErrorProbCache[i] = qualToErrorProb((double) i);
View
1  public/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -27,7 +27,6 @@
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
-import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMProgramRecord;
import org.apache.log4j.Logger;
View
18 public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
@@ -32,9 +32,7 @@
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.HasGenomeLocation;
-import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
@@ -109,6 +107,12 @@
*/
private GenomeLoc spanIncludingReads;
+
+ /**
+ * Indicates whether the active region has been finalized
+ */
+ private boolean hasBeenFinalized;
+
/**
* Create a new ActiveRegion containing no reads
*
@@ -205,7 +209,7 @@ public String toString() {
* @return a non-null array of bytes holding the reference bases in referenceReader
*/
@Ensures("result != null")
- private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
+ public byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
if ( referenceReader == null ) throw new IllegalArgumentException("referenceReader cannot be null");
if ( padding < 0 ) throw new IllegalArgumentException("padding must be a positive integer but got " + padding);
if ( genomeLoc == null ) throw new IllegalArgumentException("genomeLoc cannot be null");
@@ -451,4 +455,12 @@ public ActiveRegion trim(final GenomeLoc newExtent) {
return new ActiveRegion( subActive, Collections.<ActivityProfileState>emptyList(), isActive, genomeLocParser, requiredExtension );
}
+
+ public void setFinalized(final boolean value) {
+ hasBeenFinalized = value;
+ }
+
+ public boolean isFinalized() {
+ return hasBeenFinalized;
+ }
}
View
2  public/java/src/org/broadinstitute/sting/utils/genotyper/MostLikelyAllele.java
@@ -65,7 +65,7 @@
* @param log10LikelihoodOfMostLikely the log10 likelihood of the most likely allele
* @param log10LikelihoodOfSecondBest the log10 likelihood of the next most likely allele (should be NEGATIVE_INFINITY if none is available)
*/
- public MostLikelyAllele(Allele mostLikely, Allele secondMostLikely, double log10LikelihoodOfMostLikely, double log10LikelihoodOfSecondBest) {
+ public MostLikelyAllele(final Allele mostLikely, final Allele secondMostLikely, double log10LikelihoodOfMostLikely, double log10LikelihoodOfSecondBest) {
if ( mostLikely == null ) throw new IllegalArgumentException("mostLikely allele cannot be null");
if ( log10LikelihoodOfMostLikely != Double.NEGATIVE_INFINITY && ! MathUtils.goodLog10Probability(log10LikelihoodOfMostLikely) )
throw new IllegalArgumentException("log10LikelihoodOfMostLikely must be either -Infinity or a good log10 prob but got " + log10LikelihoodOfMostLikely);
View
10 public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
@@ -305,18 +305,16 @@ public static MostLikelyAllele getMostLikelyAllele( final Map<Allele,Double> all
/**
* Debug method to dump contents of object into string for display
*/
- @Override
public String toString() {
- StringBuilder sb = new StringBuilder();
+ final StringBuilder sb = new StringBuilder();
sb.append("Alelles in map:");
- for (Allele a:alleles) {
+ for (final Allele a:alleles) {
sb.append(a.getDisplayString()+",");
-
}
sb.append("\n");
- for (Map.Entry <GATKSAMRecord, Map<Allele, Double>> el : getLikelihoodReadMap().entrySet() ) {
- for (Map.Entry<Allele,Double> eli : el.getValue().entrySet()) {
+ for (final Map.Entry <GATKSAMRecord, Map<Allele, Double>> el : getLikelihoodReadMap().entrySet() ) {
+ for (final Map.Entry<Allele,Double> eli : el.getValue().entrySet()) {
sb.append("Read "+el.getKey().getReadName()+". Allele:"+eli.getKey().getDisplayString()+" has likelihood="+Double.toString(eli.getValue())+"\n");
}
View
31 public/java/src/org/broadinstitute/sting/utils/haplotype/Haplotype.java
@@ -38,10 +38,13 @@
import org.broadinstitute.variant.variantcontext.Allele;
import java.util.Arrays;
+import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
public class Haplotype extends Allele {
+
+
private GenomeLoc genomeLocation = null;
private EventMap eventMap = null;
private Cigar cigar;
@@ -214,7 +217,7 @@ public Cigar getConsolidatedPaddedCigar(final int padSize) {
public void setCigar( final Cigar cigar ) {
this.cigar = AlignmentUtils.consolidateCigar(cigar);
if ( this.cigar.getReadLength() != length() )
- throw new IllegalArgumentException("Read length " + length() + " not equal to the read length of the cigar " + cigar.getReadLength());
+ throw new IllegalArgumentException("Read length " + length() + " not equal to the read length of the cigar " + cigar.getReadLength() + " " + this.cigar);
}
@Requires({"refInsertLocation >= 0"})
@@ -311,4 +314,30 @@ public double getScore() {
public void setScore(double score) {
this.score = this.isReference() ? Double.MAX_VALUE : score;
}
+
+ /**
+ * Comparator used to sort haplotypes, alphanumerically.
+ *
+ * <p>
+ * If one haplotype is the prefix of the other, the shorter one comes first.
+ * </p>
+ */
+ public static final Comparator<Haplotype> ALPHANUMERICAL_COMPARATOR = new Comparator<Haplotype>() {
+
+ @Override
+ public int compare(final Haplotype o1, final Haplotype o2) {
+ if (o1 == o2)
+ return 0;
+ final byte[] bases1 = o1.getBases();
+ final byte[] bases2 = o2.getBases();
+ final int iLimit = Math.min(bases1.length, bases2.length);
+ for (int i = 0; i < iLimit; i++) {
+ final int cmp = Byte.compare(bases1[i], bases2[i]);
+ if (cmp != 0) return cmp;
+ }
+ if (bases1.length == bases2.length) return 0;
+ return (bases1.length > bases2.length) ? -1 : 1; // is a bit better to get the longest haplotypes first.
+ }
+ };
+
}
View
92 public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java
@@ -1,27 +1,27 @@
/*
- * Copyright (c) 2012 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
+* Copyright (c) 2012 The Broad Institute
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
package org.broadinstitute.sting.utils.pairhmm;
@@ -40,18 +40,18 @@
* User: rpoplin, carneiro
* Date: 3/1/12
*/
-public final class Log10PairHMM extends N2MemoryPairHMM {
+public class Log10PairHMM extends N2MemoryPairHMM {
/**
* Should we use exact log10 calculation (true), or an approximation (false)?
*/
private final boolean doExactLog10;
- private static final int matchToMatch = 0;
- private static final int indelToMatch = 1;
- private static final int matchToInsertion = 2;
- private static final int insertionToInsertion = 3;
- private static final int matchToDeletion = 4;
- private static final int deletionToDeletion = 5;
+ protected static final int matchToMatch = 0;
+ protected static final int indelToMatch = 1;
+ protected static final int matchToInsertion = 2;
+ protected static final int insertionToInsertion = 3;
+ protected static final int matchToDeletion = 4;
+ protected static final int deletionToDeletion = 5;
// we divide e by 3 because the observed base could have come from any of the non-observed alleles
protected final static double log10_3 = log10(3.0);
@@ -101,17 +101,14 @@ public double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotyp
final boolean recacheReadValues,
final int nextHapStartIndex) {
- if (previousHaplotypeBases == null || previousHaplotypeBases.length != haplotypeBases.length) {
- // set the initial value (free deletions in the beginning) for the first row in the deletion matrix
- final double initialValue = Math.log10(1.0 / haplotypeBases.length);
- for( int j = 0; j < paddedHaplotypeLength; j++ ) {
- deletionMatrix[0][j] = initialValue;
- }
- }
if ( ! constantsAreInitialized || recacheReadValues )
initializeProbabilities(insertionGOP, deletionGOP, overallGCP);
initializePriors(haplotypeBases, readBases, readQuals, hapStartIndex);
+ if (previousHaplotypeBases == null || previousHaplotypeBases.length != haplotypeBases.length) {
+ // set the initial value (free deletions in the beginning) for the first row in the deletion matrix
+ initializeMatrixValues(haplotypeBases);
+ }
for (int i = 1; i < paddedReadLength; i++) {
// +1 here is because hapStartIndex is 0-based, but our matrices are 1 based
@@ -123,14 +120,27 @@ public double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotyp
// final probability is the log10 sum of the last element in the Match and Insertion state arrays
// this way we ignore all paths that ended in deletions! (huge)
// but we have to sum all the paths ending in the M and I matrices, because they're no longer extended.
+ double finalSumProbabilities = finalLikelihoodCalculation();
+
+ return finalSumProbabilities;
+ }
+
+ protected void initializeMatrixValues(final byte[] haplotypeBases) {
+ final double initialValue = Math.log10(1.0 / haplotypeBases.length);
+ for( int j = 0; j < paddedHaplotypeLength; j++ ) {
+ deletionMatrix[0][j] = initialValue;
+ }
+ }
+
+ protected double finalLikelihoodCalculation() {
final int endI = paddedReadLength - 1;
double finalSumProbabilities = myLog10SumLog10(new double[]{matchMatrix[endI][1], insertionMatrix[endI][1]});
for (int j = 2; j < paddedHaplotypeLength; j++)
finalSumProbabilities = myLog10SumLog10(new double[]{finalSumProbabilities, matchMatrix[endI][j], insertionMatrix[endI][j]});
-
return finalSumProbabilities;
}
+
/**
* Initializes the matrix that holds all the constants related to the editing
* distance between the read and the haplotype.
@@ -169,7 +179,7 @@ public void initializePriors(final byte[] haplotypeBases, final byte[] readBases
"overallGCP != null"
})
@Ensures("constantsAreInitialized")
- private void initializeProbabilities(final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) {
+ protected void initializeProbabilities(final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) {
for (int i = 0; i < insertionGOP.length; i++) {
final int qualIndexGOP = Math.min(insertionGOP[i] + deletionGOP[i], Byte.MAX_VALUE);
transition[i+1][matchToMatch] = QualityUtils.qualToProbLog10((byte) qualIndexGOP);
@@ -199,7 +209,7 @@ private void initializeProbabilities(final byte[] insertionGOP, final byte[] del
* @return the log10 of the sum of the probabilities
*/
@Requires("values != null")
- private double myLog10SumLog10(final double[] values) {
+ protected double myLog10SumLog10(final double[] values) {
return doExactLog10 ? MathUtils.log10sumLog10(values) : MathUtils.approximateLog10SumLog10(values);
}
@@ -214,7 +224,7 @@ private double myLog10SumLog10(final double[] values) {
* @param prior the likelihood editing distance matrix for the read x haplotype
* @param transition an array with the six transition relevant to this location
*/
- private void updateCell( final int indI, final int indJ, final double prior, final double[] transition) {
+ protected void updateCell( final int indI, final int indJ, final double prior, final double[] transition) {
matchMatrix[indI][indJ] = prior +
myLog10SumLog10(new double[]{matchMatrix[indI - 1][indJ - 1] + transition[matchToMatch],
View
7 public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java
@@ -40,6 +40,13 @@
protected double[][] insertionMatrix = null;
protected double[][] deletionMatrix = null;
+ // only used for debugging purposes
+ protected boolean doNotUseTristateCorrection = false;
+
+ public void doNotUseTristateCorrection() {
+ doNotUseTristateCorrection = true;
+ }
+
/**
* Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths
*
View
2  public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java
@@ -78,7 +78,7 @@
* @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM
* @param readMaxLength the max length of reads we want to use with this PairHMM
*/
- protected void initialize( final int readMaxLength, final int haplotypeMaxLength ) {
+ public void initialize( final int readMaxLength, final int haplotypeMaxLength ) {
if ( readMaxLength <= 0 ) throw new IllegalArgumentException("READ_MAX_LENGTH must be > 0 but got " + readMaxLength);
if ( haplotypeMaxLength <= 0 ) throw new IllegalArgumentException("HAPLOTYPE_MAX_LENGTH must be > 0 but got " + haplotypeMaxLength);
View
182 public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMMReadyHaplotypes.java
@@ -0,0 +1,182 @@
+/*
+* Copyright (c) 2012 The Broad Institute
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package org.broadinstitute.sting.utils.pairhmm;
+
+import java.util.*;
+
+/**
+ * Collection of haplotypes sorted in a conveniently way to be run efficiently by the PairHMM.
+ *
+ * TODO not yet in use but likely to be as part of making graph-base likelihood run faster.
+ * TODO this could be extended to the classical PairHMM implementation simplifyling the PairHMM API.
+ */
+public class PairHMMReadyHaplotypes implements Iterable<PairHMMReadyHaplotypes.Entry> {
+
+
+ public class Entry {
+
+ private final byte[] bases;
+
+ private double likelihood = Double.NaN;
+
+ protected Entry(final byte[] bases) {
+ this.bases = bases;
+ }
+
+ protected byte[] getBases() {
+ return bases;
+ }
+
+ public void setLikelihood(final double lk) {
+ likelihood = lk;
+ }
+
+ public double getLikelihood() {
+ return likelihood;
+ }
+
+ }
+
+ private Map<Entry,Map<Entry,Integer>> commonPrefixLength;
+
+ private SortedSet<Entry> entries;
+
+ private int capacity;
+
+ private final Comparator<Entry> comparator = new Comparator<Entry>() {
+ @Override
+ public int compare(final Entry o1, final Entry o2) {
+ final byte[] b1 = o1.bases;
+ final byte[] b2 = o2.bases;
+ Map<Entry,Integer> b1map = commonPrefixLength.get(o1);
+ if (b1map == null)
+ commonPrefixLength.put(o1, b1map = new HashMap<>(capacity));
+ Map<Entry,Integer> b2map = commonPrefixLength.get(o2);
+ if (b2map == null)
+ commonPrefixLength.put(o2, b2map = new HashMap<>(capacity));
+ final Integer previousI = b1map.get(o2) == null ? null : b1map.get(o2);
+ int i;
+ int result;
+ final int iLimit = Math.min(b1.length,b2.length);
+ if (previousI == null) {
+ for (i = 0; i < iLimit; i++)
+ if (b1[i] != b2[i])
+ break;
+ b1map.put(o2,i);
+ b2map.put(o1,i);
+ } else
+ i = previousI;
+
+ if (i < iLimit)
+ result = Byte.compare(b1[i],b2[i]);
+ else if (b1.length == b2.length)
+ result = 0;
+ else
+ result = b1.length < b2.length ? -1 : 1;
+ return result;
+ }
+ };
+
+ public PairHMMReadyHaplotypes(final int capacity) {
+ commonPrefixLength = new HashMap<>(capacity);
+ entries = new TreeSet<>(comparator);
+ }
+
+ public void add(final byte[] bases) {
+ final Entry entry = new Entry(bases);
+ entries.add(entry);
+ }
+
+ public int size() {
+ return entries.size();
+ }
+
+ @Override
+ public Iterator iterator() {
+ return new Iterator();
+ }
+
+ public class Iterator implements java.util.Iterator<Entry> {
+
+ private java.util.Iterator<Entry> actualIterator;
+ private Entry previousEntry;
+ private Entry currentEntry;
+ private int startIndex;
+ private int cmp;
+
+ private Iterator() {
+ actualIterator = entries.iterator();
+ }
+
+ public boolean hasNext() {
+ return actualIterator.hasNext();
+ }
+
+ public Entry next() {
+ previousEntry = currentEntry;
+ final Entry result = currentEntry = actualIterator.next();
+ startIndex = -1;
+ return result;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ public byte[] bases() {
+ if (currentEntry == null)
+ throw new NoSuchElementException();
+ return currentEntry.bases;
+ }
+
+ public int startIndex() {
+ if (startIndex >= 0)
+ return startIndex;
+ else if (previousEntry == null)
+ return startIndex = 0;
+ else {
+ // The comparator will make sure the common-prefix-length is updated.
+ // The result in a field so that we avoid dead code elimination.
+ // perhaps I a bit paranohic but it does not harm to prevent.
+ cmp = comparator.compare(previousEntry,currentEntry);
+ return startIndex = commonPrefixLength.get(previousEntry).get(currentEntry);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return super.toString() + " cmp = " + cmp;
+ }
+
+ public void setLikelihood(final double likelihood) {
+ if (currentEntry == null)
+ throw new NoSuchElementException();
+ currentEntry.setLikelihood(likelihood);
+ }
+ }
+
+}
View
1  public/java/test/org/broadinstitute/sting/BaseTest.java
@@ -90,6 +90,7 @@
public static final String b36KGReference = "/humgen/1kg/reference/human_b36_both.fasta";
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
+ public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta";
public static final String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/";
public static final String validationDataLocation = GATKDataLocation + "Validation_Data/";
public static final String evaluationDataLocation = GATKDataLocation + "Evaluation_Data/";
View
2  public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java
@@ -54,7 +54,7 @@ public void transform(ITestAnnotation annotation,
Method testMethod)
{
if ( annotation.getTimeOut() == 0 ) {
- logger.warn("test " + testMethod.toString() + " has no specified timeout, adding default timeout " + DEFAULT_TIMEOUT / 1000 / 60 + " minutes");
+ logger.warn("test " + (testMethod == null ? "<null>" : testMethod.toString()) + " has no specified timeout, adding default timeout " + DEFAULT_TIMEOUT / 1000 / 60 + " minutes");
annotation.setTimeOut(DEFAULT_TIMEOUT);
}
}
View
1  public/java/test/org/broadinstitute/sting/WalkerTest.java
@@ -388,7 +388,6 @@ private void qcMD5s(String name, List<String> md5s) {
private void executeTest(String testName, String testClassName, String args, Class expectedException) {
CommandLineGATK instance = new CommandLineGATK();
String[] command = Utils.escapeExpressions(args);
-
// run the executable
boolean gotAnException = false;
try {
View
2  public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
@@ -66,7 +66,7 @@
private SAMFileHeader header;
@BeforeClass
- public void init() {
+ public void init() {
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
}
Please sign in to comment.
Something went wrong with that request. Please try again.