Skip to content
Browse files

Change the behavior of SelectVariants for PL/AD when it encounters a …

…record that has lost one or more alternate alleles.

Previously, we would strip out the PLs and AD values since they were no longer accurate.  However, this is not ideal because
then that information is just lost and 1) users complain on the forum and post it as a bug and 2) it gives us problems in both
the current and future (single sample) calling pipelines because we subset samples/alleles all the time and lose info.

Now the PLs and AD get correctly selected down.

While I was in there I also refactored some related code in subsetDiploidAlleles().  There were no real changes there - I just
broke it out into smaller chunks as per our best practices.

Added unit tests and updated integration tests.
Addressed reviews.
  • Loading branch information...
1 parent 5fad1c1 commit 43fb0df28d154678b6f9f8aae434ac11806235ce @eitanbanks eitanbanks committed Dec 2, 2013
View
19 public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -299,7 +299,7 @@
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
private int maxIndelSize = Integer.MAX_VALUE;
- @Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
+ @Argument(doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;
@@ -657,6 +657,7 @@ public void onTraversalDone(Integer result) {
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
*
* @param vc the VariantContext record to subset
+ * @param excludeNonVariants should we exclude sites that have AC=0 for any alternate alleles?
* @return the subsetted VariantContext
*/
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
@@ -665,14 +666,10 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu
final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used
- VariantContextBuilder builder = new VariantContextBuilder(sub);
+ final VariantContextBuilder builder = new VariantContextBuilder(sub);
- GenotypesContext newGC = sub.getGenotypes();
-
- // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
- final boolean lostAllelesInSelection = vc.getAlleles().size() != sub.getAlleles().size();
- if ( lostAllelesInSelection )
- newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
+ // if there are fewer alternate alleles now in the selected VC, we need to fix the PL and AD values
+ GenotypesContext newGC = GATKVariantContextUtils.updatePLsAndAD(sub, vc);
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
if ( vc.getNSamples() != sub.getNSamples() ) {
@@ -682,11 +679,11 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu
// Remove a fraction of the genotypes if needed
if ( fractionGenotypes > 0 ){
- ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
+ final ArrayList<Genotype> genotypes = new ArrayList<>();
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
- List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
+ final List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
}
else{
@@ -698,7 +695,7 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu
builder.genotypes(newGC);
- addAnnotations(builder, sub, lostAllelesInSelection);
+ addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size());
return builder.make();
}
View
10 public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -976,6 +976,16 @@ public static int countOccurrences(byte element, byte[] array) {
return count;
}
+ public static int countOccurrences(final boolean element, final boolean[] array) {
+ int count = 0;
+ for (final boolean b : array) {
+ if (element == b)
+ count++;
+ }
+
+ return count;
+ }
+
/**
* Returns n random indices drawn with replacement from the range 0..(k-1)
View
248 public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java
@@ -453,67 +453,131 @@ protected static boolean basesAreRepeated(final String l, final String s, final
* rather than the undetermined behavior when using the PLs to assign, which could result
* in hom-var or hom-ref for each, depending on the exact PL values.
*/
- BEST_MATCH_TO_ORIGINAL
+ BEST_MATCH_TO_ORIGINAL,
+
+ /**
+ * do not even bother changing the GTs
+ */
+ DO_NOT_ASSIGN_GENOTYPES
}
/**
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
*
* @param vc variant context with genotype likelihoods
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
- * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
- * @return genotypes
+ * @param assignGenotypes assignment strategy for the (subsetted) PLs
+ * @return a new non-null GenotypesContext
*/
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
final List<Allele> allelesToUse,
final GenotypeAssignmentMethod assignGenotypes) {
if ( allelesToUse.get(0).isNonReference() ) throw new IllegalArgumentException("First allele must be the reference allele");
if ( allelesToUse.size() == 1 ) throw new IllegalArgumentException("Cannot subset to only 1 alt allele");
- // the genotypes with PLs
- final GenotypesContext oldGTs = vc.getGenotypes();
-
- // the new genotypes to create
- final GenotypesContext newGTs = GenotypesContext.create();
-
// optimization: if no input genotypes, just exit
- if (oldGTs.isEmpty()) return newGTs;
-
- // samples
- final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
+ if (vc.getGenotypes().isEmpty()) return GenotypesContext.create();
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
- final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
- final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
- final int numNewAltAlleles = allelesToUse.size() - 1;
+ final List<Integer> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(vc, allelesToUse);
+
+ // create the new genotypes
+ return createGenotypesWithSubsettedLikelihoods(vc.getGenotypes(), vc, allelesToUse, likelihoodIndexesToUse, assignGenotypes);
+ }
+
+ /**
+ * Figure out which likelihood indexes to use for a selected down set of alleles
+ *
+ * @param originalVC the original VariantContext
+ * @param allelesToUse the subset of alleles to use
+ * @return a list of PL indexes to use or null if none
+ */
+ private static List<Integer> determineLikelihoodIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
- // which PLs should be carried forward?
- ArrayList<Integer> likelihoodIndexesToUse = null;
+ // the bitset representing the allele indexes we want to keep
+ final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
// then we can keep the PLs as is; otherwise, we determine which ones to keep
- if ( numNewAltAlleles != numOriginalAltAlleles ) {
- likelihoodIndexesToUse = new ArrayList<>(30);
+ if ( MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length )
+ return null;
- final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
- for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
- if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
- altAlleleIndexToUse[i] = true;
- }
+ return getLikelihoodIndexes(originalVC, alleleIndexesToUse);
+ }
- // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
- final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
- for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
- final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
- // consider this entry only if both of the alleles are good
- if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
- likelihoodIndexesToUse.add(PLindex);
- }
+ /**
+ * Get the actual likelihoods indexes to use given the corresponding allele indexes
+ *
+ * @param originalVC the original VariantContext
+ * @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
+ * @return a non-null List
+ */
+ private static List<Integer> getLikelihoodIndexes(final VariantContext originalVC, final boolean[] alleleIndexesToUse) {
+
+ final List<Integer> result = new ArrayList<>(30);
+
+ // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
+ final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), DEFAULT_PLOIDY);
+
+ for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
+ final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
+ // consider this entry only if both of the alleles are good
+ if ( alleleIndexesToUse[alleles.alleleIndex1] && alleleIndexesToUse[alleles.alleleIndex2] )
+ result.add(PLindex);
+ }
+
+ return result;
+ }
+
+ /**
+ * Given an original VariantContext and a list of alleles from that VC to keep,
+ * returns a bitset representing which allele indexes should be kept
+ *
+ * @param originalVC the original VC
+ * @param allelesToKeep the list of alleles to keep
+ * @return non-null bitset
+ */
+ private static boolean[] getAlleleIndexBitset(final VariantContext originalVC, final List<Allele> allelesToKeep) {
+ final int numOriginalAltAlleles = originalVC.getNAlleles() - 1;
+ final boolean[] alleleIndexesToKeep = new boolean[numOriginalAltAlleles + 1];
+
+ // the reference Allele is definitely still used
+ alleleIndexesToKeep[0] = true;
+ for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
+ if ( allelesToKeep.contains(originalVC.getAlternateAllele(i)) )
+ alleleIndexesToKeep[i+1] = true;
}
+ return alleleIndexesToKeep;
+ }
+
+ /**
+ * Create the new GenotypesContext with the subsetted PLs
+ *
+ * @param originalGs the original GenotypesContext
+ * @param vc the original VariantContext
+ * @param allelesToUse the actual alleles to use with the new Genotypes
+ * @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse (@see #determineLikelihoodIndexesToUse())
+ * @param assignGenotypes assignment strategy for the (subsetted) PLs
+ * @return a new non-null GenotypesContext
+ */
+ private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs,
+ final VariantContext vc,
+ final List<Allele> allelesToUse,
+ final List<Integer> likelihoodIndexesToUse,
+ final GenotypeAssignmentMethod assignGenotypes) {
+ // the new genotypes to create
+ final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
+
+ // make sure we are seeing the expected number of likelihoods per sample
+ final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
+
+ // the samples
+ final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
+
// create the new genotypes
- for ( int k = 0; k < oldGTs.size(); k++ ) {
- final Genotype g = oldGTs.get(sampleIndices.get(k));
+ for ( int k = 0; k < originalGs.size(); k++ ) {
+ final Genotype g = originalGs.get(sampleIndices.get(k));
final GenotypeBuilder gb = new GenotypeBuilder(g);
// create the new likelihoods array from the alleles we are allowed to use
@@ -570,13 +634,16 @@ public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT,
final GenotypeAssignmentMethod assignmentMethod,
final double[] newLikelihoods,
final List<Allele> allelesToUse) {
- gb.noAD();
switch ( assignmentMethod ) {
+ case DO_NOT_ASSIGN_GENOTYPES:
+ break;
case SET_TO_NO_CALL:
gb.alleles(NO_CALL_ALLELES);
+ gb.noAD();
gb.noGQ();
break;
case USE_PLS_TO_ASSIGN:
+ gb.noAD();
if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) {
// if there is no mass on the (new) likelihoods, then just no-call the sample
gb.alleles(NO_CALL_ALLELES);
@@ -597,6 +664,7 @@ public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT,
}
gb.noGQ();
gb.noPL();
+ gb.noAD();
gb.alleles(best);
break;
}
@@ -622,7 +690,7 @@ public static GenotypesContext subsetToRefOnly(final VariantContext vc, final in
if (oldGTs.isEmpty()) return oldGTs;
// the new genotypes to create
- final GenotypesContext newGTs = GenotypesContext.create();
+ final GenotypesContext newGTs = GenotypesContext.create(oldGTs.size());
final Allele ref = vc.getReference();
final List<Allele> diploidRefAlleles = Arrays.asList(ref, ref);
@@ -1083,7 +1151,7 @@ private static final boolean hasPLIncompatibleAlleles(final Collection<Allele> a
}
public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
- GenotypesContext newGs = GenotypesContext.create(genotypes.size());
+ final GenotypesContext newGs = GenotypesContext.create(genotypes.size());
for ( final Genotype g : genotypes ) {
newGs.add(removePLsAndAD(g));
@@ -1092,6 +1160,108 @@ public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
return newGs;
}
+ /**
+ * Updates the PLs and AD of the Genotypes in the newly selected VariantContext to reflect the fact that some alleles
+ * from the original VariantContext are no longer present.
+ *
+ * @param selectedVC the selected (new) VariantContext
+ * @param originalVC the original VariantContext
+ * @return a new non-null GenotypesContext
+ */
+ public static GenotypesContext updatePLsAndAD(final VariantContext selectedVC, final VariantContext originalVC) {
+ final int numNewAlleles = selectedVC.getAlleles().size();
+ final int numOriginalAlleles = originalVC.getAlleles().size();
+
+ // if we have more alternate alleles in the selected VC than in the original VC, then something is wrong
+ if ( numNewAlleles > numOriginalAlleles )
+ throw new IllegalArgumentException("Attempting to fix PLs and AD from what appears to be a *combined* VCF and not a selected one");
+
+ final GenotypesContext oldGs = selectedVC.getGenotypes();
+
+ // if we have the same number of alternate alleles in the selected VC as in the original VC, then we don't need to fix anything
+ if ( numNewAlleles == numOriginalAlleles )
+ return oldGs;
+
+ final GenotypesContext newGs = fixPLsFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles());
+
+ return fixADFromSubsettedAlleles(newGs, originalVC, selectedVC.getAlleles());
+ }
+
+ /**
+ * Fix the PLs for the GenotypesContext of a VariantContext that has been subset
+ *
+ * @param originalGs the original GenotypesContext
+ * @param originalVC the original VariantContext
+ * @param allelesToUse the new (sub)set of alleles to use
+ * @return a new non-null GenotypesContext
+ */
+ static private GenotypesContext fixPLsFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
+
+ // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
+ final List<Integer> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(originalVC, allelesToUse);
+
+ // create the new genotypes
+ return createGenotypesWithSubsettedLikelihoods(originalGs, originalVC, allelesToUse, likelihoodIndexesToUse, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES);
+ }
+
+ /**
+ * Fix the AD for the GenotypesContext of a VariantContext that has been subset
+ *
+ * @param originalGs the original GenotypesContext
+ * @param originalVC the original VariantContext
+ * @param allelesToUse the new (sub)set of alleles to use
+ * @return a new non-null GenotypesContext
+ */
+ static private GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
+
+ // the bitset representing the allele indexes we want to keep
+ final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
+
+ // the new genotypes to create
+ final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
+
+ // the samples
+ final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
+
+ // create the new genotypes
+ for ( int k = 0; k < originalGs.size(); k++ ) {
+ final Genotype g = originalGs.get(sampleIndices.get(k));
+ newGTs.add(fixAD(g, alleleIndexesToUse, allelesToUse.size()));
+ }
+
+ return newGTs;
+ }
+
+ /**
+ * Fix the AD for the given Genotype
+ *
+ * @param genotype the original Genotype
+ * @param alleleIndexesToUse a bitset describing whether or not to keep a given index
+ * @param nAllelesToUse how many alleles we are keeping
+ * @return a non-null Genotype
+ */
+ private static Genotype fixAD(final Genotype genotype, final boolean[] alleleIndexesToUse, final int nAllelesToUse) {
+ // if it ain't broke don't fix it
+ if ( !genotype.hasAD() )
+ return genotype;
+
+ final GenotypeBuilder builder = new GenotypeBuilder(genotype);
+
+ final int[] oldAD = genotype.getAD();
+ if ( oldAD.length != alleleIndexesToUse.length ) {
+ builder.noAD();
+ } else {
+ final int[] newAD = new int[nAllelesToUse];
+ int currentIndex = 0;
+ for ( int i = 0; i < oldAD.length; i++ ) {
+ if ( alleleIndexesToUse[i] )
+ newAD[currentIndex++] = oldAD[i];
+ }
+ builder.AD(newAD);
+ }
+ return builder.make();
+ }
+
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
Allele ref = null;
@@ -1277,7 +1447,7 @@ protected static VariantContext trimAlleles(final VariantContext inputVC,
@Requires("originalGenotypes != null && alleleMapper != null")
protected static GenotypesContext updateGenotypesWithMappedAlleles(final GenotypesContext originalGenotypes, final AlleleMapper alleleMapper) {
- final GenotypesContext updatedGenotypes = GenotypesContext.create();
+ final GenotypesContext updatedGenotypes = GenotypesContext.create(originalGenotypes.size());
for ( final Genotype genotype : originalGenotypes ) {
final List<Allele> updatedAlleles = alleleMapper.remap(genotype.getAlleles());
View
108 public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java
@@ -1320,4 +1320,112 @@ public void testSubsetToRef() {
}
}
}
+
+ // --------------------------------------------------------------------------------
+ //
+ // Test updatePLsAndAD
+ //
+ // --------------------------------------------------------------------------------
+
+ @DataProvider(name = "updatePLsAndADData")
+ public Object[][] makeUpdatePLsAndADData() {
+ List<Object[]> tests = new ArrayList<>();
+
+ final Allele A = Allele.create("A", true);
+ final Allele C = Allele.create("C");
+ final Allele G = Allele.create("G");
+
+ final List<Allele> AA = Arrays.asList(A,A);
+ final List<Allele> AC = Arrays.asList(A,C);
+ final List<Allele> CC = Arrays.asList(C,C);
+ final List<Allele> AG = Arrays.asList(A,G);
+ final List<Allele> CG = Arrays.asList(C,G);
+ final List<Allele> GG = Arrays.asList(G,G);
+ final List<Allele> ACG = Arrays.asList(A,C,G);
+
+ final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
+
+ final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01});
+ final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01});
+ final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9});
+ final double[] uninformative = new double[]{0, 0, 0};
+
+ final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make();
+
+ // make sure we don't screw up the simple case where no selection happens
+ final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).GQ(8).make();
+ final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10,2}).PL(hetPL).GQ(8).make();
+ final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10,2}).PL(homVarPL).GQ(8).make();
+
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(aaGT).make())});
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(acGT).make())});
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(ccGT).make())});
+
+ // uninformative test cases
+ final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make();
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)});
+ final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noAD().noPL().noGQ().make();
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)});
+
+ // actually subsetting down from multiple alt values
+ final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50};
+ final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50};
+ final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50};
+ final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50};
+ final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG
+ final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG
+
+ final int[] homRef3AllelesAD = new int[]{20, 0, 1};
+ final int[] hetRefC3AllelesAD = new int[]{10, 10, 1};
+ final int[] homC3AllelesAD = new int[]{0, 20, 1};
+ final int[] hetRefG3AllelesAD = new int[]{10, 0, 11};
+ final int[] hetCG3AllelesAD = new int[]{0, 12, 11}; // AA, AC, CC, AG, CG, GG
+ final int[] homG3AllelesAD = new int[]{0, 1, 21}; // AA, AC, CC, AG, CG, GG
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AC).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).AD(new int[]{20, 0}).GQ(100).make())});
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefC3AllelesAD).PL(hetRefC3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AC).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-10, 0, -20}).AD(new int[]{10, 10}).GQ(100).make())});
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homC3AllelesAD).PL(homC3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AC).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -10, 0}).AD(new int[]{0, 20}).GQ(100).make())});
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefG3AllelesAD).PL(hetRefG3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AG).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, 0, -50}).AD(new int[]{10, 11}).GQ(100).make())});
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetCG3AllelesAD).PL(hetCG3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AG).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -20, -30}).AD(new int[]{0, 11}).GQ(100).make())});
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homG3AllelesAD).PL(homG3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AG).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -40, 0}).AD(new int[]{0, 21}).GQ(100).make())});
+
+ return tests.toArray(new Object[][]{});
+ }
+
+ @Test(dataProvider = "updatePLsAndADData")
+ public void testUpdatePLsAndADData(final VariantContext originalVC,
+ final VariantContext selectedVC,
+ final List<Genotype> expectedGenotypes) {
+ final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make();
+ final GenotypesContext actual = GATKVariantContextUtils.updatePLsAndAD(selectedVCwithGTs, originalVC);
+
+ Assert.assertEquals(actual.size(), expectedGenotypes.size());
+ for ( final Genotype expected : expectedGenotypes ) {
+ final Genotype actualGT = actual.get(expected.getSampleName());
+ Assert.assertNotNull(actualGT);
+ assertGenotypesAreEqual(actualGT, expected);
+ }
+ }
}

0 comments on commit 43fb0df

Please sign in to comment.
Something went wrong with that request. Please try again.