Browse files

Merge pull request #440 from broadinstitute/eb_selection_should_keep_pls

Eb selection should keep pls
  • Loading branch information...
2 parents 5fad1c1 + 0b5cf30 commit 499fb2676ce3a733601e7638e7201eb5e7798808 @eitanbanks eitanbanks committed Dec 3, 2013
View
19 public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -299,7 +299,7 @@
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
private int maxIndelSize = Integer.MAX_VALUE;
- @Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
+ @Argument(doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;
@@ -657,6 +657,7 @@ public void onTraversalDone(Integer result) {
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
*
* @param vc the VariantContext record to subset
+ * @param excludeNonVariants should we exclude sites that have AC=0 for any alternate alleles?
* @return the subsetted VariantContext
*/
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
@@ -665,14 +666,10 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu
final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used
- VariantContextBuilder builder = new VariantContextBuilder(sub);
+ final VariantContextBuilder builder = new VariantContextBuilder(sub);
- GenotypesContext newGC = sub.getGenotypes();
-
- // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
- final boolean lostAllelesInSelection = vc.getAlleles().size() != sub.getAlleles().size();
- if ( lostAllelesInSelection )
- newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
+ // if there are fewer alternate alleles now in the selected VC, we need to fix the PL and AD values
+ GenotypesContext newGC = GATKVariantContextUtils.updatePLsAndAD(sub, vc);
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
if ( vc.getNSamples() != sub.getNSamples() ) {
@@ -682,11 +679,11 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu
// Remove a fraction of the genotypes if needed
if ( fractionGenotypes > 0 ){
- ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
+ final ArrayList<Genotype> genotypes = new ArrayList<>();
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
- List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
+ final List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
}
else{
@@ -698,7 +695,7 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu
builder.genotypes(newGC);
- addAnnotations(builder, sub, lostAllelesInSelection);
+ addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size());
return builder.make();
}
View
10 public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -976,6 +976,16 @@ public static int countOccurrences(byte element, byte[] array) {
return count;
}
+ public static int countOccurrences(final boolean element, final boolean[] array) {
+ int count = 0;
+ for (final boolean b : array) {
+ if (element == b)
+ count++;
+ }
+
+ return count;
+ }
+
/**
* Returns n random indices drawn with replacement from the range 0..(k-1)
View
248 public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java
@@ -453,67 +453,131 @@ protected static boolean basesAreRepeated(final String l, final String s, final
* rather than the undetermined behavior when using the PLs to assign, which could result
* in hom-var or hom-ref for each, depending on the exact PL values.
*/
- BEST_MATCH_TO_ORIGINAL
+ BEST_MATCH_TO_ORIGINAL,
+
+ /**
+ * do not even bother changing the GTs
+ */
+ DO_NOT_ASSIGN_GENOTYPES
}
/**
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
*
* @param vc variant context with genotype likelihoods
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
- * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
- * @return genotypes
+ * @param assignGenotypes assignment strategy for the (subsetted) PLs
+ * @return a new non-null GenotypesContext
*/
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
final List<Allele> allelesToUse,
final GenotypeAssignmentMethod assignGenotypes) {
if ( allelesToUse.get(0).isNonReference() ) throw new IllegalArgumentException("First allele must be the reference allele");
if ( allelesToUse.size() == 1 ) throw new IllegalArgumentException("Cannot subset to only 1 alt allele");
- // the genotypes with PLs
- final GenotypesContext oldGTs = vc.getGenotypes();
-
- // the new genotypes to create
- final GenotypesContext newGTs = GenotypesContext.create();
-
// optimization: if no input genotypes, just exit
- if (oldGTs.isEmpty()) return newGTs;
-
- // samples
- final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
+ if (vc.getGenotypes().isEmpty()) return GenotypesContext.create();
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
- final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
- final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
- final int numNewAltAlleles = allelesToUse.size() - 1;
+ final List<Integer> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(vc, allelesToUse);
+
+ // create the new genotypes
+ return createGenotypesWithSubsettedLikelihoods(vc.getGenotypes(), vc, allelesToUse, likelihoodIndexesToUse, assignGenotypes);
+ }
+
+ /**
+ * Figure out which likelihood indexes to use for a selected down set of alleles
+ *
+ * @param originalVC the original VariantContext
+ * @param allelesToUse the subset of alleles to use
+ * @return a list of PL indexes to use or null if none
+ */
+ private static List<Integer> determineLikelihoodIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
- // which PLs should be carried forward?
- ArrayList<Integer> likelihoodIndexesToUse = null;
+ // the bitset representing the allele indexes we want to keep
+ final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
// then we can keep the PLs as is; otherwise, we determine which ones to keep
- if ( numNewAltAlleles != numOriginalAltAlleles ) {
- likelihoodIndexesToUse = new ArrayList<>(30);
+ if ( MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length )
+ return null;
- final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
- for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
- if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
- altAlleleIndexToUse[i] = true;
- }
+ return getLikelihoodIndexes(originalVC, alleleIndexesToUse);
+ }
- // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
- final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
- for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
- final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
- // consider this entry only if both of the alleles are good
- if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
- likelihoodIndexesToUse.add(PLindex);
- }
+ /**
+ * Get the actual likelihoods indexes to use given the corresponding allele indexes
+ *
+ * @param originalVC the original VariantContext
+ * @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
+ * @return a non-null List
+ */
+ private static List<Integer> getLikelihoodIndexes(final VariantContext originalVC, final boolean[] alleleIndexesToUse) {
+
+ final List<Integer> result = new ArrayList<>(30);
+
+ // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
+ final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), DEFAULT_PLOIDY);
+
+ for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
+ final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
+ // consider this entry only if both of the alleles are good
+ if ( alleleIndexesToUse[alleles.alleleIndex1] && alleleIndexesToUse[alleles.alleleIndex2] )
+ result.add(PLindex);
+ }
+
+ return result;
+ }
+
+ /**
+ * Given an original VariantContext and a list of alleles from that VC to keep,
+ * returns a bitset representing which allele indexes should be kept
+ *
+ * @param originalVC the original VC
+ * @param allelesToKeep the list of alleles to keep
+ * @return non-null bitset
+ */
+ private static boolean[] getAlleleIndexBitset(final VariantContext originalVC, final List<Allele> allelesToKeep) {
+ final int numOriginalAltAlleles = originalVC.getNAlleles() - 1;
+ final boolean[] alleleIndexesToKeep = new boolean[numOriginalAltAlleles + 1];
+
+ // the reference Allele is definitely still used
+ alleleIndexesToKeep[0] = true;
+ for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
+ if ( allelesToKeep.contains(originalVC.getAlternateAllele(i)) )
+ alleleIndexesToKeep[i+1] = true;
}
+ return alleleIndexesToKeep;
+ }
+
+ /**
+ * Create the new GenotypesContext with the subsetted PLs
+ *
+ * @param originalGs the original GenotypesContext
+ * @param vc the original VariantContext
+ * @param allelesToUse the actual alleles to use with the new Genotypes
+ * @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse (@see #determineLikelihoodIndexesToUse())
+ * @param assignGenotypes assignment strategy for the (subsetted) PLs
+ * @return a new non-null GenotypesContext
+ */
+ private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs,
+ final VariantContext vc,
+ final List<Allele> allelesToUse,
+ final List<Integer> likelihoodIndexesToUse,
+ final GenotypeAssignmentMethod assignGenotypes) {
+ // the new genotypes to create
+ final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
+
+ // make sure we are seeing the expected number of likelihoods per sample
+ final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
+
+ // the samples
+ final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
+
// create the new genotypes
- for ( int k = 0; k < oldGTs.size(); k++ ) {
- final Genotype g = oldGTs.get(sampleIndices.get(k));
+ for ( int k = 0; k < originalGs.size(); k++ ) {
+ final Genotype g = originalGs.get(sampleIndices.get(k));
final GenotypeBuilder gb = new GenotypeBuilder(g);
// create the new likelihoods array from the alleles we are allowed to use
@@ -570,13 +634,16 @@ public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT,
final GenotypeAssignmentMethod assignmentMethod,
final double[] newLikelihoods,
final List<Allele> allelesToUse) {
- gb.noAD();
switch ( assignmentMethod ) {
+ case DO_NOT_ASSIGN_GENOTYPES:
+ break;
case SET_TO_NO_CALL:
gb.alleles(NO_CALL_ALLELES);
+ gb.noAD();
gb.noGQ();
break;
case USE_PLS_TO_ASSIGN:
+ gb.noAD();
if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) {
// if there is no mass on the (new) likelihoods, then just no-call the sample
gb.alleles(NO_CALL_ALLELES);
@@ -597,6 +664,7 @@ public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT,
}
gb.noGQ();
gb.noPL();
+ gb.noAD();
gb.alleles(best);
break;
}
@@ -622,7 +690,7 @@ public static GenotypesContext subsetToRefOnly(final VariantContext vc, final in
if (oldGTs.isEmpty()) return oldGTs;
// the new genotypes to create
- final GenotypesContext newGTs = GenotypesContext.create();
+ final GenotypesContext newGTs = GenotypesContext.create(oldGTs.size());
final Allele ref = vc.getReference();
final List<Allele> diploidRefAlleles = Arrays.asList(ref, ref);
@@ -1083,7 +1151,7 @@ private static final boolean hasPLIncompatibleAlleles(final Collection<Allele> a
}
public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
- GenotypesContext newGs = GenotypesContext.create(genotypes.size());
+ final GenotypesContext newGs = GenotypesContext.create(genotypes.size());
for ( final Genotype g : genotypes ) {
newGs.add(removePLsAndAD(g));
@@ -1092,6 +1160,108 @@ public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
return newGs;
}
+ /**
+ * Updates the PLs and AD of the Genotypes in the newly selected VariantContext to reflect the fact that some alleles
+ * from the original VariantContext are no longer present.
+ *
+ * @param selectedVC the selected (new) VariantContext
+ * @param originalVC the original VariantContext
+ * @return a new non-null GenotypesContext
+ */
+ public static GenotypesContext updatePLsAndAD(final VariantContext selectedVC, final VariantContext originalVC) {
+ final int numNewAlleles = selectedVC.getAlleles().size();
+ final int numOriginalAlleles = originalVC.getAlleles().size();
+
+ // if we have more alternate alleles in the selected VC than in the original VC, then something is wrong
+ if ( numNewAlleles > numOriginalAlleles )
+ throw new IllegalArgumentException("Attempting to fix PLs and AD from what appears to be a *combined* VCF and not a selected one");
+
+ final GenotypesContext oldGs = selectedVC.getGenotypes();
+
+ // if we have the same number of alternate alleles in the selected VC as in the original VC, then we don't need to fix anything
+ if ( numNewAlleles == numOriginalAlleles )
+ return oldGs;
+
+ final GenotypesContext newGs = fixPLsFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles());
+
+ return fixADFromSubsettedAlleles(newGs, originalVC, selectedVC.getAlleles());
+ }
+
+ /**
+ * Fix the PLs for the GenotypesContext of a VariantContext that has been subset
+ *
+ * @param originalGs the original GenotypesContext
+ * @param originalVC the original VariantContext
+ * @param allelesToUse the new (sub)set of alleles to use
+ * @return a new non-null GenotypesContext
+ */
+ static private GenotypesContext fixPLsFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
+
+ // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
+ final List<Integer> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(originalVC, allelesToUse);
+
+ // create the new genotypes
+ return createGenotypesWithSubsettedLikelihoods(originalGs, originalVC, allelesToUse, likelihoodIndexesToUse, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES);
+ }
+
+ /**
+ * Fix the AD for the GenotypesContext of a VariantContext that has been subset
+ *
+ * @param originalGs the original GenotypesContext
+ * @param originalVC the original VariantContext
+ * @param allelesToUse the new (sub)set of alleles to use
+ * @return a new non-null GenotypesContext
+ */
+ static private GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
+
+ // the bitset representing the allele indexes we want to keep
+ final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
+
+ // the new genotypes to create
+ final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
+
+ // the samples
+ final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
+
+ // create the new genotypes
+ for ( int k = 0; k < originalGs.size(); k++ ) {
+ final Genotype g = originalGs.get(sampleIndices.get(k));
+ newGTs.add(fixAD(g, alleleIndexesToUse, allelesToUse.size()));
+ }
+
+ return newGTs;
+ }
+
+ /**
+ * Fix the AD for the given Genotype
+ *
+ * @param genotype the original Genotype
+ * @param alleleIndexesToUse a bitset describing whether or not to keep a given index
+ * @param nAllelesToUse how many alleles we are keeping
+ * @return a non-null Genotype
+ */
+ private static Genotype fixAD(final Genotype genotype, final boolean[] alleleIndexesToUse, final int nAllelesToUse) {
+ // if it ain't broke don't fix it
+ if ( !genotype.hasAD() )
+ return genotype;
+
+ final GenotypeBuilder builder = new GenotypeBuilder(genotype);
+
+ final int[] oldAD = genotype.getAD();
+ if ( oldAD.length != alleleIndexesToUse.length ) {
+ builder.noAD();
+ } else {
+ final int[] newAD = new int[nAllelesToUse];
+ int currentIndex = 0;
+ for ( int i = 0; i < oldAD.length; i++ ) {
+ if ( alleleIndexesToUse[i] )
+ newAD[currentIndex++] = oldAD[i];
+ }
+ builder.AD(newAD);
+ }
+ return builder.make();
+ }
+
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
Allele ref = null;
@@ -1277,7 +1447,7 @@ protected static VariantContext trimAlleles(final VariantContext inputVC,
@Requires("originalGenotypes != null && alleleMapper != null")
protected static GenotypesContext updateGenotypesWithMappedAlleles(final GenotypesContext originalGenotypes, final AlleleMapper alleleMapper) {
- final GenotypesContext updatedGenotypes = GenotypesContext.create();
+ final GenotypesContext updatedGenotypes = GenotypesContext.create(originalGenotypes.size());
for ( final Genotype genotype : originalGenotypes ) {
final List<Allele> updatedAlleles = alleleMapper.remap(genotype.getAlleles());
View
108 public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java
@@ -1320,4 +1320,112 @@ public void testSubsetToRef() {
}
}
}
+
+ // --------------------------------------------------------------------------------
+ //
+ // Test updatePLsAndAD
+ //
+ // --------------------------------------------------------------------------------
+
+ @DataProvider(name = "updatePLsAndADData")
+ public Object[][] makeUpdatePLsAndADData() {
+ List<Object[]> tests = new ArrayList<>();
+
+ final Allele A = Allele.create("A", true);
+ final Allele C = Allele.create("C");
+ final Allele G = Allele.create("G");
+
+ final List<Allele> AA = Arrays.asList(A,A);
+ final List<Allele> AC = Arrays.asList(A,C);
+ final List<Allele> CC = Arrays.asList(C,C);
+ final List<Allele> AG = Arrays.asList(A,G);
+ final List<Allele> CG = Arrays.asList(C,G);
+ final List<Allele> GG = Arrays.asList(G,G);
+ final List<Allele> ACG = Arrays.asList(A,C,G);
+
+ final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
+
+ final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01});
+ final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01});
+ final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9});
+ final double[] uninformative = new double[]{0, 0, 0};
+
+ final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make();
+
+ // make sure we don't screw up the simple case where no selection happens
+ final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).GQ(8).make();
+ final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10,2}).PL(hetPL).GQ(8).make();
+ final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10,2}).PL(homVarPL).GQ(8).make();
+
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(aaGT).make())});
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(acGT).make())});
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(ccGT).make())});
+
+ // uninformative test cases
+ final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make();
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)});
+ final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noAD().noPL().noGQ().make();
+ tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)});
+
+ // actually subsetting down from multiple alt values
+ final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50};
+ final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50};
+ final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50};
+ final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50};
+ final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG
+ final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG
+
+ final int[] homRef3AllelesAD = new int[]{20, 0, 1};
+ final int[] hetRefC3AllelesAD = new int[]{10, 10, 1};
+ final int[] homC3AllelesAD = new int[]{0, 20, 1};
+ final int[] hetRefG3AllelesAD = new int[]{10, 0, 11};
+ final int[] hetCG3AllelesAD = new int[]{0, 12, 11}; // AA, AC, CC, AG, CG, GG
+ final int[] homG3AllelesAD = new int[]{0, 1, 21}; // AA, AC, CC, AG, CG, GG
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AC).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).AD(new int[]{20, 0}).GQ(100).make())});
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefC3AllelesAD).PL(hetRefC3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AC).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-10, 0, -20}).AD(new int[]{10, 10}).GQ(100).make())});
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homC3AllelesAD).PL(homC3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AC).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -10, 0}).AD(new int[]{0, 20}).GQ(100).make())});
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefG3AllelesAD).PL(hetRefG3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AG).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, 0, -50}).AD(new int[]{10, 11}).GQ(100).make())});
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetCG3AllelesAD).PL(hetCG3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AG).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -20, -30}).AD(new int[]{0, 11}).GQ(100).make())});
+
+ tests.add(new Object[]{
+ new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homG3AllelesAD).PL(homG3AllelesPL).make()).make(),
+ new VariantContextBuilder(vcBase).alleles(AG).make(),
+ Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -40, 0}).AD(new int[]{0, 21}).GQ(100).make())});
+
+ return tests.toArray(new Object[][]{});
+ }
+
+ @Test(dataProvider = "updatePLsAndADData")
+ public void testUpdatePLsAndADData(final VariantContext originalVC,
+ final VariantContext selectedVC,
+ final List<Genotype> expectedGenotypes) {
+ final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make();
+ final GenotypesContext actual = GATKVariantContextUtils.updatePLsAndAD(selectedVCwithGTs, originalVC);
+
+ Assert.assertEquals(actual.size(), expectedGenotypes.size());
+ for ( final Genotype expected : expectedGenotypes ) {
+ final Genotype actualGT = actual.get(expected.getSampleName());
+ Assert.assertNotNull(actualGT);
+ assertGenotypesAreEqual(actualGT, expected);
+ }
+ }
}

0 comments on commit 499fb26

Please sign in to comment.