Skip to content

Commit

Permalink
Change the behavior of SelectVariants for PL/AD when it encounters a …
Browse files Browse the repository at this point in the history
…record that has lost one or more alternate alleles.

Previously, we would strip out the PLs and AD values since they were no longer accurate.  However, this is not ideal because
then that information is just lost and 1) users complain on the forum and post it as a bug and 2) it gives us problems in both
the current and future (single sample) calling pipelines because we subset samples/alleles all the time and lose info.

Now the PLs and AD get correctly selected down.

While I was in there I also refactored some related code in subsetDiploidAlleles().  There were no real changes there - I just
broke it out into smaller chunks as per our best practices.

Added unit tests and updated integration tests.
Addressed reviews.
  • Loading branch information
eitanbanks committed Dec 3, 2013
1 parent 5fad1c1 commit 43fb0df
Show file tree
Hide file tree
Showing 4 changed files with 335 additions and 50 deletions.
Expand Up @@ -299,7 +299,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
@Argument(doc="indel size select",required=false,fullName="maxIndelSize") @Argument(doc="indel size select",required=false,fullName="maxIndelSize")
private int maxIndelSize = Integer.MAX_VALUE; private int maxIndelSize = Integer.MAX_VALUE;


@Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES") @Argument(doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false; private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;




Expand Down Expand Up @@ -657,6 +657,7 @@ public void onTraversalDone(Integer result) {
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF). * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
* *
* @param vc the VariantContext record to subset * @param vc the VariantContext record to subset
* @param excludeNonVariants should we exclude sites that have AC=0 for any alternate alleles?
* @return the subsetted VariantContext * @return the subsetted VariantContext
*/ */
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) { private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
Expand All @@ -665,14 +666,10 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu


final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used


VariantContextBuilder builder = new VariantContextBuilder(sub); final VariantContextBuilder builder = new VariantContextBuilder(sub);


GenotypesContext newGC = sub.getGenotypes(); // if there are fewer alternate alleles now in the selected VC, we need to fix the PL and AD values

GenotypesContext newGC = GATKVariantContextUtils.updatePLsAndAD(sub, vc);
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
final boolean lostAllelesInSelection = vc.getAlleles().size() != sub.getAlleles().size();
if ( lostAllelesInSelection )
newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());


// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
if ( vc.getNSamples() != sub.getNSamples() ) { if ( vc.getNSamples() != sub.getNSamples() ) {
Expand All @@ -682,11 +679,11 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu


// Remove a fraction of the genotypes if needed // Remove a fraction of the genotypes if needed
if ( fractionGenotypes > 0 ){ if ( fractionGenotypes > 0 ){
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(); final ArrayList<Genotype> genotypes = new ArrayList<>();
for ( Genotype genotype : newGC ) { for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction. //Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){ if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); final List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make()); genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
} }
else{ else{
Expand All @@ -698,7 +695,7 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean exclu


builder.genotypes(newGC); builder.genotypes(newGC);


addAnnotations(builder, sub, lostAllelesInSelection); addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size());


return builder.make(); return builder.make();
} }
Expand Down
10 changes: 10 additions & 0 deletions public/java/src/org/broadinstitute/sting/utils/MathUtils.java
Expand Up @@ -976,6 +976,16 @@ public static int countOccurrences(byte element, byte[] array) {
return count; return count;
} }


public static int countOccurrences(final boolean element, final boolean[] array) {
int count = 0;
for (final boolean b : array) {
if (element == b)
count++;
}

return count;
}



/** /**
* Returns n random indices drawn with replacement from the range 0..(k-1) * Returns n random indices drawn with replacement from the range 0..(k-1)
Expand Down

0 comments on commit 43fb0df

Please sign in to comment.