Skip to content

Commit

Permalink
Added a new Walker abstract class to be used for all built-in walke…
Browse files Browse the repository at this point in the history
…rs. (#4964)

Added a new `Walker` abstract class to be used for all built-in walkers. Allows direct access to engine datasources for direct subclasses of GATKTool, while disallowing it for concrete walker implementations, which should get their data via apply().
  • Loading branch information
jonn-smith authored and droazen committed Apr 16, 2019
1 parent 7884afc commit 9fce0b2
Show file tree
Hide file tree
Showing 19 changed files with 246 additions and 34 deletions.
Expand Up @@ -33,7 +33,7 @@
* Created by Takuto Sato 1/30/17, abstractified by David Benjamin on 2/22/17.
* {@link #onTraversalStart}, {@link #onTraversalSuccess} and/or {@link #closeTool}.
*/
public abstract class AbstractConcordanceWalker extends GATKTool {
public abstract class AbstractConcordanceWalker extends WalkerBase {

public static final String TRUTH_VARIANTS_LONG_NAME = "truth";
public static final String EVAL_VARIANTS_SHORT_NAME = "eval";
Expand Down Expand Up @@ -116,8 +116,17 @@ protected final void onStartup() {
// the primary work of the walker. Must be overridden in implementing classes.
protected abstract void apply(final TruthVersusEval truthVersusEval, final ReadsContext readsContext, final ReferenceContext refContext);

/**
* {@inheritDoc}
*
* Implementation of concordance traversal.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the
* engine package.
*/
@Override
public final void traverse() {
public void traverse() {
// Process each variant in the input stream.
StreamSupport.stream(getSpliteratorForDrivingVariants(), false)
.forEach(truthVersusEval -> {
Expand Down
Expand Up @@ -39,7 +39,7 @@
* Internally, the reads are loaded in chunks called read shards, which are then subdivided into active/inactive regions
* for processing by the tool implementation. One read shard is created per contig.
*/
public abstract class AssemblyRegionWalker extends GATKTool {
public abstract class AssemblyRegionWalker extends WalkerBase {

//NOTE: these argument names are referenced by HaplotypeCallerSpark
public static final String MIN_ASSEMBLY_LONG_NAME = "min-assembly-region-size";
Expand Down Expand Up @@ -252,8 +252,17 @@ protected ReadsDownsampler createDownsampler() {
return maxReadsPerAlignmentStart > 0 ? new PositionalDownsampler(maxReadsPerAlignmentStart, getHeaderForReads()) : null;
}

/**
* {@inheritDoc}
*
* Implementation of assembly region traversal.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the
* engine package.
*/
@Override
public final void traverse() {
public void traverse() {

CountingReadFilter countedFilter = makeReadFilter();

Expand Down
Expand Up @@ -20,7 +20,7 @@
*
* @param <F> the driving feature type.
*/
public abstract class FeatureWalker<F extends Feature> extends GATKTool {
public abstract class FeatureWalker<F extends Feature> extends WalkerBase {

private FeatureDataSource<F> drivingFeatures;

Expand Down Expand Up @@ -74,8 +74,13 @@ private void initializeDrivingFeatures() {
protected abstract boolean isAcceptableFeatureType(Class<? extends Feature> featureType);

/**
* {@inheritDoc}
*
* Implementation of Feature-based traversal.
* Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the engine
* package.
*/
@Override
public void traverse() {
Expand Down
69 changes: 62 additions & 7 deletions src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java
Expand Up @@ -9,6 +9,11 @@
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeaderLine;
import java.io.File;
import java.nio.file.Path;
import java.time.ZonedDateTime;
import java.util.*;
import java.util.stream.Stream;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor;
Expand Down Expand Up @@ -38,12 +43,6 @@
import org.broadinstitute.hellbender.utils.reference.ReferenceUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.io.File;
import java.nio.file.Path;
import java.time.ZonedDateTime;
import java.util.*;
import java.util.stream.Stream;

/**
* Base class for all GATK tools. Tool authors that wish to write a "GATK" tool but not use one of
* the pre-packaged Walker traversals should feel free to extend this class directly. All other
Expand Down Expand Up @@ -151,14 +150,70 @@ public abstract class GATKTool extends CommandLineProgram {
public FeatureManager features;

/**
*
* Intervals to be used for traversal (null if no intervals were provided).
*
* Walker base classes (ReadWalker, etc.) are responsible for hooking these intervals up to
* their particular driving data source.
*/
List<SimpleInterval> userIntervals;

/**
* Get the {@link ReferenceDataSource} for this {@link GATKTool}.
* Will throw a {@link GATKException} if the reference is null.
* Clients are expected to call the {@link #hasReference()} method prior to calling this.
*
* Should only be called by walker base classes in the engine (such as {@link ReadWalker}), or by "free-form" tools that
* extend the {@link GATKTool} class directly rather than one of the built-in walker types.
* Tools that extend a walker type should get their data via {@code apply()} rather than directly accessing
* the engine datasources.
*
* @return the {@link ReferenceDataSource} for this {@link GATKTool}. Never {@code null}.
*/
protected ReferenceDataSource directlyAccessEngineReferenceDataSource() {
if ( reference == null ) {
throw new GATKException("Attempted to retrieve null reference!");
}
return reference;
}

/**
* Get the {@link ReadsDataSource} for this {@link GATKTool}.
* Will throw a {@link GATKException} if the reads are null.
* Clients are expected to call the {@link #hasReads()} method prior to calling this.
*
* Should only be called by walker base classes in the engine (such as {@link ReadWalker}), or by "free-form" tools that
* extend the {@link GATKTool} class directly rather than one of the built-in walker types.
* Tools that extend a walker type should get their data via {@code apply()} rather than directly accessing
* the engine datasources.
*
* @return the {@link ReadsDataSource} for this {@link GATKTool}. Never {@code null}.
*/
protected ReadsDataSource directlyAccessEngineReadsDataSource() {
if ( reads == null ) {
throw new GATKException("Attempted to retrieve null reads!");
}
return reads;
}

/**
* Get the {@link FeatureManager} for this {@link GATKTool}.
* Will throw a {@link GATKException} if the features are null.
* Clients are expected to call the {@link #hasFeatures()} method prior to calling this.
*
* Should only be called by walker base classes in the engine (such as {@link ReadWalker}), or by "free-form" tools that
* extend the {@link GATKTool} class directly rather than one of the built-in walker types.
* Tools that extend a walker type should get their data via {@code apply()} rather than directly accessing
* the engine datasources.
*
* @return the {@link FeatureManager} for this {@link GATKTool}. Never {@code null}.
*/
protected FeatureManager directlyAccessEngineFeatureManager() {
if ( features == null ) {
throw new GATKException("Attempted to retrieve null features!");
}
return features;
}

/**
* Progress meter to print out traversal statistics. Subclasses must invoke
* {@link ProgressMeter#update(Locatable)} after each record processed from
Expand Down
Expand Up @@ -16,7 +16,7 @@
* onTraversalStart() and/or onTraversalSuccess(). See the {@link org.broadinstitute.hellbender.tools.examples.ExampleIntervalWalker}
* tool for an example.
*/
public abstract class IntervalWalker extends GATKTool {
public abstract class IntervalWalker extends WalkerBase {

@Override
public boolean requiresIntervals() {
Expand Down Expand Up @@ -53,6 +53,15 @@ protected final void onStartup() {
super.onStartup();
}

/**
* {@inheritDoc}
*
* Implementation of interval-based traversal.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the
* engine package.
*/
@Override
public void traverse() {
final ReadFilter readFilter = makeReadFilter();
Expand Down
Expand Up @@ -37,7 +37,7 @@
*
* @author Daniel Gomez-Sanchez (magicDGS)
*/
public abstract class LocusWalker extends GATKTool {
public abstract class LocusWalker extends WalkerBase {
public static final String MAX_DEPTH_PER_SAMPLE_NAME = "max-depth-per-sample";

@Argument(fullName = MAX_DEPTH_PER_SAMPLE_NAME, shortName = MAX_DEPTH_PER_SAMPLE_NAME, doc = "Maximum number of reads to retain per sample per locus. Reads above this threshold will be downsampled. Set to 0 to disable.", optional = true)
Expand Down Expand Up @@ -140,12 +140,17 @@ protected final void onStartup() {
}

/**
* {@inheritDoc}
*
* Implementation of locus-based traversal.
* Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
*
* The default implementation iterates over all positions in the reference covered by reads (filtered and transformed)
* for all samples in the read groups, using the downsampling method provided by {@link #getDownsamplingInfo()}
* and including deletions only if {@link #includeDeletions()} returns {@code true}.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the engine
* package.
*/
@Override
public void traverse() {
Expand Down
Expand Up @@ -101,8 +101,13 @@ public final VCFHeader getHeaderForVariants() {
}

/**
* Implementation of variant-based traversal.
* Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
* {@inheritDoc}
*
* Implementation of multi-variant traversal.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the
* engine package.
*/
@Override
public void traverse() {
Expand Down
Expand Up @@ -107,8 +107,17 @@ final static ReferenceContext getExpandedReferenceContext(List<VariantContext> c
return currentReferenceContext;
}

/**
* {@inheritDoc}
*
* Implementation of multi-variant grouped on start traversal.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the
* engine package.
*/
@Override
public final void traverse() {
public void traverse() {
beforeTraverse();
super.traverse();
afterTraverse();
Expand Down
Expand Up @@ -18,7 +18,16 @@ public abstract class MultiplePassVariantWalker extends VariantWalker {
protected abstract int numberOfPasses();

/**
* Overrides the default, single-pass traversal framework of {@link VariantWalkerBase}
* {@inheritDoc}
*
* Implementation of multiple-pass variant traversal.
*
* Overrides the default, single-pass traversal framework of {@link VariantWalkerBase} allowing for multiple
* passes.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the
* engine package.
*/
@Override
public void traverse(){
Expand Down
Expand Up @@ -24,7 +24,7 @@
* ReadWalker authors must implement the apply() method to process each read, and may optionally implement
* onTraversalStart() and/or onTraversalSuccess(). See the PrintReadsWithReference walker for an example.
*/
public abstract class ReadWalker extends GATKTool {
public abstract class ReadWalker extends WalkerBase {

@Override
public boolean requiresReads() {
Expand Down Expand Up @@ -72,13 +72,18 @@ void initializeFeatures() {
}

/**
* {@inheritDoc}
*
* Implementation of read-based traversal.
* Subclasses can override to provide own behavior but default implementation should be suitable for most uses.
*
* The default implementation creates filters using {@link #makeReadFilter} and transformers using
* {@link #makePreReadFilterTransformer()} {@link #makePostReadFilterTransformer()} and then iterates over all reads, applies
* the pre-filter transformer, the filter, then the post-filter transformer and hands the resulting reads to the {@link #apply}
* function of the walker (along with additional contextual information, if present, such as reference bases).
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the engine
* package.
*/
@Override
public void traverse() {
Expand Down
Expand Up @@ -18,7 +18,7 @@
*
* See the {@link ExampleReferenceWalker} walker for an example.
*/
public abstract class ReferenceWalker extends GATKTool {
public abstract class ReferenceWalker extends WalkerBase {

@Override
public String getProgressMeterRecordLabel() { return "bases"; }
Expand All @@ -37,8 +37,13 @@ protected final void onStartup() {
}

/**
* {@inheritDoc}
*
* Implementation of reference-locus-based traversal.
* Subclasses can override to provide own behavior but default implementation should be suitable for most uses.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the engine
* package.
*/
@Override
public void traverse() {
Expand Down
Expand Up @@ -33,6 +33,16 @@
*/
public abstract class TwoPassReadWalker extends ReadWalker {


/**
* {@inheritDoc}
*
* Implementation of two-pass read-based traversal.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the engine
* package.
*/
@Override
public void traverse() {
// Process each read in the input stream.
Expand Down
Expand Up @@ -113,8 +113,13 @@ public boolean requiresReference() {
}

/**
* Implementation of variant-based traversal.
* Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
* {@inheritDoc}
*
* Implementation of variant-locus traversal.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the engine
* package.
*/
@Override
public void traverse() {
Expand Down
Expand Up @@ -88,8 +88,13 @@ public final VCFHeader getHeaderForVariants() {
}

/**
* {@inheritDoc}
*
* Implementation of variant-based traversal.
* Subclasses can override to provide their own behavior but default implementation should be suitable for most uses.
*
* NOTE: You should only override {@link #traverse()} if you are writing a new walker base class in the
* engine package that extends this class. It is not meant to be overridden by tools outside of the engine
* package.
*/
@Override
public void traverse() {
Expand Down

0 comments on commit 9fce0b2

Please sign in to comment.