Skip to content

Commit

Permalink
Added a new Walker abstract class to be used for all built-in walkers.
Browse files Browse the repository at this point in the history
  • Loading branch information
jonn-smith committed Jun 28, 2018
1 parent 54260e3 commit a5b3ba0
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.io.IOUtils;

import java.io.File;
import java.util.Iterator;
Expand All @@ -34,7 +33,7 @@
* Created by Takuto Sato 1/30/17, abstractified by David Benjamin on 2/22/17.
* {@link #onTraversalStart}, {@link #onTraversalSuccess} and/or {@link #closeTool}.
*/
public abstract class AbstractConcordanceWalker extends GATKTool {
public abstract class AbstractConcordanceWalker extends Walker {

public static final String TRUTH_VARIANTS_LONG_NAME = "truth";
public static final String EVAL_VARIANTS_SHORT_NAME = "eval";
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.broadinstitute.hellbender.engine;

import htsjdk.samtools.util.Locatable;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineException;
Expand All @@ -12,13 +11,10 @@
import org.broadinstitute.hellbender.utils.IGVUtils;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.activityprofile.ActivityProfileState;
import org.broadinstitute.hellbender.utils.downsampling.PositionalDownsampler;
import org.broadinstitute.hellbender.utils.downsampling.ReadsDownsampler;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
Expand All @@ -43,7 +39,7 @@
* Internally, the reads are loaded in chunks called read shards, which are then subdivided into active/inactive regions
* for processing by the tool implementation. One read shard is created per contig.
*/
public abstract class AssemblyRegionWalker extends GATKTool {
public abstract class AssemblyRegionWalker extends Walker {

//NOTE: these argument names are referenced by HaplotypeCallerSpark
public static final String MIN_ASSEMBLY_LONG_NAME = "min-assembly-region-size";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.broadinstitute.hellbender.utils.Utils;

import java.io.File;
import java.nio.file.Path;

/**
* A FeatureWalker is a tool that processes a {@link Feature} at a time from a source of Features, with
Expand All @@ -21,7 +20,7 @@
*
* @param <F> the driving feature type.
*/
public abstract class FeatureWalker<F extends Feature> extends GATKTool {
public abstract class FeatureWalker<F extends Feature> extends Walker {

private FeatureDataSource<F> drivingFeatures;

Expand Down Expand Up @@ -79,7 +78,7 @@ private void initializeDrivingFeatures() {
* This method is declared {@code final}, so subclasses cannot override it; the default implementation should be suitable for most uses.
*/
@Override
public void traverse() {
public final void traverse() {
CountingReadFilter readFilter = makeReadFilter();
// Process each feature in the input stream.
Utils.stream(drivingFeatures).forEach(feature -> {
Expand Down
50 changes: 45 additions & 5 deletions src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFSimpleHeaderLine;
import java.io.File;
import java.nio.file.Path;
import java.time.ZonedDateTime;
import java.util.*;
import java.util.stream.Stream;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor;
Expand Down Expand Up @@ -43,6 +38,12 @@
import org.broadinstitute.hellbender.utils.reference.ReferenceUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.io.File;
import java.nio.file.Path;
import java.time.ZonedDateTime;
import java.util.*;
import java.util.stream.Stream;

/**
* Base class for all GATK tools. Tool authors that wish to write a "GATK" tool but not use one of
* the pre-packaged Walker traversals should feel free to extend this class directly. All other
Expand Down Expand Up @@ -157,6 +158,45 @@ public abstract class GATKTool extends CommandLineProgram {
*/
List<SimpleInterval> intervalsForTraversal;

/**
 * Accessor for the {@link ReferenceDataSource} backing this {@link GATKTool}.
 * Callers should check {@link #hasReference()} before invoking this method, because a missing
 * reference is reported by exception rather than by a {@code null} return value.
 *
 * @return the {@link ReferenceDataSource} for this {@link GATKTool}. Never {@code null}.
 * @throws GATKException if no reference is available (the underlying field is {@code null}).
 */
protected ReferenceDataSource getReferenceDataSource() {
    // Happy path first: hand back the reference when one is present.
    if ( reference != null ) {
        return reference;
    }
    throw new GATKException("Attempted to retrieve null reference!");
}

/**
 * Accessor for the {@link ReadsDataSource} backing this {@link GATKTool}.
 * Callers should check {@link #hasReads()} before invoking this method, because missing
 * reads are reported by exception rather than by a {@code null} return value.
 *
 * @return the {@link ReadsDataSource} for this {@link GATKTool}. Never {@code null}.
 * @throws GATKException if no reads source is available (the underlying field is {@code null}).
 */
protected ReadsDataSource getReadsDataSource() {
    // Happy path first: hand back the reads source when one is present.
    if ( reads != null ) {
        return reads;
    }
    throw new GATKException("Attempted to retrieve null reads!");
}

/**
 * Accessor for the {@link FeatureManager} backing this {@link GATKTool}.
 * Callers should check {@link #hasFeatures()} before invoking this method, because missing
 * features are reported by exception rather than by a {@code null} return value.
 *
 * @return the {@link FeatureManager} for this {@link GATKTool}. Never {@code null}.
 * @throws GATKException if no feature manager is available (the underlying field is {@code null}).
 */
protected FeatureManager getFeatureManager() {
    // Happy path first: hand back the feature manager when one is present.
    if ( features != null ) {
        return features;
    }
    throw new GATKException("Attempted to retrieve null features!");
}

/**
* Progress meter to print out traversal statistics. Subclasses must invoke
* {@link ProgressMeter#update(Locatable)} after each record processed from
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.utils.SimpleInterval;

import java.nio.file.Path;

/**
* An IntervalWalker is a tool that processes a single interval at a time, with the ability to query
* optional overlapping sources of reads, reference data, and/or variants/features.
Expand All @@ -18,7 +16,7 @@
* onTraversalStart() and/or onTraversalSuccess(). See the {@link org.broadinstitute.hellbender.tools.examples.ExampleIntervalWalker}
* tool for an example.
*/
public abstract class IntervalWalker extends GATKTool {
public abstract class IntervalWalker extends Walker {

@Override
public boolean requiresIntervals() {
Expand Down Expand Up @@ -56,7 +54,7 @@ protected final void onStartup() {
}

@Override
public void traverse() {
public final void traverse() {
final ReadFilter readFilter = makeReadFilter();
for ( final SimpleInterval interval : intervalsForTraversal ) {
apply(interval,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
*
* @author Daniel Gomez-Sanchez (magicDGS)
*/
public abstract class LocusWalker extends GATKTool {
public abstract class LocusWalker extends Walker {

@Argument(fullName = "maxDepthPerSample", shortName = "maxDepthPerSample", doc = "Maximum number of reads to retain per sample per locus. Reads above this threshold will be downsampled. Set to 0 to disable.", optional = true)
protected int maxDepthPerSample = defaultMaxDepthPerSample();
Expand Down Expand Up @@ -146,7 +146,7 @@ protected final void onStartup() {
* and including deletions only if {@link #includeDeletions()} returns {@code true}.
*/
@Override
public void traverse() {
public final void traverse() {
final SAMFileHeader header = getHeaderForReads();
// get the samples from the read groups
final Set<String> samples = header.getReadGroups().stream()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
package org.broadinstitute.hellbender.engine;

import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor;
import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKReadFilterPluginDescriptor;
import org.broadinstitute.hellbender.engine.filters.CountingReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.engine.filters.WellformedReadFilter;
import org.broadinstitute.hellbender.transformers.ReadTransformer;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.engine.filters.CountingReadFilter;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.nio.file.Path;
import java.util.Collections;
import java.util.List;

Expand All @@ -29,7 +24,7 @@
* ReadWalker authors must implement the apply() method to process each read, and may optionally implement
* onTraversalStart() and/or onTraversalSuccess(). See the PrintReadsWithReference walker for an example.
*/
public abstract class ReadWalker extends GATKTool {
public abstract class ReadWalker extends Walker {

@Override
public boolean requiresReads() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* {@link #getSpliteratorForDrivingVariants}, and may optionally implement {@link #onTraversalStart},
* {@link #onTraversalSuccess} and/or {@link #closeTool}.
*/
public abstract class VariantWalkerBase extends GATKTool {
public abstract class VariantWalkerBase extends Walker {

/**
* This number controls the size of the cache for our primary and auxiliary FeatureInputs
Expand Down
27 changes: 27 additions & 0 deletions src/main/java/org/broadinstitute/hellbender/engine/Walker.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.broadinstitute.hellbender.engine;

import org.broadinstitute.hellbender.exceptions.GATKException;

/**
 * An abstract class to represent built-in walkers that inherit directly from {@link GATKTool}.
 *
 * <p>The data-source accessors declared on {@link GATKTool} are overridden here as {@code final}
 * methods that unconditionally throw a {@link GATKException}. As a result, no class in the
 * {@code Walker} hierarchy can obtain the underlying data sources through these accessors, and
 * no subclass can re-enable them.</p>
 *
 * Created by jonn on 6/28/18.
 */
public abstract class Walker extends GATKTool {

    /**
     * Disabled for walkers: always throws.
     *
     * @return never returns normally.
     * @throws GATKException always.
     */
    @Override
    protected final ReferenceDataSource getReferenceDataSource() {
        throw new GATKException("Should never access ReferenceDataSource in child classes of Walker.");
    }

    /**
     * Disabled for walkers: always throws.
     *
     * @return never returns normally.
     * @throws GATKException always.
     */
    @Override
    protected final ReadsDataSource getReadsDataSource() {
        throw new GATKException("Should never access ReadsDataSource in child classes of Walker.");
    }

    /**
     * Disabled for walkers: always throws.
     *
     * @return never returns normally.
     * @throws GATKException always.
     */
    @Override
    protected final FeatureManager getFeatureManager() {
        throw new GATKException("Should never access FeatureManager in child classes of Walker.");
    }
}

0 comments on commit a5b3ba0

Please sign in to comment.