Skip to content

Commit

Permalink
Local Assembler for SVs
Browse files Browse the repository at this point in the history
  • Loading branch information
tedsharpe committed Jun 3, 2021
1 parent 944c128 commit 0198191
Show file tree
Hide file tree
Showing 44 changed files with 3,983 additions and 236 deletions.
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.engine;

import htsjdk.samtools.SAMSequenceDictionary;
import org.broadinstitute.hellbender.engine.filters.CountingReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.engine.filters.WellformedReadFilter;
Expand Down Expand Up @@ -57,7 +58,10 @@ protected final void onStartup() {
*/
/**
 * Restricts the reads traversal to the user-supplied intervals, if any were given.
 * Builds the bounds from the reads header's sequence dictionary and preserves the
 * user's choice about whether unmapped reads are traversed.
 *
 * NOTE(review): this span is a rendered diff — the first setTraversalBounds call
 * below appears to be the PRE-change line retained by the diff view, and the
 * three statements after it are its replacement; both would not coexist in the
 * real file. Confirm against the actual committed source before editing.
 */
void setReadTraversalBounds() {
if ( hasUserSuppliedIntervals() ) {
reads.setTraversalBounds(intervalArgumentCollection.getTraversalParameters(getHeaderForReads().getSequenceDictionary()));
final SAMSequenceDictionary dict = getHeaderForReads().getSequenceDictionary();
// carry over the traverse-unmapped flag computed from the user's interval arguments
final boolean traverseUnmapped =
intervalArgumentCollection.getTraversalParameters(dict).traverseUnmappedReads();
reads.setTraversalBounds(new TraversalParameters(userIntervals, traverseUnmapped));
}
}

Expand Down
Expand Up @@ -16,7 +16,7 @@
import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.utils.HopscotchMap;
import org.broadinstitute.hellbender.utils.collections.HopscotchMap;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Tail;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
Expand Down
2,513 changes: 2,513 additions & 0 deletions src/main/java/org/broadinstitute/hellbender/tools/LocalAssembler.java

Large diffs are not rendered by default.

@@ -0,0 +1,115 @@
package org.broadinstitute.hellbender.tools;

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReadWalker;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;

import java.util.*;

@CommandLineProgramProperties(
        summary = "Unmaps reads that have distant mates. "+
        "This ensures that a PairWalker will always see both mates, "+
        "even when one of them is mapped far away, when given the output "+
        "of this tool along with the original inputs.",
        oneLineSummary = "Unmaps reads with distant mates.",
        programGroup = ReadDataManipulationProgramGroup.class
)
@DocumentedFeature
public class PrintDistantMates extends ReadWalker {
    @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
            doc="Write output to this file")
    public GATKPath output;

    // FIX: declared final — this is a constant tag name and a mutable public
    // static String invites accidental reassignment by callers.
    public static final String DISTANT_MATE_TAG = "DM";

    private SAMFileGATKReadWriter outputWriter;

    /**
     * Restrict traversal to paired, primary-line, non-duplicate reads whose
     * mate maps distantly — only those reads need the distant-mate treatment.
     */
    @Override public List<ReadFilter> getDefaultReadFilters() {
        final List<ReadFilter> readFilters = new ArrayList<>(super.getDefaultReadFilters());
        readFilters.add(ReadFilterLibrary.PAIRED);
        readFilters.add(ReadFilterLibrary.PRIMARY_LINE);
        readFilters.add(ReadFilterLibrary.NOT_DUPLICATE);
        readFilters.add(ReadFilterLibrary.MATE_DISTANT);
        return readFilters;
    }

    @Override
    public boolean requiresReads() { return true; }

    @Override
    public void onTraversalStart() {
        // second arg false: do not pre-sort; reads are written in traversal order
        outputWriter = createSAMWriter(output, false);
    }

    /**
     * Writes an altered copy of each read: repositioned to its mate's locus
     * and unmapped, with the original alignment preserved in the OA tag.
     */
    @Override
    public void apply( final GATKRead read,
                       final ReferenceContext referenceContext,
                       final FeatureContext featureContext ) {
        final GATKRead copy = doDistantMateAlterations(read);
        outputWriter.addRead(copy);
    }

    @Override
    public void closeTool() {
        if ( outputWriter != null ) {
            outputWriter.close();
        }
    }

    /** Returns true if the read carries the distant-mate marker tag. */
    public static boolean isDistantMate( final GATKRead read ) {
        return read.hasAttribute(DISTANT_MATE_TAG);
    }

    /**
     * Returns an unmapped copy of the read, repositioned to its mate's contig
     * and start (so a locus traversal fetches it alongside the mate), marked
     * with {@link #DISTANT_MATE_TAG}. The original alignment is stashed in the
     * OA tag as "rname,pos,strand,CIGAR,mapQ,NM;" (SAM-spec OA format), so it
     * can be restored by {@link #undoDistantMateAlterations}.
     *
     * @param read the mapped read to alter; not modified
     * @return a new, altered read
     */
    public static GATKRead doDistantMateAlterations( final GATKRead read ) {
        final GATKRead copy = read.copy();
        final Integer nmValue = read.getAttributeAsInteger(SAMTag.NM.name());
        // NM may be absent: the OA field is then left empty before the ';'
        final String oaValue = read.getContig() + "," + read.getStart() + "," +
                (read.isReverseStrand() ? "-," : "+,") + read.getCigar() + "," +
                read.getMappingQuality() + "," + (nmValue == null ? "" : nmValue) + ";";
        copy.clearAttribute(SAMTag.NM.name());
        copy.setAttribute(SAMTag.OA.name(), oaValue);
        copy.setAttribute(DISTANT_MATE_TAG, "");
        copy.setPosition(read.getMateContig(), read.getMateStart());
        copy.setCigar(SAMRecord.NO_ALIGNMENT_CIGAR);
        copy.setMappingQuality(0);
        copy.setIsUnmapped();
        return copy;
    }

    /**
     * Reverses {@link #doDistantMateAlterations}: restores the original
     * alignment from the OA tag and strips the marker tags. If the read has
     * no OA tag it is returned unchanged (not a copy).
     *
     * @param read a read possibly produced by doDistantMateAlterations
     * @return the restored read, or {@code read} itself when no OA tag exists
     * @throws UserException if the OA tag cannot be parsed
     */
    public static GATKRead undoDistantMateAlterations( final GATKRead read ) {
        final String oaValue = read.getAttributeAsString(SAMTag.OA.name());
        if ( oaValue == null ) {
            return read;
        }
        final GATKRead copy = read.copy();
        copy.clearAttribute(DISTANT_MATE_TAG);
        copy.clearAttribute(SAMTag.OA.name());
        try {
            final String[] tokens = oaValue.split(",");
            copy.setPosition(tokens[0], Integer.parseInt(tokens[1]));
            copy.setIsReverseStrand("-".equals(tokens[2]));
            copy.setCigar(tokens[3]);
            copy.setMappingQuality(Integer.parseInt(tokens[4]));
            // tokens[5] is "<NM>;" or just ";" when NM was absent at alteration time
            if ( tokens[5].length() > 1 ) {
                final String nmValue = tokens[5].substring(0, tokens[5].length() - 1);
                copy.setAttribute(SAMTag.NM.name(), Integer.parseInt(nmValue));
            }
        } catch ( final Exception exc ) {
            throw new UserException("can't recover alignment from OA tag: " + oaValue, exc);
        }
        return copy;
    }
}
Expand Up @@ -4,7 +4,7 @@
import htsjdk.samtools.util.SequenceUtil;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.spark.sv.utils.*;
import org.broadinstitute.hellbender.tools.spark.utils.HopscotchMultiMap;
import org.broadinstitute.hellbender.tools.spark.utils.HopscotchMultiMapSpark;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.bwa.BwaMemAligner;
import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment;
Expand Down Expand Up @@ -128,7 +128,7 @@ static FermiLiteAssembly removeShadowedContigs( final FermiLiteAssembly assembly

// make a map of assembled kmers
final int capacity = assembly.getContigs().stream().mapToInt(tig -> tig.getSequence().length - kmerSize + 1).sum();
final HopscotchMultiMap<SVKmerShort, ContigLocation, KmerLocation> kmerMap = new HopscotchMultiMap<>(capacity);
final HopscotchMultiMapSpark<SVKmerShort, ContigLocation, KmerLocation> kmerMap = new HopscotchMultiMapSpark<>(capacity);
assembly.getContigs().forEach(tig -> {
int contigOffset = 0;
final Iterator<SVKmer> contigKmerItr = new SVKmerizer(tig.getSequence(), kmerSize, new SVKmerShort());
Expand Down
Expand Up @@ -17,7 +17,7 @@
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection;
import org.broadinstitute.hellbender.tools.spark.sv.utils.*;
import org.broadinstitute.hellbender.tools.spark.utils.HopscotchMap;
import org.broadinstitute.hellbender.tools.spark.utils.HopscotchMapSpark;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import scala.Tuple2;
Expand Down Expand Up @@ -147,7 +147,7 @@ static List<SVKmer> collectUbiquitousKmersInReference(final int kSize,
final int hashSize = 2*REF_RECORDS_PER_PARTITION;
return refRDD
.mapPartitions(seqItr -> {
final HopscotchMap<SVKmer, Integer, KmerAndCount> kmerCounts = new HopscotchMap<>(hashSize);
final HopscotchMapSpark<SVKmer, Integer, KmerAndCount> kmerCounts = new HopscotchMapSpark<>(hashSize);
while ( seqItr.hasNext() ) {
final byte[] seq = seqItr.next();
SVDUSTFilteredKmerizer.canonicalStream(seq, kSize, maxDUSTScore, new SVKmerLong())
Expand All @@ -162,7 +162,7 @@ static List<SVKmer> collectUbiquitousKmersInReference(final int kSize,
.mapToPair(entry -> new Tuple2<>(entry.getKey(), entry.getValue()))
.partitionBy(new HashPartitioner(nPartitions))
.mapPartitions(pairItr -> {
final HopscotchMap<SVKmer, Integer, KmerAndCount> kmerCounts = new HopscotchMap<>(hashSize);
final HopscotchMapSpark<SVKmer, Integer, KmerAndCount> kmerCounts = new HopscotchMapSpark<>(hashSize);
while ( pairItr.hasNext() ) {
final Tuple2<SVKmer, Integer> pair = pairItr.next();
final SVKmer kmer = pair._1();
Expand Down

0 comments on commit 0198191

Please sign in to comment.