Skip to content

Commit

Permalink
Merge 10c595b into 4158744
Browse files Browse the repository at this point in the history
  • Loading branch information
ryan-williams committed Oct 1, 2016
2 parents 4158744 + 10c595b commit 3309300
Show file tree
Hide file tree
Showing 13 changed files with 57 additions and 36 deletions.
37 changes: 30 additions & 7 deletions src/main/scala/org/hammerlab/guacamole/assembly/AssemblyArgs.scala
Original file line number Diff line number Diff line change
@@ -1,23 +1,46 @@
package org.hammerlab.guacamole.assembly

import org.hammerlab.guacamole.loci.partitioning.HalfWindowConfig
import org.kohsuke.args4j.{Option => Args4jOption}

trait AssemblyArgs {
@Args4jOption(name = "--kmer-size", usage = "Length of kmer used for DeBruijn Graph assembly")
trait AssemblyArgs
extends HalfWindowConfig {

@Args4jOption(
name = "--kmer-size",
usage = "Length of kmer used for DeBruijn Graph assembly"
)
var kmerSize: Int = 45

@Args4jOption(name = "--assembly-window-range", usage = "Number of bases before and after to check for additional matches or deletions")
@Args4jOption(
name = "--assembly-window-range",
usage = "Number of bases before and after to check for additional matches or deletions"
)
var assemblyWindowRange: Int = 20

@Args4jOption(name = "--min-occurrence", required = false, usage = "Minimum occurrences to include a kmer ")
override def halfWindowSize: Int = assemblyWindowRange

@Args4jOption(
name = "--min-occurrence",
usage = "Minimum occurrences to include a kmer "
)
var minOccurrence: Int = 3

@Args4jOption(name = "--min-area-vaf", required = false, usage = "Minimum variant allele frequency to investigate area")
@Args4jOption(
name = "--min-area-vaf",
usage = "Minimum variant allele frequency to investigate area"
)
var minAreaVaf: Int = 5

@Args4jOption(name = "--min-mean-kmer-quality", usage = "Minimum mean base quality to include a kmer")
@Args4jOption(
name = "--min-mean-kmer-quality",
usage = "Minimum mean base quality to include a kmer"
)
var minMeanKmerQuality: Int = 0

@Args4jOption(name = "--shortcut-assembly", required = false, usage = "Skip assembly process in inactive regions")
@Args4jOption(
name = "--shortcut-assembly",
usage = "Skip assembly process in inactive regions"
)
var shortcutAssembly: Boolean = false
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ object GermlineAssemblyCaller {
PartitionedRegions(
qualityReads,
loci.result(readsets.contigLengths),
args,
args.assemblyWindowRange
args
)

val calledAlleles =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,7 @@ object SomaticJoint {
PartitionedRegions(
readsets.allMappedReads,
lociSetMinusOne(loci),
args,
halfWindowSize = 0
args
)

val broadcastForceCallLoci = sc.broadcast(forceCallLoci)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ object SomaticStandard {
PartitionedRegions(
readsets.allMappedReads,
loci,
args,
halfWindowSize = 0
args
)

// Destructure `args`' fields here to avoid serializing `args` itself.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,7 @@ object VAFHistogram {
PartitionedRegions(
readsets.allMappedReads,
loci.result(contigLengths),
args,
halfWindowSize = 0
args
)

val minReadDepth = args.minReadDepth
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ object VariantSupport {
PartitionedRegions(
readsets.allMappedReads,
loci,
args,
halfWindowSize = 0
args
)

val alleleCounts =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ import org.kohsuke.args4j.{Option => Args4JOption}

import scala.reflect.ClassTag

trait CappedRegionsPartitionerArgs {
trait CappedRegionsPartitionerArgs
extends HalfWindowConfig {

@Args4JOption(
name = "--max-reads-per-partition",
usage = "Maximum number of reads to allow any one partition to have. Loci that have more depth than this will be dropped."
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package org.hammerlab.guacamole.loci.partitioning

trait HalfWindowConfig {
def halfWindowSize: Int = 0
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ trait LociPartitionerArgs
)
var lociPartitionerName: String = "capped"

def getPartitioner[R <: ReferenceRegion: ClassTag](regions: RDD[R], halfWindowSize: Int = 0): LociPartitioner = {
def getPartitioner[R <: ReferenceRegion: ClassTag](regions: RDD[R]): LociPartitioner = {
val sc = regions.sparkContext

val numPartitions =
Expand All @@ -56,7 +56,7 @@ trait LociPartitionerArgs
)

case "micro-regions" =>
new MicroRegionPartitioner(regions, halfWindowSize, numPartitions, partitioningAccuracy)
new MicroRegionPartitioner(regions, numPartitions, partitioningAccuracy)
case "uniform" =>
UniformPartitioner(numPartitions)
case _ =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ object LociPartitioning {

def apply[R <: ReferenceRegion: ClassTag](regions: RDD[R],
loci: LociSet,
args: LociPartitionerArgs,
halfWindowSize: Int = 0): LociPartitioning = {
args: LociPartitionerArgs): LociPartitioning = {
for (lociPartitioningPath <- args.lociPartitioningPathOpt) {
val path = new Path(lociPartitioningPath)
val fs = path.getFileSystem(regions.sparkContext.hadoopConfiguration)
Expand All @@ -116,7 +115,7 @@ object LociPartitioning {

val lociPartitioning =
args
.getPartitioner(regions, halfWindowSize)
.getPartitioner(regions)
.partition(loci)

for (lociPartitioningPath <- args.lociPartitioningPathOpt) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ trait MicroRegionPartitionerArgs extends UniformPartitionerArgs {
* @return LociMap of locus -> partition assignments.
*/
class MicroRegionPartitioner[R <: ReferenceRegion: ClassTag](regions: RDD[R],
halfWindowSize: Int,
numPartitions: NumPartitions,
microPartitionsPerPartition: NumMicroPartitions)
extends LociPartitioner {
Expand Down Expand Up @@ -127,7 +126,10 @@ class MicroRegionPartitioner[R <: ReferenceRegion: ClassTag](regions: RDD[R],

var totalRegionsAssigned = 0.0
var partition = 0
def regionsRemainingForThisPartition() = math.round(((partition + 1) * regionsPerPartition) - totalRegionsAssigned)
def regionsRemainingForThisPartition =
math.round(
((partition + 1) * regionsPerPartition) - totalRegionsAssigned
)

val builder = LociMap.newBuilder[PartitionIndex]

Expand All @@ -143,9 +145,9 @@ class MicroRegionPartitioner[R <: ReferenceRegion: ClassTag](regions: RDD[R],
} else {

// If we've allocated all regions for this partition, move on to the next partition.
if (regionsRemainingForThisPartition() == 0)
if (regionsRemainingForThisPartition == 0)
partition += 1
assert(regionsRemainingForThisPartition() > 0)
assert(regionsRemainingForThisPartition > 0)
assert(partition < numPartitions)

/**
Expand All @@ -161,7 +163,7 @@ class MicroRegionPartitioner[R <: ReferenceRegion: ClassTag](regions: RDD[R],
* current partition.
*
*/
val fractionToTake = math.min(1.0, regionsRemainingForThisPartition().toDouble / regionsInSet.toDouble)
val fractionToTake = math.min(1.0, regionsRemainingForThisPartition.toDouble / regionsInSet.toDouble)

/**
* Based on fractionToTake, we set the number of loci and regions to assign.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,17 +122,13 @@ object PartitionedRegions {
* @param loci Genomic loci to operate on; these will be split among Spark partitions and coupled with all regions
* from `regionRDDs` that overlap them (module the half-window described below).
* @param args Parameters dictating how `loci` should be partitioned.
* @param halfWindowSize A region is considered to overlap a partition's loci if it passes within this distance of any
* of them, in which case a copy of that region will be sent to that partition (possibly among
* others).
* @tparam R ReferenceRegion type.
*/
def apply[R <: ReferenceRegion: ClassTag](regions: RDD[R],
loci: LociSet,
args: PartitionedRegionsArgs,
halfWindowSize: Int = 0): PartitionedRegions[R] = {
args: PartitionedRegionsArgs): PartitionedRegions[R] = {

val lociPartitioning = LociPartitioning(regions, loci, args, halfWindowSize)
val lociPartitioning = LociPartitioning(regions, loci, args)

progress(
s"Partitioned loci: ${lociPartitioning.numPartitions} partitions.",
Expand All @@ -146,7 +142,7 @@ object PartitionedRegions {
apply(
regions,
lociPartitioning,
halfWindowSize,
args.halfWindowSize,
args.partitionedReadsPathOpt,
args.compressReadPartitions,
args.printPartitioningStats
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class MicroRegionPartitionerSuite
val result =
new MicroRegionPartitioner(
reads,
halfWindowSize = 0,
numPartitions = 2,
microPartitionsPerPartition = 100
).partition(loci)
Expand Down

0 comments on commit 3309300

Please sign in to comment.