
rename sortReadsBy methods
heuermh committed Jun 28, 2019
1 parent aa33b06 commit f3ae5c1
Showing 6 changed files with 30 additions and 31 deletions.
@@ -346,9 +346,9 @@ class TransformAlignments(protected val args: TransformAlignmentsArgs) extends B

     // are we sorting lexicographically or using legacy SAM sort order?
     val sortedDs = if (args.sortLexicographically) {
-      ds.sortReadsByReferencePosition()
+      ds.sortByReferencePosition()
     } else {
-      ds.sortReadsByReferencePositionAndIndex()
+      ds.sortByReferencePositionAndIndex()
     }

     // unpersist the cached rdd, if caching was requested
@@ -132,9 +132,9 @@ class TransformFragments(protected val args: TransformFragmentsArgs) extends BDG

     // prep to save
     val finalRdd = if (args.sortReads) {
-      readRdd.sortReadsByReferencePosition()
+      readRdd.sortByReferencePosition()
     } else if (args.sortLexicographically) {
-      readRdd.sortReadsByReferencePositionAndIndex()
+      readRdd.sortByReferencePositionAndIndex()
     } else {
       readRdd
     }
Expand Up @@ -26,7 +26,6 @@ object Timers extends Metrics {

// Load methods
val LoadAlignments = timer("Load Alignments")
val LoadContigFragments = timer("Load Contig Fragments")
val LoadCoverage = timer("Load Coverage")
val LoadFeatures = timer("Load Features")
val LoadFragments = timer("Load Fragments")
@@ -110,9 +109,9 @@ object Timers extends Metrics {
   val SweepReadOverReferenceForQuality = timer("Sweep Read Over Reference For Quality")
   val FinalizingRealignments = timer("Finalizing Realignments")

-  // Sort Reads
-  val SortReads = timer("Sort Reads")
-  val SortByIndex = timer("Sort Reads By Index")
+  // Sort Alignments
+  val SortAlignments = timer("Sort Alignments")
+  val SortAlignmentsByIndex = timer("Sort Alignments By Index")

   // File Saving
   val SAMSave = timer("SAM Save")
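The renamed timers plug into the same instrumentation pattern used throughout this object: declare a timer with timer(...), then wrap the timed work in its .time { ... } block, as the dataset code below does with SortAlignments.time. A minimal sketch of that pattern; the Metrics import path is assumed from bdg-utils and the ExampleTimers object and timedSort method are illustrative, not part of this commit:

  import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset // package path assumed for this ADAM version
  import org.bdgenomics.utils.instrumentation.Metrics        // package path assumed (bdg-utils)

  // Illustrative timer declaration, mirroring the SortAlignments timer above.
  object ExampleTimers extends Metrics {
    val SortAlignments = timer("Sort Alignments")
  }

  // Wrap the sort in the timer so its runtime is recorded by the metrics framework.
  def timedSort(alignments: AlignmentRecordDataset): AlignmentRecordDataset =
    ExampleTimers.SortAlignments.time {
      alignments.sortByReferencePosition()
    }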
@@ -970,29 +970,29 @@ sealed abstract class AlignmentRecordDataset extends AvroReadGroupGenomicDataset
   }

   /**
-   * Sorts our read data by read name.
+   * Sorts our alignments by read name.
    *
-   * @return Returns a new RDD containing sorted reads.
+   * @return Returns a new genomic dataset containing sorted alignments.
    */
-  def sortReadsByReadName(): AlignmentRecordDataset = SortReads.time {
-    info("Sorting reads by read name")
+  def sortByReadName(): AlignmentRecordDataset = SortAlignments.time {
+    info("Sorting alignments by read name")

     transformDataset(_.orderBy("readName", "readInFragment"))
   }

   /**
-   * Sorts our read data by reference positions, with references ordered by name.
+   * Sorts our alignments by reference position, with references ordered by name.
    *
-   * Sorts reads by the location where they are aligned. Unaligned reads are
+   * Sorts alignments by the location where the reads are aligned. Unaligned reads are
    * put at the end and sorted by read name. References are ordered
    * lexicographically.
    *
-   * @return Returns a new genomic dataset containing sorted reads.
+   * @return Returns a new genomic dataset containing sorted alignments.
    *
-   * @see sortReadsByReferencePositionAndIndex
+   * @see sortByReferencePositionAndIndex
    */
-  def sortReadsByReferencePosition(): AlignmentRecordDataset = SortReads.time {
-    info("Sorting reads by reference position")
+  def sortByReferencePosition(): AlignmentRecordDataset = SortAlignments.time {
+    info("Sorting alignments by reference position")

     // NOTE: In order to keep unmapped reads from swamping a single partition
     // we sort the unmapped reads by read name. We prefix with tildes ("~";
@@ -1008,18 +1008,18 @@ sealed abstract class AlignmentRecordDataset extends AvroReadGroupGenomicDataset
   }

   /**
-   * Sorts our read data by reference positions, with references ordered by index.
+   * Sorts our alignments by reference position, with references ordered by index.
    *
-   * Sorts reads by the location where they are aligned. Unaligned reads are
+   * Sorts alignments by the location where the reads are aligned. Unaligned reads are
    * put at the end and sorted by read name. References are ordered by index
    * that they are ordered in the SequenceDictionary.
    *
-   * @return Returns a new genomic dataset containing sorted reads.
+   * @return Returns a new genomic dataset containing sorted alignments.
    *
-   * @see sortReadsByReferencePosition
+   * @see sortByReferencePosition
    */
-  def sortReadsByReferencePositionAndIndex(): AlignmentRecordDataset = SortByIndex.time {
-    info("Sorting reads by reference index, using %s.".format(sequences))
+  def sortByReferencePositionAndIndex(): AlignmentRecordDataset = SortAlignmentsByIndex.time {
+    info("Sorting alignments by reference index, using %s.".format(sequences))

     import scala.math.Ordering.{ Int => ImplicitIntOrdering, _ }

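Together, the renamed sortByReadName, sortByReferencePosition, and sortByReferencePositionAndIndex make up the public sorting API on AlignmentRecordDataset. A minimal usage sketch; the import path and the alignments parameter are assumed rather than taken from this commit:

  import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset // package path assumed for this ADAM version

  def sortExamples(alignments: AlignmentRecordDataset): Unit = {
    // By read name, then by the read's position within the fragment.
    val byName = alignments.sortByReadName()

    // By aligned position; references ordered lexicographically by name,
    // unaligned reads placed at the end and sorted by read name.
    val byPosition = alignments.sortByReferencePosition()

    // By aligned position; references ordered by their index in the SequenceDictionary.
    val byIndex = alignments.sortByReferencePositionAndIndex()
  }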
@@ -177,7 +177,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite {
     val sd = new SequenceDictionary(contigNames.map(v => SequenceRecord(v, 1000000L)).toVector)

     val sortedReads = AlignmentRecordDataset(rdd, sd, ReadGroupDictionary.empty, Seq.empty)
-      .sortReadsByReferencePosition()
+      .sortByReferencePosition()
       .rdd
       .collect()
       .zipWithIndex
@@ -193,7 +193,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite {
   sparkTest("unmapped reads go at the end when sorting") {
     val inputPath = testFile("reads13.sam")
     val reads = sc.loadAlignments(inputPath)
-    val sortedReads = reads.sortReadsByReferencePosition()
+    val sortedReads = reads.sortByReferencePosition()
       .rdd
       .collect()
     assert(!sortedReads.last.getReadMapped)
@@ -286,7 +286,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite {

     val rdd = sc.parallelize(reads)
     val sortedReads = AlignmentRecordDataset(rdd, sd, ReadGroupDictionary.empty, Seq.empty)
-      .sortReadsByReferencePositionAndIndex()
+      .sortByReferencePositionAndIndex()
       .rdd
       .collect()
       .zipWithIndex
@@ -536,7 +536,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite {
     val reads = ardd.rdd

     val actualSortedPath = tmpFile("sorted.sam")
-    ardd.sortReadsByReferencePosition()
+    ardd.sortByReferencePosition()
       .saveAsSam(actualSortedPath,
         isSorted = true,
         asSingleFile = true)
@@ -560,7 +560,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite {
   sparkTest("writing ordered sam from unordered sam") {
     val unsortedPath = testFile("unordered.sam")
     val ardd = sc.loadBam(unsortedPath)
-    val reads = ardd.sortReadsByReferencePosition
+    val reads = ardd.sortByReferencePosition

     val actualSortedPath = tmpFile("ordered.sam")
     reads.saveAsSam(actualSortedPath,
@@ -1838,7 +1838,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite {
     val reads = ardd.rdd

     val actualSortedPath = tmpFile("readname_sorted.sam")
-    ardd.sortReadsByReadName()
+    ardd.sortByReadName()
       .saveAsSam(actualSortedPath,
         asType = None,
         asSingleFile = true,
@@ -56,7 +56,7 @@ class RealignIndelsSuite extends ADAMFunSuite {
       .realignIndels(consensusModel = cg,
         maxReadsPerTarget = maxCoverage,
         optReferenceFile = optRefFile)
-      .sortReadsByReferencePosition()
+      .sortByReferencePosition()
       .rdd
   }

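End to end, the updated tests exercise the renamed API on the load, sort, and save path. A minimal sketch of that flow; the ADAMContext import path is assumed for this version of ADAM, and the file paths are placeholders:

  import org.apache.spark.SparkContext
  import org.bdgenomics.adam.rdd.ADAMContext._ // adds loadAlignments to SparkContext; import path assumed

  // Load alignments, sort by reference position, and write a single sorted SAM file.
  def sortAndSave(sc: SparkContext): Unit = {
    sc.loadAlignments("in.sam")      // placeholder input path
      .sortByReferencePosition()
      .saveAsSam("sorted.sam",       // placeholder output path
        isSorted = true,
        asSingleFile = true)
  }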
