From f3ae5c18441dfd44c30cdea9ca7a1e5f06629ac5 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Fri, 28 Jun 2019 00:42:03 -0500 Subject: [PATCH] rename sortReadsBy methods --- .../adam/cli/TransformAlignments.scala | 4 +-- .../adam/cli/TransformFragments.scala | 4 +-- .../adam/instrumentation/Timers.scala | 7 ++-- .../rdd/read/AlignmentRecordDataset.scala | 32 +++++++++---------- .../read/AlignmentRecordDatasetSuite.scala | 12 +++---- .../read/realignment/RealignIndelsSuite.scala | 2 +- 6 files changed, 30 insertions(+), 31 deletions(-) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala index 58bb162a3d..f7f2914cce 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala @@ -346,9 +346,9 @@ class TransformAlignments(protected val args: TransformAlignmentsArgs) extends B // are we sorting lexicographically or using legacy SAM sort order? val sortedDs = if (args.sortLexicographically) { - ds.sortReadsByReferencePosition() + ds.sortByReferencePosition() } else { - ds.sortReadsByReferencePositionAndIndex() + ds.sortByReferencePositionAndIndex() } // unpersist the cached rdd, if caching was requested diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformFragments.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformFragments.scala index 5332d95694..7e77105eee 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformFragments.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformFragments.scala @@ -132,9 +132,9 @@ class TransformFragments(protected val args: TransformFragmentsArgs) extends BDG // prep to save val finalRdd = if (args.sortReads) { - readRdd.sortReadsByReferencePosition() + readRdd.sortByReferencePosition() } else if (args.sortLexicographically) { - readRdd.sortReadsByReferencePositionAndIndex() + readRdd.sortByReferencePositionAndIndex() } else { readRdd } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/instrumentation/Timers.scala b/adam-core/src/main/scala/org/bdgenomics/adam/instrumentation/Timers.scala index add9566f48..fead0e1be3 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/instrumentation/Timers.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/instrumentation/Timers.scala @@ -26,7 +26,6 @@ object Timers extends Metrics { // Load methods val LoadAlignments = timer("Load Alignments") - val LoadContigFragments = timer("Load Contig Fragments") val LoadCoverage = timer("Load Coverage") val LoadFeatures = timer("Load Features") val LoadFragments = timer("Load Fragments") @@ -110,9 +109,9 @@ object Timers extends Metrics { val SweepReadOverReferenceForQuality = timer("Sweep Read Over Reference For Quality") val FinalizingRealignments = timer("Finalizing Realignments") - // Sort Reads - val SortReads = timer("Sort Reads") - val SortByIndex = timer("Sort Reads By Index") + // Sort Alignments + val SortAlignments = timer("Sort Alignments") + val SortAlignmentsByIndex = timer("Sort Alignments By Index") // File Saving val SAMSave = timer("SAM Save") diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDataset.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDataset.scala index 38fff22984..1e37c25ee3 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDataset.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDataset.scala @@ -970,29 +970,29 @@ sealed abstract class AlignmentRecordDataset extends AvroReadGroupGenomicDataset } /** - * Sorts our read data by read name. + * Sorts our alignments by read name. * - * @return Returns a new RDD containing sorted reads. + * @return Returns a new genomic dataset containing sorted alignments. */ - def sortReadsByReadName(): AlignmentRecordDataset = SortReads.time { - info("Sorting reads by read name") + def sortByReadName(): AlignmentRecordDataset = SortAlignments.time { + info("Sorting alignments by read name") transformDataset(_.orderBy("readName", "readInFragment")) } /** - * Sorts our read data by reference positions, with references ordered by name. + * Sorts our alignments by reference position, with references ordered by name. * - * Sorts reads by the location where they are aligned. Unaligned reads are + * Sorts alignments by the location where the reads are aligned. Unaligned reads are * put at the end and sorted by read name. References are ordered * lexicographically. * - * @return Returns a new genomic dataset containing sorted reads. + * @return Returns a new genomic dataset containing sorted alignments. * - * @see sortReadsByReferencePositionAndIndex + * @see sortByReferencePositionAndIndex */ - def sortReadsByReferencePosition(): AlignmentRecordDataset = SortReads.time { - info("Sorting reads by reference position") + def sortByReferencePosition(): AlignmentRecordDataset = SortAlignments.time { + info("Sorting alignments by reference position") // NOTE: In order to keep unmapped reads from swamping a single partition // we sort the unmapped reads by read name. We prefix with tildes ("~"; @@ -1008,18 +1008,18 @@ sealed abstract class AlignmentRecordDataset extends AvroReadGroupGenomicDataset } /** - * Sorts our read data by reference positions, with references ordered by index. + * Sorts our alignments by reference position, with references ordered by index. * - * Sorts reads by the location where they are aligned. Unaligned reads are + * Sorts alignments by the location where the reads are aligned. Unaligned reads are * put at the end and sorted by read name. References are ordered by index * that they are ordered in the SequenceDictionary. * - * @return Returns a new genomic dataset containing sorted reads. + * @return Returns a new genomic dataset containing sorted alignments. * - * @see sortReadsByReferencePosition + * @see sortByReferencePosition */ - def sortReadsByReferencePositionAndIndex(): AlignmentRecordDataset = SortByIndex.time { - info("Sorting reads by reference index, using %s.".format(sequences)) + def sortByReferencePositionAndIndex(): AlignmentRecordDataset = SortAlignmentsByIndex.time { + info("Sorting alignments by reference index, using %s.".format(sequences)) import scala.math.Ordering.{ Int => ImplicitIntOrdering, _ } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala index 9eb99488f5..9e0d151454 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala @@ -177,7 +177,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite { val sd = new SequenceDictionary(contigNames.map(v => SequenceRecord(v, 1000000L)).toVector) val sortedReads = AlignmentRecordDataset(rdd, sd, ReadGroupDictionary.empty, Seq.empty) - .sortReadsByReferencePosition() + .sortByReferencePosition() .rdd .collect() .zipWithIndex @@ -193,7 +193,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite { sparkTest("unmapped reads go at the end when sorting") { val inputPath = testFile("reads13.sam") val reads = sc.loadAlignments(inputPath) - val sortedReads = reads.sortReadsByReferencePosition() + val sortedReads = reads.sortByReferencePosition() .rdd .collect() assert(!sortedReads.last.getReadMapped) @@ -286,7 +286,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite { val rdd = sc.parallelize(reads) val sortedReads = AlignmentRecordDataset(rdd, sd, ReadGroupDictionary.empty, Seq.empty) - .sortReadsByReferencePositionAndIndex() + .sortByReferencePositionAndIndex() .rdd .collect() .zipWithIndex @@ -536,7 +536,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite { val reads = ardd.rdd val actualSortedPath = tmpFile("sorted.sam") - ardd.sortReadsByReferencePosition() + ardd.sortByReferencePosition() .saveAsSam(actualSortedPath, isSorted = true, asSingleFile = true) @@ -560,7 +560,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite { sparkTest("writing ordered sam from unordered sam") { val unsortedPath = testFile("unordered.sam") val ardd = sc.loadBam(unsortedPath) - val reads = ardd.sortReadsByReferencePosition + val reads = ardd.sortByReferencePosition val actualSortedPath = tmpFile("ordered.sam") reads.saveAsSam(actualSortedPath, @@ -1838,7 +1838,7 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite { val reads = ardd.rdd val actualSortedPath = tmpFile("readname_sorted.sam") - ardd.sortReadsByReadName() + ardd.sortByReadName() .saveAsSam(actualSortedPath, asType = None, asSingleFile = true, diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala index 2253f60eac..d5ff90c9e2 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala @@ -56,7 +56,7 @@ class RealignIndelsSuite extends ADAMFunSuite { .realignIndels(consensusModel = cg, maxReadsPerTarget = maxCoverage, optReferenceFile = optRefFile) - .sortReadsByReferencePosition() + .sortByReferencePosition() .rdd }