From 107ce14dcf51e3573baaa69e5063b22553c4a014 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Sun, 2 Jun 2019 14:29:44 -0500 Subject: [PATCH] Update ReadDataset.toAlignments and add read/alignment conversion tests --- .../bdgenomics/adam/rdd/read/ReadDataset.scala | 7 +++++-- .../rdd/read/AlignmentRecordDatasetSuite.scala | 13 +++++++++++++ .../adam/rdd/read/ReadDatasetSuite.scala | 16 ++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ReadDataset.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ReadDataset.scala index 2dbb9f0f05..2c39603a8a 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ReadDataset.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ReadDataset.scala @@ -231,10 +231,13 @@ sealed abstract class ReadDataset extends AvroGenomicDataset[Read, ReadProduct, AlignmentRecord.newBuilder() .setReadName(read.getName) .setSequence(read.getSequence) - .setQuality(read.getQuality) + .setQuality(read.getQualityScores) .build() } - AlignmentRecordDataset(rdd.map(toAlignmentRecord), sequences) + AlignmentRecordDataset(rdd.map(toAlignmentRecord), + sequences, + ReadGroupDictionary.empty, + processingSteps = Seq.empty) } /** diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala index 6accf636dc..9eb99488f5 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordDatasetSuite.scala @@ -1860,4 +1860,17 @@ class AlignmentRecordDatasetSuite extends ADAMFunSuite { assert(reads.dataset.first().start.get === transformed.dataset.first().start.get) } + + sparkTest("convert alignments to reads") { + val alignments = sc.loadAlignments(testFile("small.sam")) + val reads = alignments.toReads() + assert(alignments.sequences === reads.sequences) + 
assert(alignments.rdd.count === reads.rdd.count) + + val first = reads.rdd.sortBy(_.getName).first() + assert(first.getName() === "simread:1:101556378:false") + assert(first.getSequence() === "TTTATTTTTTGAGCATGAAAGTAATATATGCTCAGTGTAAACAATTAGGTCATTATAAATATATTTAACAGGAAT") + assert(first.getLength() === 75L) + assert(first.getAlphabet() === org.bdgenomics.formats.avro.Alphabet.DNA) + } } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/ReadDatasetSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/ReadDatasetSuite.scala index c709ff70db..0799f7747f 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/ReadDatasetSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/ReadDatasetSuite.scala @@ -139,6 +139,22 @@ class ReadDatasetSuite extends ADAMFunSuite { assert(kv._2 === feature) } + sparkTest("convert reads to alignments") { + val reads: ReadDataset = ReadDataset(sc.parallelize(Seq(r1, r2))) + val alignments = reads.toAlignments.rdd.collect() + assert(alignments.length === 2) + + val a1 = alignments(0) + assert(a1.getReadName === "name1") + assert(a1.getSequence === "actg") + assert(a1.getQuality === "9999") + + val a2 = alignments(1) + assert(a2.getReadName === "name2") + assert(a2.getSequence === "actg") + assert(a2.getQuality === "9999") + } + sparkTest("convert reads to sequences") { val reads: ReadDataset = ReadDataset(sc.parallelize(Seq(r1, r2))) val sequences = reads.toSequences.rdd.collect()