Skip to content

Commit

Permalink
Merge pull request #2075 from heuermh/alt-issue-2059
Browse files Browse the repository at this point in the history
 [ADAM-2059] Properly pass back primitive typed arrays to HTSJDK.
  • Loading branch information
akmorrow13 committed Nov 5, 2018
2 parents f3ec463 + bd9da4a commit 55a8b3d
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -126,18 +126,15 @@ object AttributeUtils {

/**
 * Converts the string form of an attribute value into a typed value,
 * selecting the parser from the tag type.
 *
 * Numeric sequence types are split on commas and returned as primitive
 * arrays (Array[Byte], Array[Short], Array[Int], Array[Float]) rather than
 * arrays of boxed java.lang values, so that they can be passed back to
 * HTSJDK as primitive typed arrays.
 *
 * @param tagType The type of the tag, which determines how valueStr is parsed.
 * @param valueStr The attribute value as a string. For the numeric sequence
 *   types this is a comma-separated list of numbers.
 * @return The parsed value. The runtime type depends on tagType.
 * @throws NumberFormatException if a numeric field cannot be parsed as the
 *   requested type.
 */
private def typedStringToValue(tagType: TagType.Value, valueStr: String): Any = {
  tagType match {
    // takes the first character only; an empty valueStr will throw here
    case TagType.Character => valueStr(0)
    case TagType.Integer => Integer.valueOf(valueStr)
    case TagType.Float => java.lang.Float.valueOf(valueStr)
    case TagType.String => valueStr
    // each character is parsed on its own as a decimal byte
    case TagType.ByteSequence => valueStr.map(c => java.lang.Byte.valueOf("" + c))
    // NOTE(review): the unsigned variants share the signed parsers, so a field
    // above the signed maximum of the element type (e.g., "200" as a byte)
    // throws NumberFormatException. Confirm how unsigned values are written
    // into the attribute string before relying on this for large values.
    case TagType.NumericByteSequence | TagType.NumericUnsignedByteSequence => valueStr.split(",").map(_.toByte)
    case TagType.NumericShortSequence | TagType.NumericUnsignedShortSequence => valueStr.split(",").map(_.toShort)
    case TagType.NumericIntSequence | TagType.NumericUnsignedIntSequence => valueStr.split(",").map(_.toInt)
    case TagType.NumericFloatSequence => valueStr.split(",").map(_.toFloat)
  }
}
}
5 changes: 5 additions & 0 deletions adam-core/src/test/resources/tag.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@SQ SN:1 LN:249250621
@SQ SN:2 LN:243199373
@PG ID:p1 PN:myProg CL:"myProg 123" VN:1.0.0
@PG ID:p2 PN:myProg CL:"myProg 456" VN:1.0.0 PP:p1
simread:1:26472783:false 16 1 26472784 60 75M * 0 0 GTATAAGAGCAGCCTTATTCCTATTTATAATCAGGGTGAAACACCTGTGCCAATGCCAAGACAGGGGTGCCAAGA * NM:i:0 AS:i:75 XS:i:0 Zb:B:c,-1,0,1 ZB:B:C,1,0,1 Zi:B:i,-1,0,1,2 ZI:B:I,1,0,1,2 Zs:B:s,-2,0,2 ZS:B:S,2,0,2 ZF:B:f,-1.100000,0.000000,1.100000
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ import org.bdgenomics.adam.sql.{
Variant => VariantProduct,
VariantContext => VariantContextProduct
}
import org.bdgenomics.adam.util.{ ADAMFunSuite, ManualRegionPartitioner }
import org.bdgenomics.adam.util.{ ADAMFunSuite, AttributeUtils, ManualRegionPartitioner }
import org.bdgenomics.formats.avro._
import org.seqdoop.hadoop_bam.{ CRAMInputFormat, SAMFormat }
import scala.collection.JavaConversions._
Expand Down Expand Up @@ -475,6 +475,27 @@ class AlignmentRecordRDDSuite extends ADAMFunSuite {
}
}

sparkTest("writing a small file with tags should produce the expected result") {
  // load the fixture whose read carries ten optional attribute tags
  val original = sc.loadBam(testFile("tag.sam"))

  // round trip the reads through a single SAM file on disk
  val roundTripPath = tmpFile("tag.sam")
  original.saveAsSam(roundTripPath, asSingleFile = true)
  val reloaded = sc.loadBam(roundTripPath)

  // no reads may be gained or lost by the round trip
  assert(original.rdd.count === reloaded.rdd.count)

  val originalAttrs = AttributeUtils.parseAttributes(original.rdd.first.getAttributes)
  val reloadedAttrs = AttributeUtils.parseAttributes(reloaded.rdd.first.getAttributes)
  assert(originalAttrs.length === 10)
  assert(reloadedAttrs.length === 10)

  // every tag name seen before the write must still be present after it
  val reloadedTagNames = reloadedAttrs.map(_.tag).toSet
  assert(originalAttrs.forall(attribute => reloadedTagNames(attribute.tag)))
}

sparkTest("writing a small sorted file as SAM should produce the expected result") {
val unsortedPath = testFile("unsorted.sam")
val ardd = sc.loadBam(unsortedPath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,24 @@ class AttributeUtilsSuite extends FunSuite {
}

test("parseTags works with NumericSequence tagType") {
  // one attribute for each of the byte (c), short (s), int (i), and float (f)
  // numeric array subtypes
  val tags = parseAttributes("zz:B:c,-1,1\tzy:B:s,-1,1\tzx:B:i,-1,1\tzw:B:f,-1.0,1.0")

  assert(tags.length === 4)

  // the casts below are to primitive array types, so they also check that
  // the parsed values are primitive arrays and not arrays of boxed numbers
  assert(tags(0).tag === "zz")
  assert(tags(0).tagType === TagType.NumericByteSequence)
  assert(tags(0).value.asInstanceOf[Array[Byte]].sameElements(Array(-1, 1).map(_.toByte)))

  assert(tags(1).tag === "zy")
  assert(tags(1).tagType === TagType.NumericShortSequence)
  assert(tags(1).value.asInstanceOf[Array[Short]].sameElements(Array(-1, 1).map(_.toShort)))

  assert(tags(2).tag === "zx")
  assert(tags(2).tagType === TagType.NumericIntSequence)
  assert(tags(2).value.asInstanceOf[Array[Int]].sameElements(Array(-1, 1)))

  assert(tags(3).tag === "zw")
  assert(tags(3).tagType === TagType.NumericFloatSequence)
  assert(tags(3).value.asInstanceOf[Array[Float]].sameElements(Array(-1.0f, 1.0f)))
}

test("empty string is parsed as zero tagStrings") {
Expand Down

0 comments on commit 55a8b3d

Please sign in to comment.