[ADAM-2059] Properly pass back primitive typed arrays to HTSJDK. #2061

Closed
@@ -126,18 +126,15 @@ object AttributeUtils {

   private def typedStringToValue(tagType: TagType.Value, valueStr: String): Any = {
     tagType match {
-      case TagType.Character => valueStr(0)
-      case TagType.Integer => Integer.valueOf(valueStr)
-      case TagType.Float => java.lang.Float.valueOf(valueStr)
-      case TagType.String => valueStr
-      case TagType.ByteSequence => valueStr.map(c => java.lang.Byte.valueOf("" + c))
-      case TagType.NumericByteSequence => valueStr.split(",").map(c => java.lang.Byte.valueOf("" + c))
-      case TagType.NumericShortSequence => valueStr.split(",").map(c => java.lang.Short.valueOf("" + c))
-      case TagType.NumericIntSequence => valueStr.split(",").map(c => java.lang.Integer.valueOf("" + c))
-      case TagType.NumericUnsignedByteSequence => valueStr.split(",").map(c => java.lang.Byte.valueOf("" + c))
-      case TagType.NumericUnsignedShortSequence => valueStr.split(",").map(c => java.lang.Short.valueOf("" + c))
-      case TagType.NumericUnsignedIntSequence => valueStr.split(",").map(c => java.lang.Integer.valueOf("" + c))
-      case TagType.NumericFloatSequence => valueStr.split(",").map(c => java.lang.Float.valueOf("" + c))
+      case TagType.Character => valueStr(0)
+      case TagType.Integer => Integer.valueOf(valueStr)
+      case TagType.Float => java.lang.Float.valueOf(valueStr)
+      case TagType.String => valueStr
+      case TagType.ByteSequence => valueStr.map(c => java.lang.Byte.valueOf("" + c))
+      case TagType.NumericByteSequence | TagType.NumericUnsignedByteSequence => valueStr.split(",").map(_.toByte)
+      case TagType.NumericShortSequence | TagType.NumericUnsignedShortSequence => valueStr.split(",").map(_.toShort)
+      case TagType.NumericIntSequence | TagType.NumericUnsignedIntSequence => valueStr.split(",").map(_.toInt)
+      case TagType.NumericFloatSequence => valueStr.split(",").map(_.toFloat)
     }
   }
 }
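The net effect of this hunk is that numeric sequence tags are now materialized as primitive arrays (Array[Byte], Array[Short], Array[Int], Array[Float]) rather than arrays of boxed java.lang wrappers, with the signed and unsigned cases collapsed into shared alternatives. A minimal sketch of the runtime difference, in plain Scala with no ADAM or HTSJDK dependency (the values are illustrative):

val boxed: Array[java.lang.Byte] = "-1,0,1".split(",").map(s => java.lang.Byte.valueOf(s))
val primitive: Array[Byte] = "-1,0,1".split(",").map(_.toByte)

// The boxed version is an object array; the primitive version is a true byte[],
// which is the representation HTSJDK expects for B-typed tag values.
println(boxed.getClass.getSimpleName)     // Byte[]
println(primitive.getClass.getSimpleName) // byte[]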
adam-core/src/test/resources/tag.sam (new file, 5 additions)
@@ -0,0 +1,5 @@
+@SQ SN:1 LN:249250621
+@SQ SN:2 LN:243199373
+@PG ID:p1 PN:myProg CL:"myProg 123" VN:1.0.0
+@PG ID:p2 PN:myProg CL:"myProg 456" VN:1.0.0 PP:p1
+simread:1:26472783:false 16 1 26472784 60 75M * 0 0 GTATAAGAGCAGCCTTATTCCTATTTATAATCAGGGTGAAACACCTGTGCCAATGCCAAGACAGGGGTGCCAAGA * NM:i:0 AS:i:75 XS:i:0 Zb:B:c,-1,0,1 ZB:B:C,1,0,1 Zi:B:i,-1,0,1,2 ZI:B:I,1,0,1,2 Zs:B:s,-2,0,2 ZS:B:S,2,0,2 ZF:B:f,-1.100000,0.000000,1.100000
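The read in this fixture carries ten attributes: three scalar integer tags (NM:i, AS:i, XS:i) plus seven B-typed array tags (Zb:B:c, ZB:B:C, Zs:B:s, ZS:B:S, Zi:B:i, ZI:B:I, ZF:B:f), covering every numeric-sequence branch of the rewritten match above in both its signed and unsigned form where one exists. A hedged sketch of parsing such a tag string directly, assuming AttributeUtils.parseAttributes accepts the tab-separated attribute string stored on an AlignmentRecord (the way the test below uses it) and that Attribute exposes tag and value fields:

import org.bdgenomics.adam.util.AttributeUtils

// Two of the fixture's tags, joined with a tab as they appear on the record.
val attrs = AttributeUtils.parseAttributes("Zb:B:c,-1,0,1\tZF:B:f,-1.1,0.0,1.1")
attrs.foreach(a => println(s"${a.tag} -> ${a.value}"))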
@@ -57,7 +57,7 @@ import org.bdgenomics.adam.sql.{
   Variant => VariantProduct,
   VariantContext => VariantContextProduct
 }
-import org.bdgenomics.adam.util.{ ADAMFunSuite, ManualRegionPartitioner }
+import org.bdgenomics.adam.util.{ ADAMFunSuite, AttributeUtils, ManualRegionPartitioner }
 import org.bdgenomics.formats.avro._
 import org.seqdoop.hadoop_bam.{ CRAMInputFormat, SAMFormat }
 import scala.collection.JavaConversions._
@@ -475,6 +475,27 @@ class AlignmentRecordRDDSuite extends ADAMFunSuite {
     }
   }
 
+  sparkTest("writing a small file with tags should produce the expected result") {
+    val samPath = testFile("tag.sam")
+    val ardd = sc.loadBam(samPath)
+
+    val newSamPath = tmpFile("tag.sam")
+    ardd.saveAsSam(newSamPath,
+      asSingleFile = true)
+
+    val brdd = sc.loadBam(newSamPath)
+
+    assert(ardd.rdd.count === brdd.rdd.count)
+    val aRecord = ardd.rdd.first
+    val bRecord = brdd.rdd.first
+    val aAttrs = AttributeUtils.parseAttributes(aRecord.getAttributes)
+    val bAttrs = AttributeUtils.parseAttributes(bRecord.getAttributes)
+    assert(aAttrs.length === 10)
+    assert(bAttrs.length === 10)
+    val bAttrsSet = bAttrs.map(_.tag).toSet
+    assert(aAttrs.forall(attr => bAttrsSet.contains(attr.tag)))
+  }
+
   sparkTest("writing a small sorted file as SAM should produce the expected result") {
     val unsortedPath = testFile("unsorted.sam")
     val ardd = sc.loadBam(unsortedPath)
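The new test exercises the full round trip: load tag.sam, write it back as a single SAM file, reload it, and confirm that all ten attributes survive with their tags intact. (Note the second parseAttributes call now reads bRecord; the original patch parsed aRecord twice, which made the comparison vacuous.) For context, a hedged sketch of the HTSJDK side of the contract, assuming htsjdk.samtools.SAMRecord.setAttribute(String, Object), which accepts primitive arrays such as byte[] for B-typed tags but not arrays of boxed wrappers:

import htsjdk.samtools.{ SAMFileHeader, SAMRecord }

val record = new SAMRecord(new SAMFileHeader())
// A primitive Array[Byte] erases to byte[], which HTSJDK can encode as a
// signed-byte array tag (B:c) when the record is written as SAM text.
record.setAttribute("Zb", Array[Byte](-1, 0, 1))
println(record.getAttribute("Zb").asInstanceOf[Array[Byte]].mkString(","))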