Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement required flag for types #2384

Merged
merged 8 commits into from
Nov 9, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 123 additions & 62 deletions python/hail/typ.py

Large diffs are not rendered by default.

38 changes: 19 additions & 19 deletions src/main/scala/is/hail/HailContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -299,15 +299,15 @@ class HailContext private(val sc: SparkContext,
info(s"Number of variants in all GEN files: $nVariants")
info(s"Number of samples in GEN files: $nSamples")

val signature = TStruct("rsid" -> TString, "varid" -> TString)
val signature = TStruct("rsid" -> TString(), "varid" -> TString())

val rdd = sc.union(results.map(_.rdd)).toOrderedRDD(TVariant(GenomeReference.GRCh37).orderedKey, classTag[(Annotation, Iterable[Annotation])])

new GenericDataset(this,
VSMFileMetadata(samples,
vaSignature = signature,
genotypeSignature = TStruct("GT" -> TCall,
"GP" -> TArray(TFloat64)),
genotypeSignature = TStruct("GT" -> TCall(),
"GP" -> TArray(TFloat64())),
wasSplit = true),
rdd)
}
Expand Down Expand Up @@ -456,7 +456,7 @@ class HailContext private(val sc: SparkContext,
}
})
},
genotypeSignature = TGenotype)
genotypeSignature = TGenotype())
}

def importVCFGeneric(file: String, force: Boolean = false,
Expand Down Expand Up @@ -537,27 +537,27 @@ class HailContext private(val sc: SparkContext,
def eval(expr: String): (Annotation, Type) = {
val ec = EvalContext(
"v" -> TVariant(GenomeReference.GRCh37),
"s" -> TString,
"g" -> TGenotype,
"s" -> TString(),
"g" -> TGenotype(),
"sa" -> TStruct(
"cohort" -> TString,
"cohort" -> TString(),
"covariates" -> TStruct(
"PC1" -> TFloat64,
"PC2" -> TFloat64,
"PC3" -> TFloat64,
"age" -> TInt32,
"isFemale" -> TBoolean
"PC1" -> TFloat64(),
"PC2" -> TFloat64(),
"PC3" -> TFloat64(),
"age" -> TInt32(),
"isFemale" -> TBoolean()
)),
"va" -> TStruct(
"info" -> TStruct(
"AC" -> TArray(TInt32),
"AN" -> TInt32,
"AF" -> TArray(TFloat64)),
"AC" -> TArray(TInt32()),
"AN" -> TInt32(),
"AF" -> TArray(TFloat64())),
"transcripts" -> TArray(TStruct(
"gene" -> TString,
"isoform" -> TString,
"canonical" -> TBoolean,
"consequence" -> TString))))
"gene" -> TString(),
"isoform" -> TString(),
"canonical" -> TBoolean(),
"consequence" -> TString()))))

val v = Variant("16", 19200405, "C", Array("G", "CCC"))
val s = "NA12878"
Expand Down
28 changes: 14 additions & 14 deletions src/main/scala/is/hail/annotations/Annotation.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,16 @@ object Annotation {

// Recursively rewrites a type into an expanded form:
//   - a ComplexType is replaced by the expansion of its `representation`;
//   - an array keeps its shape with an expanded element type;
//   - a struct keeps its fields, each with an expanded type;
//   - a set becomes an array of the expanded element type;
//   - a dict becomes an array of structs with "key" and "value" fields;
//   - any other type is returned unchanged.
// NOTE(review): this listing is a rendered diff without +/- markers, so each changed
// case appears twice: first in its earlier form, then in the form that binds `req`
// and passes it to the rebuilt outer type. `req` is presumably the "required" flag
// named in the PR title -- confirm against the Type definitions.
def expandType(t: Type): Type = t match {
case tc: ComplexType => expandType(tc.representation)
case TArray(elementType) =>
TArray(expandType(elementType))
case TStruct(fields) =>
TStruct(fields.map { f => f.copy(typ = expandType(f.typ)) })
case TSet(elementType) =>
TArray(expandType(elementType))
case TDict(keyType, valueType) =>
case TArray(elementType, req) =>
TArray(expandType(elementType), req)
case TStruct(fields, req) =>
TStruct(fields.map { f => f.copy(typ = expandType(f.typ)) }, req)
case TSet(elementType, req) =>
TArray(expandType(elementType), req)
case TDict(keyType, valueType, req) =>
// The inner key/value struct is built without `req`; only the enclosing array receives it.
TArray(TStruct(
"key" -> expandType(keyType),
"value" -> expandType(valueType)))
"value" -> expandType(valueType)), req)
case _ => t
}

Expand All @@ -54,28 +54,28 @@ object Annotation {
else
t match {
case _: TVariant => a.asInstanceOf[Variant].toRow
case TGenotype => Genotype.toRow(a.asInstanceOf[Genotype])
case _: TGenotype => Genotype.toRow(a.asInstanceOf[Genotype])
case _: TLocus => a.asInstanceOf[Locus].toRow

case TArray(elementType) =>
case TArray(elementType, _) =>
a.asInstanceOf[IndexedSeq[_]].map(expandAnnotation(_, elementType))
case TStruct(fields) =>
case TStruct(fields, _) =>
Row.fromSeq((a.asInstanceOf[Row].toSeq, fields).zipped.map { case (ai, f) =>
expandAnnotation(ai, f.typ)
})

case TSet(elementType) =>
case TSet(elementType, _) =>
(a.asInstanceOf[Set[_]]
.toArray[Any] : IndexedSeq[_])
.map(expandAnnotation(_, elementType))

case TDict(keyType, valueType) =>
case TDict(keyType, valueType, _) =>
(a.asInstanceOf[Map[String, _]]

.toArray[(Any, Any)]: IndexedSeq[(Any, Any)])
.map { case (k, v) => Annotation(expandAnnotation(k, keyType), expandAnnotation(v, valueType)) }

case TAltAllele => a.asInstanceOf[AltAllele].toRow
case _: TAltAllele => a.asInstanceOf[AltAllele].toRow

case _: TInterval =>
val i = a.asInstanceOf[Interval[Locus]]
Expand Down
64 changes: 34 additions & 30 deletions src/main/scala/is/hail/annotations/MemoryBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -300,15 +300,15 @@ final class MemoryBuffer(var mem: Long, var length: Long, var offset: Long = 0)

def visit(t: Type, off: Long, v: ValueVisitor) {
t match {
case TBoolean => v.visitBoolean(loadBoolean(off))
case TInt32 => v.visitInt32(loadInt(off))
case TInt64 => v.visitInt64(loadLong(off))
case TFloat32 => v.visitFloat32(loadFloat(off))
case TFloat64 => v.visitFloat64(loadDouble(off))
case TString =>
case _: TBoolean => v.visitBoolean(loadBoolean(off))
case _: TInt32 => v.visitInt32(loadInt(off))
case _: TInt64 => v.visitInt64(loadLong(off))
case _: TFloat32 => v.visitFloat32(loadFloat(off))
case _: TFloat64 => v.visitFloat64(loadDouble(off))
case _: TString =>
val boff = off
v.visitString(TString.loadString(this, boff))
case TBinary =>
case _: TBinary =>
val boff = off
val length = TBinary.loadLength(this, boff)
val b = loadBytes(TBinary.bytesOffset(boff), length)
Expand Down Expand Up @@ -549,7 +549,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
assert(typestk.isEmpty)
root match {
case t: TArray =>
case TBinary =>
case _: TBinary =>
case _ =>
region.align(root.alignment)
start = region.allocate(root.byteSize)
Expand Down Expand Up @@ -632,16 +632,20 @@ class RegionValueBuilder(var region: MemoryBuffer) {
val i = indexstk.top
typestk.top match {
case t: TStruct =>
if (t.fieldType(i).required)
fatal(s"cannot set missing field for required type ${t.fieldType(i)}")
t.setFieldMissing(region, offsetstk.top, i)
case t: TArray =>
if (t.elementType.required)
fatal(s"cannot set missing field for required type ${t.elementType}")
t.setElementMissing(region, offsetstk.top, i)
}

advance()
}

def addBoolean(b: Boolean) {
assert(currentType() == TBoolean)
assert(currentType().isInstanceOf[TBoolean])
if (typestk.isEmpty)
allocateRoot()
val off = currentOffset()
Expand All @@ -650,7 +654,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
}

def addInt(i: Int) {
assert(currentType() == TInt32)
assert(currentType().isInstanceOf[TInt32])
if (typestk.isEmpty)
allocateRoot()
val off = currentOffset()
Expand All @@ -659,7 +663,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
}

def addLong(l: Long) {
assert(currentType() == TInt64)
assert(currentType().isInstanceOf[TInt64])
if (typestk.isEmpty)
allocateRoot()
val off = currentOffset()
Expand All @@ -668,7 +672,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
}

def addFloat(f: Float) {
assert(currentType() == TFloat32)
assert(currentType().isInstanceOf[TFloat32])
if (typestk.isEmpty)
allocateRoot()
val off = currentOffset()
Expand All @@ -677,7 +681,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
}

def addDouble(d: Double) {
assert(currentType() == TFloat64)
assert(currentType().isInstanceOf[TFloat64])
if (typestk.isEmpty)
allocateRoot()
val off = currentOffset()
Expand All @@ -686,7 +690,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
}

def addBinary(bytes: Array[Byte]) {
assert(currentType() == TBinary)
assert(currentType().isInstanceOf[TBinary])

region.align(TBinary.contentAlignment)
val boff = region.offset
Expand Down Expand Up @@ -729,7 +733,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
// Returns true when `t` is an array or binary type, or a struct for which at least one
// field type (checked recursively) is; returns false for every other type.
// NOTE(review): rendered diff without +/- markers -- the array/binary case appears in
// both its earlier form (`TBinary` matched as a value) and its later form (`_: TBinary`
// matched by type).
def requiresFixup(t: Type): Boolean = {
t match {
case t: TStruct => t.fields.exists(f => requiresFixup(f.typ))
case _: TArray | TBinary => true
case _: TArray | _: TBinary => true
case _ => false
}
}
Expand All @@ -753,7 +757,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
val toAOff2 = fixupArray(t2, fromRegion, t.loadElement(fromRegion, fromAOff, length, i))
region.storeAddress(t.elementOffset(toAOff, length, i), toAOff2)

case TBinary =>
case _: TBinary =>
val toBOff = fixupBinary(fromRegion, t.loadElement(fromRegion, fromAOff, length, i))
region.storeAddress(t.elementOffset(toAOff, length, i), toBOff)

Expand All @@ -777,7 +781,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
case t2: TStruct =>
fixupStruct(t2, t.fieldOffset(toOff, i), fromRegion, t.fieldOffset(fromOff, i))

case TBinary =>
case _: TBinary =>
val toBOff = fixupBinary(fromRegion, t.loadField(fromRegion, fromOff, i))
region.storeAddress(t.fieldOffset(toOff, i), toBOff)

Expand Down Expand Up @@ -852,7 +856,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
else
start = toAOff
}
case TBinary =>
case _: TBinary =>
if (region.eq(fromRegion)) {
assert(!typestk.isEmpty)
region.storeAddress(toOff, fromOff)
Expand Down Expand Up @@ -882,13 +886,13 @@ class RegionValueBuilder(var region: MemoryBuffer) {
setMissing()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably check that `t` is permitted to be missing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally had it in and then took it out, under the theory that setMissing() itself already performs that check. Could put another one here if you'd prefer.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need, I missed setMissing.

else
t match {
case TBoolean => addBoolean(a.asInstanceOf[Boolean])
case TInt32 => addInt(a.asInstanceOf[Int])
case TInt64 => addLong(a.asInstanceOf[Long])
case TFloat32 => addFloat(a.asInstanceOf[Float])
case TFloat64 => addDouble(a.asInstanceOf[Double])
case TString => addString(a.asInstanceOf[String])
case TBinary => addBinary(a.asInstanceOf[Array[Byte]])
case _: TBoolean => addBoolean(a.asInstanceOf[Boolean])
case _: TInt32 => addInt(a.asInstanceOf[Int])
case _: TInt64 => addLong(a.asInstanceOf[Long])
case _: TFloat32 => addFloat(a.asInstanceOf[Float])
case _: TFloat64 => addDouble(a.asInstanceOf[Double])
case _: TString => addString(a.asInstanceOf[String])
case _: TBinary => addBinary(a.asInstanceOf[Array[Byte]])

case t: TArray =>
a match {
Expand All @@ -913,7 +917,7 @@ class RegionValueBuilder(var region: MemoryBuffer) {
addRow(t, r)
}

case TSet(elementType) =>
case TSet(elementType, _) =>
val s = a.asInstanceOf[Set[Annotation]]
.toArray
.sorted(elementType.ordering(true))
Expand Down Expand Up @@ -944,23 +948,23 @@ class RegionValueBuilder(var region: MemoryBuffer) {
startArray(v.altAlleles.length)
var i = 0
while (i < v.altAlleles.length) {
addAnnotation(TAltAllele, v.altAlleles(i))
addAnnotation(TAltAllele(), v.altAlleles(i))
i += 1
}
endArray()
endStruct()

case TAltAllele =>
case _: TAltAllele =>
val aa = a.asInstanceOf[AltAllele]
startStruct()
addString(aa.ref)
addString(aa.alt)
endStruct()

case TCall =>
case _: TCall =>
addInt(a.asInstanceOf[Int])

case TGenotype =>
case _: TGenotype =>
val g = a.asInstanceOf[Genotype]
startStruct()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class StagedRegionValueBuilder private(val fb: FunctionBuilder[_], val typ: Type
assert(!typ.isInstanceOf[TArray])
typ.fundamentalType match {
case _: TStruct => start(true)
case TBinary =>
case _: TBinary =>
assert(pOffset == null)
startOffset.store(endOffset)
case _ => Code(
Expand Down Expand Up @@ -114,7 +114,7 @@ class StagedRegionValueBuilder private(val fb: FunctionBuilder[_], val typ: Type
Code(
region.align(TBinary.contentAlignment),
typ.fundamentalType match {
case TBinary => _empty
case _: TBinary => _empty
case _ =>
region.storeAddress(currentOffset, endOffset)
},
Expand All @@ -130,11 +130,11 @@ class StagedRegionValueBuilder private(val fb: FunctionBuilder[_], val typ: Type
// Applies `f` to a new StagedRegionValueBuilder constructed from `fb`, `t`, and this
// builder, and returns the Code[Unit] that `f` produces. `init` is not read in this body.
def addStruct(t: TStruct, f: (StagedRegionValueBuilder => Code[Unit]), init: LocalRef[Boolean] = null): Code[Unit] = f(new StagedRegionValueBuilder(fb, t, this))

// Selects, from `t.fundamentalType`, a function that casts an untyped `Code[_]` to the
// matching primitive Code type and forwards it to the corresponding add method
// (addBoolean, addInt32, addInt64, addFloat32, addFloat64).
// Throws UnsupportedOperationException when the fundamental type is none of those five.
// NOTE(review): rendered diff without +/- markers -- each primitive case appears first
// in its earlier value-match form and then in its later type-match (`_: T`) form.
def addPrimitive(t: Type): (Code[_]) => Code[Unit] = t.fundamentalType match {
case TBoolean => v => addBoolean(v.asInstanceOf[Code[Boolean]])
case TInt32 => v => addInt32(v.asInstanceOf[Code[Int]])
case TInt64 => v => addInt64(v.asInstanceOf[Code[Long]])
case TFloat32 => v => addFloat32(v.asInstanceOf[Code[Float]])
case TFloat64 => v => addFloat64(v.asInstanceOf[Code[Double]])
case _: TBoolean => v => addBoolean(v.asInstanceOf[Code[Boolean]])
case _: TInt32 => v => addInt32(v.asInstanceOf[Code[Int]])
case _: TInt64 => v => addInt64(v.asInstanceOf[Code[Long]])
case _: TFloat32 => v => addFloat32(v.asInstanceOf[Code[Float]])
case _: TFloat64 => v => addFloat64(v.asInstanceOf[Code[Double]])
// The fallback rebinds `t` to the fundamental type, so the message reports that type.
case t => throw new UnsupportedOperationException("addPrimitive only supports primitive types: " + t)
}

Expand Down
Loading