Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[hail][wip] Local ld prune ptype #6425

Merged
merged 19 commits into from Jul 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion hail/src/main/scala/is/hail/expr/ir/TableValue.scala
Expand Up @@ -173,4 +173,4 @@ case class TableValue(typ: TableType, globals: BroadcastRow, rvd: RVD) {
Map(colsFieldName -> LowerMatrixIR.colsFieldName),
Map(entriesFieldName -> LowerMatrixIR.entriesFieldName)))
}
}
}
4 changes: 4 additions & 0 deletions hail/src/main/scala/is/hail/expr/types/physical/PType.scala
Expand Up @@ -199,6 +199,10 @@ abstract class PType extends BaseType with Serializable {

def required: Boolean

/** Prefix `+` operator on a type: shorthand for `setRequired(true)`. */
final def unary_+(): PType = {
  setRequired(true)
}

/** Prefix `-` operator on a type: shorthand for `setRequired(false)`. */
final def unary_-(): PType = {
  setRequired(false)
}

final def setRequired(required: Boolean): PType = {
if (required == this.required)
this
Expand Down
19 changes: 9 additions & 10 deletions hail/src/main/scala/is/hail/methods/LocalLDPrune.scala
Expand Up @@ -7,7 +7,7 @@ import is.hail.annotations._
import is.hail.expr.ir.functions.MatrixToTableFunction
import is.hail.expr.ir.{ExecuteContext, Interpret, MatrixValue, TableLiteral, TableValue}
import is.hail.expr.types._
import is.hail.expr.types.physical.{PArray, PInt64Required, PStruct}
import is.hail.expr.types.physical.{PArray, PFloat64, PInt32Required, PInt64Required, PStruct, PType}
import is.hail.expr.types.virtual._
import is.hail.rvd.{RVD, RVDType}
import is.hail.table.Table
Expand All @@ -17,8 +17,8 @@ import is.hail.variant._
object BitPackedVectorView {
val bpvElementSize: Long = PInt64Required.byteSize

def rvRowType(locusType: Type, allelesType: Type): PStruct = TStruct("locus" -> locusType, "alleles" -> allelesType,
"bpv" -> TArray(TInt64Required), "nSamples" -> TInt32Required, "mean" -> TFloat64(), "centered_length_rec" -> TFloat64()).physicalType
def rvRowPType(locusType: PType, allelesType: PType): PStruct = PStruct("locus" -> locusType, "alleles" -> allelesType,
"bpv" -> PArray(PInt64Required), "nSamples" -> PInt32Required, "mean" -> PFloat64(), "centered_length_rec" -> PFloat64())
}

class BitPackedVectorView(rvRowType: PStruct) {
Expand Down Expand Up @@ -305,14 +305,13 @@ case class LocalLDPrune(
fatal(s"Maximum queue size must be positive. Found '$maxQueueSize'.")

val nSamples = mv.nCols

val fullRowType = mv.rvRowType

val fullRowPType = mv.rvRowPType

val locusIndex = fullRowType.fieldIdx("locus")
val allelesIndex = fullRowType.fieldIdx("alleles")
val locusIndex = fullRowPType.fieldIdx("locus")
val allelesIndex = fullRowPType.fieldIdx("alleles")

val bpvType = BitPackedVectorView.rvRowType(fullRowType.types(locusIndex), fullRowType.types(allelesIndex))
val bpvType = BitPackedVectorView.rvRowPType(fullRowPType.types(locusIndex), fullRowPType.types(allelesIndex))

val tableType = typ(mv.typ)

Expand Down Expand Up @@ -342,7 +341,7 @@ case class LocalLDPrune(
None
}
})

val rvdLP = LocalLDPrune.pruneLocal(standardizedRDD, r2Threshold, windowSize, Some(maxQueueSize))

val fieldIndicesToAdd = Array("locus", "alleles", "mean", "centered_length_rec")
Expand All @@ -357,7 +356,7 @@ case class LocalLDPrune(
it.map { rv =>
region.clear()
rvb.set(region)
rvb.start(tableType.rowType.physicalType)
rvb.start(tableType.canonicalPType)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be `bpvType` rather than `tableType.canonicalPType`?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, the `canonicalRVDType` above should be `RVDType(bpvType, Array("locus", "alleles"))`.

Copy link
Contributor Author

@akotlar akotlar Jul 10, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't yet understand this section of the code well, but making the following changes results in an error in the r2 test.

Changes made: added `rvb.addFields(bpvType, rv, fieldIndicesToAdd)` and passed `RVDType(bpvType, Array("locus", "alleles"))` to `rvdLP.mapPartitions`:

 val fieldIndicesToAdd = Array("locus", "alleles", "mean", "centered_length_rec")
      .map(field => bpvType.fieldIdx(field))
    val sitesOnly = rvdLP.mapPartitions(
      RVDType(bpvType, Array("locus", "alleles"))
    )({ it =>
      val region = Region()
      val rvb = new RegionValueBuilder(region)
      val newRV = RegionValue(region)

      it.map { rv =>
        region.clear()
        rvb.set(region)
        rvb.start(bpvType)
        rvb.startStruct()
        rvb.addFields(bpvType, rv, fieldIndicesToAdd)
        rvb.endStruct()
        newRV.setOffset(rvb.end())
        newRV
      }
    })
Gradle suite > Gradle test > is.hail.methods.LocalLDPruneSuite.testIsLocallyUncorrelated FAILED
    java.lang.IllegalArgumentException at LocalLDPruneSuite.scala:355
Running test: Test method testR2(is.hail.methods.LocalLDPruneSuite)

Tired, not thinking clearly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will finish digging through tomorrow:

java.lang.IllegalArgumentException: requirement failed
	at scala.Predef$.require(Predef.scala:212)
	at is.hail.expr.ir.TableValue.<init>(TableValue.scala:45)
	at is.hail.methods.LocalLDPrune.execute(LocalLDPrune.scala:368)
	at is.hail.methods.LocalLDPrune$$anonfun$apply$3.apply(LocalLDPrune.scala:286)
	at is.hail.methods.LocalLDPrune$$anonfun$apply$3.apply(LocalLDPrune.scala:284)
	at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:8)
	at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:7)
	at is.hail.utils.package$.using(package.scala:596)
	at is.hail.annotations.Region$.scoped(Region.scala:11)
	at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:7)
	at is.hail.methods.LocalLDPrune$.apply(LocalLDPrune.scala:284)
	at is.hail.methods.LocalLDPruneSuite.testIsLocallyUncorrelated(LocalLDPruneSuite.scala:355)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccess

rvb.startStruct()
rvb.addFields(bpvType, rv, fieldIndicesToAdd)
rvb.endStruct()
Expand Down
2 changes: 1 addition & 1 deletion hail/src/main/scala/is/hail/table/Table.scala
Expand Up @@ -425,4 +425,4 @@ class Table(val hc: HailContext, val tir: TableIR) {
ctx)
)
}
}
}
21 changes: 11 additions & 10 deletions hail/src/test/scala/is/hail/methods/LocalLDPruneSuite.scala
Expand Up @@ -6,7 +6,8 @@ import is.hail.annotations.{Annotation, Region, RegionValue, RegionValueBuilder}
import is.hail.check.Prop._
import is.hail.check.{Gen, Properties}
import is.hail.expr.types._
import is.hail.expr.types.virtual.{TArray, TLocus, TString, TStruct}
import is.hail.expr.types.physical.{PArray, PLocus, PString, PStruct, PType}
import is.hail.expr.types.virtual.{TArray, TString, TStruct}
import is.hail.variant._
import is.hail.utils._
import is.hail.testUtils._
Expand Down Expand Up @@ -53,13 +54,13 @@ object LocalLDPruneSuite {
MatrixType.entriesIdentifier -> TArray(Genotype.htsGenotypeType)
)

val bitPackedVectorViewType = BitPackedVectorView.rvRowType(rvRowType.field("locus").typ,
rvRowType.field("alleles").typ)
val bitPackedVectorViewType = BitPackedVectorView.rvRowPType(PType.canonical(rvRowType.field("locus").typ),
PType.canonical(rvRowType.field("alleles").typ))

def makeRV(gs: Iterable[Annotation]): RegionValue = {
val gArr = gs.toFastIndexedSeq
val rvb = new RegionValueBuilder(Region())
rvb.start(rvRowType.physicalType)
rvb.start(PType.canonical(rvRowType))
rvb.startStruct()
rvb.addAnnotation(rvRowType.types(0), Locus("1", 1))
rvb.addAnnotation(rvRowType.types(1), FastIndexedSeq("A", "T"))
Expand Down Expand Up @@ -89,7 +90,7 @@ object LocalLDPruneSuite {

def toBitPackedVectorRegionValue(rv: RegionValue, nSamples: Int): Option[RegionValue] = {
val rvb = new RegionValueBuilder(Region())
val hcView = HardCallView(rvRowType.physicalType)
val hcView = HardCallView(PType.canonical(rvRowType).asInstanceOf[PStruct])
hcView.setRegion(rv)

rvb.start(bitPackedVectorViewType)
Expand Down Expand Up @@ -141,7 +142,7 @@ class LocalLDPruneSuite extends HailSuite {
val nCores = 4
lazy val vds = TestUtils.importVCF(hc, "src/test/resources/sample.vcf.bgz", nPartitions = Option(10))
lazy val maxQueueSize = LocalLDPruneSuite.estimateMemoryRequirements(vds.countRows(), vds.numCols, memoryPerCoreBytes)

def toC2(i: Int): BoxedCall = if (i == -1) null else Call2.fromUnphasedDiploidGtIndex(i)

def getLocallyPrunedRDDWithGT(unprunedMatrixTable: MatrixTable, locallyPrunedTable: Table):
Expand Down Expand Up @@ -292,7 +293,7 @@ class LocalLDPruneSuite extends HailSuite {
val bv1 = LocalLDPruneSuite.toBitPackedVectorView(v1Ann, nSamples)
val bv2 = LocalLDPruneSuite.toBitPackedVectorView(v2Ann, nSamples)

val view = HardCallView(LocalLDPruneSuite.rvRowType.physicalType)
val view = HardCallView(PType.canonical(LocalLDPruneSuite.rvRowType).asInstanceOf[PStruct])

val rv1 = LocalLDPruneSuite.makeRV(v1Ann)
view.setRegion(rv1)
Expand Down Expand Up @@ -325,9 +326,9 @@ class LocalLDPruneSuite extends HailSuite {
@Test def bitPackedVectorCorrectWhenOffsetNotZero() {
Region.scoped { r =>
val rvb = new RegionValueBuilder(r)
val t = BitPackedVectorView.rvRowType(
+TLocus(ReferenceGenome.GRCh37),
+TArray(+TString()))
val t = BitPackedVectorView.rvRowPType(
+PLocus(ReferenceGenome.GRCh37),
+PArray(+PString()))
val bpv = new BitPackedVectorView(t)
r.appendInt(0xbeef)
rvb.start(t)
Expand Down