Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[query] Support struct loci in export_plink #10538

Merged
merged 2 commits into from May 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 13 additions & 15 deletions hail/python/hail/methods/impex.py
@@ -1,24 +1,23 @@
import json
import re
from typing import List

from hail.typecheck import typecheck, nullable, oneof, dictof, anytype, \
sequenceof, enumeration, sized_tupleof, numeric, table_key_type, char
from hail.utils.java import Env, FatalError, jindexed_seq_args, warning
from hail.utils import wrap_to_list
from hail.matrixtable import MatrixTable
from hail.table import Table
from hail.expr.types import hail_type, tarray, tfloat64, tstr, tint32, tstruct, \
tcall, tbool, tint64, tfloat32
import hail as hl
from hail import ir
from hail.expr import StructExpression, LocusExpression, \
expr_array, expr_float64, expr_str, expr_numeric, expr_call, expr_bool, \
expr_any, \
to_expr, analyze
from hail import ir
from hail.expr.types import hail_type, tarray, tfloat64, tstr, tint32, tstruct, \
tcall, tbool, tint64, tfloat32
from hail.genetics.reference_genome import reference_genome_type
from hail.methods.misc import require_biallelic, require_row_key_variant, require_row_key_variant_w_struct_locus, require_col_key_str
import hail as hl

from typing import List
from hail.matrixtable import MatrixTable
from hail.methods.misc import require_biallelic, require_row_key_variant, require_col_key_str
from hail.table import Table
from hail.typecheck import typecheck, nullable, oneof, dictof, anytype, \
sequenceof, enumeration, sized_tupleof, numeric, table_key_type, char
from hail.utils import wrap_to_list
from hail.utils.java import Env, FatalError, jindexed_seq_args, warning


def locus_interval_expr(contig, start, end, includes_start, includes_end,
Expand Down Expand Up @@ -323,8 +322,7 @@ def export_plink(dataset, output, call=None, fam_id=None, ind_id=None, pat_id=No
The default value is ``0.0``. The missing value is ``0.0``.
"""

require_biallelic(dataset, 'export_plink')
require_row_key_variant_w_struct_locus(dataset, 'export_plink')
require_biallelic(dataset, 'export_plink', tolerate_generic_locus=True)

if ind_id is None:
require_col_key_str(dataset, "export_plink")
Expand Down
9 changes: 6 additions & 3 deletions hail/python/hail/methods/misc.py
Expand Up @@ -230,9 +230,12 @@ def require_key(table, method):
raise ValueError("Method '{}' requires a non-empty key".format(method))


@typecheck(dataset=MatrixTable, method=str)
def require_biallelic(dataset, method) -> MatrixTable:
require_row_key_variant(dataset, method)
@typecheck(dataset=MatrixTable, method=str, tolerate_generic_locus=bool)
def require_biallelic(dataset, method, tolerate_generic_locus: bool = False) -> MatrixTable:
if tolerate_generic_locus:
require_row_key_variant_w_struct_locus(dataset, method)
else:
require_row_key_variant(dataset, method)
return dataset._select_rows(method,
hl.case()
.when(dataset.alleles.length() == 2, dataset._rvrow)
Expand Down
2 changes: 1 addition & 1 deletion hail/python/hail/table.py
Expand Up @@ -3281,7 +3281,7 @@ def _same(self, other, tolerance=1e-6, absolute=False):
from hail.expr.functions import _values_similar

if self._type != other._type:
print(f'Table._same: types differ: {self._type}, {other._type}')
print(f'Table._same: types differ:\n {self._type}\n {other._type}')
return False

left_global_value = Env.get_uid()
Expand Down
18 changes: 18 additions & 0 deletions hail/python/test/hail/methods/test_impex.py
Expand Up @@ -956,6 +956,24 @@ def test_contig_recoding_defaults(self):
resource('sex_mt_contigs.fam'),
reference_genome='random')

@fails_service_backend()
@fails_local_backend()
def test_export_plink_struct_locus(self):
    """Round-trip a struct-keyed matrix table through PLINK export and import.

    The row key's ``locus`` is a plain struct with ``contig`` and
    ``position`` fields. After ``export_plink`` and a re-import with
    ``reference_genome=None`` (row and column fields dropped), the result
    must compare equal to the input under ``_same``.
    """
    exported = hl.utils.range_matrix_table(10, 10)
    struct_locus = hl.struct(contig=hl.str(exported.row_idx), position=exported.row_idx)
    exported = exported.key_rows_by(locus=struct_locus, alleles=['A', 'T'])
    exported = exported.select_rows()
    exported = exported.key_cols_by(s=hl.str(exported.col_idx))
    exported = exported.select_cols()
    exported = exported.annotate_entries(GT=hl.call(0, 0))

    prefix = new_temp_file()
    hl.export_plink(exported, prefix)

    # The three PLINK files share the export prefix and differ only in suffix.
    reimported = hl.import_plink(
        bed=prefix + '.bed',
        bim=prefix + '.bim',
        fam=prefix + '.fam',
        reference_genome=None)
    reimported = reimported.select_rows().select_cols()
    assert exported._same(reimported)


# this routine was used to generate resources random.gen, random.sample
# random.bgen was generated with qctool v2.0rc9:
Expand Down
2 changes: 2 additions & 0 deletions hail/src/main/scala/is/hail/types/physical/PInt32.scala
Expand Up @@ -45,6 +45,8 @@ class PInt32(override val required: Boolean) extends PNumeric with PPrimitive {
// Casts `annotation` to Int and writes it at `addr` via Region.storeInt.
// The `region` argument is not used by this implementation.
override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = {
  val intValue = annotation.asInstanceOf[Int]
  Region.storeInt(addr, intValue)
}

// Returns the Int read from `addr` via Region.loadInt.
def unstagedLoadFromAddress(addr: Long): Int = {
  Region.loadInt(addr)
}
}

object PInt32 {
Expand Down
21 changes: 16 additions & 5 deletions hail/src/main/scala/is/hail/variant/RegionValueVariant.scala
@@ -1,12 +1,12 @@
package is.hail.variant

import is.hail.annotations._
import is.hail.types.physical.{PArray, PLocus, PString, PStruct}
import is.hail.types.physical.{PArray, PInt32, PLocus, PString, PStruct}
import is.hail.utils._

class RegionValueVariant(rowType: PStruct) extends View {
private val locusField = rowType.fieldByName("locus")
private val locusPType = locusField.typ.asInstanceOf[PLocus]
private val locusPType = locusField.typ
private val allelesField = rowType.fieldByName("alleles")
private val locusIdx = locusField.index
private val allelesIdx = allelesField.index
Expand All @@ -31,12 +31,23 @@ class RegionValueVariant(rowType: PStruct) extends View {
}

def contig(): String = {
if (cachedContig == null)
cachedContig = locusPType.contig(locusAddress)
if (cachedContig == null) {
locusPType match {
case pl: PLocus =>
cachedContig = pl.contig(locusAddress)
case s: PStruct =>
cachedContig = s.types(0).asInstanceOf[PString].loadString(s.loadField(locusAddress, 0))
}
}
cachedContig
}

def position(): Int = locusPType.position(locusAddress)
// Reads the position of the current locus.
// For a PLocus the type's own accessor is used; for a PStruct the value is
// loaded from field index 1, which is cast to PInt32.
def position(): Int = locusPType match {
  case locus: PLocus => locus.position(locusAddress)
  case struct: PStruct =>
    val positionType = struct.types(1).asInstanceOf[PInt32]
    positionType.unstagedLoadFromAddress(struct.loadField(locusAddress, 1))
}

def alleles(): Array[String] = {
if (cachedAlleles == null) {
Expand Down