Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TableMultiWayZipJoin key behavior #5693

Merged
merged 2 commits into hail-is:master on Apr 1, 2019
Merged

Conversation

@tpoterba
Copy link
Collaborator

@tpoterba tpoterba commented Mar 26, 2019

fixes #5396

Copy link
Collaborator

@chrisvittal chrisvittal left a comment

With this change, the original error is gone, but the following reproduction now fails with a different problem:

ipython -c "
import hail as hl
from hail import ir

hl.init()
mt = hl.import_vcf('src/test/resources/sample.vcf.bgz')
mt = hl.MatrixTable(ir.MatrixKeyRowsBy(mt._mir, ['locus'], is_sorted=True))
ht = mt._localize_entries('_e', '_c')
j = hl.Table._multi_way_zip_join([ht, ht], 'd', 'g')
j.write('/tmp/tst.ht', overwrite=True)
"
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Running on Apache Spark version 2.2.0
SparkUI available at http://10.1.8.50:4040
Welcome to
     __  __     <>__
    / /_/ /__  __/ /
   / __  / _ `/ / /
  /_/ /_/\_,_/_/_/   version 0.2.11-005ecda027a9
LOGGING: writing to /home/BROAD.MIT.EDU/cvittal/src/hail-testing/hail/hail-20190326-1159-0.2.11-005ecda027a9.log
2019-03-26 11:59:58 Hail: INFO: Coerced sorted dataset
---------------------------------------------------------------------------
FatalError                                Traceback (most recent call last)
<ipython-input-1-0aed34e75c01> in <module>()
      8 ht = mt._localize_entries('_e', '_c')
      9 j = hl.Table._multi_way_zip_join([ht, ht], 'd', 'g')
---> 10 j.write('/tmp/tst.ht', overwrite=True)

<decorator-gen-907> in write(self, output, overwrite, stage_locally, _codec_spec)

~/src/hail-testing/hail/python/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
    559     def wrapper(__original_func, *args, **kwargs):
    560         args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 561         return __original_func(*args_, **kwargs_)
    562 
    563     return wrapper

~/src/hail-testing/hail/python/hail/table.py in write(self, output, overwrite, stage_locally, _codec_spec)
   1200         """
   1201 
-> 1202         Env.backend().execute(TableWrite(self._tir, output, overwrite, stage_locally, _codec_spec))
   1203 
   1204     def _show(self, n, width, truncate, types):

~/src/hail-testing/hail/python/hail/backend/backend.py in execute(self, ir)
     92         return ir.typ._from_json(
     93             Env.hail().backend.spark.SparkBackend.executeJSON(
---> 94                 self._to_java_ir(ir)))
     95 
     96     def value_type(self, ir):

~/.local/opt/spark/spark-2.2.0-bin-hadoop2.7/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1131         answer = self.gateway_client.send_command(command)
   1132         return_value = get_return_value(
-> 1133             answer, self.gateway_client, self.target_id, self.name)
   1134 
   1135         for temp_arg in temp_args:

~/src/hail-testing/hail/python/hail/utils/java.py in deco(*args, **kwargs)
    225             raise FatalError('%s\n\nJava stack trace:\n%s\n'
    226                              'Hail version: %s\n'
--> 227                              'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
    228         except pyspark.sql.utils.CapturedException as e:
    229             raise FatalError('%s\n\nJava stack trace:\n%s\n'

FatalError: IllegalArgumentException: requirement failed

Java stack trace:
java.lang.IllegalArgumentException: requirement failed
	at scala.Predef$.require(Predef.scala:212)
	at is.hail.rvd.RVD.<init>(RVD.scala:42)
	at is.hail.rvd.RVD$.apply(RVD.scala:1246)
	at is.hail.expr.ir.TableMultiWayZipJoin.execute(TableIR.scala:738)
	at is.hail.expr.ir.Interpret$.apply(Interpret.scala:758)
	at is.hail.expr.ir.Interpret$.apply(Interpret.scala:87)
	at is.hail.expr.ir.CompileAndEvaluate$.apply(CompileAndEvaluate.scala:31)
	at is.hail.backend.spark.SparkBackend$.execute(SparkBackend.scala:49)
	at is.hail.backend.spark.SparkBackend$.executeJSON(SparkBackend.scala:16)
	at is.hail.backend.spark.SparkBackend.executeJSON(SparkBackend.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:280)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:214)
	at java.lang.Thread.run(Thread.java:748)

Hail version: 0.2.11-005ecda027a9
Error summary: IllegalArgumentException: requirement failed

@danking danking merged commit 21f2d10 into hail-is:master Apr 1, 2019
1 check passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Linked issues

Successfully merging this pull request may close these issues.

3 participants