Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH/BUG: BigQuery UNION requires ALL or DISTINCT #1409

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ New Features

* Allow keyword arguments in Node subclasses (:issue:`968`)
* Splat args into Node subclasses instead of requiring a list (:issue:`969`)
* Add support for ``UNION`` in the BigQuery backend (:issue:`1408`, :issue:`1409`)
* Support for writing UDFs in BigQuery (:issue:`1377`). See :ref:`the BigQuery
UDF docs <udf.bigquery>` for more details.

Bug Fixes
~~~~~~~~~
Expand All @@ -35,8 +38,6 @@ API Changes
* The previous rule system, which was not publicly exposed, has been rewritten
* Input arguments for operations are now defined in a more readable fashion,
replacing the previous ``input_type`` list.
* Support for writing UDFs in BigQuery (:issue:`1377`). See :ref:`the BigQuery
UDF docs <udf.bigquery>` for more details.

v0.13.0 (March 30, 2018)
------------------------
Expand Down
7 changes: 7 additions & 0 deletions ibis/bigquery/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ def compile(self):
return self.expr.op().js


class BigQueryUnion(comp.Union):
    """Union whose keyword always spells out ``DISTINCT`` or ``ALL``."""

    @property
    def keyword(self):
        """Return the set-operation keyword for this union.

        ``'UNION DISTINCT'`` when ``self.distinct`` is truthy, otherwise
        ``'UNION ALL'``.
        """
        if self.distinct:
            return 'UNION DISTINCT'
        return 'UNION ALL'


def find_bigquery_udf(expr):
if isinstance(expr.op(), BigQueryUDFNode):
result = expr
Expand All @@ -48,6 +54,7 @@ def find_bigquery_udf(expr):
class BigQueryQueryBuilder(comp.QueryBuilder):

select_builder = BigQuerySelectBuilder
union_class = BigQueryUnion

def generate_setup_queries(self):
result = list(
Expand Down
21 changes: 21 additions & 0 deletions ibis/bigquery/tests/test_compiler.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

import ibis
import ibis.expr.datatypes as dt

Expand All @@ -12,3 +14,22 @@ def test_timestamp_accepts_date_literals(alltypes):
SELECT *, @param AS `param`
FROM testing.functional_alltypes"""
assert result == expected


@pytest.mark.parametrize(
    ('distinct', 'expected_keyword'),
    [(True, 'DISTINCT'), (False, 'ALL')],
)
def test_union(alltypes, distinct, expected_keyword):
    """Check the compiled SQL of a self-union for both ``distinct`` values.

    The expected text is the same two ``SELECT *`` statements joined by
    ``UNION`` followed by the parametrized keyword.
    """
    # The template lines stay flush-left: they are part of the compared text.
    template = """\
SELECT *
FROM testing.functional_alltypes
UNION {}
SELECT *
FROM testing.functional_alltypes"""
    union_expr = alltypes.union(alltypes, distinct=distinct)
    assert union_expr.compile() == template.format(expected_keyword)
39 changes: 18 additions & 21 deletions ibis/sql/alchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,13 +707,27 @@ def get_table(self, expr):
return self._get_table_item('_table_objects', expr)


class AlchemyUnion(Union):
    """Union that is compiled through SQLAlchemy's ``union``/``union_all``."""

    def compile(self):
        """Combine the left and right operands into one SQLAlchemy construct.

        Each operand is obtained from the context with
        ``get_compiled_expr``, turned into a CTE via ``.cte()`` and then
        re-selected with ``.select()``. The two selects are passed to
        ``sa.union`` when ``self.distinct`` is truthy, else ``sa.union_all``.
        """
        ctx = self.context

        if self.distinct:
            combine = sa.union
        else:
            combine = sa.union_all

        def select_from_cte(operand):
            # compiled expression -> CTE -> select over that CTE
            return ctx.get_compiled_expr(operand).cte().select()

        # Left is processed before right, as in the straight-line version.
        left_select = select_from_cte(self.left)
        right_select = select_from_cte(self.right)
        return combine(left_select, right_select)


class AlchemyQueryBuilder(comp.QueryBuilder):
    """Query builder configured with the SQLAlchemy select and union classes."""

    # Class-level hooks naming the builder and union classes to use.
    union_class = AlchemyUnion
    select_builder = AlchemySelectBuilder

    @property
    def _union_class(self):
        """Return ``AlchemyUnion`` (property-style accessor for the union class)."""
        return AlchemyUnion


def to_sqlalchemy(expr, context, exists=False):
Expand Down Expand Up @@ -1282,23 +1296,6 @@ def _and_all(clauses):
return result


class AlchemyUnion(Union):
    """Union compiled with SQLAlchemy set-operation functions."""

    def compile(self):
        """Return ``sa.union`` or ``sa.union_all`` applied to both operands.

        ``sa.union`` is used when ``self.distinct`` is truthy, otherwise
        ``sa.union_all``. Each operand is looked up with
        ``context.get_compiled_expr``, wrapped with ``.cte()`` and selected
        from with ``.select()`` before being combined.
        """
        ctx = self.context
        combine = sa.union if self.distinct else sa.union_all

        # Build the left select fully before touching the right operand.
        left_select = ctx.get_compiled_expr(self.left).cte().select()
        right_select = ctx.get_compiled_expr(self.right).cte().select()

        return combine(left_select, right_select)


class AlchemyProxy(object):
"""
Wraps a SQLAlchemy ResultProxy and ensures that .close() is called on
Expand Down
143 changes: 71 additions & 72 deletions ibis/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@
import ibis.sql.transforms as transforms


@six.add_metaclass(abc.ABCMeta)
class DML(object):
    """Abstract base class that requires subclasses to define ``compile``."""

    @abc.abstractmethod
    def compile(self):
        """Abstract hook; the base implementation does nothing."""


@six.add_metaclass(abc.ABCMeta)
class DDL(object):
    """Abstract base class that requires subclasses to define ``compile``."""

    @abc.abstractmethod
    def compile(self):
        """Abstract hook; the base implementation does nothing."""


class QueryAST(object):

__slots__ = 'context', 'dml', 'setup_queries', 'teardown_queries'
Expand Down Expand Up @@ -903,18 +917,68 @@ def column_handler(results):
.format(type(expr)))


class Union(DML):
    """Compile the union of two table expressions into a SQL string.

    The keyword placed between the two operands comes from the ``keyword``
    property, so a subclass can change it without touching ``compile``.
    """

    def __init__(self, left_table, right_table, expr, context, distinct=False):
        """Store the operands, the union expression, the context and the flag.

        ``expr`` is kept as ``table_set``; ``filters`` starts as an empty list.
        """
        self.left = left_table
        self.right = right_table
        self.table_set = expr
        self.context = context
        self.distinct = distinct
        self.filters = []

    def _extract_subqueries(self):
        """Store the extracted subqueries and mark each on the context."""
        # Calls the module-level helper of the same name, not this method.
        found = _extract_subqueries(self)
        self.subqueries = found
        for item in found:
            self.context.set_extracted(item)

    def format_subqueries(self):
        """Render ``self.subqueries`` as ``<ref> AS (...)`` clauses.

        Clauses are joined with a comma and newline; an empty collection
        yields an empty string.
        """
        ctx = self.context
        clauses = []
        for subquery in self.subqueries:
            alias = ctx.get_ref(subquery)
            body = util.indent(ctx.get_compiled_expr(subquery), 2)
            clauses.append('{} AS (\n{}\n)'.format(alias, body))
        return ',\n'.join(clauses)

    def format_relation(self, expr):
        """Return SQL text for one operand.

        When the context has no reference for ``expr`` the compiled
        expression is returned; otherwise a ``SELECT *`` from that reference.
        """
        alias = self.context.get_ref(expr)
        if alias is None:
            return self.context.get_compiled_expr(expr)
        return 'SELECT *\nFROM {}'.format(alias)

    @property
    def keyword(self):
        """Return ``'UNION'`` when distinct, otherwise ``'UNION ALL'``."""
        if self.distinct:
            return 'UNION'
        return 'UNION ALL'

    def compile(self):
        """Assemble the optional ``WITH`` clause, both operands and the keyword.

        The parts are joined with newlines.
        """
        self._extract_subqueries()

        # Operands are formatted before the subqueries, left one first.
        left_sql = self.format_relation(self.left)
        right_sql = self.format_relation(self.right)
        with_clause = self.format_subqueries()

        pieces = [left_sql, self.keyword, right_sql]
        if with_clause:
            pieces.insert(0, 'WITH {}'.format(with_clause))

        return '\n'.join(pieces)


class QueryBuilder(object):

select_builder = SelectBuilder
union_class = Union

def __init__(self, expr, context):
self.expr = expr
self.context = context

@property
def _union_class(self):
return Union

def generate_setup_queries(self):
return []

Expand Down Expand Up @@ -944,9 +1008,9 @@ def get_result(self):

def _make_union(self):
    """Build the union node for ``self.expr`` via the ``union_class`` hook.

    The left/right operands and the ``distinct`` flag are read from the
    expression's operation. The node is created through the
    ``union_class`` class attribute so that builders overriding that
    attribute get their own union implementation. The earlier duplicate
    ``return`` that went through ``_union_class`` made this call
    unreachable and is removed.
    """
    op = self.expr.op()
    return self.union_class(
        op.left,
        op.right,
        self.expr,
        distinct=op.distinct,
        context=self.context,
    )

def _make_select(self):
builder = self.select_builder(self.expr, self.context)
Expand Down Expand Up @@ -1336,20 +1400,6 @@ def make_context(cls, params=None):
return cls.translator.context_class(dialect=cls(), params=params)


@six.add_metaclass(abc.ABCMeta)
class DML(object):
    """Abstract base class that requires subclasses to define ``compile``."""

    @abc.abstractmethod
    def compile(self):
        """Abstract hook; the base implementation does nothing."""


@six.add_metaclass(abc.ABCMeta)
class DDL(object):
    """Abstract base class that requires subclasses to define ``compile``."""

    @abc.abstractmethod
    def compile(self):
        """Abstract hook; the base implementation does nothing."""


class Select(DML):

"""
Expand Down Expand Up @@ -1793,54 +1843,3 @@ def get_result(self):
buf.write(fmt_preds)

return buf.getvalue()


class Union(DML):
    """Compile the union of two table expressions into a SQL string."""

    def __init__(self, left_table, right_table, expr, context, distinct=False):
        """Store the operands, the union expression, the context and the flag.

        ``expr`` is kept as ``table_set``; ``filters`` starts as an empty list.
        """
        self.left = left_table
        self.right = right_table
        self.table_set = expr
        self.context = context
        self.distinct = distinct
        self.filters = []

    def _extract_subqueries(self):
        """Store the extracted subqueries and mark each on the context."""
        # Calls the module-level helper of the same name, not this method.
        found = _extract_subqueries(self)
        self.subqueries = found
        for item in found:
            self.context.set_extracted(item)

    def format_subqueries(self):
        """Render ``self.subqueries`` as ``<ref> AS (...)`` clauses.

        Clauses are joined with a comma and newline; an empty collection
        yields an empty string.
        """
        ctx = self.context
        clauses = []
        for subquery in self.subqueries:
            alias = ctx.get_ref(subquery)
            body = util.indent(ctx.get_compiled_expr(subquery), 2)
            clauses.append('{} AS (\n{}\n)'.format(alias, body))
        return ',\n'.join(clauses)

    def format_relation(self, expr):
        """Return SQL text for one operand.

        When the context has no reference for ``expr`` the compiled
        expression is returned; otherwise a ``SELECT *`` from that reference.
        """
        alias = self.context.get_ref(expr)
        if alias is None:
            return self.context.get_compiled_expr(expr)
        return 'SELECT *\nFROM {}'.format(alias)

    def compile(self):
        """Assemble the optional ``WITH`` clause, both operands and the keyword.

        The keyword is ``'UNION'`` when ``self.distinct`` is truthy and
        ``'UNION ALL'`` otherwise. The parts are joined with newlines.
        """
        if self.distinct:
            set_op = 'UNION'
        else:
            set_op = 'UNION ALL'

        self._extract_subqueries()

        # Operands are formatted before the subqueries, left one first.
        left_sql = self.format_relation(self.left)
        right_sql = self.format_relation(self.right)
        with_clause = self.format_subqueries()

        header = ['WITH {}'.format(with_clause)] if with_clause else []
        return '\n'.join(header + [left_sql, set_op, right_sql])