Skip to content

Commit

Permalink
refactor(clickhouse): remove recursion from the compiler
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Sep 26, 2023
1 parent 90befb2 commit ccbcdc0
Show file tree
Hide file tree
Showing 17 changed files with 821 additions and 1,158 deletions.
6 changes: 4 additions & 2 deletions ibis/backends/clickhouse/__init__.py
Expand Up @@ -414,8 +414,10 @@ def table(self, name: str, database: str | None = None) -> ir.Table:
Table expression
"""
schema = self.get_schema(name, database=database)
qname = self._fully_qualified_name(name, database)
return ops.DatabaseTable(qname, schema, self).to_expr()
op = ops.DatabaseTable(
name=name, schema=schema, source=self, namespace=database
)
return op.to_expr()

def insert(
self,
Expand Down
85 changes: 74 additions & 11 deletions ibis/backends/clickhouse/compiler/core.py
Expand Up @@ -45,14 +45,28 @@

import sqlglot as sg

import ibis.expr.analysis as an
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.backends.clickhouse.compiler.relations import translate_rel
from ibis.backends.clickhouse.compiler.values import translate_val
from ibis.common.patterns import Call
from ibis.expr.analysis import c, p, x, y

if TYPE_CHECKING:
from collections.abc import Mapping


a = Call.namespace(an)


def _translate_node(node, *args, **kwargs):
    """Dispatch `node` to the translation routine matching its kind.

    Value operations are handed to `translate_val`; any other node must be
    a table node and is handed to `translate_rel`. All extra positional and
    keyword arguments are forwarded unchanged to the chosen translator.
    """
    if not isinstance(node, ops.Value):
        # anything that is not a value expression must be a relation
        assert isinstance(node, ops.TableNode)
        return translate_rel(node, *args, **kwargs)
    return translate_val(node, *args, **kwargs)


def translate(op: ops.TableNode, params: Mapping[ir.Value, Any]) -> sg.exp.Expression:
"""Translate an ibis operation to a sqlglot expression.
Expand All @@ -68,31 +82,80 @@ def translate(op: ops.TableNode, params: Mapping[ir.Value, Any]) -> sg.exp.Expre
sqlglot.expressions.Expression
A sqlglot expression
"""
params = {param.op(): value for param, value in params.items()}

alias_index = 0
aliases = {}

def fn(node, cache, params=params, **kwargs):
def fn(node, _, **kwargs):
nonlocal alias_index

result = _translate_node(node, aliases=aliases, **kwargs)

if not isinstance(node, ops.TableNode):
return result

# don't alias the root node
if node is not op:
aliases[node] = f"t{alias_index:d}"
alias_index += 1

raw_rel = translate_rel(
node, aliases=aliases, params=params, cache=cache, **kwargs
)

if alias := aliases.get(node):
try:
return raw_rel.subquery(alias)
return result.subquery(alias=alias)
except AttributeError:
return sg.alias(raw_rel, alias)
return sg.alias(result, alias=alias)
else:
return raw_rel

results = op.map(fn, filter=ops.TableNode)
return result

# substitute parameters immediately to avoid having to define a
# ScalarParameter translation rule
#
# this lets us avoid threading `params` through every `translate_val` call
# only to be used in the one place it would be needed: the ScalarParameter
# `translate_val` rule
params = {param.op(): value for param, value in params.items()}
replace_literals = p.ScalarParameter >> (
lambda op, _: ops.Literal(value=params[op], dtype=op.dtype)
)

# rewrite cumulative functions to window functions so that we don't have
# to think about handling them in the compiler; we need only compile window
# functions
replace_cumulative_ops = p.WindowFunction(
x @ p.Cumulative, y
) >> a.cumulative_to_window(x, y)

# replace the right side of InColumn with a scalar subquery for sql
# backends
replace_in_column_with_table_array_view = p.InColumn >> (
lambda op, _: op.__class__(
op.value,
ops.TableArrayView(
ops.Selection(
table=an.find_first_base_table(op.options), selections=(op.options,)
)
),
)
)

# replace any checks against an empty right side of the IN operation with
# `False`
replace_empty_in_values_with_false = p.InValues(x, ()) >> c.Literal(
False, dtype="bool"
)

replace_notexists_subquery_with_not_exists = p.NotExistsSubquery(x) >> c.Not(
c.ExistsSubquery(x)
)

op = op.replace(
replace_literals
| replace_cumulative_ops
| replace_in_column_with_table_array_view
| replace_empty_in_values_with_false
| replace_notexists_subquery_with_not_exists
)
# apply translate rules in topological order
results = op.map(fn, filter=(ops.TableNode, ops.Value))
node = results[op]
return node.this if isinstance(node, sg.exp.Subquery) else node

0 comments on commit ccbcdc0

Please sign in to comment.