147 changes: 72 additions & 75 deletions ibis/expr/analysis.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
from operator import methodcaller
import operator

import toolz

import ibis.expr.types as ir
import ibis.expr.lineage as lin
import ibis.expr.operations as ops

import ibis.expr.types as ir
from ibis import util
from ibis.common import ExpressionError, IbisTypeError, RelationError
from ibis.expr.schema import HasSchema
from ibis.expr.window import window

from ibis.common import RelationError, ExpressionError, IbisTypeError


# ---------------------------------------------------------------------
# Some expression metaprogramming / graph transformations to support
# compilation later
Expand Down Expand Up @@ -293,35 +291,36 @@ def get_result(self):
return result

def _lift_arg(self, arg, block=None):
unchanged = [True]
changed = 0

def _lift(expr):
nonlocal changed

def _lift(x):
if isinstance(x, ir.Expr):
lifted_arg = self.lift(x, block=block)
if lifted_arg is not x:
unchanged[0] = False
if isinstance(expr, ir.Expr):
lifted_arg = self.lift(expr, block=block)
changed += lifted_arg is not expr
else:
# a string or some other thing
lifted_arg = x
lifted_arg = expr
return lifted_arg

if arg is None:
return arg, True

if isinstance(arg, (tuple, list)):
result = [_lift(x) for x in arg]
if util.is_iterable(arg):
result = list(map(_lift, arg))
else:
result = _lift(arg)

return result, unchanged[0]
return result, not changed

def lift(self, expr, block=None):
key = expr.op(), block
op, _ = key = expr.op(), block

if key in self.lift_memo:
try:
return self.lift_memo[key]

op = expr.op()
except KeyError:
pass

if isinstance(op, ops.ValueOp):
return self._sub(expr, block=block)
Expand All @@ -346,26 +345,30 @@ def _lift_TableColumn(self, expr, block=None):

if isinstance(root, ops.Selection):
can_lift = False
all_simple_columns = all(
isinstance(sel.op(), ops.TableColumn)
and sel.op().name == sel.get_name()
for sel in root.selections
if isinstance(sel, ir.ValueExpr)
if sel.has_name()
)

for val in root.selections:
value_op = val.op()
if (
isinstance(val.op(), ops.PhysicalTable)
isinstance(value_op, ops.PhysicalTable)
and node.name in val.schema()
):
can_lift = True
lifted_root = self.lift(val)
elif (
isinstance(val.op(), ops.TableColumn)
and val.op().name == val.get_name()
all_simple_columns
and isinstance(val, ir.ValueExpr)
and val.has_name()
and node.name == val.get_name()
):
can_lift = True
lifted_root = self.lift(val.op().table)

# HACK: If we've projected a join, do not lift the children
# TODO: what about limits and other things?
# if isinstance(root.table.op(), Join):
# can_lift = False
lifted_root = self.lift(value_op.table)

if can_lift and not block:
lifted_node = ops.TableColumn(node.name, lifted_root)
Expand All @@ -378,14 +381,15 @@ def _lift_Aggregation(self, expr, block=None):
block = self.block_projection

op = expr.op()
table = op.table

# as exposed in #544, do not lift the table inside (which may be
# filtered or otherwise altered in some way) if blocking

if block:
lifted_table = op.table
lifted_table = table
else:
lifted_table = self.lift(op.table, block=True)
lifted_table = self.lift(table, block=True)

unch = lifted_table is op.table

Expand All @@ -399,7 +403,7 @@ def _lift_Aggregation(self, expr, block=None):
lifted_op = ops.Aggregation(
lifted_table, lifted_aggs, by=lifted_by, having=lifted_having
)
result = ir.TableExpr(lifted_op)
result = lifted_op.to_expr()
else:
result = expr

Expand Down Expand Up @@ -433,7 +437,7 @@ def _lift_Selection(self, expr, block=None):
lifted_predicates,
lifted_sort_keys,
)
result = ir.TableExpr(lifted_projection)
result = lifted_projection.to_expr()
else:
result = expr

Expand Down Expand Up @@ -751,72 +755,73 @@ def __init__(self, parent, proj_exprs):
self.parent = parent
self.input_exprs = proj_exprs
self.resolved_exprs = [parent._ensure_expr(e) for e in proj_exprs]

node = self.parent.op()

if isinstance(node, ops.Selection):
roots = [node]
else:
roots = node.root_tables()

self.parent_roots = roots

clean_exprs = []

for expr in self.resolved_exprs:
# Perform substitution only if we share common roots
expr = windowize_function(expr)
clean_exprs.append(expr)

self.clean_exprs = clean_exprs
node = parent.op()
self.parent_roots = (
[node] if isinstance(node, ops.Selection) else node.root_tables()
)
self.clean_exprs = list(map(windowize_function, self.resolved_exprs))

def get_result(self):
roots = self.parent_roots
first_root = roots[0]

if len(roots) == 1 and isinstance(first_root, ops.Selection):
fused_op = self._check_fusion(first_root)
fused_op = self.try_fusion(first_root)
if fused_op is not None:
return fused_op

return ops.Selection(self.parent, self.clean_exprs)

def _check_fusion(self, root):
roots = root.table._root_tables()
validator = ExprValidator([root.table])
def try_fusion(self, root):
root_table = root.table
roots = root_table._root_tables()
validator = ExprValidator([root_table])
fused_exprs = []
can_fuse = False
clean_exprs = self.clean_exprs

if not isinstance(root_table.op(), ops.Join):
try:
resolved = root_table._resolve(self.input_exprs)
except (AttributeError, IbisTypeError):
resolved = clean_exprs
else:
# joins cannot be used to resolve expressions, but we still may be
# able to fuse columns from a projection off of a join. In that
# case, use the projection's input expressions as the columns with
# which to attempt fusion
resolved = clean_exprs

resolved = _maybe_resolve_exprs(root.table, self.input_exprs)
if not resolved:
return None

root_selections = root.selections
parent_op = self.parent.op()
for val in resolved:
# XXX
lifted_val = substitute_parents(val)

# a * projection
if isinstance(val, ir.TableExpr) and (
self.parent.op().compatible_with(val.op())
parent_op.compatible_with(val.op())
# gross we share the same table root. Better way to
# detect?
or len(roots) == 1
and val._root_tables()[0] is roots[0]
):
can_fuse = True

have_root = False
for y in root.selections:
for root_sel in root_selections:
# Don't add the * projection twice
if y.equals(root.table):
fused_exprs.append(root.table)
if root_sel.equals(root_table):
fused_exprs.append(root_table)
have_root = True
continue
fused_exprs.append(y)
fused_exprs.append(root_sel)

# This was a filter, so implicitly a select *
if not have_root and len(root.selections) == 0:
fused_exprs = [root.table] + fused_exprs
if not have_root and not root_selections:
fused_exprs = [root_table] + fused_exprs
elif validator.validate(lifted_val):
can_fuse = True
fused_exprs.append(lifted_val)
Expand All @@ -828,19 +833,11 @@ def _check_fusion(self, root):

if can_fuse:
return ops.Selection(
root.table,
root_table,
fused_exprs,
predicates=root.predicates,
sort_keys=root.sort_keys,
)
else:
return None


def _maybe_resolve_exprs(table, exprs):
try:
return table._resolve(exprs)
except (AttributeError, IbisTypeError):
return None


Expand Down Expand Up @@ -1027,7 +1024,7 @@ def finder(expr):
return lin.proceed, None

first_tables = lin.traverse(finder, expr.op().flat_args())
options = list(toolz.unique(first_tables, key=methodcaller('op')))
options = list(toolz.unique(first_tables, key=operator.methodcaller('op')))

if len(options) > 1:
raise NotImplementedError('More than one base table not implemented')
Expand Down Expand Up @@ -1095,7 +1092,7 @@ def predicate(expr):

def is_analytic(expr, exclude_windows=False):
def _is_analytic(op):
if isinstance(op, (ops.Reduction, ops.AnalyticOp)):
if isinstance(op, (ops.Reduction, ops.AnalyticOp, ops.Any, ops.All)):
return True
elif isinstance(op, ops.WindowOp) and exclude_windows:
return False
Expand Down
3 changes: 1 addition & 2 deletions ibis/expr/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@
# limitations under the License.


import ibis.expr.rules as rlz
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops

import ibis.expr.rules as rlz
from ibis.expr.signature import Argument as Arg


Expand Down
207 changes: 95 additions & 112 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
@@ -1,109 +1,107 @@
from __future__ import print_function
"""Ibis expression API definitions."""

import collections
import datetime
import functools
import numbers
import operator

import toolz

import dateutil.parser

import pandas as pd
import toolz

import ibis
import ibis.util as util
import ibis.common as com
import ibis.expr.types as ir
import ibis.expr.rules as rlz
import ibis.expr.schema as sch
import ibis.expr.analysis as _L
import ibis.expr.datatypes as dt
import ibis.expr.analytics as _analytics
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops

from ibis.compat import to_time, to_date
from ibis.expr.types import Expr, null, param, literal, sequence, as_value_expr
from ibis.expr.schema import Schema

import ibis.expr.rules as rlz
import ibis.expr.schema as sch
import ibis.expr.types as ir
import ibis.util as util
from ibis.compat import to_date, to_time
from ibis.expr.analytics import bucket, histogram
from ibis.expr.groupby import GroupedTableExpr # noqa
from ibis.expr.window import (
window,
range_window,
trailing_window,
cumulative_window,
trailing_range_window,
)

from ibis.expr.schema import Schema
from ibis.expr.types import ( # noqa
ValueExpr,
ScalarExpr,
ColumnExpr,
TableExpr,
NumericValue,
NumericScalar,
NumericColumn,
IntegerValue,
IntegerScalar,
IntegerColumn,
NullValue,
NullScalar,
NullColumn,
BooleanValue,
BooleanScalar,
BooleanColumn,
FloatingValue,
FloatingScalar,
FloatingColumn,
StringValue,
StringScalar,
StringColumn,
DecimalValue,
DecimalScalar,
DecimalColumn,
TimestampValue,
TimestampScalar,
TimestampColumn,
IntervalValue,
IntervalScalar,
IntervalColumn,
DateValue,
DateScalar,
DateColumn,
TimeValue,
TimeScalar,
TimeColumn,
ArrayValue,
ArrayScalar,
ArrayColumn,
MapValue,
MapScalar,
MapColumn,
StructValue,
StructScalar,
StructColumn,
CategoryValue,
ArrayScalar,
ArrayValue,
BooleanColumn,
BooleanScalar,
BooleanValue,
CategoryScalar,
CategoryValue,
ColumnExpr,
DateColumn,
DateScalar,
DateValue,
DecimalColumn,
DecimalScalar,
DecimalValue,
Expr,
FloatingColumn,
FloatingScalar,
FloatingValue,
GeoSpatialColumn,
GeoSpatialScalar,
GeoSpatialValue,
PointColumn,
PointScalar,
PointValue,
IntegerColumn,
IntegerScalar,
IntegerValue,
IntervalColumn,
IntervalScalar,
IntervalValue,
LineStringColumn,
LineStringScalar,
LineStringValue,
PolygonColumn,
PolygonScalar,
PolygonValue,
MapColumn,
MapScalar,
MapValue,
MultiPolygonColumn,
MultiPolygonScalar,
MultiPolygonValue,
NullColumn,
NullScalar,
NullValue,
NumericColumn,
NumericScalar,
NumericValue,
PointColumn,
PointScalar,
PointValue,
PolygonColumn,
PolygonScalar,
PolygonValue,
ScalarExpr,
StringColumn,
StringScalar,
StringValue,
StructColumn,
StructScalar,
StructValue,
TableExpr,
TimeColumn,
TimeScalar,
TimestampColumn,
TimestampScalar,
TimestampValue,
TimeValue,
ValueExpr,
as_value_expr,
literal,
null,
param,
sequence,
)
from ibis.expr.window import (
cumulative_window,
range_window,
trailing_range_window,
trailing_window,
window,
)


__all__ = (
'aggregate',
Expand All @@ -116,7 +114,6 @@
'desc',
'Expr',
'expr_list',
'greatest',
'geo_area',
'geo_contains',
'geo_distance',
Expand All @@ -135,6 +132,7 @@
'geo_y',
'geo_y_max',
'geo_y_min',
'greatest',
'ifelse',
'infer_dtype',
'infer_schema',
Expand Down Expand Up @@ -1325,7 +1323,7 @@ def quantile(arg, quantile, interpolation='linear'):
if scalar input, scalar type, same as input
if array input, list of scalar type
"""
if isinstance(quantile, collections.Sequence):
if isinstance(quantile, collections.abc.Sequence):
op = ops.MultiQuantile(arg, quantile, interpolation)
else:
op = ops.Quantile(arg, quantile, interpolation)
Expand Down Expand Up @@ -3362,43 +3360,29 @@ def mutate(table, exprs=None, **mutations):
>>> expr2 = table.mutate(new_columns)
>>> expr.equals(expr2)
True
"""
if exprs is None:
exprs = []
else:
exprs = util.promote_list(exprs)
exprs = [] if exprs is None else util.promote_list(exprs)
exprs.extend(
(expr(table) if util.is_function(expr) else as_value_expr(expr)).name(
name
)
for name, expr in sorted(mutations.items(), key=operator.itemgetter(0))
)

for k, v in sorted(mutations.items(), key=operator.itemgetter(0)):
if util.is_function(v):
v = v(table)
else:
v = as_value_expr(v)

exprs.append(v.name(k))

has_replacement = False
for expr in exprs:
if expr.get_name() in table:
has_replacement = True

if has_replacement:
by_name = dict((x.get_name(), x) for x in exprs)
used = set()
proj_exprs = []
for c in table.columns:
if c in by_name:
proj_exprs.append(by_name[c])
used.add(c)
else:
proj_exprs.append(c)

for x in exprs:
if x.get_name() not in used:
proj_exprs.append(x)

return table.projection(proj_exprs)
by_name = collections.OrderedDict(
(expr.get_name(), expr) for expr in exprs
)
columns = table.columns
used = by_name.keys() & columns

if used:
proj_exprs = [
by_name.get(column, table[column]) for column in columns
] + [expr for name, expr in by_name.items() if name not in used]
else:
return table.projection([table] + exprs)
proj_exprs = [table] + exprs
return table.projection(proj_exprs)


def projection(table, exprs):
Expand Down Expand Up @@ -3499,7 +3483,6 @@ def projection(table, exprs):
exprs = [exprs]

projector = L.Projector(table, exprs)

op = projector.get_result()
return op.to_expr()

Expand Down
12 changes: 4 additions & 8 deletions ibis/expr/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,20 @@
import numbers
import re
import typing

from typing import Any as GenericAny
from typing import (
Any as GenericAny,
Callable,
Iterator,
List,
Mapping,
NamedTuple,
Optional,
Sequence,
Set as GenericSet,
Tuple,
TypeVar,
Union,
)
from typing import Set as GenericSet
from typing import Tuple, TypeVar, Union

import pandas as pd

import toolz
from multipledispatch import Dispatcher

Expand Down Expand Up @@ -1491,7 +1487,7 @@ def can_cast_string_to_temporal(
source: String,
target: Union[Date, Time, Timestamp],
value: Optional[str] = None,
**kwargs,
**kwargs
) -> bool:
if value is None:
return False
Expand Down
5 changes: 2 additions & 3 deletions ibis/expr/format.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import ibis.util as util

import ibis.expr.types as ir
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.util as util


class FormatMemo:
Expand Down
4 changes: 2 additions & 2 deletions ibis/expr/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@

import types

import toolz

import ibis.expr.analysis as L
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.expr.window as _window
import ibis.util as util

import toolz


def _resolve_exprs(table, exprs):
exprs = util.promote_list(exprs)
Expand Down
23 changes: 14 additions & 9 deletions ibis/expr/lineage.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from itertools import chain
from toolz import identity, compose
from collections import deque, Iterable
import collections
import itertools

from toolz import compose, identity

import ibis.expr.types as ir
import ibis.expr.operations as ops
import ibis.expr.types as ir


def find_nodes(expr, node_types):
Expand Down Expand Up @@ -70,7 +71,7 @@ def roots(expr, types=(ops.PhysicalTable,)):
def extender(op):
return reversed(
list(
chain.from_iterable(
itertools.chain.from_iterable(
arg.op().root_tables()
for arg in op.flat_args()
if isinstance(arg, types)
Expand All @@ -86,7 +87,7 @@ class Container:
__slots__ = ('data',)

def __init__(self, data):
self.data = deque(self.visitor(data))
self.data = collections.deque(self.visitor(data))

def append(self, item):
self.data.append(item)
Expand Down Expand Up @@ -150,7 +151,11 @@ def _get_args(op, name):
return [col for col in result if col._name == name]
elif isinstance(op, ops.Aggregation):
assert name is not None, 'name is None'
return [col for col in chain(op.by, op.metrics) if col._name == name]
return [
col
for col in itertools.chain(op.by, op.metrics)
if col._name == name
]
else:
return op.args

Expand Down Expand Up @@ -221,7 +226,7 @@ def traverse(fn, expr, type=ir.Expr, container=Stack):
container: Union[Stack, Queue], default Stack
Defines the traversing order.
"""
args = expr if isinstance(expr, Iterable) else [expr]
args = expr if isinstance(expr, collections.abc.Iterable) else [expr]
todo = container(arg for arg in args if isinstance(arg, type))
seen = set()

Expand All @@ -240,7 +245,7 @@ def traverse(fn, expr, type=ir.Expr, container=Stack):
if control is not halt:
if control is proceed:
args = op.flat_args()
elif isinstance(control, Iterable):
elif isinstance(control, collections.abc.Iterable):
args = control
else:
raise TypeError(
Expand Down
19 changes: 9 additions & 10 deletions ibis/expr/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@
import functools
import itertools
import operator

from contextlib import suppress

import toolz

from ibis.expr.schema import HasSchema, Schema

import ibis.common as com
import ibis.expr.types as ir
import ibis.expr.datatypes as dt
import ibis.expr.rules as rlz
import ibis.expr.schema as sch
import ibis.expr.datatypes as dt

import ibis.expr.types as ir
from ibis import util
from ibis.expr.signature import Annotable, Argument as Arg
from ibis.expr.schema import HasSchema, Schema
from ibis.expr.signature import Annotable
from ibis.expr.signature import Argument as Arg


def _safe_repr(x, memo=None):
Expand Down Expand Up @@ -70,7 +68,7 @@ def blocks(self):
def flat_args(self):
for arg in self.args:
if not isinstance(arg, str) and isinstance(
arg, collections.Iterable
arg, collections.abc.Iterable
):
for x in arg:
yield x
Expand Down Expand Up @@ -397,7 +395,7 @@ class NullIf(ValueOp):

arg = Arg(rlz.any)
null_if_expr = Arg(rlz.any)
output_type = rlz.typeof('arg')
output_type = rlz.shape_like('args')


class NullIfZero(ValueOp):
Expand Down Expand Up @@ -2610,7 +2608,8 @@ class Date(UnaryOp):

class TimestampFromUNIX(ValueOp):
arg = Arg(rlz.any)
unit = Arg(rlz.isin({'s', 'ms', 'us'}))
# Only pandas-based backends support 'ns'
unit = Arg(rlz.isin({'s', 'ms', 'us', 'ns'}))
output_type = rlz.shape_like('arg', dt.timestamp)


Expand Down
16 changes: 8 additions & 8 deletions ibis/expr/rules.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import collections
import enum

from itertools import starmap, product


import functools
from contextlib import suppress
import ibis.util as util
from itertools import product, starmap

import ibis.common as com
import ibis.expr.types as ir
import ibis.expr.schema as sch
import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
import ibis.expr.types as ir
import ibis.util as util

try:
from cytoolz import curry, compose, identity
Expand Down Expand Up @@ -152,7 +151,7 @@ def member_of(obj, arg):
@validator
def list_of(inner, arg, min_length=0):
if isinstance(arg, str) or not isinstance(
arg, (collections.Sequence, ir.ListExpr)
arg, (collections.abc.Sequence, ir.ListExpr)
):
raise com.IbisTypeError('Argument must be a sequence')

Expand Down Expand Up @@ -293,6 +292,7 @@ def client(arg):


def promoter(fn):
@functools.wraps(fn)
def wrapper(name_or_value, *args, **kwargs):
if isinstance(name_or_value, str):
return lambda self: fn(
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from multipledispatch import Dispatcher

import ibis.common as com
import ibis.util as util
import ibis.expr.datatypes as dt
import ibis.util as util


class Schema:
Expand Down
6 changes: 3 additions & 3 deletions ibis/expr/signature.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import ibis.util as util
import ibis.expr.rules as rlz

from collections import OrderedDict

import ibis.expr.rules as rlz
import ibis.util as util

try:
from cytoolz import unique
except ImportError:
Expand Down
4 changes: 3 additions & 1 deletion ibis/expr/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
# limitations under the License.

import collections

import pytest

import ibis
from ibis.expr.tests.mocks import MockConnection, GeoMockConnection
from ibis.expr.tests.mocks import GeoMockConnection, MockConnection


@pytest.fixture
Expand Down
74 changes: 62 additions & 12 deletions ibis/expr/tests/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
import pytest

from ibis.client import SQLClient
from ibis.expr.schema import Schema
import ibis.expr.types as ir
import ibis.sql.alchemy as alch # noqa: E402


class BaseMockConnection(SQLClient, metaclass=abc.ABCMeta):
def __init__(self):
self.executed_queries = []

class MockConnection(SQLClient):
@property
@abc.abstractmethod
def dialect(self):
from ibis.impala.compiler import ImpalaDialect
pass

return ImpalaDialect
@abc.abstractmethod
def _build_ast(self, expr, context):
pass

_tables = {
'alltypes': [
Expand Down Expand Up @@ -345,19 +356,11 @@ def dialect(self):
],
}

def __init__(self):
self.executed_queries = []

def _get_table_schema(self, name):
name = name.replace('`', '')
return Schema.from_tuples(self._tables[name])

def _build_ast(self, expr, context):
from ibis.impala.compiler import build_ast

return build_ast(expr, context)

def execute(self, expr, limit=None, params=None):
def execute(self, expr, limit=None, params=None, **kwargs):
ast = self._build_ast_ensure_limit(expr, limit, params=params)
for query in ast.queries:
self.executed_queries.append(query.compile())
Expand All @@ -369,6 +372,53 @@ def compile(self, expr, limit=None, params=None):
return queries[0] if len(queries) == 1 else queries


class MockConnection(BaseMockConnection):
    """Mock connection that uses the Impala dialect and AST builder."""

    # TODO: Refactor/rename to MockImpalaConnection
    # TODO: Should some tests using MockImpalaConnection really use
    # MockAlchemyConnection instead?
    @property
    def dialect(self):
        """Return the ``ImpalaDialect`` class."""
        # Imported lazily, inside the accessor, as in the original code.
        from ibis.impala.compiler import ImpalaDialect

        return ImpalaDialect

    def _build_ast(self, expr, context):
        """Build the query AST for ``expr`` with the Impala ``build_ast``."""
        from ibis.impala.compiler import build_ast

        return build_ast(expr, context)


class MockAlchemyConnection(BaseMockConnection):
    """Mock connection that uses the SQLAlchemy dialect and AST builder."""

    def __init__(self):
        super().__init__()
        # ``importorskip`` skips the calling test when sqlalchemy cannot be
        # imported, instead of failing with an ImportError.
        sa = pytest.importorskip('sqlalchemy')
        self.meta = sa.MetaData()

    def table(self, name, database=None):
        """Return a table expression for the mock table called ``name``.

        ``database`` is accepted but not used by this implementation.
        """
        schema = self._get_table_schema(name)
        return self._inject_table(name, schema)

    def _inject_table(self, name, schema):
        """Wrap the SQLAlchemy table ``name`` in an ibis ``TableExpr``.

        A table already registered on ``self.meta`` is reused; otherwise one
        is created from ``schema`` via ``alch.table_from_schema``.
        """
        if name in self.meta.tables:
            table = self.meta.tables[name]
        else:
            table = alch.table_from_schema(name, self.meta, schema)

        node = alch.AlchemyTable(table, self)
        return ir.TableExpr(node)

    @property
    def dialect(self):
        """Return the ``AlchemyDialect`` class."""
        from ibis.sql.alchemy import AlchemyDialect

        return AlchemyDialect

    def _build_ast(self, expr, context):
        """Build the query AST for ``expr`` with the alchemy ``build_ast``."""
        from ibis.sql.alchemy import build_ast

        return build_ast(expr, context)


class GeoMockConnection(SQLClient):
@property
def dialect(self):
Expand Down
15 changes: 5 additions & 10 deletions ibis/expr/tests/test_analysis.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
import pytest

import ibis

import ibis.common as com
import ibis.expr.analysis as L
import ibis.expr.operations as ops
import ibis.common as com

from ibis.tests.util import assert_equal


# TODO: test is_reduction
# TODO: test is_scalar_reduction

Expand Down Expand Up @@ -59,7 +56,8 @@ def test_rewrite_past_projection(con):
table5 = table[(table.f * 2).name('c'), table.f]
expr = table5['c'] == 2
result = L.substitute_parents(expr)
assert result is expr
expected = expr
assert result.equals(expected)


def test_multiple_join_deeper_reference():
Expand Down Expand Up @@ -210,14 +208,11 @@ def test_fuse_filter_sort_by():

def test_no_rewrite(con):
table = con.table('test1')

# Substitution not fully possible if we depend on a new expr in a
# projection
table4 = table[['c', (table['c'] * 2).name('foo')]]
expr = table4['c'] == table4['foo']
result = L.substitute_parents(expr)
expected = table['c'] == table4['foo']
assert_equal(result, expected)
expected = expr
assert result.equals(expected)


# def test_projection_with_join_pushdown_rewrite_refs():
Expand Down
1 change: 0 additions & 1 deletion ibis/expr/tests/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import ibis
import ibis.expr.types as ir

from ibis.expr.tests.mocks import MockConnection
from ibis.tests.util import assert_equal

Expand Down
1 change: 1 addition & 0 deletions ibis/expr/tests/test_array.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

import ibis
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
Expand Down
5 changes: 2 additions & 3 deletions ibis/expr/tests/test_case.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import pytest

import ibis
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.expr.operations as ops
import ibis

import ibis.expr.types as ir
from ibis.tests.util import assert_equal


Expand Down
13 changes: 7 additions & 6 deletions ibis/expr/tests/test_datatypes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pytest
import datetime
import pytz
from collections import OrderedDict

import pytest
import pytz
from multipledispatch.conflict import ambiguities

import ibis
Expand Down Expand Up @@ -364,13 +365,13 @@ def test_time_valid():
(128, dt.int16),
(32767, dt.int16),
(32768, dt.int32),
(2_147_483_647, dt.int32),
(2_147_483_648, dt.int64),
(2147483647, dt.int32),
(2147483648, dt.int64),
(-5, dt.int8),
(-128, dt.int8),
(-129, dt.int16),
(-32769, dt.int32),
(-2_147_483_649, dt.int64),
(-2147483649, dt.int64),
(1.5, dt.double),
# parametric types
(list('abc'), dt.Array(dt.string)),
Expand All @@ -380,7 +381,7 @@ def test_time_valid():
([1, 2, 3], dt.Array(dt.int8)),
([1, 128], dt.Array(dt.int16)),
([1, 128, 32768], dt.Array(dt.int32)),
([1, 128, 32768, 2_147_483_648], dt.Array(dt.int64)),
([1, 128, 32768, 2147483648], dt.Array(dt.int64)),
({'a': 1, 'b': 2, 'c': 3}, dt.Map(dt.string, dt.int8)),
({1: 2, 3: 4, 5: 6}, dt.Map(dt.int8, dt.int8)),
(
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/tests/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import pytest

import ibis.expr.api as api
import ibis.expr.types as ir
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir


def test_type_metadata(lineitem):
Expand Down
1 change: 0 additions & 1 deletion ibis/expr/tests/test_format.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import ibis

from ibis.expr.format import ExprFormatter
from ibis.expr.operations import Node
from ibis.expr.signature import Argument as Arg
Expand Down
3 changes: 2 additions & 1 deletion ibis/expr/tests/test_geospatial.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
""" Tests for geo spatial data types"""
import ibis
import pytest

import ibis


@pytest.mark.parametrize(
'modifier', ['', ';4326', ';4326:geometry', ';4326:geography']
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/tests/test_interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

import unittest

from ibis.expr.tests.mocks import MockConnection
import ibis.config as config
from ibis.expr.tests.mocks import MockConnection


class TestInteractiveUse(unittest.TestCase):
Expand Down
13 changes: 6 additions & 7 deletions ibis/expr/tests/test_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import ibis
import ibis.expr.lineage as lin
import ibis.expr.operations as ops

from ibis.tests.util import assert_equal


Expand Down Expand Up @@ -59,12 +58,12 @@ def test_lineage(companies):
# single table dependency
funding_buckets = [
0,
1_000_000,
10_000_000,
50_000_000,
100_000_000,
500_000_000,
1_000_000_000,
1000000,
10000000,
50000000,
100000000,
500000000,
1000000000,
]

bucket_names = [
Expand Down
7 changes: 3 additions & 4 deletions ibis/expr/tests/test_operations.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import pytest
import numpy as np
import pytest

import ibis
import ibis.expr.types as ir
import ibis.expr.rules as rlz
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops

import ibis.expr.rules as rlz
import ibis.expr.types as ir
from ibis.common import IbisTypeError
from ibis.expr.signature import Argument as Arg

Expand Down
1 change: 1 addition & 0 deletions ibis/expr/tests/test_pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import pytest

import ibis


Expand Down
11 changes: 5 additions & 6 deletions ibis/expr/tests/test_rules.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import enum
import ibis
import pytest

import pytest
from toolz import identity
from ibis.common import IbisTypeError

import ibis.expr.types as ir
import ibis.expr.rules as rlz
import ibis
import ibis.expr.datatypes as dt

import ibis.expr.rules as rlz
import ibis.expr.types as ir
from ibis.common import IbisTypeError

table = ibis.table(
[('int_col', 'int64'), ('string_col', 'string'), ('double_col', 'double')]
Expand Down
6 changes: 4 additions & 2 deletions ibis/expr/tests/test_signature.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from functools import partial

import pytest
from toolz import identity

from ibis.common import IbisTypeError
from functools import partial
from ibis.expr.signature import Argument, TypeSignature, Annotable
from ibis.expr.signature import Annotable, Argument, TypeSignature


@pytest.mark.parametrize(
Expand Down
1 change: 0 additions & 1 deletion ibis/expr/tests/test_sql_builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import ibis
import ibis.expr.operations as ops
import ibis.expr.types as ir

from ibis.tests.util import assert_equal


Expand Down
5 changes: 2 additions & 3 deletions ibis/expr/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@

import pytest

from ibis import literal
import ibis.expr.types as ir
import ibis.expr.operations as ops

import ibis.expr.types as ir
from ibis import literal
from ibis.tests.util import assert_equal


Expand Down
22 changes: 16 additions & 6 deletions ibis/expr/tests/test_table.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
import pickle
import re

import pytest

import ibis
import ibis.common as com
import ibis.config as config
import ibis.expr.analysis as L
import ibis.expr.api as api
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.expr.analysis as L
import ibis.expr.operations as ops

import pickle
import ibis.expr.types as ir
from ibis.common import ExpressionError, RelationError
from ibis.expr.types import ColumnExpr, TableExpr

from ibis.tests.util import assert_equal


def test_empty_schema():
table = api.table([], 'foo')
assert len(table.schema()) == 0
assert not table.schema()


def test_columns(con):
Expand Down Expand Up @@ -1222,3 +1220,15 @@ def test_unbound_table_name():
name = t.op().name
match = re.match(r'^unbound_table_\d+$', name)
assert match is not None


def test_mutate_chain():
    """Chained ``mutate`` calls replacing different columns stay unfused."""
    one = ibis.table([('a', 'string'), ('b', 'string')], name='t')
    two = one.mutate(b=lambda t: t.b.fillna('Short Term'))
    three = two.mutate(a=lambda t: t.a.fillna('Short Term'))
    a, b = three.op().selections

    # we can't fuse these correctly yet
    assert isinstance(a.op(), ops.IfNull)
    # ``b`` is still a plain column reference into the intermediate
    # selection, whose second selection holds the ``fillna`` on ``b``.
    assert isinstance(b.op(), ops.TableColumn)
    assert isinstance(b.op().table.op().selections[1].op(), ops.IfNull)
31 changes: 14 additions & 17 deletions ibis/expr/tests/test_temporal.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import pytest
import datetime
import operator

import pytest

import ibis
import ibis.expr.api as api
import ibis.expr.types as ir
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir


def test_temporal_literals():
Expand All @@ -28,12 +29,8 @@ def test_temporal_literals():
(api.interval(seconds=360), 'm', api.interval(minutes=6)),
(api.interval(seconds=3 * 86400), 'D', api.interval(days=3)),
(api.interval(milliseconds=5000), 's', api.interval(seconds=5)),
(api.interval(microseconds=5_000_000), 's', api.interval(seconds=5)),
(
api.interval(nanoseconds=5_000_000_000),
's',
api.interval(seconds=5),
),
(api.interval(microseconds=5000000), 's', api.interval(seconds=5)),
(api.interval(nanoseconds=5000000000), 's', api.interval(seconds=5)),
],
)
def test_upconvert(interval, unit, expected):
Expand Down Expand Up @@ -110,11 +107,11 @@ def test_subtract(expr):
),
(
api.interval(seconds=2).to_unit('us'),
api.interval(microseconds=2 * 1_000_000),
api.interval(microseconds=2 * 1000000),
),
(
api.interval(seconds=2).to_unit('ns'),
api.interval(nanoseconds=2 * 1_000_000_000),
api.interval(nanoseconds=2 * 1000000000),
),
(
api.interval(milliseconds=2).to_unit('ms'),
Expand All @@ -126,7 +123,7 @@ def test_subtract(expr):
),
(
api.interval(milliseconds=2).to_unit('ns'),
api.interval(nanoseconds=2 * 1_000_000),
api.interval(nanoseconds=2 * 1000000),
),
(
api.interval(microseconds=2).to_unit('us'),
Expand Down Expand Up @@ -156,15 +153,15 @@ def test_downconvert_second_parts(case, expected):
(api.interval(hours=2).to_unit('s'), api.interval(seconds=2 * 3600)),
(
api.interval(hours=2).to_unit('ms'),
api.interval(milliseconds=2 * 3_600_000),
api.interval(milliseconds=2 * 3600000),
),
(
api.interval(hours=2).to_unit('us'),
api.interval(microseconds=2 * 3_600_000_000),
api.interval(microseconds=2 * 3600000000),
),
(
api.interval(hours=2).to_unit('ns'),
api.interval(nanoseconds=2 * 3_600_000_000_000),
api.interval(nanoseconds=2 * 3600000000000),
),
],
)
Expand All @@ -185,15 +182,15 @@ def test_downconvert_hours(case, expected):
(api.interval(days=2).to_unit('s'), api.interval(seconds=2 * 86400)),
(
api.interval(days=2).to_unit('ms'),
api.interval(milliseconds=2 * 86_400_000),
api.interval(milliseconds=2 * 86400000),
),
(
api.interval(days=2).to_unit('us'),
api.interval(microseconds=2 * 86_400_000_000),
api.interval(microseconds=2 * 86400000000),
),
(
api.interval(days=2).to_unit('ns'),
api.interval(nanoseconds=2 * 86_400_000_000_000),
api.interval(nanoseconds=2 * 86400000000000),
),
],
)
Expand Down
8 changes: 4 additions & 4 deletions ibis/expr/tests/test_timestamp.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import pytest
from datetime import datetime

import numpy as np
import pandas as pd
from datetime import datetime
import pytest

import ibis
import ibis.expr.api as api
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.expr.rules as rlz
import ibis.expr.operations as ops
import ibis.expr.rules as rlz
import ibis.expr.types as ir


def test_field_select(alltypes):
Expand Down
145 changes: 121 additions & 24 deletions ibis/expr/tests/test_value_exprs.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,27 @@
import functools
import operator
import os

from collections import OrderedDict
from operator import methodcaller
from datetime import date, datetime, time
from operator import methodcaller

import numpy as np
import pandas as pd
import pytest

import numpy as np

import toolz

from ibis import literal
from ibis.common import IbisTypeError
from ibis.tests.util import assert_equal

import ibis
import ibis.expr.api as api
import ibis.expr.types as ir
import ibis.expr.rules as rlz
import ibis.common as com
import ibis.expr.analysis as L
import ibis.expr.api as api
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops

import ibis.expr.rules as rlz
import ibis.expr.types as ir
from ibis import literal
from ibis.common import IbisTypeError
from ibis.expr.signature import Argument as Arg
from ibis.tests.util import assert_equal


def test_null():
Expand Down Expand Up @@ -58,13 +54,13 @@ def test_unicode():
(128, 'int16'),
(32767, 'int16'),
(32768, 'int32'),
(2_147_483_647, 'int32'),
(2_147_483_648, 'int64'),
(2147483647, 'int32'),
(2147483648, 'int64'),
(-5, 'int8'),
(-128, 'int8'),
(-129, 'int16'),
(-32769, 'int32'),
(-2_147_483_649, 'int64'),
(-2147483649, 'int64'),
(1.5, 'double'),
('foo', 'string'),
([1, 2, 3], 'array<int8>'),
Expand Down Expand Up @@ -99,12 +95,12 @@ def test_literal_with_implicit_type(value, expected_type):
(128, 'int64'),
(32767, 'double'),
(32768, 'float'),
(2_147_483_647, 'int64'),
(2147483647, 'int64'),
(-5, 'int16'),
(-128, 'int32'),
(-129, 'int64'),
(-32769, 'float'),
(-2_147_483_649, 'double'),
(-2147483649, 'double'),
(1.5, 'double'),
('foo', 'string'),
(list(pointA), 'point'),
Expand Down Expand Up @@ -201,8 +197,8 @@ def test_simple_map_operations():
[
(32767, 'int8'),
(32768, 'int16'),
(2_147_483_647, 'int16'),
(2_147_483_648, 'int32'),
(2147483647, 'int16'),
(2147483648, 'int32'),
('foo', 'double'),
],
)
Expand Down Expand Up @@ -777,8 +773,8 @@ def test_binop_string_type_error(table, operation):
[
(operator.add, 'a', 0, 'int8'),
(operator.add, 'a', 5, 'int16'),
(operator.add, 'a', 100_000, 'int32'),
(operator.add, 'a', -100_000, 'int32'),
(operator.add, 'a', 100000, 'int32'),
(operator.add, 'a', -100000, 'int32'),
(operator.add, 'a', 1.5, 'double'),
(operator.add, 'b', 0, 'int16'),
(operator.add, 'b', 5, 'int32'),
Expand All @@ -802,8 +798,8 @@ def test_binop_string_type_error(table, operation):
# technically this can overflow, but we allow it
(operator.mul, 'd', 5, 'int64'),
(operator.sub, 'a', 5, 'int16'),
(operator.sub, 'a', 100_000, 'int32'),
(operator.sub, 'a', -100_000, 'int32'),
(operator.sub, 'a', 100000, 'int32'),
(operator.sub, 'a', -100000, 'int32'),
(operator.sub, 'a', 1.5, 'double'),
(operator.sub, 'b', 5, 'int32'),
(operator.sub, 'b', -5, 'int32'),
Expand Down Expand Up @@ -1443,3 +1439,104 @@ def test_valid_negate_float128():
value = np.float128(1)
expr = ibis.literal(value)
assert -expr is not None


@pytest.mark.parametrize(
    ('kind', 'begin', 'end'),
    [
        ('preceding', None, None),
        ('preceding', 1, None),
        ('preceding', -1, 1),
        ('preceding', 1, -1),
        ('preceding', -1, -1),
        ('following', None, None),
        ('following', None, 1),
        ('following', -1, 1),
        ('following', 1, -1),
        ('following', -1, -1),
    ],
)
def test_window_unbounded_invalid(kind, begin, end):
    """Each listed ``(begin, end)`` frame must be rejected by ``ibis.window``.

    ``kind`` selects which keyword (``preceding`` or ``following``) receives
    the tuple.
    """
    frame = (begin, end)
    with pytest.raises(com.IbisInputError):
        ibis.window(**{kind: frame})


@pytest.mark.parametrize(
    ('left', 'right', 'expected'),
    [
        (ibis.literal(1), ibis.literal(1.0), dt.float64),
        (ibis.literal('a'), ibis.literal('b'), dt.string),
        (ibis.literal(1.0), ibis.literal(1), dt.float64),
        (ibis.literal(1), ibis.literal(1), dt.int8),
        (ibis.literal(1), ibis.literal(1000), dt.int16),
        (ibis.literal(2 ** 16), ibis.literal(2 ** 17), dt.int32),
        (ibis.literal(2 ** 50), ibis.literal(1000), dt.int64),
        (ibis.literal([1, 2]), ibis.literal([1, 2]), dt.Array(dt.int8)),
        (ibis.literal(['a']), ibis.literal([]), dt.Array(dt.string)),
        (ibis.literal([]), ibis.literal(['a']), dt.Array(dt.string)),
        (ibis.literal([]), ibis.literal([]), dt.Array(dt.null)),
    ],
)
def test_nullif_type(left, right, expected):
    """``left.nullif(right)`` must have the expected result type."""
    result = left.nullif(right)
    assert result.type() == expected


@pytest.mark.parametrize(
    ('left', 'right'),
    [(ibis.literal(1), ibis.literal('a'))],
)
def test_nullif_fail(left, right):
    """``nullif`` must raise ``IbisTypeError`` in both argument orders."""
    for caller, argument in ((left, right), (right, left)):
        with pytest.raises(com.IbisTypeError):
            caller.nullif(argument)


@pytest.mark.parametrize(
    "join_method",
    [
        "left_join",
        pytest.param(
            "right_join",
            marks=pytest.mark.xfail(
                raises=AttributeError, reason="right_join is not an ibis API"
            ),
        ),
        "inner_join",
        "outer_join",
        "asof_join",
        pytest.param(
            "semi_join",
            marks=pytest.mark.xfail(
                raises=com.IbisTypeError,
                reason=(
                    "semi_join only gives access to the left table's "
                    "columns"
                ),
            ),
        ),
    ],
)
@pytest.mark.xfail(
    raises=(com.IbisError, AttributeError),
    reason="Select from unambiguous joins not implemented",
)
def test_select_on_unambiguous_join(join_method):
    """The three column-selection spellings on a join must be equal.

    The two tables share no column names, so ``"a0"`` and ``"a1"`` each
    refer to exactly one side of the join.
    """
    t = ibis.table([("a0", dt.int64), ("b1", dt.string)], name="t")
    s = ibis.table([("a1", dt.int64), ("b2", dt.string)], name="s")

    # Look up the join method first so a missing API surfaces as
    # AttributeError before the predicate is built.
    joined = getattr(t, join_method)(s, t.b1 == s.b2)

    via_getitem_args = joined["a0", "a1"]
    via_getitem_list = joined[["a0", "a1"]]
    via_select = joined.select(["a0", "a1"])

    assert via_getitem_args.equals(via_getitem_list)
    assert via_getitem_args.equals(via_select)


def test_chained_select_on_join():
    """Selecting by name from a projected join matches ``select``."""
    left = ibis.table([("a", dt.int64)], name="t")
    right = ibis.table([("a", dt.int64), ("b", dt.string)], name="s")
    projected = left.join(right)[left.a, right.b]

    by_getitem = projected["a", "b"]
    by_select = projected.select(["a", "b"])
    assert by_getitem.equals(by_select)
14 changes: 8 additions & 6 deletions ibis/expr/tests/test_visualize.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import os
import pytest

pytest.importorskip('graphviz')
import pytest

import ibis # noqa: E402
import ibis.expr.types as ir # noqa: E402
import ibis.expr.rules as rlz # noqa: E402
import ibis.expr.visualize as viz # noqa: E402
import ibis.expr.operations as ops # noqa: E402
import ibis.expr.rules as rlz # noqa: E402
import ibis.expr.types as ir # noqa: E402
from ibis.expr.signature import Argument as Arg # noqa: E402

pytest.importorskip('graphviz')

import ibis.expr.visualize as viz # noqa: E402, isort:skip
import ibis.expr.api as api # noqa; E402

from ibis.expr.signature import Argument as Arg # noqa: E402

pytestmark = pytest.mark.skipif(
int(os.environ.get('CONDA_BUILD', 0)) == 1, reason='CONDA_BUILD defined'
Expand Down
35 changes: 34 additions & 1 deletion ibis/expr/tests/test_window_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

import ibis

from ibis.expr.window import _determine_how
from ibis.tests.util import assert_equal


Expand Down Expand Up @@ -175,3 +176,35 @@ def test_preceding_following_validate(alltypes):
@pytest.mark.xfail(raises=AssertionError, reason='NYT')
def test_window_equals(alltypes):
assert False


def test_determine_how():
    """Check how ``_determine_how`` classifies each kind of offset.

    Integer-like offsets (plain or inside a tuple) map to ``'rows'``,
    interval expressions map to ``'range'``, and anything else raises
    ``TypeError``.
    """
    row_offsets = [(None, 5), (3, 1), 5, np.int64(7)]
    for offset in row_offsets:
        assert _determine_how(offset) == 'rows'

    range_offsets = [
        ibis.interval(days=3),
        ibis.interval(months=5) + ibis.interval(days=10),
    ]
    for offset in range_offsets:
        assert _determine_how(offset) == 'range'

    unsupported_offsets = [
        8.9,
        'invalid preceding',
        {'start': 1, 'end': 2},
        [3, 5],
    ]
    for offset in unsupported_offsets:
        with pytest.raises(TypeError):
            _determine_how(offset)
22 changes: 7 additions & 15 deletions ibis/expr/types.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import os
import itertools
import os
import webbrowser

import numpy as np

import ibis
import ibis.util as util
import ibis.common as com
import ibis.config as config

import ibis.util as util

# TODO move methods containing ops import to api.py

Expand Down Expand Up @@ -178,10 +179,7 @@ def op(self):

@property
def _factory(self):
    # Provides a callable that constructs an expression of the same concrete
    # type as ``self``.
    # NOTE(review): two bodies appear back to back here (a closure-returning
    # form followed by a bare ``return type(self)``); as written the second
    # ``return`` is unreachable. Presumably these are the pre- and
    # post-change versions of the same method -- confirm which is current.
    def factory(arg, name=None):
        return type(self)(arg, name=name)

    return factory
    return type(self)

def execute(self, limit='default', params=None, **kwargs):
"""
Expand Down Expand Up @@ -473,18 +471,12 @@ def __dir__(self):

def _resolve(self, exprs):
    """Promote ``exprs`` to a list and pass each item to ``_ensure_expr``.

    Returns the list of values produced by ``self._ensure_expr``.
    """
    exprs = util.promote_list(exprs)

    # Stash this helper method here for now
    out_exprs = []
    for expr in exprs:
        expr = self._ensure_expr(expr)
        out_exprs.append(expr)
    return out_exprs
    # NOTE(review): unreachable as written. This looks like an alternative
    # one-line spelling of the loop above (presumably the newer version of
    # the same body) -- confirm which is current.
    return list(map(self._ensure_expr, exprs))

def _ensure_expr(self, expr):
if isinstance(expr, str):
return self[expr]
elif isinstance(expr, int):
elif isinstance(expr, (int, np.integer)):
return self[self.schema().name_at_position(expr)]
elif not isinstance(expr, Expr):
return expr(self)
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import ibis
import ibis.common as com
import ibis.expr.types as ir
import ibis.expr.operations as ops
import ibis.expr.types as ir


def get_type(expr):
Expand Down
206 changes: 139 additions & 67 deletions ibis/expr/window.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,50 @@
import ibis.expr.types as ir
"""Encapsulation of SQL window clauses."""

import numpy as np

import ibis.common as com
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.util as util
import ibis.common as com


def _list_to_tuple(x):
if isinstance(x, list):
x = tuple(x)
return x
def _sequence_to_tuple(x):
    """Return ``tuple(x)`` when ``util.is_iterable(x)`` holds, else ``x``."""
    if util.is_iterable(x):
        return tuple(x)
    return x


def _determine_how(preceding):
if isinstance(preceding, tuple):
start, end = preceding
if start is None:
offset_type = type(end)
else:
offset_type = type(start)
else:
offset_type = type(preceding)

if issubclass(offset_type, (int, np.integer)):
how = 'rows'
elif issubclass(offset_type, ir.IntervalScalar):
how = 'range'
else:
raise TypeError(
'Type {} is not supported for row- or range- based trailing '
'window operations'.format(offset_type)
)

return how


class Window:
"""Class to encapsulate the details of a window frame.
"""
A generic window function clause, patterned after SQL window clauses for
the time being. Can be expanded to cover more use cases as they arise.
Notes
-----
This class is patterned after SQL window clauses.
Using None for preceding or following currently indicates unbounded. Use 0
for current_value
for ``CURRENT ROW``.
"""

def __init__(
Expand All @@ -44,12 +71,23 @@ def __init__(
x = ops.SortKey(x).to_expr()
self._order_by.append(x)

self.preceding = _list_to_tuple(preceding)
self.following = _list_to_tuple(following)
self.preceding = _sequence_to_tuple(preceding)
self.following = _sequence_to_tuple(following)
self.how = how

self._validate_frame()

def __hash__(self) -> int:
return hash(
(
tuple(gb.op() for gb in self._group_by),
tuple(ob.op() for ob in self._order_by),
self.preceding,
self.following,
self.how,
)
)

def _validate_frame(self):
preceding_tuple = has_preceding = False
following_tuple = has_following = False
Expand All @@ -70,16 +108,40 @@ def _validate_frame(self):
)
elif preceding_tuple:
start, end = self.preceding
if start is None:
assert end >= 0
else:
assert start > end
if end is None:
raise com.IbisInputError("preceding end point cannot be None")
if end < 0:
raise com.IbisInputError(
"preceding end point must be non-negative"
)
if start is not None:
if start < 0:
raise com.IbisInputError(
"preceding start point must be non-negative"
)
if start <= end:
raise com.IbisInputError(
"preceding start must be greater than preceding end"
)
elif following_tuple:
start, end = self.following
if end is None:
assert start >= 0
else:
assert start < end
if start is None:
raise com.IbisInputError(
"following start point cannot be None"
)
if start < 0:
raise com.IbisInputError(
"following start point must be non-negative"
)
if end is not None:
if end < 0:
raise com.IbisInputError(
"following end point must be non-negative"
)
if start >= end:
raise com.IbisInputError(
"following start must be less than following end"
)
else:
if not isinstance(self.preceding, ir.Expr):
if has_preceding and self.preceding < 0:
Expand Down Expand Up @@ -179,30 +241,31 @@ def equals(self, other, cache=None):


def window(preceding=None, following=None, group_by=None, order_by=None):
"""
Create a window clause for use with window (analytic and aggregate)
functions. This ROW window clause aggregates adjacent rows based
on differences in row number.
"""Create a window clause for use with window functions.
This ROW window clause aggregates adjacent rows based on differences in row
number.
All window frames / ranges are inclusive.
Parameters
----------
preceding : int, tuple, or None, default None
Specify None for unbounded, 0 to include current row
tuple for off-center window
Specify None for unbounded, 0 to include current row tuple for
off-center window
following : int, tuple, or None, default None
Specify None for unbounded, 0 to include current row
tuple for off-center window
Specify None for unbounded, 0 to include current row tuple for
off-center window
group_by : expressions, default None
Either specify here or with TableExpr.group_by
Either specify here or with TableExpr.group_by
order_by : expressions, default None
For analytic functions requiring an ordering, specify here, or let Ibis
determine the default ordering (for functions like rank)
For analytic functions requiring an ordering, specify here, or let Ibis
determine the default ordering (for functions like rank)
Returns
-------
win : ibis Window
Window
"""
return Window(
preceding=preceding,
Expand All @@ -214,30 +277,31 @@ def window(preceding=None, following=None, group_by=None, order_by=None):


def range_window(preceding=None, following=None, group_by=None, order_by=None):
"""
Create a window clause for use with window (analytic and aggregate)
functions. This RANGE window clause aggregates rows based upon differences
in the value of the order-by expression.
"""Create a range-based window clause for use with window functions.
This RANGE window clause aggregates rows based upon differences in the
value of the order-by expression.
All window frames / ranges are inclusive.
Parameters
----------
preceding : int, tuple, or None, default None
Specify None for unbounded, 0 to include current row
tuple for off-center window
Specify None for unbounded, 0 to include current row tuple for
off-center window
following : int, tuple, or None, default None
Specify None for unbounded, 0 to include current row
tuple for off-center window
Specify None for unbounded, 0 to include current row tuple for
off-center window
group_by : expressions, default None
Either specify here or with TableExpr.group_by
Either specify here or with TableExpr.group_by
order_by : expressions, default None
For analytic functions requiring an ordering, specify here, or let Ibis
determine the default ordering (for functions like rank)
For analytic functions requiring an ordering, specify here, or let Ibis
determine the default ordering (for functions like rank)
Returns
-------
win : ibis Window
Window
"""
return Window(
preceding=preceding,
Expand All @@ -249,68 +313,76 @@ def range_window(preceding=None, following=None, group_by=None, order_by=None):


def cumulative_window(group_by=None, order_by=None):
    """Create a cumulative window for use with aggregate window functions.

    The frame is built with ``preceding=None`` and ``following=0``.
    All window frames / ranges are inclusive.

    Parameters
    ----------
    group_by : expressions, default None
        Either specify here or with TableExpr.group_by
    order_by : expressions, default None
        For analytic functions requiring an ordering, specify here, or let
        Ibis determine the default ordering (for functions like rank)

    Returns
    -------
    Window
    """
    return Window(
        preceding=None, following=0, group_by=group_by, order_by=order_by
    )


def trailing_window(preceding, group_by=None, order_by=None):
    """Create a trailing window for use with aggregate window functions.

    Whether the window is row-based or range-based is derived from the type
    of ``preceding`` by ``_determine_how``.

    Parameters
    ----------
    preceding : int or expression of intervals, i.e.
        ibis.interval(days=1) + ibis.interval(hours=5)
        Int indicates number of trailing rows to include;
        0 includes only the current row.
        Interval indicates a trailing range window.
        Floats are not accepted.
    group_by : expressions, default None
        Either specify here or with TableExpr.group_by
    order_by : expressions, default None
        For analytic functions requiring an ordering, specify here, or let
        Ibis determine the default ordering (for functions like rank)

    Returns
    -------
    Window

    Raises
    ------
    TypeError
        If ``preceding`` is neither integer-like nor an interval expression.
    """
    # Resolve 'rows' vs 'range' first so an unsupported offset type fails
    # before any Window is constructed.
    how = _determine_how(preceding)
    return Window(
        preceding=preceding,
        following=0,
        group_by=group_by,
        order_by=order_by,
        how=how,
    )


def trailing_range_window(preceding, order_by, group_by=None):
"""
Create a trailing time window for use with aggregate window functions.
"""Create a trailing time window for use with aggregate window functions.
Parameters
----------
preceding : float or expression of intervals, i.e.
ibis.interval(days=1) + ibis.interval(hours=5)
ibis.interval(days=1) + ibis.interval(hours=5)
order_by : expressions, default None
For analytic functions requiring an ordering, specify here, or let Ibis
determine the default ordering (for functions like rank)
For analytic functions requiring an ordering, specify here, or let Ibis
determine the default ordering (for functions like rank)
group_by : expressions, default None
Either specify here or with TableExpr.group_by
Either specify here or with TableExpr.group_by
Returns
-------
win: ibis Window
Window
"""
return Window(
preceding=preceding,
Expand Down
18 changes: 13 additions & 5 deletions ibis/file/client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from pathlib import Path

import ibis
import ibis.expr.types as ir
from ibis.pandas.core import execute_and_reset
from pathlib import Path
from ibis.pandas.core import execute
from ibis.pandas.dispatch import execute_last


class FileClient(ibis.client.Client):
Expand Down Expand Up @@ -33,9 +35,15 @@ def database(self, name=None, path=None):
return FileDatabase(name, self, path=path)

def execute(self, expr, params=None, **kwargs):  # noqa
    # Executes ``expr`` after checking that it is an ``ir.Expr``.
    # NOTE(review): two bodies appear back to back below. As written, only
    # the first (``execute_and_reset`` with a ``scope`` popped from kwargs)
    # can run; everything after the first ``return`` is unreachable.
    # Presumably the second body (``execute`` followed by ``execute_last``)
    # is the newer version -- confirm which is current.
    assert isinstance(expr, ir.Expr)
    scope = kwargs.pop('scope', {})
    return execute_and_reset(expr, params=params, scope=scope, **kwargs)
    assert isinstance(expr, ir.Expr), "Expected ir.Expr, got {}".format(
        type(expr)
    )
    return execute_last(
        expr.op(),
        execute(expr, params=params, **kwargs),
        params=params,
        **kwargs,
    )

def list_tables(self, path=None):
raise NotImplementedError
Expand Down
10 changes: 4 additions & 6 deletions ibis/file/csv.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
import toolz
import pandas as pd
import toolz
from pkg_resources import parse_version

import ibis.expr.schema as sch
import ibis.expr.operations as ops

from pkg_resources import parse_version
import ibis.expr.schema as sch
from ibis.file.client import FileClient
from ibis.pandas.api import PandasDialect
from ibis.pandas.core import execute_node, pre_execute, execute
from ibis.pandas.core import execute, execute_node, pre_execute
from ibis.pandas.execution.selection import physical_tables


dialect = PandasDialect


Expand Down
5 changes: 3 additions & 2 deletions ibis/file/hdf5.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pandas as pd
import ibis.expr.schema as sch

import ibis.expr.operations as ops
import ibis.expr.schema as sch
from ibis.file.client import FileClient
from ibis.pandas.core import execute_node, execute
from ibis.pandas.core import execute, execute_node


def connect(path):
Expand Down
11 changes: 4 additions & 7 deletions ibis/file/parquet.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
import regex as re

import pyarrow as pa
import pyarrow.parquet as pq
import regex as re
from pkg_resources import parse_version

import ibis.expr.schema as sch
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops

from pkg_resources import parse_version
import ibis.expr.schema as sch
from ibis.file.client import FileClient
from ibis.pandas.api import PandasDialect
from ibis.pandas.core import execute_node, execute

from ibis.pandas.core import execute, execute_node

dialect = PandasDialect

Expand Down
4 changes: 2 additions & 2 deletions ibis/file/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
import pandas as pd
import numpy as np
import pandas as pd
import pytest


@pytest.fixture
Expand Down
13 changes: 5 additions & 8 deletions ibis/file/tests/test_csv.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pytest

from pandas.util import testing as tm

import ibis
from ibis.file.client import FileDatabase
from ibis.file.csv import CSVClient, CSVTable
from ibis.file.client import FileDatabase, execute_and_reset as execute


@pytest.fixture
Expand Down Expand Up @@ -74,19 +74,16 @@ def test_read(csv, data):
expected['time'] = expected['time'].astype(str)
tm.assert_frame_equal(result, expected)

result = execute(closes)
tm.assert_frame_equal(result, expected)


def test_read_with_projection(csv2, data):
    """Check that projecting columns limits the columns that are read back."""

    t = csv2.csv_dir2.df
    # NOTE(review): each result below is computed twice, once via a
    # module-level ``execute`` and once via ``t.execute()``; the second
    # assignment overwrites the first. Presumably these are the old and new
    # spellings of the same call -- confirm which is current.
    result = execute(t)
    result = t.execute()
    # Without a projection both price columns are present.
    assert 'close' in result.columns
    assert 'open' in result.columns

    t = t[['time', 'ticker', 'close']]
    result = execute(t)
    result = t.execute()
    # After projecting, only the selected columns remain.
    assert 'close' in result.columns
    assert 'open' not in result.columns

Expand All @@ -95,7 +92,7 @@ def test_insert(transformed, tmpdir):
t = transformed

# csv's don't preserve dtypes
expected = execute(t)
expected = t.execute()
expected['time'] = expected['time'].astype(str)

tpath = tmpdir / 'new_csv'
Expand Down
22 changes: 8 additions & 14 deletions ibis/file/tests/test_hdf5.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
import pytest
import pandas as pd
import ibis

import pytest
from pandas.util import testing as tm

pytest.importorskip('tables')
import ibis
from ibis.file.client import FileDatabase

pytest.importorskip('tables') # isort:skip

from ibis.file.hdf5 import HDFClient, HDFTable # noqa: E402
from ibis.file.client import (
FileDatabase,
execute_and_reset as execute,
) # noqa: E402
from ibis.file.hdf5 import HDFClient, HDFTable # noqa: E402, isort:skip


@pytest.fixture
Expand Down Expand Up @@ -93,14 +90,11 @@ def test_read(hdf, data):
expected = data['close']
tm.assert_frame_equal(result, expected)

result = execute(closes)
tm.assert_frame_equal(result, expected)


def test_insert(transformed, tmpdir):

t = transformed
expected = execute(t)
expected = t.execute()

tpath = tmpdir / 'new_dir'
tpath.mkdir()
Expand All @@ -110,7 +104,7 @@ def test_insert(transformed, tmpdir):
t = transformed[['time', 'ticker', 'avg']]
c = ibis.hdf5.connect(tpath)
c.insert('foo.h5', 'avg', t)
execute(t)
t.execute()
assert path.exists()

# readback
Expand Down
28 changes: 10 additions & 18 deletions ibis/file/tests/test_parquet.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import sys

import pytest
from pandas.util import testing as tm

import ibis

from pandas.util import testing as tm
from ibis.file.client import FileDatabase

pa = pytest.importorskip('pyarrow')
import pyarrow.parquet as pq # noqa: E402
pa = pytest.importorskip('pyarrow') # isort:skip
pq = pytest.importorskip('pyarrow.parquet') # isort:skip

from ibis.file.parquet import ParquetClient, ParquetTable # noqa: E402
from ibis.file.client import (
FileDatabase,
execute_and_reset as execute,
) # noqa: E402
from ibis.file.parquet import ParquetClient # noqa: E402, isort:skip
from ibis.file.parquet import ParquetTable # noqa: E402, isort:skip


pytestmark = pytest.mark.skipif(
Expand All @@ -21,7 +21,6 @@

@pytest.fixture
def transformed(parquet):

closes = parquet.pq.close
opens = parquet.pq.open

Expand All @@ -34,7 +33,6 @@ def transformed(parquet):

def test_creation(parquet):
# we have existing files in our dir

d = parquet.client.root
assert len(list(d.iterdir())) == 1

Expand All @@ -46,7 +44,6 @@ def test_creation(parquet):


def test_client(tmpdir, data):

# construct with a path to a file
d = tmpdir / 'pq'
d.mkdir()
Expand All @@ -62,7 +59,6 @@ def test_client(tmpdir, data):


def test_navigation(parquet):

# directory navigation
assert isinstance(parquet, FileDatabase)
result = dir(parquet)
Expand All @@ -84,21 +80,17 @@ def test_navigation(parquet):


def test_read(parquet, data):
    """Check that executing the ``close`` table returns ``data['close']``."""

    closes = parquet.pq.close
    assert str(closes) is not None

    result = closes.execute()
    expected = data['close']
    tm.assert_frame_equal(result, expected)

    # NOTE(review): this second check goes through a module-level
    # ``execute`` helper rather than the expression's own method; it looks
    # like it belongs to an earlier version of the test -- confirm whether
    # that helper is still imported.
    result = execute(closes)
    tm.assert_frame_equal(result, expected)


def test_write(transformed, tmpdir):
t = transformed
expected = execute(t)
expected = t.execute()

tpath = tmpdir / 'new_dir'
tpath.mkdir()
Expand All @@ -108,7 +100,7 @@ def test_write(transformed, tmpdir):
t = transformed[['time', 'ticker', 'avg']]
c = ibis.parquet.connect(tpath)
c.insert('foo.parquet', t)
execute(t)
t.execute()
assert path.exists()

# readback
Expand Down
6 changes: 4 additions & 2 deletions ibis/file/tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import tempfile
import pytest

import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import pytest

import ibis
import ibis.expr.datatypes as dt

pa = pytest.importorskip('pyarrow') # noqa: E402
import pyarrow.parquet as pq


@pytest.mark.parametrize(
Expand Down
5 changes: 2 additions & 3 deletions ibis/filesystems.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
# license), see the LICENSES directory.

import posixpath
from functools import wraps as implements


from ibis.config import options
from ibis.util import implements
import ibis.common as com
from ibis.config import options


class HDFSError(com.IbisError):
Expand Down
14 changes: 8 additions & 6 deletions ibis/impala/api.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from ibis.impala.client import ImpalaConnection, ImpalaClient

import ibis.common as com
from ibis.config import options
# these objects are exposed in the public API and are not used in the module
from ibis.impala.client import ImpalaDatabase, ImpalaTable # noqa: F401

from ibis.impala.client import ( # noqa: F401
ImpalaClient,
ImpalaConnection,
ImpalaDatabase,
ImpalaTable,
)
from ibis.impala.compiler import dialect # noqa: F401
from ibis.impala.udf import * # noqa: F401,F403
from ibis.config import options
import ibis.common as com


def compile(expr, params=None):
Expand Down
24 changes: 11 additions & 13 deletions ibis/impala/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,28 @@
import time
import traceback
import weakref

from posixpath import join as pjoin
from collections import deque
from posixpath import join as pjoin

import numpy as np
import pandas as pd
from pkg_resources import parse_version

import ibis.util as util
import ibis.common as com
import ibis.expr.types as ir
import ibis.expr.rules as rlz
import ibis.expr.schema as sch
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops

import ibis.expr.rules as rlz
import ibis.expr.schema as sch
import ibis.expr.types as ir
import ibis.util as util
from ibis.client import Database, DatabaseEntity, Query, SQLClient
from ibis.config import options
from ibis.client import Query, Database, DatabaseEntity, SQLClient
from pkg_resources import parse_version
from ibis.filesystems import HDFS, WebHDFS
from ibis.impala import udf, ddl
from ibis.impala.compat import impyla, ImpylaError, HS2Error
from ibis.impala.compiler import build_ast, ImpalaDialect
from ibis.util import log
from ibis.impala import ddl, udf
from ibis.impala.compat import HS2Error, ImpylaError, impyla
from ibis.impala.compiler import ImpalaDialect, build_ast
from ibis.sql.compiler import DDL, DML
from ibis.util import log


class ImpalaDatabase(Database):
Expand Down
2 changes: 1 addition & 1 deletion ibis/impala/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import impala.dbapi as impyla # noqa
from impala.error import Error as ImpylaError # noqa
from impala.error import HiveServer2Error as HS2Error # noqa
import impala.dbapi as impyla # noqa
83 changes: 53 additions & 30 deletions ibis/impala/compiler.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
from io import StringIO
import datetime
import itertools
from io import StringIO
from operator import add, mul, sub
from typing import Optional

import ibis
import ibis.common as com
import ibis.expr.analysis as L
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.expr.operations as ops

import ibis.expr.types as ir
import ibis.impala.identifiers as identifiers
import ibis.sql.compiler as comp
import ibis.sql.transforms as transforms

import ibis.impala.identifiers as identifiers

import ibis.common as com
import ibis.util as util


Expand Down Expand Up @@ -188,11 +187,11 @@ def _cumulative_to_window(translator, expr, window):


_map_interval_to_microseconds = dict(
W=604_800_000_000,
D=86_400_000_000,
h=3_600_000_000,
m=60_000_000,
s=1_000_000,
W=604800000000,
D=86400000000,
h=3600000000,
m=60000000,
s=1000000,
ms=1000,
us=1,
ns=0.001,
Expand Down Expand Up @@ -321,7 +320,7 @@ def _window(translator, expr):
if any(col_type in time_range_types for col_type in order_by_types):
window = _time_range_to_range_window(translator, window)

window_formatted = _format_window(translator, window)
window_formatted = _format_window(translator, op, window)

arg_formatted = translator.translate(arg)
result = '{} {}'.format(arg_formatted, window_formatted)
Expand All @@ -332,7 +331,7 @@ def _window(translator, expr):
return result


def _format_window(translator, window):
def _format_window(translator, op, window):
components = []

if len(window._group_by) > 0:
Expand All @@ -352,13 +351,42 @@ def _format_window(translator, window):

p, f = window.preceding, window.following

def _prec(p):
return '{} PRECEDING'.format(p) if p > 0 else 'CURRENT ROW'
def _prec(p: Optional[int]) -> str:
assert p is None or p >= 0

if p is None:
prefix = 'UNBOUNDED'
else:
if not p:
return 'CURRENT ROW'
prefix = str(p)
return '{} PRECEDING'.format(prefix)

def _foll(f: Optional[int]) -> str:
assert f is None or f >= 0

def _foll(f):
return '{} FOLLOWING'.format(f) if f > 0 else 'CURRENT ROW'
if f is None:
prefix = 'UNBOUNDED'
else:
if not f:
return 'CURRENT ROW'
prefix = str(f)

return '{} FOLLOWING'.format(prefix)

frame_clause_not_allowed = (
ops.Lag,
ops.Lead,
ops.DenseRank,
ops.MinRank,
ops.NTile,
ops.PercentRank,
ops.RowNumber,
)

if p is not None and f is not None:
if isinstance(op.expr.op(), frame_clause_not_allowed):
frame = None
elif p is not None and f is not None:
frame = '{} BETWEEN {} AND {}'.format(
window.how.upper(), _prec(p), _foll(f)
)
Expand Down Expand Up @@ -464,26 +492,21 @@ def unary(func_name):
return fixed_arity(func_name, 1)


def _reduction_format(translator, func_name, arg, args, where):
def _reduction_format(translator, func_name, where, arg, *args):
if where is not None:
arg = where.ifelse(arg, ibis.NA)

return '{}({})'.format(
func_name, ', '.join(map(translator.translate, [arg] + list(args)))
func_name,
', '.join(map(translator.translate, itertools.chain([arg], args))),
)


def _reduction(func_name):
def formatter(translator, expr):
op = expr.op()

# HACK: support trailing arguments
where = op.where
args = [arg for arg in op.args if arg is not where]

return _reduction_format(
translator, func_name, args[0], args[1:], where
)
*args, where = op.args
return _reduction_format(translator, func_name, where, *args)

return formatter

Expand All @@ -496,7 +519,7 @@ def _variance_like(func_name):

def formatter(translator, expr):
arg, how, where = expr.op().args
return _reduction_format(translator, func_names[how], arg, [], where)
return _reduction_format(translator, func_names[how], where, arg)

return formatter

Expand Down
13 changes: 6 additions & 7 deletions ibis/impala/ddl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import json
import re

from ibis.sql.compiler import DDL, DML
from .compiler import quote_identifier, _type_to_sql_string

import ibis.expr.schema as sch
import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
from ibis.sql.compiler import DDL, DML

from .compiler import _type_to_sql_string, quote_identifier

fully_qualified_re = re.compile(r"(.*)\.(?:`(.*)`|(.*))")

Expand Down Expand Up @@ -227,7 +226,7 @@ def __init__(
example_table=None,
schema=None,
external=True,
**kwargs,
**kwargs
):
super().__init__(
table_name,
Expand Down Expand Up @@ -362,7 +361,7 @@ def __init__(
lineterminator=None,
na_rep=None,
external=True,
**kwargs,
**kwargs
):
table_format = DelimitedFormat(
path,
Expand Down
59 changes: 15 additions & 44 deletions ibis/impala/kudu_support.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,12 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from io import StringIO
from functools import wraps as copydoc

import kudu
import pandas as pd

import ibis.expr.datatypes as dt
from ibis.common import IbisError
from ibis.expr.api import schema
from ibis.impala import ddl
from ibis.util import implements as copydoc
import ibis.expr.datatypes as dt
import kudu


_kudu_type_to_ibis_typeclass = {
'int8': dt.Int8,
Expand All @@ -38,10 +22,7 @@


class KuduImpalaInterface:

"""
User-facing wrapper layer for the ImpalaClient
"""
"""User-facing wrapper layer for the ImpalaClient."""

def __init__(self, impala_client):
self.impala_client = impala_client
Expand Down Expand Up @@ -257,7 +238,7 @@ def __init__(
schema,
key_columns,
external=True,
**kwargs,
**kwargs
):
self.kudu_table_name = kudu_table_name
self.master_addrs = master_addrs
Expand All @@ -271,16 +252,11 @@ def _validate(self):
pass

def compile(self):
buf = StringIO()
buf.write(self._create_line())

schema = ddl.format_schema(self.schema)
buf.write('\n{0}'.format(schema))

props = self._get_table_properties()
buf.write('\n')
buf.write(ddl.format_tblproperties(props))
return buf.getvalue()
return '{}\n{}\n{}'.format(
self._create_line(),
ddl.format_schema(self.schema),
ddl.format_tblproperties(self._get_table_properties()),
)

_table_props_base = {
'storage_handler': 'com.cloudera.kudu.hive.KuduStorageHandler'
Expand Down Expand Up @@ -329,16 +305,11 @@ def __init__(
)

def compile(self):
buf = StringIO()
buf.write(self._create_line())

props = self._get_table_properties()
buf.write('\n')
buf.write(ddl.format_tblproperties(props))

select_query = self.select.compile()
buf.write(' AS\n{0}'.format(select_query))
return buf.getvalue()
return '{}\n{} AS\n{}'.format(
self._create_line(),
ddl.format_tblproperties(self._get_table_properties()),
self.select.compile(),
)


def schema_kudu_to_ibis(kschema, drop_nn=False):
Expand Down
1 change: 1 addition & 0 deletions ibis/impala/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from io import StringIO

import pandas as pd


Expand Down
3 changes: 1 addition & 2 deletions ibis/impala/pandas_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@

import csv
import tempfile

from posixpath import join as pjoin

import ibis.util as util
import ibis.common as com
import ibis.expr.schema as sch
import ibis.util as util
from ibis.config import options


Expand Down
3 changes: 1 addition & 2 deletions ibis/impala/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@

import pytest

import ibis.util as util
import ibis

import ibis.util as util
from ibis import options
from ibis.expr.tests.mocks import MockConnection

Expand Down
10 changes: 5 additions & 5 deletions ibis/impala/tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import datetime
import time

import pandas as pd
import pytz
import pytest
import pytz

import ibis
import ibis.common as com
import ibis.config as config
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.util as util

from ibis.tests.util import assert_equal

pytest.importorskip('sqlalchemy')
Expand Down Expand Up @@ -133,7 +133,7 @@ def test_adapt_scalar_array_results(con, alltypes):


def test_interactive_repr_call_failure(con):
t = con.table('tpch_lineitem').limit(100_000)
t = con.table('tpch_lineitem').limit(100000)

t = t[t, t.l_receiptdate.cast('timestamp').name('date')]

Expand Down Expand Up @@ -192,7 +192,7 @@ def test_verbose_log_queries(con, test_data_db):

def test_sql_query_limits(con, test_data_db):
table = con.table('tpch_nation', database=test_data_db)
with config.option_context('sql.default_limit', 100_000):
with config.option_context('sql.default_limit', 100000):
# table has 25 rows
assert len(table.execute()) == 25
# comply with limit arg for TableExpr
Expand Down Expand Up @@ -374,7 +374,7 @@ def test_time_to_int_cast(con):
now = pytz.utc.localize(datetime.datetime.now())
d = ibis.literal(now)
result = con.execute(d.cast('int64'))
assert result == int(time.mktime(now.timetuple())) * 1_000_000
assert result == int(time.mktime(now.timetuple())) * 1000000


def test_set_option_with_dot(con):
Expand Down
1 change: 1 addition & 0 deletions ibis/impala/tests/test_connection_pool.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

import ibis

pytest.importorskip('sqlalchemy')
Expand Down
3 changes: 1 addition & 2 deletions ibis/impala/tests/test_ddl.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.util as util

from ibis.tests.util import assert_equal

pytest.importorskip('hdfs')
pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.impala.compat import HS2Error # noqa: E402
from ibis.impala.compat import HS2Error # noqa: E402, isort:skip

pytestmark = pytest.mark.impala

Expand Down
6 changes: 3 additions & 3 deletions ibis/impala/tests/test_ddl_compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.impala import ddl # noqa: E402
from ibis.impala.client import build_ast # noqa: E402
from ibis.impala.compiler import ImpalaDialect # noqa: E402
from ibis.impala import ddl # noqa: E402, isort:skip
from ibis.impala.client import build_ast # noqa: E402, isort:skip
from ibis.impala.compiler import ImpalaDialect # noqa: E402, isort:skip


pytestmark = pytest.mark.impala
Expand Down
34 changes: 19 additions & 15 deletions ibis/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,29 @@
import unittest
from decimal import Decimal
from io import StringIO

import unittest

import pytest

import pandas as pd
import pandas.util.testing as tm
import pytest

import ibis
import ibis.expr.types as ir
import ibis.expr.api as api

import ibis.expr.types as ir
from ibis import literal as L
from ibis.expr.datatypes import Category

from ibis.common import RelationError
from ibis.expr.datatypes import Category
from ibis.expr.tests.mocks import MockConnection
from ibis.impala.compiler import ( # noqa: E402
ImpalaDialect,
ImpalaExprTranslator,
to_sql,
)
from ibis.sql.tests.test_compiler import ExprTestCases # noqa: E402

pytest.importorskip('hdfs')
pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.impala.compiler import (
ImpalaExprTranslator,
to_sql,
ImpalaDialect,
) # noqa: E402
from ibis.sql.tests.test_compiler import ExprTestCases # noqa: E402

pytestmark = pytest.mark.impala

Expand Down Expand Up @@ -524,7 +520,15 @@ def test_nullif_ifnull(self):
f = table.l_quantity

cases = [
(f.nullif(f == 0), 'nullif(`l_quantity`, `l_quantity` = 0)'),
(f.nullif(f), 'nullif(`l_quantity`, `l_quantity`)'),
(
(f == 0).nullif(f == 0),
'nullif(`l_quantity` = 0, `l_quantity` = 0)',
),
(
(f != 0).nullif(f == 0),
'nullif(`l_quantity` != 0, `l_quantity` = 0)',
),
(f.fillna(0), 'isnull(`l_quantity`, CAST(0 AS decimal(12, 2)))'),
]
self._check_expr_cases(cases)
Expand Down
21 changes: 12 additions & 9 deletions ibis/impala/tests/test_kudu_support.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
import unittest
import os
import unittest

import pytest

import ibis # noqa: E402
import ibis.expr.datatypes as dt # noqa: E402
import ibis.util as util # noqa: E402
from ibis.expr.tests.mocks import MockConnection # noqa: E402
from ibis.tests.util import assert_equal # noqa: E402

pytest.importorskip('hdfs')
pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

ksupport = pytest.importorskip('ibis.impala.kudu_support')
kudu = pytest.importorskip('kudu')

from ibis.expr.tests.mocks import MockConnection # noqa: E402
from ibis.impala.client import build_ast # noqa: E402
from ibis.impala.tests.common import IbisTestEnv, ImpalaE2E # noqa: E402
from ibis.tests.util import assert_equal # noqa: E402
import ibis.expr.datatypes as dt # noqa: E402
import ibis.util as util # noqa: E402
import ibis # noqa: E402
from ibis.impala.tests.common import IbisTestEnv # noqa: E402, isort:skip
from ibis.impala.tests.common import ImpalaE2E # noqa: E402, isort:skip
from ibis.impala.client import build_ast # noqa: E402, isort:skip


pytestmark = pytest.mark.kudu

Expand All @@ -27,7 +30,7 @@ def __init__(self):

# band-aid until Kudu support merged into Impala mainline
self.test_host = os.getenv(
'IBIS_TEST_KIMPALA_HOST', 'quickstart.cloudera'
'IBIS_TEST_KIMPALA_HOST', 'impala'
)

# XXX
Expand Down
6 changes: 4 additions & 2 deletions ibis/impala/tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import unittest

import pandas as pd

import pytest
from numpy import nan

from ibis.impala.metadata import parse_metadata

pytestmark = pytest.mark.impala


def _glue_lists_spacer(spacer, lists):
result = list(lists[0])
Expand Down Expand Up @@ -101,7 +103,7 @@ def test_table_params(self):

assert params['EXTERNAL'] is True
assert params['STATS_GENERATED_VIA_STATS_TASK'] is True
assert params['numRows'] == 183_592
assert params['numRows'] == 183592
assert params['transient_lastDdlTime'] == pd.Timestamp(
'2015-11-12 15:09:01'
)
Expand Down
39 changes: 18 additions & 21 deletions ibis/impala/tests/test_pandas_interop.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
import pytest

import numpy as np

from pandas.util.testing import assert_frame_equal
import pandas as pd
import pytest
from pandas.util.testing import assert_frame_equal

import ibis
import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
from ibis.impala.pandas_interop import DataFrameWriter # noqa: E402

pytestmark = pytest.mark.impala

pytest.importorskip('hdfs')
pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.impala.pandas_interop import DataFrameWriter # noqa: E402


@pytest.fixture
def exhaustive_df():
Expand Down Expand Up @@ -57,28 +54,28 @@ def exhaustive_df():
0.0,
10.1,
np.nan,
30.299_999_999_999_997,
40.399_999_999_999_999,
30.299999999999997,
40.399999999999999,
50.5,
60.599_999_999_999_994,
70.700_000_000_000_003,
80.799_999_999_999_997,
90.899_999_999_999_991,
60.599999999999994,
70.700000000000003,
80.799999999999997,
90.899999999999991,
],
dtype=np.float64,
),
'float_col': np.array(
'floatcol': np.array(
[
np.nan,
1.100_000_023_841_857_9,
2.200_000_047_683_715_8,
3.299_999_952_316_284_2,
4.400_000_095_367_431_6,
1.1000000238418579,
2.2000000476837158,
3.2999999523162842,
4.4000000953674316,
5.5,
6.599_999_904_632_568_4,
7.699_999_809_265_136_7,
8.800_000_190_734_863_3,
9.899_999_618_530_273_4,
6.5999999046325684,
7.6999998092651367,
8.8000001907348633,
9.8999996185302734,
],
dtype='f4',
),
Expand Down
Loading