205 changes: 135 additions & 70 deletions ibis/expr/tests/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,98 +24,163 @@
from ibis.expr.rules import highest_precedence_type

from ibis.expr.tests.mocks import MockConnection
from ibis.compat import unittest


class TestTimestamp(unittest.TestCase):
@pytest.fixture
def con():
return MockConnection()

def setUp(self):
self.con = MockConnection()
self.alltypes = self.con.table('alltypes')
self.col = self.alltypes.i

def test_field_select(self):
assert isinstance(self.col, ir.TimestampColumn)
@pytest.fixture
def alltypes(con):
return con.table('alltypes')

def test_string_cast_to_timestamp(self):
casted = self.alltypes.g.cast('timestamp')
assert isinstance(casted, ir.TimestampColumn)

string = api.literal('2000-01-01')
casted = string.cast('timestamp')
assert isinstance(casted, ir.TimestampScalar)
@pytest.fixture
def col(alltypes):
return alltypes.i

def test_extract_fields(self):
# type-size may be database specific
cases = [
('year', ops.ExtractYear, ir.Int32Column),
('month', ops.ExtractMonth, ir.Int32Column),
('day', ops.ExtractDay, ir.Int32Column),
('hour', ops.ExtractHour, ir.Int32Column),
('minute', ops.ExtractMinute, ir.Int32Column),
('second', ops.ExtractSecond, ir.Int32Column),
('millisecond', ops.ExtractMillisecond, ir.Int32Column),
]

for attr, ex_op, ex_type in cases:
result = getattr(self.col, attr)()
assert result.get_name() == attr
assert isinstance(result, ex_type)
assert isinstance(result.op(), ex_op)
def test_field_select(col):
assert isinstance(col, ir.TimestampColumn)

def test_now(self):
result = api.now()
assert isinstance(result, ir.TimestampScalar)
assert isinstance(result.op(), ops.TimestampNow)

def test_timestamp_literals(self):
ts_str = '2015-01-01 00:00:00'
val = pd.Timestamp(ts_str)
def test_string_cast_to_timestamp(alltypes):
casted = alltypes.g.cast('timestamp')
assert isinstance(casted, ir.TimestampColumn)

expr = ibis.literal(val)
assert isinstance(expr, ir.TimestampScalar)
string = api.literal('2000-01-01')
casted = string.cast('timestamp')
assert isinstance(casted, ir.TimestampScalar)

expr = ibis.timestamp(ts_str)
assert isinstance(expr, ir.TimestampScalar)

self.assertRaises(ValueError, ibis.timestamp, '2015-01-01 00:71')
@pytest.mark.parametrize(
('field', 'expected_operation', 'expected_type'),
[
('year', ops.ExtractYear, ir.Int32Column),
('month', ops.ExtractMonth, ir.Int32Column),
('day', ops.ExtractDay, ir.Int32Column),
('hour', ops.ExtractHour, ir.Int32Column),
('minute', ops.ExtractMinute, ir.Int32Column),
('second', ops.ExtractSecond, ir.Int32Column),
('millisecond', ops.ExtractMillisecond, ir.Int32Column),
]
)
def test_extract_fields(field, expected_operation, expected_type, col):
# type-size may be database specific
result = getattr(col, field)()
assert result.get_name() == field
assert isinstance(result, expected_type)
assert isinstance(result.op(), expected_operation)

@pytest.mark.xfail(raises=AssertionError, reason='NYT')
def test_integer_to_timestamp(self):
# #246
assert False

def test_comparison_timestamp(self):
expr = self.col > (self.col.min() + ibis.day(3))
assert isinstance(expr, ir.BooleanColumn)
def test_now():
result = api.now()
assert isinstance(result, ir.TimestampScalar)
assert isinstance(result.op(), ops.TimestampNow)

def test_comparisons_string(self):
val = '2015-01-01 00:00:00'
expr = self.col > val
op = expr.op()
assert isinstance(op.right, ir.TimestampScalar)

expr2 = val < self.col
op = expr2.op()
assert isinstance(op, ops.Greater)
assert isinstance(op.right, ir.TimestampScalar)
@pytest.mark.parametrize(
('function', 'value'),
[
(ibis.timestamp, '2015-01-01 00:00:00'),
(ibis.literal, pd.Timestamp('2015-01-01 00:00:00')),
]
)
def test_timestamp_literals(function, value):
expr = function(value)
assert isinstance(expr, ir.TimestampScalar)

def test_comparisons_pandas_timestamp(self):
val = pd.Timestamp('2015-01-01 00:00:00')
expr = self.col > val
op = expr.op()
assert isinstance(op.right, ir.TimestampScalar)

# TODO: this is broken for now because of upstream pandas problems
def test_invalid_timestamp_literal():
with pytest.raises(ValueError):
ibis.timestamp('2015-01-01 00:71')

# expr2 = val < self.col
# op = expr2.op()
# assert isinstance(op, ops.Greater)
# assert isinstance(op.right, ir.TimestampScalar)

@pytest.mark.xfail(raises=AssertionError, reason='NYT')
def test_integer_to_timestamp():
# #246
assert False


def test_comparison_timestamp(col):
expr = col > (col.min() + ibis.day(3))
assert isinstance(expr, ir.BooleanColumn)


def test_comparisons_string(col):
val = '2015-01-01 00:00:00'
expr = col > val
op = expr.op()
assert isinstance(op.right, ir.TimestampScalar)

expr2 = val < col
op = expr2.op()
assert isinstance(op, ops.Greater)
assert isinstance(op.right, ir.TimestampScalar)


def test_comparisons_pandas_timestamp(col):
val = pd.Timestamp('2015-01-01 00:00:00')
expr = col > val
op = expr.op()
assert isinstance(op.right, ir.TimestampScalar)


@pytest.mark.xfail(raises=TypeError, reason='Upstream pandas bug')
def test_greater_comparison_pandas_timestamp(col):
val = pd.Timestamp('2015-01-01 00:00:00')
expr2 = val < col
op = expr2.op()
assert isinstance(op, ops.Greater)
assert isinstance(op.right, ir.TimestampScalar)


def test_timestamp_precedence():
ts = ibis.literal(datetime.now())
null_ts = ibis.NA
highest_type = highest_precedence_type([ts, null_ts])
highest_type = highest_precedence_type([ibis.NA, ts])
assert highest_type == 'timestamp'


@pytest.mark.parametrize(
('field', 'expected_operation', 'expected_type'),
[
('year', ops.ExtractYear, ir.Int32Column),
('month', ops.ExtractMonth, ir.Int32Column),
('day', ops.ExtractDay, ir.Int32Column),
]
)
def test_timestamp_field_access_on_date(
field, expected_operation, expected_type, col
):
date_col = col.cast('date')
result = getattr(date_col, field)()
assert isinstance(result, expected_type)
assert isinstance(result.op(), expected_operation)


@pytest.mark.parametrize(
    'field',
    [
        'hour',
        'minute',
        'second',
        'millisecond',
    ]
)
def test_timestamp_field_access_on_date_failure(field, col):
    """Time-of-day accessors must not exist on a date column.

    The original parametrization also carried expected_operation and
    expected_type, but neither was used in the test body; only the field
    name matters here.
    """
    date_col = col.cast('date')
    # Dates carry no time component, so the attribute itself is absent.
    with pytest.raises(AttributeError):
        getattr(date_col, field)


def test_timestamp_integer_warns():
    """Using bare integers where timestamps are expected emits a warning."""
    with pytest.warns(UserWarning):
        ibis.timestamp(1234)

    table = ibis.table([('ts', 'timestamp')])
    # Comparing a timestamp column against an integer should also warn.
    with pytest.warns(UserWarning):
        table.ts < 1234
101 changes: 101 additions & 0 deletions ibis/expr/tests/test_value_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,44 @@ def test_literal_cases(value, expected_type):
assert expr.op().value is value


@pytest.mark.parametrize(
['value', 'expected_type'],
[
(5, 'int16'),
(127, 'double'),
(128, 'int64'),
(32767, 'double'),
(32768, 'float'),
(2147483647, 'int64'),
(-5, 'int16'),
(-128, 'int32'),
(-129, 'int64'),
(-32769, 'float'),
(-2147483649, 'double'),
(1.5, 'double'),
('foo', 'string'),
]
)
def test_literal_with_different_type(value, expected_type):
expr = ibis.literal(value, type=expected_type)
assert expr.type().equals(dt.validate_type(expected_type))


@pytest.mark.parametrize(
['value', 'expected_type'],
[
(32767, 'int8'),
(32768, 'int16'),
(2147483647, 'int16'),
(2147483648, 'int32'),
('foo', 'double'),
]
)
def test_literal_with_different_type_failure(value, expected_type):
with pytest.raises(TypeError):
ibis.literal(value, type=expected_type)


def test_literal_list():
what = [1, 2, 1000]
expr = api.as_value_expr(what)
Expand Down Expand Up @@ -756,3 +794,66 @@ def test_not_without_boolean(typ):
c = t.a
with pytest.raises(TypeError):
~c


@pytest.mark.parametrize(
('position', 'names'),
[
(0, 'foo'),
(1, 'bar'),
([0], ['foo']),
([1], ['bar']),
([0, 1], ['foo', 'bar']),
([1, 0], ['bar', 'foo']),
]
)
@pytest.mark.parametrize(
'expr_func',
[
lambda t, args: t[args],
lambda t, args: t.sort_by(args),
lambda t, args: t.group_by(args).aggregate(bar_avg=t.bar.mean())
]
)
def test_table_operations_with_integer_column(position, names, expr_func):
t = ibis.table([('foo', 'string'), ('bar', 'double')])
result = expr_func(t, position)
expected = expr_func(t, names)
assert result.equals(expected)


@pytest.mark.parametrize(
'value',
[
'abcdefg',
['a', 'b', 'c'],
[1, 2, 3],
]
)
@pytest.mark.parametrize(
'operation',
[
'pow',
'sub',
'truediv',
'floordiv',
'mod',
]
)
def test_generic_value_api_no_arithmetic(value, operation):
func = getattr(operator, operation)
expr = ibis.literal(value)
with pytest.raises(TypeError):
func(expr, expr)


@pytest.mark.parametrize(
('value', 'expected'),
[
(5, dt.int8),
(5.4, dt.double),
('abc', dt.string),
]
)
def test_fillna_null(value, expected):
assert ibis.NA.fillna(value).type().equals(expected)
76 changes: 76 additions & 0 deletions ibis/expr/tests/test_visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pytest

pytest.importorskip('graphviz')

import ibis # noqa: E402
import ibis.expr.types as ir # noqa: E402
import ibis.expr.visualize as viz # noqa: E402

from ibis.expr import rules # noqa: E402


@pytest.fixture
def t():
    """A three-column table fixture shared by the visualization tests."""
    columns = [('a', 'int64'), ('b', 'double'), ('c', 'string')]
    return ibis.table(columns, name='t')


@pytest.mark.parametrize(
'expr_func',
[
lambda t: t.a,
lambda t: t.a + t.b,
lambda t: t.a + t.b > 3 ** t.a,
lambda t: t[(t.a + t.b * 2 * t.b / t.b ** 3 > 4) & (t.b > 5)],
lambda t: t[(t.a + t.b * 2 * t.b / t.b ** 3 > 4) & (t.b > 5)].group_by(
'c'
).aggregate(
amean=lambda f: f.a.mean(),
bsum=lambda f: f.b.sum(),
)
]
)
def test_exprs(t, expr_func):
expr = expr_func(t)
graph = viz.to_graph(expr)
assert str(hash(repr(t.op()))) in graph.source
assert str(hash(repr(expr.op()))) in graph.source


def test_custom_expr():
    """User-defined Expr/Node subclasses should render without error."""
    class MyExpr(ir.Expr):
        pass

    class MyExprNode(ir.Node):

        input_type = [
            rules.string(name='foo'),
            rules.number(name='bar'),
        ]

        def output_type(self):
            return MyExpr

    node = MyExprNode(['Hello!', 42.3])
    graph = viz.to_graph(node.to_expr())
    # Nodes are keyed by hash(repr(op)); the custom op must appear.
    assert str(hash(repr(node))) in graph.source


@pytest.mark.parametrize(
    'how',
    [
        'inner',
        'left',
        # pytest.mark.xfail applied directly to a parametrize value is
        # deprecated (removed in pytest 4); attach the mark via pytest.param.
        pytest.param(
            'right', marks=pytest.mark.xfail(raises=KeyError, reason='NYI')
        ),
        'outer',
    ]
)
def test_join(how):
    """Joined-table expressions should render and include the root node."""
    left = ibis.table([('a', 'int64'), ('b', 'string')])
    right = ibis.table([('b', 'string'), ('c', 'int64')])
    joined = left.join(right, left.b == right.b, how=how)
    result = joined[left.a, right.c]
    graph = viz.to_graph(result)
    # Nodes are keyed by hash(repr(op)); the projection's op must appear.
    assert str(hash(repr(result.op()))) in graph.source
259 changes: 191 additions & 68 deletions ibis/expr/types.py

Large diffs are not rendered by default.

205 changes: 205 additions & 0 deletions ibis/expr/visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import itertools
import tempfile

import graphviz as g

from ibis.compat import zip_longest

import ibis
import ibis.common as com
import ibis.expr.types as ir
import ibis.expr.operations as ops


def get_args(node):
    """Yield ``(child_expr, label)`` pairs for *node*'s visualizable inputs.

    Selections/aggregations and joins get dedicated handling because their
    arguments are nested lists; any other operation pairs its
    expression-valued args with the declared argument names (or with
    ``_arg_names`` when no ``input_type`` is declared).
    """
    if isinstance(node, (ops.Aggregation, ops.Selection)):
        return get_args_selection_aggregation(node)
    if isinstance(node, ops.Join):
        return get_args_join(node)

    exprs = (arg for arg in node.args if isinstance(arg, ir.Expr))
    try:
        input_type = node.input_type
    except AttributeError:
        labels = node._arg_names
    else:
        labels = (arg.name for arg in input_type.types)
    return zip_longest(exprs, labels)


def get_args_selection_aggregation(node):
    """Pair each input of a Selection/Aggregation with a display label.

    Yields ``(child, label)`` pairs: the ``table`` input first, then every
    element of each remaining argument list labeled ``name[i]``.  An empty
    argument list contributes a single ``(None, None)`` placeholder, and
    uneven streams are padded with ``None`` by ``zip_longest``.
    """
    children = [node.table]
    labels = ['table']
    for argname in node._arg_names:
        if argname == 'table':
            continue
        values = getattr(node, argname)
        children.extend(values or [None])
        labels.extend(
            ['{}[{:d}]'.format(argname, i) for i in range(len(values))]
            or [None]
        )
    return zip_longest(children, labels)


def get_args_join(node):
    """Pair each input of a Join with a display label.

    The left and right tables come first, then every remaining argument
    (typically the join predicates) labeled ``name[i]``.  An empty argument
    list contributes a single ``None`` label; ``zip`` truncates to the
    shorter of the two streams.
    """
    exprs = [node.left, node.right] + node.predicates
    labels = ['left', 'right']
    for argname in node._arg_names:
        if argname in ('left', 'right'):
            continue
        values = getattr(node, argname)
        labels.extend(
            ['{}[{:d}]'.format(argname, i) for i in range(len(values))]
            or [None]
        )
    return zip(exprs, labels)


def get_type(expr):
    """Return a textual type description of *expr* for use in node labels.

    Tries, in order: the value expression's type, the table expression's
    schema (rendered as left-aligned HTML-like lines, one per column), the
    name of the op's ``output_type`` class, and finally the empty-set
    character when nothing else is available.
    """
    try:
        return str(expr.type())
    except AttributeError:
        # Not a value expression; fall through to table/schema handling.
        pass

    try:
        schema = expr.schema()
    except AttributeError:
        try:
            # As a last resort try get the name of the output_type class
            return expr.op().output_type().__name__
        except AttributeError:
            return '\u2205'  # empty set character
    except com.IbisError:
        # schema() raised an ibis error rather than being absent; the
        # assert below shows this path is expected only for Join ops,
        # presumably because their schema is unresolved until
        # materialization — synthesize one by qualifying each side's
        # columns with its table name.
        op = expr.op()
        assert isinstance(op, ops.Join)
        left_table_name = op.left.op().name or ops.genname()
        left_schema = op.left.schema()
        right_table_name = op.right.op().name or ops.genname()
        right_schema = op.right.schema()
        pairs = [
            ('{}.{}'.format(left_table_name, left_column), type)
            for left_column, type in left_schema.items()
        ] + [
            ('{}.{}'.format(right_table_name, right_column), type)
            for right_column, type in right_schema.items()
        ]
        schema = ibis.schema(pairs)

    return ''.join(
        '<BR ALIGN="LEFT" /> <I>{}</I>: {}'.format(name, type)
        for name, type in zip(schema.names, schema.types)
    ) + '<BR ALIGN="LEFT" />'


def get_label(expr, argname=None):
    """Build an HTML-like graphviz label for *expr*.

    Table nodes show ``name: OpName`` plus their rendered schema; value
    nodes show the operation followed by an arrow to its output type.
    When the node has no ``name`` attribute of its own, *argname* (the
    name of the argument slot it occupies in its parent) is used instead.
    """
    import ibis.expr.operations as ops

    node = expr.op()
    typename = get_type(expr)
    opname = type(node).__name__
    display_name = getattr(node, 'name', argname)
    is_table = isinstance(node, ops.TableNode)

    if display_name is None:
        template = '<<B>{}</B>{}>' if is_table else '<{} \u27f6 {}>'
        return template.format(opname, typename)

    if is_table:
        template = '<<I>{}</I>: <B>{}</B>{}>'
    else:
        template = '<<I>{}</I>: <B>{}</B> \u27f6 {}>'
    return template.format(display_name, opname, typename)


def get_arg_names(node):
    """Best-effort list of argument names for *node*.

    Prefers the names declared on the operation's ``input_type``; if any
    attribute along that path is missing, falls back to the node's raw
    ``_arg_names``.
    """
    try:
        declared = node.input_type.types
        return [typ.name for typ in declared]
    except AttributeError:
        return node._arg_names


def to_graph(expr, node_attr=None, edge_attr=None):
    """Convert an ibis expression tree into a ``graphviz.Digraph``.

    Walks the expression graph depth-first from *expr*, creating one graph
    node per unique operation (keyed by ``hash(repr(op))``) and one edge
    from each operation to each of its expression-valued arguments.

    Parameters
    ----------
    node_attr : dict, optional
        graphviz node attributes; defaults to a monospaced box.
    edge_attr : dict, optional
        graphviz edge attributes; defaults to ``dir=back`` arrows.

    Returns
    -------
    graphviz.Digraph
    """
    if node_attr is None:
        node_attr = {
            'shape': 'box',
            'fontname': 'Deja Vu Sans Mono',
        }

    if edge_attr is None:
        edge_attr = {
            'dir': 'back',
        }

    stack = [expr]
    seen = set()
    labeled = set()

    graph = g.Digraph(node_attr=node_attr, edge_attr=edge_attr)

    while stack:
        e = stack.pop()
        node = e.op()
        # Identity key for the operation: repr captures the argument
        # structure, so structurally equal ops collapse into one node.
        a = str(hash(repr(node)))

        if a not in seen:
            seen.add(a)

            # A node first emitted as somebody's argument already carries a
            # label that includes its argument-slot name; don't relabel it.
            if a not in labeled:
                label = get_label(e)
            else:
                label = None

            graph.node(a, label=label)

            for arg, arg_name in get_args(node):
                if arg is not None:
                    b = str(hash(repr(arg.op())))
                    label = get_label(arg, arg_name)
                    graph.node(b, label=label)
                    labeled.add(b)
                    graph.edge(a, b)
                    stack.append(arg)
    return graph


def draw(graph, path=None, format='png'):
    """Render *graph* to an image file and return the file's path.

    When *path* is ``None`` the image is written to a fresh temporary file
    (kept on disk) whose name is returned; otherwise it is written to
    *path*, which is returned unchanged.
    """
    rendered = graph.pipe(format=format)

    if path is not None:
        with open(path, mode='wb') as f:
            f.write(rendered)
        return path

    suffix = '.{}'.format(format)
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=suffix, mode='wb'
    ) as f:
        f.write(rendered)
    return f.name


if __name__ == '__main__':
t = ibis.table(
[('a', 'int64'), ('b', 'double'), ('c', 'string')], name='t'
)
left = ibis.table([('a', 'int64'), ('b', 'string')])
right = ibis.table([('b', 'string'), ('c', 'int64'), ('d', 'string')])
joined = left.inner_join(right, left.b == right.b)
df = joined[left.a, right.c.name('b'), right.d.name('c')]
a = df.a
b = df.b
filt = df[(a + b * 2 * b / b ** 3 > 4) & (b > 5)]
expr = filt.groupby(filt.c).aggregate(
amean=filt.a.mean(),
bsum=filt.b.sum(),
)
expr.visualize()
50 changes: 31 additions & 19 deletions ibis/impala/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,37 +48,49 @@ def connect(host='localhost', port=21050, database='default', timeout=45,
use_ssl=False, ca_cert=None, user=None,
password=None, auth_mechanism='NOSASL',
kerberos_service_name='impala', pool_size=8, hdfs_client=None):
"""
Create an ImpalaClient for use with Ibis.
"""Create an ImpalaClient for use with Ibis.
Parameters
----------
host : string, Host name of the impalad or HiveServer2 in Hive
port : int, Defaults to 21050 (Impala's HiveServer2)
database : string, Default database when obtaining new cursors
timeout : int, Connection timeout (seconds) when communicating with
HiveServer2
use_ssl : boolean, Use SSL when connecting to HiveServer2
ca_cert : string, Local path to 3rd party CA certificate or copy of server
certificate for self-signed certificates. If SSL is enabled, but this
argument is None, then certificate validation is skipped.
user : string, LDAP user to authenticate
password : string, LDAP password to authenticate
auth_mechanism : string, {'NOSASL' <- default, 'PLAIN', 'GSSAPI', 'LDAP'}.
host : str, optional
Host name of the impalad or HiveServer2 in Hive
port : int, optional
Impala's HiveServer2 port
database : str, optional
Default database when obtaining new cursors
timeout : int, optional
Connection timeout in seconds when communicating with HiveServer2
use_ssl : bool, optional
Use SSL when connecting to HiveServer2
ca_cert : str, optional
Local path to 3rd party CA certificate or copy of server certificate
for self-signed certificates. If SSL is enabled, but this argument is
``None``, then certificate validation is skipped.
user : str, optional
LDAP user to authenticate
password : str, optional
LDAP password to authenticate
auth_mechanism : str, optional
{'NOSASL' <- default, 'PLAIN', 'GSSAPI', 'LDAP'}.
Use NOSASL for non-secured Impala connections. Use PLAIN for
non-secured Hive clusters. Use LDAP for LDAP authenticated
connections. Use GSSAPI for Kerberos-secured clusters.
kerberos_service_name : string, Specify particular impalad service
principal.
kerberos_service_name : str, optional
Specify particular impalad service principal.
Examples
--------
>>> hdfs = ibis.hdfs_connect(**hdfs_params)
>>> client = ibis.impala.connect(hdfs_client=hdfs, **impala_params)
>>> import ibis
>>> hdfs = ibis.hdfs_connect(host='impala', port=50070)
>>> hdfs # doctest: +ELLIPSIS
<ibis.filesystems.WebHDFS object at 0x...>
>>> client = ibis.impala.connect(hdfs_client=hdfs, port=21050)
>>> client # doctest: +ELLIPSIS
<ibis.impala.client.ImpalaClient object at 0x...>
Returns
-------
con : ImpalaClient
ImpalaClient
"""
params = {
'host': host,
Expand Down
10 changes: 2 additions & 8 deletions ibis/impala/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def _fetch(self, cursor):
names = [x[0] for x in cursor.description]
return _column_batches_to_dataframe(names, batches)

def _db_type_to_dtype(self, db_type):
def _db_type_to_dtype(self, db_type, column):
return _HS2_TTypeId_to_dtype[db_type]


Expand Down Expand Up @@ -840,18 +840,12 @@ def create_table(self, table_name, obj=None, schema=None, database=None,
ast = self._build_ast(to_insert)
select = ast.queries[0]

if partition is not None:
# Fairly certain this is currently the case
raise ValueError('partition not supported with '
'create-table-as-select. Create an '
'empty partitioned table instead '
'and insert into those partitions.')

statement = ddl.CTAS(table_name, select,
database=database,
can_exist=force,
format=format,
external=external,
partition=partition,
path=location)
elif schema is not None:
statement = ddl.CreateTableWithSchema(
Expand Down
395 changes: 176 additions & 219 deletions ibis/impala/ddl.py

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions ibis/impala/kudu_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def list_tables(self, filter=''):
def table_exists(self, name):
return self.client.table_exists(name)

def connect(self, host_or_hosts, port_or_ports=7051, rpc_timeout=None):
def connect(self, host_or_hosts, port_or_ports=7051,
rpc_timeout=None, admin_timeout=None):
"""
Pass-through connection interface to the Kudu client
Expand All @@ -67,13 +68,16 @@ def connect(self, host_or_hosts, port_or_ports=7051, rpc_timeout=None):
If you pass multiple host names, pass multiple ports
rpc_timeout : kudu.TimeDelta
See Kudu client documentation for details
admin_timeout : kudu.TimeDelta
See Kudu client documentation for details
Returns
-------
None
"""
self.client = kudu.connect(host_or_hosts, port_or_ports,
rpc_timeout=rpc_timeout)
rpc_timeout_ms=rpc_timeout,
admin_timeout_ms=admin_timeout)

def _check_connected(self):
if not self.is_connected:
Expand Down
50 changes: 30 additions & 20 deletions ibis/impala/pandas_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,21 @@
from posixpath import join as pjoin
import os

import pandas.core.common as pdcom
import pandas as pd

try:
import pandas.api.types as pdcom
from pandas.api.types import infer_dtype
except ImportError:
import pandas.core.common as pdcom
from pandas.lib import infer_dtype

import ibis.common as com

from ibis.config import options
from ibis.util import log
import ibis.compat as compat
import ibis.expr.datatypes as itypes
import ibis.expr.datatypes as dt
import ibis.util as util


Expand All @@ -37,46 +43,50 @@ def pandas_col_to_ibis_type(col):
dty = col.dtype

# datetime types
if pdcom.is_datetime64tz_dtype(dty):
return dt.Timestamp(str(dty.tz))

if pdcom.is_datetime64_dtype(dty):
if pdcom.is_datetime64_ns_dtype(dty):
return 'timestamp'
return dt.timestamp
else:
raise com.IbisTypeError("Column {0} has dtype {1}, which is "
"datetime64-like but does "
"not use nanosecond units"
.format(col.name, dty))
if pdcom.is_timedelta64_dtype(dty):
print("Warning: encoding a timedelta64 as an int64")
return 'int64'
return dt.int64

if pdcom.is_categorical_dtype(dty):
return itypes.Category(len(col.cat.categories))
return dt.Category(len(col.cat.categories))

if pdcom.is_bool_dtype(dty):
return 'boolean'
return dt.boolean

# simple numerical types
if issubclass(dty.type, np.int8):
return 'int8'
return dt.int8
if issubclass(dty.type, np.int16):
return 'int16'
return dt.int16
if issubclass(dty.type, np.int32):
return 'int32'
return dt.int32
if issubclass(dty.type, np.int64):
return 'int64'
return dt.int64
if issubclass(dty.type, np.float32):
return 'float'
return dt.float
if issubclass(dty.type, np.float64):
return 'double'
return dt.double
if issubclass(dty.type, np.uint8):
return 'int16'
return dt.int16
if issubclass(dty.type, np.uint16):
return 'int32'
return dt.int32
if issubclass(dty.type, np.uint32):
return 'int64'
return dt.int64
if issubclass(dty.type, np.uint64):
raise com.IbisTypeError("Column {0} is an unsigned int64"
.format(col.name))
raise com.IbisTypeError(
"Column {} is an unsigned int64".format(col.name)
)

if pdcom.is_object_dtype(dty):
return _infer_object_dtype(col)
Expand All @@ -101,11 +111,11 @@ def _infer_object_dtype(arr):
elif state == STRING:
break
if state == BOOLEAN:
return 'boolean'
return dt.boolean
elif state == STRING:
return 'string'
return dt.string
else:
return pd.lib.infer_dtype(avalues)
return infer_dtype(avalues)


class DataFrameWriter(object):
Expand Down
3 changes: 2 additions & 1 deletion ibis/impala/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
import pandas as pd

from ibis.compat import unittest
from ibis.impala.tests.common import IbisTestEnv, ImpalaE2E, connect_test
from ibis.tests.util import assert_equal
import ibis
Expand Down
21 changes: 11 additions & 10 deletions ibis/impala/tests/test_ddl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa=E402
import unittest

from copy import copy
import gc
Expand All @@ -23,19 +23,20 @@
from posixpath import join as pjoin
import pytest

pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.expr.tests.mocks import MockConnection
from ibis.compat import unittest, mock
from ibis.impala import ddl
from ibis.impala.compat import HS2Error, ImpylaError
from ibis.impala.client import build_ast
from ibis.impala.tests.common import ENV, ImpalaE2E, connect_test
from ibis.tests.util import assert_equal
from ibis.compat import mock
import ibis.common as com
import ibis.expr.types as ir
import ibis.util as util
from ibis.tests.util import assert_equal

pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.impala import ddl # noqa: E402
from ibis.impala.compat import HS2Error, ImpylaError # noqa: E402
from ibis.impala.client import build_ast # noqa: E402
from ibis.impala.tests.common import ENV, ImpalaE2E, connect_test # noqa: E402


class TestDropTable(unittest.TestCase):
Expand Down
29 changes: 15 additions & 14 deletions ibis/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,30 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa=E402
import unittest

import pytest

pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

import pandas as pd
import pandas.util.testing as tm

import ibis
import ibis.expr.types as ir
import ibis.expr.api as api

from ibis import literal as L
from ibis.compat import unittest, StringIO, Decimal
from ibis.expr.datatypes import Category

from ibis.compat import StringIO, Decimal
from ibis.expr.tests.mocks import MockConnection
from ibis.impala.compiler import ImpalaExprTranslator, to_sql, ImpalaContext
from ibis.sql.tests.test_compiler import ExprTestCases
from ibis.impala.tests.common import ImpalaE2E
import ibis.expr.types as ir
import ibis.expr.api as api

pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.impala.compiler import ImpalaExprTranslator, to_sql # noqa: E402
from ibis.impala.compiler import ImpalaContext # noqa: E402
from ibis.sql.tests.test_compiler import ExprTestCases # noqa: E402
from ibis.impala.tests.common import ImpalaE2E # noqa: E402


def approx_equal(a, b, eps):
Expand Down Expand Up @@ -505,7 +508,6 @@ def test_identical_to_special_case(self):
assert result == 'SELECT TRUE AS `tmp`'



class TestBucketHistogram(unittest.TestCase, ExprSQLTest):

def setUp(self):
Expand Down Expand Up @@ -1444,10 +1446,9 @@ def test_aggregations(self):
d.var(where=cond),
]

agg_exprs = [expr.name('e%d' % i)
for i, expr in enumerate(exprs)]
metrics = [expr.name('e%d' % i) for i, expr in enumerate(exprs)]

agged_table = table.aggregate(agg_exprs)
agged_table = table.aggregate(metrics)
agged_table.execute()

def test_analytic_functions(self):
Expand Down
20 changes: 10 additions & 10 deletions ibis/impala/tests/test_kudu_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa=E402

import unittest
import os

import pytest

pytest.importorskip('sqlalchemy')
Expand All @@ -23,14 +23,13 @@
ksupport = pytest.importorskip('ibis.impala.kudu_support')
kudu = pytest.importorskip('kudu')

from ibis.compat import unittest
from ibis.expr.tests.mocks import MockConnection
from ibis.impala.client import build_ast
from ibis.impala.tests.common import IbisTestEnv, ImpalaE2E
from ibis.tests.util import assert_equal
import ibis.expr.datatypes as dt
import ibis.util as util
import ibis
from ibis.expr.tests.mocks import MockConnection # noqa: E402
from ibis.impala.client import build_ast # noqa: E402
from ibis.impala.tests.common import IbisTestEnv, ImpalaE2E # noqa: E402
from ibis.tests.util import assert_equal # noqa: E402
import ibis.expr.datatypes as dt # noqa: E402
import ibis.util as util # noqa: E402
import ibis # noqa: E402


class KuduImpalaTestEnv(IbisTestEnv):
Expand All @@ -54,6 +53,7 @@ def __init__(self):
self.hdfs_superuser = os.environ.get('IBIS_TEST_HDFS_SUPERUSER',
'hdfs')


ENV = KuduImpalaTestEnv()


Expand Down
3 changes: 2 additions & 1 deletion ibis/impala/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import pandas as pd

from numpy import nan

from ibis.compat import unittest
from ibis.impala.metadata import parse_metadata


Expand Down
21 changes: 16 additions & 5 deletions ibis/impala/tests/test_pandas_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import pytest

import numpy as np

from pandas.util.testing import assert_frame_equal
import pandas as pd

from ibis.compat import unittest
import ibis
import ibis.expr.datatypes as dt
import ibis.expr.types as ir

from ibis.common import IbisTypeError
from ibis.impala.pandas_interop import pandas_to_ibis_schema, DataFrameWriter
from ibis.impala.tests.common import ImpalaE2E
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.util as util
import ibis


class TestPandasTypeInterop(unittest.TestCase):
Expand Down Expand Up @@ -244,5 +246,14 @@ def _check_roundtrip(self, df):

table = writer.delimited_table(path)
df2 = table.execute()

assert_frame_equal(df2, df)


def test_timestamp_with_timezone():
df = pd.DataFrame({
'A': pd.date_range('20130101', periods=3, tz='US/Eastern')
})
schema = pandas_to_ibis_schema(df)
expected = ibis.schema([('A', "timestamp('US/Eastern')")])
assert schema.equals(expected)
assert schema.types[0].equals(dt.Timestamp('US/Eastern'))
35 changes: 25 additions & 10 deletions ibis/impala/tests/test_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa=E402
import unittest

from posixpath import join as pjoin

Expand All @@ -21,17 +21,16 @@
pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

import impala
import impala # noqa: E402

from pandas.util.testing import assert_frame_equal
import pandas as pd
from pandas.util.testing import assert_frame_equal # noqa: E402
import pandas as pd # noqa: E402

from ibis.compat import unittest
from ibis.impala.compat import ImpylaError
from ibis.impala.tests.common import ImpalaE2E, ENV
from ibis.tests.util import assert_equal
import ibis
import ibis.util as util
from ibis.impala.compat import ImpylaError # noqa: E402
from ibis.impala.tests.common import ImpalaE2E, ENV # noqa: E402
from ibis.tests.util import assert_equal # noqa: E402
import ibis # noqa: E402
import ibis.util as util # noqa: E402


def _tmp_name():
Expand Down Expand Up @@ -138,6 +137,22 @@ def test_insert_select_partitioned_table(self):

self._verify_partitioned_table(part_t, df, unique_keys)

def test_create_partitioned_table_from_expr(self):
t = self.con.table('functional_alltypes')
expr = t[t.id <= 10][['id', 'double_col', 'month', 'year']]
name = 'tmppart_{}'.format(util.guid())
try:
self.con.create_table(name, expr, partition=[t.year])
except:
raise
else:
new = self.con.table(name)
expected = expr.execute().sort_values('id').reset_index(drop=True)
result = new.execute().sort_values('id').reset_index(drop=True)
assert_frame_equal(result, expected)
finally:
self.con.drop_table(name, force=True)

@pytest.mark.xfail(raises=AssertionError, reason='NYT')
def test_insert_overwrite_partition(self):
assert False
Expand Down
3 changes: 2 additions & 1 deletion ibis/impala/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import ibis

from ibis.impala.compiler import to_sql
from ibis.compat import unittest


class TestImpalaSQL(unittest.TestCase):
Expand Down
30 changes: 16 additions & 14 deletions ibis/impala/tests/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,30 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa=E402

import unittest
from posixpath import join as pjoin
import pytest

pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

import ibis

import ibis.expr.types as ir

from ibis.impala import ddl
import ibis.impala as api

from ibis.common import IbisTypeError
from ibis.compat import unittest, Decimal
from ibis.compat import Decimal
from ibis.expr.datatypes import validate_type
from ibis.expr.tests.mocks import MockConnection
from ibis.impala.tests.common import ImpalaE2E

import ibis.expr.rules as rules
import ibis.common as com
import ibis.util as util

pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.impala import ddl # noqa: E402
import ibis.impala as api # noqa: E402
from ibis.impala.tests.common import ImpalaE2E # noqa: E402


class TestWrapping(unittest.TestCase):

Expand Down Expand Up @@ -90,9 +90,9 @@ def test_udf_primitive_output_types(self):
ibis_type = validate_type(t)

expr = func(sv)
assert type(expr) == ibis_type.scalar_type()
assert type(expr) == type(ibis_type.scalar_type()(expr.op())) # noqa: E501, E721
expr = func(av)
assert type(expr) == ibis_type.array_type()
assert type(expr) == type(ibis_type.array_type()(expr.op())) # noqa: E501, E721

def test_uda_primitive_output_types(self):
types = [
Expand All @@ -113,8 +113,10 @@ def test_uda_primitive_output_types(self):

expr1 = func(sv)
expr2 = func(sv)
assert isinstance(expr1, ibis_type.scalar_type())
assert isinstance(expr2, ibis_type.scalar_type())
expected_type1 = type(ibis_type.scalar_type()(expr1.op()))
expected_type2 = type(ibis_type.scalar_type()(expr2.op()))
assert isinstance(expr1, expected_type1)
assert isinstance(expr2, expected_type2)

def test_decimal(self):
func = self._register_udf(['decimal(9,0)'], 'decimal(9,0)', 'test')
Expand Down
Empty file added ibis/pandas/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions ibis/pandas/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ibis.pandas.client import PandasClient
from ibis.pandas.execution import execute # noqa: F401


def connect(dictionary):
    """Create an ibis client backed by in-memory pandas DataFrames.

    Parameters
    ----------
    dictionary : dict
        A mapping of table name to ``pandas.DataFrame``.

    Returns
    -------
    PandasClient
    """
    return PandasClient(dictionary)
72 changes: 72 additions & 0 deletions ibis/pandas/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import six

import numpy as np
import pandas as pd

import ibis
import ibis.client as client
import ibis.expr.types as ir
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops


# Newer pandas exposes infer_dtype publicly under pd.api.types; older
# versions only have the private pd.lib location, so fall back to that.
try:
    infer_dtype = pd.api.types.infer_dtype
except AttributeError:
    infer_dtype = pd.lib.infer_dtype


# numpy dtype name -> ibis type, for dtypes whose ibis name differs from the
# numpy one; any dtype not listed here is passed through as its string form.
_DTYPE_TO_IBIS_TYPE = {
    'float64': dt.double,
    'float32': dt.float,
    'datetime64[ns]': dt.timestamp,
}


# Result of pandas infer_dtype() on an object column -> ibis type name.
# All string-like contents map to the ibis 'string' type.
_INFERRED_DTYPE_TO_IBIS_TYPE = {
    'string': 'string',
    'unicode': 'string',
    'bytes': 'string',
}


def pandas_dtypes_to_ibis_schema(df):
    """Infer an ibis schema from the dtypes of a pandas DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    schema : ibis.Schema

    Raises
    ------
    TypeError
        If a column name is not a string, or if an object column's inferred
        content type has no ibis equivalent.
    """
    dtypes = df.dtypes

    pairs = []

    for column_name, dtype in dtypes.iteritems():
        if not isinstance(column_name, six.string_types):
            raise TypeError(
                'Column names must be strings to use the pandas backend'
            )

        if dtype == np.object_:
            # object columns hold arbitrary Python values; inspect the
            # actual contents to pick an ibis type.
            inferred = infer_dtype(df[column_name])
            try:
                ibis_type = _INFERRED_DTYPE_TO_IBIS_TYPE[inferred]
            except KeyError:
                # Previously this surfaced as a bare KeyError with no hint
                # about which column was at fault.
                raise TypeError(
                    'Unable to infer an ibis type for column {!r} with '
                    'inferred pandas type {!r}'.format(column_name, inferred)
                )
        elif hasattr(dtype, 'tz'):
            # tz-aware datetime columns become timestamps with a timezone
            ibis_type = dt.Timestamp(str(dtype.tz))
        else:
            dtype_string = str(dtype)
            ibis_type = _DTYPE_TO_IBIS_TYPE.get(dtype_string, dtype_string)

        pairs.append((column_name, ibis_type))
    return ibis.schema(pairs)


class PandasClient(client.Client):
    """An ibis client backed by an in-memory mapping of table names to
    pandas DataFrames.
    """

    def __init__(self, dictionary):
        # `dictionary` maps table name -> pandas.DataFrame
        self.dictionary = dictionary

    def table(self, name):
        """Return a table expression for the DataFrame stored under `name`."""
        frame = self.dictionary[name]
        inferred_schema = pandas_dtypes_to_ibis_schema(frame)
        return ops.DatabaseTable(name, inferred_schema, self).to_expr()

    def execute(self, query, *args, **kwargs):
        """Evaluate the ibis expression `query` against the stored frames."""
        from ibis.pandas.execution import execute

        assert isinstance(query, ir.Expr)
        return execute(query)
83 changes: 83 additions & 0 deletions ibis/pandas/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import collections
import numbers
import datetime

import six

import numpy as np

import ibis.expr.types as ir
import ibis.expr.datatypes as dt

from ibis.pandas.dispatch import execute, execute_node


# Families of concrete Python/numpy types used to validate raw (non-Expr)
# arguments passed to operations during execution.
integer_types = six.integer_types + (np.integer,)
floating_types = numbers.Real,
numeric_types = integer_types + floating_types
boolean_types = bool, np.bool_
fixed_width_types = numeric_types + boolean_types
temporal_types = (
    datetime.datetime, datetime.date, datetime.timedelta,
    np.datetime64, np.timedelta64,
)
scalar_types = fixed_width_types + temporal_types
simple_types = scalar_types + six.string_types


def find_data(expr):
    """Find data sources bound to `expr`.

    Walks the expression tree and collects, per sub-expression, the concrete
    data it is bound to: the backing DataFrame for table nodes that carry a
    ``source``, or the raw value for literals.

    Parameters
    ----------
    expr : ibis.expr.types.Expr

    Returns
    -------
    data : collections.OrderedDict
    """
    pending = [expr]
    visited = set()
    data = collections.OrderedDict()

    while pending:
        current = pending.pop()
        op = current.op()

        if op not in visited:
            visited.add(op)

            if hasattr(op, 'source'):
                data[current] = op.source.dictionary[op.name]
            elif isinstance(op, ir.Literal):
                data[current] = op.value

        pending.extend(a for a in op.args if isinstance(a, ir.Expr))
    return data


# Argument types that execute_with_scope knows how to forward to
# execute_node, either by recursing (Expr) or passing through unchanged.
_VALID_INPUT_TYPES = (ir.Expr, dt.DataType, type(None)) + scalar_types


@execute.register(ir.Expr, dict)
def execute_with_scope(expr, scope):
    """Execute `expr` given `scope`, a mapping of expressions to data.

    If `expr` itself is a key in `scope` (i.e. it is a data source found by
    find_data), its bound data is returned directly without recursion.
    """
    if expr in scope:
        return scope[expr]

    op = expr.op()
    args = op.args

    # Recurse into Expr arguments and pass recognized raw values through;
    # if NO argument is of a recognized type, fall back to looking each
    # argument up in the scope (leaving it unchanged when absent).
    computed_args = [
        execute(arg, scope) if hasattr(arg, 'op') else arg
        for arg in args if isinstance(arg, _VALID_INPUT_TYPES)
    ] or [scope.get(arg, arg) for arg in args]

    return execute_node(op, *computed_args, scope=scope)


@execute.register(ir.Expr)
def execute_without_scope(expr):
    """Execute `expr` by first discovering the data bound to its tables.

    Raises
    ------
    ValueError
        If no data sources are bound to `expr`.
    """
    data_scope = find_data(expr)
    if not data_scope:
        raise ValueError('No data sources found')
    return execute(expr, data_scope)
5 changes: 5 additions & 0 deletions ibis/pandas/dispatch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from multipledispatch import Dispatcher


# `execute` evaluates a whole expression (optionally with a scope mapping);
# `execute_node` evaluates one operation node given its computed inputs.
execute = Dispatcher('execute')
execute_node = Dispatcher('execute_node')
646 changes: 646 additions & 0 deletions ibis/pandas/execution.py

Large diffs are not rendered by default.

Empty file added ibis/pandas/tests/__init__.py
Empty file.
30 changes: 30 additions & 0 deletions ibis/pandas/tests/test_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pytest

pytest.importorskip('multipledispatch')

import pandas as pd # noqa: E402
import pandas.util.testing as tm # noqa: E402

from ibis.pandas.api import connect # noqa: E402


@pytest.fixture
def df():
    # A small frame exercising each inferable column kind: int64, object
    # (strings), float64, and datetime64[ns].
    return pd.DataFrame({
        'a': [1, 2, 3],
        'b': list('abc'),
        'c': [4.0, 5.0, 6.0],
        'd': pd.date_range('now', periods=3).values
    })


@pytest.fixture
def dictionary(df):
    # The pandas backend is constructed from a mapping of table name -> frame.
    return dict(df=df)


def test_table(dictionary, df):
    # Round-trip: a DataFrame registered with the client should come back
    # unchanged when the corresponding table expression is executed.
    con = connect(dictionary)
    data = con.table('df')
    result = data.execute()
    tm.assert_frame_equal(df, result)
11 changes: 11 additions & 0 deletions ibis/pandas/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pytest

pytest.importorskip('multipledispatch')

from ibis.pandas.execution import execute, execute_node # noqa: E402
from multipledispatch.conflict import ambiguities # noqa: E402


@pytest.mark.parametrize('func', [execute, execute_node])
def test_no_execute_ambiguities(func):
    # multipledispatch picks an arbitrary winner for ambiguous signature
    # registrations; fail loudly here if any ambiguity exists.
    assert not ambiguities(func.funcs)
675 changes: 675 additions & 0 deletions ibis/pandas/tests/test_operations.py

Large diffs are not rendered by default.

289 changes: 255 additions & 34 deletions ibis/sql/alchemy.py

Large diffs are not rendered by default.

25 changes: 16 additions & 9 deletions ibis/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def _collect_Aggregation(self, expr, toplevel=False):

self.group_by = self._convert_group_by(sub_op.by)
self.having = sub_op.having
self.select_set = sub_op.by + sub_op.agg_exprs
self.select_set = sub_op.by + sub_op.metrics
self.table_set = sub_op.table
self.filters = sub_op.predicates
self.sort_by = sub_op.sort_keys
Expand Down Expand Up @@ -544,14 +544,21 @@ def _analyze_subqueries(self):
def _get_subtables(expr):
subtables = []

def _walk(expr):
op = expr.op()
if isinstance(op, ops.Join):
_walk(op.left)
_walk(op.right)
else:
subtables.append(expr)
_walk(expr)
stack = [expr]
seen = set()

while stack:
e = stack.pop()
op = e.op()

if op not in seen:
seen.add(op)

if isinstance(op, ops.Join):
stack.append(op.right)
stack.append(op.left)
else:
subtables.append(e)

return subtables

Expand Down
90 changes: 77 additions & 13 deletions ibis/sql/postgres/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,51 @@
# limitations under the License.


from .client import PostgreSQLClient
from ibis.sql.alchemy import to_sqlalchemy

from .client import PostgreSQLClient, PostgreSQLDialect
from .compiler import rewrites # noqa


def compile(expr):
"""
Force compilation of expression for the PostgreSQL target
"""
from .client import PostgreSQLDialect
from ibis.sql.alchemy import to_sqlalchemy
return to_sqlalchemy(expr, dialect=PostgreSQLDialect)
"""Compile an ibis expression to the PostgreSQL target.
Parameters
----------
expr : ibis.expr.types.Expr
The ibis expression to compile
def connect(host=None, user=None, password=None, port=None, database=None,
url=None, driver=None):
Returns
-------
sqlalchemy_expression : sqlalchemy.sql.expression.ClauseElement
Examples
--------
>>> import os
>>> database = os.environ.get('IBIS_TEST_POSTGRES_DB', 'ibis_testing')
>>> con = connect(database=database, host='localhost')
>>> t = con.table('functional_alltypes')
>>> expr = t.double_col + 1
>>> sqla = compile(expr)
>>> print(str(sqla)) # doctest: +NORMALIZE_WHITESPACE
SELECT t0.double_col + %(param_1)s AS tmp
FROM functional_alltypes AS t0
"""
Create an Ibis client connected to a PostgreSQL database.
return to_sqlalchemy(expr, dialect=PostgreSQLDialect)


def connect(
host=None,
user=None,
password=None,
port=None,
database=None,
url=None,
driver=None
):

Multiple database files can be created using the attach() method
"""Create an Ibis client located at `user`:`password`@`host`:`port`
connected to a PostgreSQL database named `database`.
Parameters
----------
Expand All @@ -45,6 +70,45 @@ def connect(host=None, user=None, password=None, port=None, database=None,
Complete SQLAlchemy connection string. If passed, the other connection
arguments are ignored.
driver : string, default 'psycopg2'
Returns
-------
PostgreSQLClient
Examples
--------
>>> import os
>>> database = os.environ.get('IBIS_TEST_POSTGRES_DB', 'ibis_testing')
>>> con = connect(database=database, host='localhost')
>>> con.list_tables() # doctest: +ELLIPSIS
[...]
>>> t = con.table('functional_alltypes')
>>> t
PostgreSQLTable[table]
name: functional_alltypes
schema:
index : int64
Unnamed: 0 : int64
id : int32
bool_col : boolean
tinyint_col : int16
smallint_col : int16
int_col : int32
bigint_col : int64
float_col : float
double_col : double
date_string_col : string
string_col : string
timestamp_col : timestamp
year : int32
month : int32
"""
return PostgreSQLClient(host=host, user=user, password=password, port=port,
database=database, url=url, driver=driver)
return PostgreSQLClient(
host=host,
user=user,
password=password,
port=port,
database=database,
url=url,
driver=driver,
)
178 changes: 135 additions & 43 deletions ibis/sql/postgres/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,63 +12,129 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import getpass
import contextlib

import sqlalchemy as sa

from ibis.client import Database
from .compiler import PostgreSQLDialect
import ibis.expr.types as ir
from ibis.sql.postgres.compiler import PostgreSQLDialect
import ibis.sql.alchemy as alch


class PostgreSQLTable(alch.AlchemyTable):
pass


class PostgreSQLDatabase(Database):
class PostgreSQLSchema(alch.AlchemyDatabaseSchema):
pass


class PostgreSQLDatabase(alch.AlchemyDatabase):
schema_class = PostgreSQLSchema


class PostgreSQLClient(alch.AlchemyClient):

"""
The Ibis PostgreSQL client class
"""The Ibis PostgreSQL client class
Attributes
----------
con : sqlalchemy.engine.Engine
"""

dialect = PostgreSQLDialect
database_class = PostgreSQLDatabase

def __init__(self, host=None, user=None, password=None, port=None,
database=None, url=None, driver=None):
default_database_name = 'public'

def __init__(
self,
host=None,
user=None,
password=None,
port=None,
database=None,
url=None,
driver=None
):
if url is None:
if user is not None:
if password is None:
userpass = user
else:
userpass = '{0}:{1}'.format(user, password)
if driver is not None and driver != 'psycopg2':
raise NotImplementedError(
'psycopg2 is currently the only supported driver'
)
url = sa.engine.url.URL(
'postgresql+psycopg2',
username=user or getpass.getuser(),
password=password,
host=host or 'localhost',
port=port,
database=database or self.__class__.default_database_name,
)
else:
url = sa.engine.url.make_url(url)

address = '{0}@{1}'.format(userpass, host)
else:
address = host
super(PostgreSQLClient, self).__init__(sa.create_engine(url))
self.name = url.database
self.database_name = self.__class__.default_database_name

if port is not None:
address = '{0}:{1}'.format(address, port)
@contextlib.contextmanager
def begin(self):
with super(PostgreSQLClient, self).begin() as bind:
bind.execute('SET LOCAL TIMEZONE = UTC')
yield bind

if database is not None:
address = '{0}/{1}'.format(address, database)
def database(self, name=None):
"""Connect to a database called `name`.
if driver is not None and driver != 'psycopg2':
raise NotImplementedError(driver)
Parameters
----------
name : str, optional
The name of the database to connect to. If ``None``, return
the database named ``self.current_database``.
url = 'postgresql://{0}'.format(address)
Returns
-------
db : PostgreSQLDatabase
An :class:`ibis.sql.postgres.client.PostgreSQLDatabase` instance.
url = sa.engine.url.make_url(url)
self.name = url.database
self.database_name = 'public'
self.con = sa.create_engine(url)
self.meta = sa.MetaData(bind=self.con)
Notes
-----
This creates a new connection if `name` is both not ``None`` and not
equal to the current database.
"""
if name == self.current_database or (
name is None and name != self.current_database
):
return self.database_class(self.current_database, self)
else:
url = self.con.url
client_class = type(self)
new_client = client_class(
host=url.host,
user=url.username,
port=url.port,
password=url.password,
database=name,
)
return self.database_class(name, new_client)

def schema(self, name):
"""Get a schema object from the current database for the schema named `name`.
Parameters
----------
name : str
Returns
-------
schema : PostgreSQLSchema
An :class:`ibis.sql.postgres.client.PostgreSQLSchema` instance.
"""
return self.database().schema(name)

@property
def current_database(self):
"""The name of the current database this client is connected to."""
return self.database_name

def list_databases(self):
Expand All @@ -79,30 +145,56 @@ def list_databases(self):
)
]

def set_database(self):
raise NotImplementedError
def list_schemas(self):
"""List all the schemas in the current database."""
return self.inspector.get_schema_names()

def set_database(self, name):
raise NotImplementedError(
'Cannot set database with PostgreSQL client. To use a different'
' database, use client.database({!r})'.format(name)
)

@property
def client(self):
return self

def table(self, name, database=None):
"""
Create a table expression that references a particular table in the
PostgreSQL database
def table(self, name, database=None, schema=None):
"""Create a table expression that references a particular a table
called `name` in a PostgreSQL database called `database`.
Parameters
----------
name : string
name : str
The name of the table to retrieve.
database : str, optional
The database in which the table referred to by `name` resides. If
``None`` then the ``current_database`` is used.
schema : str, optional
The schema in which the table resides. If ``None`` then the
`public` schema is assumed.
Returns
-------
table : TableExpr
A table expression.
"""
alch_table = self._get_sqla_table(name)
node = PostgreSQLTable(alch_table, self)
return self._table_expr_klass(node)

@property
def _table_expr_klass(self):
return ir.TableExpr
if database is not None and database != self.current_database:
return (
self.database(name=database)
.table(name=name, schema=schema)
)
else:
alch_table = self._get_sqla_table(name, schema=schema)
node = PostgreSQLTable(alch_table, self, self._schemas.get(name))
return self._table_expr_klass(node)

def list_tables(self, like=None, database=None, schema=None):
if database is not None and database != self.current_database:
return (
self.database(name=database)
.list_tables(like=like, schema=schema)
)
else:
parent = super(PostgreSQLClient, self)
return parent.list_tables(like=like, schema=schema)
74 changes: 54 additions & 20 deletions ibis/sql/postgres/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import GenericFunction

from ibis.sql.alchemy import unary, varargs, fixed_arity, Over
from ibis.sql.alchemy import (
unary, varargs, fixed_arity, Over, _variance_reduction, _get_sqla_table
)
import ibis.expr.analytics as L
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.expr.window as W

import ibis.sql.alchemy as alch
Expand Down Expand Up @@ -93,7 +94,7 @@ def _cast(t, expr):
sa_type = t.get_sqla_type(typ)

# specialize going from an integer type to a timestamp
if isinstance(arg.type(), dt.Integer) and issubclass(sa_type, sa.DateTime):
if isinstance(arg.type(), dt.Integer) and isinstance(sa_type, sa.DateTime):
return sa.func.timezone('UTC', sa.func.to_timestamp(sa_arg))
return sa.cast(sa_arg, sa_type)

Expand Down Expand Up @@ -319,26 +320,19 @@ def _regex_replace(t, expr):
return sa.func.regexp_replace(string, pattern, replacement, 'g')


def _variance_reduction(func_name):
suffix = {
'sample': 'samp',
'pop': 'pop'
}
def _reduction(func_name):
def reduction_compiler(t, expr):
arg, where = expr.op().args

def variance_compiler(t, expr):
arg, where, how = expr.op().args
func = getattr(sa.func, '%s_%s' % (func_name, suffix.get(how, 'samp')))
if arg.type().equals(dt.boolean):
arg = arg.cast('int32')

if where is None:
return func(t.translate(arg))
else:
# TODO(wesm): PostgreSQL 9.4 stuff
# where_compiled = t.translate(where)
# return sa.funcfilter(result, where_compiled)
filtered = where.ifelse(ir.null(), arg)
return func(t.translate(filtered))
func = getattr(sa.func, func_name)

return variance_compiler
if where is not None:
arg = where.ifelse(arg, None)
return func(t.translate(arg))
return reduction_compiler


def _log(t, expr):
Expand Down Expand Up @@ -511,7 +505,41 @@ def _identical_to(t, expr):
return left.op('IS NOT DISTINCT FROM')(right)


def _hll_cardinality(t, expr):
    """Translate HLLCardinality as an exact COUNT(DISTINCT ...).

    PostgreSQL ships no built-in HyperLogLog, so fall back to a standard
    distinct count for now.
    """
    operand, _ = expr.op().args
    translated = t.translate(operand)
    return sa.func.count(sa.distinct(translated))


def _table_column(t, expr):
    """Translate a TableColumn reference, applying AT TIME ZONE for
    timezone-aware timestamp columns.
    """
    op = expr.op()
    ctx = t.context
    table = op.table

    sa_table = _get_sqla_table(ctx, table)
    out_expr = getattr(sa_table.c, op.name)

    expr_type = expr.type()

    if isinstance(expr_type, dt.Timestamp):
        timezone = expr_type.timezone
        if timezone is not None:
            # Re-label so the column keeps its original name after the
            # AT TIME ZONE operator is applied.
            out_expr = out_expr.op('AT TIME ZONE')(timezone).label(op.name)

    # If the column does not originate from the table set in the current SELECT
    # context, we should format as a subquery
    if t.permit_subquery and ctx.is_foreign_expr(table):
        return sa.select([out_expr])

    return out_expr


_operation_registry.update({
# We override this here to support time zones
ops.TableColumn: _table_column,

# types
ops.Cast: _cast,
ops.TypeOf: _typeof,
Expand Down Expand Up @@ -564,6 +592,7 @@ def _identical_to(t, expr):
ops.Ln: fixed_arity(sa.func.ln, 1),
ops.Log2: fixed_arity(lambda x: sa.func.log(2, x), 1),
ops.Log10: fixed_arity(sa.func.log, 1),
ops.Power: fixed_arity(sa.func.power, 2),

# dates and times
ops.Strftime: _strftime,
Expand All @@ -574,6 +603,10 @@ def _identical_to(t, expr):
ops.ExtractMinute: _extract('minute'),
ops.ExtractSecond: _second,
ops.ExtractMillisecond: _millisecond,
ops.Sum: _reduction('sum'),
ops.Mean: _reduction('avg'),
ops.Min: _reduction('min'),
ops.Max: _reduction('max'),
ops.Variance: _variance_reduction('var'),
ops.StandardDev: _variance_reduction('stddev'),

Expand All @@ -597,6 +630,7 @@ def _identical_to(t, expr):
ops.ArrayConcat: fixed_arity(operator.add, 2),
ops.ArrayRepeat: _array_repeat,
ops.IdenticalTo: _identical_to,
ops.HLLCardinality: _hll_cardinality,
})


Expand Down
27 changes: 22 additions & 5 deletions ibis/sql/postgres/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
# limitations under the License.

import os
import unittest

import pandas as pd
import pytest

from .common import PostgreSQLTests
from ibis.compat import unittest
from ibis.tests.util import assert_equal
import ibis.expr.types as ir
import ibis
Expand Down Expand Up @@ -57,7 +57,7 @@ def test_list_tables(self):
assert len(self.con.list_tables(like='functional')) == 1

def test_compile_verify(self):
unsupported_expr = self.alltypes.string_col.approx_nunique()
unsupported_expr = self.alltypes.double_col.approx_median()
assert not unsupported_expr.verify()

supported_expr = self.alltypes.double_col.sum()
Expand All @@ -71,10 +71,12 @@ def test_database_layer(self):

assert db.list_tables() == self.con.list_tables()

db_schema = self.con.schema("information_schema")

assert db_schema.list_tables() != self.con.list_tables()

def test_compile_toplevel(self):
t = ibis.table([
('foo', 'double')
])
t = ibis.table([('foo', 'double')], name='t0')

# it works!
expr = t.foo.sum()
Expand All @@ -88,6 +90,10 @@ def test_list_databases(self):
assert POSTGRES_TEST_DB is not None
assert POSTGRES_TEST_DB in self.con.list_databases()

def test_list_schemas(self):
assert 'public' in self.con.list_schemas()
assert 'information_schema' in self.con.list_schemas()


@pytest.mark.postgresql
def test_metadata_is_per_table():
Expand All @@ -98,3 +104,14 @@ def test_metadata_is_per_table():
t = con.table('functional_alltypes') # noqa
assert 'functional_alltypes' in con.meta.tables
assert len(con.meta.tables) == 1


@pytest.mark.postgresql
def test_schema_table():
con = ibis.postgres.connect(host='localhost', database=POSTGRES_TEST_DB)

# ensure that we can reflect the information schema (which is guaranteed
# to exist)
schema = con.schema('information_schema')

assert isinstance(schema['tables'], ir.TableExpr)
256 changes: 209 additions & 47 deletions ibis/sql/postgres/tests/test_functions.py

Large diffs are not rendered by default.

239 changes: 219 additions & 20 deletions ibis/sql/sqlite/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@
# limitations under the License.

import os
import re
import math
import inspect
import functools

import sqlalchemy as sa

from ibis.client import Database
from .compiler import SQLiteDialect
import ibis.expr.types as ir
import ibis.sql.alchemy as alch
import ibis.common as com

Expand All @@ -31,6 +34,183 @@ class SQLiteDatabase(Database):
pass


def _ibis_sqlite_regex_search(string, regex):
"""Return whether `regex` exists in `string`.
Parameters
----------
string : str
regex : str
Returns
-------
found : bool
"""
if string is None or regex is None:
return None
return re.search(regex, string) is not None


def _ibis_sqlite_regex_replace(string, pattern, replacement):
"""Replace occurences of `pattern` in `string` with `replacement`.
Parameters
----------
string : str
pattern : str
replacement : str
Returns
-------
result : str
"""
if string is None or pattern is None or replacement is None:
return None
return re.sub(pattern, replacement, string)


def _ibis_sqlite_regex_extract(string, pattern, index):
"""Extract match of regular expression `pattern` from `string` at `index`.
Parameters
----------
string : str
pattern : str
index : int
Returns
-------
result : str or None
"""
if string is None or pattern is None or index is None:
return None

result = re.search(pattern, string)
if result is not None and 0 <= index <= result.lastindex:
return result.group(index)
else:
return None


def _ibis_sqlite_power(arg, power):
"""Raise `arg` to the `power` power.
Parameters
----------
arg : number
Number to raise to `power`.
power : number
Number to raise `arg` to.
Returns
-------
result : Optional[number]
None If either argument is None or we're trying to take a fractional
power or a negative number
"""
if arg is None or power is None or (arg < 0.0 and not power.is_integer()):
return None
return arg ** power


def _ibis_sqlite_sqrt(arg):
"""Square root of `arg`.
Parameters
----------
arg : Optional[number]
Number to take the square root of
Returns
-------
result : Optional[number]
None if `arg` is None or less than 0 otherwise the square root
"""
return None if arg is None or arg < 0.0 else math.sqrt(arg)


class _ibis_sqlite_var(object):

def __init__(self, offset):
self.mean = 0.0
self.sum_of_squares_of_differences = 0.0
self.count = 0
self.offset = offset

def step(self, value):
if value is None:
return

self.count += 1
delta = value - self.mean
self.mean = delta
self.sum_of_squares_of_differences += delta * (value - self.mean)

def finalize(self):
if not self.count:
return None
return self.sum_of_squares_of_differences / (self.count - self.offset)


class _ibis_sqlite_var_pop(_ibis_sqlite_var):
    """Population variance aggregate (denominator N, i.e. offset 0)."""

    def __init__(self):
        super(_ibis_sqlite_var_pop, self).__init__(0)


class _ibis_sqlite_var_samp(_ibis_sqlite_var):
    """Sample variance aggregate (denominator N - 1, i.e. offset 1)."""

    def __init__(self):
        super(_ibis_sqlite_var_samp, self).__init__(1)


def number_of_arguments(callable):
    """Return the number of positional arguments `callable` accepts.

    SQLite requires the argument count up front when registering Python
    callables as functions or aggregates.

    Parameters
    ----------
    callable : callable

    Returns
    -------
    int

    Raises
    ------
    TypeError
        If `callable` takes ``*args``.
    NotImplementedError
        If `callable` takes ``**kwargs`` or has default argument values.
    """
    # inspect.getargspec was removed in Python 3.11; prefer getfullargspec
    # when available.  FullArgSpec names the **kwargs slot `varkw`, while the
    # legacy ArgSpec names it `keywords`.
    try:
        argspec = inspect.getfullargspec(callable)
        keywords = argspec.varkw
    except AttributeError:
        argspec = inspect.getargspec(callable)
        keywords = argspec.keywords

    if argspec.varargs is not None:
        raise TypeError(
            'Variable length arguments not supported in Ibis SQLite function '
            'registration'
        )

    if keywords is not None:
        raise NotImplementedError(
            'Keyword arguments not implemented for Ibis SQLite function '
            'registration'
        )

    if argspec.defaults is not None:
        # The original reused the "Keyword arguments" message here by
        # mistake; this branch is about default values.
        raise NotImplementedError(
            'Default arguments not implemented for Ibis SQLite function '
            'registration'
        )
    return len(argspec.args)


def _register_function(func, con):
    """Register a Python callable with a SQLite connection `con`.

    The function is registered under its ``__name__`` with a fixed arity.

    Parameters
    ----------
    func : callable
    con : sqlalchemy.Connection
    """
    nargs = number_of_arguments(func)
    # con.connection.connection unwraps the SQLAlchemy connection and its
    # DBAPI proxy to reach the raw sqlite3 connection.
    con.connection.connection.create_function(func.__name__, nargs, func)


def _register_aggregate(agg, con):
    """Register a Python class that performs aggregation in SQLite.

    The class is registered under its ``__name__``; its ``step`` method
    determines the aggregate's arity.

    Parameters
    ----------
    agg : type
    con : sqlalchemy.Connection
    """
    nargs = number_of_arguments(agg.step) - 1  # because self
    con.connection.connection.create_aggregate(agg.__name__, nargs, agg)


class SQLiteClient(alch.AlchemyClient):

"""
Expand All @@ -41,43 +221,59 @@ class SQLiteClient(alch.AlchemyClient):
database_class = SQLiteDatabase

def __init__(self, path=None, create=False):
super(SQLiteClient, self).__init__(sa.create_engine('sqlite://'))
self.name = path
self.database_name = 'default'

self.con = sa.create_engine('sqlite://')

if path:
if path is not None:
self.attach(self.database_name, path, create=create)

self.meta = sa.MetaData(bind=self.con)
for func in (
_ibis_sqlite_regex_search,
_ibis_sqlite_regex_replace,
_ibis_sqlite_regex_extract,
_ibis_sqlite_power,
_ibis_sqlite_sqrt,
):
self.con.run_callable(functools.partial(_register_function, func))

for agg in (_ibis_sqlite_var_pop, _ibis_sqlite_var_samp):
self.con.run_callable(functools.partial(_register_aggregate, agg))

@property
def current_database(self):
return self.database_name

def list_databases(self):
raise NotImplementedError
raise NotImplementedError(
'Listing databases in SQLite is not implemented'
)

def set_database(self):
raise NotImplementedError
def set_database(self, name):
raise NotImplementedError('set_database is not implemented for SQLite')

def attach(self, name, path, create=False):
"""
Connect another SQLite database file
"""Connect another SQLite database file
Parameters
----------
name : string
Database name within SQLite
Database name within SQLite
path : string
Path to sqlite3 file
create : boolean, default False
If file does not exist, create file if True otherwise raise Exception
Path to sqlite3 file
create : boolean, optional
If file does not exist, create file if True otherwise raise an
Exception
"""
if not os.path.exists(path) and not create:
raise com.IbisError('File {0} does not exist'.format(path))
raise com.IbisError('File {!r} does not exist'.format(path))

self.con.execute("ATTACH DATABASE '{0}' AS '{1}'".format(path, name))
self.raw_sql(
"ATTACH DATABASE {path!r} AS {name}".format(
path=path,
name=self.con.dialect.identifier_preparer.quote(name),
)
)

@property
def client(self):
Expand All @@ -91,15 +287,18 @@ def table(self, name, database=None):
Parameters
----------
name : string
database : string, optional
name of the attached database that the table is located in.
Returns
-------
table : TableExpr
"""
alch_table = self._get_sqla_table(name)
alch_table = self._get_sqla_table(name, schema=database)
node = SQLiteTable(alch_table, self)
return self._table_expr_klass(node)

@property
def _table_expr_klass(self):
return ir.TableExpr
def list_tables(self, like=None, database=None, schema=None):
if database is None:
database = self.database_name
return super(SQLiteClient, self).list_tables(like, schema=database)
35 changes: 26 additions & 9 deletions ibis/sql/sqlite/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

import sqlalchemy as sa

from ibis.sql.alchemy import unary, varargs, fixed_arity
import toolz

from ibis.sql.alchemy import unary, varargs, fixed_arity, _variance_reduction
import ibis.sql.alchemy as alch
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
Expand All @@ -33,14 +35,19 @@ def _cast(t, expr):
sa_arg = t.translate(arg)
sa_type = t.get_sqla_type(target_type)

# SQLite does not have a physical date/time/timestamp type, so
# unfortunately cast to typestamp must be a no-op, and we have to trust
# that the user's data can actually be correctly parsed by SQLite.
if isinstance(target_type, dt.Timestamp):
if not isinstance(arg, (ir.IntegerValue, ir.StringValue)):
raise com.TranslationError(type(arg))

return sa_arg
if isinstance(arg, ir.IntegerValue):
return sa.func.datetime(sa_arg, 'unixepoch')
elif isinstance(arg, ir.StringValue):
return sa.func.strftime('%Y-%m-%d %H:%M:%f', sa_arg)
raise com.TranslationError(type(arg))

if isinstance(target_type, dt.Date):
if isinstance(arg, ir.IntegerValue):
return sa.func.date(sa.func.datetime(sa_arg, 'unixepoch'))
elif isinstance(arg, ir.StringValue):
return sa.func.date(sa_arg)
raise com.TranslationError(type(arg))

if isinstance(arg, ir.CategoryValue) and target_type == 'int32':
return sa_arg
Expand Down Expand Up @@ -110,7 +117,7 @@ def _strftime_int(fmt):
def translator(t, expr):
arg, = expr.op().args
sa_arg = t.translate(arg)
return sa.cast(sa.func.strftime(fmt, sa_arg), sa.types.INTEGER)
return sa.cast(sa.func.strftime(fmt, sa_arg), sa.INTEGER)
return translator


Expand Down Expand Up @@ -172,6 +179,16 @@ def _identical_to(t, expr):
ops.ExtractMillisecond: _millisecond,
ops.TimestampNow: _now,
ops.IdenticalTo: _identical_to,
ops.RegexSearch: fixed_arity(sa.func._ibis_sqlite_regex_search, 2),
ops.RegexReplace: fixed_arity(sa.func._ibis_sqlite_regex_replace, 3),
ops.RegexExtract: fixed_arity(sa.func._ibis_sqlite_regex_extract, 3),
ops.Sqrt: fixed_arity(sa.func._ibis_sqlite_sqrt, 1),
ops.Power: fixed_arity(sa.func._ibis_sqlite_power, 2),
ops.Variance: _variance_reduction('_ibis_sqlite_var'),
ops.StandardDev: toolz.compose(
sa.func._ibis_sqlite_sqrt,
_variance_reduction('_ibis_sqlite_var')
),
})


Expand Down
11 changes: 5 additions & 6 deletions ibis/sql/sqlite/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import os

import pandas as pd
Expand All @@ -20,7 +21,6 @@
import uuid

from .common import SQLiteTests
from ibis.compat import unittest
from ibis.tests.util import assert_equal
from ibis.util import guid
import ibis.expr.types as ir
Expand All @@ -31,13 +31,14 @@
class TestSQLiteClient(SQLiteTests, unittest.TestCase):

def test_file_not_exist_and_create(self):
path = '__ibis_tmp_{0}.db'.format(guid())
path = '__ibis_tmp_{}.db'.format(guid())

with self.assertRaises(com.IbisError):
ibis.sqlite.connect(path)

ibis.sqlite.connect(path, create=True)
con = ibis.sqlite.connect(path, create=True)
assert os.path.exists(path)
con.con.dispose()
os.remove(path)

def test_table(self):
Expand Down Expand Up @@ -91,9 +92,7 @@ def test_database_layer(self):
assert db.list_tables() == self.con.list_tables()

def test_compile_toplevel(self):
t = ibis.table([
('foo', 'double')
])
t = ibis.table([('foo', 'double')], name='t0')

# it works!
expr = t.foo.sum()
Expand Down
123 changes: 93 additions & 30 deletions ibis/sql/sqlite/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,33 @@

import os
import uuid
import unittest
import operator
import math

import pytest # noqa
import pytest

from .common import SQLiteTests
from ibis.compat import unittest
from ibis import literal as L
import ibis.expr.types as ir
import ibis
import pandas.util.testing as tm
sa = pytest.importorskip('sqlalchemy')

import sqlalchemy as sa
from .common import SQLiteTests # noqa: E402
from ibis import literal as L # noqa: E402
import ibis.expr.types as ir # noqa: E402
import ibis # noqa: E402
import pandas.util.testing as tm # noqa: E402


@pytest.fixture
def con():
    """Connect to the SQLite test database.

    The database path comes from the ``IBIS_TEST_SQLITE_DB_PATH``
    environment variable; when unset, fall back to ``ibis_testing.db``
    in the current working directory.
    """
    db_path = os.environ.get('IBIS_TEST_SQLITE_DB_PATH', 'ibis_testing.db')
    return ibis.sqlite.connect(db_path)


@pytest.fixture
def db(con):
    """Return the default database object of the test connection."""
    return con.database()


class TestSQLiteFunctions(SQLiteTests, unittest.TestCase):
Expand Down Expand Up @@ -66,8 +82,14 @@ def test_timestamp_cast_noop(self):

# When translated to SQLAlchemy it becomes SQLite date/time function calls
cases = [
(tc_casted, at.c.timestamp_col),
(ic_casted, at.c.int_col)
(
tc_casted.cast('timestamp'),
sa.func.strftime('%Y-%m-%d %H:%M:%f', at.c.timestamp_col)
),
(
ic_casted.cast('timestamp'),
sa.func.datetime(at.c.int_col, 'unixepoch')
),
]
self._check_expr_cases(cases)

Expand Down Expand Up @@ -100,7 +122,7 @@ def test_binary_arithmetic(self):
(L(3) - L(4), -1),
(L(3) * L(4), 12),
(L(12) / L(4), 3),
# (L(12) ** L(2), 144),
(L(12) ** L(2), 144),
(L(12) % L(5), 2)
]
self._check_e2e_cases(cases)
Expand Down Expand Up @@ -201,18 +223,45 @@ def test_math_functions(self):

(L(5.5).round(), 6.0),
(L(5.556).round(2), 5.56),
(L(5.556).sqrt(), math.sqrt(5.556)),
]
self._check_e2e_cases(cases)

def test_regexp(self):
pytest.skip('NYI: Requires adding regex udf with sqlite3')

v = L('abcd')
v2 = L('1222')
ns = L(None).cast('string')
ni = L(None).cast('int64')
cases = [
(v.re_search('[a-z]'), True),
(v.re_search('[\d]+'), False),
(v2.re_search('[\d]+'), True),
(v.re_replace('[ab]', ''), 'cd'),
(v2.re_extract(r'1(22)\d+', 1).cast('int64'), 22),
(v.re_extract('(\d+)', 1), None),
(v2.re_extract('([a-z]+)', 1), None),
(v2.re_extract(r'1(22)\d+', 2), None),
] + [
# search nulls
(v.re_search(None), None),
(ns.re_search('[a-z]'), None),
(ns.re_search(ns), None),
] + [
# replace nulls
(ns.re_replace(ns, ns), None),
(v.re_replace(ns, ns), None),
(v.re_replace('a', ns), None),
(v.re_replace(ns, 'a'), None),
(ns.re_replace('a', ns), None),
(ns.re_replace(ns, 'a'), None),
] + [
# extract nulls
(ns.re_extract(ns, ni), None),
(v.re_extract(ns, ni), None),
(v.re_extract('a', ni), None),
(v.re_extract(ns, 1), None),
(ns.re_extract('a', ni), None),
(ns.re_extract(ns, 1), None),
]
self._check_e2e_cases(cases)

Expand Down Expand Up @@ -302,12 +351,16 @@ def test_aggregations_execute(self):
d.mean(),
d.min(),
d.max(),
d.std(),
d.var(),

table.bool_col.count(where=cond),
d.sum(where=cond),
d.mean(where=cond),
d.min(where=cond),
d.max(where=cond),
d.std(where=cond),
d.var(where=cond),

s.group_concat(),
]
Expand Down Expand Up @@ -349,29 +402,22 @@ def test_subquery_invokes_sqlite_compiler(self):
expr.execute()

def _execute_aggregation(self, table, exprs):
agg_exprs = [expr.name('e%d' % i)
for i, expr in enumerate(exprs)]

agged_table = table.aggregate(agg_exprs)
metrics = [expr.name('e%d' % i) for i, expr in enumerate(exprs)]
agged_table = table.aggregate(metrics)
agged_table.execute()

def _execute_projection(self, table, exprs):
agg_exprs = [expr.name('e%d' % i)
for i, expr in enumerate(exprs)]

proj = table.projection(agg_exprs)
metrics = [expr.name('e%d' % i) for i, expr in enumerate(exprs)]
proj = table.projection(metrics)
proj.execute()

def test_filter_has_sqla_table(self):
t = self.alltypes
pred = t.year == 2010
filt = t.filter(pred).sort_by('float_col').float_col
filt = t.filter(pred).float_col
s = filt.execute()
result = s.squeeze().reset_index(drop=True)
expected = t.execute().query(
'year == 2010'
).sort('float_col').float_col

expected = t.execute().query('year == 2010').float_col
assert len(result) == len(expected)

def test_column_access_after_sort(self):
Expand All @@ -398,6 +444,7 @@ def test_materialized_join(self):
result = joined.val2.execute()
assert len(result) == 2
finally:
con.con.dispose()
os.remove(path)

def test_anonymous_aggregate(self):
Expand Down Expand Up @@ -462,7 +509,7 @@ def test_compile_with_named_table():
def test_compile_with_unnamed_table():
t = ibis.table([('a', 'string')])
result = ibis.sqlite.compile(t.a)
st = sa.table('t0', sa.column('a', sa.String)).alias('t0')
st = sa.table(t.op().name, sa.column('a', sa.String)).alias('t0')
assert str(result) == str(sa.select([st.c.a]))


Expand All @@ -472,8 +519,8 @@ def test_compile_with_multiple_unnamed_tables():
s = ibis.table([('b', 'string')])
join = t.join(s, t.a == s.b)
result = ibis.sqlite.compile(join)
sqla_t = sa.table('t0', sa.column('a', sa.String)).alias('t0')
sqla_s = sa.table('t1', sa.column('b', sa.String)).alias('t1')
sqla_t = sa.table(t.op().name, sa.column('a', sa.String)).alias('t0')
sqla_s = sa.table(s.op().name, sa.column('b', sa.String)).alias('t1')
sqla_join = sqla_t.join(sqla_s, sqla_t.c.a == sqla_s.c.b)
expected = sa.select([sqla_t.c.a, sqla_s.c.b]).select_from(sqla_join)
assert str(result) == str(expected)
Expand All @@ -485,8 +532,24 @@ def test_compile_with_one_unnamed_table():
s = ibis.table([('b', 'string')], name='s')
join = t.join(s, t.a == s.b)
result = ibis.sqlite.compile(join)
sqla_t = sa.table('t0', sa.column('a', sa.String)).alias('t0')
sqla_t = sa.table(t.op().name, sa.column('a', sa.String)).alias('t0')
sqla_s = sa.table('s', sa.column('b', sa.String)).alias('t1')
sqla_join = sqla_t.join(sqla_s, sqla_t.c.a == sqla_s.c.b)
expected = sa.select([sqla_t.c.a, sqla_s.c.b]).select_from(sqla_join)
assert str(result) == str(expected)


@pytest.mark.sqlite
@pytest.mark.parametrize(
    ('attr', 'expected'),
    [
        (operator.methodcaller('year'), {2009, 2010}),
        (operator.methodcaller('month'), set(range(1, 13))),
        (operator.methodcaller('day'), set(range(1, 32)))
    ]
)
def test_date_extract_field(db, attr, expected):
    """Extracting year/month/day from a date column yields the expected
    distinct values in the functional_alltypes test table."""
    table = db.functional_alltypes
    date_col = table.timestamp_col.cast('date')
    distinct_expr = attr(date_col).distinct()
    result = distinct_expr.execute().astype(int)
    assert set(result) == expected
30 changes: 15 additions & 15 deletions ibis/sql/tests/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa=E402
import unittest

import pytest

import ibis
import ibis.expr.api as api
import ibis.expr.operations as ops

import pytest
from ibis.expr.tests.mocks import MockConnection

pytest.importorskip('sqlalchemy')
pytest.importorskip('impala.dbapi')

from ibis.impala.compiler import build_ast, to_sql

from ibis import impala

from ibis.expr.tests.mocks import MockConnection
from ibis.compat import unittest
import ibis.common as com

import ibis.expr.api as api
import ibis.expr.operations as ops
from ibis.impala.compiler import build_ast, to_sql # noqa: E402
from ibis import impala # noqa: E402


class TestASTBuilder(unittest.TestCase):
Expand Down Expand Up @@ -273,6 +269,7 @@ def _get_query(expr):
ast = build_ast(expr)
return ast.queries[0]


nation = api.table([
('n_regionkey', 'int32'),
('n_nationkey', 'int32'),
Expand Down Expand Up @@ -809,9 +806,11 @@ def _compare_sql(self, expr, expected):
assert result == expected

def test_nameless_table(self):
# Ensure that user gets some kind of sensible error
# Generate a unique table name when we haven't passed one
nameless = api.table([('key', 'string')])
self.assertRaises(com.RelationError, to_sql, nameless)
assert to_sql(nameless) == 'SELECT *\nFROM {}'.format(
nameless.op().name
)

with_name = api.table([('key', 'string')], name='baz')
result = to_sql(with_name)
Expand Down Expand Up @@ -2298,7 +2297,8 @@ def test_pushdown_with_or():
expected = """\
SELECT *
FROM functional_alltypes
WHERE (`double_col` > 3.14) AND (locate('foo', `string_col`) - 1 >= 0) AND
WHERE `double_col` > 3.14 AND
locate('foo', `string_col`) - 1 >= 0 AND
(((`int_col` - 1) = 0) OR (`float_col` <= 1.34))"""
assert result == expected

Expand Down
23 changes: 10 additions & 13 deletions ibis/sql/tests/test_sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa=E402

import unittest
import operator

import pytest

sa = pytest.importorskip('sqlalchemy')

from ibis.compat import unittest
from ibis.expr.tests.mocks import MockConnection
from ibis.sql.tests.test_compiler import ExprTestCases
from ibis.tests.util import assert_equal
import ibis
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.sql.alchemy as alch
import ibis
from ibis.expr.tests.mocks import MockConnection
from ibis.tests.util import assert_equal

sa = pytest.importorskip('sqlalchemy')

from sqlalchemy import types as sat, func as F
import sqlalchemy.sql as sql
from ibis.sql.tests.test_compiler import ExprTestCases # noqa: E402
import ibis.sql.alchemy as alch # noqa: E402

from sqlalchemy import types as sat, func as F # noqa: E402
import sqlalchemy.sql as sql # noqa: E402

L = sa.literal

Expand Down Expand Up @@ -119,7 +116,7 @@ def test_sqla_schema_conversion(self):
for name, t, nullable, ibis_type in typespec:
sqla_type = sa.Column(name, t, nullable=nullable)
sqla_types.append(sqla_type)
ibis_types.append((name, ibis_type(nullable)))
ibis_types.append((name, ibis_type(nullable=nullable)))

table = sa.Table('tname', self.meta, *sqla_types)

Expand Down
2 changes: 1 addition & 1 deletion ibis/sql/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _visit(self, expr):
if isinstance(arg, ir.TableExpr):
self._visit_table(arg)
elif isinstance(arg, ir.BooleanColumn):
for sub_expr in L.unwrap_ands(arg):
for sub_expr in L.flatten_predicate(arg):
self.predicates.append(sub_expr)
self._visit(sub_expr)
elif isinstance(arg, ir.Expr):
Expand Down
5 changes: 3 additions & 2 deletions ibis/tests/test_filesystems.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from six import BytesIO
import unittest

from posixpath import join as pjoin
from os import path as osp
import os
import shutil

from six import BytesIO

import pytest

from ibis.filesystems import HDFS
from ibis.compat import unittest
from ibis.impala.tests.common import IbisTestEnv
import ibis.compat as compat
import ibis.util as util
Expand Down
40 changes: 6 additions & 34 deletions ibis/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import types

import ibis.compat as compat

from ibis.config import options
Expand All @@ -28,36 +31,9 @@ def guid():
return guid.hex if not compat.PY2 else guid.get_hex()


def bytes_to_uint8_array(val, width=70):
    """
    Format a byte string for use as a uint8_t* array literal in C/C++.

    Parameters
    ----------
    val : str or bytes
        The byte string to format. Under Python 3, iterating ``bytes``
        yields ``int`` elements; both element types are handled, whereas
        the previous implementation raised ``TypeError`` on ``bytes``
        because it always applied ``ord``.
    width : int, default 70
        Approximate maximum line width before wrapping onto a new line.

    Returns
    -------
    str
        A brace-enclosed, comma-separated literal, possibly spanning
        multiple lines.
    """
    if not val:
        return '{}'

    # Normalize each element to its decimal string form. bytes elements
    # are already ints in Python 3; str elements need ord().
    tokens = [str(x) if isinstance(x, int) else str(ord(x)) for x in val]

    lines = []
    line = '{' + tokens[0]
    for token in tokens[1:]:
        # Wrap when appending the next token would exceed the width.
        if len(line) + len(token) > width:
            lines.append(line + ',')
            line = token
        else:
            line += ',%s' % token
    lines.append(line)
    return '\n'.join(lines) + '}'


def unique_by_key(values, key):
    """Return ``values`` de-duplicated by ``key(value)``.

    When multiple values share the same key, the last one encountered
    wins (standard dict overwrite semantics).
    """
    deduplicated = {key(value): value for value in values}
    return compat.dict_values(deduplicated)


def indent(text, spaces):
block = ' ' * spaces
return '\n'.join(block + x for x in text.split('\n'))
prefix = ' ' * spaces
return ''.join(prefix + line for line in text.splitlines(True))


def any_of(values, t):
Expand Down Expand Up @@ -167,13 +143,9 @@ def g(*args, **kwargs):
return g


def to_stdout(x):
print(x)


def log(msg):
if options.verbose:
(options.verbose_log or to_stdout)(msg)
(options.verbose_log or print)(msg)


class cache_readonly(object):
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
numpy>=1.7.0
enum34; python_version < '3'
numpy>=1.7
pandas>=0.12.0
six
toolz
4 changes: 0 additions & 4 deletions scripts/fixtures.sh

This file was deleted.

133 changes: 0 additions & 133 deletions scripts/run_jenkins.sh

This file was deleted.

Loading