625 changes: 356 additions & 269 deletions ibis/impala/client.py

Large diffs are not rendered by default.

660 changes: 440 additions & 220 deletions ibis/sql/exprs.py → ibis/impala/compiler.py

Large diffs are not rendered by default.

228 changes: 140 additions & 88 deletions ibis/impala/ddl.py
@@ -12,11 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from io import BytesIO
from ibis.compat import StringIO
import re

from ibis.sql.ddl import DDL, Select
from ibis.sql.exprs import quote_identifier, _type_to_sql_string
from ibis.sql.compiler import DDL
from .compiler import quote_identifier, _type_to_sql_string

from ibis.expr.datatypes import validate_type
from ibis.compat import py_string
import ibis.expr.rules as rules


fully_qualified_re = re.compile("(.*)\.(?:`(.*)`|(.*))")
@@ -35,24 +39,20 @@ def _is_quoted(x):

class ImpalaDDL(DDL):

def _get_scoped_name(self, table_name, database):
def _get_scoped_name(self, obj_name, database):
if database:
scoped_name = '{0}.`{1}`'.format(database, table_name)
scoped_name = '{0}.`{1}`'.format(database, obj_name)
else:
if not _is_fully_qualified(table_name):
if _is_quoted(table_name):
return table_name
if not _is_fully_qualified(obj_name):
if _is_quoted(obj_name):
return obj_name
else:
return '`{0}`'.format(table_name)
return '`{0}`'.format(obj_name)
else:
return table_name
return obj_name
return scoped_name
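
The scoping rules above quote bare names and leave quoted or already-qualified names untouched. A minimal standalone sketch of that behavior (illustrative only; `_is_quoted` is approximated here with a backtick check, and `scoped_name` is a hypothetical helper, not the library API):

    import re

    fully_qualified_re = re.compile(r"(.*)\.(?:`(.*)`|(.*))")

    def scoped_name(obj_name, database=None):
        if database:
            return '{0}.`{1}`'.format(database, obj_name)
        if fully_qualified_re.match(obj_name) or obj_name.startswith('`'):
            # already qualified or already quoted: pass through unchanged
            return obj_name
        return '`{0}`'.format(obj_name)

    assert scoped_name('foo', 'my_db') == 'my_db.`foo`'
    assert scoped_name('my_db.foo') == 'my_db.foo'
    assert scoped_name('`foo`') == '`foo`'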


class ImpalaSelect(Select):
pass


class CreateDDL(ImpalaDDL):

def _if_exists(self):
@@ -126,7 +126,7 @@ def __init__(self, table_name, select, database=None,
can_exist=can_exist, path=path)

def compile(self):
buf = BytesIO()
buf = StringIO()
buf.write(self._create_line())
buf.write(self._storage())
buf.write(self._location())
@@ -149,7 +149,7 @@ def __init__(self, name, select, database=None, can_exist=False):
self.can_exist = can_exist

def compile(self):
buf = BytesIO()
buf = StringIO()
buf.write(self._create_line())

select_query = self.select.compile()
@@ -182,7 +182,7 @@ def _validate(self):
pass

def compile(self):
buf = BytesIO()
buf = StringIO()
buf.write(self._create_line())

if self.example_file is not None:
@@ -211,7 +211,7 @@ def __init__(self, table_name, schema, table_format, **kwargs):
def compile(self):
from ibis.expr.api import schema

buf = BytesIO()
buf = StringIO()
buf.write(self._create_line())

def _push_schema(x):
@@ -260,7 +260,7 @@ def __init__(self, path, delimiter=None, escapechar=None,
self.lineterminator = lineterminator

def to_ddl(self):
buf = BytesIO()
buf = StringIO()

buf.write("\nROW FORMAT DELIMITED")

@@ -288,7 +288,7 @@ def __init__(self, path, avro_schema):
def to_ddl(self):
import json

buf = BytesIO()
buf = StringIO()
buf.write('\nSTORED AS AVRO')
buf.write("\nLOCATION '{0}'".format(self.path))

Expand Down Expand Up @@ -321,7 +321,7 @@ def __init__(self, table_name, path, avro_schema, external=True, **kwargs):
CreateTable.__init__(self, table_name, external=external, **kwargs)

def compile(self):
buf = BytesIO()
buf = StringIO()
buf.write(self._create_line())

format_ddl = self.table_format.to_ddl()
@@ -351,6 +351,39 @@ def compile(self):
return '{0} {1}\n{2}'.format(cmd, scoped_name, select_query)


class AlterTable(ImpalaDDL):

def _wrap_command(self, cmd):
return 'ALTER TABLE {0}'.format(cmd)


class RenameTable(AlterTable):

def __init__(self, old_name, new_name, old_database=None,
new_database=None):
# if either database is None, the name is assumed to be fully scoped
self.old_name = old_name
self.old_database = old_database
self.new_name = new_name
self.new_database = new_database

new_qualified_name = new_name
if new_database is not None:
new_qualified_name = self._get_scoped_name(new_name, new_database)

old_qualified_name = old_name
if old_database is not None:
old_qualified_name = self._get_scoped_name(old_name, old_database)

self.old_qualified_name = old_qualified_name
self.new_qualified_name = new_qualified_name

def compile(self):
cmd = '{0} RENAME TO {1}'.format(self.old_qualified_name,
self.new_qualified_name)
return self._wrap_command(cmd)
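
As a rough usage sketch (object and database names here are hypothetical), renaming a table into another database compiles to a single ALTER TABLE statement:

    from ibis.impala.ddl import RenameTable

    stmt = RenameTable('tmp_rename_test', 'rename_test', new_database='tmp_db')
    print(stmt.compile())
    # ALTER TABLE tmp_rename_test RENAME TO tmp_db.`rename_test`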


class DropObject(ImpalaDDL):

def __init__(self, must_exist=True):
@@ -451,87 +484,90 @@ def _format_schema_element(name, t):
_type_to_sql_string(t))


class CreateFunction(ImpalaDDL):
class CreateFunctionBase(ImpalaDDL):

_object_type = 'FUNCTION'

def __init__(self, hdfs_file, so_symbol, inputs, output,
name, database=None):
self.hdfs_file = hdfs_file
self.so_symbol = so_symbol
self.inputs = _impala_signature(inputs)
self.output = _impala_signature([output])[0]
def __init__(self, lib_path, inputs, output, name, database=None):
self.lib_path = lib_path

self.inputs, self.output = inputs, output
self.input_sig = _impala_signature(inputs)
self.output_sig = _arg_to_string(output)

self.name = name
self.database = database

def get_name(self):
return self.name
def _create_line(self):
scoped_name = self._get_scoped_name(self.name, self.database)
return ('{0!s}({1!s}) returns {2!s}'
.format(scoped_name, self.input_sig, self.output_sig))

def _get_scoped_name(self):
if self.database:
return '{0}.{1}'.format(self.database, self.name)
else:
return self.name

class CreateFunction(CreateFunctionBase):

def __init__(self, lib_path, so_symbol, inputs, output,
name, database=None):
self.so_symbol = so_symbol

CreateFunctionBase.__init__(self, lib_path, inputs, output,
name, database=database)

def compile(self):
create_decl = 'CREATE FUNCTION'
scoped_name = self._get_scoped_name()
create_line = ('{0!s}({1!s}) returns {2!s}'
.format(scoped_name, ', '.join(self.inputs),
self.output))
param_line = "location '{0!s}' symbol='{1!s}'".format(self.hdfs_file,
self.so_symbol)
create_line = self._create_line()
param_line = ("location '{0!s}' symbol='{1!s}'"
.format(self.lib_path, self.so_symbol))
full_line = ' '.join([create_decl, create_line, param_line])
return full_line
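
For reference, a sketch of the DDL this emits for a scalar UDF, assuming `_type_to_sql_string` maps 'string' and 'double' to themselves; the library path and symbol are hypothetical:

    from ibis.impala.ddl import CreateFunction

    stmt = CreateFunction('/tmp/udfs/libudf.so', 'MyUdfSymbol',
                          ['string', 'double'], 'double', 'my_udf')
    print(stmt.compile())
    # CREATE FUNCTION `my_udf`(string, double) returns double location '/tmp/udfs/libudf.so' symbol='MyUdfSymbol'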


class CreateAggregateFunction(ImpalaDDL):

_object_type = 'FUNCTION'
class CreateAggregateFunction(CreateFunction):

def __init__(self, hdfs_file, inputs, output, init_fn, update_fn,
merge_fn, finalize_fn, name, database=None):
self.hdfs_file = hdfs_file
self.inputs = _impala_signature(inputs)
self.output = _impala_signature([output])[0]
def __init__(self, lib_path, inputs, output, update_fn, init_fn,
merge_fn, serialize_fn, finalize_fn, name, database):
self.init = init_fn
self.update = update_fn
self.merge = merge_fn
self.serialize = serialize_fn
self.finalize = finalize_fn
self.name = name
self.database = database

def get_name(self):
return self.name

def _get_scoped_name(self):
if self.database:
return '{0}.{1}'.format(self.database, self.name)
else:
return self.name
CreateFunctionBase.__init__(self, lib_path, inputs, output,
name, database=database)

def compile(self):
create_decl = 'CREATE AGGREGATE FUNCTION'
scoped_name = self._get_scoped_name()
create_line = ('{0!s}({1!s}) returns {2!s}'
.format(scoped_name, ', '.join(self.inputs),
self.output))
loc_ln = "location '{0!s}'".format(self.hdfs_file)
init_ln = "init_fn='{0}'".format(self.init)
update_ln = "update_fn='{0}'".format(self.update)
merge_ln = "merge_fn='{0}'".format(self.merge)
finalize_ln = "finalize_fn='{0}'".format(self.finalize)
full_line = ' '.join([create_decl, create_line, loc_ln,
init_ln, update_ln, merge_ln, finalize_ln])
create_line = self._create_line()
tokens = ["location '{0!s}'".format(self.lib_path)]

if self.init is not None:
tokens.append("init_fn='{0}'".format(self.init))

tokens.append("update_fn='{0}'".format(self.update))

if self.merge is not None:
tokens.append("merge_fn='{0}'".format(self.merge))

if self.serialize is not None:
tokens.append("serialize_fn='{0}'".format(self.serialize))

if self.finalize is not None:
tokens.append("finalize_fn='{0}'".format(self.finalize))

full_line = (' '.join([create_decl, create_line]) + ' ' +
'\n'.join(tokens))
return full_line


class DropFunction(DropObject):

def __init__(self, name, input_types, must_exist=True,
def __init__(self, name, inputs, must_exist=True,
aggregate=False, database=None):
self.name = name
self.inputs = _impala_signature(input_types)

self.inputs = inputs
self.input_sig = _impala_signature(inputs)

self.must_exist = must_exist
self.aggregate = aggregate
self.database = database
@@ -540,23 +576,20 @@ def __init__(self, name, input_types, must_exist=True,
def _object_name(self):
return self.name

def _get_scoped_name(self):
if self.database:
return '{0}.{1}'.format(self.database, self.name)
else:
return self.name
def _function_sig(self):
full_name = self._get_scoped_name(self.name, self.database)
return '{0!s}({1!s})'.format(full_name, self.input_sig)

def compile(self):
statement = 'DROP'
tokens = ['DROP']
if self.aggregate:
statement += ' AGGREGATE'
statement += ' FUNCTION'
tokens.append('AGGREGATE')
tokens.append('FUNCTION')
if not self.must_exist:
statement += ' IF EXISTS'
full_name = self._get_scoped_name()
func_line = ' {0!s}({1!s})'.format(full_name, ', '.join(self.inputs))
statement += func_line
return statement
tokens.append('IF EXISTS')

tokens.append(self._function_sig())
return ' '.join(tokens)


class ListFunction(ImpalaDDL):
@@ -576,6 +609,25 @@ def compile(self):
return statement


def _impala_signature(types):
from ibis.expr.datatypes import validate_type
return [_type_to_sql_string(validate_type(x)) for x in types]
def _impala_signature(sig):
if isinstance(sig, rules.TypeSignature):
if isinstance(sig, rules.VarArgs):
val = _arg_to_string(sig.arg_type)
return '{0}...'.format(val)
else:
return ', '.join([_arg_to_string(arg) for arg in sig.types])
else:
return ', '.join([_type_to_sql_string(validate_type(x))
for x in sig])


def _arg_to_string(arg):
if isinstance(arg, rules.ValueTyped):
types = arg.types
if len(types) > 1:
raise NotImplementedError
return _type_to_sql_string(types[0])
elif isinstance(arg, py_string):
return _type_to_sql_string(validate_type(arg))
else:
raise NotImplementedError
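
`_impala_signature` accepts either a plain list of type names or a `rules.TypeSignature` (including varargs). A hedged sketch of the resulting DROP statement for a scalar UDF, again assuming `_type_to_sql_string` is the identity for 'string' and 'double':

    from ibis.impala.ddl import DropFunction

    stmt = DropFunction('my_udf', ['string', 'double'], must_exist=False)
    print(stmt.compile())
    # DROP FUNCTION IF EXISTS `my_udf`(string, double)
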
91 changes: 91 additions & 0 deletions ibis/impala/madlib.py
@@ -0,0 +1,91 @@
# Copyright 2015 Cloudera Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.impala.udf import wrap_uda, wrap_udf
import ibis.expr.rules as rules


class MADLibAPI(object):

"""
Class responsible for wrapping all MADLib-on-Impala API functions, creating
them in a particular Impala database, and registering them for use with
Ibis.
"""
_udas = {
'linr_fit': (['string', 'double'], 'string', 'LinrUpdate'),
'logr_fit': (['string', 'string', 'boolean', 'double', 'double'],
'string', 'LogrUpdate'),
'svm_fit': (['string', 'string', 'boolean', 'double', 'double'],
'string', 'SVMUpdate'),
}

_udfs = {
'linr_predict': (['string', 'string'], 'double', 'LinrPredict'),

'logr_predict': (['string', 'string'], 'boolean', 'LogrPredict'),
'logr_loss': (['string', 'string', 'boolean'], 'double', 'LogrLoss'),

'svm_predict': (['string', 'string'], 'boolean', 'SVMPredict'),
'svm_loss': (['string', 'string', 'boolean'], 'double', 'SVMLoss'),

'to_array': (rules.varargs(rules.double), 'string',
('_Z7ToArrayPN10impala_udf'
'15FunctionContextEiPNS_9DoubleValE')),
'arrayget': (['int64', 'string'], 'double', 'ArrayGet'),
'allbytes': ([], 'string', 'AllBytes'),
'printarray': (['string'], 'string', 'PrintArray'),
'encodearray': (['string'], 'string', 'EncodeArray'),
'decodearray': (['string'], 'string', 'DecodeArray'),
}

def __init__(self, library_path, database, func_prefix=None):
self.library_path = library_path
self.database = database

self.function_names = sorted(self._udfs.keys() + self._udas.keys())
self.func_prefix = func_prefix or 'madlib_'

self._generate_wrappers()
self._register_functions()

def _generate_wrappers(self):
for name, (inputs, output, update_sym) in self._udas.items():
func = wrap_uda(self.library_path, inputs, output, update_sym,
name=self.func_prefix + name)
setattr(self, name, func)

for name, (inputs, output, sym) in self._udfs.items():
func = wrap_udf(self.library_path, inputs, output, sym,
name=self.func_prefix + name)
setattr(self, name, func)

def _register_functions(self):
# Enable SQL translation to work correctly
for name in self.function_names:
func = getattr(self, name)
func.register(func.name, self.database)

def create_functions(self, client):
for name in self.function_names:
func = getattr(self, name)
client.create_function(func, database=self.database)

def logistic_regression(self):
pass

def linear_regression(self):
pass

def svm(self):
pass
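
A hedged sketch of wiring this up end to end; the connection parameters, shared-library path, and database name below are hypothetical, and the wrappers only work against a cluster where the library has been deployed:

    import ibis
    from ibis.impala.madlib import MADLibAPI

    con = ibis.impala.connect(host='impalad-host', port=21050)

    api = MADLibAPI('/user/hive/udfs/madlib.so', 'madlib_db')
    api.create_functions(con)   # issues CREATE [AGGREGATE] FUNCTION for each wrapper

    # each wrapped function is now an attribute, e.g. api.linr_fit, api.linr_predict
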
150 changes: 150 additions & 0 deletions ibis/impala/tests/common.py
@@ -0,0 +1,150 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time
import six

import pytest

from ibis import options
import ibis.util as util
import ibis.compat as compat
import ibis


class IbisTestEnv(object):

def __init__(self):
# TODO: allow initializing values through a constructor
self.impala_host = os.environ.get('IBIS_TEST_IMPALA_HOST', 'localhost')
self.impala_port = int(os.environ.get('IBIS_TEST_IMPALA_PORT', 21050))
self.tmp_db = os.environ.get('IBIS_TEST_TMP_DB',
'__ibis_tmp_{0}'.format(util.guid()))
self.tmp_dir = os.environ.get('IBIS_TEST_TMP_HDFS_DIR',
'/tmp/__ibis_test')
self.test_data_db = os.environ.get('IBIS_TEST_DATA_DB', 'ibis_testing')
self.test_data_dir = os.environ.get('IBIS_TEST_DATA_HDFS_DIR',
'/__ibis/ibis-testing-data')
self.nn_host = os.environ.get('IBIS_TEST_NN_HOST', 'localhost')
# 5070 is default for impala dev env
self.webhdfs_port = int(os.environ.get('IBIS_TEST_WEBHDFS_PORT', 5070))
self.hdfs_superuser = os.environ.get('IBIS_TEST_HDFS_SUPERUSER',
'hdfs')
self.use_codegen = os.environ.get('IBIS_TEST_USE_CODEGEN',
'False').lower() == 'true'
self.cleanup_test_data = os.environ.get('IBIS_TEST_CLEANUP_TEST_DATA',
'True').lower() == 'true'
self.auth_mechanism = os.environ.get('IBIS_TEST_AUTH_MECH', 'NOSASL')
self.llvm_config = os.environ.get('IBIS_TEST_LLVM_CONFIG', None)
# update global Ibis config where relevant
options.impala.temp_db = self.tmp_db
options.impala.temp_hdfs_path = self.tmp_dir

def __repr__(self):
kvs = ['{0}={1}'.format(k, v)
for (k, v) in six.iteritems(self.__dict__)]
return 'IbisTestEnv(\n {0})'.format(',\n '.join(kvs))


def connect_test(env, with_hdfs=True):
if with_hdfs:
if env.auth_mechanism in ['GSSAPI', 'LDAP']:
print("Warning: ignoring invalid Certificate Authority errors")
hdfs_client = ibis.hdfs_connect(host=env.nn_host,
port=env.webhdfs_port,
auth_mechanism=env.auth_mechanism,
verify=(env.auth_mechanism
not in ['GSSAPI', 'LDAP']))
else:
hdfs_client = None

return ibis.impala.connect(host=env.impala_host,
database=env.test_data_db,
port=env.impala_port,
auth_mechanism=env.auth_mechanism,
pool_size=2,
hdfs_client=hdfs_client)


@pytest.mark.impala
class ImpalaE2E(object):

@classmethod
def setUpClass(cls):
ENV = IbisTestEnv()
cls.con = connect_test(ENV)
# Tests run generally faster without it
if not ENV.use_codegen:
cls.con.disable_codegen()
cls.hdfs = cls.con.hdfs
cls.test_data_dir = ENV.test_data_dir
cls.test_data_db = ENV.test_data_db
cls.tmp_dir = ENV.tmp_dir
cls.tmp_db = ENV.tmp_db
cls.alltypes = cls.con.table('functional_alltypes')

cls.db = cls.con.database(ENV.test_data_db)

if not cls.con.exists_database(cls.tmp_db):
cls.con.create_database(cls.tmp_db)

@classmethod
def tearDownClass(cls):
i, retries = 0, 3
while True:
# reduce test flakiness
try:
cls.con.drop_database(cls.tmp_db, force=True)
break
except:
i += 1
if i >= retries:
raise

time.sleep(0.1)

def setUp(self):
self.temp_databases = []
self.temp_tables = []
self.temp_views = []
self.temp_udfs = []
self.temp_udas = []

def tearDown(self):
for t in self.temp_tables:
self.con.drop_table(t, force=True)

for t in self.temp_views:
self.con.drop_view(t, force=True)

for f_name, f_inputs in self.temp_udfs:
self.con.drop_udf(f_name, input_types=f_inputs, force=True)

for f_name, f_inputs in self.temp_udas:
self.con.drop_uda(f_name, input_types=f_inputs, force=True)

self.con.set_database(self.test_data_db)
for t in self.temp_databases:
self.con.drop_database(t, force=True)


def format_schema(expr):
from ibis.impala.compiler import _type_to_sql_string
from pprint import pprint
schema = expr.schema()

what = compat.lzip(schema.names,
[_type_to_sql_string(x) for x in schema.types])
pprint(what)
86 changes: 85 additions & 1 deletion ibis/impala/tests/test_client.py
@@ -15,7 +15,9 @@
import pandas as pd

from ibis.compat import unittest
from ibis.tests.util import IbisTestEnv, ImpalaE2E, assert_equal, connect_test
from ibis.impala.tests.common import IbisTestEnv, ImpalaE2E, connect_test
from ibis.tests.util import assert_equal
import ibis

import ibis.common as com
import ibis.config as config
@@ -32,6 +34,19 @@ def approx_equal(a, b, eps):

class TestImpalaClient(ImpalaE2E, unittest.TestCase):

def test_execute_exprs_default_backend(self):
cases = [
(ibis.literal(2), 2)
]

ibis.options.default_backend = None
client = connect_test(ENV, with_hdfs=False)
assert ibis.options.default_backend is client

for expr, expected in cases:
result = expr.execute()
assert result == expected

def test_raise_ibis_error_no_hdfs(self):
# #299
client = connect_test(ENV, with_hdfs=False)
@@ -114,6 +129,25 @@ def test_adapt_scalar_array_results(self):
result = self.con.execute(expr)
assert isinstance(result, pd.Series)

def test_interactive_repr_call_failure(self):
t = self.con.table('tpch_lineitem').limit(100000)

t = t[t, t.l_receiptdate.cast('timestamp').name('date')]

keys = [t.date.year().name('year'), 'l_linestatus']
filt = t.l_linestatus.isin(['F'])
expr = (t[filt]
.group_by(keys)
.aggregate(t.l_extendedprice.mean().name('avg_px')))

w2 = ibis.trailing_window(9, group_by=expr.l_linestatus,
order_by=expr.year)

metric = expr['avg_px'].mean().over(w2)
enriched = expr[expr, metric]
with config.option_context('interactive', True):
repr(enriched)

def test_array_default_limit(self):
t = self.alltypes

@@ -172,6 +206,24 @@ def test_sql_query_limits(self):
assert table.count().execute() == 25
assert table.count().execute(limit=10) == 25

def test_expr_compile_verify(self):
table = self.db.functional_alltypes
expr = table.double_col.sum()

assert isinstance(expr.compile(), str)
assert expr.verify()

def test_api_compile_verify(self):
t = self.db.functional_alltypes

s = t.string_col

supported = s.lower()
unsupported = s.replace('foo', 'bar')

assert ibis.impala.verify(supported)
assert not ibis.impala.verify(unsupported)

def test_database_repr(self):
assert self.test_data_db in repr(self.db)

@@ -184,6 +236,10 @@ def test_database_drop(self):
db.drop()
assert not self.con.exists_database(tmp_name)

def test_database_default_current_database(self):
db = self.con.database()
assert db.name == self.con.current_database

def test_namespace(self):
ns = self.db.namespace('tpch_')

@@ -210,3 +266,31 @@ def test_close_drops_temp_tables(self):
client.close()

assert not self.con.exists_table(name)

def test_execute_async_simple(self):
t = self.db.functional_alltypes
expr = t.double_col.sum()

q = expr.execute(async=True)
result = q.get_result()
expected = expr.execute()
assert result == expected

def test_query_cancel(self):
import time
t = self.db.functional_alltypes

t2 = t.union(t).union(t)

# WM: this query takes about 90 seconds to execute for me locally, so
# I'm eyeballing an acceptable time frame for the cancel to work
expr = t2.join(t2).count()

start = time.clock()
q = expr.execute(async=True)
q.cancel()
end = time.clock()
elapsed = end - start
assert elapsed < 5

assert q.is_finished()
62 changes: 49 additions & 13 deletions ibis/impala/tests/test_ddl.py
@@ -25,9 +25,9 @@
from ibis.compat import unittest
from ibis.impala import ddl
from ibis.impala.compat import HS2Error, ImpylaError
from ibis.sql.compiler import build_ast
from ibis.tests.util import (IbisTestEnv, ImpalaE2E,
assert_equal, connect_test)
from ibis.impala.client import build_ast
from ibis.impala.tests.common import IbisTestEnv, ImpalaE2E, connect_test
from ibis.tests.util import assert_equal
import ibis.common as com
import ibis.util as util

@@ -442,11 +442,23 @@ def test_create_database_with_location(self):
self.con.drop_database(name)
self.hdfs.rmdir(base)

@pytest.mark.superuser
def test_create_table_with_location(self):
base = pjoin(self.tmp_dir, util.guid())
name = 'test_{0}'.format(util.guid())
tmp_path = pjoin(base, name)

# impala user has trouble writing to jenkins-owned dir so here we give
# the tmp dir 777
superuser_hdfs = ibis.hdfs_connect(host=ENV.nn_host,
port=ENV.webhdfs_port,
auth_mechanism=ENV.auth_mechanism,
verify=(ENV.auth_mechanism
not in ['GSSAPI', 'LDAP']),
user=ENV.hdfs_superuser)
superuser_hdfs.mkdir(base)
superuser_hdfs.chmod(base, '777')

expr = self.alltypes
table_name = _random_table_name()

@@ -471,7 +483,7 @@ def test_truncate_table(self):
try:
self.con.truncate_table(table_name)
except HS2Error as e:
if 'AnalysisException' in e.message:
if 'AnalysisException' in e.args[0]:
pytest.skip('TRUNCATE not available in this '
'version of Impala')

@@ -508,15 +520,18 @@ def test_insert_table(self):

self.con.create_table(table_name, expr.limit(0), database=db)
self.temp_tables.append('.'.join((db, table_name)))

self.con.insert(table_name, expr.limit(10), database=db)
self.con.insert(table_name, expr.limit(10), database=db)

sz = self.con.table('{0}.{1}'.format(db, table_name)).count()
# check using ImpalaTable.insert
t = self.con.table(table_name, database=db)
t.insert(expr.limit(10))

sz = t.count()
assert sz.execute() == 20

# Overwrite and verify only 10 rows now
self.con.insert(table_name, expr.limit(10), database=db,
overwrite=True)
t.insert(expr.limit(10), overwrite=True)
assert sz.execute() == 10

def test_insert_validate_types(self):
@@ -531,20 +546,22 @@ def test_insert_validate_types(self):
database=db)
self.temp_tables.append('.'.join((db, table_name)))

t = self.con.table(table_name, database=db)

to_insert = expr[expr.tinyint_col, expr.smallint_col.name('int_col'),
expr.string_col]
self.con.insert(table_name, to_insert.limit(10))
t.insert(to_insert.limit(10))

to_insert = expr[expr.tinyint_col,
expr.smallint_col.cast('int32').name('int_col'),
expr.string_col]
self.con.insert(table_name, to_insert.limit(10))
t.insert(to_insert.limit(10))

to_insert = expr[expr.tinyint_col,
expr.bigint_col.name('int_col'),
expr.string_col]
with self.assertRaises(com.IbisError):
self.con.insert(table_name, to_insert.limit(10))
t.insert(to_insert.limit(10))

def test_compute_stats(self):
self.con.table('functional_alltypes').compute_stats()
@@ -569,6 +586,26 @@ def test_drop_table_or_view(self):
assert vname not in self.db


class TestAlterTable(ImpalaE2E, unittest.TestCase):

def test_rename_table(self):
tmp_db = '__ibis_tmp_{0}'.format(util.guid()[:4])
self.con.create_database(tmp_db)
self.temp_databases.append(tmp_db)

self.con.create_table('tmp_rename_test',
self.con.table('tpch_region'))
table = self.con.table('tmp_rename_test')

new_name = 'rename_test'
table.rename(new_name, database=tmp_db)

table.execute()

t = self.con.table(new_name, database=tmp_db)
assert_equal(table, t)


class TestQueryHDFSData(ImpalaE2E, unittest.TestCase):

def test_cleanup_tmp_table_on_gc(self):
@@ -764,8 +801,7 @@ def _get_select(expr):


def _random_table_name():
import uuid
table_name = 'testing_' + uuid.uuid4().get_hex()
table_name = '__ibis_test_' + util.guid()
return table_name


913 changes: 906 additions & 7 deletions ibis/impala/tests/test_exprs.py

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions ibis/impala/tests/test_madlib.py
@@ -0,0 +1,59 @@
# Copyright 2015 Cloudera Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from posixpath import join as pjoin
import pytest

from ibis.compat import unittest
from ibis.impala.tests.common import ImpalaE2E

from ibis.impala import madlib

import ibis.util as util


class TestMADLib(ImpalaE2E, unittest.TestCase):

@classmethod
def setUpClass(cls):
super(TestMADLib, cls).setUpClass()
cls.db = '__ibis_madlib_{0}'.format(util.guid()[:4])

cls.con.create_database(cls.db)

@classmethod
def tearDownClass(cls):
super(TestMADLib, cls).tearDownClass()

try:
cls.con.drop_database(cls.db, force=True)
except:
pass

def setUp(self):
super(TestMADLib, self).setUp()
self.madlib_so = pjoin(self.test_data_dir, 'udf/madlib.so')

self.api = madlib.MADLibAPI(self.madlib_so, self.db)

@pytest.mark.madlib
def test_create_functions(self):
self.api.create_functions(self.con)

for name in self.api._udfs:
func = getattr(self.api, name)
assert self.con.exists_udf(func.name, database=self.db)

for name in self.api._udas:
func = getattr(self.api, name)
assert self.con.exists_uda(func.name, database=self.db)
8 changes: 4 additions & 4 deletions ibis/impala/tests/test_pandas_interop.py
@@ -21,9 +21,8 @@
import ibis.expr.types as ir
from ibis.compat import unittest
from ibis.common import IbisTypeError
from ibis.tests.util import ImpalaE2E

from ibis.impala.client import pandas_to_ibis_schema
from ibis.impala.tests.common import ImpalaE2E


functional_alltypes_with_nulls = pd.DataFrame({
@@ -136,7 +135,7 @@ def test_dtype_uint32(self):
def test_dtype_uint64(self):
df = pd.DataFrame({'col': np.uint64([666, 2, 3])})
with self.assertRaises(IbisTypeError):
inferred = pandas_to_ibis_schema(df)
inferred = pandas_to_ibis_schema(df) # noqa

def test_dtype_datetime64(self):
df = pd.DataFrame({
@@ -181,6 +180,7 @@ def test_round_trip(self):
assert (df1 == df2).all().all()

def test_round_trip_non_int_missing_data(self):
pytest.skip('WM: hangs -- will investigate later')
df1 = functional_alltypes_with_nulls
table = self.con.pandas(df1, 'fawn', database=self.tmp_db)
df2 = table.execute()
@@ -220,6 +220,6 @@ def test_round_trip_missing_type_promotion(self):
self.con.con.cursor.execute(insert_query)

table = self.con.table('missing_ints', database=self.tmp_db)
df = table.execute()
df = table.execute() # noqa # REMOVE LATER

# WHAT NOW?
3 changes: 2 additions & 1 deletion ibis/impala/tests/test_partition.py
@@ -15,7 +15,8 @@
import ibis

from ibis.compat import unittest
from ibis.tests.util import ImpalaE2E, assert_equal
from ibis.impala.tests.common import ImpalaE2E
from ibis.tests.util import assert_equal

import ibis.util as util

Expand Down
33 changes: 33 additions & 0 deletions ibis/impala/tests/test_sql.py
@@ -0,0 +1,33 @@
# Copyright 2014 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ibis

from ibis.impala.compiler import to_sql
from ibis.compat import unittest


class TestImpalaSQL(unittest.TestCase):

def test_relabel_projection(self):
# GH #551
types = ['int32', 'string', 'double']
table = ibis.table(zip(['foo', 'bar', 'baz'], types), 'table')
relabeled = table.relabel({'foo': 'one', 'baz': 'three'})

result = to_sql(relabeled)
expected = """\
SELECT `foo` AS `one`, `bar`, `baz` AS `three`
FROM `table`"""
assert result == expected
561 changes: 280 additions & 281 deletions ibis/impala/tests/test_udf.py

Large diffs are not rendered by default.

@@ -16,9 +16,10 @@
from ibis import window
import ibis

from ibis.sql.compiler import to_sql
from ibis.impala.compiler import to_sql
from ibis.expr.tests.mocks import BasicTestCase
from ibis.compat import unittest
from ibis.tests.util import assert_equal
import ibis.common as com


@@ -214,6 +215,26 @@ def test_unsupported_aggregate_functions(self):
proj = t.projection([expr.over(w).name('foo')])
to_sql(proj)

def test_propagate_nested_windows(self):
# GH #469
t = self.con.table('alltypes')

w = ibis.window(group_by=t.g, order_by=t.f)

col = (t.f - t.f.lag()).lag()

# propagate down here!
result = col.over(w)
ex_expr = (t.f - t.f.lag().over(w)).lag().over(w)
assert_equal(result, ex_expr)

expr = t.projection(col.over(w).name('foo'))
expected = """\
SELECT lag(`f` - lag(`f`) OVER (PARTITION BY `g` ORDER BY `f`)) \
OVER (PARTITION BY `g` ORDER BY `f`) AS `foo`
FROM alltypes"""
self._check_sql(expr, expected)

def test_math_on_windowed_expr(self):
# Window clause may not be found at top level of expression
pass
Expand Down
345 changes: 203 additions & 142 deletions ibis/impala/udf.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion ibis/server.py
@@ -236,7 +236,7 @@ def run_daemon(self):
elif msg.startswith('kill'):
# e.g. kill 12345
worker_pid = int(msg[4:])
print 'Killing %d' % worker_pid
print('Killing %d' % worker_pid)
try:
os.kill(worker_pid, signal.SIGINT)
except OSError:
Expand Down
847 changes: 847 additions & 0 deletions ibis/sql/alchemy.py

Large diffs are not rendered by default.

661 changes: 581 additions & 80 deletions ibis/sql/compiler.py

Large diffs are not rendered by default.

170 changes: 0 additions & 170 deletions ibis/sql/context.py

This file was deleted.

451 changes: 0 additions & 451 deletions ibis/sql/ddl.py

This file was deleted.

Empty file added ibis/sql/postgres/__init__.py
Empty file.
Empty file.
15 changes: 15 additions & 0 deletions ibis/sql/postgres/tests/conftest.py
@@ -0,0 +1,15 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.tests.conftest import * # noqa
44 changes: 44 additions & 0 deletions ibis/sql/sqlite/api.py
@@ -0,0 +1,44 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from .client import SQLiteClient
from .compiler import rewrites # noqa


def compile(expr):
"""
Force compilation of expression for the SQLite target
"""
from .client import SQLiteDialect
from ibis.sql.alchemy import to_sqlalchemy
return to_sqlalchemy(expr, dialect=SQLiteDialect)


def connect(path, create=False):

"""
Create an Ibis client connected to a SQLite database.
Multiple database files can be attached using the attach() method.
Parameters
----------
path : string
File path to the SQLite database file
create : boolean, default False
If file does not exist, create it
"""

return SQLiteClient(path, create=create)
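
A short usage sketch; the file name and table name below assume the testing database used elsewhere in this diff:

    import ibis

    con = ibis.sqlite.connect('ibis_testing.db')
    table = con.table('functional_alltypes')
    total = table.double_col.sum().execute()
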
108 changes: 108 additions & 0 deletions ibis/sql/sqlite/client.py
@@ -0,0 +1,108 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import sqlalchemy as sa

from ibis.client import Database
from .compiler import SQLiteDialect
import ibis.expr.types as ir
import ibis.sql.alchemy as alch
import ibis.common as com


class SQLiteTable(alch.AlchemyTable):
pass


class SQLiteDatabase(Database):
pass


class SQLiteClient(alch.AlchemyClient):

"""
The Ibis SQLite client class
"""

dialect = SQLiteDialect
database_class = SQLiteDatabase

def __init__(self, path, create=False):
self.name = path
self.database_name = 'default'

self.con = sa.create_engine('sqlite://')
self.attach(self.database_name, path, create=create)
self.meta = sa.MetaData(bind=self.con)

@property
def current_database(self):
return self.database_name

def list_databases(self):
raise NotImplementedError

def set_database(self):
raise NotImplementedError

def attach(self, name, path, create=False):
"""
Connect another SQLite database file
Parameters
----------
name : string
Database name within SQLite
path : string
Path to sqlite3 file
create : boolean, default False
If True, create the file when it does not exist; otherwise raise an exception
"""
if not os.path.exists(path) and not create:
raise com.IbisError('File {0} does not exist'.format(path))

self.con.execute("ATTACH DATABASE '{0}' AS '{1}'".format(path, name))

@property
def client(self):
return self

def table(self, name, database=None):
"""
Create a table expression that references a particular table in the
SQLite database
Parameters
----------
name : string
Returns
-------
table : TableExpr
"""
alch_table = self._get_sqla_table(name)
node = SQLiteTable(alch_table, self)
return self._table_expr_klass(node)

def drop_table(self):
pass

def create_table(self, name, expr=None):
pass

@property
def _table_expr_klass(self):
return ir.TableExpr
183 changes: 183 additions & 0 deletions ibis/sql/sqlite/compiler.py
@@ -0,0 +1,183 @@
# Copyright 2014 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sqlalchemy as sa

from ibis.sql.alchemy import unary, varargs, fixed_arity
import ibis.sql.alchemy as alch
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.common as com

_operation_registry = alch._operation_registry.copy()


def _cast(t, expr):
# It's not all fun and games with SQLite

op = expr.op()
arg, target_type = op.args
sa_arg = t.translate(arg)
sa_type = t.get_sqla_type(target_type)

# SQLite does not have a physical date/time/timestamp type, so
# unfortunately cast to typestamp must be a no-op, and we have to trust
# that the user's data can actually be correctly parsed by SQLite.
if isinstance(target_type, dt.Timestamp):
if not isinstance(arg, (ir.IntegerValue, ir.StringValue)):
raise com.TranslationError(type(arg))

return sa_arg

if isinstance(arg, ir.CategoryValue) and target_type == 'int32':
return sa_arg
else:
return sa.cast(sa_arg, sa_type)


def _substr(t, expr):
f = sa.func.substr

arg, start, length = expr.op().args

sa_arg = t.translate(arg)
sa_start = t.translate(start)

if length is None:
return f(sa_arg, sa_start + 1)
else:
sa_length = t.translate(length)
return f(sa_arg, sa_start + 1, sa_length)


def _string_right(t, expr):
f = sa.func.substr

arg, length = expr.op().args

sa_arg = t.translate(arg)
sa_length = t.translate(length)

return f(sa_arg, -sa_length, sa_length)


def _string_find(t, expr):
arg, substr, start, _ = expr.op().args

if start is not None:
raise NotImplementedError

sa_arg = t.translate(arg)
sa_substr = t.translate(substr)

f = sa.func.instr
return f(sa_arg, sa_substr) - 1


def _infix_op(infix_sym):
def formatter(t, expr):
op = expr.op()
left, right = op.args

left_arg = t.translate(left)
right_arg = t.translate(right)
return left_arg.op(infix_sym)(right_arg)

return formatter


def _strftime(t, expr):
arg, format = expr.op().args
sa_arg = t.translate(arg)
sa_format = t.translate(format)
return sa.func.strftime(sa_format, sa_arg)


def _strftime_int(fmt):
def translator(t, expr):
arg, = expr.op().args
sa_arg = t.translate(arg)
return sa.cast(sa.func.strftime(fmt, sa_arg), sa.types.INTEGER)
return translator


def _now(t, expr):
return sa.func.datetime('now')


def _millisecond(t, expr):
arg, = expr.op().args
sa_arg = t.translate(arg)
fractional_second = sa.func.strftime('%f', sa_arg)
return (fractional_second * 1000) % 1000


_operation_registry.update({
ops.Cast: _cast,

ops.Substring: _substr,
ops.StrRight: _string_right,

ops.StringFind: _string_find,

ops.StringLength: unary('length'),

ops.Least: varargs(sa.func.min),
ops.Greatest: varargs(sa.func.max),
ops.IfNull: fixed_arity(sa.func.ifnull, 2),

ops.Lowercase: unary('lower'),
ops.Uppercase: unary('upper'),

ops.Strip: unary('trim'),
ops.LStrip: unary('ltrim'),
ops.RStrip: unary('rtrim'),

ops.StringReplace: fixed_arity(sa.func.replace, 3),
ops.StringSQLLike: _infix_op('LIKE'),
ops.RegexSearch: _infix_op('REGEXP'),

ops.Strftime: _strftime,
ops.ExtractYear: _strftime_int('%Y'),
ops.ExtractMonth: _strftime_int('%m'),
ops.ExtractDay: _strftime_int('%d'),
ops.ExtractHour: _strftime_int('%H'),
ops.ExtractMinute: _strftime_int('%M'),
ops.ExtractSecond: _strftime_int('%S'),
ops.ExtractMillisecond: _millisecond,
ops.TimestampNow: _now
})


def add_operation(op, translation_func):
_operation_registry[op] = translation_func


class SQLiteExprTranslator(alch.AlchemyExprTranslator):

_registry = _operation_registry
_rewrites = alch.AlchemyExprTranslator._rewrites.copy()
_type_map = alch.AlchemyExprTranslator._type_map.copy()
_type_map.update({
dt.Double: sa.types.REAL
})


rewrites = SQLiteExprTranslator.rewrites


class SQLiteDialect(alch.AlchemyDialect):

translator = SQLiteExprTranslator
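
As a standalone illustration of the SQL the _strftime_int translators above produce, here is the equivalent raw SQLAlchemy expression (no Ibis objects involved; the column name is hypothetical):

    import sqlalchemy as sa

    ts = sa.column('timestamp_col')
    year = sa.cast(sa.func.strftime('%Y', ts), sa.types.INTEGER)
    print(year.compile(compile_kwargs={'literal_binds': True}))
    # CAST(strftime('%Y', timestamp_col) AS INTEGER)
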
59 changes: 59 additions & 0 deletions ibis/sql/sqlite/tests/common.py
@@ -0,0 +1,59 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import pytest

from ibis.sql.sqlite.compiler import SQLiteExprTranslator
import ibis.sql.sqlite.api as api
from sqlalchemy.dialects.sqlite import dialect as sqlite_dialect


@pytest.mark.sqlite
class SQLiteTests(object):

@classmethod
def setUpClass(cls):
cls.env = SQLiteTestEnv()
cls.dialect = sqlite_dialect()
cls.con = api.connect(cls.env.db_path)
cls.alltypes = cls.con.table('functional_alltypes')

def _check_expr_cases(self, cases, context=None, named=False):
for expr, expected in cases:
result = self._translate(expr, named=named, context=context)

compiled = result.compile(dialect=self.dialect)
ex_compiled = expected.compile(dialect=self.dialect)

assert str(compiled) == str(ex_compiled)

def _translate(self, expr, named=False, context=None):
translator = SQLiteExprTranslator(expr, context=context, named=named)
return translator.get_result()

def _to_sqla(self, table):
return table.op().sqla_table

def _check_e2e_cases(self, cases):
for expr, expected in cases:
result = self.con.execute(expr)
assert result == expected


class SQLiteTestEnv(object):

def __init__(self):
self.db_path = os.environ.get('IBIS_TEST_SQLITE_DB_PATH',
'ibis_testing.db')
15 changes: 15 additions & 0 deletions ibis/sql/sqlite/tests/conftest.py
@@ -0,0 +1,15 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.tests.conftest import * # noqa
97 changes: 97 additions & 0 deletions ibis/sql/sqlite/tests/test_client.py
@@ -0,0 +1,97 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import pandas as pd

from .common import SQLiteTests
from ibis.compat import unittest
from ibis.tests.util import assert_equal
from ibis.util import guid
import ibis.expr.types as ir
import ibis.common as com
import ibis


class TestSQLiteClient(SQLiteTests, unittest.TestCase):

@classmethod
def tearDownClass(cls):
pass

def test_file_not_exist_and_create(self):
path = '__ibis_tmp_{0}.db'.format(guid())

with self.assertRaises(com.IbisError):
ibis.sqlite.connect(path)

ibis.sqlite.connect(path, create=True)
assert os.path.exists(path)
os.remove(path)

def test_table(self):
table = self.con.table('functional_alltypes')
assert isinstance(table, ir.TableExpr)

def test_array_execute(self):
d = self.alltypes.limit(10).double_col
s = d.execute()
assert isinstance(s, pd.Series)
assert len(s) == 10

def test_literal_execute(self):
expr = ibis.literal('1234')
result = self.con.execute(expr)
assert result == '1234'

def test_simple_aggregate_execute(self):
d = self.alltypes.double_col.sum()
v = d.execute()
assert isinstance(v, float)

def test_list_tables(self):
assert len(self.con.list_tables()) > 0
assert len(self.con.list_tables(like='functional')) == 1

def test_compile_verify(self):
unsupported_expr = self.alltypes.string_col.approx_nunique()
assert not unsupported_expr.verify()

supported_expr = self.alltypes.double_col.sum()
assert supported_expr.verify()

def test_attach_file(self):
pass

def test_database_layer(self):
db = self.con.database()

t = db.functional_alltypes
assert_equal(t, self.alltypes)

assert db.list_tables() == self.con.list_tables()

def test_compile_toplevel(self):
# t = ibis.table([
# ('foo', 'double')
# ])

# # it works!
# expr = t.foo.sum()
# ibis.sqlite.compile(expr)

# This does not work yet because if the compiler encounters a
# non-SQLAlchemy table it fails
pass
339 changes: 339 additions & 0 deletions ibis/sql/sqlite/tests/test_functions.py
@@ -0,0 +1,339 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest # noqa

from .common import SQLiteTests
from ibis.compat import unittest
from ibis import literal as L
import ibis.expr.types as ir
import ibis

import sqlalchemy as sa


class TestSQLiteFunctions(SQLiteTests, unittest.TestCase):

def test_cast(self):
at = self._to_sqla(self.alltypes)

d = self.alltypes.double_col
s = self.alltypes.string_col

sa_d = at.c.double_col
sa_s = at.c.string_col

cases = [
(d.cast('int8'), sa.cast(sa_d, sa.types.SMALLINT)),
(s.cast('double'), sa.cast(sa_s, sa.types.REAL)),
]
self._check_expr_cases(cases)

def test_decimal_cast(self):
pass

def test_timestamp_cast_noop(self):
# See GH #592

at = self._to_sqla(self.alltypes)

tc = self.alltypes.timestamp_col
ic = self.alltypes.int_col

tc_casted = tc.cast('timestamp')
ic_casted = ic.cast('timestamp')

# Logically, it's a timestamp
assert isinstance(tc_casted, ir.TimestampArray)
assert isinstance(ic_casted, ir.TimestampArray)

# But it's a no-op when translated to SQLAlchemy
cases = [
(tc_casted, at.c.timestamp_col),
(ic_casted, at.c.int_col)
]
self._check_expr_cases(cases)

def test_timestamp_functions(self):
from datetime import datetime

v = L('2015-09-01 14:48:05.359').cast('timestamp')

cases = [
(v.strftime('%Y%m%d'), '20150901'),

(v.year(), 2015),
(v.month(), 9),
(v.day(), 1),
(v.hour(), 14),
(v.minute(), 48),
(v.second(), 5),
(v.millisecond(), 359),

# there could be pathological failure at midnight somewhere, but
# that's okay
(ibis.now().strftime('%Y%m%d %H'),
datetime.utcnow().strftime('%Y%m%d %H'))
]
self._check_e2e_cases(cases)

def test_binary_arithmetic(self):
cases = [
(L(3) + L(4), 7),
(L(3) - L(4), -1),
(L(3) * L(4), 12),
(L(12) / L(4), 3),
# (L(12) ** L(2), 144),
(L(12) % L(5), 2)
]
self._check_e2e_cases(cases)

def test_typeof(self):
cases = [
(L('foo_bar').typeof(), 'text'),
(L(5).typeof(), 'integer'),
(ibis.NA.typeof(), 'null'),
(L(1.2345).typeof(), 'real'),
]
self._check_e2e_cases(cases)

def test_nullifzero(self):
cases = [
(L(0).nullifzero(), None),
(L(5.5).nullifzero(), 5.5),
]
self._check_e2e_cases(cases)

def test_string_length(self):
cases = [
(L('foo_bar').length(), 7),
(L('').length(), 0),
]
self._check_e2e_cases(cases)

def test_string_substring(self):
cases = [
(L('foo_bar').left(3), 'foo'),
(L('foo_bar').right(3), 'bar'),

(L('foo_bar').substr(0, 3), 'foo'),
(L('foo_bar').substr(4, 3), 'bar'),
(L('foo_bar').substr(1), 'oo_bar'),
]
self._check_e2e_cases(cases)

def test_string_strip(self):
cases = [
(L(' foo ').lstrip(), 'foo '),
(L(' foo ').rstrip(), ' foo'),
(L(' foo ').strip(), 'foo'),
]
self._check_e2e_cases(cases)

def test_string_upper_lower(self):
cases = [
(L('foo').upper(), 'FOO'),
(L('FOO').lower(), 'foo'),
]
self._check_e2e_cases(cases)

def test_string_contains(self):
cases = [
(L('foobar').contains('bar'), True),
(L('foobar').contains('foo'), True),
(L('foobar').contains('baz'), False),
]
self._check_e2e_cases(cases)

def test_string_functions(self):
cases = [
(L('foobar').find('bar'), 3),
(L('foobar').find('baz'), -1),

(L('foobar').like('%bar'), True),
(L('foobar').like('foo%'), True),
(L('foobar').like('%baz%'), False),

(L('foobarfoo').replace('foo', 'H'), 'HbarH'),
]
self._check_e2e_cases(cases)

def test_math_functions(self):
cases = [
(L(-5).abs(), 5),
(L(5).abs(), 5),
(ibis.least(L(5), L(10), L(1)), 1),
(ibis.greatest(L(5), L(10), L(1)), 10),

(L(5.5).round(), 6.0),
(L(5.556).round(2), 5.56),
]
self._check_e2e_cases(cases)

def test_regexp(self):
pytest.skip('NYI: Requires adding regex udf with sqlite3')

v = L('abcd')
v2 = L('1222')
cases = [
(v.re_search('[a-z]'), True),
(v.re_search('[\d]+'), False),
(v2.re_search('[\d]+'), True),
]
self._check_e2e_cases(cases)

def test_fillna_nullif(self):
cases = [
(ibis.NA.fillna(5), 5),
(L(5).fillna(10), 5),
(L(5).nullif(5), None),
(L(10).nullif(5), 10),
]
self._check_e2e_cases(cases)

def test_coalesce(self):
pass

def test_numeric_builtins_work(self):
t = self.alltypes
d = t.double_col

exprs = [
d.fillna(0),
]
self._execute_projection(t, exprs)

def test_misc_builtins_work(self):
t = self.alltypes
d = t.double_col

exprs = [
(d > 20).ifelse(10, -20),
(d > 20).ifelse(10, -20).abs(),

# tier and histogram
d.bucket([0, 10, 25, 50, 100]),
d.bucket([0, 10, 25, 50], include_over=True),
d.bucket([0, 10, 25, 50], include_over=True, close_extreme=False),
d.bucket([10, 25, 50, 100], include_under=True),
]
self._execute_projection(t, exprs)

def test_category_label(self):
t = self.alltypes
d = t.double_col

bucket = d.bucket([0, 10, 25, 50, 100])

exprs = [
bucket.label(['a', 'b', 'c', 'd'])
]
self._execute_projection(t, exprs)

def test_union(self):
pytest.skip('union not working yet')

t = self.alltypes

expr = (t.group_by('string_col')
.aggregate(t.double_col.sum().name('foo'))
.sort_by('string_col'))

t1 = expr.limit(4)
t2 = expr.limit(4, offset=4)
t3 = expr.limit(8)

result = t1.union(t2).execute()
expected = t3.execute()

assert (result.string_col == expected.string_col).all()

def test_aggregations_execute(self):
table = self.alltypes.limit(100)

d = table.double_col
s = table.string_col

cond = table.string_col.isin(['1', '7'])

exprs = [
table.bool_col.count(),
table.bool_col.any(),
table.bool_col.all(),
table.bool_col.notany(),
table.bool_col.notall(),

d.sum(),
d.mean(),
d.min(),
d.max(),

table.bool_col.count(where=cond),
d.sum(where=cond),
d.mean(where=cond),
d.min(where=cond),
d.max(where=cond),

s.group_concat(),
]
self._execute_aggregation(table, exprs)

def test_distinct_aggregates(self):
table = self.alltypes.limit(100)

exprs = [
table.double_col.nunique()
]
self._execute_aggregation(table, exprs)

def test_not_exists_works(self):
t = self.alltypes
t2 = t.view()

expr = t[-(t.string_col == t2.string_col).any()]
expr.execute()

def test_interactive_repr_shows_error(self):
# #591. Doing this in SQLite because so many built-in functions are not
# available
import ibis.config as config

expr = self.alltypes.double_col.approx_nunique()

with config.option_context('interactive', True):
result = repr(expr)
assert 'no translator rule' in result.lower()

def test_subquery_invokes_sqlite_compiler(self):
t = self.alltypes

expr = (t.mutate(d=t.double_col.fillna(0))
.limit(1000)
.group_by('string_col')
.size())
expr.execute()

def _execute_aggregation(self, table, exprs):
agg_exprs = [expr.name('e%d' % i)
for i, expr in enumerate(exprs)]

agged_table = table.aggregate(agg_exprs)
agged_table.execute()

def _execute_projection(self, table, exprs):
agg_exprs = [expr.name('e%d' % i)
for i, expr in enumerate(exprs)]

proj = table.projection(agg_exprs)
proj.execute()
15 changes: 15 additions & 0 deletions ibis/sql/tests/conftest.py
@@ -0,0 +1,15 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.tests.conftest import * # noqa
1,606 changes: 928 additions & 678 deletions ibis/sql/tests/test_compiler.py

Large diffs are not rendered by default.

903 changes: 0 additions & 903 deletions ibis/sql/tests/test_exprs.py

This file was deleted.

537 changes: 537 additions & 0 deletions ibis/sql/tests/test_sqlalchemy.py

Large diffs are not rendered by default.

15 changes: 6 additions & 9 deletions ibis/sql/transforms.py
@@ -85,12 +85,13 @@ def _visit(self, expr):
def _visit_table(self, expr):
node = expr.op()

if isinstance(node, (ops.PhysicalTable, ops.SelfReference)):
if isinstance(node, ir.BlockingTableNode):
self._ref_check(expr)

for arg in node.flat_args():
if isinstance(arg, ir.Expr):
self._visit(arg)
if not isinstance(node, ir.BlockingTableNode):
for arg in node.flat_args():
if isinstance(arg, ir.Expr):
self._visit(arg)

def _ref_check(self, expr):
node = expr.op()
@@ -99,11 +100,7 @@ def _ref_check(self, expr):
pass
else:
# Foreign ref
if isinstance(node, ops.SelfReference):
foreign_table = node.table
else:
foreign_table = expr

foreign_table = expr
self.foreign_table = foreign_table

def _is_root(self, what):
Expand Down
16 changes: 7 additions & 9 deletions ibis/tasks.py
@@ -14,10 +14,8 @@

import traceback

from cPickle import loads as pickle_load
from ibis.cloudpickle import dumps as pickle_dump

from ibis.wire import PackedMessageReader, PackedMessageWriter
import ibis.compat as compat
import ibis.wire as wire

try:
@@ -199,7 +197,7 @@ def _write_response(self, agg_inst):
self.shmem.seek(0)
self.mark_success()

serialized_inst = pickle_dump(agg_inst)
serialized_inst = compat.pickle_dump(agg_inst)
wire.write_string(self.shmem, serialized_inst)


@@ -227,15 +225,15 @@ def _read_header(self):
has_prior_state = reader.uint8() != 0

if has_prior_state:
self.prior_state = pickle_load(reader.string())
self.prior_state = compat.pickle_load(reader.string())
else:
self.prior_state = None

def run(self):
if self.prior_state is not None:
agg_inst = self.prior_state
else:
klass = pickle_load(self.agg_class_pickled)
klass = compat.pickle_load(self.agg_class_pickled)
agg_inst = klass()

args = self._deserialize_args()
@@ -270,8 +268,8 @@ def __init__(self, shmem):
# TODO: may wish to merge more than 2 at a time?

# Unpack header
self.left_inst = pickle_load(reader.string())
self.right_inst = pickle_load(reader.string())
self.left_inst = compat.pickle_load(reader.string())
self.right_inst = compat.pickle_load(reader.string())

def run(self):
# Objects to merge stored in length-prefixed strings in shared memory
@@ -285,7 +283,7 @@ def __init__(self, shmem):
AggregationTask.__init__(self, shmem)

reader = wire.PackedMessageReader(shmem)
self.state = pickle_load(reader.string())
self.state = compat.pickle_load(reader.string())

def run(self):
# Single length-prefixed string to finalize
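
The pickling helpers now come from ibis.compat instead of importing cPickle and the vendored cloudpickle here directly, which keeps the Python 2/3 branching in one place. The wrappers themselves are not part of this diff; a plausible minimal sketch (an assumption, not the actual ibis/compat.py) would be:

import sys

PY3 = sys.version_info[0] >= 3

if PY3:
    from pickle import loads as pickle_load
else:
    from cPickle import loads as pickle_load

try:
    # cloudpickle can serialize closures and locally defined classes,
    # which the stdlib pickler cannot.
    from cloudpickle import dumps as pickle_dump
except ImportError:
    from pickle import dumps as pickle_dump
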
44 changes: 39 additions & 5 deletions ibis/tests/conftest.py
@@ -15,22 +15,56 @@
from pytest import skip


groups = ['hdfs', 'impala', 'madlib', 'sqlite']


def pytest_addoption(parser):
parser.addoption('--e2e', action='store_true', default=False,
help='Enable the e2e (end-to-end) tests')
for group in groups:
parser.addoption('--{0}'.format(group), action='store_true',
default=False,
help=('Enable the {0} (end-to-end) tests'
.format(group)))

for group in groups:
parser.addoption('--only-{0}'.format(group), action='store_true',
default=False,
help=('Enable only the {0} (end-to-end) tests'
.format(group)))

parser.addoption('--skip-udf', action='store_true', default=False,
help='Skip tests marked udf')
parser.addoption('--skip-superuser', action='store_true', default=False,
help='Skip tests marked superuser')


def pytest_runtest_setup(item):
if getattr(item.obj, 'e2e', None): # the test item is marked e2e
if not item.config.getoption('--e2e'): # but --e2e option not set
skip('--e2e NOT enabled')
only_set = False

for group in groups:
only_flag = '--only-{0}'.format(group)
flag = '--{0}'.format(group)

if item.config.getoption(only_flag):
only_set = True
elif getattr(item.obj, group, None):
if not item.config.getoption(flag):
skip('{0} NOT enabled'.format(flag))

if only_set:
skip_item = True
for group in groups:
only_flag = '--only-{0}'.format(group)
if (getattr(item.obj, group, False) and
item.config.getoption(only_flag)):
skip_item = False

if skip_item:
skip('Only running some groups with only flags')

if getattr(item.obj, 'udf', None):
if item.config.getoption('--skip-udf'):
skip('--skip-udf enabled')

if getattr(item.obj, 'superuser', None):
if item.config.getoption('--skip-superuser'):
skip('--skip-superuser enabled')
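
With the group options above, a test opts into a backend through markers and the command-line flags decide what actually runs. A sketch of the intended usage (marker names come from the groups list; the invocations in the comments are illustrative):

import pytest

from ibis.compat import unittest


@pytest.mark.impala        # skipped unless --impala or --only-impala is given
@pytest.mark.udf           # additionally skipped when --skip-udf is given
class TestImpalaUDF(unittest.TestCase):

    def test_something(self):
        pass

# Illustrative invocations:
#   py.test ibis                       backend-marked tests are skipped
#   py.test --impala --hdfs ibis       enable the impala and hdfs groups
#   py.test --only-sqlite ibis         run only tests marked sqlite
#   py.test --impala --skip-udf ibis   impala tests, minus those marked udf
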
106 changes: 64 additions & 42 deletions ibis/tests/test_filesystems.py
@@ -23,7 +23,8 @@

from ibis.filesystems import HDFS
from ibis.compat import unittest
from ibis.tests.util import IbisTestEnv
from ibis.impala.tests.common import IbisTestEnv
import ibis.compat as compat
import ibis.util as util
import ibis

@@ -49,40 +50,41 @@ def setUp(self):
self.con = MockHDFS()

def test_find_any_file(self):
ls_contents = [(u'/path/foo',
ls_contents = [(u'foo',
{u'type': u'DIRECTORY'}),
(u'/path/bar.tmp',
(u'bar.tmp',
{u'type': u'FILE'}),
(u'/path/baz.copying',
(u'baz.copying',
{u'type': u'FILE'}),
(u'/path/_SUCCESS',
(u'_SUCCESS',
{u'type': u'FILE'}),
(u'/path/.peekaboo',
(u'.peekaboo',
{u'type': u'FILE'}),
(u'/path/0.parq',
(u'0.parq',
{u'type': u'FILE'}),
(u'/path/_FILE',
(u'_FILE',
{u'type': u'DIRECTORY'})]

self.con.set_ls(ls_contents)

result = self.con.find_any_file('/path')
assert result == '/path/0.parq'
result = self.con._find_any_file('/path')
assert result == '0.parq'


@pytest.mark.e2e
@pytest.mark.hdfs
class TestHDFSE2E(unittest.TestCase):

@classmethod
def setUpClass(cls):
cls.ENV = ENV
cls.tmp_dir = pjoin(cls.ENV.tmp_dir, util.guid())
if cls.ENV.use_kerberos:
if cls.ENV.auth_mechanism in ['GSSAPI', 'LDAP']:
print("Warning: ignoring invalid Certificate Authority errors")
cls.hdfs = ibis.hdfs_connect(host=cls.ENV.nn_host,
port=cls.ENV.webhdfs_port,
use_kerberos=cls.ENV.use_kerberos,
verify=(not cls.ENV.use_kerberos))
auth_mechanism=cls.ENV.auth_mechanism,
verify=(cls.ENV.auth_mechanism
not in ['GSSAPI', 'LDAP']))
cls.hdfs.mkdir(cls.tmp_dir)

@classmethod
@@ -119,7 +121,7 @@ def _make_test_directory(self, files=5, filesize=1024, directory=None):
os.mkdir(directory)
self.test_directories.append(directory)

for i in xrange(files):
for i in range(files):
self._make_random_file(size=filesize, directory=directory)

return directory
@@ -133,8 +135,8 @@ def _make_random_file(self, size=1024, directory=None):
units = size / 32

with open(path, 'wb') as f:
for i in xrange(units):
f.write(util.guid())
for i in range(int(units)):
f.write(guidbytes())

self.test_files.append(path)
return path
@@ -215,7 +217,7 @@ def test_put_get_directory(self):
os.mkdir(local_dir)

try:
for i in xrange(K):
for i in range(K):
self._make_random_file(directory=local_dir)

remote_dir = pjoin(self.tmp_dir, local_dir)
Expand Down Expand Up @@ -253,7 +255,7 @@ def test_get_file_overwrite(self):
remote_path2 = pjoin(self.tmp_dir, local_path2)
self.hdfs.put(remote_path2, local_path2)

with self.assertRaises(IOError):
with self.assertRaises(Exception):
self.hdfs.get(remote_path, '.')

self.hdfs.get(remote_path, local_path2, overwrite=True)
Expand Down Expand Up @@ -288,7 +290,7 @@ def test_get_directory_nested_dirs(self):
os.mkdir(local_dir)

try:
for i in xrange(K):
for i in range(K):
self._make_random_file(directory=local_dir)

nested_dir = osp.join(local_dir, 'nested-dir')
@@ -309,21 +311,33 @@ def test_get_directory_nested_dirs(self):
finally:
shutil.rmtree(local_dir)

def test_get_directory_overwrite(self):
local_dir = self._make_test_directory()
local_dir2 = self._make_test_directory()

remote_dir = pjoin(self.tmp_dir, local_dir)
remote_dir2 = pjoin(self.tmp_dir, local_dir2)

self.hdfs.put(remote_dir, local_dir)
self.hdfs.put(remote_dir2, local_dir2)

self.hdfs.get(remote_dir, local_dir2, overwrite=True)
_check_directories_equal(local_dir2, local_dir)

self.hdfs.get(remote_dir, local_dir2, overwrite=True)
_check_directories_equal(local_dir2, local_dir)
def test_get_directory_overwrite_file(self):
try:
local_path1 = self._make_test_directory()
local_path2 = self._make_random_file()
remote_path = pjoin(self.tmp_dir, local_path1)
self.hdfs.put(remote_path, local_path1)
self.hdfs.get(remote_path, local_path2, overwrite=True)
_check_directories_equal(local_path1, local_path2)
finally:
# Path changed from file to directory, must be cleaned manually.
self._try_delete_directory(local_path2)

def test_get_directory_overwrite_directory(self):
local_path1 = self._make_test_directory()
local_path2 = self._make_test_directory()
remote_path = pjoin(self.tmp_dir, local_path2)
self.hdfs.put(remote_path, local_path1)
self.hdfs.get(remote_path, osp.dirname(local_path2), overwrite=True)
_check_directories_equal(local_path1, local_path2)

def test_get_directory_into_directory(self):
local_path1 = self._make_test_directory()
local_path2 = self._make_test_directory()
remote_path = pjoin(self.tmp_dir, local_path1)
self.hdfs.put(remote_path, local_path1)
local_path3 = self.hdfs.get(remote_path, local_path2)
_check_directories_equal(local_path3, local_path1)

def _try_delete_directory(self, path):
try:
@@ -334,7 +348,7 @@ def _try_delete_directory(self, path):
def test_ls(self):
test_dir = pjoin(self.tmp_dir, 'ls-test')
self.hdfs.mkdir(test_dir)
for i in xrange(10):
for i in range(10):
local_path = self._make_random_file()
hdfs_path = pjoin(test_dir, local_path)
self.hdfs.put(hdfs_path, local_path)
@@ -391,21 +405,22 @@ def _sample_nested_directory(self):
return dirname


@pytest.mark.e2e
@pytest.mark.hdfs
@pytest.mark.superuser
class TestSuperUserHDFSE2E(unittest.TestCase):

@classmethod
def setUpClass(cls):
cls.ENV = ENV
cls.tmp_dir = pjoin(cls.ENV.tmp_dir, util.guid())
if cls.ENV.use_kerberos:
if cls.ENV.auth_mechanism in ['GSSAPI', 'LDAP']:
print("Warning: ignoring invalid Certificate Authority errors")
# NOTE: specifying superuser as set in IbisTestEnv
cls.hdfs = ibis.hdfs_connect(host=cls.ENV.nn_host,
port=cls.ENV.webhdfs_port,
use_kerberos=cls.ENV.use_kerberos,
verify=(not cls.ENV.use_kerberos),
auth_mechanism=cls.ENV.auth_mechanism,
verify=(cls.ENV.auth_mechanism
not in ['GSSAPI', 'LDAP']),
user=cls.ENV.hdfs_superuser)
cls.hdfs.mkdir(cls.tmp_dir)

@@ -446,8 +461,8 @@ def _make_random_file(self, size=1024, directory=None):
units = size / 32

with open(path, 'wb') as f:
for i in xrange(units):
f.write(util.guid())
for i in range(int(units)):
f.write(guidbytes())

self.test_files.append(path)
return path
@@ -514,3 +529,10 @@ def _get_all_files(path):
paths[relpath] = abspath

return paths


def guidbytes():
if compat.PY3:
return util.guid().encode('utf8')
else:
return util.guid()
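
guidbytes exists because the random test files are opened in binary mode; util.guid() returns a str, which cannot be written to a 'wb' handle on Python 3. A small illustration of the failure it avoids (file name is arbitrary):

import uuid

payload = uuid.uuid4().hex           # str on both Python 2 and 3

with open('example.bin', 'wb') as f:
    # f.write(payload)               # TypeError on Python 3: bytes expected
    f.write(payload.encode('utf8'))
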
3 changes: 2 additions & 1 deletion ibis/tests/test_server.py
@@ -22,10 +22,11 @@

from ibis.compat import unittest
from ibis.server import IbisServerNode
import ibis.compat as compat


# non-POSIX system (e.g. Windows)
pytestmark = pytest.mark.skipif(not hasattr(os, 'setpgid'),
pytestmark = pytest.mark.skipif(compat.PY3 or not hasattr(os, 'setpgid'),
reason='non-POSIX system')


19 changes: 9 additions & 10 deletions ibis/tests/test_tasks.py
@@ -17,10 +17,9 @@

import pandas as pd

from cPickle import loads as pickle_load
from ibis.cloudpickle import dumps as pickle_dump
import ibis.compat as compat

from test_comms import double_ex
from .test_comms import double_ex

from ibis.tasks import IbisTaskMessage, IbisTaskExecutor
from ibis.util import guid
@@ -37,7 +36,7 @@
SKIP_TESTS = True


pytestmark = pytest.mark.skipif(SKIP_TESTS,
pytestmark = pytest.mark.skipif(SKIP_TESTS or compat.PY3,
reason='Comms extension disabled')


@@ -235,7 +234,7 @@ def test_update(self):
if not reader.uint8():
raise Exception(reader.string())

result = pickle_load(reader.string())
result = compat.pickle_load(reader.string())

ex_total = pd.Series(col.to_numpy_for_pandas()).sum()
assert result.total == ex_total
@@ -255,7 +254,7 @@ def test_update(self):
if not reader.uint8():
raise Exception(reader.string())

result = pickle_load(reader.string())
result = compat.pickle_load(reader.string())

ex_total += pd.Series(col.to_numpy_for_pandas()).sum()

@@ -283,7 +282,7 @@ def test_merge(self):
if not reader.uint8():
raise Exception(reader.string())

result = pickle_load(reader.string())
result = compat.pickle_load(reader.string())

larr = lcol.to_numpy_for_pandas()
rarr = rcol.to_numpy_for_pandas()
@@ -307,7 +306,7 @@ def test_finalize(self):
if not reader.uint8():
raise Exception(reader.string())

result = pickle_load(reader.string())
result = compat.pickle_load(reader.string())

arr = col.to_numpy_for_pandas()
ex_result = pd.Series(arr).mean()
@@ -339,11 +338,11 @@ def _make_update_task(self, uda_class, cols, prior_state=None):
payload = BytesIO()
msg_writer = wire.PackedMessageWriter(payload)
msg_writer.string('agg-update')
msg_writer.string(pickle_dump(uda_class))
msg_writer.string(compat.pickle_dump(uda_class))

if prior_state is not None:
msg_writer.uint8(1)
msg_writer.string(pickle_dump(prior_state))
msg_writer.string(compat.pickle_dump(prior_state))
else:
msg_writer.uint8(0)

120 changes: 0 additions & 120 deletions ibis/tests/util.py
@@ -12,128 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time

import pytest

from ibis import Schema
from ibis import options
import ibis.util as util
import ibis


class IbisTestEnv(object):

def __init__(self):
# TODO: allow initializing values through a constructor
self.impala_host = os.environ.get('IBIS_TEST_IMPALA_HOST', 'localhost')
self.impala_protocol = os.environ.get('IBIS_TEST_IMPALA_PROTOCOL',
'hiveserver2')
self.impala_port = int(os.environ.get('IBIS_TEST_IMPALA_PORT', 21050))
self.tmp_db = os.environ.get('IBIS_TEST_TMP_DB',
'__ibis_tmp_{0}'.format(util.guid()))
self.tmp_dir = os.environ.get('IBIS_TEST_TMP_HDFS_DIR',
'/tmp/__ibis_test')
self.test_data_db = os.environ.get('IBIS_TEST_DATA_DB', 'ibis_testing')
self.test_data_dir = os.environ.get('IBIS_TEST_DATA_HDFS_DIR',
'/__ibis/ibis-testing-data')
self.nn_host = os.environ.get('IBIS_TEST_NN_HOST', 'localhost')
# 5070 is default for impala dev env
self.webhdfs_port = int(os.environ.get('IBIS_TEST_WEBHDFS_PORT', 5070))
self.hdfs_superuser = os.environ.get('IBIS_TEST_HDFS_SUPERUSER',
'hdfs')
self.use_codegen = os.environ.get('IBIS_TEST_USE_CODEGEN',
'False').lower() == 'true'
self.cleanup_test_data = os.environ.get('IBIS_TEST_CLEANUP_TEST_DATA',
'True').lower() == 'true'
self.use_kerberos = os.environ.get('IBIS_TEST_USE_KERBEROS',
'False').lower() == 'true'
# update global Ibis config where relevant
options.impala.temp_db = self.tmp_db
options.impala.temp_hdfs_path = self.tmp_dir

def __repr__(self):
kvs = ['{0}={1}'.format(k, v) for (k, v) in self.__dict__.iteritems()]
return 'IbisTestEnv(\n {0})'.format(',\n '.join(kvs))


def connect_test(env, with_hdfs=True):
con = ibis.impala_connect(host=env.impala_host,
protocol=env.impala_protocol,
database=env.test_data_db,
port=env.impala_port,
use_kerberos=env.use_kerberos,
pool_size=2)
if with_hdfs:
if env.use_kerberos:
print("Warning: ignoring invalid Certificate Authority errors")
hdfs_client = ibis.hdfs_connect(host=env.nn_host,
port=env.webhdfs_port,
use_kerberos=env.use_kerberos,
verify=(not env.use_kerberos))
else:
hdfs_client = None
return ibis.make_client(con, hdfs_client)


@pytest.mark.e2e
class ImpalaE2E(object):

@classmethod
def setUpClass(cls):
ENV = IbisTestEnv()
cls.con = connect_test(ENV)
# Tests run generally faster without it
if not ENV.use_codegen:
cls.con.disable_codegen()
cls.hdfs = cls.con.hdfs
cls.test_data_dir = ENV.test_data_dir
cls.test_data_db = ENV.test_data_db
cls.tmp_dir = ENV.tmp_dir
cls.tmp_db = ENV.tmp_db
cls.alltypes = cls.con.table('functional_alltypes')

cls.db = cls.con.database(ENV.test_data_db)

if not cls.con.exists_database(cls.tmp_db):
cls.con.create_database(cls.tmp_db)

@classmethod
def tearDownClass(cls):
i, retries = 0, 3
while True:
# reduce test flakiness
try:
cls.con.drop_database(cls.tmp_db, force=True)
break
except:
i += 1
if i >= retries:
raise

time.sleep(0.1)

def setUp(self):
self.temp_databases = []
self.temp_tables = []
self.temp_views = []
self.temp_functions = []

def tearDown(self):
for t in self.temp_tables:
self.con.drop_table(t, force=True)

for t in self.temp_views:
self.con.drop_view(t, force=True)

for f_name, f_inputs in self.temp_functions:
self.con.drop_udf(f_name, input_types=f_inputs,
force=True)

self.con.set_database(self.test_data_db)
for t in self.temp_databases:
self.con.drop_database(t, force=True)


def assert_equal(left, right):
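
The environment-driven test configuration removed here lives on as ibis.impala.tests.common.IbisTestEnv; the knobs remain plain environment variables. A hypothetical local setup, with illustrative values only, might look like:

import os

# Placeholder values; see the IBIS_TEST_* defaults in the block removed above.
os.environ.setdefault('IBIS_TEST_IMPALA_HOST', 'impala.example.com')
os.environ.setdefault('IBIS_TEST_NN_HOST', 'namenode.example.com')
os.environ.setdefault('IBIS_TEST_WEBHDFS_PORT', '50070')
os.environ.setdefault('IBIS_TEST_DATA_DB', 'ibis_testing')
os.environ.setdefault('IBIS_TEST_USE_CODEGEN', 'False')
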
39 changes: 0 additions & 39 deletions ibis/uda.py

This file was deleted.

5 changes: 3 additions & 2 deletions ibis/util.py
@@ -22,7 +22,8 @@ def guid():
return uuid4_hex()
except ImportError:
from uuid import uuid4
return uuid4().get_hex()
guid = uuid4()
return guid.hex if compat.PY3 else guid.get_hex()


def bytes_to_uint8_array(val, width=70):
Expand All @@ -49,7 +50,7 @@ def unique_by_key(values, key):
id_to_table = {}
for x in values:
id_to_table[key(x)] = x
return id_to_table.values()
return compat.dict_values(id_to_table)


def indent(text, spaces):
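
unique_by_key now goes through compat.dict_values because dict.values() returns a view object on Python 3 rather than a list. The helper is not shown in this diff; a minimal sketch of what it presumably does:

import sys

PY3 = sys.version_info[0] >= 3


def dict_values(d):
    # Assumed behaviour: always return a concrete list, matching the
    # Python 2 semantics of dict.values().
    return list(d.values()) if PY3 else d.values()
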
3 changes: 2 additions & 1 deletion requirements.txt
@@ -3,5 +3,6 @@ numpy>=1.7.0
pandas>=0.12.0
impyla>=0.10.0
psutil==0.6.1
hdfs==1.4.3
hdfs>=2.0.0
sqlalchemy>=1.0.0
six
2 changes: 1 addition & 1 deletion scripts/airline.py
@@ -37,7 +37,7 @@ def wrangle_csvs():
('depdelay', 'int32'),
('origin', 'string'),
('dest', 'string'),
('distince', 'int32'),
('distance', 'int32'),
('taxi_in', 'int32'),
('taxi_out', 'int32'),
('cancelled', 'int8'),
42 changes: 31 additions & 11 deletions scripts/run_jenkins.sh
@@ -13,10 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# This script calls machinery that initializes an ibis.tests.util.IbisTestEnv,
# so it needs those variables set correctly. It also assumes that WORKSPACE is
# set (i.e., that it is being run as a Jenkins job). If the latter is not
# true, you can instead set GIT_URL and GIT_BRANCH to check them out manually.
# This script calls machinery that initializes an
# ibis.impala.tests.common.IbisTestEnv, so it needs those variables set
# correctly. It also assumes that WORKSPACE is set (i.e., that it is being run
# as a Jenkins job). If the latter is not true, you can instead set GIT_URL
# and GIT_BRANCH to check them out manually. For pulling in a pull request,
# set GITHUB_PR. For reporting to codecov.io, set CODECOV_TOKEN. Set
# PYTHON_VERSION to specify which version to run the tests on.

set -e
set -x
@@ -70,9 +73,10 @@ conda info -a

# Install ibis and deps into new environment
CONDA_ENV_NAME=pyenv-ibis-test
conda create -y -q -n $CONDA_ENV_NAME python=$PYTHON_VERSION numpy pandas
conda create -y -q -n $CONDA_ENV_NAME python=$PYTHON_VERSION pip numpy pandas
source activate $CONDA_ENV_NAME
pip install click
pip install pytest-cov
# preempt the requirements.txt file by installing impyla master
pip install git+https://github.com/cloudera/impyla.git
pip install $IBIS_HOME
@@ -95,20 +99,36 @@

cd $IBIS_HOME

python -c "from ibis.tests.util import IbisTestEnv; print(IbisTestEnv())"
python -c "from ibis.impala.tests.common import IbisTestEnv; print(IbisTestEnv())"

# load necessary test data (without overwriting)
scripts/test_data_admin.py load --data --no-udf

if [ -z "$WORKSPACE" ]; then
# on kerberized cluster, skip UDF work
py.test --skip-udf --skip-superuser --e2e ibis
else
# build and load the UDFs
PYTEST_KERB_ARGS="--skip-udf --skip-superuser"
fi

if [ -n "$WORKSPACE" ]; then
# non-kerb cluster: build and load the UDFs
scripts/test_data_admin.py load --no-data --udf --overwrite
# run the full test suite
py.test --e2e ibis
fi

PYTEST_COV_ARGS="--cov ibis --cov-report xml --cov-report term --cov-config .coveragerc"

# run the test suite
py.test -vv -r sxX $PYTEST_KERB_ARGS $PYTEST_COV_ARGS --impala --hdfs --sqlite ibis

# cleanup temporary data (but not testing data)
scripts/test_data_admin.py cleanup --tmp-data --tmp-db

# Enforce flake8 etc
if [ $PYTHON_VERSION != "2.6" ]; then
pip install prospector
prospector
fi

# Report code coverage to codecov.io
if [ -n $CODECOV_TOKEN ]; then
bash <(curl -s https://codecov.io/bash) -t $CODECOV_TOKEN
fi