| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| # Copyright 2015 Cloudera Inc | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from ibis.impala.udf import wrap_uda, wrap_udf | ||
| import ibis.expr.rules as rules | ||
|
|
||
|
|
||
class MADLibAPI(object):

    """
    Class responsible for wrapping all MADLib-on-Impala API functions, creating
    them in a particular Impala database, and registering them for use with
    Ibis.

    Parameters
    ----------
    library_path : string
      Location of the MADLib shared library, handed to wrap_uda / wrap_udf
    database : string
      Database the functions are registered with and created in
    func_prefix : string, default 'madlib_'
      Prefix prepended to every wrapped function's name
    """
    # name -> (input types, output type, update function symbol)
    _udas = {
        'linr_fit': (['string', 'double'], 'string', 'LinrUpdate'),
        'logr_fit': (['string', 'string', 'boolean', 'double', 'double'],
                     'string', 'LogrUpdate'),
        'svm_fit': (['string', 'string', 'boolean', 'double', 'double'],
                    'string', 'SVMUpdate'),
    }

    # name -> (input types, output type, function symbol)
    _udfs = {
        'linr_predict': (['string', 'string'], 'double', 'LinrPredict'),

        'logr_predict': (['string', 'string'], 'boolean', 'LogrPredict'),
        'logr_loss': (['string', 'string', 'boolean'], 'double', 'LogrLoss'),

        'svm_predict': (['string', 'string'], 'boolean', 'SVMPredict'),
        'svm_loss': (['string', 'string', 'boolean'], 'double', 'SVMLoss'),

        'to_array': (rules.varargs(rules.double), 'string',
                     ('_Z7ToArrayPN10impala_udf'
                      '15FunctionContextEiPNS_9DoubleValE')),
        'arrayget': (['int64', 'string'], 'double', 'ArrayGet'),
        'allbytes': ([], 'string', 'AllBytes'),
        'printarray': (['string'], 'string', 'PrintArray'),
        'encodearray': (['string'], 'string', 'EncodeArray'),
        'decodearray': (['string'], 'string', 'DecodeArray'),
    }

    def __init__(self, library_path, database, func_prefix=None):
        self.library_path = library_path
        self.database = database

        # Materialize the keys as lists before concatenating: on Python 3
        # dict.keys() returns a view object that does not support `+`.
        self.function_names = sorted(list(self._udfs) + list(self._udas))
        self.func_prefix = func_prefix or 'madlib_'

        self._generate_wrappers()
        self._register_functions()

    def _generate_wrappers(self):
        """
        Wrap every UDA and UDF and expose each wrapper as an attribute of
        this object under its unprefixed name
        """
        for name, (inputs, output, update_sym) in self._udas.items():
            func = wrap_uda(self.library_path, inputs, output, update_sym,
                            name=self.func_prefix + name)
            setattr(self, name, func)

        for name, (inputs, output, sym) in self._udfs.items():
            func = wrap_udf(self.library_path, inputs, output, sym,
                            name=self.func_prefix + name)
            setattr(self, name, func)

    def _register_functions(self):
        """
        Register each wrapper under its (prefixed) name and this database
        """
        # Enable SQL translation to work correctly
        for name in self.function_names:
            func = getattr(self, name)
            func.register(func.name, self.database)

    def create_functions(self, client):
        """
        Create every wrapped function in self.database through the client

        Parameters
        ----------
        client : object providing create_function(func, database=...)
        """
        for name in self.function_names:
            func = getattr(self, name)
            client.create_function(func, database=self.database)

    def logistic_regression(self):
        """
        Placeholder; not implemented yet
        """
        pass

    def linear_regression(self):
        """
        Placeholder; not implemented yet
        """
        pass

    def svm(self):
        """
        Placeholder; not implemented yet
        """
        pass
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import os | ||
| import time | ||
| import six | ||
|
|
||
| import pytest | ||
|
|
||
| from ibis import options | ||
| import ibis.util as util | ||
| import ibis.compat as compat | ||
| import ibis | ||
|
|
||
|
|
||
class IbisTestEnv(object):
    """
    Settings for a test run, read from IBIS_TEST_* environment variables
    with a fallback default for each one. Constructing an instance also
    pushes the temporary database and HDFS path into the global Ibis options.
    """

    def __init__(self):
        # TODO: allow initializing values through a constructor
        getenv = os.environ.get

        self.impala_host = getenv('IBIS_TEST_IMPALA_HOST', 'localhost')
        self.impala_port = int(getenv('IBIS_TEST_IMPALA_PORT', 21050))

        default_tmp_db = '__ibis_tmp_{0}'.format(util.guid())
        self.tmp_db = getenv('IBIS_TEST_TMP_DB', default_tmp_db)
        self.tmp_dir = getenv('IBIS_TEST_TMP_HDFS_DIR', '/tmp/__ibis_test')

        self.test_data_db = getenv('IBIS_TEST_DATA_DB', 'ibis_testing')
        self.test_data_dir = getenv('IBIS_TEST_DATA_HDFS_DIR',
                                    '/__ibis/ibis-testing-data')

        self.nn_host = getenv('IBIS_TEST_NN_HOST', 'localhost')
        # 5070 is default for impala dev env
        self.webhdfs_port = int(getenv('IBIS_TEST_WEBHDFS_PORT', 5070))
        self.hdfs_superuser = getenv('IBIS_TEST_HDFS_SUPERUSER', 'hdfs')

        # Boolean flags: only the (case-insensitive) text "true" enables them
        codegen_flag = getenv('IBIS_TEST_USE_CODEGEN', 'False')
        self.use_codegen = codegen_flag.lower() == 'true'
        cleanup_flag = getenv('IBIS_TEST_CLEANUP_TEST_DATA', 'True')
        self.cleanup_test_data = cleanup_flag.lower() == 'true'

        self.auth_mechanism = getenv('IBIS_TEST_AUTH_MECH', 'NOSASL')
        self.llvm_config = getenv('IBIS_TEST_LLVM_CONFIG', None)

        # update global Ibis config where relevant
        options.impala.temp_db = self.tmp_db
        options.impala.temp_hdfs_path = self.tmp_dir

    def __repr__(self):
        pairs = []
        for key, value in six.iteritems(self.__dict__):
            pairs.append('{0}={1}'.format(key, value))
        body = ',\n '.join(pairs)
        return 'IbisTestEnv(\n {0})'.format(body)
|
|
||
|
|
||
def connect_test(env, with_hdfs=True):
    """
    Open an Impala connection (optionally backed by an HDFS client) using
    the settings held by `env`.

    Parameters
    ----------
    env : IbisTestEnv
    with_hdfs : boolean, default True
      If False, no HDFS client is created and None is passed to Impala

    Returns
    -------
    con : the client returned by ibis.impala.connect
    """
    hdfs_client = None
    if with_hdfs:
        # Certificate verification is switched off for these mechanisms
        skip_verify = env.auth_mechanism in ['GSSAPI', 'LDAP']
        if skip_verify:
            print("Warning: ignoring invalid Certificate Authority errors")
        hdfs_client = ibis.hdfs_connect(host=env.nn_host,
                                        port=env.webhdfs_port,
                                        auth_mechanism=env.auth_mechanism,
                                        verify=not skip_verify)

    connection = ibis.impala.connect(host=env.impala_host,
                                     database=env.test_data_db,
                                     port=env.impala_port,
                                     auth_mechanism=env.auth_mechanism,
                                     pool_size=2,
                                     hdfs_client=hdfs_client)
    return connection
|
|
||
|
|
||
@pytest.mark.impala
class ImpalaE2E(object):
    """
    Mixin for end-to-end Impala tests. The class-level hooks open a shared
    connection and manage the temporary database; the per-test hooks track
    and drop the temporary objects each test records.
    """

    @classmethod
    def setUpClass(cls):
        """
        Connect using a fresh IbisTestEnv and make sure the temporary
        database exists
        """
        ENV = IbisTestEnv()
        cls.con = connect_test(ENV)
        # Tests run generally faster without it
        if not ENV.use_codegen:
            cls.con.disable_codegen()
        cls.hdfs = cls.con.hdfs
        cls.test_data_dir = ENV.test_data_dir
        cls.test_data_db = ENV.test_data_db
        cls.tmp_dir = ENV.tmp_dir
        cls.tmp_db = ENV.tmp_db
        cls.alltypes = cls.con.table('functional_alltypes')

        cls.db = cls.con.database(ENV.test_data_db)

        if not cls.con.exists_database(cls.tmp_db):
            cls.con.create_database(cls.tmp_db)

    @classmethod
    def tearDownClass(cls):
        """
        Drop the temporary database, trying up to three times with a short
        pause in between; the last failure is re-raised
        """
        retries = 3
        for attempt in range(1, retries + 1):
            # reduce test flakiness
            try:
                cls.con.drop_database(cls.tmp_db, force=True)
                break
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit propagate immediately instead of being retried.
                if attempt >= retries:
                    raise

                time.sleep(0.1)

    def setUp(self):
        """
        Reset the lists in which a test records objects to drop afterwards
        """
        self.temp_databases = []
        self.temp_tables = []
        self.temp_views = []
        self.temp_udfs = []
        self.temp_udas = []

    def tearDown(self):
        """
        Drop every table, view, UDF, UDA and database recorded by the test
        """
        for t in self.temp_tables:
            self.con.drop_table(t, force=True)

        for t in self.temp_views:
            self.con.drop_view(t, force=True)

        for f_name, f_inputs in self.temp_udfs:
            self.con.drop_udf(f_name, input_types=f_inputs, force=True)

        for f_name, f_inputs in self.temp_udas:
            self.con.drop_uda(f_name, input_types=f_inputs, force=True)

        # Switch back to the test data database before dropping the
        # temporary databases
        self.con.set_database(self.test_data_db)
        for t in self.temp_databases:
            self.con.drop_database(t, force=True)
|
|
||
|
|
||
def format_schema(expr):
    """
    Pretty-print the schema of `expr` as a list of
    (column name, Impala SQL type string) pairs.
    """
    from ibis.impala.compiler import _type_to_sql_string
    from pprint import pprint

    schema = expr.schema()
    sql_types = []
    for ibis_type in schema.types:
        sql_types.append(_type_to_sql_string(ibis_type))

    pprint(compat.lzip(schema.names, sql_types))
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| # Copyright 2015 Cloudera Inc | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from posixpath import join as pjoin | ||
| import pytest | ||
|
|
||
| from ibis.compat import unittest | ||
| from ibis.impala.tests.common import ImpalaE2E | ||
|
|
||
| from ibis.impala import madlib | ||
|
|
||
| import ibis.util as util | ||
|
|
||
|
|
||
class TestMADLib(ImpalaE2E, unittest.TestCase):
    """
    End-to-end check that the MADLib functions can be created in Impala.
    """

    @classmethod
    def setUpClass(cls):
        """
        Create a throwaway database with a short random suffix
        """
        super(TestMADLib, cls).setUpClass()
        cls.db = '__ibis_madlib_{0}'.format(util.guid()[:4])

        cls.con.create_database(cls.db)

    @classmethod
    def tearDownClass(cls):
        """
        Run the base class teardown, then drop the throwaway database
        """
        super(TestMADLib, cls).tearDownClass()

        try:
            cls.con.drop_database(cls.db, force=True)
        except Exception:
            # Best-effort cleanup: a failed drop must not fail the test run.
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            pass

    def setUp(self):
        """
        Build a MADLibAPI for the shared library under the test data dir
        """
        super(TestMADLib, self).setUp()
        self.madlib_so = pjoin(self.test_data_dir, 'udf/madlib.so')

        self.api = madlib.MADLibAPI(self.madlib_so, self.db)

    @pytest.mark.madlib
    def test_create_functions(self):
        self.api.create_functions(self.con)

        for name in self.api._udfs:
            func = getattr(self.api, name)
            assert self.con.exists_udf(func.name, database=self.db)

        for name in self.api._udas:
            func = getattr(self.api, name)
            assert self.con.exists_uda(func.name, database=self.db)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| # Copyright 2014 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import ibis | ||
|
|
||
| from ibis.impala.compiler import to_sql | ||
| from ibis.compat import unittest | ||
|
|
||
|
|
||
class TestImpalaSQL(unittest.TestCase):
    """
    SQL generation checks that need no Impala connection.
    """

    def test_relabel_projection(self):
        # GH #551
        names = ['foo', 'bar', 'baz']
        types = ['int32', 'string', 'double']
        table = ibis.table(zip(names, types), 'table')
        relabeled = table.relabel({'foo': 'one', 'baz': 'three'})

        # Only the relabeled columns get an alias
        expected = """\
SELECT `foo` AS `one`, `bar`, `baz` AS `three`
FROM `table`"""
        assert to_sql(relabeled) == expected
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from ibis.tests.conftest import * # noqa |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
|
|
||
| from .client import SQLiteClient | ||
| from .compiler import rewrites # noqa | ||
|
|
||
|
|
||
def compile(expr):
    """
    Force compilation of expression for the SQLite target

    Returns
    -------
    compiled : result of to_sqlalchemy using the SQLite dialect
    """
    # Imported lazily, inside the function
    from .client import SQLiteDialect
    from ibis.sql.alchemy import to_sqlalchemy

    compiled = to_sqlalchemy(expr, dialect=SQLiteDialect)
    return compiled
|
|
||
|
|
||
def connect(path, create=False):
    """
    Create an Ibis client connected to a SQLite database.

    Multiple database files can be created using the attach() method

    Parameters
    ----------
    path : string
      File path to the SQLite database file
    create : boolean, default False
      If file does not exist, create it

    Returns
    -------
    client : SQLiteClient
    """
    client = SQLiteClient(path, create=create)
    return client
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import os | ||
|
|
||
| import sqlalchemy as sa | ||
|
|
||
| from ibis.client import Database | ||
| from .compiler import SQLiteDialect | ||
| import ibis.expr.types as ir | ||
| import ibis.sql.alchemy as alch | ||
| import ibis.common as com | ||
|
|
||
|
|
||
class SQLiteTable(alch.AlchemyTable):
    """
    Table node created by SQLiteClient.table; adds nothing to AlchemyTable
    """
|
|
||
|
|
||
class SQLiteDatabase(Database):
    """
    Database class used by SQLiteClient; adds nothing to Database
    """
|
|
||
|
|
||
class SQLiteClient(alch.AlchemyClient):

    """
    The Ibis SQLite client class

    An in-memory SQLAlchemy engine is created and the database file given
    by `path` is attached to it under the name 'default'.
    """

    dialect = SQLiteDialect
    database_class = SQLiteDatabase

    def __init__(self, path, create=False):
        self.name = path
        self.database_name = 'default'

        self.con = sa.create_engine('sqlite://')
        self.attach(self.database_name, path, create=create)
        self.meta = sa.MetaData(bind=self.con)

    @property
    def current_database(self):
        return self.database_name

    def list_databases(self):
        raise NotImplementedError

    def set_database(self):
        raise NotImplementedError

    def attach(self, name, path, create=False):
        """
        Connect another SQLite database file

        Parameters
        ----------
        name : string
          Database name within SQLite
        path : string
          Path to sqlite3 file
        create : boolean, default False
          If file does not exist, create file if True otherwise raise Exception

        Raises
        ------
        IbisError : if the file does not exist and create is False
        """
        if not os.path.exists(path) and not create:
            raise com.IbisError('File {0} does not exist'.format(path))

        # Both values are embedded in single-quoted SQL literals, so any
        # embedded single quote is doubled to keep it inside the literal
        # (otherwise a path such as "o'brien.db" breaks the statement).
        statement = "ATTACH DATABASE '{0}' AS '{1}'".format(
            path.replace("'", "''"), name.replace("'", "''"))
        self.con.execute(statement)

    @property
    def client(self):
        return self

    def table(self, name, database=None):
        """
        Create a table expression that references a particular table in the
        SQLite database

        Parameters
        ----------
        name : string
        database : string, optional
          Accepted but currently not used by this method

        Returns
        -------
        table : TableExpr
        """
        alch_table = self._get_sqla_table(name)
        node = SQLiteTable(alch_table, self)
        return self._table_expr_klass(node)

    def drop_table(self):
        """
        Not implemented yet; currently does nothing
        """
        pass

    def create_table(self, name, expr=None):
        """
        Not implemented yet; currently does nothing
        """
        pass

    @property
    def _table_expr_klass(self):
        return ir.TableExpr
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,183 @@ | ||
| # Copyright 2014 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import sqlalchemy as sa | ||
|
|
||
| from ibis.sql.alchemy import unary, varargs, fixed_arity | ||
| import ibis.sql.alchemy as alch | ||
| import ibis.expr.datatypes as dt | ||
| import ibis.expr.operations as ops | ||
| import ibis.expr.types as ir | ||
| import ibis.common as com | ||
|
|
||
# Start from a copy of the generic SQLAlchemy registry; the SQLite-specific
# translation rules are added to this copy further down in the module.
_operation_registry = alch._operation_registry.copy()
|
|
||
|
|
||
def _cast(t, expr):
    """
    Translate a Cast operation; casts to timestamp and category-to-int32
    casts pass the argument through unchanged.
    """
    # It's not all fun and games with SQLite
    arg, target_type = expr.op().args
    sa_arg = t.translate(arg)
    sa_type = t.get_sqla_type(target_type)

    # SQLite does not have a physical date/time/timestamp type, so
    # unfortunately cast to timestamp must be a no-op, and we have to trust
    # that the user's data can actually be correctly parsed by SQLite.
    if isinstance(target_type, dt.Timestamp):
        if isinstance(arg, (ir.IntegerValue, ir.StringValue)):
            return sa_arg
        raise com.TranslationError(type(arg))

    passthrough = isinstance(arg, ir.CategoryValue) and target_type == 'int32'
    if passthrough:
        return sa_arg
    return sa.cast(sa_arg, sa_type)
|
|
||
|
|
||
def _substr(t, expr):
    """
    Translate Substring to SQLite's substr(), with or without a length
    """
    arg, start, length = expr.op().args

    sa_arg = t.translate(arg)
    sa_start = t.translate(start)

    # The start position is shifted by one (SQLite's substr() counts
    # characters from 1 according to its documentation)
    call_args = [sa_arg, sa_start + 1]
    if length is not None:
        call_args.append(t.translate(length))

    return sa.func.substr(*call_args)
|
|
||
|
|
||
def _string_right(t, expr):
    """
    Translate StrRight to substr(arg, -length, length)
    """
    string_arg, n_chars = expr.op().args

    sa_string = t.translate(string_arg)
    sa_n_chars = t.translate(n_chars)

    # A negative start position is passed together with the same length
    return sa.func.substr(sa_string, -sa_n_chars, sa_n_chars)
|
|
||
|
|
||
def _string_find(t, expr):
    """
    Translate StringFind to instr(arg, substr) - 1

    Raises
    ------
    NotImplementedError : if a start position is given
    """
    haystack, needle, start, _ = expr.op().args

    if start is not None:
        raise NotImplementedError

    sa_haystack = t.translate(haystack)
    sa_needle = t.translate(needle)

    # The instr() result is shifted down by one
    position = sa.func.instr(sa_haystack, sa_needle)
    return position - 1
|
|
||
|
|
||
def _infix_op(infix_sym):
    """
    Build a translation rule for a binary operation that is rendered with
    the given infix operator symbol (e.g. 'LIKE').
    """
    def formatter(t, expr):
        lhs, rhs = expr.op().args

        sa_lhs = t.translate(lhs)
        sa_rhs = t.translate(rhs)

        combine = sa_lhs.op(infix_sym)
        return combine(sa_rhs)

    return formatter
|
|
||
|
|
||
def _strftime(t, expr):
    """
    Translate Strftime to strftime(format, arg); the format comes first
    """
    arg, fmt = expr.op().args
    sa_arg = t.translate(arg)
    sa_fmt = t.translate(fmt)
    return sa.func.strftime(sa_fmt, sa_arg)
|
|
||
|
|
||
def _strftime_int(fmt):
    """
    Build a rule for a unary operation: format the argument with
    strftime(fmt, ...) and cast the result to INTEGER.
    """
    def translator(t, expr):
        (arg,) = expr.op().args
        formatted = sa.func.strftime(fmt, t.translate(arg))
        return sa.cast(formatted, sa.types.INTEGER)

    return translator
|
|
||
|
|
||
def _now(t, expr):
    """
    Translate TimestampNow to datetime('now'); both arguments are unused
    """
    current_timestamp = sa.func.datetime('now')
    return current_timestamp
|
|
||
|
|
||
def _millisecond(t, expr):
    """
    Translate ExtractMillisecond as (strftime('%f', arg) * 1000) % 1000
    """
    (arg,) = expr.op().args
    # '%f' is documented by SQLite as fractional seconds (SS.SSS); scaling by
    # 1000 and taking the remainder keeps only the millisecond part
    seconds = sa.func.strftime('%f', t.translate(arg))
    return (seconds * 1000) % 1000
|
|
||
|
|
||
# SQLite-specific translation rules, keyed by Ibis operation class. update()
# replaces any entry already present in the registry for the same operation.
_operation_registry.update({
    ops.Cast: _cast,

    # String slicing and searching
    ops.Substring: _substr,
    ops.StrRight: _string_right,

    ops.StringFind: _string_find,

    ops.StringLength: unary('length'),

    # Least / Greatest are mapped onto the min / max functions
    ops.Least: varargs(sa.func.min),
    ops.Greatest: varargs(sa.func.max),
    ops.IfNull: fixed_arity(sa.func.ifnull, 2),

    ops.Lowercase: unary('lower'),
    ops.Uppercase: unary('upper'),

    ops.Strip: unary('trim'),
    ops.LStrip: unary('ltrim'),
    ops.RStrip: unary('rtrim'),

    ops.StringReplace: fixed_arity(sa.func.replace, 3),
    ops.StringSQLLike: _infix_op('LIKE'),
    ops.RegexSearch: _infix_op('REGEXP'),

    # Timestamp fields are extracted through strftime()
    ops.Strftime: _strftime,
    ops.ExtractYear: _strftime_int('%Y'),
    ops.ExtractMonth: _strftime_int('%m'),
    ops.ExtractDay: _strftime_int('%d'),
    ops.ExtractHour: _strftime_int('%H'),
    ops.ExtractMinute: _strftime_int('%M'),
    ops.ExtractSecond: _strftime_int('%S'),
    ops.ExtractMillisecond: _millisecond,
    ops.TimestampNow: _now
})
|
|
||
|
|
||
def add_operation(op, translation_func):
    """
    Register (or replace) the translation function used for operation `op`
    """
    _operation_registry.update({op: translation_func})
|
|
||
|
|
||
class SQLiteExprTranslator(alch.AlchemyExprTranslator):
    """
    Expression translator using the SQLite operation registry and its own
    copies of the base translator's rewrites and type map.
    """

    _registry = _operation_registry
    _rewrites = alch.AlchemyExprTranslator._rewrites.copy()

    # Copy of the base type map in which Double is mapped to REAL
    _type_map = alch.AlchemyExprTranslator._type_map.copy()
    _type_map[dt.Double] = sa.types.REAL
|
|
||
|
|
||
# Expose the translator's `rewrites` attribute as a module-level name
rewrites = SQLiteExprTranslator.rewrites
|
|
||
|
|
||
class SQLiteDialect(alch.AlchemyDialect):
    """
    Dialect whose translator is SQLiteExprTranslator
    """

    translator = SQLiteExprTranslator
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import os | ||
| import pytest | ||
|
|
||
| from ibis.sql.sqlite.compiler import SQLiteExprTranslator | ||
| import ibis.sql.sqlite.api as api | ||
| from sqlalchemy.dialects.sqlite import dialect as sqlite_dialect | ||
|
|
||
|
|
||
@pytest.mark.sqlite
class SQLiteTests(object):
    """
    Mixin with a shared SQLite connection and helpers for comparing
    translated expressions and executed results.
    """

    @classmethod
    def setUpClass(cls):
        cls.env = SQLiteTestEnv()
        cls.dialect = sqlite_dialect()
        cls.con = api.connect(cls.env.db_path)
        cls.alltypes = cls.con.table('functional_alltypes')

    def _check_expr_cases(self, cases, context=None, named=False):
        """
        For each (ibis expr, SQLAlchemy expr) pair, assert that both compile
        to the same string under the SQLite dialect
        """
        def render(clause):
            return str(clause.compile(dialect=self.dialect))

        for expr, expected in cases:
            translated = self._translate(expr, named=named, context=context)
            assert render(translated) == render(expected)

    def _translate(self, expr, named=False, context=None):
        return SQLiteExprTranslator(expr, context=context,
                                    named=named).get_result()

    def _to_sqla(self, table):
        node = table.op()
        return node.sqla_table

    def _check_e2e_cases(self, cases):
        """
        For each (expr, expected) pair, execute expr on the connection and
        assert that the result equals expected
        """
        for expr, expected in cases:
            assert self.con.execute(expr) == expected
|
|
||
|
|
||
class SQLiteTestEnv(object):
    """
    Test settings for SQLite: the database path comes from the
    IBIS_TEST_SQLITE_DB_PATH environment variable, falling back to
    'ibis_testing.db'.
    """

    def __init__(self):
        default_path = 'ibis_testing.db'
        self.db_path = os.environ.get('IBIS_TEST_SQLITE_DB_PATH',
                                      default_path)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from ibis.tests.conftest import * # noqa |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import os | ||
|
|
||
| import pandas as pd | ||
|
|
||
| from .common import SQLiteTests | ||
| from ibis.compat import unittest | ||
| from ibis.tests.util import assert_equal | ||
| from ibis.util import guid | ||
| import ibis.expr.types as ir | ||
| import ibis.common as com | ||
| import ibis | ||
|
|
||
|
|
||
class TestSQLiteClient(SQLiteTests, unittest.TestCase):
    """
    Tests of the SQLite client against the shared test database.
    """

    @classmethod
    def tearDownClass(cls):
        pass

    def test_file_not_exist_and_create(self):
        path = '__ibis_tmp_{0}.db'.format(guid())

        with self.assertRaises(com.IbisError):
            ibis.sqlite.connect(path)

        try:
            ibis.sqlite.connect(path, create=True)
            assert os.path.exists(path)
        finally:
            # Remove the temporary file even when the connect or the
            # assertion fails, so a failing run leaves nothing behind
            if os.path.exists(path):
                os.remove(path)

    def test_table(self):
        table = self.con.table('functional_alltypes')
        assert isinstance(table, ir.TableExpr)

    def test_array_execute(self):
        d = self.alltypes.limit(10).double_col
        s = d.execute()
        assert isinstance(s, pd.Series)
        assert len(s) == 10

    def test_literal_execute(self):
        expr = ibis.literal('1234')
        result = self.con.execute(expr)
        assert result == '1234'

    def test_simple_aggregate_execute(self):
        d = self.alltypes.double_col.sum()
        v = d.execute()
        assert isinstance(v, float)

    def test_list_tables(self):
        assert len(self.con.list_tables()) > 0
        assert len(self.con.list_tables(like='functional')) == 1

    def test_compile_verify(self):
        unsupported_expr = self.alltypes.string_col.approx_nunique()
        assert not unsupported_expr.verify()

        supported_expr = self.alltypes.double_col.sum()
        assert supported_expr.verify()

    def test_attach_file(self):
        # Placeholder: attaching a second file is not tested yet
        pass

    def test_database_layer(self):
        db = self.con.database()

        t = db.functional_alltypes
        assert_equal(t, self.alltypes)

        assert db.list_tables() == self.con.list_tables()

    def test_compile_toplevel(self):
        # t = ibis.table([
        #     ('foo', 'double')
        # ])

        # # it works!
        # expr = t.foo.sum()
        # ibis.sqlite.compile(expr)

        # This does not work yet because if the compiler encounters a
        # non-SQLAlchemy table it fails
        pass
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,339 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import pytest # noqa | ||
|
|
||
| from .common import SQLiteTests | ||
| from ibis.compat import unittest | ||
| from ibis import literal as L | ||
| import ibis.expr.types as ir | ||
| import ibis | ||
|
|
||
| import sqlalchemy as sa | ||
|
|
||
|
|
||
| class TestSQLiteFunctions(SQLiteTests, unittest.TestCase): | ||
|
|
||
def test_cast(self):
    """Casts translate to SQLAlchemy CAST with the expected target types."""
    sqla_table = self._to_sqla(self.alltypes)

    double_col = self.alltypes.double_col
    string_col = self.alltypes.string_col

    sa_double = sqla_table.c.double_col
    sa_string = sqla_table.c.string_col

    self._check_expr_cases([
        (double_col.cast('int8'), sa.cast(sa_double, sa.types.SMALLINT)),
        (string_col.cast('double'), sa.cast(sa_string, sa.types.REAL)),
    ])
|
|
||
def test_decimal_cast(self):
    """Placeholder: decimal casts are not tested yet."""
|
|
||
def test_timestamp_cast_noop(self):
    """Casting to timestamp changes the Ibis type but not the SQL."""
    # See GH #592
    sqla_table = self._to_sqla(self.alltypes)

    from_timestamp = self.alltypes.timestamp_col.cast('timestamp')
    from_int = self.alltypes.int_col.cast('timestamp')

    # Logically, it's a timestamp
    for casted in (from_timestamp, from_int):
        assert isinstance(casted, ir.TimestampArray)

    # But it's a no-op when translated to SQLAlchemy
    self._check_expr_cases([
        (from_timestamp, sqla_table.c.timestamp_col),
        (from_int, sqla_table.c.int_col)
    ])
|
|
||
| def test_timestamp_functions(self): | ||
| from datetime import datetime | ||
|
|
||
| v = L('2015-09-01 14:48:05.359').cast('timestamp') | ||
|
|
||
| cases = [ | ||
| (v.strftime('%Y%m%d'), '20150901'), | ||
|
|
||
| (v.year(), 2015), | ||
| (v.month(), 9), | ||
| (v.day(), 1), | ||
| (v.hour(), 14), | ||
| (v.minute(), 48), | ||
| (v.second(), 5), | ||
| (v.millisecond(), 359), | ||
|
|
||
| # there could be pathological failure at midnight somewhere, but | ||
| # that's okay | ||
| (ibis.now().strftime('%Y%m%d %H'), | ||
| datetime.utcnow().strftime('%Y%m%d %H')) | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_binary_arithmetic(self): | ||
| cases = [ | ||
| (L(3) + L(4), 7), | ||
| (L(3) - L(4), -1), | ||
| (L(3) * L(4), 12), | ||
| (L(12) / L(4), 3), | ||
| # (L(12) ** L(2), 144), | ||
| (L(12) % L(5), 2) | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_typeof(self): | ||
| cases = [ | ||
| (L('foo_bar').typeof(), 'text'), | ||
| (L(5).typeof(), 'integer'), | ||
| (ibis.NA.typeof(), 'null'), | ||
| (L(1.2345).typeof(), 'real'), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_nullifzero(self): | ||
| cases = [ | ||
| (L(0).nullifzero(), None), | ||
| (L(5.5).nullifzero(), 5.5), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_string_length(self): | ||
| cases = [ | ||
| (L('foo_bar').length(), 7), | ||
| (L('').length(), 0), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_string_substring(self): | ||
| cases = [ | ||
| (L('foo_bar').left(3), 'foo'), | ||
| (L('foo_bar').right(3), 'bar'), | ||
|
|
||
| (L('foo_bar').substr(0, 3), 'foo'), | ||
| (L('foo_bar').substr(4, 3), 'bar'), | ||
| (L('foo_bar').substr(1), 'oo_bar'), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_string_strip(self): | ||
| cases = [ | ||
| (L(' foo ').lstrip(), 'foo '), | ||
| (L(' foo ').rstrip(), ' foo'), | ||
| (L(' foo ').strip(), 'foo'), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_string_upper_lower(self): | ||
| cases = [ | ||
| (L('foo').upper(), 'FOO'), | ||
| (L('FOO').lower(), 'foo'), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_string_contains(self): | ||
| cases = [ | ||
| (L('foobar').contains('bar'), True), | ||
| (L('foobar').contains('foo'), True), | ||
| (L('foobar').contains('baz'), False), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_string_functions(self): | ||
| cases = [ | ||
| (L('foobar').find('bar'), 3), | ||
| (L('foobar').find('baz'), -1), | ||
|
|
||
| (L('foobar').like('%bar'), True), | ||
| (L('foobar').like('foo%'), True), | ||
| (L('foobar').like('%baz%'), False), | ||
|
|
||
| (L('foobarfoo').replace('foo', 'H'), 'HbarH'), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_math_functions(self): | ||
| cases = [ | ||
| (L(-5).abs(), 5), | ||
| (L(5).abs(), 5), | ||
| (ibis.least(L(5), L(10), L(1)), 1), | ||
| (ibis.greatest(L(5), L(10), L(1)), 10), | ||
|
|
||
| (L(5.5).round(), 6.0), | ||
| (L(5.556).round(2), 5.56), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_regexp(self): | ||
| pytest.skip('NYI: Requires adding regex udf with sqlite3') | ||
|
|
||
| v = L('abcd') | ||
| v2 = L('1222') | ||
| cases = [ | ||
| (v.re_search('[a-z]'), True), | ||
| (v.re_search('[\d]+'), False), | ||
| (v2.re_search('[\d]+'), True), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_fillna_nullif(self): | ||
| cases = [ | ||
| (ibis.NA.fillna(5), 5), | ||
| (L(5).fillna(10), 5), | ||
| (L(5).nullif(5), None), | ||
| (L(10).nullif(5), 10), | ||
| ] | ||
| self._check_e2e_cases(cases) | ||
|
|
||
| def test_coalesce(self): | ||
| pass | ||
|
|
||
| def test_numeric_builtins_work(self): | ||
| t = self.alltypes | ||
| d = t.double_col | ||
|
|
||
| exprs = [ | ||
| d.fillna(0), | ||
| ] | ||
| self._execute_projection(t, exprs) | ||
|
|
||
| def test_misc_builtins_work(self): | ||
| t = self.alltypes | ||
| d = t.double_col | ||
|
|
||
| exprs = [ | ||
| (d > 20).ifelse(10, -20), | ||
| (d > 20).ifelse(10, -20).abs(), | ||
|
|
||
| # tier and histogram | ||
| d.bucket([0, 10, 25, 50, 100]), | ||
| d.bucket([0, 10, 25, 50], include_over=True), | ||
| d.bucket([0, 10, 25, 50], include_over=True, close_extreme=False), | ||
| d.bucket([10, 25, 50, 100], include_under=True), | ||
| ] | ||
| self._execute_projection(t, exprs) | ||
|
|
||
| def test_category_label(self): | ||
| t = self.alltypes | ||
| d = t.double_col | ||
|
|
||
| bucket = d.bucket([0, 10, 25, 50, 100]) | ||
|
|
||
| exprs = [ | ||
| bucket.label(['a', 'b', 'c', 'd']) | ||
| ] | ||
| self._execute_projection(t, exprs) | ||
|
|
||
| def test_union(self): | ||
| pytest.skip('union not working yet') | ||
|
|
||
| t = self.alltypes | ||
|
|
||
| expr = (t.group_by('string_col') | ||
| .aggregate(t.double_col.sum().name('foo')) | ||
| .sort_by('string_col')) | ||
|
|
||
| t1 = expr.limit(4) | ||
| t2 = expr.limit(4, offset=4) | ||
| t3 = expr.limit(8) | ||
|
|
||
| result = t1.union(t2).execute() | ||
| expected = t3.execute() | ||
|
|
||
| assert (result.string_col == expected.string_col).all() | ||
|
|
||
| def test_aggregations_execute(self): | ||
| table = self.alltypes.limit(100) | ||
|
|
||
| d = table.double_col | ||
| s = table.string_col | ||
|
|
||
| cond = table.string_col.isin(['1', '7']) | ||
|
|
||
| exprs = [ | ||
| table.bool_col.count(), | ||
| table.bool_col.any(), | ||
| table.bool_col.all(), | ||
| table.bool_col.notany(), | ||
| table.bool_col.notall(), | ||
|
|
||
| d.sum(), | ||
| d.mean(), | ||
| d.min(), | ||
| d.max(), | ||
|
|
||
| table.bool_col.count(where=cond), | ||
| d.sum(where=cond), | ||
| d.mean(where=cond), | ||
| d.min(where=cond), | ||
| d.max(where=cond), | ||
|
|
||
| s.group_concat(), | ||
| ] | ||
| self._execute_aggregation(table, exprs) | ||
|
|
||
| def test_distinct_aggregates(self): | ||
| table = self.alltypes.limit(100) | ||
|
|
||
| exprs = [ | ||
| table.double_col.nunique() | ||
| ] | ||
| self._execute_aggregation(table, exprs) | ||
|
|
||
| def test_not_exists_works(self): | ||
| t = self.alltypes | ||
| t2 = t.view() | ||
|
|
||
| expr = t[-(t.string_col == t2.string_col).any()] | ||
| expr.execute() | ||
|
|
||
| def test_interactive_repr_shows_error(self): | ||
| # #591. Doing this in SQLite because so many built-in functions are not | ||
| # available | ||
| import ibis.config as config | ||
|
|
||
| expr = self.alltypes.double_col.approx_nunique() | ||
|
|
||
| with config.option_context('interactive', True): | ||
| result = repr(expr) | ||
| assert 'no translator rule' in result.lower() | ||
|
|
||
| def test_subquery_invokes_sqlite_compiler(self): | ||
| t = self.alltypes | ||
|
|
||
| expr = (t.mutate(d=t.double_col.fillna(0)) | ||
| .limit(1000) | ||
| .group_by('string_col') | ||
| .size()) | ||
| expr.execute() | ||
|
|
||
| def _execute_aggregation(self, table, exprs): | ||
| agg_exprs = [expr.name('e%d' % i) | ||
| for i, expr in enumerate(exprs)] | ||
|
|
||
| agged_table = table.aggregate(agg_exprs) | ||
| agged_table.execute() | ||
|
|
||
| def _execute_projection(self, table, exprs): | ||
| agg_exprs = [expr.name('e%d' % i) | ||
| for i, expr in enumerate(exprs)] | ||
|
|
||
| proj = table.projection(agg_exprs) | ||
| proj.execute() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| # Copyright 2015 Cloudera Inc. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from ibis.tests.conftest import * # noqa |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,5 +3,6 @@ numpy>=1.7.0 | |
| pandas>=0.12.0 | ||
| impyla>=0.10.0 | ||
| psutil==0.6.1 | ||
| hdfs>=2.0.0 | ||
| sqlalchemy>=1.0.0 | ||
| six | ||