Showing with 2,379 additions and 920 deletions.
  1. +7 −7 conda-recipes/hdfs/meta.yaml
  2. +4 −2 conda-recipes/ibis-framework/meta.yaml
  3. +3 −18 conda-recipes/impyla/meta.yaml
  4. +4 −4 conda-recipes/thrift/meta.yaml
  5. +2 −0 conda-recipes/thrift_sasl/meta.yaml
  6. +8 −0 conda-recipes/thriftpy/bld.bat
  7. +9 −0 conda-recipes/thriftpy/build.sh
  8. +39 −0 conda-recipes/thriftpy/meta.yaml
  9. +80 −0 docs/source/release.rst
  10. +16 −3 ibis/__init__.py
  11. +232 −76 ibis/expr/analysis.py
  12. +20 −14 ibis/expr/api.py
  13. +4 −4 ibis/expr/format.py
  14. +3 −1 ibis/expr/groupby.py
  15. +249 −86 ibis/expr/operations.py
  16. +1 −1 ibis/expr/tests/mocks.py
  17. +94 −79 ibis/expr/tests/test_analysis.py
  18. +2 −1 ibis/expr/tests/test_analytics.py
  19. +14 −9 ibis/expr/tests/test_format.py
  20. +14 −15 ibis/expr/tests/test_table.py
  21. +4 −3 ibis/expr/tests/test_window_functions.py
  22. +12 −34 ibis/expr/types.py
  23. +1 −7 ibis/filesystems.py
  24. +3 −3 ibis/impala/api.py
  25. +58 −36 ibis/impala/client.py
  26. +4 −3 ibis/impala/compiler.py
  27. +57 −18 ibis/impala/ddl.py
  28. +300 −0 ibis/impala/kudu_support.py
  29. +4 −2 ibis/impala/metadata.py
  30. +6 −0 ibis/impala/pandas_interop.py
  31. +20 −26 ibis/impala/tests/common.py
  32. +25 −4 ibis/impala/tests/test_client.py
  33. +65 −38 ibis/impala/tests/test_ddl.py
  34. +289 −0 ibis/impala/tests/test_kudu_support.py
  35. +6 −6 ibis/impala/tests/test_pandas_interop.py
  36. +19 −39 ibis/impala/tests/test_partition.py
  37. +27 −0 ibis/impala/udf.py
  38. +31 −10 ibis/sql/alchemy.py
  39. +29 −62 ibis/sql/compiler.py
  40. +2 −0 ibis/sql/sqlite/tests/common.py
  41. +39 −0 ibis/sql/sqlite/tests/test_functions.py
  42. +225 −79 ibis/sql/tests/test_compiler.py
  43. +10 −6 ibis/sql/tests/test_sqlalchemy.py
  44. +3 −2 ibis/sql/transforms.py
  45. +1 −1 ibis/tests/conftest.py
  46. +11 −0 ibis/util.py
  47. +1 −1 requirements.txt
  48. +1 −1 scripts/test_data_admin.py
  49. +68 −2 testing/udf/CMakeLists.txt
  50. +0 −1 testing/udf/lib/udf-debug.h
  51. +253 −216 testing/udf/lib/udf.h
14 changes: 7 additions & 7 deletions conda-recipes/hdfs/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
package:
name: hdfs
version: "2.0.2"
version: "2.0.4"

source:
fn: hdfs-2.0.2.tar.gz
url: https://pypi.python.org/packages/source/h/hdfs/hdfs-2.0.2.tar.gz
md5: ade3a92382e0889e2a845b9cc707c704
fn: hdfs-2.0.4.tar.gz
url: https://pypi.python.org/packages/source/h/hdfs/hdfs-2.0.4.tar.gz
md5: c48b86a0fb05d22b5f4eb4cb84fc6452

build:
entry_points:
- hdfscli = hdfs.__main__:main
# - hdfscli-avro = hdfs.ext.avro.__main__:main # disabled
# - hdfscli-avro = hdfs.ext.avro.__main__:main

requirements:
build:
Expand All @@ -30,11 +30,11 @@ test:
imports:
- hdfs
- hdfs.ext
# - hdfs.ext.avro # disabled
# - hdfs.ext.avro

commands:
- hdfscli --help
# - hdfscli-avro --help # disabled
# - hdfscli-avro --help

about:
home: http://hdfscli.readthedocs.org
Expand Down
6 changes: 4 additions & 2 deletions conda-recipes/ibis-framework/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,22 @@ requirements:
- pytest
- numpy >=1.7.0
- pandas >=0.12.0
- impyla >=0.10.0
- impyla >=0.12.0
- hdfs >=2.0.0
- sqlalchemy >=1.0.0
- six
- mock

run:
- python
- pytest
- numpy >=1.7.0
- pandas >=0.12.0
- impyla >=0.10.0
- impyla >=0.12.0
- hdfs >=2.0.0
- sqlalchemy >=1.0.0
- six
- mock

test:
imports:
Expand Down
21 changes: 3 additions & 18 deletions conda-recipes/impyla/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: impyla
version: "0.11.2"
version: "0.12.0"

source:
git_url: https://github.com/cloudera/impyla
Expand All @@ -16,36 +16,21 @@ requirements:
- six
- thrift_sasl
- bitarray
- thrift
- llvmpy
- numba
- thriftpy

run:
- python
- setuptools
- six
- thrift_sasl
- bitarray
- thrift
- llvmpy
- numba
- thriftpy

test:
imports:
- impala
- impala._thrift_api
- impala._thrift_gen
- impala._thrift_gen.ExecStats
- impala._thrift_gen.ImpalaService
- impala._thrift_gen.Status
- impala._thrift_gen.TCLIService
- impala._thrift_gen.Types
- impala._thrift_gen.beeswax
- impala._thrift_gen.fb303
- impala._thrift_gen.hive_metastore
- impala.tests
- impala.thrift
# - impala.udf # Test fails on osx-64

about:
home: https://github.com/cloudera/impyla
Expand Down
8 changes: 4 additions & 4 deletions conda-recipes/thrift/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package:
name: thrift
version: "0.9.2"
version: "0.9.3"

source:
fn: thrift-0.9.2.tar.gz
url: https://pypi.python.org/packages/source/t/thrift/thrift-0.9.2.tar.gz
md5: 91f1c224c46a257bb428431943387dfd
fn: thrift-0.9.3.tar.gz
url: https://pypi.python.org/packages/source/t/thrift/thrift-0.9.3.tar.gz
md5: b519551d7a086bb0b4f222a8f566b7e8

requirements:
build:
Expand Down
2 changes: 2 additions & 0 deletions conda-recipes/thrift_sasl/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ requirements:
- python
- setuptools
- thrift
- thriftpy

run:
- python
- thrift
- thriftpy

test:
imports:
Expand Down
8 changes: 8 additions & 0 deletions conda-recipes/thriftpy/bld.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"%PYTHON%" setup.py install
if errorlevel 1 exit 1

:: Add more build steps here, if they are necessary.

:: See
:: http://docs.continuum.io/conda/build.html
:: for a list of environment variables that are set during the build process.
9 changes: 9 additions & 0 deletions conda-recipes/thriftpy/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Unix build script for the thriftpy conda recipe: installs the package from
# its setup.py using the interpreter named by the PYTHON environment variable.

# Quote the expansion so that an interpreter path containing spaces or glob
# characters reaches the shell as a single word.
"$PYTHON" setup.py install

# Add more build steps here, if they are necessary.

# See
# http://docs.continuum.io/conda/build.html
# for a list of environment variables that are set during the build process.
39 changes: 39 additions & 0 deletions conda-recipes/thriftpy/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# conda recipe for thriftpy 0.3.2, built from the PyPI source tarball.
package:
  name: thriftpy
  version: "0.3.2"

source:
  fn: thriftpy-0.3.2.tar.gz
  url: https://pypi.python.org/packages/source/t/thriftpy/thriftpy-0.3.2.tar.gz
  md5: 7e882aac1d3999af3bb29a6b65ed810f

requirements:
  build:
    - python
    - setuptools
    - ply >=3.4,<4.0

  run:
    - python
    - ply >=3.4,<4.0

test:
  # Modules that must import cleanly from the built package.
  imports:
    - thriftpy
    - thriftpy.contrib
    - thriftpy.contrib.tracking
    - thriftpy.parser
    - thriftpy.protocol
    - thriftpy.transport
    - thriftpy.transport.buffered
    - thriftpy.transport.framed
    - thriftpy.transport.memory

  # Extra packages installed only for the test phase.
  requires:
    - tornado >=4.0,<5.0
    # - toro ==0.6

about:
  home: https://thriftpy.readthedocs.org/
  license: MIT License
  summary: 'Pure python implementation of Apache Thrift.'
80 changes: 80 additions & 0 deletions docs/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,80 @@ Release Notes
interesting. Point (minor, e.g. 0.5.1) releases will generally not be found
here and contain only bug fixes.

0.7 (March 16, 2016)
--------------------

This release brings initial Kudu-Impala integration and improved Impala and
SQLite support, along with several critical bug fixes.

New features
~~~~~~~~~~~~
* Apache Kudu (incubating) integration for Impala users. See the `blog post <http://blog.ibis-project.org/kudu-impala-ibis>`_ for details; documentation will be added here when possible.
* Add ``use_https`` option to ``ibis.hdfs_connect`` for WebHDFS connections in
secure (Kerberized) clusters without SSL enabled.
* Correctly compile aggregate expressions involving multiple subqueries.

To explain this last point in more detail, suppose you had:

.. code-block:: python

   table = ibis.table([('flag', 'string'),
                       ('value', 'double')],
                      'tbl')
   flagged = table[table.flag == '1']
   unflagged = table[table.flag == '0']
   fv = flagged.value
   uv = unflagged.value
   expr = (fv.mean() / fv.sum()) - (uv.mean() / uv.sum())

The last expression now generates the correct Impala or SQLite SQL:

.. code-block:: sql

   SELECT t0.`tmp` - t1.`tmp` AS `tmp`
   FROM (
     SELECT avg(`value`) / sum(`value`) AS `tmp`
     FROM tbl
     WHERE `flag` = '1'
   ) t0
     CROSS JOIN (
       SELECT avg(`value`) / sum(`value`) AS `tmp`
       FROM tbl
       WHERE `flag` = '0'
     ) t1

Bug fixes
~~~~~~~~~
* ``CHAR(n)`` and ``VARCHAR(n)`` Impala types now correctly map to Ibis string
  expressions.
* Fix inappropriate projection-join-filter expression rewrites resulting in
incorrect generated SQL.
* ``ImpalaClient.create_table`` correctly passes ``STORED AS PARQUET`` for
``format='parquet'``.
* Fixed several issues with Ibis dependencies (impyla, thriftpy, sasl,
thrift_sasl), especially for secure clusters. Upgrading will pull in these
new dependencies.
* Do not fail in ``ibis.impala.connect`` when trying to create the temporary
  Ibis database if no HDFS connection is passed.
* Fix join predicate evaluation bug when column names overlap with table
attributes.
* Fix handling of fully-materialized joins (aka ``select *`` joins) in
SQLAlchemy / SQLite.

Contributors
~~~~~~~~~~~~
Thank you to all who contributed patches to this release.

::

$ git log v0.6.0..v0.7.0 --pretty=format:%aN | sort | uniq -c | sort -rn
21 Wes McKinney
1 Uri Laserson
1 Kristopher Overholt

0.6 (December 1, 2015)
----------------------

Expand Down Expand Up @@ -72,6 +146,12 @@ Contributors
::

$ git log v0.5.0..v0.6.0 --pretty=format:%aN | sort | uniq -c | sort -rn
46 Wes McKinney
3 Uri Laserson
1 Phillip Cloud
1 mariusvniekerk
1 Kristopher Overholt


0.5 (September 10, 2015)
------------------------
Expand Down
19 changes: 16 additions & 3 deletions ibis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def make_client(db, hdfs_client=None):


def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
auth_mechanism='NOSASL', verify=True, **kwds):
use_https='default', auth_mechanism='NOSASL',
verify=True, **kwds):
"""
Connect to HDFS
Expand All @@ -80,6 +81,9 @@ def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
host : string, Host name of the HDFS NameNode
port : int, NameNode's WebHDFS port (default 50070)
protocol : {'webhdfs'}
use_https : boolean, default 'default'
Connect to WebHDFS with HTTPS, otherwise plain HTTP. For secure
authentication, the default for this is True, otherwise False
auth_mechanism : string, Set to NOSASL or PLAIN for non-secure clusters.
Set to GSSAPI or LDAP for Kerberos-secured clusters.
verify : boolean, Set to False to turn off verifying SSL certificates.
Expand All @@ -95,6 +99,10 @@ def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
session = kwds.setdefault('session', requests.Session())
session.verify = verify
if auth_mechanism in ['GSSAPI', 'LDAP']:
if use_https == 'default':
prefix = 'https'
else:
prefix = 'https' if use_https else 'http'
try:
import requests_kerberos
except ImportError:
Expand All @@ -103,12 +111,17 @@ def hdfs_connect(host='localhost', port=50070, protocol='webhdfs',
"Kerberos HDFS support. Install it by executing `pip install "
"requests-kerberos` or `pip install hdfs[kerberos]`.")
from hdfs.ext.kerberos import KerberosClient
url = 'https://{0}:{1}'.format(host, port) # note SSL
# note SSL
url = '{0}://{1}:{2}'.format(prefix, host, port)
kwds.setdefault('mutual_auth', 'OPTIONAL')
hdfs_client = KerberosClient(url, **kwds)
else:
if use_https == 'default':
prefix = 'http'
else:
prefix = 'https' if use_https else 'http'
from hdfs.client import InsecureClient
url = 'http://{0}:{1}'.format(host, port)
url = '{0}://{1}:{2}'.format(prefix, host, port)
hdfs_client = InsecureClient(url, **kwds)
return WebHDFS(hdfs_client)

Expand Down
Loading