Showing with 5,834 additions and 2,897 deletions.
  1. +1 −0 .gitignore
  2. +4 −4 README.md
  3. +39 −0 appveyor.yml
  4. +150 −0 asv.conf.json
  5. 0 benchmarks/__init__.py
  6. +85 −0 benchmarks/benchmarks.py
  7. +14 −0 ci/asvconfig.py
  8. +12 −0 ci/run.sh
  9. +39 −42 circle.yml
  10. +10 −8 conda-recipes/ibis-framework/meta.yaml
  11. +2 −2 dev/merge-pr.py
  12. +1 −0 docs/_config.yml
  13. +1 −15 docs/source/developer.rst
  14. +1 −1 docs/source/impala.rst
  15. +2 −1 docs/source/index.rst
  16. +54 −1 docs/source/release.rst
  17. +5 −0 ibis/__init__.py
  18. +32 −20 ibis/client.py
  19. +0 −874 ibis/cloudpickle.py
  20. +16 −16 ibis/compat.py
  21. +18 −13 ibis/config.py
  22. +226 −67 ibis/expr/analysis.py
  23. +117 −54 ibis/expr/api.py
  24. +182 −59 ibis/expr/datatypes.py
  25. +32 −29 ibis/expr/format.py
  26. +57 −11 ibis/expr/groupby.py
  27. +1 −1 ibis/expr/lineage.py
  28. +75 −51 ibis/expr/operations.py
  29. +34 −19 ibis/expr/rules.py
  30. +6 −2 ibis/expr/tests/conftest.py
  31. +4 −3 ibis/expr/tests/test_analytics.py
  32. +109 −2 ibis/expr/tests/test_datatypes.py
  33. +14 −10 ibis/expr/tests/test_format.py
  34. +2 −1 ibis/expr/tests/test_interactive.py
  35. +42 −21 ibis/expr/tests/test_lineage.py
  36. +34 −31 ibis/expr/tests/test_pipe.py
  37. +32 −0 ibis/expr/tests/test_rules.py
  38. +166 −139 ibis/expr/tests/test_sql_builtins.py
  39. +64 −63 ibis/expr/tests/test_string.py
  40. +16 −11 ibis/expr/tests/test_table.py
  41. +162 −164 ibis/expr/tests/test_temporal.py
  42. +135 −70 ibis/expr/tests/test_timestamp.py
  43. +101 −0 ibis/expr/tests/test_value_exprs.py
  44. +76 −0 ibis/expr/tests/test_visualize.py
  45. +191 −68 ibis/expr/types.py
  46. +205 −0 ibis/expr/visualize.py
  47. +31 −19 ibis/impala/api.py
  48. +2 −8 ibis/impala/client.py
  49. +176 −219 ibis/impala/ddl.py
  50. +6 −2 ibis/impala/kudu_support.py
  51. +30 −20 ibis/impala/pandas_interop.py
  52. +2 −1 ibis/impala/tests/test_client.py
  53. +11 −10 ibis/impala/tests/test_ddl.py
  54. +15 −14 ibis/impala/tests/test_exprs.py
  55. +10 −10 ibis/impala/tests/test_kudu_support.py
  56. +2 −1 ibis/impala/tests/test_metadata.py
  57. +16 −5 ibis/impala/tests/test_pandas_interop.py
  58. +25 −10 ibis/impala/tests/test_partition.py
  59. +2 −1 ibis/impala/tests/test_sql.py
  60. +16 −14 ibis/impala/tests/test_udf.py
  61. 0 ibis/pandas/__init__.py
  62. +6 −0 ibis/pandas/api.py
  63. +72 −0 ibis/pandas/client.py
  64. +83 −0 ibis/pandas/core.py
  65. +5 −0 ibis/pandas/dispatch.py
  66. +646 −0 ibis/pandas/execution.py
  67. 0 ibis/pandas/tests/__init__.py
  68. +30 −0 ibis/pandas/tests/test_client.py
  69. +11 −0 ibis/pandas/tests/test_core.py
  70. +675 −0 ibis/pandas/tests/test_operations.py
  71. +255 −34 ibis/sql/alchemy.py
  72. +16 −9 ibis/sql/compiler.py
  73. +77 −13 ibis/sql/postgres/api.py
  74. +135 −43 ibis/sql/postgres/client.py
  75. +54 −20 ibis/sql/postgres/compiler.py
  76. +22 −5 ibis/sql/postgres/tests/test_client.py
  77. +209 −47 ibis/sql/postgres/tests/test_functions.py
  78. +219 −20 ibis/sql/sqlite/client.py
  79. +26 −9 ibis/sql/sqlite/compiler.py
  80. +5 −6 ibis/sql/sqlite/tests/test_client.py
  81. +93 −30 ibis/sql/sqlite/tests/test_functions.py
  82. +15 −15 ibis/sql/tests/test_compiler.py
  83. +10 −13 ibis/sql/tests/test_sqlalchemy.py
  84. +1 −1 ibis/sql/transforms.py
  85. +3 −2 ibis/tests/test_filesystems.py
  86. +6 −34 ibis/util.py
  87. +2 −1 requirements.txt
  88. +0 −4 scripts/fixtures.sh
  89. +0 −133 scripts/run_jenkins.sh
  90. +0 −42 scripts/semaphore_perf.py
  91. +170 −115 scripts/test_data_admin.py
  92. +3 −0 setup.cfg
  93. +50 −62 setup.py
  94. +22 −32 tox.ini
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,4 @@ testing/udf/Makefile
scripts/ibis-testing*
ibis_testing*
.tox/
.asv/
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
[![circleci-badge](https://circleci.com/gh/cloudera/ibis.svg?style=shield&circle-token=b84ff8383cbb0d6788ee0f9635441cb962949a4f)](https://circleci.com/gh/cloudera/ibis/tree/master)

[![codecov.io](http://codecov.io/github/cloudera/ibis/coverage.svg?branch=master)](http://codecov.io/github/cloudera/ibis?branch=master)
[![circleci](https://circleci.com/gh/ibis-project/ibis.svg?style=shield&circle-token=b84ff8383cbb0d6788ee0f9635441cb962949a4f)](https://circleci.com/gh/ibis-project/ibis/tree/master)
[![appveyor](https://ci.appveyor.com/api/projects/status/github/ibis-project/ibis?branch=master&svg=true)](https://ci.appveyor.com/project/cpcloud/ibis-xh5g1)

Current release from Anaconda.org [![Anaconda-Server Badge](https://anaconda.org/conda-forge/ibis-framework/badges/version.svg)](https://anaconda.org/conda-forge/ibis-framework)

Expand All @@ -20,10 +19,11 @@ At this time, Ibis provides tools for the interacting with the following
systems:

- [Apache Impala (incubating)](http://impala.io/)
- [Apache Kudu (incubating)](http://getkudu.io)
- [Apache Kudu](http://getkudu.io)
- Hadoop Distributed File System (HDFS)
- PostgreSQL (Experimental)
- SQLite
- Direct execution of ibis expressions against pandas objects (Experimental)

Learn more about using the library at http://docs.ibis-project.org and read the
project blog at http://ibis-project.org for news and updates.
39 changes: 39 additions & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# AppVeyor (Windows) CI: installs ibis with its optional extras, lints with
# flake8, downloads the SQLite test fixtures, creates the PostgreSQL test
# database and runs the test suite without the impala/hdfs/postgresql tests.
build: off

platform:
  - x64

environment:
  PGUSER: "postgres"
  PGPASSWORD: "Password12!"
  IBIS_TEST_POSTGRES_DB: "ibis_testing"
  IBIS_TEST_CRUNCHBASE_DB: "%USERPROFILE%\\crunchbase.db"
  IBIS_TEST_SQLITE_DB_PATH: "%USERPROFILE%\\ibis_testing.db"
  # Directory that contains the PostgreSQL client programs. This must be a
  # plain directory (no quotes, no arguments) because test_script appends
  # "\createdb.exe" to it.
  PG: "C:\\Program Files\\PostgreSQL\\9.6\\bin"
  CHOCO: "C:\\ProgramData\\chocolatey\\bin"

  matrix:
    - PYTHON: "C:\\Python27-x64"
    - PYTHON: "C:\\Python34-x64"
    - PYTHON: "C:\\Python35-x64"
    - PYTHON: "C:\\Python36-x64"

services:
  - postgresql

test_script:
  - "%PYTHON%\\python.exe -m pip install -U pip"
  - "%PYTHON%\\python.exe -m pip install -U setuptools"
  - "%PYTHON%\\python.exe -m pip install -e .\"[sqlite, postgres, visualization, pandas]\""
  - "%PYTHON%\\python.exe -m pip install flake8 mock pytest"
  - "%PYTHON%\\python.exe -m flake8"

  - "curl -o crunchbase.db https://storage.googleapis.com/ibis-ci-data/crunchbase.db"
  - "mv crunchbase.db %IBIS_TEST_CRUNCHBASE_DB%"
  - "curl -o ibis-testing-data.tar.gz https://storage.googleapis.com/ibis-ci-data/ibis-testing-data.tar.gz"
  - "7z x ibis-testing-data.tar.gz"
  - "7z x ibis-testing-data.tar"
  - "mv ibis-testing-data\\ibis_testing.db %IBIS_TEST_SQLITE_DB_PATH%"
  # The executable path is quoted because %PG% contains a space
  # ("Program Files").
  - "\"%PG%\\createdb.exe\" %IBIS_TEST_POSTGRES_DB%"

  - "%PYTHON%\\python.exe -m pytest --tb=short -m \"not impala and not hdfs and not postgresql\" ibis"
150 changes: 150 additions & 0 deletions asv.conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
{
// The version of the config file format. Do not change, unless
// you know what you are doing.
"version": 1,

// The name of the project being benchmarked
"project": "ibis",

// The project's homepage
"project_url": "http://www.ibis-project.org/",

// The URL or local path of the source code repository for the
// project being benchmarked
"repo": ".",

// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
// "branches": ["master"], // for git
// "branches": ["default"], // for mercurial

// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
// "dvcs": "git",

// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",

// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
//"install_timeout": 600,

// the base URL to show a commit for the project.
"show_commit_url": "https://github.com/ibis-project/ibis/commit/",

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
"pythons": ["3.5"],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list or empty string indicates to just test against the default
// (latest) version. null indicates that the package is to not be
// installed. If the package to be tested is only available from
// PyPI, and the 'environment_type' is conda, then you can preface
// the package name by 'pip+', and the package will be installed via
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
//
"matrix": {
"numpy": [""],
"pandas": [""],
"toolz": [""],
"six": [""],
"graphviz": [""],
"multipledispatch": [""],
"python-graphviz": [""],
"pip+hdfs": [""],
"impyla": [""],
"sqlalchemy": [""],
"pip+thrift": [""],
"pip+thriftpy": [""]
},

// Combinations of libraries/python versions can be excluded/included
// from the set to test. Each entry is a dictionary containing additional
// key-value pairs to include/exclude.
//
// An exclude entry excludes entries where all values match. The
// values are regexps that should match the whole string.
//
// An include entry adds an environment. Only the packages listed
// are installed. The 'python' key is required. The exclude rules
// do not apply to includes.
//
// In addition to package names, the following keys are available:
//
// - python
// Python version, as in the *pythons* variable above.
// - environment_type
// Environment type, as above.
// - sys_platform
// Platform, as in sys.platform. Possible values for the common
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
//
// "exclude": [
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
// {"environment_type": "conda", "six": null}, // don't run without six on conda
// ],
//
// "include": [
// // additional env for python2.7
// {"python": "2.7", "numpy": "1.8"},
// // additional env if run on windows+conda
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
// ],

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
// "benchmark_dir": "benchmarks",

// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
"env_dir": ".asv/env",

// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
"results_dir": ".asv/results",

// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
"html_dir": ".asv/html",

// The number of characters to retain in the commit hashes.
// "hash_length": 8,

// `asv` will cache wheels of the recent builds in each
// environment, making them faster to install next time. This is the
// number of builds to keep, per environment.
// "wheel_cache_size": 0

// The commits after which the regression search in `asv publish`
// should start looking for regressions. Dictionary whose keys are
// regexps matching to benchmark names, and values corresponding to
// the commit (exclusive) after which to start looking for
// regressions. The default is to start from the first commit
// with results. If the commit is `null`, regression detection is
// skipped for the matching benchmark.
//
// "regressions_first_commits": {
// "some_benchmark": "352cdf", // Consider regressions only after this commit
// "another_benchmark": null, // Skip regression detection altogether
// }

// The thresholds for relative change in results, after which `asv
// publish` starts reporting regressions. Dictionary of the same
// form as in ``regressions_first_commits``, with values
// indicating the thresholds. If multiple entries match, the
// maximum is taken. If no entry matches, the default is 5%.
//
// "regressions_thresholds": {
// "some_benchmark": 0.01, // Threshold of 1%
// "another_benchmark": 0.5, // Threshold of 50%
// }
}
Empty file added benchmarks/__init__.py
Empty file.
85 changes: 85 additions & 0 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import ibis


class Suite:
    """Common fixture shared by the ibis expression benchmarks.

    ``setup`` stores three attributes on the instance:

    * ``t``    -- a table expression named ``'t'`` with eleven int32 columns
    * ``base`` -- a boolean predicate over the date-part columns of ``t``
    * ``expr`` -- the value of ``large_expr`` at setup time
    """

    def setup(self):
        """Create the table, the filter predicate and the large expression."""
        column_names = (
            '_timestamp', 'dim1', 'dim2', 'valid_seconds', 'meas1', 'meas2',
            'year', 'month', 'day', 'hour', 'minute',
        )
        schema = tuple((column, 'int32') for column in column_names)
        t = ibis.table(schema, name='t')
        self.t = t

        # (year, month, day, hour, minute) >= (2016, 6, 6, 6, 5), written out
        # as one clause per position at which the comparison can be decided.
        at_or_after = (
            (t.year > 2016) |
            ((t.year == 2016) & (t.month > 6)) |
            ((t.year == 2016) & (t.month == 6) & (t.day > 6)) |
            ((t.year == 2016) & (t.month == 6) & (t.day == 6) &
             (t.hour > 6)) |
            ((t.year == 2016) & (t.month == 6) & (t.day == 6) &
             (t.hour == 6) & (t.minute >= 5))
        )

        # Mirror image of the clause above:
        # (year, month, day, hour, minute) <= (2016, 6, 6, 6, 5).
        at_or_before = (
            (t.year < 2016) |
            ((t.year == 2016) & (t.month < 6)) |
            ((t.year == 2016) & (t.month == 6) & (t.day < 6)) |
            ((t.year == 2016) & (t.month == 6) & (t.day == 6) &
             (t.hour < 6)) |
            ((t.year == 2016) & (t.month == 6) & (t.day == 6) &
             (t.hour == 6) & (t.minute <= 5))
        )

        self.base = at_or_after & at_or_before
        self.expr = self.large_expr

    @property
    def large_expr(self):
        """Build the filter -> mutate -> aggregate -> mutate -> project chain.

        The expression is rebuilt from ``self.t`` and ``self.base`` on every
        access; nothing is cached.
        """
        filtered = self.t[self.base]

        # Replace ``_timestamp`` with itself minus its remainder modulo 3600
        # and add a constant ``valid_seconds`` column.
        floored_timestamp = (
            filtered['_timestamp'] - filtered['_timestamp'] % 3600
        ).cast('int32').name('_timestamp')
        bucketed = filtered.mutate(
            _timestamp=floored_timestamp, valid_seconds=300)

        # Sum each measure, cast to float, keep the original column name.
        measure_sums = [
            bucketed[measure].sum().cast('float').name(measure)
            for measure in ('meas1', 'meas2')
        ]
        grouped = bucketed.aggregate(
            measure_sums,
            by=['_timestamp', 'dim1', 'dim2', 'valid_seconds'])

        # Derive the date-part columns by calling the like-named method on
        # the timestamp-cast column.
        as_timestamp = grouped['_timestamp'].cast('timestamp')
        date_parts = {
            part: getattr(as_timestamp, part)()
            for part in ('year', 'month', 'day', 'hour', 'minute')
        }
        with_parts = grouped.mutate(**date_parts)

        return with_parts[[
            '_timestamp', 'dim1', 'dim2', 'meas1', 'meas2',
            'year', 'month', 'day', 'hour', 'minute'
        ]]


class Construction(Suite):
    """Benchmarks for building expressions."""

    def time_large_expr_construction(self):
        """Build the large expression once by reading the property."""
        # Accessing the property is the whole benchmark; the result is
        # intentionally discarded.
        self.large_expr


class Formatting(Suite):
    """Benchmarks for rendering expressions with ``str``."""

    def time_base_expr_formatting(self):
        """Render the filter predicate built in ``setup``."""
        predicate = self.base
        str(predicate)

    def time_large_expr_formatting(self):
        """Render the large expression built in ``setup``."""
        expression = self.expr
        str(expression)


class Compilation(Suite):
    """Benchmarks for ``ibis.impala.compile``."""

    def time_impala_base_compile(self):
        """Compile the filter predicate built in ``setup``."""
        predicate = self.base
        ibis.impala.compile(predicate)

    def time_impala_large_expr_compile(self):
        """Compile the large expression built in ``setup``."""
        expression = self.expr
        ibis.impala.compile(expression)
14 changes: 14 additions & 0 deletions ci/asvconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python

# Print an asv machine description as JSON on stdout: asv's default machine
# info with the machine name and RAM string overridden, keyed by machine name.
if __name__ == '__main__':
    import os
    import json
    import socket

    import asv

    # Use a fixed name whenever the CIRCLECI environment variable is set to a
    # non-empty value; otherwise fall back to the local host name.
    if os.environ.get('CIRCLECI'):
        machine_name = 'circle'
    else:
        machine_name = socket.gethostname()

    defaults = asv.machine.Machine.get_defaults()
    defaults['machine'] = machine_name

    # NOTE(review): presumably asv reports 'ram' in kB, which would make this
    # quotient a GB figure -- confirm against asv.
    ram_quotient = int(defaults['ram']) // 1000000
    defaults['ram'] = '{:d}GB'.format(ram_quotient)

    document = {machine_name: defaults, 'version': 1}
    print(json.dumps(document, indent=2))
12 changes: 12 additions & 0 deletions ci/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash -e

# Install the requested Python with pyenv, select it for the current
# directory, and run the corresponding tox environment.
#
# Arguments:
#   $1 - Python version to hand to pyenv (e.g. "3.6.1"). Required.
#
# The tox environment name is "py" followed by the first two characters of
# the version with its dots removed (e.g. "2.7.13" -> "py27").
#
# Returns non-zero without invoking pyenv or tox when no version is given.
function run()
{
    local python_version="${1:-}"
    if [[ -z "${python_version}" ]]; then
        # Fail early with a clear message rather than handing empty strings
        # to pyenv and asking tox for the bare "py" environment.
        echo "usage: ${0} <python-version>" >&2
        return 1
    fi

    local no_dots_python_version="${python_version//.}"
    pyenv install --skip-existing "${python_version}"
    pyenv local "${python_version}"
    tox -e "py${no_dots_python_version:0:2}"
}

run "$@"
Loading