81 changes: 39 additions & 42 deletions circle.yml
@@ -1,10 +1,11 @@
machine:
python:
version: 3.4.3
version: 3.6.1
pre:
# upgrade sqlite3 to make sure we have instr
# upgrade sqlite3 to make sure we have the "instr" function available
- sudo apt-get -qq install --only-upgrade sqlite3
- sudo apt-get -qq install clang libboost-dev
# clang/boost for UDFs and graphviz for visualization
- sudo apt-get -qq install clang libboost-dev graphviz
environment:
IBIS_TEST_SQLITE_DB_PATH: $HOME/ibis-testing-data/ibis_testing.db
IBIS_TEST_POSTGRES_DB: circle_test
@@ -18,33 +19,43 @@ machine:
IBIS_TEST_WEBHDFS_USER: ubuntu
post:
# download the crunchbase sqlite database, for lineage testing
- wget https://ibis-resources.s3.amazonaws.com/data/crunchbase/crunchbase.db
- wget https://storage.googleapis.com/ibis-ci-data/crunchbase.db

# download the data
- wget https://ibis-resources.s3.amazonaws.com/testing/ibis-testing-data.tar.gz
- wget https://storage.googleapis.com/ibis-ci-data/ibis-testing-data.tar.gz

# untar it
- tar xvzf ibis-testing-data.tar.gz

services:
- docker

dependencies:
cache_directories:
- ~/docker
override:
- >
if [[ -e ~/docker/impala.tar ]]; then
echo "Loading impala docker image from cache"
docker load -i ~/docker/impala.tar
else
echo "Pulling impala docker image from Docker Hub"
docker pull cpcloud86/impala:metastore
mkdir -p ~/docker
docker save -o ~/docker/impala.tar cpcloud86/impala:metastore
fi
- pip install -U pip
- pip install click flake8 pylint
- cd $(pyenv root); git pull
- pip install -U tox tox-pyenv
- pyenv install --skip-existing 2.7.9
- pyenv install --skip-existing 3.4.3
- pyenv install --skip-existing 3.5.0
- pyenv install --skip-existing 3.6.0
- pyenv local 2.7.9 3.4.3 3.5.0 3.6.0
post:
# pull down the docker image
- docker pull cpcloud86/impala:metastore

# these are used for benchmarking
- pip install -U asv
- wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O $HOME/miniconda.sh
- bash $HOME/miniconda.sh -b -p $HOME/miniconda
- ci/asvconfig.py | tee $HOME/.asv-machine.json
post:
# run the docker image, exposing relevant ports
- docker run -it -d --name impala --hostname impala -p 9000:9000 -p 50010:50010 -p 50020:50020 -p 50070:50070 -p 50075:50075 -p 21000:21000 -p 21050:21050 -p 25000:25000 -p 25010:25010 -p 25020:25020 cpcloud86/impala:metastore
- docker run -it -d --name impala --hostname impala -p 50070:50070 -p 21050:21050 cpcloud86/impala:metastore

# map the docker container's IP address to a human-readable hostname
- echo "$(docker inspect --format '{{.NetworkSettings.IPAddress}}' impala) impala" | sudo tee -a /etc/hosts
@@ -61,33 +72,19 @@ dependencies:
# make sure the hive metastore is working
- sudo lxc-attach -n "$(docker inspect --format '{{.Id}}' impala)" -- hive -e 'show tables'

# load the ibis test data into impala and postgres
- scripts/test_data_admin.py load --data --overwrite --data-dir "$HOME/ibis-testing-data"

database:
override:
# load some additional test data for postgres ARRAY types
- >
psql -U ubuntu -d $IBIS_TEST_POSTGRES_DB <<EOF
CREATE TABLE array_types (
x BIGINT[],
y TEXT[],
z FLOAT8[],
grouper TEXT,
scalar_column float8
);
INSERT INTO array_types VALUES
(ARRAY[1, 2, 3], ARRAY['a', 'b', 'c'], ARRAY[1.0, 2.0, 3.0], 'a', 1.0),
(ARRAY[4, 5], ARRAY['d', 'e'], ARRAY[4.0, 5.0], 'a', 2.0),
(ARRAY[6, NULL], ARRAY['f', NULL], ARRAY[6.0, NULL], 'a', 3.0),
(ARRAY[NULL, 1, NULL], ARRAY[NULL, 'a', NULL], ARRAY[]::float8[], 'b', 4.0),
(ARRAY[2, NULL, 3], ARRAY['b', NULL, 'c'], NULL, 'b', 5.0),
(ARRAY[4, NULL, NULL, 5], ARRAY['d', NULL, NULL, 'e'], ARRAY[4.0, NULL, NULL, 5.0], 'c', 6.0);
EOF
test:
override:
- flake8 --exclude=docs,versioneer.py,.svn,CVS,.bzr,.hg,.git,__pycache__,.tox,.eggs,*.egg
- case $CIRCLE_NODE_INDEX in 0) tox -e py27 ;; 1) tox -e py34 ;; 2) tox -e py35 ;; 3) tox -e py36 ;; esac:
- case $CIRCLE_NODE_INDEX in 0) ci/run.sh 2.7.12 ;; 1) ci/run.sh 3.4.4 ;; 2) ci/run.sh 3.5.3 ;; 3) ci/run.sh 3.6.1 ;; esac:
parallel: true

# Run benchmarks
deployment:
pr: # PRs
branch: /^(?!master$).*$/
commands:
- PATH="$HOME/miniconda/bin:$PATH" asv continuous -f 1.5 -e master ${CIRCLE_SHA1}
master: # Merges to master
branch: master
owner: ibis-project
commands:
- PATH="$HOME/miniconda/bin:$PATH" asv continuous -f 1.5 -e master^ ${CIRCLE_SHA1}
18 changes: 10 additions & 8 deletions conda-recipes/ibis-framework/meta.yaml
@@ -12,13 +12,15 @@ source:

requirements:
build:
- enum34 # [py27]
- numpy >=1.7.0
- pandas >=0.12.0
- python
- setuptools
- six
- toolz
run:
- enum34 # [py27]
- numpy >=1.7.0
- pandas >=0.12.0
- python
@@ -28,17 +30,17 @@ requirements:

test:
requires:
- mock
- numpy >=1.7.0
- pandas >=0.12.0
- pytest <=2.9.2
- python
- six
- toolz
- graphviz
- mock # [py27]
- multipledispatch
- pytest <3 # [py27]
- pytest >=3 # [py3k]
- python-graphviz
imports:
- ibis
- ibis.expr
- ibis.expr.tests
- ibis.expr.visualize
- ibis.hive
- ibis.hive.tests
- ibis.impala
@@ -57,7 +59,7 @@ test:
- ibis.sql.vertica.tests
- ibis.tests
commands:
- py.test --tb=short --pyargs ibis
- py.test --tb=short --pyargs ibis -m 'not impala and not hdfs'

about:
license: Apache License, Version 2.0
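The updated test command above deselects tests carrying the ``impala`` and ``hdfs`` pytest markers. A minimal sketch of how such a marker looks on a test; the marker names come from the command itself, the declarations below are illustrative and not part of this diff:

    import pytest

    @pytest.mark.impala
    def test_against_live_impala():
        # deselected by: py.test -m 'not impala and not hdfs'
        assert True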
4 changes: 2 additions & 2 deletions dev/merge-pr.py
@@ -46,8 +46,8 @@
# Remote name where results pushed
PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "upstream")

GITHUB_BASE = "https://github.com/cloudera/" + PROJECT_NAME + "/pull"
GITHUB_API_BASE = "https://api.github.com/repos/cloudera/" + PROJECT_NAME
GITHUB_BASE = "https://github.com/pandas-dev/" + PROJECT_NAME + "/pull"
GITHUB_API_BASE = "https://api.github.com/repos/pandas-dev/" + PROJECT_NAME

# Prefix added to temporary branches
BRANCH_PREFIX = "PR_TOOL"
1 change: 1 addition & 0 deletions docs/_config.yml
@@ -0,0 +1 @@
theme: jekyll-theme-slate
16 changes: 1 addition & 15 deletions docs/source/developer.rst
@@ -27,20 +27,6 @@ development roadmap:

* Documentation
* Use cases and IPython notebooks
* Other SQL-based backends (Presto, Hive, Spark SQL, PostgreSQL)
* Other SQL-based backends (Presto, Hive, Spark SQL)
* S3 filesystem support
* Integration with MLLib via PySpark

Contributor License Agreements
------------------------------

While Ibis is an Apache-licensed open source project, we require individual and
corporate contributors to execute a `contributor license agreement
<https://en.wikipedia.org/wiki/Contributor_License_Agreement>`_ to enable any
copyright issues to be avoided and to protect the user base from
disruption. This agreement only needs to be signed once.

We'll use the same CLA's that Impala uses:

* `Individual CLA <https://github.com/cloudera/Impala/wiki/Individual-Contributor-License-Agreement-(ICLA)>`_
* `Corporate CLA <https://github.com/cloudera/Impala/wiki/Corporate-Contributor-License-Agreement-(CCLA)>`_
2 changes: 1 addition & 1 deletion docs/source/impala.rst
@@ -11,7 +11,7 @@ without requiring you to switch back and forth between Python code and the
Impala shell (where one would be using a mix of DDL and SQL statements).

If you find an Impala task that you cannot perform with Ibis, please get in
touch on the `GitHub issue tracker <http://github.com/cloudera/ibis>`_.
touch on the `GitHub issue tracker <http://github.com/pandas-dev/ibis>`_.

While interoperability between the Hadoop / Spark ecosystems and pandas / the
PyData stack is overall poor (but improving), we also show some ways that you
3 changes: 2 additions & 1 deletion docs/source/index.rst
@@ -29,7 +29,7 @@ natively within other systems like Apache Spark and Apache Impala (incubating).
To learn more about Ibis's vision, roadmap, and updates, please follow
http://ibis-project.org.

Source code is on GitHub: http://github.com/cloudera/ibis
Source code is on GitHub: http://github.com/pandas-dev/ibis

Install Ibis from PyPI with:

@@ -50,6 +50,7 @@ At this time, Ibis offers some level of support for the following systems:
- Hadoop Distributed File System (HDFS)
- PostgreSQL (Experimental)
- SQLite
- Direct execution of ibis expressions against pandas objects (Experimental)

Coming from SQL? Check out :ref:`Ibis for SQL Programmers <sql>`.

55 changes: 54 additions & 1 deletion docs/source/release.rst
@@ -7,6 +7,59 @@ Release Notes
interesting. Point (minor, e.g. 0.5.1) releases will generally not be found
here and contain only bug fixes.

0.11.0 (June 28, 2017)
----------------------

This release brings initial Pandas backend support along with a number of
bug fixes and reliability enhancements. We recommend that all users upgrade
from earlier versions of Ibis.

New features
~~~~~~~~~~~~
* Experimental pandas backend to allow execution of ibis expressions against
pandas DataFrames
* Graphviz visualization of ibis expressions. Implements ``_repr_png_`` for
Jupyter Notebook functionality
* Ability to create a partitioned table from an ibis expression
* Support for operations previously missing from the SQLite backend: sqrt,
power, variance and standard deviation, and regular expression functions;
missing power support was also added for PostgreSQL
* Support for schemas inside databases with the PostgreSQL backend
* Appveyor testing on core ibis across all supported Python versions
* Add ``year``/``month``/``day`` methods to ``date`` types
* Ability to sort, group by and project columns according to positional index
rather than only by name
* Added a ``type`` parameter to ``ibis.literal`` to allow user specification of
literal types (see the sketch below)
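
A minimal sketch of two of the features above, assuming the 0.11.0 API: the
``type`` keyword comes from the release note itself, and the date accessors
from this diff's ``_date_value_methods`` addition::

    import ibis

    lit = ibis.literal(5, type='int8')  # explicitly typed literal

    t = ibis.table([('when', 'date')], name='t')
    expr = t.mutate(y=t.when.year(), m=t.when.month(), d=t.when.day())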

Bug fixes
~~~~~~~~~
* Fix broken conda recipe
* Fix incorrectly typed fillna operation
* Fix postgres boolean summary operations
* Fix kudu support to reflect client API changes
* Fix equality of nested types and construction of nested types when the value
type is specified as a string

API changes
~~~~~~~~~~~
* Deprecate passing integer values to the ``ibis.timestamp`` literal
constructor, this will be removed in 0.12.0
* Added the ``admin_timeout`` parameter to the kudu client ``connect`` function

Contributors
~~~~~~~~~~~~

::

$ git shortlog --summary --numbered v0.10.0..v0.11.0

58 Phillip Cloud
1 Greg Rahn
1 Marius van Niekerk
1 Tarun Gogineni
1 Wes McKinney

0.8 (May 19, 2016)
------------------

@@ -17,7 +70,7 @@ versions of Ibis.

New features
~~~~~~~~~~~~
* Initial PostgreSQL backend contributed by Philip Cloud.
* Initial PostgreSQL backend contributed by Phillip Cloud.
* Add ``groupby`` as an alias for ``group_by`` to table expressions

Bug fixes
5 changes: 5 additions & 0 deletions ibis/__init__.py
@@ -39,6 +39,11 @@
except ImportError: # pip install ibis-framework[postgres]
pass

try:
import ibis.pandas.api as pandas
except ImportError: # pip install ibis-framework[pandas]
pass

import ibis.config_init
from ibis.config import options
import ibis.util as util
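With the optional import above in place, using the experimental pandas backend might look like this sketch; the ``connect`` entry point taking a mapping of table names to DataFrames is an assumption about the new API, not code from this diff:

    import pandas as pd
    import ibis

    df = pd.DataFrame({'a': [1.0, 2.0, 3.0]})
    con = ibis.pandas.connect({'t': df})  # assumed entry point
    expr = con.table('t').a.sum()
    print(expr.execute())  # executes directly against the DataFrame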
52 changes: 32 additions & 20 deletions ibis/client.py
@@ -12,8 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import pandas as pd

import toolz

from ibis.compat import zip as czip
from ibis.config import options

import ibis.expr.types as ir
import ibis.expr.operations as ops
import ibis.sql.compiler as comp
Expand All @@ -22,7 +27,6 @@


class Client(object):

pass


@@ -36,6 +40,9 @@ class Query(object):

def __init__(self, client, ddl):
self.client = client
self.expr = getattr(
ddl, 'parent_expr', getattr(ddl, 'table_set', None)
)

if isinstance(ddl, comp.DDL):
self.compiled_ddl = ddl.compile()
@@ -57,7 +64,6 @@ def _wrap_result(self, result):
return result

def _fetch(self, cursor):
import pandas as pd
rows = cursor.fetchall()
# TODO(wesm): please evaluate/reimpl to optimize for perf/memory
dtypes = [self._db_type_to_dtype(x[1]) for x in cursor.description]
@@ -71,15 +77,13 @@ def _fetch(self, cursor):
cols[name] = pd.Series(col)
return pd.DataFrame(cols, columns=names)

def _db_type_to_dtype(self, db_type):
def _db_type_to_dtype(self, db_type, column):
raise NotImplementedError


class AsyncQuery(Query):

"""
Abstract asynchronous query
"""
"""Abstract asynchronous query"""

def execute(self):
raise NotImplementedError
@@ -322,24 +326,31 @@ def compile(expr, limit=None):
def find_backend(expr):
backends = []

def walk(expr):
node = expr.op()
for arg in node.flat_args():
if isinstance(arg, Client):
backends.append(arg)
elif isinstance(arg, ir.Expr):
walk(arg)
stack = [expr.op()]
seen = set()

while stack:
node = stack.pop()

if node not in seen:
seen.add(node)

for arg in node.flat_args():
if isinstance(arg, Client):
backends.append(arg)
elif isinstance(arg, ir.Expr):
stack.append(arg.op())

walk(expr)
backends = util.unique_by_key(backends, id)
backends = list(toolz.unique(backends, key=id))

if len(backends) > 1:
raise ValueError('Multiple backends found')
elif len(backends) == 0:
elif not backends:
default = options.default_backend
if default is None:
raise com.IbisError('Expression depends on no backends, '
'and found no default')
raise com.IbisError(
'Expression depends on no backends, and found no default'
)
return default

return backends[0]
@@ -437,8 +448,9 @@ def __init__(self, parent, namespace):
self.namespace = namespace

def __repr__(self):
return ("{0}(database={1!r}, namespace={2!r})"
.format('DatabaseNamespace', self.name, self.namespace))
return "{}(database={!r}, namespace={!r})".format(
type(self).__name__, self.name, self.namespace
)

@property
def client(self):
874 changes: 0 additions & 874 deletions ibis/cloudpickle.py

This file was deleted.

32 changes: 16 additions & 16 deletions ibis/compat.py
@@ -12,56 +12,56 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa

import itertools

import numpy as np

import sys
import six
from six import BytesIO, StringIO, string_types as py_string
from six import BytesIO, StringIO, string_types # noqa: F401


PY2 = sys.version_info[0] == 2


import unittest

if not PY2:
import pickle
unicode_type = str

def lzip(*x):
return list(zip(*x))

zip = zip
pickle_dump = pickle.dumps
pickle_load = pickle.loads
zip_longest = itertools.zip_longest

def dict_values(x):
return list(x.values())

from decimal import Decimal
import unittest.mock as mock
range = range
else:
import cPickle
import builtins

else:
try:
from cdecimal import Decimal
except ImportError:
from decimal import Decimal
from decimal import Decimal # noqa: F401

unicode_type = unicode
unicode_type = unicode # noqa: F821
lzip = zip
zip = itertools.izip
from ibis.cloudpickle import dumps as pickle_dump
pickle_load = cPickle.loads
zip_longest = itertools.izip_longest

def dict_values(x):
return x.values()

try:
import mock # mock is an optional dependency
import mock # noqa: F401
except ImportError:
pass
range = xrange

import __builtin__ as builtins # noqa: F401

range = xrange # noqa: F821

integer_types = six.integer_types + (np.integer,)
31 changes: 18 additions & 13 deletions ibis/config.py
@@ -25,8 +25,6 @@
import warnings
import sys

from six import StringIO

PY2 = sys.version_info[0] == 2

if not PY2:
@@ -155,35 +153,39 @@ def __init__(self, d, prefix=""):
object.__setattr__(self, "prefix", prefix)

def __repr__(self):
buf = StringIO()
pprint.pprint(self.d, stream=buf)
return buf.getvalue()
return pprint.pformat(self.d)

def __setattr__(self, key, val):
prefix = object.__getattribute__(self, "prefix")
prefix = self.prefix
if prefix:
prefix += "."
prefix += key
# you can't set new keys
# can you can't overwrite subtrees

# you can't set new keys and you can't overwrite subtrees

if key in self.d and not isinstance(self.d[key], dict):
_set_option(prefix, val)
else:
raise OptionError("You can only set the value of existing options")

def __getattr__(self, key):
prefix = object.__getattribute__(self, "prefix")
prefix = self.prefix
if prefix:
prefix += "."
prefix += key
v = object.__getattribute__(self, "d")[key]

try:
v = self.d[key]
except KeyError as e:
raise AttributeError(*e.args)

if isinstance(v, dict):
return DictWrapper(v, prefix)
else:
return _get_option(prefix)

def __dir__(self):
return list(self.d.keys())
return sorted(list(self.d.keys()))


# For user convenience, we'd like to have the available options described
@@ -327,8 +329,11 @@ class option_context(object):
You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.

Examples
--------
>>> with option_context('display.max_rows', 10, 'display.max_columns', 5):
...
>>> with option_context('interactive', True):
... print(options.interactive)
True
>>> options.interactive
False
"""

def __init__(self, *args):
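A self-contained sketch of why the ``KeyError`` to ``AttributeError`` translation in ``DictWrapper.__getattr__`` above matters: ``getattr`` defaults and ``hasattr`` only swallow ``AttributeError``. ``Wrapper`` here is a simplified stand-in, not the real class:

    class Wrapper(object):
        def __init__(self, d):
            object.__setattr__(self, 'd', d)

        def __getattr__(self, key):
            try:
                return self.d[key]
            except KeyError as e:
                # missing keys surface as AttributeError, so the
                # attribute protocol behaves as expected
                raise AttributeError(*e.args)

    opts = Wrapper({'interactive': False})
    assert getattr(opts, 'missing', 'fallback') == 'fallback'
    assert not hasattr(opts, 'missing')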
293 changes: 226 additions & 67 deletions ibis/expr/analysis.py
@@ -12,21 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.common import RelationError, ExpressionError, IbisTypeError
from ibis.expr.datatypes import HasSchema
from ibis.expr.window import window
import toolz

import ibis.expr.types as ir
import ibis.expr.operations as ops
import ibis.util as util
import toolz

from ibis.expr.datatypes import HasSchema
from ibis.expr.window import window

from ibis.common import RelationError, ExpressionError, IbisTypeError

# ---------------------------------------------------------------------
# Some expression metaprogramming / graph transformations to support
# compilation later


def sub_for(expr, substitutions):
mapping = dict((repr(k.op()), v) for k, v in substitutions)
mapping = {repr(k.op()): v for k, v in substitutions}
return _subs(expr, mapping)


@@ -128,11 +130,11 @@ def _key(self, expr):


def has_multiple_bases(expr):
return len(find_all_tables(expr)) > 1
return toolz.count(find_immediate_parent_tables(expr)) > 1


def reduction_to_aggregation(expr, default_name='tmp'):
tables = find_all_tables(expr)
tables = list(find_immediate_parent_tables(expr))

try:
name = expr.get_name()
@@ -142,29 +144,72 @@ def reduction_to_aggregation(expr, default_name='tmp'):
named_expr = expr.name(default_name)

if len(tables) == 1:
table = list(tables.values())[0]
table, = tables
return table.aggregate([named_expr]), name
else:
return ScalarAggregate(expr, None, default_name).get_result()


def find_all_tables(expr, memo=None):
if memo is None:
memo = {}

node = expr.op()
def find_immediate_parent_tables(expr):
"""Find every first occurrence of a :class:`ibis.expr.types.TableExpr`
object in `expr`.
Parameters
----------
expr : ir.Expr
Yields
------
e : ir.Expr
Notes
-----
This function does not traverse into TableExpr objects. This means that the
underlying PhysicalTable of a Selection will not be yielded, for example.
Examples
--------
>>> import ibis, toolz
>>> t = ibis.table([('a', 'int64')], name='t')
>>> expr = t.mutate(foo=t.a + 1)
>>> result = list(find_immediate_parent_tables(expr))
>>> len(result)
1
>>> result[0] # doctest: +NORMALIZE_WHITESPACE
ref_0
UnboundTable[table]
name: t
schema:
a : int64
Selection[table]
table:
Table: ref_0
selections:
Table: ref_0
foo = Add[int64*]
left:
a = Column[int64*] 'a' from table
ref_0
right:
Literal[int8]
1
"""
stack = [expr]
seen = set()

if isinstance(expr, ir.TableExpr):
key = id(node)
if key not in memo:
memo[key] = expr
return memo
while stack:
e = stack.pop()
node = e.op()

for arg in node.flat_args():
if isinstance(arg, ir.Expr):
find_all_tables(arg, memo)
if node not in seen:
seen.add(node)

return memo
if isinstance(e, ir.TableExpr):
yield e
else: # Only traverse into non TableExpr objects
stack.extend(
arg for arg in node.flat_args() if isinstance(arg, ir.Expr)
)


def is_scalar_reduce(x):
@@ -334,7 +379,7 @@ def _lift_Aggregation(self, expr, block=None):

unch = lifted_table is op.table

lifted_aggs, unch1 = self._lift_arg(op.agg_exprs, block=True)
lifted_aggs, unch1 = self._lift_arg(op.metrics, block=True)
lifted_by, unch2 = self._lift_arg(op.by, block=True)
lifted_having, unch3 = self._lift_arg(op.having, block=True)

@@ -440,7 +485,7 @@ def apply_filter(expr, predicates):

if op.table._is_valid(simplified_predicates):
result = ops.Aggregation(
op.table, op.agg_exprs, by=op.by, having=op.having,
op.table, op.metrics, by=op.by, having=op.having,
predicates=op.predicates + simplified_predicates,
sort_keys=op.sort_keys)

@@ -534,15 +579,23 @@ def get_result(self):
return self.valid

def _walk(self, expr):
node = expr.op()
if isinstance(node, ops.TableColumn):
is_valid = self._validate_column(expr)
self.valid = self.valid and is_valid
stack = [expr]
seen = set()

for arg in node.flat_args():
if isinstance(arg, ir.ValueExpr):
self._walk(arg)
# Skip other types of exprs
while stack:
e = stack.pop()
node = e.op()

if node not in seen:
seen.add(node)

if isinstance(node, ops.TableColumn):
self.valid = self.valid and self._validate_column(e)

stack.extend(
arg for arg in node.flat_args()
if isinstance(arg, ir.ValueExpr)
)

def _validate_column(self, expr):
if isinstance(self.parent, ops.Selection):
@@ -900,7 +953,7 @@ def validate(self, expr):
is_valid = True

if isinstance(op, ops.Contains):
value_valid = ExprValidator.validate(self, op.value)
value_valid = super(FilterValidator, self).validate(op.value)
is_valid = value_valid
else:
roots_valid = []
@@ -925,48 +978,154 @@


def find_source_table(expr):
# A more complex version of _find_base_table.
# TODO: Revisit/refactor this all at some point
node = expr.op()

# First table expression observed for each argument that the expr
# depends on
"""Find the first table expression observed for each argument that the
expression depends on
Parameters
----------
expr : ir.Expr
Returns
-------
table_expr : ir.TableExpr
Examples
--------
>>> import ibis
>>> t = ibis.table([('a', 'double'), ('b', 'string')], name='t')
>>> expr = t.mutate(c=t.a + 42.0)
>>> expr # doctest: +NORMALIZE_WHITESPACE
ref_0
UnboundTable[table]
name: t
schema:
a : double
b : string
Selection[table]
table:
Table: ref_0
selections:
Table: ref_0
c = Add[double*]
left:
a = Column[double*] 'a' from table
ref_0
right:
Literal[double]
42.0
>>> find_source_table(expr)
UnboundTable[table]
name: t
schema:
a : double
b : string
>>> left = ibis.table([('a', 'int64'), ('b', 'string')])
>>> right = ibis.table([('c', 'int64'), ('d', 'string')])
>>> result = left.inner_join(right, left.a == right.c)
>>> find_source_table(result) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
NotImplementedError: More than one base table not implemented
"""
first_tables = []

def push_first(arg):
if not isinstance(arg, ir.Expr):
return
if isinstance(arg, ir.TableExpr):
first_tables.append(arg)
else:
collect(arg.op())
stack = [expr]
seen = set()

while stack:
e = stack.pop()
op = e.op()

def collect(node):
for arg in node.flat_args():
push_first(arg)
if op not in seen:
seen.add(op)

collect(node)
options = util.unique_by_key(first_tables, id)
arguments = [
arg for arg in reversed(list(op.flat_args()))
if isinstance(arg, ir.Expr)
]
first_tables.extend(
arg for arg in arguments if isinstance(arg, ir.TableExpr)
)
stack.extend(
arg for arg in arguments if not isinstance(arg, ir.TableExpr)
)

options = list(toolz.unique(first_tables, key=id))

if len(options) > 1:
raise NotImplementedError
raise NotImplementedError('More than one base table not implemented')

return options[0]


def unwrap_ands(expr):
out_exprs = []
def flatten_predicate(expr):
"""Yield the expressions corresponding to the `And` nodes of a predicate.
Parameters
----------
expr : ir.BooleanColumn
Returns
-------
exprs : List[ir.BooleanColumn]
Examples
--------
>>> import ibis
>>> t = ibis.table([('a', 'int64'), ('b', 'string')], name='t')
>>> filt = (t.a == 1) & (t.b == 'foo')
>>> predicates = flatten_predicate(filt)
>>> len(predicates)
2
>>> predicates[0] # doctest: +NORMALIZE_WHITESPACE
ref_0
UnboundTable[table]
name: t
schema:
a : int64
b : string
Equals[boolean*]
left:
a = Column[int64*] 'a' from table
ref_0
right:
Literal[int8]
1
>>> predicates[1] # doctest: +NORMALIZE_WHITESPACE
ref_0
UnboundTable[table]
name: t
schema:
a : int64
b : string
Equals[boolean*]
left:
b = Column[string*] 'b' from table
ref_0
right:
Literal[string]
foo
"""
predicates = []
stack = [expr]
seen = set()

def walk(expr):
op = expr.op()
if isinstance(op, ops.Comparison):
out_exprs.append(expr)
elif isinstance(op, ops.And):
walk(op.left)
walk(op.right)
else:
raise Exception('Invalid predicate: {0!s}'
.format(expr._repr()))
while stack:
e = stack.pop()

walk(expr)
return out_exprs
if not isinstance(e, ir.BooleanColumn):
raise TypeError(
'Predicate component is not an instance of ir.BooleanColumn'
)

op = e.op()

if op not in seen:
seen.add(op)

if isinstance(op, ops.And):
stack.append(op.right)
stack.append(op.left)
else:
predicates.append(e)
return predicates
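
The traversals rewritten in this file (and the similar ones in ``client.py`` and ``format.py``) all follow one recursion-to-iteration pattern; a generic sketch, not code from the diff:

    def walk(root_op, visit):
        # explicit stack instead of recursion: no recursion-limit errors
        # on deeply nested expression trees
        stack = [root_op]
        seen = set()
        while stack:
            node = stack.pop()
            if node not in seen:  # visit shared subtrees only once
                seen.add(node)
                visit(node)
                stack.extend(
                    arg.op() for arg in node.flat_args()
                    if hasattr(arg, 'op')
                )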
171 changes: 117 additions & 54 deletions ibis/expr/api.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings

import six
import toolz

@@ -32,6 +34,7 @@
StringValue, StringScalar, StringColumn,
DecimalValue, DecimalScalar, DecimalColumn,
TimestampValue, TimestampScalar, TimestampColumn,
DateValue,
ArrayValue, ArrayScalar, ArrayColumn,
CategoryValue, unnamed, as_value_expr, literal,
null, sequence)
@@ -41,7 +44,6 @@

import ibis.common as _com

from ibis.compat import py_string
from ibis.expr.analytics import bucket, histogram
from ibis.expr.groupby import GroupedTableExpr # noqa
from ibis.expr.window import window, trailing_window, cumulative_window
@@ -149,11 +151,16 @@ def timestamp(value):
"""
Returns a timestamp literal if value is likely coercible to a timestamp
"""
if isinstance(value, py_string):
if isinstance(value, six.string_types):
from pandas import Timestamp
value = Timestamp(value)
op = ir.Literal(value)
return ir.TimestampScalar(op)
if isinstance(value, six.integer_types):
warnings.warn(
'Integer values for timestamp literals are deprecated in 0.11.0 '
'and will be removed in 0.12.0. To pass integers as timestamp '
'literals, use pd.Timestamp({:d}, unit=...)'.format(value)
)
return ir.TimestampScalar(ir.literal(value).op())
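
# Per the warning above, migrating an integer timestamp literal looks roughly
# like the sketch below; it assumes ibis.timestamp accepts a pandas.Timestamp,
# which is what the string path above already constructs.
#
#     import pandas as pd
#     import ibis
#
#     # deprecated in 0.11.0, to be removed in 0.12.0:
#     #     ts = ibis.timestamp(1497529200)
#     ts = ibis.timestamp(pd.Timestamp(1497529200, unit='s'))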


schema.__doc__ = """\
@@ -767,41 +774,6 @@ def cases(arg, case_result_pairs, default=None):
cases=cases,
substitute=substitute,

__add__=add,
add=add,

__sub__=sub,
sub=sub,

__mul__=mul,
mul=mul,

__div__=div,
__truediv__=div,
__floordiv__=floordiv,
div=div,
floordiv=floordiv,

__rdiv__=rdiv,
__rtruediv__=rdiv,
__rfloordiv__=rfloordiv,
rdiv=rdiv,
rfloordiv=rfloordiv,

__pow__=pow,
pow=pow,

__radd__=add,

__rsub__=rsub,
rsub=rsub,

__rmul__=_rbinop_expr('__rmul__', _ops.Multiply),
__rpow__=_binop_expr('__rpow__', _ops.Power),

__mod__=mod,
__rmod__=_rbinop_expr('__rmod__', _ops.Modulus),

__eq__=_binop_expr('__eq__', _ops.Equals),
__ne__=_binop_expr('__ne__', _ops.NotEquals),
__ge__=_binop_expr('__ge__', _ops.GreaterEqual),
@@ -1080,6 +1052,41 @@ def _integer_to_timestamp(arg, unit='s'):
round=round,
nullifzero=_unary_op('nullifzero', _ops.NullIfZero),
zeroifnull=_unary_op('zeroifnull', _ops.ZeroIfNull),

__add__=add,
add=add,

__sub__=sub,
sub=sub,

__mul__=mul,
mul=mul,

__div__=div,
__truediv__=div,
__floordiv__=floordiv,
div=div,
floordiv=floordiv,

__rdiv__=rdiv,
__rtruediv__=rdiv,
__rfloordiv__=rfloordiv,
rdiv=rdiv,
rfloordiv=rfloordiv,

__pow__=pow,
pow=pow,

__radd__=add,

__rsub__=rsub,
rsub=rsub,

__rmul__=_rbinop_expr('__rmul__', _ops.Multiply),
__rpow__=_binop_expr('__rpow__', _ops.Power),

__mod__=mod,
__rmod__=_rbinop_expr('__rmod__', _ops.Modulus),
)


@@ -1602,6 +1609,9 @@ def _string_getitem(self, key):
join=_string_join,
lpad=_lpad,
rpad=_rpad,
__add__=add,
__mul__=mul,
__rmul__=mul,
)
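
# With __add__ and __mul__ now registered on string values above, operator
# syntax builds string expressions; a sketch, with concatenation semantics
# assumed from the generic add op:
#
#     import ibis
#
#     t = ibis.table([('s', 'string')], name='t')
#     expr = t.s + '_suffix'  # string concatenation via the new __add__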


@@ -1703,7 +1713,16 @@ def _timestamp_strftime(arg, format_str):
)


_date_value_methods = dict(
strftime=_timestamp_strftime,
year=_extract_field('year', _ops.ExtractYear),
month=_extract_field('month', _ops.ExtractMonth),
day=_extract_field('day', _ops.ExtractDay),
)


_add_methods(TimestampValue, _timestamp_value_methods)
_add_methods(DateValue, _date_value_methods)


# ---------------------------------------------------------------------
@@ -1767,35 +1786,79 @@ def join(left, right, predicates=(), how='inner'):
"""
klass = _join_classes[how.lower()]
if isinstance(predicates, Expr):
predicates = _L.unwrap_ands(predicates)
predicates = _L.flatten_predicate(predicates)

op = klass(left, right, predicates)
return TableExpr(op)


def cross_join(*args, **kwargs):
def cross_join(*tables, **kwargs):
"""
Perform a cross join (cartesian product) amongst a list of tables, with
optional set of prefixes to apply to overlapping column names.

Parameters
----------
positional args: tables to join
prefixes keyword : prefixes for each table
Not yet implemented
Examples
--------
>>> joined1 = ibis.cross_join(a, b, c, d, e)
>>> joined2 = ibis.cross_join(a, b, c, prefixes=['a_', 'b_', 'c_']))
tables : ibis.expr.types.TableExpr
Returns
-------
joined : TableExpr
If prefixes not provided, the result schema is not yet materialized
"""
op = _ops.CrossJoin(*args, **kwargs)
return TableExpr(op)
Examples
--------
>>> import ibis
>>> schemas = [(name, 'int64') for name in 'abcde']
>>> a, b, c, d, e = [
... ibis.table([(name, type)], name=name) for name, type in schemas
... ]
>>> joined1 = ibis.cross_join(a, b, c, d, e)
>>> joined1 # doctest: +NORMALIZE_WHITESPACE
ref_0
UnboundTable[table]
name: a
schema:
a : int64
ref_1
UnboundTable[table]
name: b
schema:
b : int64
ref_2
UnboundTable[table]
name: c
schema:
c : int64
ref_3
UnboundTable[table]
name: d
schema:
d : int64
ref_4
UnboundTable[table]
name: e
schema:
e : int64
CrossJoin[table]
left:
Table: ref_0
right:
CrossJoin[table]
left:
CrossJoin[table]
left:
CrossJoin[table]
left:
Table: ref_1
right:
Table: ref_2
right:
Table: ref_3
right:
Table: ref_4
"""
# TODO(phillipc): Implement prefix keyword argument
return TableExpr(_ops.CrossJoin(*tables, **kwargs))


def _table_count(self):
Expand Down Expand Up @@ -1902,7 +1965,7 @@ def filter(table, predicates):

def _resolve_predicates(table, predicates):
if isinstance(predicates, Expr):
predicates = _L.unwrap_ands(predicates)
predicates = _L.flatten_predicate(predicates)
predicates = util.promote_list(predicates)
predicates = [ir.bind_expr(table, x) for x in predicates]
resolved_predicates = []
241 changes: 182 additions & 59 deletions ibis/expr/datatypes.py

Large diffs are not rendered by default.

61 changes: 32 additions & 29 deletions ibis/expr/format.py
@@ -109,14 +109,15 @@ def get_result(self):
text = self._format_node(self.expr)
elif isinstance(what, ops.TableColumn):
text = self._format_column(self.expr)
elif isinstance(what, ir.Literal):
text = 'Literal[{}]\n {}'.format(
self._get_type_display(), str(what.value)
)
elif isinstance(what, ir.Node):
text = self._format_node(self.expr)
elif isinstance(what, ir.Literal):
text = 'Literal[%s] %s' % (self._get_type_display(),
str(what.value))

if isinstance(self.expr, ir.ValueExpr) and self.expr._name is not None:
text = '{0} = {1}'.format(self.expr.get_name(), text)
text = '{} = {}'.format(self.expr.get_name(), text)

if self.memoize:
alias_to_text = [(self.memo.aliases[x],
@@ -138,31 +139,33 @@ def get_result(self):
def _memoize_tables(self):
table_memo_ops = (ops.Aggregation, ops.Selection,
ops.SelfReference)

def walk(expr):
if id(expr) in self.memo.visit_memo:
return

op = expr.op()

def visit(arg):
if isinstance(arg, list):
[visit(x) for x in arg]
elif isinstance(arg, ir.Expr):
walk(arg)

if isinstance(op, ops.PhysicalTable):
self.memo.observe(expr, self._format_table)
elif isinstance(op, ir.Node):
visit(op.args)
if isinstance(op, table_memo_ops):
self.memo.observe(expr, self._format_node)
elif isinstance(op, ops.TableNode) and op.has_schema():
self.memo.observe(expr, self._format_table)

self.memo.visit_memo.add(id(expr))

walk(self.expr)
if id(self.expr) in self.memo.visit_memo:
return

stack = [self.expr]
seen = set()
memo = self.memo

while stack:
e = stack.pop()
op = e.op()

if op not in seen:
seen.add(op)

if isinstance(op, ops.PhysicalTable):
memo.observe(e, self._format_table)
elif isinstance(op, ir.Node):
stack.extend(
arg for arg in reversed(op.args)
if isinstance(arg, ir.Expr)
)
if isinstance(op, table_memo_ops):
memo.observe(e, self._format_node)
elif isinstance(op, ops.TableNode) and op.has_schema():
memo.observe(e, self._format_table)

memo.visit_memo.add(id(e))

def _indent(self, text, indents=1):
return util.indent(text, self.indent_size * indents)
68 changes: 57 additions & 11 deletions ibis/expr/groupby.py
@@ -75,16 +75,18 @@ def having(self, expr):
Parameters
----------
expr : ibis.expr.types.Expr

Returns
-------
grouped : GroupedTableExpr
"""
exprs = util.promote_list(expr)
new_having = self._having + exprs
return GroupedTableExpr(self.table, self.by, having=new_having,
order_by=self._order_by,
window=self._window)
return GroupedTableExpr(
self.table, self.by,
having=new_having, order_by=self._order_by, window=self._window
)

def order_by(self, expr):
"""
@@ -101,9 +103,10 @@ def order_by(self, expr):
"""
exprs = util.promote_list(expr)
new_order = self._order_by + exprs
return GroupedTableExpr(self.table, self.by, having=self._having,
order_by=new_order,
window=self._window)
return GroupedTableExpr(
self.table, self.by,
having=self._having, order_by=new_order, window=self._window
)

def mutate(self, exprs=None, **kwds):
"""
@@ -117,11 +120,54 @@ def mutate(self, exprs=None, **kwds):
Examples
--------
>>> expr = (table
.group_by('foo')
.order_by(ibis.desc('bar'))
.mutate(qux=lambda x: x.baz.lag(),
qux2=table.baz.lead()))
>>> import ibis
>>> t = ibis.table([
... ('foo', 'string'),
... ('bar', 'string'),
... ('baz', 'double'),
... ], name='t')
>>> t
UnboundTable[table]
name: t
schema:
foo : string
bar : string
baz : double
>>> expr = (t.group_by('foo')
... .order_by(ibis.desc('bar'))
... .mutate(qux=lambda x: x.baz.lag(),
... qux2=t.baz.lead()))
>>> print(expr) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
ref_0
UnboundTable[table]
name: t
schema:
foo : string
bar : string
baz : double
Selection[table]
table:
Table: ref_0
selections:
Table: ref_0
qux = WindowOp[double*]
qux = Lag[double*]
baz = Column[double*] 'baz' from table
ref_0
offset:
None
default:
None
<ibis.expr.window.Window object at 0x...>
qux2 = WindowOp[double*]
qux2 = Lead[double*]
baz = Column[double*] 'baz' from table
ref_0
offset:
None
default:
None
<ibis.expr.window.Window object at 0x...>
Returns
-------
2 changes: 1 addition & 1 deletion ibis/expr/lineage.py
@@ -120,7 +120,7 @@ def _get_args(op, name):
return [col for col in result if col._name == name]
elif isinstance(op, ops.Aggregation):
assert name is not None, 'name is None'
return [col for col in chain(op.by, op.agg_exprs) if col._name == name]
return [col for col in chain(op.by, op.metrics) if col._name == name]
else:
return op.args

126 changes: 75 additions & 51 deletions ibis/expr/operations.py

Large diffs are not rendered by default.

53 changes: 34 additions & 19 deletions ibis/expr/rules.py
@@ -16,10 +16,11 @@

import operator

import six

from toolz import first

from ibis.common import IbisTypeError
from ibis.compat import py_string
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.common as com
@@ -146,9 +147,9 @@ def highest_precedence_type(exprs):
if not exprs:
raise ValueError('Must pass at least one expression')

type_counts = Counter(expr.type() for expr in exprs)
expr_types = {expr.type() for expr in exprs}
scores = (
(_TYPE_PRECEDENCE[k.name.lower()], k) for k, v in type_counts.items()
(_TYPE_PRECEDENCE[t.name.lower()], t) for t in expr_types
)
_, highest_type = max(scores, key=first)

@@ -264,12 +265,12 @@ class Argument(object):
"""

def __init__(self, name=None, default=None, optional=False,
validator=None):
validator=None, doc=None):
self.name = name
self.default = default
self.optional = optional

self.validator = validator
self.doc = doc

def validate(self, args, i):
arg = args[i]
@@ -459,7 +460,7 @@ def _validate(self, args, i):
arg = args[i]

if not self._type_matches(arg):
if isinstance(self.fail_message, py_string):
if isinstance(self.fail_message, six.string_types):
exc = self.fail_message
else:
exc = self.fail_message(self.types, arg)
@@ -493,7 +494,7 @@ class MultipleTypes(Argument):

def __init__(self, types, **arg_kwds):
self.types = [_to_argument(t) for t in types]
Argument.__init__(self, **arg_kwds)
super(MultipleTypes, self).__init__(**arg_kwds)

def _validate(self, args, i):
for t in self.types:
@@ -548,21 +549,28 @@ def value_typed_as(types, **arg_kwds):
return ValueTyped(types, fail_message, **arg_kwds)


def array(value_type=None, name=None, optional=False):
array_checker = ValueTyped(ir.ColumnExpr, 'not an array expr',
def column(value_type=None, name=None, optional=False):
array_checker = ValueTyped(ir.ColumnExpr, 'not a column expr',
name=name,
optional=optional)
if value_type is None:
return array_checker
else:
return MultipleTypes([array_checker, value_type],
name=name,
optional=optional)
return MultipleTypes(
[array_checker, value_type], name=name, optional=optional
)


def scalar(name=None, optional=False):
return ValueTyped(ir.ScalarExpr, 'not a scalar expr', name=name,
optional=optional)
def scalar(value_type=None, name=None, optional=False):
scalar_checker = ValueTyped(
ir.ScalarExpr, 'not a scalar expr', name=name, optional=optional
)
if value_type is None:
return scalar_checker
else:
return MultipleTypes(
[scalar_checker, value_type], name=name, optional=optional
)


def collection(name=None, optional=False):
@@ -610,7 +618,11 @@ def decimal(**arg_kwds):


def timestamp(**arg_kwds):
return ValueTyped(ir.TimestampValue, 'not decimal', **arg_kwds)
return ValueTyped(ir.TimestampValue, 'not timestamp', **arg_kwds)


def date(**arg_kwds):
return ValueTyped(ir.DateValue, 'not date', **arg_kwds)


def timedelta(**arg_kwds):
@@ -622,8 +634,8 @@ def string(**arg_kwds):
return ValueTyped(dt.string, 'not string', **arg_kwds)


def array_column(value_type):
return lambda **arg_kwds: ValueTyped(
def array(value_type, **arg_kwds):
return ValueTyped(
dt.Array(value_type),
'not array with value_type {0}'.format(value_type),
**arg_kwds
Expand All @@ -638,6 +650,9 @@ def one_of(args, **arg_kwds):
return OneOf(args, **arg_kwds)


temporal = one_of((dt.timestamp, dt.date))


def instance_of(type_, **arg_kwds):
fail_message = 'not a {0}'.format(str(type_))
return AnyTyped(type_, fail_message, **arg_kwds)
Expand Down Expand Up @@ -754,7 +769,7 @@ class DataTypeArgument(Argument):
def _validate(self, args, i):
arg = args[i]

if isinstance(arg, py_string):
if isinstance(arg, six.string_types):
arg = arg.lower()

arg = args[i] = dt.validate_type(arg)
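A sketch of the extended ``scalar`` rule in use, mirroring ``test_scalar_value_type`` later in this diff; the op class name is illustrative:

    import ibis.expr.operations as ops
    import ibis.expr.rules as rules

    class MyScalarOp(ops.ValueOp):
        # accept only numeric scalar expressions, produce a double
        input_type = [rules.scalar(value_type=rules.number)]
        output_type = rules.double

    op = MyScalarOp(1.5)   # numeric literals validate and coerce
    # MyScalarOp('a') would raise IbisTypeError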
8 changes: 6 additions & 2 deletions ibis/expr/tests/conftest.py
@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.expr.tests.mocks import MockConnection
import collections

import pytest

import ibis

from ibis.expr.tests.mocks import MockConnection


@pytest.fixture
def schema():
@@ -29,12 +32,13 @@ def schema():
('f', 'double'),
('g', 'string'),
('h', 'boolean'),
('i', 'timestamp'),
]


@pytest.fixture
def schema_dict(schema):
return dict(schema)
return collections.OrderedDict(schema)


@pytest.fixture
7 changes: 4 additions & 3 deletions ibis/expr/tests/test_analytics.py
@@ -12,11 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.expr.tests.mocks import MockConnection
from ibis.compat import unittest
import ibis.expr.types as ir
import unittest

import ibis
import ibis.expr.types as ir

from ibis.expr.tests.mocks import MockConnection
from ibis.tests.util import assert_equal


111 changes: 109 additions & 2 deletions ibis/expr/tests/test_datatypes.py
@@ -18,6 +18,13 @@ def test_nested_array():
) == dt.Array(dt.Array(dt.string))


def test_array_with_string_value_type():
assert dt.Array('int32') == dt.Array(dt.int32)
assert dt.Array(dt.Array('array<map<string, double>>')) == (
dt.Array(dt.Array(dt.Array(dt.Map(dt.string, dt.double))))
)


def test_map():
assert dt.validate_type(
'map<string, double>'
@@ -30,6 +37,12 @@ def test_nested_map():
) == dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8)))


def test_map_with_string_value_type():
assert dt.Map('int32', 'double') == dt.Map(dt.int32, dt.double)
assert dt.Map('int32', 'array<double>') == \
dt.Map(dt.int32, dt.Array(dt.double))


def test_map_does_not_allow_non_primitive_keys():
with pytest.raises(SyntaxError):
dt.validate_type('map<array<string>, double>')
@@ -79,9 +92,40 @@ def test_struct():
assert dt.validate_type(orders) == expected


def test_decimal_failure():
def test_struct_with_string_types():
result = dt.Struct.from_tuples(
[
('a', 'map<double, string>'),
('b', 'array<map<string, array<int32>>>'),
('c', 'array<string>'),
('d', 'int8'),
]
)

assert result == dt.Struct.from_tuples(
[
('a', dt.Map(dt.double, dt.string)),
('b', dt.Array(dt.Map(dt.string, dt.Array(dt.int32)))),
('c', dt.Array(dt.string)),
('d', dt.int8),
]
)


@pytest.mark.parametrize(
'case',
[
'decimal(',
'decimal()',
'decimal(3)',
'decimal(,)',
'decimal(3,)',
'decimal(3,',
]
)
def test_decimal_failure(case):
with pytest.raises(SyntaxError):
dt.validate_type('decimal(')
dt.validate_type(case)


@pytest.mark.parametrize(
@@ -219,3 +263,66 @@ def test_literal_mixed_type_fails():
data = [1, 'a']
with pytest.raises(TypeError):
ibis.literal(data)


def test_array_type_not_equals():
left = dt.Array(dt.string)
right = dt.Array(dt.int32)

assert not left.equals(right)
assert left != right
assert not (left == right)


def test_array_type_equals():
left = dt.Array(dt.string)
right = dt.Array(dt.string)

assert left.equals(right)
assert left == right
assert not (left != right)


def test_timestamp_with_timezone_parser_single_quote():
t = dt.validate_type("timestamp('US/Eastern')")
assert isinstance(t, dt.Timestamp)
assert t.timezone == 'US/Eastern'


def test_timestamp_with_timezone_parser_double_quote():
t = dt.validate_type('timestamp("US/Eastern")')
assert isinstance(t, dt.Timestamp)
assert t.timezone == 'US/Eastern'


def test_timestamp_with_timezone_parser_invalid_timezone():
ts = dt.validate_type("timestamp('US/Ea')")
assert str(ts) == "timestamp('US/Ea')"


@pytest.mark.parametrize(
'case',
[
"timestamp(US/Ea)",
"timestamp('US/Eastern\")",
'timestamp("US/Eastern\')',
]
)
def test_timestamp_parsing_failure_mode(case):
with pytest.raises(SyntaxError):
dt.validate_type(case)


def test_timestamp_with_invalid_timezone():
ts = dt.Timestamp('Foo/Bar&234')
assert str(ts) == "timestamp('Foo/Bar&234')"


def test_timestamp_with_timezone_repr():
ts = dt.Timestamp('UTC')
assert repr(ts) == "Timestamp(timezone='UTC')"


def test_timestamp_with_timezone_str():
ts = dt.Timestamp('UTC')
assert str(ts) == "timestamp('UTC')"
24 changes: 14 additions & 10 deletions ibis/expr/tests/test_format.py
@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import ibis

from ibis.compat import unittest
from ibis.expr.types import Expr
from ibis.expr.format import ExprFormatter
from ibis.expr.tests.mocks import MockConnection

Expand All @@ -39,13 +41,12 @@ def setUp(self):
self.con = MockConnection()

def test_format_custom_expr(self):
from ibis.expr.types import Expr, Literal

class CustomExpr(Expr):
def _type_display(self):
return 'my-custom'

op = Literal(5)
op = ibis.literal(5).op()
expr = CustomExpr(op)

result = repr(expr)
Expand Down Expand Up @@ -81,10 +82,13 @@ def test_memoize_aggregate_correctly(self):
table = self.table

agg_expr = (table['c'].sum() / table['c'].mean() - 1).name('analysis')
agg_exprs = [table['a'].sum().name('sum(a)'),
table['b'].mean().name('mean(b)'), agg_expr]
metrics = [
table['a'].sum().name('sum(a)'),
table['b'].mean().name('mean(b)'),
agg_expr,
]

result = table.aggregate(agg_exprs, by=['g'])
result = table.aggregate(metrics, by=['g'])

formatter = ExprFormatter(result)
formatted = formatter.get_result()
Expand All @@ -98,9 +102,9 @@ def test_aggregate_arg_names(self):
t = self.table

by_exprs = [t.g.name('key1'), t.f.round().name('key2')]
agg_exprs = [t.c.sum().name('c'), t.d.mean().name('d')]
metrics = [t.c.sum().name('c'), t.d.mean().name('d')]

expr = self.table.group_by(by_exprs).aggregate(agg_exprs)
expr = self.table.group_by(by_exprs).aggregate(metrics)
result = repr(expr)
assert 'metrics' in result
assert 'by' in result
Expand Down Expand Up @@ -220,13 +224,13 @@ def test_memoize_filtered_tables_in_join(self):


def test_argument_repr_shows_name():
t = ibis.table([('a', 'int64')])
t = ibis.table([('a', 'int64')], name='t')
expr = t.a.nullif(2)
result = repr(expr)
expected = """\
ref_0
UnboundTable[table]
name: None
name: t
schema:
a : int64
Expand Down
3 changes: 2 additions & 1 deletion ibis/expr/tests/test_interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.compat import unittest
import unittest

from ibis.expr.tests.mocks import MockConnection
import ibis.config as config

63 changes: 42 additions & 21 deletions ibis/expr/tests/test_lineage.py
@@ -1,36 +1,57 @@
import os
import pytest

import ibis
import ibis.expr.lineage as lin
from ibis.tests.util import assert_equal


IBIS_TEST_CRUNCHBASE_DB = os.environ.get(
'IBIS_TEST_CRUNCHBASE_DB',
'crunchbase.db'
)

pytestmark = pytest.mark.skipif(
not os.path.exists(IBIS_TEST_CRUNCHBASE_DB),
reason='{} does not exist'.format(IBIS_TEST_CRUNCHBASE_DB)
)
pytest.importorskip('sqlalchemy')


@pytest.fixture
def con():
# make sure this is in the directory where you run py.test
return ibis.sqlite.connect(IBIS_TEST_CRUNCHBASE_DB)
from ibis.tests.util import assert_equal


@pytest.fixture
def companies(con):
return con.table('companies')
schema = [
('permalink', 'string'),
('name', 'string'),
('homepage_url', 'string'),
('category_list', 'string'),
('market', 'string'),
('funding_total_usd', 'float'),
('status', 'string'),
('country_code', 'string'),
('state_code', 'string'),
('region', 'string'),
('city', 'string'),
('funding_rounds', 'int32'),
('founded_at', 'string'),
('founded_month', 'string'),
('founded_quarter', 'string'),
('founded_year', 'float'),
('first_funding_at', 'string'),
('last_funding_at', 'string'),
]
return ibis.table(schema, name='companies')


@pytest.fixture
def rounds(con):
return con.table('rounds')
schema = [
('company_permalink', 'string'),
('company_name', 'string'),
('company_category_list', 'string'),
('company_market', 'string'),
('company_country_code', 'string'),
('company_state_code', 'string'),
('company_region', 'string'),
('company_city', 'string'),
('funding_round_permalink', 'string'),
('funding_round_type', 'string'),
('funding_round_code', 'string'),
('funded_at', 'string'),
('funded_month', 'string'),
('funded_quarter', 'string'),
('funded_year', 'int32'),
('raised_amount_usd', 'float'),
]
return ibis.table(schema, name='rounds')


def test_lineage(companies):
65 changes: 34 additions & 31 deletions ibis/expr/tests/test_pipe.py
@@ -12,48 +12,51 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.compat import unittest
import pytest
import ibis


class TestPipe(unittest.TestCase):
@pytest.fixture
def pipe_table():
return ibis.table([
('key1', 'string'),
('key2', 'string'),
('key3', 'string'),
('value', 'double')
], 'foo_table')

def setUp(self):
self.table = ibis.table([
('key1', 'string'),
('key2', 'string'),
('key3', 'string'),
('value', 'double')
], 'foo_table')

def test_pipe_positional_args(self):
def my_func(data, foo, bar):
return data[bar] + foo
def test_pipe_positional_args(pipe_table):
def my_func(data, foo, bar):
return data[bar] + foo

result = self.table.pipe(my_func, 4, 'value')
expected = self.table['value'] + 4
result = pipe_table.pipe(my_func, 4, 'value')
expected = pipe_table['value'] + 4

assert result.equals(expected)
assert result.equals(expected)

def test_pipe_keyword_args(self):
def my_func(data, foo=None, bar=None):
return data[bar] + foo

result = self.table.pipe(my_func, foo=4, bar='value')
expected = self.table['value'] + 4
def test_pipe_keyword_args(pipe_table):
def my_func(data, foo=None, bar=None):
return data[bar] + foo

assert result.equals(expected)
result = pipe_table.pipe(my_func, foo=4, bar='value')
expected = pipe_table['value'] + 4

def test_pipe_pass_to_keyword(self):
def my_func(x, y, data=None):
return data[x] + y
assert result.equals(expected)

result = self.table.pipe((my_func, 'data'), 'value', 4)
expected = self.table['value'] + 4

assert result.equals(expected)
def test_pipe_pass_to_keyword(pipe_table):
def my_func(x, y, data=None):
return data[x] + y

def test_call_pipe_equivalence(self):
result = self.table(lambda x: x['key1'].cast('double').sum())
expected = self.table.key1.cast('double').sum()
assert result.equals(expected)
result = pipe_table.pipe((my_func, 'data'), 'value', 4)
expected = pipe_table['value'] + 4

assert result.equals(expected)


def test_call_pipe_equivalence(pipe_table):
result = pipe_table(lambda x: x['key1'].cast('double').sum())
expected = pipe_table.key1.cast('double').sum()
assert result.equals(expected)
32 changes: 32 additions & 0 deletions ibis/expr/tests/test_rules.py
@@ -1,5 +1,6 @@
import pytest

import ibis
from ibis.common import IbisTypeError
import ibis.expr.operations as ops
import ibis.expr.types as ir
@@ -103,3 +104,34 @@ def output_type(self):

op = MyOp(option)
assert op._validate_args(op.args) == [expected_case(option)]


def test_argument_docstring():
doc = 'A wonderful integer'

class MyExpr(ir.Expr):
pass

class MyOp(ops.ValueOp):

input_type = [rules.integer(name='foo', doc=doc)]

def output_type(self):
return MyExpr

op = MyOp(1)
assert type(op).foo.__doc__ == doc


def test_scalar_value_type():

class MyOp(ops.ValueOp):

input_type = [rules.scalar(value_type=rules.number)]
output_type = rules.double

with pytest.raises(IbisTypeError):
MyOp('a')

assert MyOp(1).args[0].equals(ibis.literal(1))
assert MyOp(1.42).args[0].equals(ibis.literal(1.42))
305 changes: 166 additions & 139 deletions ibis/expr/tests/test_sql_builtins.py
@@ -14,189 +14,216 @@

import pytest

from ibis.expr.tests.mocks import MockConnection
from ibis.compat import unittest
from ibis.tests.util import assert_equal
import ibis
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis

from ibis.tests.util import assert_equal
from ibis.expr.tests.mocks import MockConnection


@pytest.fixture
def con():
return MockConnection()


@pytest.fixture
def alltypes(con):
return con.table('functional_alltypes')


@pytest.fixture
def lineitem(con):
return con.table('tpch_lineitem')


@pytest.fixture
def sql_table():
return ibis.table([
('v1', 'decimal(12, 2)'),
('v2', 'decimal(10, 4)'),
('v3', 'int32'),
('v4', 'int64'),
('v5', 'float'),
('v6', 'double'),
('v7', 'string'),
('v8', 'boolean')
], 'testing')


@pytest.fixture(params=(ibis.coalesce, ibis.greatest, ibis.least))
def function(request):
return request.param


@pytest.mark.parametrize(
'colname',
[
'tinyint_col',
'smallint_col',
'int_col',
'bigint_col',
'float_col',
'double_col',
]
)
def test_abs(alltypes, lineitem, colname):
fname = 'abs'
op = ops.Abs

expr = alltypes[colname]
_check_unary_op(expr, fname, op, type(expr))

expr = lineitem.l_extendedprice
_check_unary_op(expr, fname, op, type(expr))


def test_group_concat(alltypes):
col = alltypes.string_col

expr = col.group_concat()
assert isinstance(expr.op(), ops.GroupConcat)
arg, sep = expr.op().args
assert sep == ','

expr = col.group_concat('|')
arg, sep = expr.op().args
assert sep == '|'

class TestBuiltins(unittest.TestCase):

def setUp(self):
self.con = MockConnection()
self.alltypes = self.con.table('functional_alltypes')
self.lineitem = self.con.table('tpch_lineitem')
def test_zeroifnull(alltypes):
dresult = alltypes.double_col.zeroifnull()
iresult = alltypes.int_col.zeroifnull()

def test_abs(self):
colnames = ['tinyint_col', 'smallint_col', 'int_col', 'bigint_col',
'float_col', 'double_col']
assert type(dresult.op()) == ops.ZeroIfNull
assert type(dresult) == ir.DoubleColumn

fname = 'abs'
op = ops.Abs
# Impala upconverts all ints to bigint. Hmm.
assert type(iresult) == ir.Int64Column

for col in colnames:
expr = self.alltypes[col]
self._check_unary_op(expr, fname, op, type(expr))

expr = self.lineitem.l_extendedprice
self._check_unary_op(expr, fname, op, type(expr))
def test_fillna(alltypes):
result = alltypes.double_col.fillna(5)
assert isinstance(result, ir.DoubleColumn)

def test_group_concat(self):
col = self.alltypes.string_col
assert isinstance(result.op(), ops.IfNull)

expr = col.group_concat()
assert isinstance(expr.op(), ops.GroupConcat)
arg, sep = expr.op().args
sep == ','
result = alltypes.bool_col.fillna(True)
assert isinstance(result, ir.BooleanColumn)

expr = col.group_concat('|')
arg, sep = expr.op().args
sep == '|'
# Highest precedence type
result = alltypes.int_col.fillna(alltypes.bigint_col)
assert isinstance(result, ir.Int64Column)

def test_zeroifnull(self):
dresult = self.alltypes.double_col.zeroifnull()
iresult = self.alltypes.int_col.zeroifnull()

assert type(dresult.op()) == ops.ZeroIfNull
assert type(dresult) == ir.DoubleColumn
def test_ceil_floor(alltypes, lineitem):
cresult = alltypes.double_col.ceil()
fresult = alltypes.double_col.floor()
assert isinstance(cresult, ir.Int64Column)
assert isinstance(fresult, ir.Int64Column)
assert type(cresult.op()) == ops.Ceil
assert type(fresult.op()) == ops.Floor

# Impala upconverts all ints to bigint. Hmm.
assert type(iresult) == type(iresult)
cresult = ibis.literal(1.2345).ceil()
fresult = ibis.literal(1.2345).floor()
assert isinstance(cresult, ir.Int64Scalar)
assert isinstance(fresult, ir.Int64Scalar)

def test_fillna(self):
result = self.alltypes.double_col.fillna(5)
assert isinstance(result, ir.DoubleColumn)
dec_col = lineitem.l_extendedprice
cresult = dec_col.ceil()
fresult = dec_col.floor()
assert isinstance(cresult, ir.DecimalColumn)
assert cresult.meta == dec_col.meta

assert isinstance(result.op(), ops.IfNull)
assert isinstance(fresult, ir.DecimalColumn)
assert fresult.meta == dec_col.meta

def test_sign(alltypes, lineitem):
result = alltypes.double_col.sign()
assert isinstance(result, ir.FloatColumn)
assert type(result.op()) == ops.Sign

result = ibis.literal(1.2345).sign()
assert isinstance(result, ir.FloatScalar)

dec_col = lineitem.l_extendedprice
result = dec_col.sign()
assert isinstance(result, ir.FloatColumn)

def test_round(alltypes, lineitem):
result = alltypes.double_col.round()
assert isinstance(result, ir.Int64Column)
assert result.op().args[1] is None

result = alltypes.double_col.round(2)
assert isinstance(result, ir.DoubleColumn)
assert result.op().args[1].equals(ibis.literal(2))

# Even integers are double (at least in Impala, check with other DB
# implementations)
result = alltypes.int_col.round(2)
assert isinstance(result, ir.DoubleColumn)

dec = lineitem.l_extendedprice
result = dec.round()
assert isinstance(result, ir.DecimalColumn)

result = dec.round(2)
assert isinstance(result, ir.DecimalColumn)

result = ibis.literal(1.2345).round()
assert isinstance(result, ir.Int64Scalar)

def _check_unary_op(expr, fname, ex_op, ex_type):
result = getattr(expr, fname)()
assert type(result.op()) == ex_op
assert type(result) == ex_type

def test_coalesce_instance_method(sql_table):
v7 = sql_table.v7
v5 = sql_table.v5.cast('string')
v8 = sql_table.v8.cast('string')

result = v7.coalesce(v5, v8, 'foo')
expected = ibis.coalesce(v7, v5, v8, 'foo')
assert_equal(result, expected)
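
    # For intuition (hedged): the instance form above is sugar for the
    # prefix form, and both model SQL's COALESCE, which returns its first
    # non-NULL argument.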


def test_integer_promotions(sql_table, function):
t = sql_table

expr = function(t.v3, t.v4)
assert isinstance(expr, ir.Int64Column)

expr = function(5, t.v3)
assert isinstance(expr, ir.Int64Column)

expr = function(5, 12)
assert isinstance(expr, ir.Int64Scalar)

def test_floats(sql_table, function):
t = sql_table

expr = function(t.v5)
assert isinstance(expr, ir.DoubleColumn)

expr = function(5.5, t.v5)
assert isinstance(expr, ir.DoubleColumn)

expr = function(5.5, 5)
assert isinstance(expr, ir.DoubleScalar)

@pytest.mark.xfail(raises=AssertionError, reason='NYT')
def test_bools():
assert False

@pytest.mark.xfail(raises=AssertionError, reason='NYT')
def test_decimal_promotions():
assert False
127 changes: 64 additions & 63 deletions ibis/expr/tests/test_string.py
@@ -12,96 +12,97 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from ibis import literal
import ibis.expr.types as ir
import ibis.expr.operations as ops

from ibis.tests.util import assert_equal


def test_lower_upper(table):
    lresult = table.g.lower()
    uresult = table.g.upper()

    assert isinstance(lresult, ir.StringColumn)
    assert isinstance(uresult, ir.StringColumn)

    assert isinstance(lresult.op(), ops.Lowercase)
    assert isinstance(uresult.op(), ops.Uppercase)

    lit = literal('FoO')

    lresult = lit.lower()
    uresult = lit.upper()
    assert isinstance(lresult, ir.StringScalar)
    assert isinstance(uresult, ir.StringScalar)

def test_substr(table):
lit = literal('FoO')

    result = table.g.substr(2, 4)
    lit_result = lit.substr(0, 2)

    assert isinstance(result, ir.StringColumn)
    assert isinstance(lit_result, ir.StringScalar)

    op = result.op()
    assert isinstance(op, ops.Substring)

    start, length = op.args[1:]

    assert start.equals(literal(2))
    assert length.equals(literal(4))

def test_left_right(table):
result = table.g.left(5)
expected = table.g.substr(0, 5)
assert result.equals(expected)

    result = table.g.right(5)
    op = result.op()
    assert isinstance(op, ops.StrRight)
    assert op.args[1].equals(literal(5))

def test_length(table):
lit = literal('FoO')
result = table.g.length()
lit_result = lit.length()

    assert isinstance(result, ir.Int32Column)
    assert isinstance(lit_result, ir.Int32Scalar)
    assert isinstance(result.op(), ops.StringLength)

def test_join(table):
dash = literal('-')

    expr = dash.join([table.f.cast('string'),
                      table.g])
    assert isinstance(expr, ir.StringColumn)

    expr = dash.join([literal('ab'), literal('cd')])
    assert isinstance(expr, ir.StringScalar)

def test_contains(table):
expr = table.g.contains('foo')
expected = table.g.find('foo') >= 0
assert_equal(expr, expected)

    with pytest.raises(Exception):
        'foo' in table.g

def test_getitem_slice(table):
cases = [
(table.g[:3], table.g.substr(0, 3)),
(table.g[2:6], table.g.substr(2, 4)),
]

for case, expected in cases:
assert_equal(case, expected)
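
# Hedged note on the lowering above: a Python slice g[start:stop] maps to
# substr(start, stop - start), so g[2:6] becomes substr(2, 4) and g[:3]
# defaults start to 0.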
27 changes: 16 additions & 11 deletions ibis/expr/tests/test_table.py
@@ -16,7 +16,6 @@

from ibis.expr.types import ColumnExpr, TableExpr, RelationError
from ibis.common import ExpressionError
import ibis.expr.api as api
import ibis.expr.types as ir
import ibis.expr.operations as ops
Expand Down Expand Up @@ -66,7 +65,6 @@ def test_getitem_column_select(table, schema_dict):

# Make sure it's the right type
assert isinstance(col, ColumnExpr)

# Ensure we have a field selection with back-reference to the table
parent = col.parent()
@@ -277,6 +275,7 @@ def test_mutate_alter_existing_columns(table):

expected = table['a', 'b', 'c', 'd', 'e',
new_f.name('f'), 'g', 'h',
'i',
foo.name('foo')]

assert_equal(expr, expected)
@@ -480,22 +479,26 @@ def test_mean_expr_basics(table, numeric_col):


def test_aggregate_no_keys(table):
    metrics = [
        table['a'].sum().name('sum(a)'),
        table['c'].mean().name('mean(c)'),
    ]

# A TableExpr, which in SQL at least will yield a table with a single
# row
    result = table.aggregate(metrics)
assert isinstance(result, TableExpr)
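
    # For intuition (hedged): with no grouping keys this corresponds to
    # SELECT sum(a) AS `sum(a)`, avg(c) AS `mean(c)` FROM t, a one-row table.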


def test_aggregate_keys_basic(table):
    metrics = [
        table['a'].sum().name('sum(a)'),
        table['c'].mean().name('mean(c)'),
    ]

    # A TableExpr; with grouping keys this yields one row per distinct
    # value of g
    result = table.aggregate(metrics, by=['g'])
assert isinstance(result, TableExpr)

# it works!
@@ -873,9 +876,11 @@ def test_semi_join_schema(table):


def test_cross_join(table):
metrics = [
table['a'].sum().name('sum_a'),
table['b'].mean().name('mean_b'),
]
scalar_aggs = table.aggregate(metrics)

joined = table.cross_join(scalar_aggs).materialize()
agg_schema = api.Schema(['sum_a', 'mean_b'], ['int64', 'double'])
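    # Hedged note: cross-joining a table against its own scalar aggregates
    # is the usual way to compare every row with a global statistic; the
    # materialize() call above flattens the joined schema into one table.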
326 changes: 162 additions & 164 deletions ibis/expr/tests/test_temporal.py
@@ -19,167 +19,165 @@
import ibis.expr.types as ir
import ibis.expr.temporal as T


@pytest.mark.parametrize(
('offset', 'unit', 'expected'),
[
(T.day(14), 'w', T.week(2)),
(T.hour(72), 'd', T.day(3)),
(T.minute(240), 'h', T.hour(4)),
(T.second(360), 'm', T.minute(6)),
(T.second(3 * 86400), 'd', T.day(3)),
(T.millisecond(5000), 's', T.second(5)),
(T.microsecond(5000000), 's', T.second(5)),
(T.nanosecond(5000000000), 's', T.second(5)),
]
)
def test_upconvert(offset, unit, expected):
result = offset.to_unit(unit)
assert result.equals(expected)


def test_multiply():
offset = T.day(2)

assert (offset * 2).equals(T.day(4))
assert (offset * (-2)).equals(T.day(-4))
assert (3 * offset).equals(T.day(6))
assert ((-3) * offset).equals(T.day(-6))


def test_repr():
assert repr(T.day()) == '<Timedelta: 1 day>'
assert repr(T.day(2)) == '<Timedelta: 2 days>'
assert repr(T.year()) == '<Timedelta: 1 year>'
assert repr(T.month(2)) == '<Timedelta: 2 months>'
assert repr(T.second(40)) == '<Timedelta: 40 seconds>'


@pytest.mark.parametrize(
('delta', 'target'),
[
(T.day(), 'w'),
(T.hour(), 'd'),
(T.minute(), 'h'),
(T.second(), 'm'),
(T.second(), 'd'),
(T.millisecond(), 's'),
(T.microsecond(), 's'),
(T.nanosecond(), 's'),
]
)
def test_cannot_upconvert(delta, target):
with pytest.raises(IbisError):
delta.to_unit(target)
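

# A hedged sketch of the rule the two tests above pin down: to_unit succeeds
# only when the delta is an exact multiple of the coarser target unit;
# otherwise it raises IbisError.
def test_to_unit_exact_multiple_sketch():
    assert T.hour(48).to_unit('d').equals(T.day(2))
    with pytest.raises(IbisError):
        T.hour(1).to_unit('d')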


@pytest.mark.parametrize(
('case', 'expected'),
[
(T.second(2).to_unit('s'), T.second(2)),
(T.second(2).to_unit('ms'), T.millisecond(2 * 1000)),
(T.second(2).to_unit('us'), T.microsecond(2 * 1000000)),
(T.second(2).to_unit('ns'), T.nanosecond(2 * 1000000000)),
(T.millisecond(2).to_unit('ms'), T.millisecond(2)),
(T.millisecond(2).to_unit('us'), T.microsecond(2 * 1000)),
(T.millisecond(2).to_unit('ns'), T.nanosecond(2 * 1000000)),
(T.microsecond(2).to_unit('us'), T.microsecond(2)),
(T.microsecond(2).to_unit('ns'), T.nanosecond(2 * 1000)),
(T.nanosecond(2).to_unit('ns'), T.nanosecond(2)),
]
)
def test_downconvert_second_parts(case, expected):
assert case.equals(expected)


@pytest.mark.parametrize(
('case', 'expected'),
[
(T.hour(2).to_unit('h'), T.hour(2)),
(T.hour(2).to_unit('m'), T.minute(2 * 60)),
(T.hour(2).to_unit('s'), T.second(2 * 3600)),
(T.hour(2).to_unit('ms'), T.millisecond(2 * 3600000)),
(T.hour(2).to_unit('us'), T.microsecond(2 * 3600000000)),
(T.hour(2).to_unit('ns'), T.nanosecond(2 * 3600000000000))
]
)
def test_downconvert_hours(case, expected):
assert case.equals(expected)


@pytest.mark.parametrize(
('case', 'expected'),
[
(T.week(2).to_unit('d'), T.day(2 * 7)),
(T.week(2).to_unit('h'), T.hour(2 * 7 * 24)),
(T.day(2).to_unit('d'), T.day(2)),
(T.day(2).to_unit('h'), T.hour(2 * 24)),
(T.day(2).to_unit('m'), T.minute(2 * 1440)),
(T.day(2).to_unit('s'), T.second(2 * 86400)),
(T.day(2).to_unit('ms'), T.millisecond(2 * 86400000)),
(T.day(2).to_unit('us'), T.microsecond(2 * 86400000000)),
(T.day(2).to_unit('ns'), T.nanosecond(2 * 86400000000000)),
]
)
def test_downconvert_day(case, expected):
assert case.equals(expected)


@pytest.mark.parametrize(
('case', 'expected'),
[
(T.day() + T.minute(), T.minute(1441)),
(T.second() + T.millisecond(10), T.millisecond(1010)),
(T.hour() + T.minute(5) + T.second(10), T.second(3910)),
]
)
def test_combine_with_different_kinds(case, expected):
assert case.equals(expected)
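

# Hedged sketch building on the cases above: a mixed-unit sum normalizes to
# the finest unit involved, so one day plus one minute is 1441 minutes.
def test_mixed_unit_sum_sketch():
    delta = T.day(1) + T.minute(1)
    assert delta.equals(T.minute(24 * 60 + 1))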


@pytest.mark.parametrize(
('case', 'expected'),
[
(T.timedelta(weeks=2), T.week(2)),
(T.timedelta(days=3), T.day(3)),
(T.timedelta(hours=4), T.hour(4)),
(T.timedelta(minutes=5), T.minute(5)),
(T.timedelta(seconds=6), T.second(6)),
(T.timedelta(milliseconds=7), T.millisecond(7)),
(T.timedelta(microseconds=8), T.microsecond(8)),
(T.timedelta(nanoseconds=9), T.nanosecond(9)),
]
)
def test_timedelta_generic_api(case, expected):
assert case.equals(expected)


def test_offset_timestamp_expr(table):
c = table.i
x = T.timedelta(days=1)

expr = x + c
assert isinstance(expr, ir.TimestampColumn)
assert isinstance(expr.op(), ops.TimestampDelta)

# test radd
expr = c + x
assert isinstance(expr, ir.TimestampColumn)
assert isinstance(expr.op(), ops.TimestampDelta)


@pytest.mark.xfail(raises=AssertionError, reason='NYI')
def test_compound_offset():
# These are not yet allowed (e.g. 1 month + 1 hour)
assert False


@pytest.mark.xfail(raises=AssertionError, reason='NYT')
def test_offset_months():
assert False