242 changes: 242 additions & 0 deletions docs/make.bat
@@ -0,0 +1,242 @@
@ECHO OFF

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set BUILDDIR=build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
set I18NSPHINXOPTS=%SPHINXOPTS% source
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

if "%1" == "" goto help

if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. xml to make Docutils-native XML files
echo. pseudoxml to make pseudoxml-XML files for display purposes
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)

if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)


%SPHINXBUILD% 2> nul
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)

if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)

if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)

if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)

if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)

if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)

if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)

if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Ibis.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Ibis.qhc
goto end
)

if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)

if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)

if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)

if "%1" == "latexpdf" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf
cd %BUILDDIR%/..
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)

if "%1" == "latexpdfja" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
cd %BUILDDIR%/latex
make all-pdf-ja
cd %BUILDDIR%/..
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)

if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)

if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)

if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)

if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)

if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)

if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)

if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)

if "%1" == "xml" (
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The XML files are in %BUILDDIR%/xml.
goto end
)

if "%1" == "pseudoxml" (
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
goto end
)

:end
2 changes: 2 additions & 0 deletions docs/requirements-docs.txt
@@ -0,0 +1,2 @@
sphinx_rtd_theme
numpydoc
3 changes: 3 additions & 0 deletions docs/source/api.rst
@@ -0,0 +1,3 @@
===
API
===
269 changes: 269 additions & 0 deletions docs/source/conf.py
@@ -0,0 +1,269 @@
# -*- coding: utf-8 -*-
#
# Ibis documentation build configuration file, created by
# sphinx-quickstart on Wed Jun 10 11:06:29 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys
import os

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.mathjax',
'sphinx.ext.autosummary',
'numpydoc'
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'Ibis'
copyright = u'2015, Cloudera, Inc.'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
# version = '0.2'

from ibis import __version__ as version

# The full version, including alpha/beta/rc tags.
release = version

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.

import sphinx_rtd_theme
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'Ibisdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
('index', 'Ibis.tex', u'Ibis Documentation',
u'Cloudera, Inc.', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'ibis', u'Ibis Documentation',
[u'Cloudera, Inc.'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'Ibis', u'Ibis Documentation',
u'Cloudera, Inc.', 'Ibis', 'One line description of project.',
'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False
23 changes: 23 additions & 0 deletions docs/source/index.rst
@@ -0,0 +1,23 @@
.. Ibis documentation master file, created by
sphinx-quickstart on Wed Jun 10 11:06:29 2015.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.

Ibis
====

Contents:

.. toctree::
:maxdepth: 1

api
release
legal

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
3 changes: 3 additions & 0 deletions docs/source/legal.rst
@@ -0,0 +1,3 @@
=====
Legal
=====
66 changes: 66 additions & 0 deletions docs/source/release.rst
@@ -0,0 +1,66 @@
=============
Release Notes
=============

0.3.0 (TBD)
-----------

0.2.0 (June 16, 2015)
---------------------

New features
~~~~~~~~~~~~
* ``insert`` method on Ibis client for inserting data into existing tables.
* ``parquet_file``, ``delimited_file``, and ``avro_file`` client methods for
querying datasets not yet available in Impala
* New ``ibis.hdfs_connect`` method and ``HDFS`` client API for WebHDFS for
writing files and directories to HDFS
* New timedelta API and improved timestamp data support
* New ``bucket`` and ``histogram`` methods on numeric expressions
* New ``category`` logical datatype for handling bucketed data, among other
things
* Add ``summary`` API to numeric expressions
* Add ``value_counts`` convenience API to array expressions
* New string methods ``like``, ``rlike``, and ``contains`` for fuzzy and regex
searching
* Add ``options.verbose`` option and configurable ``options.verbose_log``
callback function for improved query logging and visibility
* Support for new SQL built-in functions

* ``ibis.coalesce``
* ``ibis.greatest`` and ``ibis.least``
* ``ibis.where`` for conditional logic (see also ``ibis.case`` and
``ibis.cases``)
* ``nullif`` method on value expressions
* ``ibis.now``

* New aggregate functions: ``approx_median``, ``approx_nunique``, and
``group_concat``
* ``where`` argument in aggregate functions
* Add ``having`` method to ``group_by`` intermediate object
* Added group-by convenience
``table.group_by(exprs).COLUMN_NAME.agg_function()``
* Add default expression names to most aggregate functions
* New Impala database client helper methods

* ``create_database``
* ``drop_database``
* ``exists_database``
* ``list_databases``
* ``set_database``

* Client ``list_tables`` searching / listing method
* Add ``add``, ``sub``, and other explicit arithmetic methods to value
expressions

API Changes
~~~~~~~~~~~
* New Ibis client and Impala connection workflow. The client is now composed
of an Impala connection and an optional HDFS connection

Bug fixes
~~~~~~~~~
* Fixed numerous bugs and rough edges in the expression API

0.1.0 (March 26, 2015)
----------------------
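
To make the 0.2.0 list above concrete, here is a short sketch exercising a few of the new APIs against a hypothetical unbound table; the table name, schema, and literal values are illustrative only, not part of this changeset:

    import ibis

    # Hypothetical schema, mirroring the one used in the test suite
    t = ibis.table([('string_col', 'string'),
                    ('double_col', 'double')], 'functional_alltypes')

    # New bucketing API on numeric expressions
    tier = t.double_col.bucket([0, 50, 100], include_over=True).name('tier')

    # New convenience APIs
    counts = t.string_col.value_counts()
    flag = ibis.where(t.double_col > 50, 'hi', 'lo').name('flag')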
107 changes: 104 additions & 3 deletions ibis/__init__.py
@@ -12,19 +12,120 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.connection import impala_connect

# flake8: noqa

__version__ = '0.2.0'

from ibis.client import ImpalaConnection, ImpalaClient
from ibis.filesystems import WebHDFS

import ibis.expr.api as api
import ibis.expr.types as ir

from ibis.expr.api import desc, literal, table, case
# __all__ is defined in ibis.expr.api
from ibis.expr.api import *

import ibis.config_init
from ibis.config import options


def make_client(db, hdfs_client=None):
"""
Create an Ibis client from a database connection and optional additional
connections (like HDFS)
Parameters
----------
db : Connection
e.g. produced by ibis.impala_connect
hdfs_client : ibis HDFS client
Examples
--------
con = ibis.impala_connect(**impala_params)
hdfs = ibis.hdfs_connect(**hdfs_params)
client = ibis.make_client(con, hdfs_client=hdfs)
Returns
-------
client : IbisClient
"""
return ImpalaClient(db, hdfs_client=hdfs_client)


def impala_connect(host='localhost', port=21050, protocol='hiveserver2',
database=None, timeout=45, use_ssl=False, ca_cert=None,
use_ldap=False, ldap_user=None, ldap_password=None,
use_kerberos=False, kerberos_service_name='impala'):
"""
Create an Impala Client for use with Ibis
Parameters
----------
host : host name
port : int, default 21050 (HiveServer 2)
protocol : {'hiveserver2', 'beeswax'}
database :
timeout :
use_ssl :
ca_cert :
use_ldap : boolean, default False
ldap_user :
ldap_password :
use_kerberos : boolean, default False
kerberos_service_name : string, default 'impala'
Returns
-------
con : ImpalaConnection
"""
params = {
'host': host,
'port': port,
'protocol': protocol,
'database': database,
'timeout': timeout,
'use_ssl': use_ssl,
'ca_cert': ca_cert,
'use_ldap': use_ldap,
'ldap_user': ldap_user,
'ldap_password': ldap_password,
'use_kerberos': use_kerberos,
'kerberos_service_name': kerberos_service_name
}

return ImpalaConnection(**params)


def hdfs_connect(host='localhost', port=50070, protocol='webhdfs', **kwds):
"""
Connect to HDFS
Parameters
----------
host : string
port : int, default 50070 (webhdfs default)
protocol : {'webhdfs'}
Returns
-------
client : ibis HDFS client
"""
from hdfs import InsecureClient
url = 'http://{}:{}'.format(host, port)
client = InsecureClient(url, **kwds)
return WebHDFS(client)


def test(include_e2e=False):
import pytest
args = ['--pyargs', 'ibis']
import ibis
import os

ibis_dir, _ = os.path.split(ibis.__file__)

args = ['--pyargs', ibis_dir]
if not include_e2e:
args.extend(['-m', 'not e2e'])
pytest.main(args)
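
The three helpers above chain together as in the ``make_client`` docstring; a minimal sketch, assuming a reachable Impala daemon and WebHDFS endpoint (the host names and database here are placeholders):

    import ibis

    con = ibis.impala_connect(host='impala-host.example.com', port=21050,
                              database='default')
    hdfs = ibis.hdfs_connect(host='namenode.example.com', port=50070)

    client = ibis.make_client(con, hdfs_client=hdfs)
    tables = client.list_tables()  # per the 0.2.0 release notes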
717 changes: 717 additions & 0 deletions ibis/client.py

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions ibis/common.py
@@ -35,3 +35,11 @@ class RelationError(ExpressionError):

class TranslationError(IbisError):
pass


class IbisTypeError(IbisError, TypeError):
pass


class InputTypeError(IbisTypeError):
pass
48 changes: 13 additions & 35 deletions ibis/config.py
@@ -1,43 +1,21 @@
# This file has been adapted from pandas/core/config.py. pandas 3-clause BSD
# license is as follows
# license. See LICENSES/pandas
#
# ----------------------------------------------------------------------
# Copyright (c) 2012-2015 pandas development team
# Further modifications:
#
# Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
# All rights reserved.
# Copyright 2014 Cloudera Inc.
#
# Copyright (c) 2008-2011 AQR Capital Management, LLC
# All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# http://www.apache.org/licenses/LICENSE-2.0
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# * Neither the name of the copyright holder nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ----------------------------------------------------------------------
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

@@ -722,7 +700,7 @@ def inner(x):

def is_one_of_factory(legal_values):
def inner(x):
if not x in legal_values:
if x not in legal_values:
pp_values = map(str, legal_values)
raise ValueError("Value must be one of %s"
% str("|".join(pp_values)))
9 changes: 8 additions & 1 deletion ibis/config_init.py
@@ -15,6 +15,14 @@
import ibis.config as cf

cf.register_option('interactive', False, validator=cf.is_bool)
cf.register_option('verbose', False, validator=cf.is_bool)


def to_stdout(x):
print(x)


cf.register_option('verbose_log', to_stdout)


sql_default_limit_doc = """
@@ -23,5 +31,4 @@


with cf.config_prefix('sql'):

cf.register_option('default_limit', 10000, sql_default_limit_doc)
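
The two options registered above can be driven from user code; a sketch (the ``records`` list is illustrative -- any callable accepting the log string works):

    import ibis

    records = []

    ibis.options.verbose = True
    ibis.options.verbose_log = records.append

    # Queries executed from here on are appended to `records` rather than
    # printed by the default to_stdout callback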
275 changes: 0 additions & 275 deletions ibis/connection.py

This file was deleted.

263 changes: 188 additions & 75 deletions ibis/expr/analysis.py
@@ -17,7 +17,7 @@
import ibis.expr.operations as ops
import ibis.util as util

#----------------------------------------------------------------------
# ---------------------------------------------------------------------
# Some expression metaprogramming / graph transformations to support
# compilation later

@@ -77,7 +77,7 @@ def _sub_arg(self, arg):
return subbed_arg

def _key(self, expr):
return id(expr.op())
return repr(expr.op())

def sub(self, expr):
key = self._key(expr)
@@ -99,8 +99,9 @@ def _sub(self, expr):
return helper.get_result()


def substitute_parents(expr, lift_memo=None):
rewriter = ExprSimplifier(expr, lift_memo=lift_memo)
def substitute_parents(expr, lift_memo=None, past_projection=True):
rewriter = ExprSimplifier(expr, lift_memo=lift_memo,
block_projection=not past_projection)
return rewriter.get_result()


@@ -122,7 +123,7 @@ def __init__(self, expr, lift_memo=None, block_projection=False):
def get_result(self):
expr = self.expr
node = expr.op()
if isinstance(node, ops.Literal):
if isinstance(node, ir.Literal):
return expr

# For table column references, in the event that we're on top of a
@@ -184,7 +185,9 @@ def _lift(x):
return result, unchanged[0]

def lift(self, expr, block=None):
# This use of id() is OK since only for memoization
key = id(expr.op()), block

if key in self.lift_memo:
return self.lift_memo[key]

@@ -230,6 +233,9 @@ def _lift_TableColumn(self, expr, block=None):
can_lift = True
lifted_root = self.lift(val.op().table)

# XXX
# can_lift = False

# HACK: If we've projected a join, do not lift the children
# TODO: what about limits and other things?
# if isinstance(root.table.op(), Join):
@@ -347,20 +353,13 @@ def apply_filter(expr, predicates):
# their roots, then pushdown (at least of that predicate) is not
# possible

# TODO: is partial pushdown (one or more, but not all of the passed
# predicates) something we should consider doing? Could be reasonable

can_pushdown = True
for pred in predicates:
roots = pred._root_tables()
if _in_roots(expr, roots):
can_pushdown = False

# It's not unusual for the filter to reference the projection
# itself. If a predicate can be pushed down, in this case we must
# rewrite replacing the table refs with the roots internal to the
# projection we are referencing
#
# If the filter references any new or derived aliases in the
#
# in pseudocode
# c = Projection(Join(a, b, jpreds), ppreds)
# filter_pred = c.field1 == c.field2
@@ -370,6 +369,16 @@ def apply_filter(expr, predicates):
# below the projection, we need to rewrite the predicate referencing
# the parent tables in the join being projected

# TODO: is partial pushdown (one or more, but not all of the passed
# predicates) something we should consider doing? Could be reasonable

# if isinstance(op, ops.Projection):
# else:
# # Aggregation
# can_pushdown = op.table.is_an

can_pushdown = _can_pushdown(op, predicates)

if can_pushdown:
predicates = [substitute_parents(x) for x in predicates]

@@ -383,54 +392,156 @@

return result

# def _pushdown_substitute(expr):
# rewriter = _PushdownRewrite(expr)
# return rewriter.get_result()
# class _PushdownRewrite(object):
# Hm, this is quite similar to the ExprSimplifier above
# def __init__(self, expr):
# self.expr = expr
# def get_result(self):
# return self._rewrite(expr)
# def _rewrite(self, expr):
# node = expr.op()
# unchanged = True
# new_args = []
# for arg in node.args:
# pass


def _in_roots(expr, roots):
# XXX
what = expr.op() if isinstance(expr, ir.Expr) else expr
return id(what) in [id(x) for x in roots]


def _maybe_fuse_projection(expr, clean_exprs):
node = expr.op()

if isinstance(node, ops.Projection):
roots = [node]
else:
roots = node.root_tables()
def _can_pushdown(op, predicates):
# Per issues discussed in #173
#
# The only case in which pushdown is possible is that all table columns
# referenced must meet all of the following (not that onerous in practice)
# criteria
#
# 1) Is a table column, not any other kind of expression
# 2) Is unaliased. So, if you project t3.foo AS bar, then filter on bar,
# this cannot be pushed down (until we implement alias rewriting if
# necessary)
# 3) Appears in the selections in the projection (either is part of one of
# the entire tables or a single column selection)

can_pushdown = True
for pred in predicates:
validator = _PushdownValidate(op, pred)
predicate_is_valid = validator.get_result()
can_pushdown = can_pushdown and predicate_is_valid
return can_pushdown


class _PushdownValidate(object):

def __init__(self, parent, predicate):
self.parent = parent
self.pred = predicate

self.validator = ExprValidator([self.parent.table])

self.valid = True

def get_result(self):
self._walk(self.pred)
return self.valid

def _walk(self, expr):
node = expr.op()
if isinstance(node, ops.TableColumn):
is_valid = self._validate_column(expr)
self.valid = self.valid and is_valid

for arg in node.flat_args():
if isinstance(arg, ir.ValueExpr):
self._walk(arg)

# Skip other types of exprs

def _validate_column(self, expr):
if isinstance(self.parent, ops.Projection):
return self._validate_projection(expr)
else:
validator = ExprValidator([self.parent.table])
return validator.validate(expr)

def _validate_projection(self, expr):
is_valid = False
node = expr.op()

# Has a different alias, invalid
if _is_aliased(expr):
return False

for val in self.parent.selections:
if (isinstance(val.op(), ops.PhysicalTable)
and node.name in val.schema()):
is_valid = True
elif (isinstance(val.op(), ops.TableColumn)
and node.name == val.get_name()
and not _is_aliased(val)):
# Aliased table columns are no good
col_table = val.op().table.op()

if len(roots) == 1 and isinstance(roots[0], ops.Projection):
root = roots[0]
lifted_node = substitute_parents(expr).op()

roots = root.root_tables()
validator = ExprValidator([ir.TableExpr(root)])
is_valid = (col_table.is_ancestor(node.table)
or col_table.is_ancestor(lifted_node.table))

# is_valid = True

return is_valid


def _is_aliased(col_expr):
return col_expr.op().name != col_expr.get_name()


class Projector(object):

"""
Analysis and validation of projection operation, taking advantage of
"projection fusion" opportunities where they exist, i.e. combining
compatible projections together rather than nesting them. Translation /
evaluation later will not attempt to do any further fusion /
simplification.
"""

def __init__(self, parent, proj_exprs):
self.parent = parent

node = self.parent.op()

if isinstance(node, ops.Projection):
roots = [node]
else:
roots = node.root_tables()

self.parent_roots = roots

clean_exprs = []
validator = ExprValidator([parent])

for expr in proj_exprs:
# Perform substitution only if we share common roots
if validator.shares_some_roots(expr):
expr = substitute_parents(expr, past_projection=False)
clean_exprs.append(expr)

self.clean_exprs = clean_exprs

def get_result(self):
roots = self.parent_roots

if len(roots) == 1 and isinstance(roots[0], ops.Projection):
fused_op = self._check_fusion(roots[0])
if fused_op is not None:
return fused_op

return ops.Projection(self.parent, self.clean_exprs)

def _check_fusion(self, root):
roots = root.table._root_tables()
validator = ExprValidator([root.table])
fused_exprs = []
can_fuse = False
for val in clean_exprs:
for val in self.clean_exprs:
# XXX
lifted_val = substitute_parents(val)

# a * projection
if (isinstance(val, ir.TableExpr) and
(val is expr or

# gross we share the same table root. Better way to detect?
len(roots) == 1 and val._root_tables()[0] is roots[0])
):
(self.parent.op().is_ancestor(val) or
# gross we share the same table root. Better way to
# detect?
len(roots) == 1 and val._root_tables()[0] is roots[0])):
can_fuse = True
fused_exprs.extend(root.selections)
elif validator.validate(lifted_val):
fused_exprs.append(lifted_val)
elif not validator.validate(val):
can_fuse = False
break
@@ -439,8 +550,8 @@ def _maybe_fuse_projection(expr, clean_exprs):

if can_fuse:
return ops.Projection(root.table, fused_exprs)

return ops.Projection(expr, clean_exprs)
else:
return None


class ExprValidator(object):
Expand All @@ -450,34 +561,36 @@ def __init__(self, exprs):

self.roots = []
for expr in self.parent_exprs:

self.roots.extend(expr._root_tables())

self.root_ids = set(id(x) for x in self.roots)
def has_common_roots(self, expr):
return self.validate(expr)

def validate(self, expr):
return self.has_common_roots(expr)

def has_common_roots(self, expr):
op = expr.op()
if isinstance(op, ops.TableColumn):
for root in self.roots:
if root is op.table.op():
return True
if self._among_roots(op.table.op()):
return True
elif isinstance(op, ops.Projection):
for root in self.roots:
if root is op:
return True
if self._among_roots(op):
return True

expr_roots = expr._root_tables()
for root in expr_roots:
if id(root) not in self.root_ids:
if not self._among_roots(root):
return False
return True

def _among_roots(self, node):
for root in self.roots:
if root.is_ancestor(node):
return True
return False

def shares_some_roots(self, expr):
expr_roots = expr._root_tables()
return any(id(root) in self.root_ids for root in expr_roots)
return any(self._among_roots(root)
for root in expr_roots)

def validate_all(self, exprs):
for expr in exprs:
@@ -513,21 +626,21 @@ def validate(self, expr):
is_valid = True

if isinstance(op, ops.Contains):
value_valid = self.has_common_roots(op.value)
value_valid = ExprValidator.validate(self, op.value)
is_valid = value_valid
else:
roots_valid = []
for arg in op.flat_args():
if isinstance(arg, ir.ScalarExpr):
arg_valid = True
# arg_valid = True
pass
elif isinstance(arg, ir.ArrayExpr):
roots_valid.append(self.shares_some_roots(arg))
elif isinstance(arg, ir.Expr):
raise NotImplementedError
else:
arg_valid = True

# args_valid.append(arg_valid)
# arg_valid = True
pass

is_valid = any(roots_valid)

@@ -593,14 +706,14 @@ def walk(expr):


def find_backend(expr):
from ibis.connection import Connection
from ibis.client import Client

backends = []

def walk(expr):
node = expr.op()
for arg in node.flat_args():
if isinstance(arg, Connection):
if isinstance(arg, Client):
backends.append(arg)
elif isinstance(arg, ir.Expr):
walk(arg)
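
The pushdown criteria encoded in ``_PushdownValidate`` are easiest to see at the expression level; a sketch with a hypothetical table, following the aliasing example from the comments above:

    import ibis

    t = ibis.table([('foo', 'double'), ('bar', 'string')], 't3')

    # Project foo under a new alias, and bar unchanged
    proj = t[t.foo.name('baz'), t.bar]

    # Filtering on the unaliased column can be rewritten below the projection
    pushable = proj[proj.bar == 'a']

    # Filtering on the alias cannot (criterion 2 above), so the predicate
    # stays above the Projection node
    blocked = proj[proj.baz > 0]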
178 changes: 178 additions & 0 deletions ibis/expr/analytics.py
@@ -0,0 +1,178 @@
# Copyright 2015 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import ibis.expr.types as ir
import ibis.expr.operations as ops


class BucketLike(ir.ValueNode):

def _validate_closed(self, closed):
closed = closed.lower()
if closed not in ['left', 'right']:
raise ValueError("closed must be 'left' or 'right'")
return closed

@property
def nbuckets(self):
return None

def output_type(self):
ctype = ir.CategoryType(self.nbuckets)
return ctype.array_ctor()


class Bucket(BucketLike):

def __init__(self, arg, buckets, closed='left', close_extreme=True,
include_under=False, include_over=False):
self.arg = arg
self.buckets = buckets
self.closed = self._validate_closed(closed)

self.close_extreme = bool(close_extreme)
self.include_over = bool(include_over)
self.include_under = bool(include_under)

if len(buckets) == 0:
raise ValueError('Must be at least one bucket edge')
elif len(buckets) == 1:
if not self.include_under or not self.include_over:
raise ValueError('If one bucket edge provided, must have'
' include_under=True and include_over=True')

ir.ValueNode.__init__(self, [self.arg, self.buckets, self.closed,
self.close_extreme,
self.include_under,
self.include_over])

@property
def nbuckets(self):
k = len(self.buckets) - 1
k += int(self.include_over) + int(self.include_under)
return k


class Histogram(BucketLike):

def __init__(self, arg, nbins, binwidth, base, closed='left',
aux_hash=None):
self.arg = arg

self.nbins = nbins
self.binwidth = binwidth
self.base = base

if self.nbins is None:
if self.binwidth is None:
raise ValueError('Must indicate nbins or binwidth')
elif self.binwidth is not None:
raise ValueError('nbins and binwidth are mutually exclusive')

self.closed = self._validate_closed(closed)

self.aux_hash = aux_hash
ir.ValueNode.__init__(self, [self.arg, self.nbins, self.binwidth,
self.base, self.closed, self.aux_hash])

def output_type(self):
# always undefined cardinality (for now)
ctype = ir.CategoryType()
return ctype.array_ctor()


class CategoryLabel(ir.ValueNode):

def __init__(self, arg, labels, nulls):
self.arg = ops.as_value_expr(arg)
self.labels = labels

card = self.arg.type().cardinality
if len(self.labels) != card:
raise ValueError('Number of labels must match number of '
'categories: %d' % card)

self.nulls = nulls
ir.ValueNode.__init__(self, [self.arg, self.labels, self.nulls])

def output_type(self):
return ops._shape_like(self.arg, 'string')


def bucket(arg, buckets, closed='left', close_extreme=True,
include_under=False, include_over=False):
"""
Parameters
----------
arg : numeric array expression
buckets : list
closed : {'left', 'right'}, default 'left'
Which side of each interval is closed. For example
buckets = [0, 100, 200]
closed = 'left': 100 falls in 2nd bucket
closed = 'right': 100 falls in 1st bucket
close_extreme : boolean, default True

Returns
-------
bucketed : coded value expression
"""
op = Bucket(arg, buckets, closed=closed, close_extreme=close_extreme,
include_under=include_under, include_over=include_over)
return op.to_expr()


def histogram(arg, nbins=None, binwidth=None, base=None, closed='left',
aux_hash=None):
"""
Compute a histogram with fixed width bins

Parameters
----------
arg : numeric array expression
nbins : int, default None
If supplied, will be used to compute the binwidth
binwidth : number, default None
If not supplied, computed from the data (actual max and min values)
base : number, default None
closed : {'left', 'right'}, default 'left'
Which side of each interval is closed

Returns
-------
histogrammed : coded value expression
"""
op = Histogram(arg, nbins, binwidth, base, closed=closed,
aux_hash=aux_hash)
return op.to_expr()


def category_label(arg, labels, nulls=None):
"""
Format a known number of categories as strings

Parameters
----------
labels : list of string
nulls : string, optional
How to label any null values among the categories

Returns
-------
string_categories : string value expression
"""
op = CategoryLabel(arg, labels, nulls)
return op.to_expr()
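
A sketch tying the three helpers together; the table, edges, and labels are hypothetical (``bucket`` and ``histogram`` are also exposed as methods on numeric arrays, as the test suite below shows):

    import ibis
    from ibis.expr.analytics import category_label

    t = ibis.table([('double_col', 'double')], 'alltypes')
    d = t.double_col

    # Edges [0, 50, 100] give two interior buckets; include_over adds a third
    tier = d.bucket([0, 50, 100], include_over=True)

    # One label per resulting category (three here)
    labeled = category_label(tier, ['low', 'high', 'over'], nulls='unknown')

    # nbins and binwidth are mutually exclusive
    hist = d.histogram(nbins=10)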
788 changes: 722 additions & 66 deletions ibis/expr/api.py

Large diffs are not rendered by default.

92 changes: 69 additions & 23 deletions ibis/expr/format.py
@@ -24,27 +24,32 @@ def __init__(self):
from collections import defaultdict
self.formatted = {}
self.aliases = {}
self.ops = {}
self.counts = defaultdict(lambda: 0)

def __contains__(self, obj):
return id(obj) in self.formatted
return self._key(obj) in self.formatted

def observe(self, obj, formatter=repr):
key = id(obj)
def _key(self, obj):
return obj._repr()

def observe(self, obj, formatter=lambda x: x._repr()):
key = self._key(obj)
if key not in self.formatted:
self.aliases[key] = 'ref_%d' % len(self.formatted)
self.formatted[key] = formatter(obj)
self.ops[key] = obj

self.counts[key] += 1

def count(self, obj):
return self.counts[id(obj)]
return self.counts[self._key(obj)]

def get_alias(self, obj):
return self.aliases[id(obj)]
return self.aliases[self._key(obj)]

def get_formatted(self, obj):
return self.formatted[id(obj)]
return self.formatted[self._key(obj)]


class ExprFormatter(object):
@@ -91,16 +96,21 @@ def get_result(self):
text = 'Literal[%s] %s' % (self._get_type_display(),
str(what.value))

if isinstance(self.expr, ir.ValueExpr) and self.expr._name is not None:
text = '{} = {}'.format(self.expr.get_name(), text)

if self.memoize:
alias_to_text = [(self.memo.aliases[x],
self.memo.formatted[x], x)
self.memo.formatted[x],
self.memo.ops[x])
for x in self.memo.formatted]
alias_to_text.sort()

# A hack to suppress printing out of a ref that is the result of
# the top level expression
refs = [x + '\n' + y
for x, y, key in alias_to_text if key != id(what)]
for x, y, op in alias_to_text
if not op.equals(what)]

text = '\n\n'.join(refs + [text])

@@ -138,7 +148,7 @@ def _format_table(self, table):
rows.extend([' %s : %s' % tup for tup in
zip(table.schema.names, table.schema.types)])
opname = type(table).__name__
type_display = self._get_type_display()
type_display = self._get_type_display(table)
opline = '%s[%s]' % (opname, type_display)
return '{}\n{}'.format(opline, self._indent('\n'.join(rows)))

@@ -158,22 +168,48 @@ def _format_column(self, expr):
def _format_node(self, op):
formatted_args = []

def visit(what):
def visit(what, extra_indents=0):
if isinstance(what, ir.Expr):
result = self._format_subexpr(what)
else:
result = self._indent(str(what))

if extra_indents > 0:
result = util.indent(result, self.indent_size)

formatted_args.append(result)

for arg in op.args:
if isinstance(arg, list):
for x in arg:
visit(x)
else:
visit(arg)
arg_names = getattr(op, '_arg_names', None)

if arg_names is None:
for arg in op.args:
if isinstance(arg, list):
for x in arg:
visit(x)
else:
visit(arg)
else:
for arg, name in zip(op.args, arg_names):
if name is not None:
name = self._indent('{}:'.format(name))
if isinstance(arg, list):
if name is not None and len(arg) > 0:
formatted_args.append(name)
indents = 1
else:
indents = 0
for x in arg:
visit(x, extra_indents=indents)
else:
if name is not None:
formatted_args.append(name)
indents = 1
else:
indents = 0
visit(arg, extra_indents=indents)

opname = type(op).__name__
type_display = self._get_type_display()
type_display = self._get_type_display(op)
opline = '%s[%s]' % (opname, type_display)

return '\n'.join([opline] + formatted_args)
Expand All @@ -183,12 +219,22 @@ def _format_subexpr(self, expr):
memoize=False)
return formatter.get_result()

def _get_type_display(self):
if isinstance(self.expr, ir.TableExpr):
def _get_type_display(self, expr=None):
if expr is None:
expr = self.expr

if isinstance(expr, ir.Node):
expr = expr.to_expr()

if isinstance(expr, ir.TableExpr):
return 'table'
elif isinstance(self.expr, ir.ArrayExpr):
return 'array(%s)' % self.expr.type()
elif isinstance(self.expr, ir.ScalarExpr):
return '%s' % self.expr.type()
elif isinstance(expr, ir.ArrayExpr):
return 'array(%s)' % expr.type()
elif isinstance(expr, ir.ScalarExpr):
return '%s' % expr.type()
elif isinstance(expr, ir.ExprList):
list_args = [self._get_type_display(arg)
for arg in expr.op().args]
return ', '.join(list_args)
else:
raise NotImplementedError
128 changes: 128 additions & 0 deletions ibis/expr/groupby.py
@@ -0,0 +1,128 @@
# Copyright 2014 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# User API for grouped data operations

import ibis.expr.types as ir
import ibis.util as util


class GroupedTableExpr(object):

"""
Helper intermediate construct
"""

def __init__(self, table, by, having=None):
if not isinstance(by, (list, tuple)):
if not isinstance(by, ir.Expr):
by = table._resolve([by])
else:
by = [by]
else:
by = table._resolve(by)

self.table = table
self.by = by
self._having = having or []

def __getattr__(self, attr):
if hasattr(self.table, attr):
return self._column_wrapper(attr)

raise AttributeError("GroupBy has no attribute %r" % attr)

def _column_wrapper(self, attr):
col = self.table[attr]
if isinstance(col, ir.NumericValue):
return GroupedNumbers(col, self)
else:
return GroupedArray(col, self)

def aggregate(self, metrics):
return self.table.aggregate(metrics, by=self.by,
having=self._having)

def having(self, expr):
"""
Add a post-aggregation result filter (like the having argument in
`aggregate`), for composability with the group_by API

Returns
-------
grouped : GroupedTableExpr
"""
exprs = util.promote_list(expr)
new_having = self._having + exprs
return GroupedTableExpr(self.table, self.by, having=new_having)

def count(self, metric_name='count'):
"""
Convenience function for computing the group sizes (number of rows per
group) given a grouped table.

Parameters
----------
metric_name : string, default 'count'
Name to use for the row count metric

Returns
-------
aggregated : TableExpr
The aggregated table
"""
metric = self.table.count().name(metric_name)
return self.table.aggregate([metric], by=self.by)

size = count


def _group_agg_dispatch(name):
def wrapper(self, *args, **kwargs):
f = getattr(self.arr, name)
metric = f(*args, **kwargs)
alias = '{}({})'.format(name, self.arr.get_name())
return self.parent.aggregate(metric.name(alias))

wrapper.__name__ = name
return wrapper


class GroupedArray(object):

def __init__(self, arr, parent):
self.arr = arr
self.parent = parent

count = _group_agg_dispatch('count')
size = count
min = _group_agg_dispatch('min')
max = _group_agg_dispatch('max')
approx_nunique = _group_agg_dispatch('approx_nunique')
approx_median = _group_agg_dispatch('approx_median')
group_concat = _group_agg_dispatch('group_concat')

def summary(self, exact_nunique=False):
metric = self.arr.summary(exact_nunique=exact_nunique)
return self.parent.aggregate(metric)


class GroupedNumbers(GroupedArray):

mean = _group_agg_dispatch('mean')
sum = _group_agg_dispatch('sum')

def summary(self, exact_nunique=False):
metric = self.arr.summary(exact_nunique=exact_nunique)
return self.parent.aggregate(metric)
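
A sketch of how the grouped-data helpers above chain together (the table and column names are hypothetical):

    import ibis

    t = ibis.table([('key', 'string'), ('value', 'double')], 't')

    # Group sizes via the count/size convenience
    sizes = t.group_by('key').count(metric_name='n')

    # Column-level dispatch: GroupedNumbers exposes mean, sum, etc.
    means = t.group_by('key').value.mean()

    # having() composes with group_by before the final aggregate()
    expr = (t.group_by('key')
            .having(t.value.max() > 10)
            .aggregate([t.value.sum().name('total')]))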
530 changes: 437 additions & 93 deletions ibis/expr/operations.py

Large diffs are not rendered by default.

71 changes: 60 additions & 11 deletions ibis/expr/rules.py
@@ -43,17 +43,19 @@ def _get_type(self):
return 'float'
elif util.all_of(self.args, ir.IntegerValue):
return self._get_int_type()
elif util.any_of(self.args, ir.DecimalValue):
return _decimal_promoted_type(self.args)
else:
raise NotImplementedError

def _get_int_type(self):
deps = [x.op() for x in self.args]

if util.all_of(deps, ops.Literal):
if util.all_of(deps, ir.Literal):
return _smallest_int_containing(
[self.op(deps[0].value, deps[1].value)])
elif util.any_of(deps, ops.Literal):
if isinstance(deps[0], ops.Literal):
elif util.any_of(deps, ir.Literal):
if isinstance(deps[0], ir.Literal):
val = deps[0].value
atype = self.args[1].type()
else:
@@ -70,6 +72,16 @@ def _check_compatibility(self):
raise TypeError('String and non-string incompatible')


def _decimal_promoted_type(args):
precisions = []
scales = []
for arg in args:
if isinstance(arg, ir.DecimalValue):
precisions.append(arg.meta.precision)
scales.append(arg.meta.scale)
return ir.DecimalType(max(precisions), max(scales))


class PowerPromoter(BinaryPromoter):

def __init__(self, left, right):
@@ -83,7 +95,9 @@ def _get_type(self):
return 'double'
else:
return 'float'
elif isinstance(rval, ops.Literal) and rval.value < 0:
elif util.any_of(self.args, ir.DecimalValue):
return _decimal_promoted_type(self.args)
elif isinstance(rval, ir.Literal) and rval.value < 0:
return 'double'
elif util.all_of(self.args, ir.IntegerValue):
return self._get_int_type()
@@ -128,9 +142,18 @@ class _TypePrecedence(object):
# boolean
# string

_precedence = ['double', 'float', 'decimal',
'int64', 'int32', 'int16', 'int8',
'boolean', 'string']
_precedence = {
'double': 9,
'float': 8,
'decimal': 7,
'int64': 6,
'int32': 5,
'int16': 4,
'int8': 3,
'boolean': 2,
'string': 1,
'null': 0
}

def __init__(self, exprs):
self.exprs = exprs
@@ -148,12 +171,22 @@ def get_result(self):

def _count_types(self):
for expr in self.exprs:
self.type_counts[expr._base_type()] += 1
self.type_counts[expr.type()] += 1

def _get_highest_type(self):
for typename in self._precedence:
if self.type_counts[typename] > 0:
return typename
scores = []
for k, v in self.type_counts.items():
if not v:
continue
if isinstance(k, ir.DataType):
score = self._precedence[k._base_type()]
else:
score = self._precedence[k]

scores.append((score, k))

scores.sort()
return scores[-1][1]

def _check_casts(self, typename):
for expr in self.exprs:
@@ -208,3 +241,19 @@ def int_literal_class(value, allow_overflow=False):
def _largest_int(int_types):
nbytes = max(_nbytes[t] for t in int_types)
return 'int%d' % (8 * nbytes)


class ImplicitCast(object):

def __init__(self, value_type, implicit_targets):
self.value_type = value_type
self.implicit_targets = implicit_targets

def can_cast(self, target):
if isinstance(target, ir.DataType):
base_type = target._base_type()
else:
base_type = target

return (base_type in self.implicit_targets or
target == self.value_type)
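
A sketch of the decimal promotion rule introduced above, assuming two decimal columns (the table and type strings are illustrative):

    import ibis

    t = ibis.table([('a', 'decimal(12,2)'),
                    ('b', 'decimal(18,4)')], 't')

    # _decimal_promoted_type takes the max precision and max scale of the
    # operands, so the sum is typed decimal(18, 4)
    c = t.a + t.b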
298 changes: 298 additions & 0 deletions ibis/expr/temporal.py
@@ -0,0 +1,298 @@
# Copyright 2014 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.common import IbisError
import ibis.expr.types as ir


__all__ = ['timedelta', 'year', 'month', 'week', 'day',
'hour', 'minute', 'second',
'millisecond', 'microsecond']


class Timedelta(object):
"""
Represents any kind of date/time/timestamp increment, the precise length
possibly dependent on the timestamp being modified.
"""
def __init__(self, n):
self.n = int(n)

@property
def unit(self):
raise NotImplementedError

@property
def unit_name(self):
return type(self).__name__.lower()

def __repr__(self):
if self.n == 1:
pretty_unit = self.unit_name
else:
pretty_unit = '{}s'.format(self.unit_name)

return '<Timedelta: {} {}>'.format(self.n, pretty_unit)

def replace(self, n):
return type(self)(n)

def __mul__(self, times):
return self.replace(self.n * times)

__rmul__ = __mul__

def __add__(self, arg):
from ibis.expr.operations import TimestampDelta

if isinstance(arg, ir.TimestampValue):
op = TimestampDelta(arg, self)
return op.to_expr()
elif isinstance(arg, Timedelta):
return self.combine(arg)
else:
raise TypeError(arg)

__radd__ = __add__

def __sub__(self, arg):
if isinstance(arg, ir.Expr):
raise TypeError(arg)
elif isinstance(arg, Timedelta):
return self.combine(arg.replace(-arg.n))
else:
raise NotImplementedError

def __rsub__(self, arg):
return self.replace(-self.n).__add__(arg)

def combine(self, other):
if type(self) != type(other):
raise TypeError(type(other))

klass = type(self)
return klass(self.n + other.n)

def equals(self, other):
if type(self) != type(other):
return False

return self.n == other.n


class TimeIncrement(Timedelta):

@property
def unit(self):
return self._unit

def combine(self, other):
if not isinstance(other, TimeIncrement):
raise TypeError('Must be a fixed size timedelta, was {!r}'
.format(type(other)))

a, b = _to_common_units([self, other])
return type(a)(a.n + b.n)

def to_unit(self, target_unit):
"""
"""
target_unit = target_unit.lower()
if self.unit == target_unit:
return self

klass = _timedelta_units[target_unit]
increments = CONVERTER.convert(self.n, self.unit, target_unit)
return klass(increments)


def _to_common_units(args):
common_unit = CONVERTER.get_common_unit([x.unit for x in args])
return [x.to_unit(common_unit) for x in args]


class Nanosecond(TimeIncrement):
_unit = 'ns'


class Microsecond(TimeIncrement):
_unit = 'us'


class Millisecond(TimeIncrement):
_unit = 'ms'


class Second(TimeIncrement):
_unit = 's'


class Minute(TimeIncrement):
_unit = 'm'


class Hour(TimeIncrement):
_unit = 'h'


class Day(TimeIncrement):
_unit = 'd'


class Week(TimeIncrement):
_unit = 'w'


class Month(Timedelta):
_unit = 'M'


class Year(Timedelta):
_unit = 'Y'


_timedelta_units = {
'Y': Year,
'M': Month,
'w': Week,
'd': Day,
'h': Hour,
'm': Minute,
's': Second,
'ms': Millisecond,
'us': Microsecond,
'ns': Nanosecond
}


class UnitConverter(object):

def __init__(self, ordering, conv_factors, names):
self.ordering = ordering
self.conv_factors = conv_factors
self.names = names

self.ranks = dict((name, i) for i, name in enumerate(ordering))
self.rank_to_unit = dict((v, k) for k, v in self.ranks.items())

def get_common_unit(self, units):
min_rank = max(self.ranks[x] for x in units)
return self.rank_to_unit[min_rank]

def convert(self, n, from_unit, to_unit):
i = self.ranks[from_unit]
j = self.ranks[to_unit]

if i == j:
return n

factors = self.conv_factors[min(i, j) + 1: max(i, j) + 1]
factor = 1
for x in factors:
factor *= x

if j < i:
if n % factor:
raise IbisError('{} is not a multiple of {}'.format(n, factor))
return n / factor
else:
return n * factor

def anglicize(self, n, unit):
raise NotImplementedError


_ordering = ['w', 'd', 'h', 'm', 's', 'ms', 'us', 'ns']
_factors = [1, 7, 24, 60, 60, 1000, 1000, 1000]
_names = ['week', 'day', 'hour', 'minute', 'second',
'millisecond', 'microsecond', 'nanosecond']


CONVERTER = UnitConverter(_ordering, _factors, _names)


def _delta_factory(name, unit):
klass = _timedelta_units[unit]

def factory(n=1):
return klass(n)

factory.__name__ = name

return factory

nanosecond = _delta_factory('nanosecond', 'ns')
microsecond = _delta_factory('microsecond', 'us')
millisecond = _delta_factory('millisecond', 'ms')
second = _delta_factory('second', 's')
minute = _delta_factory('minute', 'm')
hour = _delta_factory('hour', 'h')
day = _delta_factory('day', 'd')
week = _delta_factory('week', 'w')
month = _delta_factory('month', 'M')
year = _delta_factory('year', 'Y')


def timedelta(days=None, hours=None, minutes=None, seconds=None,
milliseconds=None, microseconds=None, nanoseconds=None,
weeks=None):
"""
Generic API for creating a fixed size timedelta

Parameters
----------
days : int, default None
weeks : int, default None
hours : int, default None
minutes : int, default None
seconds : int, default None
milliseconds : int, default None
microseconds : int, default None
nanoseconds : int, default None

Notes
-----
For potentially non-fixed-length timedeltas (like year, month, etc.), use
the corresponding named API (e.g. ibis.month).

Returns
-------
delta : TimeIncrement (Timedelta)
"""
out = {
'result': None
}

def _apply(klass, n):
if not n:
return
offset = klass(n)
delta = out['result']
out['result'] = delta + offset if delta else offset

_apply(Week, weeks)
_apply(Day, days)
_apply(Hour, hours)
_apply(Minute, minutes)
_apply(Second, seconds)
_apply(Millisecond, milliseconds)
_apply(Microsecond, microseconds)
_apply(Nanosecond, nanoseconds)

result = out['result']
if not result:
raise IbisError('Must pass some offset parameter')

return result
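
A sketch of the timedelta API above; unit arithmetic is exact by construction, and the timestamp column at the end is hypothetical:

    import ibis
    from ibis.expr import temporal

    delta = temporal.timedelta(days=3, hours=2)

    # Fixed-size units combine by conversion to the finer common unit:
    # 1 week + 1 day == 8 days
    combined = temporal.week() + temporal.day()
    assert combined.equals(temporal.day(8))

    # to_unit converts exactly, raising IbisError on a non-exact multiple
    hours = temporal.day(2).to_unit('h')  # Hour(48)

    # Applying an offset to a timestamp expression
    t = ibis.table([('ts', 'timestamp')], 't')
    shifted = temporal.month() + t.ts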
5 changes: 3 additions & 2 deletions ibis/expr/tests/mocks.py
@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.connection import SQLConnection
from ibis.client import SQLClient
import ibis.expr.types as ir


class MockConnection(SQLConnection):
class MockConnection(SQLClient):

_tables = {
'alltypes': [
@@ -125,6 +125,7 @@ def __init__(self):
self.last_executed_expr = None

def _get_table_schema(self, name):
name = name.replace('`', '')
return ir.Schema.from_tuples(self._tables[name])

def execute(self, expr, default_limit=None):
67 changes: 67 additions & 0 deletions ibis/expr/tests/test_analytics.py
@@ -0,0 +1,67 @@
# Copyright 2014 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from ibis.expr.tests.mocks import MockConnection
import ibis.expr.types as ir


class TestAnalytics(unittest.TestCase):

def setUp(self):
self.con = MockConnection()
self.alltypes = self.con.table('functional_alltypes')

def test_category_project(self):
t = self.alltypes

tier = t.double_col.bucket([0, 50, 100]).name('tier')
expr = t[tier, t]

assert isinstance(expr.tier, ir.CategoryArray)

def test_bucket(self):
d = self.alltypes.double_col
bins = [0, 10, 50, 100]
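        # len(bins) - 1 = 3 interior buckets; include_over and include_under
        # each add one open-ended bucket, as checked below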

expr = d.bucket(bins)
assert isinstance(expr, ir.CategoryArray)
assert expr.op().nbuckets == 3

expr = d.bucket(bins, include_over=True)
assert expr.op().nbuckets == 4

expr = d.bucket(bins, include_over=True, include_under=True)
assert expr.op().nbuckets == 5

def test_bucket_error_cases(self):
d = self.alltypes.double_col

self.assertRaises(ValueError, d.bucket, [])
self.assertRaises(ValueError, d.bucket, [1, 2], closed='foo')

# it works!
d.bucket([10], include_under=True, include_over=True)

self.assertRaises(ValueError, d.bucket, [10])
self.assertRaises(ValueError, d.bucket, [10], include_under=True)
self.assertRaises(ValueError, d.bucket, [10], include_over=True)

def test_histogram(self):
d = self.alltypes.double_col

self.assertRaises(ValueError, d.histogram, nbins=10, binwidth=5)
self.assertRaises(ValueError, d.histogram)
self.assertRaises(ValueError, d.histogram, 10, closed='foo')
339 changes: 318 additions & 21 deletions ibis/expr/tests/test_base.py

Large diffs are not rendered by default.

50 changes: 44 additions & 6 deletions ibis/expr/tests/test_decimal.py
@@ -16,6 +16,7 @@

import ibis.expr.api as api
import ibis.expr.types as ir
import ibis.expr.operations as ops

from ibis.expr.tests.mocks import MockConnection

@@ -30,16 +31,16 @@ def test_type_metadata(self):
col = self.lineitem.l_extendedprice
assert isinstance(col, ir.DecimalArray)

assert col.precision == 12
assert col.scale == 2
assert col._precision == 12
assert col._scale == 2

def test_cast_scalar_to_decimal(self):
val = api.literal('1.2345')

casted = val.cast('decimal(15,5)')
assert isinstance(casted, ir.DecimalScalar)
assert casted.precision == 15
assert casted.scale == 5
assert casted._precision == 15
assert casted._scale == 5

def test_decimal_aggregate_function_behavior(self):
# From the Impala documentation: "The result of an aggregate function
@@ -53,8 +54,45 @@ def test_decimal_aggregate_function_behavior(self):
for func_name in functions:
result = getattr(col, func_name)()
assert isinstance(result, ir.DecimalScalar)
assert result.precision == col.precision
assert result.scale == 38
assert result._precision == col._precision
assert result._scale == 38

def test_where(self):
table = self.lineitem

q = table.l_quantity
expr = api.where(table.l_discount > 0,
q * table.l_discount, api.null)

assert isinstance(expr, ir.DecimalArray)

expr = api.where(table.l_discount > 0,
(q * table.l_discount).sum(), api.null)
assert isinstance(expr, ir.DecimalArray)

expr = api.where(table.l_discount.sum() > 0,
(q * table.l_discount).sum(), api.null)
assert isinstance(expr, ir.DecimalScalar)

def test_fillna(self):
expr = self.lineitem.l_extendedprice.fillna(0)
assert isinstance(expr, ir.DecimalArray)

expr = self.lineitem.l_extendedprice.fillna(
self.lineitem.l_quantity)
assert isinstance(expr, ir.DecimalArray)

def test_precision_scale(self):
col = self.lineitem.l_extendedprice

p = col.precision()
s = col.scale()

assert isinstance(p, ir.IntegerValue)
assert isinstance(p.op(), ops.DecimalPrecision)

assert isinstance(s, ir.IntegerValue)
assert isinstance(s.op(), ops.DecimalScale)

def test_invalid_precision_scale_combo(self):
pass
66 changes: 62 additions & 4 deletions ibis/expr/tests/test_sql_builtins.py
@@ -41,6 +41,16 @@ def test_abs(self):
expr = self.lineitem.l_extendedprice
self._check_unary_op(expr, fname, op, type(expr))

def test_group_concat(self):
col = self.alltypes.string_col

expr = col.group_concat()
assert isinstance(expr.op(), ops.GroupConcat)
assert expr.op().sep == ','

expr = col.group_concat('|')
assert expr.op().sep == '|'

def test_zeroifnull(self):
dresult = self.alltypes.double_col.zeroifnull()
iresult = self.alltypes.int_col.zeroifnull()
@@ -55,15 +65,14 @@ def test_fillna(self):
result = self.alltypes.double_col.fillna(5)
assert isinstance(result, ir.DoubleArray)

expected = (self.alltypes.double_col.isnull()
.ifelse(5, self.alltypes.double_col))
assert result.equals(expected)
assert isinstance(result.op(), ops.IfNull)

result = self.alltypes.bool_col.fillna(True)
assert isinstance(result, ir.BooleanArray)

# Retains type of caller (for now)
result = self.alltypes.int_col.fillna(self.alltypes.bigint_col)
assert isinstance(result, ir.Int64Array)
assert isinstance(result, ir.Int32Array)

def test_ceil_floor(self):
cresult = self.alltypes.double_col.ceil()
@@ -127,3 +136,52 @@ def _check_unary_op(self, expr, fname, ex_op, ex_type):
result = getattr(expr, fname)()
assert type(result.op()) == ex_op
assert type(result) == ex_type


class TestCoalesceLikeFunctions(unittest.TestCase):

def setUp(self):
self.table = api.table([
('v1', 'decimal(12, 2)'),
('v2', 'decimal(10, 4)'),
('v3', 'int32'),
('v4', 'int64'),
('v5', 'float'),
('v6', 'double'),
('v7', 'string'),
('v8', 'boolean')
], 'testing')

self.functions = [api.coalesce, api.greatest, api.least]

def test_integer_promotions(self):
t = self.table

for f in self.functions:
expr = f(t.v3, t.v4)
assert isinstance(expr, ir.Int64Array)

expr = f(5, t.v3)
assert isinstance(expr, ir.Int64Array)

expr = f(5, 12)
assert isinstance(expr, ir.Int64Scalar)

def test_floats(self):
t = self.table

for f in self.functions:
expr = f(t.v5)
assert isinstance(expr, ir.DoubleArray)

expr = f(5.5, t.v5)
assert isinstance(expr, ir.DoubleArray)

expr = f(5.5, 5)
assert isinstance(expr, ir.DoubleScalar)

def test_bools(self):
pass

def test_decimal_promotions(self):
pass
7 changes: 7 additions & 0 deletions ibis/expr/tests/test_string.py
@@ -76,3 +76,10 @@ def test_length(self):
assert isinstance(result, ir.Int32Array)
assert isinstance(lit_result, ir.Int32Scalar)
assert isinstance(result.op(), ops.StringLength)

def test_contains(self):
expr = self.table.g.contains('foo')
expected = self.table.g.like('%foo%')
assert expr.equals(expected)

self.assertRaises(Exception, lambda: 'foo' in self.table.g)
182 changes: 182 additions & 0 deletions ibis/expr/tests/test_temporal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Copyright 2014 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from ibis.common import IbisError
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.expr.temporal as T

from ibis.expr.tests.mocks import MockConnection


class TestFixedOffsets(unittest.TestCase):

def setUp(self):
self.con = MockConnection()
self.table = self.con.table('alltypes')

def test_upconvert(self):
cases = [
(T.day(14), 'w', T.week(2)),
(T.hour(72), 'd', T.day(3)),
(T.minute(240), 'h', T.hour(4)),
(T.second(360), 'm', T.minute(6)),
(T.second(3 * 86400), 'd', T.day(3)),
(T.millisecond(5000), 's', T.second(5)),
(T.microsecond(5000000), 's', T.second(5)),
(T.nanosecond(5000000000), 's', T.second(5)),
]

for offset, unit, expected in cases:
result = offset.to_unit(unit)
assert result.equals(expected)

def test_multiply(self):
offset = T.day(2)

assert (offset * 2).equals(T.day(4))
assert (offset * (-2)).equals(T.day(-4))
assert (3 * offset).equals(T.day(6))
assert ((-3) * offset).equals(T.day(-6))

def test_repr(self):
assert repr(T.day()) == '<Timedelta: 1 day>'
assert repr(T.day(2)) == '<Timedelta: 2 days>'
assert repr(T.year()) == '<Timedelta: 1 year>'
assert repr(T.month(2)) == '<Timedelta: 2 months>'
assert repr(T.second(40)) == '<Timedelta: 40 seconds>'

def test_cannot_upconvert(self):
cases = [
(T.day(), 'w'),
(T.hour(), 'd'),
(T.minute(), 'h'),
(T.second(), 'm'),
(T.second(), 'd'),
(T.millisecond(), 's'),
(T.microsecond(), 's'),
(T.nanosecond(), 's'),
]

for delta, target in cases:
self.assertRaises(IbisError, delta.to_unit, target)

def test_downconvert_second_parts(self):
K = 2

sec = T.second(K)
milli = T.millisecond(K)
micro = T.microsecond(K)
nano = T.nanosecond(K)

cases = [
(sec.to_unit('s'), T.second(K)),
(sec.to_unit('ms'), T.millisecond(K * 1000)),
(sec.to_unit('us'), T.microsecond(K * 1000000)),
(sec.to_unit('ns'), T.nanosecond(K * 1000000000)),

(milli.to_unit('ms'), T.millisecond(K)),
(milli.to_unit('us'), T.microsecond(K * 1000)),
(milli.to_unit('ns'), T.nanosecond(K * 1000000)),

(micro.to_unit('us'), T.microsecond(K)),
(micro.to_unit('ns'), T.nanosecond(K * 1000)),

(nano.to_unit('ns'), T.nanosecond(K))
]
self._check_cases(cases)

def test_downconvert_hours(self):
K = 2
offset = T.hour(K)

cases = [
(offset.to_unit('h'), T.hour(K)),
(offset.to_unit('m'), T.minute(K * 60)),
(offset.to_unit('s'), T.second(K * 3600)),
(offset.to_unit('ms'), T.millisecond(K * 3600000)),
(offset.to_unit('us'), T.microsecond(K * 3600000000)),
            (offset.to_unit('ns'), T.nanosecond(K * 3600000000000))
]
self._check_cases(cases)

def test_downconvert_day(self):
K = 2

week = T.week(K)
day = T.day(K)

cases = [
(week.to_unit('d'), T.day(K * 7)),
(week.to_unit('h'), T.hour(K * 7 * 24)),

(day.to_unit('d'), T.day(K)),
(day.to_unit('h'), T.hour(K * 24)),
(day.to_unit('m'), T.minute(K * 1440)),
(day.to_unit('s'), T.second(K * 86400)),
(day.to_unit('ms'), T.millisecond(K * 86400000)),
(day.to_unit('us'), T.microsecond(K * 86400000000)),
            (day.to_unit('ns'), T.nanosecond(K * 86400000000000))
]
self._check_cases(cases)

def test_combine_with_different_kinds(self):
cases = [
(T.day() + T.minute(), T.minute(1441)),
(T.second() + T.millisecond(10), T.millisecond(1010)),
(T.hour() + T.minute(5) + T.second(10), T.second(3910))
]
self._check_cases(cases)

def test_timedelta_generic_api(self):
cases = [
(T.timedelta(weeks=2), T.week(2)),
(T.timedelta(days=3), T.day(3)),
(T.timedelta(hours=4), T.hour(4)),
(T.timedelta(minutes=5), T.minute(5)),
(T.timedelta(seconds=6), T.second(6)),
(T.timedelta(milliseconds=7), T.millisecond(7)),
(T.timedelta(microseconds=8), T.microsecond(8)),
(T.timedelta(nanoseconds=9), T.nanosecond(9)),
]
self._check_cases(cases)

def _check_cases(self, cases):
for x, y in cases:
assert x.equals(y)

def test_offset_timestamp_expr(self):
c = self.table.i
x = T.timedelta(days=1)

expr = x + c
assert isinstance(expr, ir.TimestampArray)
assert isinstance(expr.op(), ops.TimestampDelta)

# test radd
expr = c + x
assert isinstance(expr, ir.TimestampArray)
assert isinstance(expr.op(), ops.TimestampDelta)


class TestTimedelta(unittest.TestCase):

def test_compound_offset(self):
# These are not yet allowed (e.g. 1 month + 1 hour)
pass

def test_offset_months(self):
pass
47 changes: 47 additions & 0 deletions ibis/expr/tests/test_timestamp.py
@@ -14,6 +14,9 @@

import unittest

import pandas as pd

import ibis
import ibis.common as com
import ibis.expr.api as api
import ibis.expr.operations as ops
@@ -67,3 +70,47 @@ def test_now(self):
result = api.now()
assert isinstance(result, ir.TimestampScalar)
assert isinstance(result.op(), ops.TimestampNow)

def test_timestamp_literals(self):
ts_str = '2015-01-01 00:00:00'
val = pd.Timestamp(ts_str)

expr = ibis.literal(val)
assert isinstance(expr, ir.TimestampScalar)

expr = ibis.timestamp(ts_str)
assert isinstance(expr, ir.TimestampScalar)

self.assertRaises(ValueError, ibis.timestamp, '2015-01-01 00:71')

def test_integer_to_timestamp(self):
# #246
pass

def test_comparison_timestamp(self):
expr = self.col > (self.col.min() + ibis.day(3))
assert isinstance(expr, ir.BooleanArray)

def test_comparisons_string(self):
val = '2015-01-01 00:00:00'
expr = self.col > val
op = expr.op()
assert isinstance(op.right, ir.TimestampScalar)

expr2 = val < self.col
op = expr2.op()
assert isinstance(op, ops.Greater)
assert isinstance(op.right, ir.TimestampScalar)

def test_comparisons_pandas_timestamp(self):
val = pd.Timestamp('2015-01-01 00:00:00')
expr = self.col > val
op = expr.op()
assert isinstance(op.right, ir.TimestampScalar)

# TODO: this is broken for now because of upstream pandas problems

# expr2 = val < self.col
# op = expr2.op()
# assert isinstance(op, ops.Greater)
# assert isinstance(op.right, ir.TimestampScalar)