157 changes: 87 additions & 70 deletions azure-pipelines.yml
@@ -5,12 +5,20 @@ jobs:
strategy:
maxParallel: 3
matrix:
py35:
python.version: "3.5"
conda.version: "4.5"
conda.env: "ibis_3.5"
py36:
python.version: "3.6"
conda.version: "4.6"
conda.env: "ibis_3.6"
py37:
python.version: "3.7"
conda.version: "4.6"
conda.env: "ibis_3.7"
variables:
AZURECI: 1
steps:
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
displayName: Add conda to PATH
@@ -23,9 +31,17 @@ jobs:
- script: conda update --all
displayName: 'Update conda'

- script: conda create --name $(conda.env) python=$(python.version) numpy pandas pytables ruamel.yaml jinja2 pyarrow multipledispatch pymysql sqlalchemy psycopg2 graphviz click mock plumbum flake8 pytest-xdist
- script: conda install conda=$(conda.version)
displayName: 'Install an appropriate conda version'

- script: conda create --name $(conda.env) python=$(python.version) numpy pandas pytables ruamel.yaml jinja2 pyarrow multipledispatch pymysql sqlalchemy psycopg2 graphviz click mock plumbum flake8
displayName: 'Create conda environment'

- script: |
call activate $(conda.env)
pip install -U pytest pytest-xdist
displayName: 'Install latest pytest'
- script: conda info
displayName: 'Show conda info'

@@ -42,14 +58,14 @@ jobs:
flake8
displayName: 'Lint'
- script: choco install -y mariadb --version=10.3.11
displayName: 'Install mariadb (mysql) from chocolatey'
#- script: choco install -y mariadb --version=10.3.11
# displayName: 'Install mariadb (mysql) from chocolatey'

- script: '"C:\\Program Files\\MariaDB 10.3\\bin\\mysql" -u root -e "CREATE OR REPLACE USER ibis@localhost IDENTIFIED BY ''ibis''"'
displayName: 'Create ibis user and password in MySQL database'
# - script: '"C:\\Program Files\\MariaDB 10.3\\bin\\mysql" -u root -e "CREATE OR REPLACE USER ibis@localhost IDENTIFIED BY ''ibis''"'
# displayName: 'Create ibis user and password in MySQL database'

- script: '"C:\\Program Files\\MariaDB 10.3\\bin\\mysql" -u root -e "GRANT ALL PRIVILEGES ON *.* TO ibis@localhost"'
displayName: 'Setup privileges for ibis user in MySQL'
# - script: '"C:\\Program Files\\MariaDB 10.3\\bin\\mysql" -u root -e "GRANT ALL PRIVILEGES ON *.* TO ibis@localhost"'
# displayName: 'Setup privileges for ibis user in MySQL'

- script: choco install -y postgresql10 --params '/Password:postgres'
displayName: 'Install postgres from chocolatey'
@@ -64,10 +80,10 @@ jobs:
python ci/datamgr.py download
displayName: 'Download data'
- script: |
call activate $(conda.env)
python ci/datamgr.py mysql
displayName: 'Load MySQL data'
# - script: |
# call activate $(conda.env)
# python ci/datamgr.py mysql
# displayName: 'Load MySQL data'

- script: |
call activate $(conda.env)
Expand Down Expand Up @@ -98,62 +114,63 @@ jobs:
mergeTestResults: False
condition: succeededOrFailed() # pass or fail, but not cancelled

#- job: WindowsCondaBuild
#pool:
#vmImage: 'VS2017-Win2016'
#strategy:
#maxParallel: 3
#matrix:
#py36:
#python.version: "3.6"
#py37:
#python.version: "3.7"
#steps:
#- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
#displayName: Add conda to PATH

#- script: |
#conda config --set always_yes True
#conda update --all
#conda config --add channels conda-forge
#conda config --set show_channel_urls True
#conda create --quiet --name "ibis_build_$(python.version)" python=$(python.version)
#displayName: Create Anaconda environment

#- script: |
#call activate "ibis_build_$(python.version)"
#conda install python=$(python.version) conda-build click jinja2 ruamel.yaml plumbum
#displayName: Install dependencies

#- script: |
#call activate "ibis_build_$(python.version)"
#python setup.py develop
#displayName: 'Install ibis'

#- script: |
#call activate "ibis_build_$(python.version)"
#python ci/feedstock.py clone
#displayName: 'Clone conda-forge recipe'

#- script: |
#call activate "ibis_build_$(python.version)"
#python ci/feedstock.py update
#displayName: 'Update conda-forge recipe'

#- script: |
#call activate "ibis_build_$(python.version)"
#python ci/feedstock.py build --python=$(python.version)
#displayName: 'Build conda package from conda-forge recipe'

#- script: |
#call activate "ibis_build_$(python.version)"
#python ci/feedstock.py deploy C:/Miniconda/envs/ibis_build_$(python.version)/conda-bld conda win-64
#displayName: 'Copy conda package to artifact directory'

## publish sdist and wheel and conda package
#- task: PublishBuildArtifacts@1
#displayName: 'Publish conda package to Azure'
#inputs:
#pathToPublish: conda
#artifactName: conda
#condition: and(succeeded(), eq(variables['System.PullRequest.IsFork'], 'False'))
- job: WindowsCondaBuild
pool:
vmImage: 'VS2017-Win2016'
strategy:
maxParallel: 3
matrix:
py36:
python.version: "3.6"
conda.env: "ibis_3.6"
py37:
python.version: "3.7"
conda.env: "ibis_3.7"
steps:
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
displayName: Add conda to PATH

- script: |
conda config --set always_yes True --set show_channel_urls True
conda config --add channels conda-forge
displayName: 'Set conda configuration'
- script: conda update --all
displayName: 'Update conda'

- script: conda create --name $(conda.env) python=$(python.version) conda-build click jinja2 ruamel.yaml plumbum
displayName: 'Create conda environment'

- script: |
call activate $(conda.env)
python setup.py develop
displayName: 'Install ibis'
- script: |
call activate $(conda.env)
python ci/feedstock.py clone
displayName: 'Clone conda-forge recipe'
- script: |
call activate $(conda.env)
python ci/feedstock.py update
displayName: 'Update conda-forge recipe'
- script: |
call activate $(conda.env)
python ci/feedstock.py build --python=$(python.version)
displayName: 'Build conda package from conda-forge recipe'
- script: |
call activate $(conda.env)
python ci/feedstock.py deploy C:/Miniconda/envs/$(conda.env)/conda-bld conda win-64
displayName: 'Copy conda package to artifact directory'
# publish sdist and wheel and conda package
- task: PublishBuildArtifacts@1
displayName: 'Publish conda package to Azure'
inputs:
pathToPublish: conda
artifactName: conda
condition: and(succeeded(), eq(variables['System.PullRequest.IsFork'], 'False'))

12 changes: 6 additions & 6 deletions ci/.env
@@ -18,10 +18,10 @@ IBIS_TEST_POSTGRES_DATABASE=ibis_testing
IBIS_TEST_CLICKHOUSE_HOST=clickhouse
IBIS_TEST_CLICKHOUSE_PORT=9000
IBIS_TEST_CLICKHOUSE_DATABASE=ibis_testing
IBIS_TEST_MAPD_HOST=mapd
IBIS_TEST_MAPD_PORT=9091
IBIS_TEST_MAPD_DATABASE=ibis_testing
IBIS_TEST_MAPD_USER=mapd
IBIS_TEST_MAPD_PASSWORD=HyperInteractive
IBIS_TEST_OMNISCI_HOST=omnisci
IBIS_TEST_OMNISCI_PORT=6274
IBIS_TEST_OMNISCI_DATABASE=ibis_testing
IBIS_TEST_OMNISCI_USER=mapd
IBIS_TEST_OMNISCI_PASSWORD=HyperInteractive
GOOGLE_BIGQUERY_PROJECT_ID=ibis-gbq
GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcloud-service-key.json
GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcloud-service-key.json
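
These variables are consumed by the test suite when it connects to the OmniSci container. A minimal sketch of that usage, assuming the same ``pymapd.connect`` call that ``ci/datamgr.py`` uses (the fallback defaults here are illustrative):

    import os
    import pymapd

    # Connect using the renamed IBIS_TEST_OMNISCI_* variables.
    conn = pymapd.connect(
        host=os.environ.get('IBIS_TEST_OMNISCI_HOST', 'localhost'),
        port=int(os.environ.get('IBIS_TEST_OMNISCI_PORT', '6274')),
        user=os.environ.get('IBIS_TEST_OMNISCI_USER', 'mapd'),
        password=os.environ.get('IBIS_TEST_OMNISCI_PASSWORD', 'HyperInteractive'),
        dbname=os.environ.get('IBIS_TEST_OMNISCI_DATABASE', 'ibis_testing'),
    )
    conn.close()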
16 changes: 10 additions & 6 deletions ci/Dockerfile.dev
@@ -6,15 +6,19 @@ RUN apt-get -qq update --yes \
&& rm -rf /var/lib/apt/lists/*

ARG PYTHON
ADD ci/requirements-dev.yml /
ADD ci/requirements-$PYTHON-dev.yml /

RUN conda config --add channels conda-forge \
&& conda update --all --yes --quiet \
&& conda install --yes --quiet --file /requirements-dev.yml python=$PYTHON conda-build \
&& conda clean --all --yes \
&& pip install pydata-google-auth
&& conda update --all --yes --quiet \
&& conda env create --name ibis-env --file /requirements-$PYTHON-dev.yml \
&& conda install --yes conda-build \
&& conda clean --all --yes

RUN echo 'source activate ibis-env && exec "$@"' > activate.sh

COPY . /ibis
WORKDIR /ibis

RUN python setup.py develop
RUN bash /activate.sh pip install -e . --no-deps --ignore-installed --no-cache-dir

ENTRYPOINT ["bash", "/activate.sh"]
14 changes: 9 additions & 5 deletions ci/Dockerfile.docs
@@ -7,13 +7,17 @@ RUN apt-get -qq update --yes \
&& rm -rf /var/lib/apt/lists/*

ADD ci/requirements-docs.yml /

RUN conda config --add channels conda-forge \
&& conda update --all --yes --quiet \
&& conda install --yes --quiet --file /requirements-docs.yml python=$PYTHON conda-build \
&& conda clean --all --yes \
&& pip install pydata-google-auth
&& conda update --all --yes \
&& conda install --name ibis-env --yes --file /requirements-docs.yml \
&& conda clean --all --yes

RUN echo 'source activate ibis-env && exec "$@"' > activate.sh

COPY . /ibis
WORKDIR /ibis

RUN python setup.py develop
RUN bash /activate.sh pip install -e . --no-deps --ignore-installed --no-cache-dir

ENTRYPOINT ["bash", "/activate.sh"]
8 changes: 3 additions & 5 deletions ci/asvconfig.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python

import sys
import asv
import json
import socket
import sys

import asv

if __name__ == '__main__':
if len(sys.argv) > 1:
@@ -14,7 +14,5 @@

machine_info = asv.machine.Machine.get_defaults()
machine_info['machine'] = hostname
machine_info['ram'] = '{:d}GB'.format(
int(machine_info['ram']) // 1_000_000
)
machine_info['ram'] = '{:d}GB'.format(int(machine_info['ram']) // 1000000)
print(json.dumps({hostname: machine_info, 'version': 1}, indent=2))
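
The reformatted RAM line is behavior-preserving: the integer floor division by 1,000,000 implies asv reports machine RAM in kilobytes, so the result is whole gigabytes. A quick check with a made-up value:

    ram_kb = '16384000'  # hypothetical asv-reported RAM, assumed to be in KB
    print('{:d}GB'.format(int(ram_kb) // 1000000))  # prints: 16GB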
2 changes: 1 addition & 1 deletion ci/build.sh
@@ -10,7 +10,7 @@ docker-compose -f "$compose_file" build --pull ibis

# start all docker compose services
docker-compose -f "$compose_file" up -d --no-build \
mapd postgres mysql clickhouse impala kudu-master kudu-tserver
omnisci postgres mysql clickhouse impala kudu-master kudu-tserver

# wait for services to start
docker-compose -f "$compose_file" run --rm waiter
38 changes: 23 additions & 15 deletions ci/datamgr.py
@@ -1,22 +1,21 @@
#!/usr/bin/env python

import os
import json
import logging
import zipfile
import os
import tempfile
import warnings
import zipfile
from pathlib import Path

import click
import pandas as pd
import sqlalchemy as sa
from toolz import dissoc
from plumbum import local
from toolz import dissoc

import ibis


SCRIPT_DIR = Path(__file__).parent.absolute()
DATA_DIR_NAME = 'ibis-testing-data'
DATA_DIR = Path(
@@ -149,8 +148,8 @@ def download(repo_url, directory):

# extract all files
extract_to = directory.with_name(directory.name + '_extracted')
with zipfile.ZipFile(path, 'r') as f:
f.extractall(extract_to)
with zipfile.ZipFile(str(path), 'r') as f:
f.extractall(str(extract_to))

# remove existent folder
if directory.exists():
@@ -259,55 +258,65 @@ def sqlite(database, schema, tables, data_directory, **params):

@cli.command()
@click.option('-h', '--host', default='localhost')
@click.option('-P', '--port', default=9091, type=int)
@click.option('-P', '--port', default=6274, type=int)
@click.option('-u', '--user', default='mapd')
@click.option('-p', '--password', default='HyperInteractive')
@click.option('-D', '--database', default='ibis_testing')
@click.option('--protocol', default='binary')
@click.option(
'-S',
'--schema',
type=click.File('rt'),
default=str(SCRIPT_DIR / 'schema' / 'mapd.sql'),
default=str(SCRIPT_DIR / 'schema' / 'omnisci.sql'),
)
@click.option('-t', '--tables', multiple=True, default=TEST_TABLES + ['geo'])
@click.option('-d', '--data-directory', default=DATA_DIR)
def mapd(schema, tables, data_directory, **params):
def omnisci(schema, tables, data_directory, **params):
import pymapd

data_directory = Path(data_directory)
reserved_words = ['table', 'year', 'month']

# connection
logger.info('Initializing MapD...')
logger.info('Initializing OmniSci...')
if params['database'] != 'mapd':
conn = pymapd.connect(
host=params['host'],
user=params['user'],
password=params['password'],
port=params['port'],
dbname='mapd',
protocol=params['protocol'],
)
stmt = 'CREATE DATABASE {}'.format(params['database'])
database = params["database"]
stmt = "DROP DATABASE {}".format(database)
try:
conn.execute(stmt)
except Exception:
logger.warning('OmniSci DDL statement %r failed', stmt)

stmt = 'CREATE DATABASE {}'.format(database)
try:
conn.execute(stmt)
except Exception:
logger.exception('MapD DDL statement %r failed', stmt)
logger.exception('OmniSci DDL statement %r failed', stmt)
conn.close()

conn = pymapd.connect(
host=params['host'],
user=params['user'],
password=params['password'],
port=params['port'],
dbname=params['database'],
dbname=database,
protocol=params['protocol'],
)

# create tables
for stmt in filter(None, map(str.strip, schema.read().split(';'))):
try:
conn.execute(stmt)
except Exception:
logger.exception('MapD DDL statement \n%r\n failed', stmt)
logger.exception('OmniSci DDL statement \n%r\n failed', stmt)

# import data
for table, df in read_tables(tables, data_directory):
Expand Down Expand Up @@ -343,7 +352,6 @@ def mapd(schema, tables, data_directory, **params):
conn.load_table_columnar(table, df)

conn.close()
logger.info('Done!')


@cli.command()
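
The core of the renamed ``omnisci`` loader is the same pattern as the old ``mapd`` one: create tables from the schema file, then bulk-load DataFrames. A hedged standalone sketch of that pattern, with the table name and data invented for illustration:

    import pandas as pd
    import pymapd

    conn = pymapd.connect(
        host='localhost', port=6274, user='mapd',
        password='HyperInteractive', dbname='ibis_testing',
    )
    conn.execute('CREATE TABLE demo_table (a BIGINT)')  # hypothetical table
    # Same bulk-load call the script uses for each test table.
    conn.load_table_columnar('demo_table', pd.DataFrame({'a': [1, 2, 3]}))
    conn.close()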
33 changes: 16 additions & 17 deletions ci/docker-compose.yml
@@ -3,13 +3,15 @@ services:

postgres:
image: postgres
hostname: postgres
ports:
- 5432:5432
environment:
POSTGRES_PASSWORD: postgres

mysql:
image: mariadb:10.2
hostname: mysql
ports:
- 3306:3306
environment:
@@ -21,10 +23,6 @@ services:
impala:
image: ibisproject/impala:latest
hostname: impala
networks:
default:
aliases:
- quickstart.cloudera
environment:
PGPASSWORD: postgres
ports:
@@ -46,12 +44,14 @@

clickhouse:
image: yandex/clickhouse-server:18.12
hostname: clickhouse
ports:
- 8123:8123
- 9000:9000

kudu-master:
image: ibisproject/kudu:latest
hostname: kudu-master
networks:
default:
aliases:
@@ -66,6 +66,7 @@

kudu-tserver:
image: ibisproject/kudu:latest
hostname: kudu-tserver
cap_add:
- SYS_TIME
ports:
@@ -74,25 +75,23 @@
environment:
KUDU_MASTER: "false"

mapd:
image: mapd/mapd-ce-cpu:v4.4.2
omnisci:
image: omnisci/core-os-cpu:v4.6.1
hostname: omnisci
networks:
default:
aliases:
- mapd
ports:
- 9090:9090
- 9091:9091
- 9092:9092
- 9093:9093
environment:
- MAPD_HOST=mapd
- MAPD_PORT=9091
- MAPD_DATABASE=ibis_testing
- MAPD_USER=mapd
- 6274:6274
- 6278:6278
volumes:
- ./mapd.conf:/mapd-storage/mapd.conf
- ./omnisci.conf:/omnisci-storage/omnisci.conf

waiter:
image: jwilder/dockerize
command: |
dockerize -wait tcp://mapd:9091
dockerize -wait tcp://omnisci:6274
-wait tcp://mysql:3306
-wait tcp://postgres:5432
-wait tcp://impala:21050
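
Because each service now declares an explicit ``hostname``, clients inside the compose network reach every backend by service name on its native port. A sketch of what that looks like from within the ibis container, using connection parameters that mirror ``ci/.env``:

    import ibis

    # Hostnames are the compose service names; ports match the mappings above.
    pg = ibis.postgres.connect(
        user='postgres', password='postgres',
        host='postgres', port=5432, database='ibis_testing',
    )
    ch = ibis.clickhouse.connect(host='clickhouse', port=9000)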
2 changes: 1 addition & 1 deletion ci/docs.sh
@@ -6,7 +6,7 @@ docker-compose build ibis
docker-compose build ibis-docs

# TODO(kszucs): move the following commands in a single script
docker-compose run --rm ibis-docs ping -c 1 quickstart.cloudera
docker-compose run --rm ibis-docs ping -c 1 impala
docker-compose run --rm ibis-docs rm -rf /tmp/docs.ibis-project.org
docker-compose run --rm ibis-docs git clone \
--branch gh-pages \
9 changes: 4 additions & 5 deletions ci/feedstock.py
@@ -4,18 +4,15 @@
import shutil
import sys
import tempfile

from pathlib import Path

import click
import ruamel.yaml

from jinja2 import Environment, FileSystemLoader
from plumbum.cmd import git, conda
from plumbum.cmd import conda, git

import ibis


IBIS_DIR = Path(__file__).parent.parent.absolute()


@@ -70,7 +67,9 @@ def update(meta, source_path):
recipe = ruamel.yaml.round_trip_load(content)

# update the necessary fields, skip leading 'v' in the version
recipe['package']['version'] = ibis.__version__[1:]
version = ibis.__version__
assert not version.startswith('v'), 'version == {}'.format(version)
recipe['package']['version'] = version
recipe['source'] = {'path': source_path}

# XXX: because render will remove the {{ PYTHON }} variable
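
``round_trip_load`` matters here because it preserves comments and key order when the recipe is rewritten and dumped back out. A small illustration with a made-up recipe fragment:

    import ruamel.yaml

    content = 'package:\n  name: ibis-framework  # this comment survives\n'
    recipe = ruamel.yaml.round_trip_load(content)
    recipe['package']['version'] = '1.1.0'  # hypothetical version
    print(ruamel.yaml.round_trip_dump(recipe))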
8 changes: 2 additions & 6 deletions ci/impalamgr.py
@@ -3,22 +3,18 @@
import concurrent.futures
import itertools
import os

from io import BytesIO
from pathlib import Path

import click
import toolz

from plumbum import local, CommandNotFound
from plumbum.cmd import make, cmake
from plumbum import CommandNotFound, local
from plumbum.cmd import cmake, make

import ibis

from ibis.common import IbisError
from ibis.impala.tests.conftest import IbisTestEnv


SCRIPT_DIR = Path(__file__).parent.absolute()
DATA_DIR = Path(
os.environ.get(
2 changes: 1 addition & 1 deletion ci/load-data.sh
@@ -6,7 +6,7 @@ declare -A argcommands=([sqlite]=sqlite
[parquet]="parquet -i"
[postgres]=postgres
[clickhouse]=clickhouse
[mapd]=mapd
[omnisci]=omnisci
[mysql]=mysql
[impala]=impala)

1 change: 1 addition & 0 deletions ci/mapd.conf → ci/omnisci.conf
@@ -1,2 +1,3 @@
enable-watchdog = false
enable-window-functions = true
cpu-buffer-mem-bytes = 1000000000
51 changes: 51 additions & 0 deletions ci/requirements-3.5-dev.yml
@@ -0,0 +1,51 @@
channels:
- conda-forge/label/cf201901
- conda-forge
dependencies:
#- black # Python 3.5 cannot be used to run black
# conda 4.5 because of python3.5
- conda=4.5.11
- click
- clickhouse-cityhash
- clickhouse-driver>=0.0.8
- clickhouse-sqlalchemy
- cmake
- flake8
- google-cloud-bigquery>=1.0.0
- graphviz
- impyla<0.14.2
- isort
- jinja2
- lz4
- multipledispatch>=0.6.0
- mypy
- numpy>=1.11
- pandas>=0.21
- pip
- plumbum
- pre_commit
- psycopg2
- pyarrow<0.12
- pymapd>=0.8.3,<0.11.0
- pymysql
- pytables>=3.0.0
- python=3.5
- python-graphviz
- python-hdfs>=2.0.16
- pytz
- regex
- requests
- ruamel.yaml
- sqlalchemy>=1.1
- thrift>=0.9.3
# required for impyla in case of py3
- thriftpy
- toolz
- xorg-libxpm
- xorg-libxrender
- pip:
- seed-isort-config
- pydata-google-auth
- pytest # conda-forge only has <=3.8.1 for python 3.5
- pytest-cov
- pytest-xdist
47 changes: 47 additions & 0 deletions ci/requirements-3.6-dev.yml
@@ -0,0 +1,47 @@
channels:
- conda-forge
dependencies:
- black
- click
- clickhouse-cityhash
- clickhouse-driver>=0.0.8
- clickhouse-sqlalchemy
- cmake
- flake8
- google-cloud-bigquery>=1.0.0
- graphviz
- impyla>=0.15.0
- isort
- jinja2
- lz4
- multipledispatch>=0.6.0
- mypy
- numpy>=1.11
- pandas>=0.21
- pip
- plumbum
- pre_commit
- psycopg2
- pyarrow>=0.12
- pydata-google-auth
- pymapd>=0.12.0
- pymysql
- pytables>=3.0.0
- pytest
- pytest-cov
- pytest-xdist
- python=3.6
- python-graphviz
- python-hdfs>=2.0.16
- pytz
- regex
- requests
- ruamel.yaml
- sqlalchemy>=1.1
- thrift>=0.9.3
- thriftpy2 # required for impyla in case of py3
- toolz
- xorg-libxpm
- xorg-libxrender
- pip:
- seed-isort-config
47 changes: 47 additions & 0 deletions ci/requirements-3.7-dev.yml
@@ -0,0 +1,47 @@
channels:
- conda-forge
dependencies:
- black
- click
- clickhouse-cityhash
- clickhouse-driver>=0.0.8
- clickhouse-sqlalchemy
- cmake
- flake8
- google-cloud-bigquery>=1.0.0
- graphviz
- impyla>=0.15.0
- isort
- jinja2
- lz4
- multipledispatch>=0.6.0
- mypy
- numpy>=1.11
- pandas>=0.21
- pip
- plumbum
- pre_commit
- psycopg2
- pyarrow>=0.12
- pydata-google-auth
- pymapd>=0.12.0
- pymysql
- pytables>=3.0.0
- pytest
- pytest-cov
- pytest-xdist
- python=3.7
- python-graphviz
- python-hdfs>=2.0.16
- pytz
- regex
- requests
- ruamel.yaml
- sqlalchemy>=1.1
- thrift>=0.9.3
- thriftpy2 # required for impyla in case of py3
- toolz
- xorg-libxpm
- xorg-libxrender
- pip:
- seed-isort-config
39 changes: 0 additions & 39 deletions ci/requirements-dev.yml

This file was deleted.

2 changes: 1 addition & 1 deletion ci/requirements-docs.yml
@@ -4,8 +4,8 @@ matplotlib
nbconvert
nbsphinx
nomkl
numpydoc
# must pin again otherwise strange things happen
pyarrow>=0.12
sphinx=2.0.1
sphinx-releases
sphinx_rtd_theme
File renamed without changes.
58 changes: 58 additions & 0 deletions ci/schema/postgresql.sql
@@ -106,3 +106,61 @@ INSERT INTO array_types VALUES
(ARRAY[NULL, 1, NULL], ARRAY[NULL, 'a', NULL], ARRAY[]::DOUBLE PRECISION[], 'b', 4.0),
(ARRAY[2, NULL, 3], ARRAY['b', NULL, 'c'], NULL, 'b', 5.0),
(ARRAY[4, NULL, NULL, 5], ARRAY['d', NULL, NULL, 'e'], ARRAY[4.0, NULL, NULL, 5.0], 'c', 6.0);

DROP TABLE IF EXISTS films CASCADE;

CREATE TABLE IF NOT EXISTS films (
code CHAR(5) PRIMARY KEY,
title VARCHAR(40) NOT NULL,
did INTEGER NOT NULL,
date_prod DATE,
kind VARCHAR(10),
len INTERVAL HOUR TO MINUTE
);

INSERT INTO films VALUES
('A', 'Avengers', 1, DATE '2018-01-01', 'Action', INTERVAL '2 hours 35 minutes'),
('B', 'Ghostbusters', 2, DATE '2018-01-02', 'Ghost', INTERVAL '1 hour 30 minutes');

DROP TABLE IF EXISTS intervals CASCADE;

CREATE TABLE IF NOT EXISTS intervals (
-- enable year and month when relativedelta support lands
-- a INTERVAL YEAR,
-- b INTERVAL MONTH,
c INTERVAL DAY,
d INTERVAL HOUR,
e INTERVAL MINUTE,
f INTERVAL SECOND,
-- g INTERVAL YEAR TO MONTH,
h INTERVAL DAY TO HOUR,
i INTERVAL DAY TO MINUTE,
j INTERVAL DAY TO SECOND,
k INTERVAL HOUR TO MINUTE,
l INTERVAL HOUR TO SECOND,
m INTERVAL MINUTE TO SECOND
);

INSERT INTO intervals VALUES
(
-- '1 year',
-- '1 month',
'1 day',
'1 hour',
'-1 minute',
'1 second 30 milliseconds -10 microseconds',
-- '-1 year 3 months',
'1 day 4 hours',
'1 day 17 minutes',
'-1 day -2 hours 17 minutes 30 seconds',
'1 hour 2 minutes',
'1 hour 2 minutes -7 seconds 37 microseconds',
'1 minute 3 seconds 2 milliseconds 9 microseconds'
);


CREATE TABLE IF NOT EXISTS not_supported_intervals (
a INTERVAL YEAR,
b INTERVAL YEAR,
g INTERVAL YEAR TO MONTH
);
2 changes: 1 addition & 1 deletion ci/test.sh
@@ -4,4 +4,4 @@ compose_file=$(dirname "$0")/docker-compose.yml

cmd='$(find /ibis -name "*.py[co]" -delete > /dev/null 2>&1 || true) && pytest "$@"'
docker-compose -f "$compose_file" build --pull ibis
docker-compose -f "$compose_file" run --rm ibis bash -c "$cmd" -- "$@"
docker-compose -f "$compose_file" run --rm -e PYTHONHASHSEED="${PYTHONHASHSEED:-random}" ibis bash -c "$cmd" -- "$@"
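
Passing ``PYTHONHASHSEED=random`` (also Python 3's behavior when the variable is unset) makes string hashing differ between interpreter runs, which flushes out tests that silently depend on set or dict iteration order. To see the effect, run this twice and compare the output:

    # Differs across interpreter invocations under randomized hashing.
    print(hash('ibis'))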
13 changes: 1 addition & 12 deletions conftest.py
@@ -1,21 +1,10 @@
import fnmatch
import os
import sys
import pytest

from pathlib import Path

import pytest

collect_ignore = ['setup.py']

if sys.version_info.major == 2:
this_directory = os.path.dirname(__file__)
bigquery_udf = os.path.join(this_directory, 'ibis', 'bigquery', 'udf')
for root, _, filenames in os.walk(bigquery_udf):
for filename in filenames:
if fnmatch.fnmatch(filename, '*.py'):
collect_ignore.append(os.path.join(root, filename))


@pytest.fixture(scope='session')
def data_directory():
83 changes: 63 additions & 20 deletions dev/merge-pr.py
@@ -29,12 +29,9 @@
import textwrap

import click

import plumbum

from plumbum import cmd

import requests
from plumbum import cmd

IBIS_HOME = pathlib.Path(__file__).parent.parent
GITHUB_API_BASE = "https://api.github.com/repos/ibis-project/ibis"
@@ -56,7 +53,12 @@ def merge_pr(
password: str,
) -> None:
"""Merge a pull request."""
git_log = git["log", f"{remote}/{target_ref}..{base_ref}"]
git_log = git[
"log",
"{remote}/{target_ref}..{base_ref}".format(
remote=remote, target_ref=target_ref, base_ref=base_ref
),
]

commit_authors = git_log["--pretty=format:%an <%ae>"]().splitlines()
author_count = collections.Counter(commit_authors)
@@ -68,18 +70,22 @@ def merge_pr(
merge_message_pieces.append("\n".join(textwrap.wrap(body)))
merge_message_pieces.extend(map("Author: {}".format, distinct_authors))

# The string f"Closes #{pull_request_number:d}" is required for GitHub to
# The string "Closes #{pull_request_number:d}" is required for GitHub to
# correctly close the PR
merge_message_pieces.append(
f"\nCloses #{pr_num:d} from {pr_repo_desc} and squashes the following "
"commits:\n"
(
"\nCloses #{pr_num:d} from {pr_repo_desc} and squashes the "
"following commits:\n"
).format(pr_num=pr_num, pr_repo_desc=pr_repo_desc)
)
merge_message_pieces += commits

commit_message = "\n".join(merge_message_pieces)
# PUT /repos/:owner/:repo/pulls/:number/merge
resp = requests.put(
f"{GITHUB_API_BASE}/pulls/{pr_num:d}/merge",
"{GITHUB_API_BASE}/pulls/{pr_num:d}/merge".format(
GITHUB_API_BASE=GITHUB_API_BASE, pr_num=pr_num
),
json=dict(
commit_title=commit_title,
commit_message=commit_message,
Expand All @@ -92,7 +98,11 @@ def merge_pr(
resp_json = resp.json()
merged = resp_json["merged"]
assert merged is True, merged
click.echo(f"Pull request #{pr_num:d} successfully merged.")
click.echo(
"Pull request #{pr_num:d} successfully merged.".format(
pr_num=pr_num
)
)


@click.command()
@@ -136,7 +146,13 @@ def main(
except plumbum.commands.processes.ProcessExecutionError as e:
raise click.ClickException(e.stderr)
try:
git["fetch", remote, f"pull/{pull_request_number:d}/head"]()
git[
"fetch",
remote,
"pull/{pull_request_number:d}/head".format(
pull_request_number=pull_request_number
),
]()
except plumbum.commands.processes.ProcessExecutionError as e:
raise click.ClickException(e.stderr)

@@ -145,14 +161,21 @@ def main(
if not original_head:
original_head = git["rev-parse", "HEAD"]().strip()

resp = requests.get(f"{GITHUB_API_BASE}/pulls/{pull_request_number:d}")
resp = requests.get(
"{GITHUB_API_BASE}/pulls/{pull_request_number:d}".format(
GITHUB_API_BASE=GITHUB_API_BASE,
pull_request_number=pull_request_number,
)
)
resp.raise_for_status()
pr_json = resp.json()

message = pr_json.get("message", None)
if message is not None and message.lower() == "not found":
raise click.ClickException(
f"PR {pull_request_number:d} does not exist."
"PR {pull_request_number:d} does not exist.".format(
pull_request_number=pull_request_number
)
)

if not pr_json["mergeable"]:
@@ -166,18 +189,38 @@ def main(
target_ref = pr_json["base"]["ref"]
user_login = pr_json["user"]["login"]
base_ref = pr_json["head"]["ref"]
pr_repo_desc = f"{user_login}/{base_ref}"
pr_repo_desc = "{user_login}/{base_ref}".format(
user_login=user_login, base_ref=base_ref
)

click.echo(f"=== Pull Request #{pull_request_number:d} ===")
click.echo(
f"title\t{commit_title}\n"
f"source\t{pr_repo_desc}\n"
f"target\t{remote}/{target_ref}\n"
f"url\t{url}"
"=== Pull Request #{pull_request_number:d} ===".format(
pull_request_number=pull_request_number
)
)
click.echo(
(
"title\t{commit_title}\n"
"source\t{pr_repo_desc}\n"
"target\t{remote}/{target_ref}\n"
"url\t{url}"
).format(
commit_title=commit_title,
pr_repo_desc=pr_repo_desc,
remote=remote,
target_ref=target_ref,
url=url,
)
)

base_ref_commit = (
git["ls-remote", remote, f"refs/pull/{pull_request_number:d}/head"]()
git[
"ls-remote",
remote,
"refs/pull/{pull_request_number:d}/head".format(
pull_request_number=pull_request_number
),
]()
.strip()
.split()[0]
)
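
Every rewrite in this file follows one mechanical pattern: f-strings, which are a SyntaxError on Python 3.5, become equivalent ``str.format`` calls. For example:

    pr_num = 1234  # illustrative value

    # Python >= 3.6 only:
    #   message = f"Closes #{pr_num:d}"

    # Equivalent, and valid on Python 3.5 as well:
    message = "Closes #{pr_num:d}".format(pr_num=pr_num)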
19 changes: 9 additions & 10 deletions docs/source/conf.py
@@ -12,8 +12,12 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

import glob
import datetime
import glob

import sphinx_rtd_theme # noqa: E402

from ibis import __version__ as version # noqa: E402

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -32,23 +36,20 @@
'sphinx.ext.autosummary',
'sphinx.ext.extlinks',
'sphinx.ext.mathjax',
'numpydoc',
'sphinx.ext.napoleon',
'nbsphinx',
'IPython.sphinxext.ipython_directive',
'IPython.sphinxext.ipython_console_highlighting',
'releases',
]

napoleon_google_docstring = False
napoleon_numpy_docstring = True
releases_github_path = "ibis-project/ibis"
releases_unstable_prehistory = True
releases_document_name = ["release"]

ipython_warning_is_error = True
autosummary_generate = glob.glob("*.rst")

# autosummary_generate = True

numpydoc_show_class_members = False

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

@@ -72,7 +73,6 @@
# The short X.Y version.
# version = '0.2'

from ibis import __version__ as version # noqa: E402

# The full version, including alpha/beta/rc tags.
release = version
@@ -121,7 +121,6 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.

import sphinx_rtd_theme # noqa: E402

html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
4 changes: 2 additions & 2 deletions docs/source/configuration.rst
@@ -50,15 +50,15 @@ To see all internal Ibis activity (like queries being executed) set

.. code-block:: python
ibis.options.verbose = True
ibis.options.verbose = True
By default this information is sent to ``sys.stdout``, but you can set some
other logging function:

.. code-block:: python
def cowsay(x):
print("Cow says: {0}".format(x))
print("Cow says: {}".format(x))
ibis.options.verbose_log = cowsay
45 changes: 27 additions & 18 deletions docs/source/contributing.rst
@@ -4,32 +4,41 @@
Contributing to Ibis
********************

.. note::
.. _contrib.running_tests:

Make sure you've read the :ref:`installation section <install>` of the docs
before continuing.
Clone the Repository
--------------------
To contribute to ibis you need to clone the repository from GitHub:

.. _contrib.running_tests:
.. code-block:: sh
git clone https://github.com/ibis-project/ibis
Set Up a Development Environment
--------------------------------
#. `Install miniconda <https://docs.conda.io/en/latest/miniconda.html>`_
#. Create a conda environment suitable for ibis development:

.. code-block:: sh
conda env create -n ibis-dev --file ci/requirements-3.7-dev.yml
Running the Test Suite
----------------------
#. Activate the environment

.. code-block:: sh
conda activate ibis-dev
Run the Test Suite
------------------

Contributor `Krisztián Szűcs <https://github.com/kszucs>`_ has spent many hours
crafting an easy-to-use `docker-compose <https://docs.docker.com/compose/>`_
setup that enables ibis developers to get up and running quickly.

Here are the steps to run clone the repo and run the test suite:
Here are the steps to start database services and run the test suite:

.. code-block:: sh
# clone ibis
git clone https://github.com/ibis-project/ibis
# go to where the docker-compose file is
pushd ibis/ci
# start services, build ibis, and load data into databases
./build.sh
# optionally run all tests
./test.sh -m 'not udf' -n auto -o cache_dir=/tmp/.pytest_cache
make --directory ibis init
make --directory ibis testparallel
43 changes: 21 additions & 22 deletions docs/source/design.rst
@@ -67,7 +67,7 @@ Examples of expressions include :class:`~ibis.expr.types.Int64Column`,

Here's an example of each type of expression:

.. code-block:: ipython
.. ipython:: python
import ibis
t = ibis.table([('a', 'int64')])
@@ -91,42 +91,41 @@ Most nodes are defined in the :mod:`~ibis.expr.operations` module.
Examples of nodes include :class:`~ibis.expr.operations.Add` and
:class:`~ibis.expr.operations.Sum`.

Nodes have two important members (and often these are the only members defined):
Nodes (transitively) inherit from a class that allows node authors to define
their node's input arguments directly in the class body.

#. ``input_type``: a list of rules
#. ``output_type``: a rule or method
Additionally the ``output_type`` member of the class is a rule or method that
defines the shape (scalar or column) and element type of the operation.

The ``input_type`` member is a list of rules that defines the types of
the inputs to the operation. This is sometimes called the signature.
Each input argument's rule should be passed to the
``ibis.expr.signature.Argument`` class (often aliased to ``Arg`` for
convenience in the ibis codebase).

The ``output_type`` member is a rule or a method that defines the output type
of the operation. This is sometimes called the return type.
An example of usage is a node that represents a logarithm operation:

An example of ``input_type``/``output_type`` usage is the
:class:`~ibis.expr.operations.Log` class:
.. ipython:: python
.. code-block:: ipython
import ibis.expr.rules as rlz
from ibis.expr.operations import ValueOp
from ibis.expr.signature import Argument as Arg
class Log(Node):
input_type = [
rules.double(),
rules.double(name='base', optional=True)
]
output_type = rules.shape_like_arg(0, 'double')
class Log(ValueOp):
arg = Arg(rlz.double) # A double scalar or column
base = Arg(rlz.double, default=None) # Optional argument
output_type = rlz.typeof('arg')
This class describes an operation called ``Log`` that takes one required
argument: a double scalar or column, and one optional argument: a double scalar
or column named ``base`` that defaults to nothing if not provided. The base
or column named ``base`` that defaults to nothing if not provided. The ``base``
argument is ``None`` by default so that the expression will behave as the
underlying database does.

These objects are instantiated when you use ibis APIs:
Similar objects are instantiated when you use ibis APIs:

.. code-block:: ipython
.. ipython:: python
import ibis
t = ibis.table([('a', 'double')])
t = ibis.table([('a', 'double')], name='t')
log_1p = (1 + t.a).log() # an Add and a Log are instantiated here
.. _expr_vs_ops:
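
The relationship the revised page describes, expressions wrapping operation nodes, can be seen directly with the standard ``.op()`` accessor. A short hedged sketch:

    import ibis

    t = ibis.table([('a', 'double')], name='t')
    expr = t.a.log()
    node = expr.op()            # the Log node backing the expression
    print(type(node).__name__)  # Log
    print(node.args)            # the validated arguments, e.g. (t.a, None)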
145 changes: 0 additions & 145 deletions docs/source/developer.rst

This file was deleted.

100 changes: 56 additions & 44 deletions docs/source/getting-started.rst
@@ -49,15 +49,16 @@ Install dependencies for Ibis's Impala dialect:
pip install ibis-framework[impala]

To create an Ibis client, you must first connect your services and assemble the
client using :func:`~ibis.impala.connect`:
client using :func:`ibis.impala.connect`:

.. code-block:: python
.. ipython:: python
import ibis
hdfs = ibis.hdfs_connect(host=webhdfs_host, port=webhdfs_port)
con = ibis.impala.connect(host=impala_host, port=impala_port,
hdfs_client=hdfs)
hdfs = ibis.hdfs_connect(host='impala', port=50070)
con = ibis.impala.connect(
host='impala', database='ibis_testing', hdfs_client=hdfs
)
Both method calls can take ``auth_mechanism='GSSAPI'`` or
``auth_mechanism='LDAP'`` to connect to Kerberos clusters. Depending on your
@@ -77,10 +78,11 @@ Install dependencies for Ibis's SQLite dialect:
pip install ibis-framework[sqlite]

Create a client by passing a path to a SQLite database to
:func:`~ibis.sqlite.connect`:
:func:`ibis.sqlite.connect`:

.. code-block:: python
>>> import ibis
>>> ibis.sqlite.connect('path/to/my/sqlite.db')
See http://blog.ibis-project.org/sqlite-crunchbase-quickstart/ for a quickstart
Expand All @@ -97,17 +99,21 @@ Install dependencies for Ibis's PostgreSQL dialect:

pip install ibis-framework[postgres]

Create a client by passing a connection string or individual parameters to
:func:`~ibis.postgres.connect`:
Create a client by passing a connection string to the ``url`` parameter or
individual parameters to :func:`ibis.postgres.connect`:

.. code-block:: python
.. ipython:: python
>>> con = ibis.postgres.connect(
... 'postgresql://user:pass@host:port/my_database'
... )
>>> con = ibis.postgres.connect(
... user='bob', port=23569, database='ibis_testing'
... )
con = ibis.postgres.connect(
url='postgresql://postgres:postgres@postgres:5432/ibis_testing'
)
con = ibis.postgres.connect(
user='postgres',
password='postgres',
host='postgres',
port=5432,
database='ibis_testing',
)
.. _install.clickhouse:

@@ -121,12 +127,12 @@ Install dependencies for Ibis's Clickhouse dialect:
pip install ibis-framework[clickhouse]

Create a client by passing in database connection parameters such as ``host``,
``port``, ``database``, and ``user`` to :func:`~ibis.clickhouse.connect`:
``port``, ``database``, and ``user`` to :func:`ibis.clickhouse.connect`:


.. code-block:: python
.. ipython:: python
>>> con = ibis.clickhouse.connect(host='localhost', port=9000)
con = ibis.clickhouse.connect(host='clickhouse', port=9000)
.. _install.bigquery:

@@ -159,8 +165,8 @@ project.
will still be billed for any and all queries**.

If you want to query data that lives in a different project than the billing
project you can use the :meth:`~ibis.bigquery.client.BigQueryClient.database`
method of :class:`~ibis.bigquery.client.BigQueryClient` objects:
project you can use the :meth:`ibis.bigquery.client.BigQueryClient.database`
method of :class:`ibis.bigquery.client.BigQueryClient` objects:

.. code-block:: python
Expand All @@ -169,24 +175,27 @@ method of :class:`~ibis.bigquery.client.BigQueryClient` objects:
>>> t.sweet_column.sum().execute() # runs against the billing project
`Pandas <https://pandas.pydata.org/>`_ Quickstart
------------------------------------------------------
-------------------------------------------------

Ibis's Pandas backend is available on Ibis's core:
Ibis's Pandas backend is available in core Ibis:

Create a client by supplying a dictionary of DataFrames using
:func:`~ibis.pandas.connect`. The keys become the table names:
:func:`ibis.pandas.connect`. The keys become the table names:

.. code-block:: python
.. ipython:: python
>>> con = ibis.pandas.connect({
... 'A': pandas.util.testing.makeDataFrame(),
... 'B': pandas.util.testing.makeDataFrame()
... })
import pandas as pd
con = ibis.pandas.connect(
{
'A': pd.util.testing.makeDataFrame(),
'B': pd.util.testing.makeDataFrame(),
}
)
.. _install.mapd:

`MapD <https://www.omnisci.com/>`_ Quickstart
------------------------------------------------------
---------------------------------------------

Install dependencies for Ibis's MapD dialect:

@@ -196,19 +205,21 @@

Create a client by passing in database connection parameters such as ``host``,
``port``, ``database``, ``user`` and ``password`` to
:func:`~ibis.mapd.connect`:
:func:`ibis.mapd.connect`:

.. code-block:: python
.. ipython:: python
>>> con = ibis.mapd.connect(
... host='localhost', database='mapd', port=9091,
... user='mapd', password='HyperInteractive'
... )
con = ibis.mapd.connect(
host='omnisci',
database='ibis_testing',
user='mapd',
password='HyperInteractive',
)
.. _install.mysql:

`MySQL <https://www.mysql.com/>`_ Quickstart
------------------------------------------------------
--------------------------------------------

Install dependencies for Ibis's MySQL dialect:

@@ -217,16 +228,17 @@
pip install ibis-framework[mysql]

Create a client by passing a connection string or individual parameters to
:func:`~ibis.mysql.connect`:
:func:`ibis.mysql.connect`:

.. code-block:: python
.. ipython:: python
>>> con = ibis.mysql.connect(
... 'mysql://user:pass@host:port/my_database'
... )
>>> con = ibis.mysql.connect(
... user='bob', port=23569, database='ibis_testing'
... )
con = ibis.mysql.connect(url='mysql+pymysql://ibis:ibis@mysql/ibis_testing')
con = ibis.mysql.connect(
user='ibis',
password='ibis',
host='mysql',
database='ibis_testing',
)
Learning Resources
------------------
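
Once the pandas client exists, expressions execute directly against the in-memory frames. A brief hedged continuation of the example above:

    t = con.table('A')          # 'A' is a key passed to ibis.pandas.connect
    print(t.head(3).execute())  # evaluates against the underlying DataFrame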
215 changes: 95 additions & 120 deletions docs/source/impala.rst
@@ -17,38 +17,35 @@
PyData stack is overall poor (but improving), we also show some ways that you
can use pandas with Ibis and Impala.

.. ipython:: python
:suppress:
import ibis
host = 'quickstart.cloudera'
hdfs = ibis.hdfs_connect(host=host)
client = ibis.impala.connect(host=host, hdfs_client=hdfs)
The Impala client object
------------------------

To use Ibis with Impala, you first must connect to a cluster using the
``ibis.impala.connect`` function, *optionally* supplying an HDFS connection:
:func:`ibis.impala.connect` function, optionally supplying an HDFS
connection:

.. code-block:: python
import ibis
hdfs = ibis.hdfs_connect(host=webhdfs_host, port=webhdfs_port)
client = ibis.impala.connect(host=impala_host, port=impala_port,
hdfs_client=hdfs)
client = ibis.impala.connect(
host=impala_host, port=impala_port, hdfs_client=hdfs
)
You can accomplish many tasks directly through the client object, but we
additionally provide to streamline tasks involving a single Impala table or
database.
All IPython examples here use the following block of code to connect to impala
using docker:

If you're doing analytics on a single table, you can get going by using the
``table`` method on the client:
.. ipython:: python
.. code-block:: python
import ibis
host = 'impala'
hdfs = ibis.hdfs_connect(host=host)
client = ibis.impala.connect(host=host, hdfs_client=hdfs)
table = client.table(table_name, database=db_name)
You can accomplish many tasks directly through the client object, but we
additionally provide APIs to streamline tasks involving a single Impala table
or database.

Database and Table objects
--------------------------
@@ -67,7 +64,7 @@ referencing a physical Impala table:
table = client.table('functional_alltypes', database='ibis_testing')
While you can get by fine with only table and client objects, Ibis has a notion
of a "database object" that simplifies interactions with a single Impala
of a database object that simplifies interactions with a single Impala
database. It also gives you IPython tab completion of table names (that are
valid Python variable names):

@@ -78,13 +75,6 @@ valid Python variable names):
table = db.functional_alltypes
db.list_tables()
So, these two lines of code are equivalent:

.. code-block:: python
table1 = client.table(table_name, database=db)
table2 = db.table(table_name)
``ImpalaTable`` is a Python subclass of the more general Ibis ``TableExpr``
that has additional Impala-specific methods. So you can use it interchangeably
with any code expecting a ``TableExpr``.
@@ -118,9 +108,8 @@ For example:
expr = fa.double_col.sum()
expr.execute()
For longer-running queries, if you press Control-C (or whatever triggers the
Python ``KeyboardInterrupt`` on your system), Ibis will attempt to cancel the
query in progress.
For longer-running queries, Ibis will attempt to cancel the query in progress
if an interrupt is received.

Creating tables
---------------
Expand Down Expand Up @@ -186,13 +175,13 @@ can force a particular path with the ``location`` option.

.. code-block:: python
from getpass import getuser
schema = ibis.schema([('foo', 'string'),
('year', 'int32'),
('month', 'int16')])
name = 'new_table'
location = '/home/wesm/new-table-data'
db.create_table(name, schema=schema,
location=location)
location = '/home/{}/new-table-data'.format(getuser())
db.create_table(name, schema=schema, location=location)
If the schema matches a known table schema, you can always use the ``schema``
method to get a schema object:
@@ -323,23 +312,23 @@ files backing a table:

.. code-block:: ipython
In [9]: ss = c.table('tpcds_parquet.store_sales')
In [9]: ss = c.table('tpcds_parquet.store_sales')
In [10]: ss.files()[:5]
Out[10]:
path size \
0 hdfs://localhost:20500/test-warehouse/tpcds.st... 160.61KB
1 hdfs://localhost:20500/test-warehouse/tpcds.st... 123.88KB
2 hdfs://localhost:20500/test-warehouse/tpcds.st... 139.28KB
3 hdfs://localhost:20500/test-warehouse/tpcds.st... 139.60KB
4 hdfs://localhost:20500/test-warehouse/tpcds.st... 62.84KB
In [10]: ss.files()[:5]
Out[10]:
path size \
0 hdfs://localhost:20500/test-warehouse/tpcds.st... 160.61KB
1 hdfs://localhost:20500/test-warehouse/tpcds.st... 123.88KB
2 hdfs://localhost:20500/test-warehouse/tpcds.st... 139.28KB
3 hdfs://localhost:20500/test-warehouse/tpcds.st... 139.60KB
4 hdfs://localhost:20500/test-warehouse/tpcds.st... 62.84KB
partition
0 ss_sold_date_sk=2451803
1 ss_sold_date_sk=2451819
2 ss_sold_date_sk=2451772
3 ss_sold_date_sk=2451789
4 ss_sold_date_sk=2451741
partition
0 ss_sold_date_sk=2451803
1 ss_sold_date_sk=2451819
2 ss_sold_date_sk=2451772
3 ss_sold_date_sk=2451789
4 ss_sold_date_sk=2451741
Modifying table metadata
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -359,23 +348,27 @@ files, you could run the following command:

.. code-block:: python
from getpass import getuser
csv_props = {
'serialization.format': ',',
'field.delim': ','
'field.delim': ',',
}
data_dir = '/home/wesm/my-csv-files'
data_dir = '/home/{}/my-csv-files'.format(getuser())
table.alter(location=data_dir, format='text',
serde_properties=csv_props)
table.alter(location=data_dir, format='text', serde_properties=csv_props)
If the table is partitioned, you can modify only the properties of a particular
partition:

.. code-block:: python
table.alter_partition({'year': 2007, 'month': 5},
location=data_dir, format='text',
serde_properties=csv_props)
table.alter_partition(
{'year': 2007, 'month': 5},
location=data_dir,
format='text',
serde_properties=csv_props
)
Table statistics
----------------
Expand Down Expand Up @@ -447,34 +440,34 @@ depend, of course, on the last ``COMPUTE STATS`` call.
3 hdfs://localhost:20500/test-warehouse/tpcds.st...
4 hdfs://localhost:20500/test-warehouse/tpcds.st...
In [9]: cstats = ss.column_stats()
In [10]: cstats
Out[10]:
Column Type #Distinct Values #Nulls Max Size Avg Size
0 ss_sold_time_sk BIGINT 13879 -1 NaN 8
1 ss_item_sk BIGINT 17925 -1 NaN 8
2 ss_customer_sk BIGINT 15207 -1 NaN 8
3 ss_cdemo_sk BIGINT 16968 -1 NaN 8
4 ss_hdemo_sk BIGINT 6220 -1 NaN 8
5 ss_addr_sk BIGINT 14077 -1 NaN 8
6 ss_store_sk BIGINT 6 -1 NaN 8
7 ss_promo_sk BIGINT 298 -1 NaN 8
8 ss_ticket_number INT 15006 -1 NaN 4
9 ss_quantity INT 99 -1 NaN 4
10 ss_wholesale_cost DECIMAL(7,2) 10196 -1 NaN 4
11 ss_list_price DECIMAL(7,2) 19393 -1 NaN 4
12 ss_sales_price DECIMAL(7,2) 15594 -1 NaN 4
13 ss_ext_discount_amt DECIMAL(7,2) 29772 -1 NaN 4
14 ss_ext_sales_price DECIMAL(7,2) 102758 -1 NaN 4
15 ss_ext_wholesale_cost DECIMAL(7,2) 125448 -1 NaN 4
16 ss_ext_list_price DECIMAL(7,2) 141419 -1 NaN 4
17 ss_ext_tax DECIMAL(7,2) 33837 -1 NaN 4
18 ss_coupon_amt DECIMAL(7,2) 29772 -1 NaN 4
19 ss_net_paid DECIMAL(7,2) 109981 -1 NaN 4
20 ss_net_paid_inc_tax DECIMAL(7,2) 132286 -1 NaN 4
21 ss_net_profit DECIMAL(7,2) 122436 -1 NaN 4
22 ss_sold_date_sk BIGINT 120 0 NaN 8
In [9]: cstats = ss.column_stats()
In [10]: cstats
Out[10]:
Column Type #Distinct Values #Nulls Max Size Avg Size
0 ss_sold_time_sk BIGINT 13879 -1 NaN 8
1 ss_item_sk BIGINT 17925 -1 NaN 8
2 ss_customer_sk BIGINT 15207 -1 NaN 8
3 ss_cdemo_sk BIGINT 16968 -1 NaN 8
4 ss_hdemo_sk BIGINT 6220 -1 NaN 8
5 ss_addr_sk BIGINT 14077 -1 NaN 8
6 ss_store_sk BIGINT 6 -1 NaN 8
7 ss_promo_sk BIGINT 298 -1 NaN 8
8 ss_ticket_number INT 15006 -1 NaN 4
9 ss_quantity INT 99 -1 NaN 4
10 ss_wholesale_cost DECIMAL(7,2) 10196 -1 NaN 4
11 ss_list_price DECIMAL(7,2) 19393 -1 NaN 4
12 ss_sales_price DECIMAL(7,2) 15594 -1 NaN 4
13 ss_ext_discount_amt DECIMAL(7,2) 29772 -1 NaN 4
14 ss_ext_sales_price DECIMAL(7,2) 102758 -1 NaN 4
15 ss_ext_wholesale_cost DECIMAL(7,2) 125448 -1 NaN 4
16 ss_ext_list_price DECIMAL(7,2) 141419 -1 NaN 4
17 ss_ext_tax DECIMAL(7,2) 33837 -1 NaN 4
18 ss_coupon_amt DECIMAL(7,2) 29772 -1 NaN 4
19 ss_net_paid DECIMAL(7,2) 109981 -1 NaN 4
20 ss_net_paid_inc_tax DECIMAL(7,2) 132286 -1 NaN 4
21 ss_net_profit DECIMAL(7,2) 122436 -1 NaN 4
22 ss_sold_date_sk BIGINT 120 0 NaN 8
``REFRESH`` and ``INVALIDATE METADATA``
Expand Down Expand Up @@ -596,7 +589,7 @@ For example:
In [6]: t.execute()
Out[6]:
bar foo
bar foo
0 a 1
1 b 2
2 c 3
Expand All @@ -612,27 +605,27 @@ For example:
In [11]: to_insert.execute()
Out[11]:
bar foo
bar foo
0 a 1
1 b 2
2 c 3
3 d 4
In [12]: to_insert.drop()
.. .. ipython:: python
.. ipython:: python
.. import pandas as pd
.. data = pd.DataFrame({'foo': [1, 2, 3, 4], 'bar': ['a', 'b', 'c', 'd']})
.. db.create_table('pandas_table', data)
.. t = db.pandas_table
.. t.execute()
.. t.drop()
.. db.create_table('empty_for_insert', schema=t.schema())
.. to_insert = db.empty_for_insert
.. to_insert.insert(data)
.. to_insert.execute()
.. to_insert.drop()
import pandas as pd
data = pd.DataFrame({'foo': [1, 2, 3, 4], 'bar': ['a', 'b', 'c', 'd']})
db.create_table('pandas_table', data)
t = db.pandas_table
t.execute()
t.drop()
db.create_table('empty_for_insert', schema=t.schema())
to_insert = db.empty_for_insert
to_insert.insert(data)
to_insert.execute()
to_insert.drop()
Using Impala UDFs in Ibis
-------------------------
@@ -678,8 +671,9 @@ To make this function callable, we use ``ibis.impala.wrap_udf``:
udf_db = 'ibis_testing'
udf_name = 'fuzzy_equals'
fuzzy_equals = ibis.impala.wrap_udf(library, inputs, output,
symbol, name=udf_name)
fuzzy_equals = ibis.impala.wrap_udf(
library, inputs, output, symbol, name=udf_name
)
In typical workflows, you will set up a UDF in Impala once then use it
thenceforth. So the *first time* you do this, you need to create the UDF in
@@ -698,7 +692,7 @@ must take place each time you load your Ibis session.
The object ``fuzzy_equals`` is callable and works with Ibis expressions:

.. code-block:: python
.. code-block:: ipython
In [35]: db = c.database('ibis_testing')
Expand All @@ -725,11 +719,6 @@ use Ibis. If you have a lot of UDFs, I suggest you create a file with all of
your wrapper declarations and user APIs that you load with your Ibis session to
plug in all your own functions.

Using aggregate functions (UDAs)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Coming soon.

Adding documentation to new functions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -745,20 +734,6 @@ Adding documentation to new functions
Returns
-------
is_approx_equal : boolean
"""
bool
Adding UDF functions to Ibis types
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Coming soon.

Installing the Impala UDF SDK on OS X and Linux
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Coming soon.

Impala types to Ibis types
~~~~~~~~~~~~~~~~~~~~~~~~~~

Coming soon. See ``ibis.schema`` for now.
"""
1 change: 0 additions & 1 deletion docs/source/index.rst
@@ -84,7 +84,6 @@ SQL engine support needing code contributors:
sql
udf
contributing
developer
design
extending
backends
4 changes: 2 additions & 2 deletions docs/source/notebooks/tutorial/1-Intro-and-Setup.ipynb
@@ -45,7 +45,7 @@
"outputs": [],
"source": [
"hdfs_port = os.environ.get('IBIS_WEBHDFS_PORT', 50070)\n",
"hdfs = ibis.hdfs_connect(host='quickstart.cloudera', port=hdfs_port)"
"hdfs = ibis.hdfs_connect(host='impala', port=hdfs_port)"
]
},
{
Expand All @@ -61,7 +61,7 @@
"metadata": {},
"outputs": [],
"source": [
"con = ibis.impala.connect('quickstart.cloudera', hdfs_client=hdfs)\n",
"con = ibis.impala.connect('impala', hdfs_client=hdfs)\n",
"con"
]
},
@@ -25,8 +25,8 @@
"import ibis\n",
"import os\n",
"hdfs_port = os.environ.get('IBIS_WEBHDFS_PORT', 50070)\n",
"hdfs = ibis.hdfs_connect(host='quickstart.cloudera', port=hdfs_port)\n",
"con = ibis.impala.connect(host='quickstart.cloudera', database='ibis_testing',\n",
"hdfs = ibis.hdfs_connect(host='impala', port=hdfs_port)\n",
"con = ibis.impala.connect(host='impala', database='ibis_testing',\n",
" hdfs_client=hdfs)"
]
},
@@ -277,8 +277,7 @@
"metadata": {},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"ibis.options.verbose_log = lambda x: print(x)\n",
"ibis.options.verbose_log = print\n",
"ibis.options.verbose = False"
]
},
4 changes: 2 additions & 2 deletions docs/source/notebooks/tutorial/3-Projection-Join-Sort.ipynb
@@ -25,8 +25,8 @@
"import ibis\n",
"import os\n",
"hdfs_port = os.environ.get('IBIS_WEBHDFS_PORT', 50070)\n",
"hdfs = ibis.hdfs_connect(host='quickstart.cloudera', port=hdfs_port)\n",
"con = ibis.impala.connect(host='quickstart.cloudera', database='ibis_testing',\n",
"hdfs = ibis.hdfs_connect(host='impala', port=hdfs_port)\n",
"con = ibis.impala.connect(host='impala', database='ibis_testing',\n",
" hdfs_client=hdfs)\n",
"print('Hello!')"
]
4 changes: 2 additions & 2 deletions docs/source/notebooks/tutorial/4-More-Value-Expressions.ipynb
@@ -23,8 +23,8 @@
"import ibis\n",
"import os\n",
"hdfs_port = os.environ.get('IBIS_WEBHDFS_PORT', 50070)\n",
"hdfs = ibis.hdfs_connect(host='quickstart.cloudera', port=hdfs_port)\n",
"con = ibis.impala.connect(host='quickstart.cloudera', database='ibis_testing',\n",
"hdfs = ibis.hdfs_connect(host='impala', port=hdfs_port)\n",
"con = ibis.impala.connect(host='impala', database='ibis_testing',\n",
" hdfs_client=hdfs)\n",
"ibis.options.interactive = True"
]
@@ -26,8 +26,8 @@
"import os\n",
"hdfs_port = int(os.environ.get('IBIS_TEST_WEBHDFS_PORT', 50070))\n",
"user = os.environ.get('IBIS_TEST_WEBHDFS_USER', 'ubuntu')\n",
"hdfs = ibis.hdfs_connect(host='quickstart.cloudera', user=user, port=hdfs_port)\n",
"con = ibis.impala.connect(host='quickstart.cloudera', database='ibis_testing',\n",
"hdfs = ibis.hdfs_connect(host='impala', user=user, port=hdfs_port)\n",
"con = ibis.impala.connect(host='impala', database='ibis_testing',\n",
" hdfs_client=hdfs)\n",
"ibis.options.interactive = True"
]
@@ -23,8 +23,8 @@
"import ibis\n",
"import os\n",
"hdfs_port = os.environ.get('IBIS_WEBHDFS_PORT', 50070)\n",
"hdfs = ibis.hdfs_connect(host='quickstart.cloudera', port=hdfs_port)\n",
"con = ibis.impala.connect(host='quickstart.cloudera', database='ibis_testing',\n",
"hdfs = ibis.hdfs_connect(host='impala', port=hdfs_port)\n",
"con = ibis.impala.connect(host='impala', database='ibis_testing',\n",
" hdfs_client=hdfs)\n",
"ibis.options.interactive = True"
]
@@ -31,8 +31,8 @@
"import ibis\n",
"import os\n",
"hdfs_port = os.environ.get('IBIS_WEBHDFS_PORT', 50070)\n",
"hdfs = ibis.hdfs_connect(host='quickstart.cloudera', port=hdfs_port)\n",
"con = ibis.impala.connect(host='quickstart.cloudera', database='ibis_testing',\n",
"hdfs = ibis.hdfs_connect(host='impala', port=hdfs_port)\n",
"con = ibis.impala.connect(host='impala', database='ibis_testing',\n",
" hdfs_client=hdfs)\n",
"ibis.options.interactive = True"
]
4 changes: 2 additions & 2 deletions docs/source/notebooks/tutorial/8-More-Analytics-Helpers.ipynb
@@ -23,8 +23,8 @@
"import ibis\n",
"import os\n",
"hdfs_port = os.environ.get('IBIS_WEBHDFS_PORT', 50070)\n",
"hdfs = ibis.hdfs_connect(host='quickstart.cloudera', port=hdfs_port)\n",
"con = ibis.impala.connect(host='quickstart.cloudera', database='ibis_testing',\n",
"hdfs = ibis.hdfs_connect(host='impala', port=hdfs_port)\n",
"con = ibis.impala.connect(host='impala', database='ibis_testing',\n",
" hdfs_client=hdfs)\n",
"ibis.options.interactive = True"
]
40 changes: 40 additions & 0 deletions docs/source/release.rst
@@ -7,6 +7,46 @@ Release Notes
These release notes are for versions of ibis **1.0 and later**. Release
notes for pre-1.0 versions of ibis can be found at :doc:`/release-pre-1.0`

* :release:`1.1.0 <2019-06-09>`
* :bug:`1819` Fix group_concat test and implementations
* :support:`1820` Remove decorator hacks and add custom markers
* :bug:`1818` Fix failing strftime tests on Python 3.7
* :bug:`1757` Remove unnecessary (and erroneous in some cases) frame clauses
* :support:`1814` Add development deps to setup.py
* :feature:`1809` Consolidate trailing window functions
* :bug:`1799` Chained mutate operations are buggy
* :support:`1805` Fix design and developer docs
* :support:`1810` Pin sphinx version to 2.0.1
* :feature:`1766` Call to_interval when casting integers to intervals
* :bug:`1783` Allow projections from joins to attempt fusion
* :feature:`1796` Add session feature to mapd client API
* :bug:`1798` Fix Python 3.5 dependency versions
* :feature:`1792` Add min periods parameter to Window
* :support:`1793` Add pep8speaks integration
* :support:`1821` Fix typo in UDF signature specification
* :feature:`1785` Allow strings for types in pandas UDFs
* :feature:`1790` Add missing date operations and struct field operation for the pandas backend
* :bug:`1789` Fix compatibility and bugs associated with pandas toposort reimplementation
* :bug:`1772` Fix outer_join generating LEFT join instead of FULL OUTER
* :feature:`1771` Add window operations to the OmniSci backend
* :feature:`1758` Reimplement the pandas backend using topological sort
* :support:`1779` Clean up most xpassing tests
* :bug:`1782` NullIf should enforce that its arguments are castable to a common type
* :support:`1781` Update omnisci container version
* :feature:`1778` Add marker for xfailing specific backends
* :feature:`1777` Enable window function tests where possible
* :bug:`1775` Fix conda create command in documentation
* :support:`1776` Constrain PyMapD version to get passing builds
* :bug:`1765` Fix preceding and following with ``None``
* :support:`1763` Remove warnings and clean up some docstrings
* :support:`1638` Add StringToTimestamp as unsupported
* :feature:`1743` is_computable_arg dispatcher
* :support:`1759` Add isort pre-commit hooks
* :feature:`1753` Added float32 and geospatial types for create table from schema
* :bug:`1661` PostgreSQL interval type not recognized
* :support:`1750` Add Python 3.5 testing back to CI
* :support:`1700` Re-enable CI for building step
* :support:`1749` Update README reference to MapD to say OmniSci
* :release:`1.0.0 <2019-03-26>`
* :support:`1748` Do not build universal wheels
* :support:`1747` Remove tag prefix from versioneer
10 changes: 2 additions & 8 deletions docs/source/sql.rst
@@ -747,8 +747,8 @@ As you would hope, the code is as follows:
Join with ``SELECT *``
~~~~~~~~~~~~~~~~~~~~~~

If you try to execute a join that has not been projected or aggregated, it will
be *fully materialized*:
If you try to compile or execute a join that has not been projected or
aggregated, it will be *fully materialized*:

.. ipython:: python
@@ -1055,12 +1055,6 @@ In Ibis this is:
.aggregate(unique_events=metric))
print(ibis.impala.compile(expr))
You can also write:

.. code-block:: python
events.event_type.distinct().count()
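Both spellings should produce a ``COUNT(DISTINCT ...)`` aggregation. A self-contained sketch, using a hypothetical ``events`` table in place of the one defined earlier on the docs page:

.. code-block:: python

    import ibis

    # Hypothetical schema; the real table comes from the surrounding docs.
    events = ibis.table([('event_type', 'string')], name='events')
    expr = events.event_type.distinct().count()
    print(ibis.impala.compile(expr))  # expect a COUNT(DISTINCT ...) query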
Window functions
----------------

66 changes: 16 additions & 50 deletions docs/source/udf.rst
@@ -37,7 +37,7 @@ Here's how to define an element-wise function:
import ibis.expr.datatypes as dt
from ibis.pandas import udf
@udf.elementwise(input_type=[dt.int64], output_type=.dtdouble)
@udf.elementwise(input_type=[dt.int64], output_type=dt.double)
def add_one(x):
return x + 1.0
@@ -57,7 +57,7 @@ Here's how to define a reduction function:
import ibis.expr.datatypes as dt
from ibis.pandas import udf
@udf.reduction(input_type=[dt.double], output_type=.dtdouble)
@udf.reduction(input_type=[dt.double], output_type=dt.double)
def double_mean(series):
return 2 * series.mean()
@@ -77,7 +77,7 @@ Here's how to define an analytic function:
import ibis.expr.datatypes as dt
from ibis.pandas import udf
@udf.analytic(input_type=[dt.double], output_type=.dtdouble)
@udf.analytic(input_type=[dt.double], output_type=dt.double)
def zscore(series):
return (series - series.mean()) / series.std()
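A runnable sketch of the reduction flavor against the pandas backend (the table name and data are made up for illustration):

.. code-block:: python

    import pandas as pd

    import ibis
    import ibis.expr.datatypes as dt
    from ibis.pandas import udf

    @udf.reduction(input_type=[dt.double], output_type=dt.double)
    def double_mean(series):
        # Receives a pandas Series, returns a scalar.
        return 2 * series.mean()

    con = ibis.pandas.connect({'df': pd.DataFrame({'a': [1.0, 2.0, 3.0]})})
    t = con.table('df')
    print(double_mean(t.a).execute())  # 4.0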
@@ -106,18 +106,20 @@ Using ``add_one`` from above as an example, the following call will receive a

.. code-block:: python
>>> import ibis
>>> import pandas as pd
>>> df = pd.DataFrame({'a': [1, 2, 3]})
>>> con = ibis.pandas.connect({'df': df})
>>> t = con.table('df')
>>> expr = add_one(t.a)
import ibis
import pandas as pd
df = pd.DataFrame({'a': [1, 2, 3]})
con = ibis.pandas.connect({'df': df})
t = con.table('df')
expr = add_one(t.a)
expr
And this will receive the ``int`` 1:

.. code-block:: python
>>> expr = add_one(1)
expr = add_one(1)
expr
Since the pandas backend passes around ``**kwargs`` you can accept ``**kwargs``
in your function:
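The example that follows in the docs is elided from this diff; a minimal sketch of what such a signature looks like (the function name is hypothetical):

.. code-block:: python

    import ibis.expr.datatypes as dt
    from ibis.pandas import udf

    @udf.elementwise(input_type=[dt.int64], output_type=dt.double)
    def add_one_with_kwargs(x, **kwargs):
        # The pandas backend forwards execution kwargs; unused ones
        # can simply be ignored.
        return x + 1.0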
@@ -175,47 +177,11 @@ Ibis will parse the source of the function and turn the resulting Python AST
into JavaScript source code (technically, ECMAScript 2015). Most of the Python
language is supported including classes, functions and generators.

If you want to inspect the generated code you can look at the ``js`` property
of the function.

.. code-block:: python
>>> print(my_bigquery_add_one.js)
CREATE TEMPORARY FUNCTION my_bigquery_add_one(x FLOAT64)
RETURNS FLOAT64
LANGUAGE js AS """
'use strict';
function my_bigquery_add_one(x) {
return (x + 1.0);
}
return my_bigquery_add_one(x);
""";
When you want to use this function you call it like any other Python
function--only on an ibis expression:
function--only it must be called on an ibis expression:

.. code-block:: python
>>> import ibis
>>> t = ibis.table([('a', 'double')])
>>> expr = my_bigquery_add_one(t.a)
>>> print(ibis.bigquery.compile(expr))
CREATE TEMPORARY FUNCTION my_bigquery_add_one(x FLOAT64)
RETURNS FLOAT64
LANGUAGE js AS """
'use strict';
function my_bigquery_add_one(x) {
return (x + 1.0);
}
return my_bigquery_add_one(x);
""";
SELECT my_bigquery_add_one(`a`) AS `tmp`
FROM t0
SQLite
------

.. _udf.sqlite:

TODO
t = ibis.table([('a', 'double')])
expr = my_bigquery_add_one(t.a)
print(ibis.bigquery.compile(expr))
18 changes: 7 additions & 11 deletions ibis/__init__.py
@@ -1,19 +1,17 @@
from contextlib import suppress

import ibis.config_init # noqa: F401
import ibis.util as util # noqa: F401
import ibis.expr.api as api # noqa: F401
import ibis.expr.types as ir # noqa: F401

from ibis.config import options # noqa: F401
# pandas backend is mandatory
import ibis.pandas.api as pandas # noqa: F401
import ibis.util as util # noqa: F401
from ibis.common import IbisError
from ibis.filesystems import HDFS, WebHDFS # noqa: F401

# __all__ is defined
from ibis.config import options # noqa: F401
from ibis.expr.api import * # noqa: F401,F403
from ibis.filesystems import HDFS, WebHDFS # noqa: F401

# pandas backend is mandatory
import ibis.pandas.api as pandas # noqa: F401
from ._version import get_versions # noqa: E402

with suppress(ImportError):
# pip install ibis-framework[csv]
@@ -64,7 +62,7 @@ def hdfs_connect(
auth_mechanism='NOSASL',
verify=True,
session=None,
**kwds,
**kwds
):
"""Connect to HDFS.
@@ -133,7 +131,5 @@ def hdfs_connect(
return WebHDFS(hdfs_client)


from ._version import get_versions # noqa: E402

__version__ = get_versions()['version']
del get_versions
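For reference, a hedged sketch of calling the public helper whose signature is touched above (host and port are placeholders; the keyword defaults mirror the signature visible in this hunk):

.. code-block:: python

    import ibis

    # Placeholder endpoint; auth_mechanism and verify follow the
    # defaults shown in the diff.
    hdfs = ibis.hdfs_connect(
        host='localhost',
        port=50070,
        auth_mechanism='NOSASL',
        verify=True,
    )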
285 changes: 158 additions & 127 deletions ibis/_version.py

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions ibis/bigquery/api.py
@@ -2,15 +2,14 @@

from typing import Optional

import google.cloud.bigquery # noqa: F401, fail early if bigquery is missing
import google.auth.credentials
import google.cloud.bigquery # noqa: F401, fail early if bigquery is missing
import pydata_google_auth

import ibis.common as com

from ibis.config import options # noqa: F401
from ibis.bigquery.client import BigQueryClient
from ibis.bigquery.compiler import dialect
from ibis.config import options # noqa: F401

try:
from ibis.bigquery.udf import udf # noqa: F401
19 changes: 7 additions & 12 deletions ibis/bigquery/client.py
@@ -1,31 +1,26 @@
"""BigQuery ibis client implementation."""

import datetime

from collections import OrderedDict
from pkg_resources import parse_version
from typing import Optional, Tuple

import regex as re

import google.cloud.bigquery as bq
import pandas as pd

import regex as re
from google.api_core.exceptions import NotFound
import google.cloud.bigquery as bq

from multipledispatch import Dispatcher
from pkg_resources import parse_version

import ibis
import ibis.common as com
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.expr.schema as sch
import ibis.expr.datatypes as dt
import ibis.expr.lineage as lin

from ibis.client import Database, Query, SQLClient
import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis.bigquery import compiler as comp
from ibis.bigquery.datatypes import ibis_type_to_bigquery_type
from ibis.client import Database, Query, SQLClient

NATIVE_PARTITION_COL = '_PARTITIONTIME'

86 changes: 44 additions & 42 deletions ibis/bigquery/compiler.py
@@ -1,36 +1,27 @@
import datetime

from functools import partial

import numpy as np
import regex as re


import toolz

from multipledispatch import Dispatcher

import ibis
import ibis.common as com

import numpy as np

import ibis.expr.datatypes as dt
import ibis.expr.lineage as lin
import ibis.expr.operations as ops
import ibis.expr.types as ir

import ibis.sql.compiler as comp
import ibis.expr.operations as ops
import ibis.expr.lineage as lin

from ibis.bigquery.datatypes import ibis_type_to_bigquery_type
from ibis.impala import compiler as impala_compiler
from ibis.impala.compiler import (
ImpalaSelect,
unary,
fixed_arity,
ImpalaTableSetFormatter,
_reduction,
fixed_arity,
unary,
)
from ibis.impala import compiler as impala_compiler

from ibis.bigquery.datatypes import ibis_type_to_bigquery_type


class BigQueryUDFNode(ops.ValueOp):
@@ -363,7 +354,7 @@ def _formatter(translator, expr):
ops.RegexSearch: _regex_search,
ops.RegexExtract: _regex_extract,
ops.RegexReplace: _regex_replace,
ops.GroupConcat: fixed_arity('STRING_AGG', 2),
ops.GroupConcat: _reduction('STRING_AGG'),
ops.IfNull: fixed_arity('IFNULL', 2),
ops.Cast: _cast,
ops.StructField: _struct_field,
@@ -479,30 +470,6 @@ def compiles_string_to_timestamp(translator, expr):
return 'PARSE_TIMESTAMP({}, {})'.format(fmt_string, arg_formatted)


@rewrites(ops.Any)
def bigquery_rewrite_any(expr):
arg, = expr.op().args
return arg.cast(dt.int64).sum() > 0


@rewrites(ops.NotAny)
def bigquery_rewrite_notany(expr):
arg, = expr.op().args
return arg.cast(dt.int64).sum() == 0


@rewrites(ops.All)
def bigquery_rewrite_all(expr):
arg, = expr.op().args
return (1 - arg.cast(dt.int64)).sum() == 0


@rewrites(ops.NotAll)
def bigquery_rewrite_notall(expr):
arg, = expr.op().args
return (1 - arg.cast(dt.int64)).sum() != 0


class BigQueryTableSetFormatter(ImpalaTableSetFormatter):
def _quote_identifier(self, name):
if re.match(r'^[A-Za-z][A-Za-z_0-9]*$', name):
@@ -581,8 +548,43 @@ def compiles_approx(translator, expr):
)


class BigQueryDialect(impala_compiler.ImpalaDialect):
@rewrites(ops.Any)
@rewrites(ops.All)
@rewrites(ops.NotAny)
@rewrites(ops.NotAll)
def bigquery_any_all_no_op(expr):
return expr


@compiles(ops.Any)
def bigquery_compile_any(translator, expr):
return "LOGICAL_OR({})".format(
*map(translator.translate, expr.op().args)
)


@compiles(ops.NotAny)
def bigquery_compile_notany(translator, expr):
return "LOGICAL_AND(NOT ({}))".format(
*map(translator.translate, expr.op().args)
)


@compiles(ops.All)
def bigquery_compile_all(translator, expr):
return "LOGICAL_AND({})".format(
*map(translator.translate, expr.op().args)
)


@compiles(ops.NotAll)
def bigquery_compile_notall(translator, expr):
return "LOGICAL_OR(NOT ({}))".format(
*map(translator.translate, expr.op().args)
)


class BigQueryDialect(impala_compiler.ImpalaDialect):
translator = BigQueryExprTranslator


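The net effect of this hunk: ``Any``/``All`` reductions are no longer rewritten into sums but compiled straight to BigQuery's logical aggregate functions. A sketch of the observable behavior, using a hypothetical boolean column:

.. code-block:: python

    import ibis

    t = ibis.table([('a', 'boolean')], name='t')
    expr = t.a.any()
    # The compiled SQL should now use LOGICAL_OR(`a`) rather than a
    # sum-and-compare rewrite.
    print(ibis.bigquery.compile(expr))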
1 change: 0 additions & 1 deletion ibis/bigquery/tests/conftest.py
@@ -4,7 +4,6 @@

import ibis


PROJECT_ID = os.environ.get('GOOGLE_BIGQUERY_PROJECT_ID', 'ibis-gbq')
DATASET_ID = 'testing'

18 changes: 6 additions & 12 deletions ibis/bigquery/tests/test_client.py
@@ -2,13 +2,11 @@
import datetime
import decimal

import pytz

import pytest

import numpy as np
import pandas as pd
import pandas.util.testing as tm
import pytest
import pytz

import ibis
import ibis.expr.datatypes as dt
@@ -19,10 +17,8 @@
ga = pytest.importorskip('google.auth')
exceptions = pytest.importorskip('google.api_core.exceptions')

from ibis.bigquery.tests.conftest import (
connect as bigquery_connect,
) # noqa: E402
from ibis.bigquery.client import bigquery_param # noqa: E402
from ibis.bigquery.client import bigquery_param # noqa: E402, isort:skip
from ibis.bigquery.tests.conftest import connect # noqa: E402, isort:skip


def test_table(alltypes):
@@ -544,9 +540,7 @@ def test_exists_database_different_project(client, name, expected):


def test_repeated_project_name(project_id):
con = bigquery_connect(
project_id, dataset_id='{}.testing'.format(project_id)
)
con = connect(project_id, dataset_id='{}.testing'.format(project_id))
assert 'functional_alltypes' in con.list_tables()


@@ -725,6 +719,6 @@ def test_approx_median(alltypes):


def test_client_without_dataset(project_id):
con = bigquery_connect(project_id, dataset_id=None)
con = connect(project_id, dataset_id=None)
with pytest.raises(ValueError, match="Unable to determine BigQuery"):
con.list_tables()
37 changes: 29 additions & 8 deletions ibis/bigquery/tests/test_compiler.py
@@ -1,8 +1,7 @@
import datetime

import pytest

import pandas as pd
import pytest

import ibis
import ibis.expr.datatypes as dt
@@ -289,12 +288,12 @@ def test_range_window_function(alltypes, project_id):
(5, 5),
(ibis.interval(nanoseconds=1), 0.001),
(ibis.interval(microseconds=1), 1),
(ibis.interval(seconds=1), 1_000_000),
(ibis.interval(minutes=1), 1_000_000 * 60),
(ibis.interval(hours=1), 1_000_000 * 60 * 60),
(ibis.interval(days=1), 1_000_000 * 60 * 60 * 24),
(2 * ibis.interval(days=1), 1_000_000 * 60 * 60 * 24 * 2),
(ibis.interval(weeks=1), 1_000_000 * 60 * 60 * 24 * 7),
(ibis.interval(seconds=1), 1000000),
(ibis.interval(minutes=1), 1000000 * 60),
(ibis.interval(hours=1), 1000000 * 60 * 60),
(ibis.interval(days=1), 1000000 * 60 * 60 * 24),
(2 * ibis.interval(days=1), 1000000 * 60 * 60 * 24 * 2),
(ibis.interval(weeks=1), 1000000 * 60 * 60 * 24 * 7),
],
)
def test_trailing_range_window(alltypes, preceding, value, project_id):
@@ -548,3 +547,25 @@ def test_bucket():
END AS `tmp`
FROM t"""
assert result == expected


@pytest.mark.parametrize(
('kind', 'begin', 'end', 'expected'),
[
('preceding', None, 1, 'UNBOUNDED PRECEDING AND 1 PRECEDING'),
('following', 1, None, '1 FOLLOWING AND UNBOUNDED FOLLOWING'),
],
)
def test_window_unbounded(kind, begin, end, expected):
t = ibis.table([('a', 'int64')], name='t')
kwargs = {kind: (begin, end)}
expr = t.a.sum().over(ibis.window(**kwargs))
result = ibis.bigquery.compile(expr)
assert (
result
== """\
SELECT sum(`a`) OVER (ROWS BETWEEN {}) AS `tmp`
FROM t""".format(
expected
)
)
10 changes: 5 additions & 5 deletions ibis/bigquery/tests/test_datatypes.py
@@ -1,16 +1,16 @@
import pytest

from pytest import param

from multipledispatch.conflict import ambiguities
from pytest import param

import ibis.expr.datatypes as dt
from ibis.bigquery.datatypes import (
ibis_type_to_bigquery_type,
UDFContext,
TypeTranslationContext,
UDFContext,
ibis_type_to_bigquery_type,
)

pytestmark = pytest.mark.bigquery


def test_no_ambiguities():
ambs = ambiguities(ibis_type_to_bigquery_type.funcs)
10 changes: 3 additions & 7 deletions ibis/bigquery/udf/api.py
@@ -3,16 +3,12 @@
import inspect
import itertools

import ibis.expr.rules as rlz
import ibis.expr.datatypes as dt

from ibis.expr.signature import Argument as Arg

import ibis.expr.rules as rlz
from ibis.bigquery.compiler import BigQueryUDFNode, compiles

from ibis.bigquery.datatypes import UDFContext, ibis_type_to_bigquery_type
from ibis.bigquery.udf.core import PythonToJavaScriptTranslator
from ibis.bigquery.datatypes import ibis_type_to_bigquery_type, UDFContext

from ibis.expr.signature import Argument as Arg

__all__ = ('udf',)

5 changes: 1 addition & 4 deletions ibis/bigquery/udf/core.py
@@ -2,16 +2,13 @@
"""

import ast

from collections import ChainMap
import contextlib
import functools
import inspect
import textwrap

from collections import ChainMap

import ibis.expr.datatypes as dt

from ibis.bigquery.udf.find import find_names
from ibis.bigquery.udf.rewrite import rewrite

4 changes: 3 additions & 1 deletion ibis/bigquery/udf/tests/test_core.py
@@ -4,7 +4,9 @@

import pytest

from ibis.bigquery.udf.core import SymbolTable, PythonToJavaScriptTranslator
from ibis.bigquery.udf.core import PythonToJavaScriptTranslator, SymbolTable

pytestmark = pytest.mark.bigquery


def test_symbol_table():
5 changes: 5 additions & 0 deletions ibis/bigquery/udf/tests/test_find.py
@@ -1,7 +1,12 @@
import ast

import pytest

from ibis.bigquery.udf.find import find_names
from ibis.util import is_iterable

pytestmark = pytest.mark.bigquery


def parse_expr(expr):
body = parse_stmt(expr)
15 changes: 6 additions & 9 deletions ibis/bigquery/udf/tests/test_udf_execute.py
@@ -1,31 +1,28 @@
import os

import pytest

from pytest import param

import pandas as pd
import pandas.util.testing as tm
import pytest
from pytest import param

import ibis
import ibis.expr.datatypes as dt
from ibis.bigquery import udf # noqa: E402

pytest.importorskip('google.cloud.bigquery')

pytestmark = pytest.mark.bigquery

from ibis.bigquery.tests.conftest import (
connect as bigquery_connect,
) # noqa: E402
from ibis.bigquery import udf # noqa: E402

PROJECT_ID = os.environ.get('GOOGLE_BIGQUERY_PROJECT_ID', 'ibis-gbq')
DATASET_ID = 'testing'


@pytest.fixture(scope='module')
def client():
return bigquery_connect(PROJECT_ID, DATASET_ID)
from ibis.bigquery.tests.conftest import connect

return connect(PROJECT_ID, DATASET_ID)


@pytest.fixture(scope='module')
4 changes: 1 addition & 3 deletions ibis/clickhouse/api.py
@@ -1,9 +1,7 @@
import ibis.common as com

from ibis.config import options
from ibis.clickhouse.client import ClickhouseClient
from ibis.clickhouse.compiler import dialect

from ibis.config import options

__all__ = 'compile', 'verify', 'connect', 'dialect'

18 changes: 7 additions & 11 deletions ibis/clickhouse/client.py
@@ -1,25 +1,21 @@
import re

from collections import OrderedDict
from pkg_resources import parse_version

import numpy as np
import pandas as pd
from clickhouse_driver.client import Client as _DriverClient
from pkg_resources import parse_version

import ibis.common as com
import ibis.expr.types as ir
import ibis.expr.schema as sch
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops

from ibis.config import options
from ibis.client import Query, Database, DatabaseEntity, SQLClient
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis.clickhouse.compiler import ClickhouseDialect, build_ast
from ibis.util import log
from ibis.client import Database, DatabaseEntity, Query, SQLClient
from ibis.config import options
from ibis.sql.compiler import DDL

from clickhouse_driver.client import Client as _DriverClient

from ibis.util import log

fully_qualified_re = re.compile(r"(.*)\.(?:`(.*)`|(.*))")
base_typename_re = re.compile(r"(\w+)")
4 changes: 2 additions & 2 deletions ibis/clickhouse/compiler.py
@@ -1,12 +1,12 @@
from io import StringIO

import ibis.common as com
import ibis.util as util
import ibis.expr.operations as ops
import ibis.sql.compiler as comp
import ibis.util as util

from .identifiers import quote_identifier
from .operations import _operation_registry, _name_expr
from .operations import _name_expr, _operation_registry


def build_ast(expr, context):
7 changes: 3 additions & 4 deletions ibis/clickhouse/operations.py
@@ -1,12 +1,11 @@
from io import StringIO
from datetime import date, datetime
from io import StringIO

import ibis.common as com
import ibis.util as util
import ibis.expr.types as ir
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.sql.transforms as transforms

import ibis.util as util
from ibis.clickhouse.identifiers import quote_identifier


3 changes: 2 additions & 1 deletion ibis/clickhouse/tests/conftest.py
@@ -1,7 +1,8 @@
import os
import ibis

import pytest

import ibis

CLICKHOUSE_HOST = os.environ.get('IBIS_TEST_CLICKHOUSE_HOST', 'localhost')
CLICKHOUSE_PORT = int(os.environ.get('IBIS_TEST_CLICKHOUSE_PORT', 9000))
6 changes: 4 additions & 2 deletions ibis/clickhouse/tests/test_aggregations.py
@@ -1,8 +1,10 @@
import pytest
from operator import methodcaller

import numpy as np
import pandas as pd
import pandas.util.testing as tm
from operator import methodcaller
import pytest

from ibis import literal as L

pytest.importorskip('clickhouse_driver')
7 changes: 3 additions & 4 deletions ibis/clickhouse/tests/test_client.py
@@ -1,13 +1,12 @@
from io import StringIO

import pytest
import pandas as pd
import pandas.util.testing as tm
import pytest

import ibis
import ibis.config as config
import ibis.expr.types as ir
import pandas.util.testing as tm

from ibis import literal as L

pytest.importorskip('clickhouse_driver')
@@ -86,7 +85,7 @@ def logger(x):

def test_sql_query_limits(alltypes):
table = alltypes
with config.option_context('sql.default_limit', 100_000):
with config.option_context('sql.default_limit', 100000):
# table has 25 rows
assert len(table.execute()) == 7300
# comply with limit arg for TableExpr
26 changes: 7 additions & 19 deletions ibis/clickhouse/tests/test_functions.py
@@ -1,22 +1,18 @@
import math
import operator

from operator import methodcaller
from datetime import date, datetime

import pytest

from pytest import param
from operator import methodcaller

import pandas as pd
import pandas.util.testing as tm
import pytest
from pytest import param

import ibis
import ibis.expr.types as ir
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
from ibis import literal as L


clickhouse_driver = pytest.importorskip('clickhouse_driver')
pytestmark = pytest.mark.clickhouse

@@ -107,10 +103,10 @@ def test_timestamp_now(con, translate):
@pytest.mark.parametrize(
('unit', 'expected'),
[
param('y', '2009-01-01', marks=pytest.mark.xfail),
('y', '2009-01-01'),
param('m', '2009-05-01', marks=pytest.mark.xfail),
param('d', '2009-05-17', marks=pytest.mark.xfail),
param('w', '2009-05-11', marks=pytest.mark.xfail),
('d', '2009-05-17'),
('w', '2009-05-11'),
('h', '2009-05-17 12:00:00'),
('minute', '2009-05-17 12:34:00'),
],
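For context, a sketch of the kind of expression these newly-passing units exercise (the table and compile call are illustrative, not taken from the test module):

.. code-block:: python

    import ibis

    t = ibis.table([('ts', 'timestamp')], name='t')
    # The 'y', 'd', and 'w' truncations are un-xfailed by this hunk.
    expr = t.ts.truncate('d')
    print(ibis.clickhouse.compile(expr))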
@@ -498,14 +494,6 @@ def test_numeric_builtins_work(con, alltypes, df, translate):
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(
raises=clickhouse_driver.errors.UnknownTypeError,
reason=(
'Newer clickhouse server uses Nullable(Nothing) type '
'for Null values which is currently unhandled by '
'clickhouse-driver'
),
)
def test_null_column(alltypes, translate):
t = alltypes
nrows = t.count().execute()
2 changes: 1 addition & 1 deletion ibis/clickhouse/tests/test_identifiers.py
@@ -1,6 +1,6 @@
import ibis
import pytest

import ibis

pytest.importorskip('clickhouse_driver')
pytestmark = pytest.mark.clickhouse
1 change: 0 additions & 1 deletion ibis/clickhouse/tests/test_literals.py
@@ -4,7 +4,6 @@
import ibis
from ibis import literal as L


pytest.importorskip('clickhouse_driver')
pytestmark = pytest.mark.clickhouse

6 changes: 3 additions & 3 deletions ibis/clickhouse/tests/test_operators.py
@@ -1,15 +1,15 @@
import pytest
import operator
from datetime import date, datetime

import numpy as np
import pandas as pd
import pandas.util.testing as tm
from datetime import date, datetime
import pytest

import ibis
import ibis.expr.datatypes as dt
from ibis import literal as L


pytest.importorskip('clickhouse_driver')
pytestmark = pytest.mark.clickhouse

10 changes: 2 additions & 8 deletions ibis/clickhouse/tests/test_select.py
@@ -1,12 +1,10 @@
import sys
import pytest
import pandas as pd
import pandas.util.testing as tm
import pytest

import ibis
import ibis.common as com


driver = pytest.importorskip('clickhouse_driver')
pytestmark = pytest.mark.clickhouse

@@ -101,11 +99,7 @@ def test_subquery(alltypes, df):
)

result['count'] = result['count'].astype('int64')

check_column_type = sys.version_info.major >= 3
tm.assert_frame_equal(
result, expected, check_column_type=check_column_type
)
tm.assert_frame_equal(result, expected)


def test_simple_scalar_aggregates(db, alltypes):
1 change: 0 additions & 1 deletion ibis/clickhouse/tests/test_types.py
@@ -1,6 +1,5 @@
import pytest


pytest.importorskip('clickhouse_driver')
pytestmark = pytest.mark.clickhouse

9 changes: 4 additions & 5 deletions ibis/client.py
@@ -1,13 +1,12 @@
import abc

from ibis.config import options

import ibis.util as util
import ibis.common as com
import ibis.expr.types as ir
import ibis.expr.schema as sch
import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
import ibis.sql.compiler as comp
import ibis.util as util
from ibis.config import options


class Client:
6 changes: 2 additions & 4 deletions ibis/config.py
@@ -17,13 +17,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import pprint
import re

import warnings
from collections import namedtuple
from contextlib import contextmanager
import pprint
import warnings


DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver')
RegisteredOption = namedtuple(