167 changes: 131 additions & 36 deletions Makefile
@@ -1,14 +1,66 @@
.PHONY: all clean develop typecheck stop build start load restart init test testmost testfast testparams docclean doc
.PHONY: all clean develop typecheck stop build start load restart init test testmost testfast testparams docclean doc black

SHELL := /bin/bash
MAKEFILE_DIR = $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST)))))

# PYTHON_VERSION determines which `./ci/requirements-$PYTHON_VERSION-dev.yml` file
# will be used for creating the ibis image (see `./ci/Dockerfile.dev`
# and `./ci/docker-compose.yml` for additional info)
# you can use `3.6` or `3.7` for now
PYTHON_VERSION := 3.6
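For example, the version can be overridden on the command line; a sketch (the `ibis:3.7` tag follows the `ibis:${PYTHON_VERSION}` naming in `ci/docker-compose.yml`):

    $ make build PYTHON_VERSION=3.7    # builds ibis:3.7 from ci/requirements-3.7-dev.yml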
PYTHONHASHSEED := "random"

# docker specific
COMPOSE_FILE := "$(MAKEFILE_DIR)/ci/docker-compose.yml"
DOCKER := docker-compose -f $(COMPOSE_FILE)
DOCKER_RUN := PYTHON_VERSION=${PYTHON_VERSION} $(DOCKER) run --rm
DOCKER := PYTHON_VERSION=$(PYTHON_VERSION) docker-compose -f $(COMPOSE_FILE)
DOCKER_UP := $(DOCKER) up --remove-orphans -d --no-build
DOCKER_RUN := $(DOCKER) run --rm
DOCKER_BUILD := $(DOCKER) build
DOCKER_STOP := $(DOCKER) rm --force --stop

# command to be executed inside the docker container
DOCKER_RUN_COMMAND := echo "you should do 'make docker_run DOCKER_RUN_COMMAND=[your command]'"
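A concrete invocation looks like this (essentially the command the Azure pipeline below uses to show the conda environment):

    $ make docker_run PYTHON_VERSION=3.6 DOCKER_RUN_COMMAND="conda list"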

# all backends that ibis supports
BACKENDS := clickhouse impala kudu-master kudu-tserver mysql omniscidb parquet postgres sqlite

# backends that are implemented as containers and can be launched through `docker-compose`
SERVICES := omniscidb postgres mysql clickhouse impala kudu-master kudu-tserver

# the variable contains backends for which test datasets can be automatically loaded
LOADS := sqlite parquet postgres clickhouse omniscidb mysql impala

CURRENT_SERVICES := $(shell $(MAKEFILE_DIR)/ci/backends-to-start.sh "$(BACKENDS)" "$(SERVICES)")
CURRENT_LOADS := $(shell $(MAKEFILE_DIR)/ci/backends-to-start.sh "$(BACKENDS)" "$(LOADS)")
WAITER_COMMAND := $(shell $(MAKEFILE_DIR)/ci/dockerize.sh $(CURRENT_SERVICES))
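Because CURRENT_SERVICES is the intersection of BACKENDS with SERVICES, a partial environment can be started by overriding BACKENDS, e.g. (a sketch):

    $ make start BACKENDS="postgres impala"    # brings up only the postgres and impala containers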

# pytest specific options
PYTEST_MARKERS := $(shell $(MAKEFILE_DIR)/ci/backends-markers.sh $(BACKENDS))
PYTEST_DOCTEST_OPTIONS := --doctest-modules --doctest-ignore-import-errors
PYTEST_OPTIONS :=
SERVICES := omnisci postgres mysql clickhouse impala kudu-master kudu-tserver

REMOVE_COMPILED_PYTHON_SCRIPTS := (find /ibis -name "*.py[co]" -delete > /dev/null 2>&1 || true)

LOGLEVEL := WARNING
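LOGLEVEL is forwarded into the ibis container by the `load` target, so a more verbose data load is, for example (a sketch):

    $ make load LOGLEVEL=DEBUG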


## Targets for code checks

typecheck:
@mypy --ignore-missing-imports $(MAKEFILE_DIR)/ibis

lint:
flake8

black:
# check that black formatting would not be applied
black --check .

check_pre_commit_hooks:
# check if all pre-commit hooks are passing
pre-commit run --all-files

## Targets for setup development environment

clean:
python setup.py clean
@@ -19,70 +71,113 @@ develop: clean
python setup.py develop
pre-commit install

typecheck:
@mypy --ignore-missing-imports $(MAKEFILE_DIR)/ibis

lint:
flake8
## DOCKER specific targets

# Targets for code checks inside containers

docker_lint: build
$(DOCKER_RUN) ibis flake8

docker_black: build
$(DOCKER_RUN) ibis black --check .

docker_check_pre_commit_hooks: build
# check if all pre-commit hooks are passing inside ibis container
$(DOCKER_RUN) ibis pre-commit run --all-files

# Targets for manipulating docker containers

stop:
# stop all running docker compose services
$(DOCKER) rm --force --stop ${SERVICES}
# stop all running docker compose services and remove them
$(DOCKER_STOP) $(CURRENT_SERVICES)

start:
# start all docker compose services
$(DOCKER_UP) $(CURRENT_SERVICES)

build:
# build the ibis image
$(DOCKER) build --pull ibis
# build the ibis image
$(DOCKER_BUILD) ibis

start:
# start all docker compose services
$(DOCKER) up --remove-orphans -d --no-build ${SERVICES}
# wait for services to start
$(DOCKER_RUN) waiter
wait:
# wait for services to start
$(DOCKER_RUN) waiter $(WAITER_COMMAND)
DOCKER_CODE=$(shell echo $$?) ./ci/check-services.sh $(CURRENT_SERVICES)

load:
$(DOCKER_RUN) -e LOGLEVEL ibis ci/load-data.sh
# load datasets for testing purposes
$(DOCKER_RUN) -e LOGLEVEL=$(LOGLEVEL) ibis ./ci/load-data.sh $(CURRENT_LOADS)

restart: stop
$(MAKE) start
$(MAKE) wait

init: restart
$(MAKE) build
$(MAKE) load

testparallel:
PYTHONHASHSEED=${PYTHONHASHSEED} $(MAKEFILE_DIR)/ci/test.sh -n auto -m 'not udf' -k 'not test_import_time' \
--doctest-modules --doctest-ignore-import-errors ${PYTEST_OPTIONS}
# Targets for testing ibis inside docker containers

test: init
# use this target to run backend-specific tests
$(DOCKER_RUN) -e PYTHONHASHSEED="$(PYTHONHASHSEED)" ibis bash -c "${REMOVE_COMPILED_PYTHON_SCRIPTS} && \
pytest $(PYTEST_DOCTEST_OPTIONS) $(PYTEST_OPTIONS) ${PYTEST_MARKERS} -k 'not test_import_time'"
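PYTEST_OPTIONS is passed through to pytest unchanged, so extra flags can be supplied from the command line, e.g. (a sketch):

    $ make test PYTHON_VERSION=3.6 PYTEST_OPTIONS="-x"    # stop on the first failure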

testparallel: init
$(DOCKER_RUN) -e PYTHONHASHSEED="$(PYTHONHASHSEED)" ibis bash -c "${REMOVE_COMPILED_PYTHON_SCRIPTS} && \
pytest $(PYTEST_DOCTEST_OPTIONS) $(PYTEST_OPTIONS) ${PYTEST_MARKERS} -n auto -m 'not udf' -k 'not test_import_time'"

test:
PYTHONHASHSEED=${PYTHONHASHSEED} $(MAKEFILE_DIR)/ci/test.sh ${PYTEST_OPTIONS} -k 'not test_import_time' \
--doctest-modules --doctest-ignore-import-errors
testall:
$(DOCKER_RUN) -e PYTHONHASHSEED="$(PYTHONHASHSEED)" ibis bash -c "${REMOVE_COMPILED_PYTHON_SCRIPTS} && \
pytest $(PYTEST_DOCTEST_OPTIONS) $(PYTEST_OPTIONS) -k 'not test_import_time'"

testmost:
PYTHONHASHSEED=${PYTHONHASHSEED} $(MAKEFILE_DIR)/ci/test.sh -n auto -m 'not (udf or impala or hdfs)' -k 'not test_import_time' \
--doctest-modules --doctest-ignore-import-errors ${PYTEST_OPTIONS}
$(DOCKER_RUN) -e PYTHONHASHSEED="$(PYTHONHASHSEED)" ibis bash -c "${REMOVE_COMPILED_PYTHON_SCRIPTS} && \
pytest $(PYTEST_DOCTEST_OPTIONS) $(PYTEST_OPTIONS) -n auto -m 'not (udf or impala or hdfs)' -k 'not test_import_time'"

testfast:
PYTHONHASHSEED=${PYTHONHASHSEED} $(MAKEFILE_DIR)/ci/test.sh -n auto -m 'not (udf or impala or hdfs or bigquery)' -k 'not test_import_time' \
--doctest-modules --doctest-ignore-import-errors ${PYTEST_OPTIONS}
$(DOCKER_RUN) -e PYTHONHASHSEED="$(PYTHONHASHSEED)" ibis bash -c "${REMOVE_COMPILED_PYTHON_SCRIPTS} && \
pytest $(PYTEST_DOCTEST_OPTIONS) $(PYTEST_OPTIONS) -n auto -m 'not (udf or impala or hdfs or bigquery)' -k 'not test_import_time'"

testpandas:
$(DOCKER_RUN) -e PYTHONHASHSEED="$(PYTHONHASHSEED)" ibis bash -c "${REMOVE_COMPILED_PYTHON_SCRIPTS} && \
pytest $(PYTEST_DOCTEST_OPTIONS) $(PYTEST_OPTIONS) -n auto -m 'pandas' -k 'not test_import_time'"

testspark:
$(DOCKER_RUN) -e PYTHONHASHSEED="$(PYTHONHASHSEED)" ibis bash -c "${REMOVE_COMPILED_PYTHON_SCRIPTS} && \
pytest $(PYTEST_DOCTEST_OPTIONS) $(PYTEST_OPTIONS) -n auto -m 'pyspark' -k 'not test_import_time'"

testlocal:
PYTHONHASHSEED=${PYTHONHASHSEED} pytest -n auto -m 'not (udf or impala or hdfs or bigquery or mysql or mapd or postgresql or clickhouse)' -k 'not test_import_time' \
${PYTEST_OPTIONS}
fastopt:
@echo -m 'not (backend or bigquery or clickhouse or hdfs or impala or kudu or omniscidb or mysql or postgis or postgresql or superuser or udf)'
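`fastopt` only echoes a marker expression; running it prints the literal string below (note the shell strips the quotes):

    $ make fastopt
    -m not (backend or bigquery or clickhouse or hdfs or impala or kudu or omniscidb or mysql or postgis or postgresql or superuser or udf)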

# Targets for documentation builds

docclean:
$(DOCKER_RUN) ibis-docs rm -rf /tmp/docs.ibis-project.org

builddoc:
# build the ibis-docs image
$(DOCKER) build ibis-docs
builddoc: build
# build the ibis-docs image
$(DOCKER_BUILD) ibis-docs

doc: builddoc docclean
$(DOCKER_RUN) ibis-docs ping -c 1 impala
$(DOCKER_RUN) ibis-docs rm -rf /tmp/docs.ibis-project.org
$(DOCKER_RUN) ibis-docs git clone --branch gh-pages https://github.com/ibis-project/docs.ibis-project.org /tmp/docs.ibis-project.org --depth 1
$(DOCKER_RUN) ibis-docs find /tmp/docs.ibis-project.org -maxdepth 1 ! -wholename /tmp/docs.ibis-project.org \
$(DOCKER_RUN) ibis-docs find /tmp/docs.ibis-project.org \
-maxdepth 1 \
! -wholename /tmp/docs.ibis-project.org \
! -name '*.git' \
! -name '.' \
! -name 'CNAME' \
! -name CNAME \
! -name '*.nojekyll' \
-exec rm -rf {} \;
$(DOCKER_RUN) ibis-docs sphinx-build -b html docs/source /tmp/docs.ibis-project.org -W -T

# Targets for running commands inside the ibis and ibis-docs containers

docker_run: build
$(DOCKER_RUN) ibis $(DOCKER_RUN_COMMAND)

docker_docs_run: builddoc
$(DOCKER_RUN) ibis-docs $(DOCKER_RUN_COMMAND)
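Both pass-through targets rely on the DOCKER_RUN_COMMAND variable defined at the top of this Makefile; the docs variant is used the same way, e.g. (as in the pipeline below):

    $ make docker_docs_run PYTHON_VERSION=3.6 DOCKER_RUN_COMMAND="conda list"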
4 changes: 2 additions & 2 deletions README.md
@@ -5,7 +5,6 @@
| Documentation | [![Documentation Status](https://img.shields.io/badge/docs-docs.ibis--project.org-blue.svg)](http://docs.ibis-project.org) |
| Conda packages | [![Anaconda-Server Badge](https://anaconda.org/conda-forge/ibis-framework/badges/version.svg)](https://anaconda.org/conda-forge/ibis-framework) |
| PyPI | [![PyPI](https://img.shields.io/pypi/v/ibis-framework.svg)](https://pypi.org/project/ibis-framework) |
| CircleCI | [![CircleCI Status](https://circleci.com/gh/ibis-project/ibis.svg?style=shield&circle-token=b84ff8383cbb0d6788ee0f9635441cb962949a4f)](https://circleci.com/gh/ibis-project/ibis/tree/master) |
| Azure | [![Azure Status](https://dev.azure.com/ibis-project/ibis/_apis/build/status/ibis-project.ibis)](https://dev.azure.com/ibis-project/ibis/_build) |
| Coverage | [![Codecov branch](https://img.shields.io/codecov/c/github/ibis-project/ibis/master.svg)](https://codecov.io/gh/ibis-project/ibis) |

@@ -38,6 +37,7 @@ Ibis currently provides tools for interacting with the following systems:
- [Pandas](https://pandas.pydata.org/) [DataFrames](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe) (Experimental)
- [Clickhouse](https://clickhouse.yandex)
- [BigQuery](https://cloud.google.com/bigquery)
- [OmniSci](https://www.omnisci.com) (Experimental)
- [OmniSciDB](https://www.omnisci.com) (Experimental)
- [Spark](https://spark.apache.org) (Experimental)

Learn more about using the library at http://docs.ibis-project.org.
181 changes: 8 additions & 173 deletions azure-pipelines.yml
@@ -1,176 +1,11 @@
jobs:
- job: WindowsTest
pool:
vmImage: 'VS2017-Win2016'
strategy:
maxParallel: 3
matrix:
py35:
python.version: "3.5"
conda.version: "4.5"
conda.env: "ibis_3.5"
py36:
python.version: "3.6"
conda.version: "4.6"
conda.env: "ibis_3.6"
py37:
python.version: "3.7"
conda.version: "4.6"
conda.env: "ibis_3.7"
variables:
AZURECI: 1
steps:
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
displayName: Add conda to PATH

- script: |
conda config --set always_yes True --set show_channel_urls True
conda config --add channels conda-forge
displayName: 'Set conda configuration'
- script: conda update --all
displayName: 'Update conda'

- script: conda install conda=$(conda.version)
displayName: 'Install an appropriate conda version'

- script: conda create --name $(conda.env) python=$(python.version) numpy pandas pytables ruamel.yaml jinja2 pyarrow multipledispatch pymysql sqlalchemy psycopg2 graphviz click mock plumbum flake8 geopandas
displayName: 'Create conda environment'

- script: |
call activate $(conda.env)
pip install -U pytest pytest-xdist
displayName: 'Install latest pytest'
- script: conda info
displayName: 'Show conda info'

- script: conda list --name $(conda.env)
displayName: 'Show installed packages'

- script: |
call activate $(conda.env)
python -c "import numpy; import pandas"
displayName: 'Import numpy and pandas'
- script: |
call activate $(conda.env)
flake8
displayName: 'Lint'
#- script: choco install -y mariadb --version=10.3.11
# displayName: 'Install mariadb (mysql) from chocolatey'

# - script: '"C:\\Program Files\\MariaDB 10.3\\bin\\mysql" -u root -e "CREATE OR REPLACE USER ibis@localhost IDENTIFIED BY ''ibis''"'
# displayName: 'Create ibis user and password in MySQL database'

# - script: '"C:\\Program Files\\MariaDB 10.3\\bin\\mysql" -u root -e "GRANT ALL PRIVILEGES ON *.* TO ibis@localhost"'
# displayName: 'Setup privileges for ibis user in MySQL'

#- script: choco install -y postgresql10 --params '/Password:postgres'
#displayName: 'Install postgres from chocolatey'

- script: |
call activate $(conda.env)
python setup.py develop
displayName: 'Install ibis'
- script: |
call activate $(conda.env)
python ci/datamgr.py download
displayName: 'Download data'
# - script: |
# call activate $(conda.env)
# python ci/datamgr.py mysql
# displayName: 'Load MySQL data'

#- script: |
#call activate $(conda.env)
#python ci/datamgr.py postgres --psql-path="C:/Program Files/PostgreSQL/10/bin/psql.exe" -t functional_alltypes -t diamonds -t batting -t awards_players
#displayName: 'Load PostgreSQL data'

- script: |
call activate $(conda.env)
python ci/datamgr.py sqlite
displayName: 'Load SQLite data'
- script: |
call activate $(conda.env)
python ci/datamgr.py parquet -i
displayName: 'Load Parquet data'
- script: |
call activate $(conda.env)
pytest --tb=short --junitxml="junit-$(python.version).xml" -n auto -m "not backend and not clickhouse and not impala and not hdfs and not bigquery and not mapd and not mysql and not postgis and not postgresql" -ra ibis
displayName: 'Run tests'
# publish test results
- task: PublishTestResults@2
displayName: 'Publish test results from pytest JUnitXML'
inputs:
testResultsFiles: junit-$(python.version).xml
testRunTitle: 'Publish test results'
mergeTestResults: False
condition: succeededOrFailed() # pass or fail, but not cancelled

- job: WindowsCondaBuild
pool:
vmImage: 'VS2017-Win2016'
strategy:
maxParallel: 3
matrix:
py36:
python.version: "3.6"
conda.env: "ibis_3.6"
py37:
python.version: "3.7"
conda.env: "ibis_3.7"
steps:
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
displayName: Add conda to PATH

- script: |
conda config --set always_yes True --set show_channel_urls True
conda config --add channels conda-forge
displayName: 'Set conda configuration'
- script: conda update --all
displayName: 'Update conda'

- script: conda create --name $(conda.env) python=$(python.version) conda-build click jinja2 ruamel.yaml plumbum
displayName: 'Create conda environment'

- script: |
call activate $(conda.env)
python setup.py develop
displayName: 'Install ibis'
- script: |
call activate $(conda.env)
python ci/feedstock.py clone
displayName: 'Clone conda-forge recipe'
- script: |
call activate $(conda.env)
python ci/feedstock.py update
displayName: 'Update conda-forge recipe'
- script: |
call activate $(conda.env)
python ci/feedstock.py build --python=$(python.version)
displayName: 'Build conda package from conda-forge recipe'
- script: |
call activate $(conda.env)
python ci/feedstock.py deploy C:/Miniconda/envs/$(conda.env)/conda-bld conda win-64
displayName: 'Copy conda package to artifact directory'
# publish sdist and wheel and conda package
- task: PublishBuildArtifacts@1
displayName: 'Publish conda package to Azure'
inputs:
pathToPublish: conda
artifactName: conda
condition: and(succeeded(), eq(variables['System.PullRequest.IsFork'], 'False'))
- template: ci/azure/linux.yml
parameters:
name: Linux
vmImage: ubuntu-16.04

- template: ci/azure/windows.yml
parameters:
name: Windows
vmImage: windows-2019
70 changes: 64 additions & 6 deletions benchmarks/benchmarks.py
@@ -3,6 +3,7 @@

import ibis
import ibis.expr.datatypes as dt
from ibis.pandas.udf import udf


def make_t(name='t'):
@@ -138,7 +139,7 @@ def time_impala_large_expr_compile(self):
class PandasBackend:
def setup(self):
n = 30 * int(2e5)
data = pd.DataFrame(
self.data = pd.DataFrame(
{
'key': np.random.choice(16000, size=n),
'low_card_key': np.random.choice(30, size=n),
@@ -155,7 +156,7 @@ def setup(self):
}
)

t = ibis.pandas.connect({'df': data}).table('df')
t = ibis.pandas.connect({'df': self.data}).table('df')

self.high_card_group_by = t.groupby(t.key).aggregate(
avg_value=t.value.mean()
@@ -180,19 +181,61 @@ def setup(self):
['low_card_key', 'key', 'value']
].sort_by(['low_card_key', 'key'])

low_card_window = ibis.trailing_range_window(
low_card_rolling_window = ibis.trailing_range_window(
ibis.interval(days=2),
order_by=t.repeated_timestamps,
group_by=t.low_card_key,
)
self.low_card_grouped_rolling = t.value.mean().over(low_card_window)
self.low_card_grouped_rolling = t.value.mean().over(
low_card_rolling_window
)

high_card_window = ibis.trailing_range_window(
high_card_rolling_window = ibis.trailing_range_window(
ibis.interval(days=2),
order_by=t.repeated_timestamps,
group_by=t.key,
)
self.high_card_grouped_rolling = t.value.mean().over(high_card_window)
self.high_card_grouped_rolling = t.value.mean().over(
high_card_rolling_window
)

@udf.reduction(['double'], 'double')
def my_mean(series):
return series.mean()

self.low_card_grouped_rolling_udf_mean = my_mean(t.value).over(
low_card_rolling_window
)
self.high_card_grouped_rolling_udf_mean = my_mean(t.value).over(
high_card_rolling_window
)

@udf.analytic(['double'], 'double')
def my_zscore(series):
return (series - series.mean()) / series.std()

low_card_window = ibis.window(group_by=t.low_card_key)

high_card_window = ibis.window(group_by=t.key)

self.low_card_window_analytics_udf = my_zscore(t.value).over(
low_card_window
)
self.high_card_window_analytics_udf = my_zscore(t.value).over(
high_card_window
)

@udf.reduction(['double', 'double'], 'double')
def my_wm(v, w):
return np.average(v, weights=w)

self.low_card_grouped_rolling_udf_wm = my_wm(t.value, t.value).over(
low_card_rolling_window
)

self.high_card_grouped_rolling_udf_wm = my_wm(t.value, t.value).over(
high_card_rolling_window
)

def time_high_cardinality_group_by(self):
self.high_card_group_by.execute()
@@ -223,3 +266,18 @@ def time_low_card_grouped_rolling(self):

def time_high_card_grouped_rolling(self):
self.high_card_grouped_rolling.execute()

def time_low_card_grouped_rolling_udf(self):
self.low_card_grouped_rolling_udf_mean.execute()

def time_high_card_grouped_rolling_udf(self):
self.high_card_grouped_rolling_udf_mean.execute()

def time_low_card_window_analytics_udf(self):
self.low_card_window_analytics_udf.execute()

def time_high_card_grouped_rolling_udf_wm(self):
self.high_card_grouped_rolling_udf_wm.execute()

def time_low_card_grouped_rolling_udf_wm(self):
self.low_card_grouped_rolling_udf_wm.execute()
10 changes: 5 additions & 5 deletions ci/.env
@@ -18,10 +18,10 @@ IBIS_TEST_POSTGRES_DATABASE=ibis_testing
IBIS_TEST_CLICKHOUSE_HOST=clickhouse
IBIS_TEST_CLICKHOUSE_PORT=9000
IBIS_TEST_CLICKHOUSE_DATABASE=ibis_testing
IBIS_TEST_OMNISCI_HOST=omnisci
IBIS_TEST_OMNISCI_PORT=6274
IBIS_TEST_OMNISCI_DATABASE=ibis_testing
IBIS_TEST_OMNISCI_USER=mapd
IBIS_TEST_OMNISCI_PASSWORD=HyperInteractive
IBIS_TEST_OMNISCIDB_HOST=omniscidb
IBIS_TEST_OMNISCIDB_PORT=6274
IBIS_TEST_OMNISCIDB_DATABASE=ibis_testing
IBIS_TEST_OMNISCIDB_USER=admin
IBIS_TEST_OMNISCIDB_PASSWORD=HyperInteractive
GOOGLE_BIGQUERY_PROJECT_ID=ibis-gbq
GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcloud-service-key.json
16 changes: 8 additions & 8 deletions ci/Dockerfile.dev
@@ -5,16 +5,16 @@ RUN apt-get -qq update --yes \
build-essential git make clang libboost-dev postgresql-client ca-certificates \
&& rm -rf /var/lib/apt/lists/*

ARG PYTHON
ADD ci/requirements-$PYTHON-dev.yml /
ARG PYTHON_VERSION
ADD ci/requirements-$PYTHON_VERSION-dev.yml /

RUN conda config --add channels conda-forge \
&& conda update --all --yes --quiet \
&& conda env create --name ibis-env --file /requirements-$PYTHON-dev.yml \
&& conda install --yes conda-build \
&& conda clean --all --yes
RUN /opt/conda/bin/conda config --add channels conda-forge \
&& /opt/conda/bin/conda update --all --yes --quiet \
&& /opt/conda/bin/conda env create --name ibis-env --file /requirements-$PYTHON_VERSION-dev.yml \
&& /opt/conda/bin/conda install --yes conda-build \
&& /opt/conda/bin/conda clean --all --yes

RUN echo 'source activate ibis-env && exec "$@"' > activate.sh
RUN echo 'source /opt/conda/bin/activate ibis-env && exec "$@"' > activate.sh

COPY . /ibis
WORKDIR /ibis
16 changes: 8 additions & 8 deletions ci/Dockerfile.docs
@@ -1,19 +1,19 @@
ARG PYTHON
FROM ibis:$PYTHON
ARG PYTHON_VERSION
FROM ibis:$PYTHON_VERSION

# fonts are for docs
RUN apt-get -qq update --yes \
&& apt-get -qq install --yes ttf-dejavu \
&& apt-get -qq install --yes ttf-dejavu iputils-ping \
&& rm -rf /var/lib/apt/lists/*

ADD ci/requirements-docs.yml /

RUN conda config --add channels conda-forge \
&& conda update --all --yes \
&& conda install --name ibis-env --yes --file /requirements-docs.yml \
&& conda clean --all --yes
RUN /opt/conda/bin/conda config --add channels conda-forge \
&& /opt/conda/bin/conda update --all --yes \
&& /opt/conda/bin/conda install --name ibis-env --yes --file /requirements-docs.yml \
&& /opt/conda/bin/conda clean --all --yes

RUN echo 'source activate ibis-env && exec "$@"' > activate.sh
RUN echo 'source /opt/conda/bin/activate ibis-env && exec "$@"' > activate.sh

COPY . /ibis
WORKDIR /ibis
266 changes: 266 additions & 0 deletions ci/azure/linux.yml
@@ -0,0 +1,266 @@
parameters:
name: ''
vmImage: ''

jobs:
- job: ${{ parameters.name }}Test

pool:
vmImage: ${{ parameters.vmImage }}

variables:
AZURECI: 1
COMPOSE_FILE: ci/docker-compose.yml
PYTHONHASHSEED: "random"
PYTEST_MARK_EXPRESSION: "not udf"
BACKENDS: "clickhouse impala kudu-master kudu-tserver mysql omniscidb parquet postgres sqlite"
strategy:
matrix:
py36:
PYTHON_MAJOR_VERSION: "3"
PYTHON_MINOR_VERSION: "6"
PYTHON_VERSION: $(PYTHON_MAJOR_VERSION).$(PYTHON_MINOR_VERSION)
PYTHON_NO_DOT_VERSION: $(PYTHON_MAJOR_VERSION)$(PYTHON_MINOR_VERSION)
py37:
PYTHON_MAJOR_VERSION: "3"
PYTHON_MINOR_VERSION: "7"
PYTHON_VERSION: $(PYTHON_MAJOR_VERSION).$(PYTHON_MINOR_VERSION)
PYTHON_NO_DOT_VERSION: $(PYTHON_MAJOR_VERSION)$(PYTHON_MINOR_VERSION)
py38:
PYTHON_MAJOR_VERSION: "3"
PYTHON_MINOR_VERSION: "8"
PYTHON_VERSION: $(PYTHON_MAJOR_VERSION).$(PYTHON_MINOR_VERSION)
PYTHON_NO_DOT_VERSION: $(PYTHON_MAJOR_VERSION)$(PYTHON_MINOR_VERSION)
# pymapd and pyspark are not working on Ibis with Python 3.8
# https://github.com/ibis-project/ibis/issues/2091
# https://github.com/ibis-project/ibis/issues/2090
PYTEST_MARK_EXPRESSION: "not udf and not omniscidb and not spark and not pyspark"
BACKENDS: "clickhouse impala kudu-master kudu-tserver mysql parquet postgres sqlite"

steps:
- bash: |
if [ -n "${GCLOUD_SERVICE_KEY}" ]; then
mkdir -p /tmp/ibis
base64 --decode --ignore-garbage <<< "${GCLOUD_SERVICE_KEY}" > /tmp/ibis/gcloud-service-key.json
fi
env:
GCLOUD_SERVICE_KEY: $(GCLOUD_SERVICE_KEY)
displayName: 'Setup BigQuery credentials'
condition: eq(variables['System.PullRequest.IsFork'], 'False')
- bash: make start PYTHON_VERSION=$PYTHON_VERSION BACKENDS="${BACKENDS}"
displayName: 'Start databases'

- bash: make wait PYTHON_VERSION=$PYTHON_VERSION BACKENDS="${BACKENDS}"
displayName: 'Wait for databases'

- bash: docker ps
displayName: 'Show running containers'

- bash: make build PYTHON_VERSION=$PYTHON_VERSION
displayName: 'Build ibis image'

- bash: docker images
displayName: 'List docker images'

- bash: make docker_lint PYTHON_VERSION=$PYTHON_VERSION
displayName: 'Lint'

# TODO: change match-dir when docstrings are fixed for other backends
- bash: docker-compose run ibis pydocstyle --match-dir="(ibis|omniscidb)"
displayName: "Docstring check"

- bash: make docker_check_pre_commit_hooks PYTHON_VERSION=$PYTHON_VERSION
displayName: 'Ensure all pre-commit hooks pass'

- bash: |
sudo mkdir -p /tmp/ibis/test-reports/pytest
mkdir -p /tmp/env
displayName: 'Make artifacts directories'
- bash: make docker_run PYTHON_VERSION=$PYTHON_VERSION DOCKER_RUN_COMMAND="conda list"
displayName: 'Show packages in conda environment'

- bash: make docker_run PYTHON_VERSION=$PYTHON_VERSION DOCKER_RUN_COMMAND="conda list --export > /tmp/env/env.yml"
displayName: 'Capture packages in conda environment'

- bash: make load PYTHON_VERSION=$PYTHON_VERSION BACKENDS="${BACKENDS}"
displayName: 'Load test datasets'

- bash: |
docker-compose run \
-e PYTHONHASHSEED=$PYTHONHASHSEED \
-e AZURECI=$AZURECI \
-e GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcloud-service-key.json \
ibis \
pytest ibis -m "${PYTEST_MARK_EXPRESSION}" \
-ra \
--numprocesses auto \
--doctest-modules \
--doctest-ignore-import-errors \
--junitxml=/tmp/test-reports/pytest/junit.xml \
--cov=ibis \
--cov-report=xml:/tmp/test-reports/pytest-cov/coverage.xml
displayName: 'Run tests'
# See #1954
# - bash: |
# bash <(curl -s https://codecov.io/bash) \
# -f /tmp/ibis/test-reports/pytest-cov/coverage.xml
# displayName: 'Upload coverage'

- task: PublishTestResults@2
displayName: 'Publish test results from pytest JUnitXML'
inputs:
testResultsFiles: /tmp/ibis/test-reports/pytest/junit.xml
testRunTitle: 'Publish test results'
mergeTestResults: False
condition: succeededOrFailed() # pass or fail, but not cancelled

- task: PublishPipelineArtifact@1
inputs:
path: /tmp/env/env.yml
artifact: LinuxCondaEnvironment$(PYTHON_NO_DOT_VERSION)
displayName: 'Publish Linux environment YAML to Azure'
condition: succeededOrFailed() # pass or fail, but not cancelled

- job: ${{ parameters.name }}BuildConda

pool:
vmImage: ${{ parameters.vmImage }}

variables:
PYTHON_VERSION: "3.7"
AZURECI: 1
COMPOSE_FILE: ci/docker-compose.yml

steps:
- bash: make build PYTHON_VERSION=$PYTHON_VERSION
displayName: 'Build ibis image'

- bash: docker images
displayName: 'List docker images'

- bash: make docker_run PYTHON_VERSION=$PYTHON_VERSION DOCKER_RUN_COMMAND="ci/feedstock.py test"
displayName: 'Clone, update and build conda-forge recipe'

- task: PublishPipelineArtifact@1
inputs:
path: /tmp/ibis/packages
artifact: LinuxCondaPackage
displayName: 'Publish conda package to Azure'
condition: and(succeeded(), eq(variables['System.PullRequest.IsFork'], 'False'))

- job: ${{ parameters.name }}Benchmark

pool:
vmImage: ${{ parameters.vmImage }}

variables:
PYTHON_VERSION: "3.6"
AZURECI: 1
COMPOSE_FILE: ci/docker-compose.yml

steps:
- bash: make build PYTHON_VERSION=$PYTHON_VERSION
displayName: 'Build Ibis Image'

- bash: make docker_run PYTHON_VERSION=$PYTHON_VERSION DOCKER_RUN_COMMAND='ci/benchmark.sh azure "$(Build.SourceVersion)"'
displayName: 'Run Benchmark (ASV)'

- job: ${{ parameters.name }}BuildDocs

pool:
vmImage: ${{ parameters.vmImage }}

variables:
PYTHON_VERSION: "3.6"
AZURECI: 1
COMPOSE_FILE: ci/docker-compose.yml

steps:
- bash: make start PYTHON_VERSION=$PYTHON_VERSION
displayName: 'Start databases'

- bash: make wait PYTHON_VERSION=$PYTHON_VERSION
displayName: 'Wait for databases'

- bash: make builddoc PYTHON_VERSION=$PYTHON_VERSION
displayName: 'build ibis documentation image'

- bash: docker images
displayName: 'List docker images'

- bash: docker ps
displayName: 'Show running containers'

- bash: make load PYTHON_VERSION=$PYTHON_VERSION
displayName: 'Load test datasets'

- bash: make docker_docs_run PYTHON_VERSION=$PYTHON_VERSION DOCKER_RUN_COMMAND="ping -c 1 impala"
displayName: 'Ping the impala host used in the tutorial notebooks'

- bash: |
make docker_docs_run PYTHON_VERSION=$PYTHON_VERSION DOCKER_RUN_COMMAND="git clone --branch gh-pages \
https://github.com/cpcloud/docs.ibis-project.org \
/tmp/docs.ibis-project.org"
displayName: 'Clone doc repo'
- bash: |
# remove everything from the old docs checkout except the checkout
# directory itself, git metadata, the CNAME record and .nojekyll files
docker-compose run ibis-docs \
find /tmp/docs.ibis-project.org \
-maxdepth 1 \
! -wholename /tmp/docs.ibis-project.org \
! -name '*.git' \
! -name CNAME \
! -name '*.nojekyll' \
-exec rm -rf {} \;
displayName: 'Clear out old docs'
- bash: make docker_docs_run PYTHON_VERSION=$PYTHON_VERSION DOCKER_RUN_COMMAND="conda list"
displayName: 'Show the doc env'

- bash: |
make docker_docs_run PYTHON_VERSION=$PYTHON_VERSION DOCKER_RUN_COMMAND="sphinx-build -b html \
docs/source /tmp/docs.ibis-project.org -W -T"
displayName: 'Build docs'
- task: PublishPipelineArtifact@1
inputs:
path: /tmp/ibis/docs.ibis-project.org
artifact: Documentation
displayName: 'Publish documentation to Azure'
condition: and(succeeded(), eq(variables['System.PullRequest.IsFork'], 'False'))

- bash: |
mkdir ~/.ssh
base64 --decode --ignore-garbage <<< "${IBIS_GH_TOKEN}" > ~/.ssh/id_rsa
chmod 700 ~/.ssh
chmod 600 ~/.ssh/id_rsa
# add github to known hosts
ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
sudo chown -R "${USER}":"${USER}" /tmp/ibis
pushd /tmp/ibis/docs.ibis-project.org
git remote set-url origin git@github.com:ibis-project/docs.ibis-project.org
git config user.name 'Ibis Documentation Bot'
git config user.email ''
# Add everything
git add --all .
git commit -m "Docs from ibis at $(Build.SourceVersion)"
git push --force origin gh-pages
displayName: 'Push docs to remote repo'
env:
IBIS_GH_TOKEN: $(IBIS_GH_TOKEN)
condition: |
and(eq(variables['System.PullRequest.IsFork'], 'False'),
eq(variables['Build.Repository.Name'], 'ibis-project/ibis'),
eq(variables['Build.SourceBranchName'], 'master'))
137 changes: 137 additions & 0 deletions ci/azure/windows.yml
@@ -0,0 +1,137 @@
parameters:
name: ''
vmImage: ''

jobs:
- job: ${{ parameters.name }}Test

pool:
vmImage: ${{ parameters.vmImage }}

variables:
AZURECI: 1
conda.version: "4.6"

strategy:
matrix:
py36:
python.major.version: "3"
python.minor.version: "6"
python.version: "$(python.major.version).$(python.minor.version)"
python.no.dot.version: "$(python.major.version)$(python.minor.version)"
conda.env: "ibis$(python.no.dot.version)"
py37:
python.major.version: "3"
python.minor.version: "7"
python.version: "$(python.major.version).$(python.minor.version)"
python.no.dot.version: "$(python.major.version)$(python.minor.version)"
conda.env: "ibis$(python.no.dot.version)"
py38:
python.major.version: "3"
python.minor.version: "8"
python.version: "$(python.major.version).$(python.minor.version)"
python.no.dot.version: "$(python.major.version)$(python.minor.version)"
conda.env: "ibis$(python.no.dot.version)"

steps:
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
displayName: Add conda to PATH

- script: conda config --set always_yes True --set show_channel_urls True
displayName: 'Set conda always_yes and show_channel_urls'

- script: conda config --add channels conda-forge
displayName: 'Add the conda-forge channel'

- script: conda update --all conda=$(conda.version)
displayName: 'Update conda and install an appropriate version'

- script: conda create --name $(conda.env) python=$(python.version) numpy pandas pytables ruamel.yaml jinja2 pyarrow multipledispatch pymysql "sqlalchemy>=1.1" psycopg2 graphviz click mock plumbum geopandas toolz regex
displayName: 'Create conda environment'

- script: |
call activate $(conda.env)
pip install --upgrade "pytest>=4.5"
pip install --upgrade pytest-xdist
displayName: 'Install latest pytest'
- script: conda info
displayName: 'Show conda info'

- script: conda list --name $(conda.env)
displayName: 'Show installed packages'

- script: conda list --export --name $(conda.env) > env.yml
displayName: 'Capture installed packages'

- task: PublishPipelineArtifact@1
inputs:
path: env.yml
artifact: WindowsCondaEnvironment$(python.no.dot.version)
displayName: 'Publish Windows environment YAML to Azure'
condition: succeededOrFailed() # pass or fail, but not cancelled

- script: |
call activate $(conda.env)
python -c "import numpy"
displayName: 'Import numpy'
- script: |
call activate $(conda.env)
python -c "import pandas"
displayName: 'Import pandas'
- script: choco install -y mariadb --version=10.3.16
displayName: 'Install mariadb (mysql) from chocolatey'

- script: '"C:\\Program Files\\MariaDB 10.3\\bin\\mysql" -u root -e "CREATE OR REPLACE USER ibis@localhost IDENTIFIED BY ''ibis''"'
displayName: 'Create ibis user and password in MySQL database'

- script: '"C:\\Program Files\\MariaDB 10.3\\bin\\mysql" -u root -e "GRANT ALL PRIVILEGES ON *.* TO ibis@localhost"'
displayName: 'Setup privileges for ibis user in MySQL'

- script: choco install -y postgresql10 --params '/Password:postgres'
displayName: 'Install postgres from chocolatey'

- script: |
call activate $(conda.env)
pip install -e . --no-deps --ignore-installed --no-cache-dir
displayName: 'Install ibis'
- script: |
call activate $(conda.env)
python ci/datamgr.py download
displayName: 'Download data'
- script: |
call activate $(conda.env)
python ci/datamgr.py mysql
displayName: 'Load MySQL data'
- script: |
call activate $(conda.env)
python ci/datamgr.py postgres --no-plpython --psql-path="C:/Program Files/PostgreSQL/10/bin/psql.exe" -t functional_alltypes -t diamonds -t batting -t awards_players
displayName: 'Load PostgreSQL data'
- script: |
call activate $(conda.env)
python ci/datamgr.py sqlite
displayName: 'Load SQLite data'
- script: |
call activate $(conda.env)
python ci/datamgr.py parquet -i
displayName: 'Load Parquet data'
- script: |
call activate $(conda.env)
pytest --tb=short --junitxml="junit-$(python.version).xml" -n auto -m "not backend and not clickhouse and not impala and not hdfs and not bigquery and not omniscidb and not postgis and not postgresql" -ra ibis
displayName: 'Run tests'
- task: PublishTestResults@2
displayName: 'Publish test results from pytest JUnitXML'
inputs:
testResultsFiles: junit-$(python.version).xml
testRunTitle: 'Publish test results'
mergeTestResults: False
condition: succeededOrFailed() # pass or fail, but not cancelled
30 changes: 30 additions & 0 deletions ci/backends-markers.sh
@@ -0,0 +1,30 @@
#!/bin/bash -e

# The script generates a marker for each backend, which `pytest`
# can use to run specific tests.
#
# Usage:
# $ ./ci/backends-markers.sh backend1 backend2 ...
# * arguments: space-separated list of backends
#
# Example:
# markers=`./ci/backends-markers.sh omniscidb impala` && echo $markers
#
# Output:
# '-m omniscidb or impala'

BACKENDS=$@

PYTEST_MARKERS=""

for s in ${BACKENDS[@]}
do
if [ "$PYTEST_MARKERS" == "" ]
then
PYTEST_MARKERS="-m ${PYTEST_MARKERS} ${s}"
else
PYTEST_MARKERS="${PYTEST_MARKERS} or ${s}"
fi
done

echo "'${PYTEST_MARKERS}'"
41 changes: 41 additions & 0 deletions ci/backends-to-start.sh
@@ -0,0 +1,41 @@
#!/bin/bash

# Among the backends, the script finds those that need to be launched
# by `docker-compose` or those for which test datasets should be loaded.
# The choice is made by comparing each backend with the elements of the
# USER_REQUESTED_BACKENDS variable.
#
# Usage:
# $ ./ci/backends-to-start.sh param1 param2
# * param1: string of backends
# * param2: string of backends which need to be launched by `docker-compose`
# (as docker services) before working with them, or for which test
# datasets should be loaded.
#
# Example:
# current_backends=`./ci/backends-to-start.sh "omniscidb impala parquet" "omniscidb impala"` && echo $current_backends
#
# Output:
# 'omniscidb impala'

# convert strings to arrays
BACKENDS=($(echo $1))
USER_REQUESTED_BACKENDS=($(echo $2))

# lookup table to choose backends to start
declare -A USER_REQUESTED_BACKENDS_LOOKUP
for service in ${USER_REQUESTED_BACKENDS[@]}
do
USER_REQUESTED_BACKENDS_LOOKUP[$service]=1
done

i=0
for backend in ${BACKENDS[@]}
do
if [[ ${USER_REQUESTED_BACKENDS_LOOKUP[${backend}]} ]]; then
CHOSEN_BACKENDS[${i}]=${backend}
((i++))
fi
done

echo ${CHOSEN_BACKENDS[@]}
19 changes: 0 additions & 19 deletions ci/build.sh

This file was deleted.

22 changes: 22 additions & 0 deletions ci/check-services.sh
@@ -0,0 +1,22 @@
#!/bin/bash
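# Print diagnostics for the docker compose services after the waiter has run.
# DOCKER_CODE is expected to hold the waiter's exit code: if it is zero the
# script just reports success, otherwise it dumps `docker logs` for every
# listed service. It always exits with DOCKER_CODE.
#
# Usage (a sketch of how the Makefile `wait` target calls it):
# $ DOCKER_CODE=$? ./ci/check-services.sh postgres mysql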

SERVICES=$@

echo "DOCKER_CODE: ${DOCKER_CODE}"
echo "SERVICES: ${SERVICES}"

if [ $DOCKER_CODE -eq 0 ]
then
echo "[II] Done."
else
for s in ${SERVICES}
do
docker container ls
echo "=============================================================="
echo "docker ${s} log"
echo "=============================================================="
docker logs --details $(docker ps -aqf "name=ci_${s}_1")
done

fi
exit $DOCKER_CODE
78 changes: 53 additions & 25 deletions ci/datamgr.py
@@ -1,9 +1,7 @@
#!/usr/bin/env python

import json
import logging
import os
import sys
import tempfile
import warnings
import zipfile
@@ -15,8 +13,6 @@
from plumbum import local
from toolz import dissoc

import ibis

SCRIPT_DIR = Path(__file__).parent.absolute()
DATA_DIR_NAME = 'ibis-testing-data'
DATA_DIR = Path(
@@ -26,7 +22,30 @@
TEST_TABLES = ['functional_alltypes', 'diamonds', 'batting', 'awards_players']


logger = ibis.util.get_logger('datamgr')
def get_logger(name, level=None, format=None, propagate=False):
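"""Return a configured logger; the default level comes from the LOGLEVEL environment variable (WARNING if unset)."""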
logging.basicConfig()
handler = logging.StreamHandler()

if format is None:
format = (
'%(relativeCreated)6d '
'%(name)-20s '
'%(levelname)-8s '
'%(threadName)-25s '
'%(message)s'
)
handler.setFormatter(logging.Formatter(fmt=format))
logger = logging.getLogger(name)
logger.propagate = propagate
logger.setLevel(
level
or getattr(logging, os.environ.get('LOGLEVEL', 'WARNING').upper())
)
logger.addHandler(handler)
return logger


logger = get_logger(Path(__file__).with_suffix('').name)
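Since the level is read from the environment, a more verbose local run of this script is simply, for example (a sketch):

    $ LOGLEVEL=DEBUG python ci/datamgr.py sqlite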


def recreate_database(driver, params, **kwargs):
@@ -166,7 +185,7 @@ def download(repo_url, directory):
@cli.command()
@click.option('-t', '--tables', multiple=True, default=TEST_TABLES)
@click.option('-d', '--data-directory', default=DATA_DIR)
@click.option('-i', '--ignore-missing-dependency', is_flag=True, default=False)
@click.option('-i', '--ignore-missing-dependency', is_flag=True, default=True)
def parquet(tables, data_directory, ignore_missing_dependency, **params):
try:
import pyarrow as pa # noqa: F401
@@ -207,16 +226,27 @@ def parquet(tables, data_directory, ignore_missing_dependency, **params):
required=os.name == 'nt',
default=None if os.name == 'nt' else '/usr/bin/psql',
)
def postgres(schema, tables, data_directory, psql_path, **params):
@click.option(
'--plpython/--no-plpython',
help='Create PL/Python extension in database',
default=True,
)
def postgres(schema, tables, data_directory, psql_path, plpython, **params):
psql = local[psql_path]
data_directory = Path(data_directory)
logger.info('Initializing PostgreSQL...')
engine = init_database(
'postgresql', params, schema, isolation_level='AUTOCOMMIT'
)
use_postgis = 'geo' in tables and sys.version_info >= (3, 6)

engine.execute("CREATE SEQUENCE IF NOT EXISTS test_sequence;")

use_postgis = 'geo' in tables
if use_postgis:
engine.execute("CREATE EXTENSION POSTGIS")
engine.execute("CREATE EXTENSION IF NOT EXISTS POSTGIS")

if plpython:
engine.execute("CREATE EXTENSION IF NOT EXISTS PLPYTHONU")

query = "COPY {} FROM STDIN WITH (FORMAT CSV, HEADER TRUE, DELIMITER ',')"
database = params['database']
Expand All @@ -230,14 +260,15 @@ def postgres(schema, tables, data_directory, psql_path, **params):
continue
from geoalchemy2 import Geometry, WKTElement

srid = 4326
srid = 0
df = pd.read_csv(src)
df = df[df.columns[1:]].applymap(
df[df.columns[1:]] = df[df.columns[1:]].applymap(
lambda x: WKTElement(x, srid=srid)
)
df.to_sql(
'geo',
engine,
index=False,
dtype={
"geo_point": Geometry("POINT", srid=srid),
"geo_linestring": Geometry("LINESTRING", srid=srid),
@@ -294,37 +325,39 @@ def sqlite(database, schema, tables, data_directory, **params):
@cli.command()
@click.option('-h', '--host', default='localhost')
@click.option('-P', '--port', default=6274, type=int)
@click.option('-u', '--user', default='mapd')
@click.option('-u', '--user', default='admin')
@click.option('-p', '--password', default='HyperInteractive')
@click.option('-D', '--database', default='ibis_testing')
@click.option('--protocol', default='binary')
@click.option(
'-S',
'--schema',
type=click.File('rt'),
default=str(SCRIPT_DIR / 'schema' / 'omnisci.sql'),
default=str(SCRIPT_DIR / 'schema' / 'omniscidb.sql'),
)
@click.option('-t', '--tables', multiple=True, default=TEST_TABLES + ['geo'])
@click.option('-d', '--data-directory', default=DATA_DIR)
def omnisci(schema, tables, data_directory, **params):
def omniscidb(schema, tables, data_directory, **params):
import pymapd

data_directory = Path(data_directory)
reserved_words = ['table', 'year', 'month']

# connection
logger.info('Initializing OmniSci...')
if params['database'] != 'mapd':
default_db = 'omnisci'
database = params["database"]

if database != default_db:
conn = pymapd.connect(
host=params['host'],
user=params['user'],
password=params['password'],
port=params['port'],
dbname='mapd',
dbname=default_db,
protocol=params['protocol'],
)
database = params["database"]
stmt = "DROP DATABASE {}".format(database)
stmt = "DROP DATABASE IF EXISTS {}".format(database)
try:
conn.execute(stmt)
except Exception:
@@ -378,13 +411,8 @@ def omnisci(schema, tables, data_directory, **params):
continue
df.rename(columns={df_col: column}, inplace=True)

# load geospatial data
if table == 'geo':
conn.load_table_rowwise(
table, list(df.itertuples(index=False, name=None))
)
else:
conn.load_table_columnar(table, df)
load_method = 'rows' if table == 'geo' else 'columnar'
conn.load_table(table, df, method=load_method)

conn.close()

49 changes: 23 additions & 26 deletions ci/docker-compose.yml
@@ -2,23 +2,25 @@ version: '3'
services:

postgres:
image: mdillon/postgis
image: shajekpivotal/ibis-docker-postgres-9.5
hostname: postgres
ports:
- 5432:5432
environment:
POSTGRES_PASSWORD: postgres
POSTGRES_PASSWORD: ''

mysql:
image: mariadb:10.2
hostname: mysql
ports:
- 3306:3306
- 3307:3306
environment:
MYSQL_ALLOW_EMPTY_PASSWORD: 1
MYSQL_DATABASE: ibis_testing
MYSQL_USER: ibis
MYSQL_PASSWORD: ibis
# see: https://github.com/docker-library/mariadb/issues/262
MYSQL_INITDB_SKIP_TZINFO: 1

impala:
image: ibisproject/impala:latest
@@ -41,6 +43,17 @@ services:
- 25000:25000
- 25010:25010
- 25020:25020
depends_on:
- postgres
# postgres sometimes restarts before it is ready, so
# `dockerize -wait` can return before postgres is
# actually available.
# Sleeping (80 seconds) before actually starting impala
# helps to ensure postgres will be available.
entrypoint:
- bash
- -c
- "sleep 80 && supervisord -c /etc/supervisord.conf -n"

clickhouse:
image: yandex/clickhouse-server:18.12
@@ -75,34 +88,18 @@ services:
environment:
KUDU_MASTER: "false"

omnisci:
image: omnisci/core-os-cpu:v4.6.1
hostname: omnisci
networks:
default:
aliases:
- mapd
omniscidb:
image: omnisci/core-os-cpu:v5.1.0
hostname: omniscidb

ports:
- 6274:6274
- 6278:6278
volumes:
- ./omnisci.conf:/omnisci-storage/omnisci.conf
- ./omniscidb.conf:/omnisci-storage/omnisci.conf

waiter:
image: jwilder/dockerize
command: |
dockerize -wait tcp://omnisci:6274
-wait tcp://mysql:3306
-wait tcp://postgres:5432
-wait tcp://impala:21050
-wait tcp://impala:50070
-wait tcp://kudu-master:7051
-wait tcp://kudu-master:8051
-wait tcp://kudu-tserver:7050
-wait tcp://kudu-tserver:8050
-wait tcp://clickhouse:9000
-wait-retry-interval 5s
-timeout 5m

ibis:
image: ibis:${PYTHON_VERSION:-3.6}
@@ -115,7 +112,7 @@ services:
context: ..
dockerfile: ci/Dockerfile.dev
args:
PYTHON: ${PYTHON_VERSION:-3.6}
PYTHON_VERSION: ${PYTHON_VERSION:-3.6}

ibis-docs:
image: ibis-docs:${PYTHON_VERSION:-3.6}
@@ -128,4 +125,4 @@ services:
context: ..
dockerfile: ci/Dockerfile.docs
args:
PYTHON: ${PYTHON_VERSION:-3.6}
PYTHON_VERSION: ${PYTHON_VERSION:-3.6}
41 changes: 41 additions & 0 deletions ci/dockerize.sh
@@ -0,0 +1,41 @@
#!/bin/bash
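# The script assembles the `dockerize` command used by the `waiter` service:
# for every service passed on the command line it appends the matching
# `-wait tcp://...` flags.
#
# Usage:
# $ ./ci/dockerize.sh service1 service2 ...
#
# Example:
# waiter_command=`./ci/dockerize.sh postgres clickhouse` && echo $waiter_command
#
# Output:
# dockerize -wait tcp://postgres:5432 -wait tcp://clickhouse:9000 -wait-retry-interval 5s -timeout 10m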

DOCKERIZE_CALL="dockerize"

add_wait() {
wait_string=$1
DOCKERIZE_CALL="${DOCKERIZE_CALL} ${wait_string}"
}

for service in $@; do
case "${service}" in
omniscidb)
add_wait "-wait tcp://omniscidb:6274"
;;
mysql)
add_wait "-wait tcp://mysql:3306"
;;
postgres)
add_wait "-wait tcp://postgres:5432"
;;
impala)
add_wait "-wait tcp://impala:21050"
add_wait "-wait tcp://impala:50070"
;;
kudu-master)
add_wait "-wait tcp://kudu-master:7051"
add_wait "-wait tcp://kudu-master:8051"
;;
kudu-tserver)
add_wait "-wait tcp://kudu-tserver:7050"
add_wait "-wait tcp://kudu-tserver:8050"
;;
clickhouse)
add_wait "-wait tcp://clickhouse:9000"
;;
esac
done

DOCKERIZE_CALL="${DOCKERIZE_CALL} -wait-retry-interval 5s -timeout 10m"

echo ${DOCKERIZE_CALL}
8 changes: 8 additions & 0 deletions ci/feedstock.py
@@ -80,6 +80,14 @@ def update(meta, source_path):
# XXX: because render will remove the {{ PYTHON }} variable
recipe['build']['script'] = SCRIPT

# TODO: remove this after the feedstock recipe is updated
recipe['test']['commands'] = [
'pytest --rootdir ibis -m "not (backend or bigquery or clickhouse or '
'hdfs or impala or kudu or omniscidb or mysql or postgresql or '
'superuser or udf or postgis)" --ignore ibis/tests/test_version.py '
'--tb=line'
]

updated_content = ruamel.yaml.round_trip_dump(
recipe, default_flow_style=False, width=sys.maxsize
).strip()
5 changes: 3 additions & 2 deletions ci/impalamgr.py
@@ -12,7 +12,7 @@
from plumbum.cmd import cmake, make

import ibis
from ibis.common import IbisError
from ibis.common.exceptions import IbisError
from ibis.impala.tests.conftest import IbisTestEnv

SCRIPT_DIR = Path(__file__).parent.absolute()
@@ -23,7 +23,8 @@
)


logger = ibis.util.get_logger('impalamgr')
logger = ibis.util.get_logger(Path(__file__).with_suffix('').name)


ENV = IbisTestEnv()

22 changes: 4 additions & 18 deletions ci/load-data.sh
@@ -1,28 +1,14 @@
#!/usr/bin/env bash
#!/bin/bash -e

CWD="$(dirname "${0}")"

declare -A argcommands=([sqlite]=sqlite
[parquet]="parquet -i"
[postgres]=postgres
[clickhouse]=clickhouse
[omnisci]=omnisci
[mysql]=mysql
[impala]=impala)

if [[ "$#" == 0 ]]; then
ARGS=(${!argcommands[@]}) # keys of argcommands
else
ARGS=($*)
fi

python $CWD/datamgr.py download

for arg in ${ARGS[@]}; do
for arg in $@; do
if [[ "${arg}" == "impala" ]]; then
python "${CWD}"/impalamgr.py load --data &
python "${CWD}"/impalamgr.py load --data &
else
python "${CWD}"/datamgr.py ${argcommands[${arg}]} &
python "${CWD}"/datamgr.py ${arg} &
fi
done

File renamed without changes.
52 changes: 0 additions & 52 deletions ci/requirements-3.5-dev.yml

This file was deleted.

17 changes: 13 additions & 4 deletions ci/requirements-3.6-dev.yml
@@ -1,10 +1,10 @@
channels:
- conda-forge
dependencies:
- black
- black=19.10b0
- click
- clickhouse-cityhash
- clickhouse-driver>=0.0.8
- clickhouse-driver>=0.1.3
- clickhouse-sqlalchemy
- cmake
- flake8
@@ -13,38 +13,47 @@ dependencies:
- google-cloud-bigquery>=1.0.0
- graphviz
- impyla>=0.15.0
- isort
- jinja2
- libiconv # see https://github.com/jupyter/repo2docker/issues/758
- lz4
- multipledispatch>=0.6.0
- mypy
- numpy>=1.11
- openjdk=8
- pandas>=0.21
- pip
- plumbum
- pre_commit
- pre-commit
- psycopg2
- pyarrow>=0.12
- pydata-google-auth
- pydocstyle=4.0.1
- pygit2
- pymapd>=0.12.0
- pymysql
- pyspark>=2.4.3
- pytables>=3.0.0
- pytest>=4.5
- pytest-cov
- pytest-mock
- pytest-xdist
- python=3.6
- python-graphviz
- python-hdfs>=2.0.16
- pytz
- regex
- requests
- rtree
- ruamel.yaml
- shapely
- sqlalchemy>=1.1
- thrift>=0.9.3
- thriftpy2 # required for impyla in case of py3
- toolz
- xorg-libxpm
- xorg-libxrender
- pip:
# see .pre-commit-config.yaml, isort pinned
- seed-isort-config
# we should try the next isort release after 4.3.21-2
- git+git://github.com/timothycrosley/isort@18ad293fc9d1852776afe35015a932b68d26fb14#egg=isort
17 changes: 13 additions & 4 deletions ci/requirements-3.7-dev.yml
@@ -1,10 +1,10 @@
channels:
- conda-forge
dependencies:
- black
- black=19.10b0
- click
- clickhouse-cityhash
- clickhouse-driver>=0.0.8
- clickhouse-driver>=0.1.3
- clickhouse-sqlalchemy
- cmake
- flake8
@@ -13,38 +13,47 @@ dependencies:
- google-cloud-bigquery>=1.0.0
- graphviz
- impyla>=0.15.0
- isort
- jinja2
- libiconv # see https://github.com/jupyter/repo2docker/issues/758
- lz4
- multipledispatch>=0.6.0
- mypy
- numpy>=1.11
- openjdk=8
- pandas>=0.21
- pip
- plumbum
- pre_commit
- pre-commit
- psycopg2
- pyarrow>=0.12
- pydata-google-auth
- pydocstyle=4.0.1
- pygit2
- pymapd>=0.12.0
- pymysql
- pyspark>=2.4.3
- pytables>=3.0.0
- pytest>=4.5
- pytest-cov
- pytest-mock
- pytest-xdist
- python=3.7
- python-graphviz
- python-hdfs>=2.0.16
- pytz
- regex
- requests
- rtree
- ruamel.yaml
- shapely
- sqlalchemy>=1.1
- thrift>=0.9.3
- thriftpy2 # required for impyla in case of py3
- toolz
- xorg-libxpm
- xorg-libxrender
- pip:
# see .pre-commit-config.yaml, isort pinned
- seed-isort-config
# we should try the next isort release after 4.3.21-2
- git+git://github.com/timothycrosley/isort@18ad293fc9d1852776afe35015a932b68d26fb14#egg=isort
61 changes: 61 additions & 0 deletions ci/requirements-3.8-dev.yml
@@ -0,0 +1,61 @@
channels:
- conda-forge
dependencies:
- black=19.10b0
- click
- clickhouse-cityhash
- clickhouse-driver>=0.1.3
- clickhouse-sqlalchemy
- cmake
- flake8
- geoalchemy2
- geopandas
- google-cloud-bigquery>=1.0.0
- graphviz
- impyla>=0.15.0
- jinja2
- libiconv # see https://github.com/jupyter/repo2docker/issues/758
- lz4
- multipledispatch>=0.6.0
- mypy
- numpy>=1.15
- openjdk=8
- pandas>=0.25.3
- pip=19.3.1
- plumbum
- pre-commit
- psycopg2
- pyarrow>=0.13
- pydata-google-auth
- pydocstyle=4.0.1
- pygit2
# currently it introduces incompatible packages
# maybe it is related to the pinned arrow used
# - pymapd>=0.12
- pymysql
# not fully compatible with Python 3.8
# https://github.com/apache/spark/pull/26194#issuecomment-566592265
# - pyspark>=3.0
- pytables>=3.0.0
- pytest>=4.5
- pytest-cov
- pytest-xdist
- python=3.8
- python-graphviz
- python-hdfs>=2.0.16
- pytz
- regex
- requests
- rtree
- ruamel.yaml
- shapely
- sqlalchemy>=1.1
- thrift>=0.9.3
- thriftpy2 # required for impyla in case of py3
- toolz
- xorg-libxpm
- xorg-libxrender
- pip:
# see .pre-commit-config.yaml, isort pinned
- seed-isort-config
- git+git://github.com/timothycrosley/isort@18ad293fc9d1852776afe35015a932b68d26fb14#egg=isort
3 changes: 3 additions & 0 deletions ci/requirements-docs.yml
@@ -1,5 +1,6 @@
ipython
jupyter
libiconv # see https://github.com/jupyter/repo2docker/issues/758
matplotlib
nbconvert
nbsphinx
@@ -9,3 +10,5 @@ pyarrow>=0.12
sphinx=2.0.1
sphinx-releases
sphinx_rtd_theme
# https://github.com/ibis-project/ibis/issues/2027
semantic_version<2.7
8 changes: 4 additions & 4 deletions ci/schema/omnisci.sql → ci/schema/omniscidb.sql
@@ -75,8 +75,8 @@ DROP TABLE IF EXISTS geo;

CREATE TABLE geo (
id INTEGER,
geo_point POINT,
geo_linestring LINESTRING,
geo_polygon POLYGON,
geo_multipolygon MULTIPOLYGON
geo_point GEOMETRY(POINT, 0),
geo_linestring GEOMETRY(LINESTRING, 0),
geo_polygon GEOMETRY(POLYGON, 0),
geo_multipolygon GEOMETRY(MULTIPOLYGON, 0)
);
7 changes: 0 additions & 7 deletions ci/test.sh

This file was deleted.

11 changes: 10 additions & 1 deletion conftest.py
@@ -1,3 +1,4 @@
"""Settings for tests."""
import os
from pathlib import Path

@@ -7,7 +8,15 @@


@pytest.fixture(scope='session')
def data_directory():
def data_directory() -> Path:
"""
Fixture that returns the test data directory.
Returns
-------
Path
Test data directory
"""
root = Path(__file__).absolute().parent

default = root / 'ci' / 'ibis-testing-data'
73 changes: 34 additions & 39 deletions dev/merge-pr.py
@@ -1,31 +1,10 @@
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Utility for creating well-formed pull request merges and pushing them to
# Apache.
# usage: ./apache-pull_request_number-merge.py (see config env vars below)
#
# Lightly modified from version of this script in incubator-parquet-format

"""Command line tool for merging PRs."""

import collections
import pathlib
import sys
import textwrap

import click
@@ -81,7 +60,6 @@ def merge_pr(
merge_message_pieces += commits

commit_message = "\n".join(merge_message_pieces)
# PUT /repos/:owner/:repo/pulls/:number/merge
resp = requests.put(
"{GITHUB_API_BASE}/pulls/{pr_num:d}/merge".format(
GITHUB_API_BASE=GITHUB_API_BASE, pr_num=pr_num
@@ -93,16 +71,16 @@
),
auth=(github_user, password),
)
resp.raise_for_status()
if resp.status_code == 200:
status_code = resp.status_code
if status_code == 200:
resp_json = resp.json()
merged = resp_json["merged"]
assert merged is True, merged
click.echo(
"Pull request #{pr_num:d} successfully merged.".format(
pr_num=pr_num
)
)
assert resp_json["merged"]
click.echo(resp_json["message"])
elif status_code == 405 or status_code == 409:
resp_json = resp.json()
raise click.ClickException(resp_json["message"])
else:
resp.raise_for_status()


@click.command()
@@ -167,20 +145,37 @@ def main(
pull_request_number=pull_request_number,
)
)
resp.raise_for_status()
if resp.status_code == 404:
pr_json = resp.json()
message = pr_json.get("message", None)
if message is not None:
raise click.ClickException(
"PR {pull_request_number:d} does not exist.".format(
pull_request_number=pull_request_number
)
)
else:
resp.raise_for_status()

pr_json = resp.json()

message = pr_json.get("message", None)
if message is not None and message.lower() == "not found":
raise click.ClickException(
"PR {pull_request_number:d} does not exist.".format(
pull_request_number=pull_request_number
# no-op if already merged
if pr_json["merged"]:
click.echo(
"#{pr_num:d} already merged. Nothing to do.".format(
pr_num=pull_request_number
)
)
sys.exit(0)

if not pr_json["mergeable"]:
raise click.ClickException(
"Pull request {:d} cannot be merged in its current form."
(
"Pull request #{pr_num:d} cannot be merged in its current "
"form. See "
"https://github.com/ibis-project/ibis/pulls/{pr_num:d} for "
"more details."
).format(pr_num=pull_request_number)
)

url = pr_json["url"]
171 changes: 171 additions & 0 deletions docs/source/api.rst
@@ -174,6 +174,66 @@ Use ``ibis.sqlite.connect`` to create a SQLite client.
SQLiteClient.list_tables
SQLiteClient.table

.. _api.mysql:

MySQL client (Experimental)
---------------------------
.. currentmodule:: ibis.sql.mysql.api

The MySQL client is accessible through the ``ibis.mysql`` namespace.

Use ``ibis.mysql.connect`` with a SQLAlchemy-compatible connection string to
create a client.

.. autosummary::
:toctree: generated/

connect
MySQLClient.database
MySQLClient.list_databases
MySQLClient.list_tables
MySQLClient.table

.. _api.omniscidb:

OmniSciDB client (Experimental)
-------------------------------
.. currentmodule:: ibis.omniscidb.api

The OmniSciDB client is accessible through the ``ibis.omniscidb`` namespace.

Use ``ibis.omniscidb.connect`` to create a client.

.. autosummary::
:toctree: generated/

compile
connect
verify
OmniSciDBClient.alter_user
OmniSciDBClient.close
OmniSciDBClient.create_database
OmniSciDBClient.create_table
OmniSciDBClient.create_user
OmniSciDBClient.create_view
OmniSciDBClient.database
OmniSciDBClient.describe_formatted
OmniSciDBClient.drop_database
OmniSciDBClient.drop_table
OmniSciDBClient.drop_table_or_view
OmniSciDBClient.drop_user
OmniSciDBClient.drop_view
OmniSciDBClient.exists_table
OmniSciDBClient.get_schema
OmniSciDBClient.list_tables
OmniSciDBClient.load_data
OmniSciDBClient.log
OmniSciDBClient.set_database
OmniSciDBClient.sql
OmniSciDBClient.table
OmniSciDBClient.truncate_table
OmniSciDBClient.version

.. _api.hdfs:

HDFS
@@ -199,9 +259,49 @@ HDFS.
HDFS.size
HDFS.status

.. _api.spark:

SparkSQL client (Experimental)
------------------------------
.. currentmodule:: ibis.spark.api

The Spark SQL client is accessible through the ``ibis.spark`` namespace.

Use ``ibis.spark.connect`` to create a client.

.. autosummary::
:toctree: generated/

connect
SparkClient.database
SparkClient.list_databases
SparkClient.list_tables
SparkClient.table

.. _api.pyspark:

PySpark client (Experimental)
-----------------------------
.. currentmodule:: ibis.pyspark.api

The PySpark client is accessible through the ``ibis.pyspark`` namespace.

Use ``ibis.pyspark.connect`` to create a client.

.. autosummary::
:toctree: generated/

connect
PySparkClient.database
PySparkClient.list_databases
PySparkClient.list_tables
PySparkClient.table

Top-level expression APIs
-------------------------

.. currentmodule:: ibis

These methods are available directly in the ``ibis`` module namespace.

.. autosummary::
@@ -606,3 +706,74 @@ Decimal methods

DecimalValue.precision
DecimalValue.scale

.. _api.geospatial:

Geospatial methods
-------------------

Scalar or column methods
~~~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/

GeoSpatialValue.area
GeoSpatialValue.as_binary
GeoSpatialValue.as_ewkb
GeoSpatialValue.as_ewkt
GeoSpatialValue.as_text
GeoSpatialValue.azimuth
GeoSpatialValue.buffer
GeoSpatialValue.centroid
GeoSpatialValue.contains
GeoSpatialValue.contains_properly
GeoSpatialValue.covers
GeoSpatialValue.covered_by
GeoSpatialValue.crosses
GeoSpatialValue.d_fully_within
GeoSpatialValue.d_within
GeoSpatialValue.difference
GeoSpatialValue.disjoint
GeoSpatialValue.distance
GeoSpatialValue.end_point
GeoSpatialValue.envelope
GeoSpatialValue.equals
GeoSpatialValue.geometry_n
GeoSpatialValue.geometry_type
GeoSpatialValue.intersection
GeoSpatialValue.intersects
GeoSpatialValue.is_valid
GeoSpatialValue.line_locate_point
GeoSpatialValue.line_merge
GeoSpatialValue.line_substring
GeoSpatialValue.length
GeoSpatialValue.max_distance
GeoSpatialValue.n_points
GeoSpatialValue.n_rings
GeoSpatialValue.ordering_equals
GeoSpatialValue.overlaps
GeoSpatialValue.perimeter
GeoSpatialValue.point_n
GeoSpatialValue.set_srid
GeoSpatialValue.simplify
GeoSpatialValue.srid
GeoSpatialValue.start_point
GeoSpatialValue.touches
GeoSpatialValue.transform
GeoSpatialValue.union
GeoSpatialValue.within
GeoSpatialValue.x
GeoSpatialValue.x_max
GeoSpatialValue.x_min
GeoSpatialValue.y
GeoSpatialValue.y_max
GeoSpatialValue.y_min

Column methods
~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/

GeoSpatialColumn.unary_union
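
A brief usage sketch may be helpful here; the client, table, and column names
below are hypothetical, and a backend with geospatial support (for example
OmniSciDB) is assumed:

.. code-block:: python

    # `con` is an existing client for a backend with geospatial support,
    # and `geo_table` is assumed to contain a geometry column named `geom`.
    t = con.table('geo_table')

    # Scalar/column methods compose like any other ibis expression.
    expr = t[t.geom.area().name('area'), t.geom.centroid().name('centroid')]
    df = expr.execute()

    # Column-level reduction.
    merged = t.geom.unary_union().execute()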
37 changes: 10 additions & 27 deletions docs/source/backends.rst
@@ -27,20 +27,12 @@ Generally speaking these backends also need to handle their own execution.
They work by translating each node into a string, and passing the generated
string to the database through a driver API.

Impala
******

TODO

Clickhouse
**********

TODO

BigQuery
********

TODO
- `Apache Impala <https://impala.apache.org/>`_
- `Yandex Clickhouse <https://clickhouse.yandex/>`_
- `Google BigQuery <https://cloud.google.com/bigquery/>`_
- `Hadoop Distributed File System (HDFS) <https://hadoop.apache.org/>`_
- `OmniSciDB <https://www.omnisci.com/>`_ (Experimental)
- `PySpark/Spark SQL <https://spark.apache.org/sql/>`_ (Experimental)
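
To make the translation step concrete, here is a small sketch; the table and
column names are made up, and it assumes ibis is installed together with the
Impala backend. It builds an expression and compiles it to the SQL string that
is handed to the driver API:

.. code-block:: python

    import ibis

    # Hypothetical unbound table, used only to build an expression tree.
    t = ibis.table(
        [('string_col', 'string'), ('double_col', 'double')],
        name='functional_alltypes',
    )
    expr = t.group_by('string_col').aggregate(avg=t.double_col.mean())

    # A string-generating backend walks the tree and emits a SQL string.
    print(ibis.impala.compile(expr))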

.. _expression_generating_backends:

@@ -56,15 +48,9 @@ an expression. These backends tend to execute their expressions directly
through the driver APIs provided by SQLAlchemy (or one of its transitive
dependencies).

SQLite
******

TODO

PostgreSQL
**********

TODO
- `PostgreSQL <https://www.postgresql.org/>`_
- `SQLite <https://www.sqlite.org/>`_
- `MySQL <https://www.mysql.com/>`_ (Experimental)

.. _direct_execution_backends:

@@ -75,7 +61,4 @@ The only existing backend that directly executes ibis expressions is the pandas
backend. A full description of the implementation can be found in the module
docstring of the pandas backend located in ``ibis/pandas/execution/core.py``.

Pandas
******

TODO
- `Pandas <http://pandas.pydata.org/>`_
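
For contrast with the backends above, a minimal sketch of direct execution
(the DataFrame contents are made up) looks like this; no SQL string is
generated at any point:

.. code-block:: python

    import pandas as pd

    import ibis

    # Register an in-memory DataFrame under the name 't'.
    con = ibis.pandas.connect({'t': pd.DataFrame({'a': [1, 2, 3]})})
    t = con.table('t')

    # The expression tree is executed directly against the DataFrame.
    print(t.a.sum().execute())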
141 changes: 139 additions & 2 deletions docs/source/contributing.rst
@@ -17,7 +17,7 @@ To contribute to ibis you need to clone the repository from GitHub:
Set Up a Development Environment
--------------------------------
#. `Install miniconda <https://docs.conda.io/en/latest/miniconda.html>`_
#. Create a conda environment suitable for ibis development:
#. Create a Conda environment suitable for ibis development:

.. code-block:: sh
@@ -29,16 +29,153 @@ Set Up a Development Environment
conda activate ibis-dev
#. Install your local copy of Ibis into the Conda environment. This also
sets up a pre-commit hook to check style and formatting before committing.

.. code-block:: sh
make develop
Run the Test Suite
------------------

Contributor `Krisztián Szűcs <https://github.com/kszucs>`_ has spent many hours
crafting an easy-to-use `docker-compose <https://docs.docker.com/compose/>`_
setup that enables ibis developers to get up and running quickly.

For those unfamiliar with ``docker`` and ``docker-compose``, here are some
rough steps to get things set up:

- Install ``docker-compose`` with ``pip install docker-compose``
- Install `docker <https://docs.docker.com/install/>`_

- Be sure to follow the
`post-install instructions
<https://docs.docker.com/install/linux/linux-postinstall/>`_
if you are running on Linux.


Here are the steps to start database services and run the test suite:

.. code-block:: sh
make --directory ibis init
make --directory ibis testparallel
make --directory ibis testall
You can also run tests for a specific backend:

.. code-block:: sh
make --directory ibis testparallel BACKENDS='omniscidb impala'
or start database services for a specific backend:

.. code-block:: sh
make --directory ibis init BACKENDS='omniscidb impala'
.. note::
the `test` and `testparallel` make targets automatically restart the
services as a prerequisite

You can also run ``pytest`` tests on the command line if you are not testing
integration with running database services. For example, to run all the tests
for the ``pandas`` backend:

.. code-block:: sh
pytest ./ibis/pandas
Style and Formatting
--------------------

We use `flake8 <http://flake8.pycqa.org/en/latest/>`_,
`black <https://github.com/psf/black>`_ and
`isort <https://github.com/pre-commit/mirrors-isort>`_ to ensure our code
is formatted and linted properly. If you have properly set up your development
environment by running ``make develop``, the pre-commit hooks should check
that your proposed changes continue to conform to our style guide.

We use `numpydoc <https://numpydoc.readthedocs.io/en/latest/format.html>`_ as
our standard format for docstrings.


Commit Philosophy
-----------------

We aim to make our individual commits small and tightly focused on the feature
they are implementing. If you find yourself making functional changes to
different areas of the codebase, we prefer you break up your changes into
separate Pull Requests. In general, a philosophy of one Github Issue per
Pull Request is a good rule of thumb, though that isn't always possible.

We avoid merge commits (and in fact they are disabled in the Github repository)
so you may be asked to rebase your changes on top of the latest commits to
master if there have been changes since you last updated a Pull Request.
Rebasing your changes is usually as simple as running
``git pull upstream master --rebase`` and then force-pushing to your branch:
``git push origin <branch-name> -f``.


Commit/PR Messages
------------------

Well-structured commit messages allow us to generate comprehensive release notes
and make it very easy to understand what a commit/PR contributes to our
codebase. Commit messages and PR titles should be prefixed with a standard
code that states what kind of change it is. They fall broadly into 3 categories:
``FEAT (feature)``, ``BUG (bug)``, and ``SUPP (support)``. The ``SUPP``
category has some more fine-grained aliases that you can use, such as ``BLD``
(build), ``CI`` (continuous integration), ``DOC`` (documentation), ``TST``
(testing), and ``RLS`` (releases).
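
For example, here are a few hypothetical PR titles that follow this
convention::

    FEAT: Add covariance support to the BigQuery backend
    BUG: Fix the return type of Struct.from_tuples
    DOC: Add a geospatial tutorial notebook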


Maintainer's Guide
------------------

Maintainers generally perform two roles, merging PRs and making official
releases.


Merging PRs
~~~~~~~~~~~

We have a CLI script that will merge Pull Requests automatically once they have
been reviewed and approved. See the help message in ``dev/merge-pr.py`` for
full details. If you have two-factor authentication turned on in Github, you
will have to generate an application-specific password by following this
`guide <https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line>`_.
You will then use that generated password on the command line for the ``-P``
argument.

Access the `Ibis "Merging PRs" wiki <https://github.com/ibis-project/ibis/wiki/Merging-PRs>`_ page
for more information.

Releasing
~~~~~~~~~

Access the `Ibis "Releasing" wiki <https://github.com/ibis-project/ibis/wiki/Releasing-Ibis>`_ page
for more information.


***************
Code of Conduct
***************

Ibis is governed by the
`NumFOCUS code of conduct <https://numfocus.org/code-of-conduct>`_,
which in a short version is:

- Be kind to others. Do not insult or put down others. Behave professionally.
Remember that harassment and sexist, racist, or exclusionary jokes are not
appropriate for NumFOCUS.
- All communication should be appropriate for a professional audience
including people of many different backgrounds. Sexual language and
imagery is not appropriate.
- NumFOCUS is dedicated to providing a harassment-free community for everyone,
regardless of gender, sexual orientation, gender identity, and expression,
disability, physical appearance, body size, race, or religion. We do not
tolerate harassment of community members in any form.
- Thank you for helping make this a welcoming, friendly community for all.
24 changes: 24 additions & 0 deletions docs/source/extending.rst
@@ -39,4 +39,28 @@ how to add a new ``bitwise_and`` reduction operation:
Adding a New Backend
--------------------

Run the test suite for a separate backend
------------------------------------------
.. note::
Following the steps below lets you run the tests for the new backend with a
single command: `make test BACKENDS='[your added backend]'`

1) Add the new backend to the `BACKENDS` variable in the `Makefile`.

2) If the backend needs running services (implemented as docker containers and
added to the `docker-compose.yml` file), add them to the `SERVICES` variable
in the `Makefile`, and add a case to the switch statement inside
`./ci/dockerize.sh` so that the services are properly waited for.

3) If the backend needs test data to be loaded, add the backend to the `LOADS`
variable in the `Makefile` and implement the necessary functionality in
`./ci/load-data.sh`

4) The necessary markers for `pytest` are generated inside
`./ci/backends-markers.sh`. By default, the generated marker matches the
name of the backend (you can manually adjust the generated marker name
inside that file)

Other
-----

TBD
24 changes: 12 additions & 12 deletions docs/source/getting-started.rst
@@ -120,7 +120,7 @@ individual parameters to :func:`ibis.postgres.connect`:
`Clickhouse <https://clickhouse.yandex/>`_ Quickstart
-----------------------------------------------------

Install dependencies for Ibis's Clickhouse dialect:
Install dependencies for Ibis's Clickhouse dialect (the minimal supported
``clickhouse-driver`` version is `0.1.3`):

::

@@ -187,32 +187,32 @@ Create a client by supplying a dictionary of DataFrames using
import pandas as pd
con = ibis.pandas.connect(
{
'A': pd.util.testing.makeDataFrame(),
'B': pd.util.testing.makeDataFrame(),
'A': pd._testing.makeDataFrame(),
'B': pd._testing.makeDataFrame(),
}
)
.. _install.mapd:
.. _install.omniscidb:

`MapD <https://www.omnisci.com/>`_ Quickstart
---------------------------------------------
`omniscidb <https://www.omnisci.com/>`_ Quickstart
--------------------------------------------------

Install dependencies for Ibis's MapD dialect:
Install dependencies for Ibis's omniscidb dialect:

::

pip install ibis-framework[mapd]
pip install ibis-framework[omniscidb]

Create a client by passing in database connection parameters such as ``host``,
``port``, ``database``, ``user`` and ``password`` to
:func:`ibis.mapd.connect`:
:func:`ibis.omniscidb.connect`:

.. ipython:: python
con = ibis.mapd.connect(
host='omnisci',
con = ibis.omniscidb.connect(
host='omniscidb',
database='ibis_testing',
user='mapd',
user='admin',
password='HyperInteractive',
)
7 changes: 5 additions & 2 deletions docs/source/index.rst
@@ -47,12 +47,16 @@ At this time, Ibis offers some level of support for the following systems:

- `Apache Impala <https://impala.apache.org/>`_
- `Apache Kudu <https://kudu.apache.org/>`_
- `Hadoop Distributed File System (HDFS) <https://hadoop.apache.org/>`_
- `PostgreSQL <https://www.postgresql.org/>`_
- `MySQL <https://www.mysql.com/>`_ (Experimental)
- `SQLite <https://www.sqlite.org/>`_
- `Google BigQuery <https://cloud.google.com/bigquery/>`_
- `Yandex Clickhouse <https://clickhouse.yandex/>`_
- Direct execution of ibis expressions against `Pandas
<http://pandas.pydata.org/>`_ objects
- `OmniSciDB <https://www.omnisci.com/>`_ (Experimental)
- `PySpark/Spark SQL <https://spark.apache.org/sql/>`_ (Experimental)

Coming from SQL? Check out :ref:`Ibis for SQL Programmers <sql>`.

@@ -69,8 +73,7 @@ SQL engine support needing code contributors:

- `Redshift <https://aws.amazon.com/redshift/>`_
- `Vertica <https://www.vertica.com/>`_
- `Spark SQL <https://spark.apache.org/sql/>`_
- `Presto <https://prestodb.io/>`_
- `Presto <https://prestosql.io/>`_
- `Hive <https://hive.apache.org/>`_

.. toctree::
@@ -210,7 +210,7 @@
"To be able to execute the rest of this notebook you need to run the following command from your ibis clone:\n",
"\n",
"```sh\n",
"ci/build.sh\n",
"make init\n",
"```"
]
},
@@ -353,4 +353,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
648 changes: 648 additions & 0 deletions docs/source/notebooks/tutorial/11-Geospatial-Analysis.ipynb

Large diffs are not rendered by default.

@@ -192,7 +192,7 @@
"To be able to execute the rest of this notebook you need to run the following command from your ibis clone:\n",
"\n",
"```sh\n",
"ci/build.sh\n",
"make init\n",
"```"
]
},
@@ -339,4 +339,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
103 changes: 103 additions & 0 deletions docs/source/release.rst
@@ -7,6 +7,109 @@ Release Notes
These release notes are for versions of ibis **1.0 and later**. Release
notes for pre-1.0 versions of ibis can be found at :doc:`/release-pre-1.0`

* :release:`1.3.0 <2020-02-27>`
* :support:`2066` Add support to Python 3.8
* :bug:`2089 major` Pin "clickhouse-driver" to ">=0.1.3"
* :support:`2079` Pin back version of isort
* :support:`2082` Use user-defined port variables for Omnisci and PostgreSQL tests
* :support:`2077` Change omniscidb image tag from v5.0.0 to v5.1.0 on docker-compose recipe
* :support:`2051` [Omnisci] The same SRIDs for test_geo_spatial_binops
* :support:`2078` Unpin rtree version
* :feature:`2071` Improve many arguments UDF performance in pandas backend.
* :bug:`2069 major` Fix load data stage for Linux CI
* :support:`2074` Link pandas issues with xfail tests in pandas/tests/test_udf.py
* :support:`2075` Disable Postgres tests on Windows CI.
* :support:`2068` use conda for installation black and isort tools
* :bug:`2057 major` Fix datamgr.py fail if IBIS_TEST_OMNISCIDB_DATABASE=omnisci
* :support:`2061` CI: Fix CI builds related to new pandas 1.0 compatibility
* :support:`2056` Fix data map for int8 on OmniSciDB backend
* :feature:`1976` Add DenseRank, RowNumber, MinRank, Count, PercentRank/CumeDist window operations to OmniSciDB
* :support:`2052` Add possibility to run tests for separate backend via `make test BACKENDS=[YOUR BACKEND]`
* :support:`2055` Fix "cudf" import on OmniSciDB backend
* :feature:`2047` Introduce a top level vectorized UDF module (experimental). Implement element-wise UDF for pandas and PySpark backend.
* :support:`2050` CI: Drop table only if it exists (OmniSciDB)
* :support:`2034` Add initial documentation for OmniSciDB, MySQL, PySpark and SparkSQL backends, add initial documentation for geospatial methods and add links to Ibis wiki page
* :support:`2044` Implement covariance for bigquery backend
* :feature:`2035` Add support for multi arguments window UDAF for the pandas backend
* :bug:`2041 major` Change pymapd connection parameter from "session_id" to "sessionid"
* :support:`2046` Add Spark to supported backends list
* :support:`2043` Ping dependency of rtree to fix CI failure
* :support:`2037` Drop support for Python 3.5
* :support:`2023` HTML escape column names and types in png repr.
* :support:`1991` Add geospatial tutorial notebook
* :support:`2031` Change omniscidb image tag from v4.7.0 to v5.0.0 on docker-compose recipe
* :support:`2030` Pin "semantic_version" to "<2.7" in the docs build CI, fix "builddoc" and "doc" section inside "Makefile" and skip mysql tzinfo on CI to allow to run MySQL using docker container on a hard disk drive.
* :bug:`2009 major` Fix pandas backend to treat trailing_window preceding arg as window bound rather than window size (e.g. preceding=0 now indicates current row rather than window size 0)
* :feature:`2004` Clean up window translation logic in pyspark backend
* :bug:`2015 major` Fix handling of Array types in Postgres UDF
* :feature:`1996` Add docstring check to CI for an initial subset files
* :bug:`2010 major` Fix pydocstyle config
* :support:`2012` Fixed impala start up issues
* :feature:`2001` Pyspark backend bounded windows
* :bug:`2006 major` Pinning clickhouse-driver<0.1.2
* :support:`1999` cache all ops in translate()
* :feature:`1987` Add more POSTGIS operations
* :feature:`1969` SQLAlchemy Default precision and scale to decimal types for PostgreSQL and MySQL
* :support:`1988` Add black step to CI
* :support:`1962` Json UUID any
* :bug:`1984 major` Fix CI log for database
* :feature:`1983` Add support for array operations in PySpark backend
* :feature:`1978` Implement sort, if_null, null_if and notin for PySpark backend
* :support:`1982` Add log for database services
* :feature:`1974` Add support for date/time operations in PySpark backend
* :feature:`1973` Add support for params, query_schema, and sql in PySpark backend
* :support:`1972` Fix BigQuery backend fixture so batting and awards_players fixture re…
* :support:`1971` Disable BigQuery explicitly in all/test_join.py
* :feature:`1967` Implement join for PySpark backend
* :feature:`1952` Validate AsOfJoin tolerance and attempt interval unit conversion
* :support:`1963` Re-formatting all files using pre-commit hook
* :support:`1961` Disable codecov report upload during CI builds
* :support:`1960` Developer doc enhancements
* :feature:`1943` filter for PySpark backend
* :feature:`1945` window operations for pyspark backend
* :support:`1958` Missing geospatial ops for OmniSciDB
* :feature:`1951` Implement IntervalSub for pandas backend
* :support:`1950` Remove pandas deprecation warnings
* :support:`1948` Add developer docs to get docker setup
* :support:`1949` More informative IntegrityError on duplicate columns
* :feature:`1942` PySpark backend string and column ops
* :support:`1928` Improve geospatial literals and smoke tests
* :support:`1925` PostGIS enhancements
* :bug:`1933 major` Fixes explain operation
* :feature:`1913` PySpark backend
* :bug:`1937 major` Fix incorrect assumptions about attached SQLite databases
* :bug:`1938 major` Upgrade to JDK11
* :support:`1866` Rename mapd to omniscidb backend
* :support:`1926` Fix failing BigQuery tests
* :feature:`1908` DDL support for Spark backend
* :support:`1917` Added missing null literal op
* :feature:`1923` Support timezone aware arrow timestamps
* :bug:`1903 major` `sql` method doesn't work when the query uses LIMIT clause
* :feature:`1860` Add shapely geometries as input for literals
* :bug:`1910 major` Fix union implementation
* :bug:`1912 major` Fix failing com imports on master
* :feature:`1858` Add geopandas as output for omniscidb
* :bug:`1901 major` OmniSci/MapD - Fix reduction for bool
* :feature:`1885` Spark UDFs
* :feature:`1871` Add support for Postgres UDFs
* :bug:`1899 major` Pass scope to grouping execution in the pandas backend
* :support:`1895` Update link to Presto website
* :support:`1896` Removing linting from windows
* :bug:`1888 major` Fix various Spark backend issues
* :bug:`1891 major` Make Nodes enforce the proper signature
* :bug:`1893 major` Fix according to bug in pd.to_datetime when passing the unit flag
* :feature:`1830` Spark tests
* :support:`1884` Fix link to NUMFOCUS CoC
* :bug:`1883 major` Fix small formatting buglet in PR merge tool
* :support:`1882` Added CoC section
* :bug:`1876 major` Fix the case where we do not have an index when using preceding with intervals
* :feature:`1807` Spark client
* :bug:`1872 major` Fixed issues with geo data
* :feature:`1868` Use pandas rolling apply to implement rows_with_max_lookback
* :bug:`1869 major` Remove -x from pytest call in linux CI
* :bug:`1867 major` Fix return type of Struct.from_tuples
* :support:`1859` Remove pandas exception for rows_with_max_lookback
* :support:`1856` Move CI pipelines to Azure
* :release:`1.2.0 <2019-06-24>`
* :feature:`1836` Add new geospatial functions to OmniSciDB backend
* :support:`1847` Skip SQLAlchemy backend tests in connect method in backends.py
1 change: 1 addition & 0 deletions docs/source/tutorial.rst
@@ -18,3 +18,4 @@ Here we show Jupyter notebooks that take you through various tasks using ibis.
notebooks/tutorial/8-More-Analytics-Helpers.ipynb
notebooks/tutorial/9-Adding-a-new-elementwise-expression.ipynb
notebooks/tutorial/10-Adding-a-new-reduction-expression.ipynb
notebooks/tutorial/11-Geospatial-Analysis.ipynb
17 changes: 13 additions & 4 deletions ibis/__init__.py
@@ -1,12 +1,14 @@
"""Initialize Ibis module."""
from contextlib import suppress

import ibis.config_init # noqa: F401
import ibis.expr.api as api # noqa: F401
import ibis.expr.types as ir # noqa: F401

# pandas backend is mandatory
import ibis.pandas.api as pandas # noqa: F401
import ibis.util as util # noqa: F401
from ibis.common import IbisError
from ibis.common.exceptions import IbisError
from ibis.config import options # noqa: F401
from ibis.expr.api import * # noqa: F401,F403
from ibis.filesystems import HDFS, WebHDFS # noqa: F401
@@ -50,8 +52,15 @@
import ibis.bigquery.api as bigquery # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[mapd]
import ibis.mapd.api as mapd # noqa: F401
# pip install ibis-framework[omniscidb]
import ibis.omniscidb.api as omniscidb # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[spark]
import ibis.spark.api as spark # noqa: F401

with suppress(ImportError):
import ibis.pyspark.api as pyspark # noqa: F401


def hdfs_connect(
@@ -62,7 +71,7 @@ def hdfs_connect(
auth_mechanism='NOSASL',
verify=True,
session=None,
**kwds
**kwds,
):
"""Connect to HDFS.
2 changes: 1 addition & 1 deletion ibis/_version.py
@@ -57,7 +57,7 @@ class NotThisMethod(Exception):


def register_vcs_handler(vcs, method): # decorator
"""Decorator to mark a method as the handler for a particular VCS."""
"""Mark a method as the handler for a particular VCS."""
def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
2 changes: 1 addition & 1 deletion ibis/bigquery/api.py
@@ -6,7 +6,7 @@
import google.cloud.bigquery # noqa: F401, fail early if bigquery is missing
import pydata_google_auth

import ibis.common as com
import ibis.common.exceptions as com
from ibis.bigquery.client import BigQueryClient
from ibis.bigquery.compiler import dialect
from ibis.config import options # noqa: F401
2 changes: 1 addition & 1 deletion ibis/bigquery/client.py
@@ -12,7 +12,7 @@
from pkg_resources import parse_version

import ibis
import ibis.common as com
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.lineage as lin
import ibis.expr.operations as ops
37 changes: 28 additions & 9 deletions ibis/bigquery/compiler.py
@@ -7,7 +7,7 @@
from multipledispatch import Dispatcher

import ibis
import ibis.common as com
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.lineage as lin
import ibis.expr.operations as ops
@@ -199,7 +199,7 @@ def _string_join(translator, expr):


def _string_ascii(translator, expr):
arg, = expr.op().args
(arg,) = expr.op().args
return 'TO_CODE_POINTS({})[SAFE_OFFSET(0)]'.format(
translator.translate(arg)
)
@@ -494,7 +494,7 @@ def identical_to(expr):

@rewrites(ops.Log2)
def log2(expr):
arg, = expr.op().args
(arg,) = expr.op().args
return arg.log(2)


@@ -548,6 +548,29 @@ def compiles_approx(translator, expr):
)


@compiles(ops.Covariance)
def compiles_covar(translator, expr):
expr = expr.op()
left = expr.left
right = expr.right
where = expr.where

if expr.how == 'sample':
how = 'SAMP'
elif expr.how == 'pop':
how = 'POP'
else:
raise ValueError(
"Covariance with how={!r} is not supported.".format(expr.how)
)

if where is not None:
left = where.ifelse(left, ibis.NA)
right = where.ifelse(right, ibis.NA)

return "COVAR_{}({}, {})".format(
how, translator.translate(left), translator.translate(right)
)


@rewrites(ops.Any)
@rewrites(ops.All)
@rewrites(ops.NotAny)
@@ -558,9 +581,7 @@ def bigquery_any_all_no_op(expr):

@compiles(ops.Any)
def bigquery_compile_any(translator, expr):
return "LOGICAL_OR({})".format(
*map(translator.translate, expr.op().args)
)
return "LOGICAL_OR({})".format(*map(translator.translate, expr.op().args))


@compiles(ops.NotAny)
@@ -572,9 +593,7 @@ def bigquery_compile_notany(translator, expr):

@compiles(ops.All)
def bigquery_compile_all(translator, expr):
return "LOGICAL_AND({})".format(
*map(translator.translate, expr.op().args)
)
return "LOGICAL_AND({})".format(*map(translator.translate, expr.op().args))


@compiles(ops.NotAll)
8 changes: 7 additions & 1 deletion ibis/bigquery/tests/conftest.py
@@ -10,6 +10,7 @@

def connect(project_id, dataset_id):
ga = pytest.importorskip('google.auth')
service_account = pytest.importorskip('google.oauth2.service_account')
google_application_credentials = os.environ.get(
"GOOGLE_APPLICATION_CREDENTIALS", None
)
@@ -34,8 +35,13 @@ def connect(project_id, dataset_id):
'No BigQuery credentials found using project_id={}, '
'dataset_id={}. Skipping BigQuery tests.'
).format(project_id, dataset_id)
credentials = service_account.Credentials.from_service_account_file(
google_application_credentials
)
try:
return ibis.bigquery.connect(project_id, dataset_id)
return ibis.bigquery.connect(
project_id, dataset_id, credentials=credentials
)
except ga.exceptions.DefaultCredentialsError:
pytest.skip(skip_message)

21 changes: 21 additions & 0 deletions ibis/bigquery/tests/test_compiler.py
@@ -476,6 +476,27 @@ def test_approx_median(alltypes):
assert result == expected


def test_cov(alltypes):
d = alltypes.double_col
expr = d.cov(d)
result = expr.compile()
expected = """\
SELECT COVAR_SAMP(`double_col`, `double_col`) AS `tmp`
FROM `ibis-gbq.testing.functional_alltypes`"""
assert result == expected

expr = d.cov(d, how='pop')
result = expr.compile()
expected = """\
SELECT COVAR_POP(`double_col`, `double_col`) AS `tmp`
FROM `ibis-gbq.testing.functional_alltypes`"""
assert result == expected

expr = d.cov(d, how='error')
with pytest.raises(ValueError):
expr.compile()


@pytest.mark.parametrize(
('unit', 'expected_unit', 'expected_func'),
[
12 changes: 8 additions & 4 deletions ibis/bigquery/udf/api.py
@@ -8,11 +8,11 @@
from ibis.bigquery.compiler import BigQueryUDFNode, compiles
from ibis.bigquery.datatypes import UDFContext, ibis_type_to_bigquery_type
from ibis.bigquery.udf.core import PythonToJavaScriptTranslator
from ibis.compat import PY38 # noqa: F401
from ibis.expr.signature import Argument as Arg

__all__ = ('udf',)


_udf_name_cache = collections.defaultdict(itertools.count)


@@ -58,12 +58,16 @@ def udf(input_type, output_type, strict=True, libraries=None):
Notes
-----
``INT64`` is not supported as an argument type or a return type, as per
`the BigQuery documentation
<https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions#sql-type-encodings-in-javascript>`_.
- ``INT64`` is not supported as an argument type or a return type, as per
`the BigQuery documentation
<https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions#sql-type-encodings-in-javascript>`_.
- `The following example doctest doesn't work for Python 3.8
<https://github.com/ibis-project/ibis/issues/2085>`_.
Examples
--------
>>> if PY38:
... import pytest; pytest.skip("Issue #2085")
>>> from ibis.bigquery import udf
>>> import ibis.expr.datatypes as dt
>>> @udf(input_type=[dt.double], output_type=dt.double)
4 changes: 2 additions & 2 deletions ibis/bigquery/udf/core.py
@@ -162,7 +162,7 @@ def visit_YieldFrom(self, node):
@semicolon
def visit_Assign(self, node):
try:
target, = node.targets
(target,) = node.targets
except ValueError:
raise NotImplementedError(
'Only single assignment supported for now'
@@ -495,7 +495,7 @@ def visit_ListComp(self, node):
[[1, 4], [2, 5], [3, 6]]].map(([x, y]) => x + y)
"""
try:
generator, = node.generators
(generator,) = node.generators
except ValueError:
raise NotImplementedError(
'Only single loop comprehensions are allowed'
10 changes: 8 additions & 2 deletions ibis/bigquery/udf/tests/test_core.py
@@ -5,8 +5,14 @@
import pytest

from ibis.bigquery.udf.core import PythonToJavaScriptTranslator, SymbolTable

pytestmark = pytest.mark.bigquery
from ibis.compat import PY38

if PY38:
# ref: https://github.com/ibis-project/ibis/issues/2098
# note: UDF is already skipped on CI
pytestmark = [pytest.mark.bigquery, pytest.mark.udf]
else:
pytestmark = pytest.mark.bigquery


def test_symbol_table():
10 changes: 8 additions & 2 deletions ibis/bigquery/udf/tests/test_find.py
@@ -3,9 +3,15 @@
import pytest

from ibis.bigquery.udf.find import find_names
from ibis.compat import PY38
from ibis.util import is_iterable

pytestmark = pytest.mark.bigquery
if PY38:
# ref: https://github.com/ibis-project/ibis/issues/2098
# note: UDF is already skipped on CI
pytestmark = [pytest.mark.bigquery, pytest.mark.udf]
else:
pytestmark = pytest.mark.bigquery


def parse_expr(expr):
@@ -14,7 +20,7 @@ def parse_expr(expr):


def parse_stmt(stmt):
body, = ast.parse(stmt).body
(body,) = ast.parse(stmt).body
return body


8 changes: 7 additions & 1 deletion ibis/bigquery/udf/tests/test_udf_execute.py
@@ -8,10 +8,16 @@
import ibis
import ibis.expr.datatypes as dt
from ibis.bigquery import udf # noqa: E402
from ibis.compat import PY38

pytest.importorskip('google.cloud.bigquery')

pytestmark = pytest.mark.bigquery
if PY38:
# ref: https://github.com/ibis-project/ibis/issues/2098
# note: UDF is already skipped on CI
pytestmark = [pytest.mark.bigquery, pytest.mark.udf]
else:
pytestmark = pytest.mark.bigquery


PROJECT_ID = os.environ.get('GOOGLE_BIGQUERY_PROJECT_ID', 'ibis-gbq')
2 changes: 1 addition & 1 deletion ibis/clickhouse/api.py
@@ -1,4 +1,4 @@
import ibis.common as com
import ibis.common.exceptions as com
from ibis.clickhouse.client import ClickhouseClient
from ibis.clickhouse.compiler import dialect
from ibis.config import options
2 changes: 1 addition & 1 deletion ibis/clickhouse/client.py
@@ -6,7 +6,7 @@
from clickhouse_driver.client import Client as _DriverClient
from pkg_resources import parse_version

import ibis.common as com
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.schema as sch
2 changes: 1 addition & 1 deletion ibis/clickhouse/compiler.py
@@ -1,6 +1,6 @@
from io import StringIO

import ibis.common as com
import ibis.common.exceptions as com
import ibis.expr.operations as ops
import ibis.sql.compiler as comp
import ibis.util as util
4 changes: 2 additions & 2 deletions ibis/clickhouse/operations.py
@@ -1,7 +1,7 @@
from datetime import date, datetime
from io import StringIO

import ibis.common as com
import ibis.common.exceptions as com
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.sql.transforms as transforms
@@ -214,7 +214,7 @@ def _index_of(translator, expr):
def _sign(translator, expr):
"""Workaround for missing sign function"""
op = expr.op()
arg, = op.args
(arg,) = op.args
arg_ = translator.translate(arg)
return 'intDivOrZero({0}, abs({0}))'.format(arg_)

2 changes: 1 addition & 1 deletion ibis/clickhouse/tests/test_select.py
@@ -3,7 +3,7 @@
import pytest

import ibis
import ibis.common as com
import ibis.common.exceptions as com

driver = pytest.importorskip('clickhouse_driver')
pytestmark = pytest.mark.clickhouse