46 changes: 23 additions & 23 deletions appveyor.yml
@@ -6,34 +6,34 @@ platform:
environment:
PGUSER: "postgres"
PGPASSWORD: "Password12!"
IBIS_POSTGRES_USER: "%PGUSER%"
IBIS_POSTGRES_PASS: "%PGPASSWORD%"
DATA_DIR: "%USERPROFILE%\\ibis-testing-data"
DATA_URL: "https://storage.googleapis.com/ibis-ci-data"
IBIS_TEST_POSTGRES_DB: "ibis_testing"
IBIS_TEST_CRUNCHBASE_DB: "%USERPROFILE%\\crunchbase.db"
IBIS_TEST_SQLITE_DB_PATH: "%USERPROFILE%\\ibis_testing.db"
PG: "\"C:\\Program Files\\PostgreSQL\\9.6\\bin\\createdb\" ibis_testing"
CHOCO: "C:\\ProgramData\\chocolatey\\bin"
CONDA: "C:\\Miniconda36-x64\\Scripts\\conda"
ACTIVATE: "C:\\Miniconda36-x64\\Scripts\\activate"

matrix:
- PYTHON: "C:\\Python27-x64"
- PYTHON: "C:\\Python34-x64"
- PYTHON: "C:\\Python35-x64"
- PYTHON: "C:\\Python36-x64"
- PYTHON_VERSION: "2.7"
- PYTHON_VERSION: "3.4"
- PYTHON_VERSION: "3.5"
- PYTHON_VERSION: "3.6"

services:
- postgresql
- postgresql93

test_script:
- "%PYTHON%\\python.exe -m pip install -U pip"
- "%PYTHON%\\python.exe -m pip install -U setuptools"
- "%PYTHON%\\python.exe -m pip install -e .\"[sqlite, postgres, visualization, pandas]\""
- "%PYTHON%\\python.exe -m pip install flake8 mock pytest"
- "%PYTHON%\\python.exe -m flake8"

- "curl -o crunchbase.db https://storage.googleapis.com/ibis-ci-data/crunchbase.db"
- "mv crunchbase.db %IBIS_TEST_CRUNCHBASE_DB%"
- "curl -o ibis-testing-data.tar.gz https://storage.googleapis.com/ibis-ci-data/ibis-testing-data.tar.gz"
- "7z x ibis-testing-data.tar.gz"
- "7z x ibis-testing-data.tar"
- "mv ibis-testing-data\\ibis_testing.db %IBIS_TEST_SQLITE_DB_PATH%"
- "%PG%\\createdb.exe %IBIS_TEST_POSTGRES_DB%"

- "%PYTHON%\\python.exe -m pytest --tb=short -m \"not impala and not hdfs and not postgresql\" ibis"
- "%CONDA% --version"
- "%CONDA% config --set always_yes true"
- "%CONDA% install conda=4.3.22 --channel conda-forge"
- "%CONDA% create --name \"ibis_%PYTHON_VERSION%\" python=%PYTHON_VERSION% --channel conda-forge"
- "%ACTIVATE% \"ibis_%PYTHON_VERSION%\""
- "pip install -e .\"[sqlite, postgres, visualization, pandas]\""
- "pip install flake8 mock pytest click \"pbs==0.110\""
- "flake8"
- "python ci\\datamgr.py download --directory \"%USERPROFILE%\""
- "python ci\\datamgr.py sqlite --database \"%IBIS_TEST_SQLITE_DB_PATH%\" --data-directory \"%DATA_DIR%\" --script ci\\sqlite_load.sql functional_alltypes batting awards_players diamonds"
- "python ci\\datamgr.py postgres --database \"%IBIS_TEST_POSTGRES_DB%\" --data-directory \"%DATA_DIR%\" --script ci\\postgresql_load.sql functional_alltypes batting awards_players diamonds"
- "pytest --tb=short -m \"not impala and not hdfs\" ibis"
21 changes: 8 additions & 13 deletions asv.conf.json
@@ -40,7 +40,7 @@

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
"pythons": ["3.5"],
// "pythons": [3.6],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
@@ -53,18 +53,13 @@
// followed by the pip installed packages).
//
"matrix": {
"numpy": [""],
"pandas": [""],
"toolz": [""],
"six": [""],
"graphviz": [""],
"multipledispatch": [""],
"python-graphviz": [""],
"pip+hdfs": [""],
"impyla": [""],
"sqlalchemy": [""],
"pip+thrift": [""],
"pip+thriftpy": [""]
"numpy": [],
"pandas": [],
"toolz": [],
"six": [],
"multipledispatch": [],
"impyla": [],
"sqlalchemy": []
},

// Combinations of libraries/python versions can be excluded/included
70 changes: 70 additions & 0 deletions benchmarks/benchmarks.py
@@ -1,4 +1,8 @@
import numpy as np
import pandas as pd

import ibis
import ibis.expr.datatypes as dt


class Suite:
@@ -83,3 +87,69 @@ def time_impala_base_compile(self):

def time_impala_large_expr_compile(self):
ibis.impala.compile(self.expr)


class PandasBackend:

def setup(self):
n = int(5e6)
data = pd.DataFrame({
'key': np.random.choice(16000, size=n),
'low_card_key': np.random.choice(30, size=n),
'value': np.random.rand(n),
'timestamps': pd.date_range(
start='now', periods=n, freq='s'
).values,
'timestamp_strings': pd.date_range(
start='now', periods=n, freq='s'
).values.astype(str),
})

t = ibis.pandas.connect({'df': data}).table('df')

self.high_card_group_by = t.groupby(t.key).aggregate(
avg_value=t.value.mean()
)

self.cast_to_dates = t.timestamps.cast(dt.date)
self.cast_to_dates_from_strings = t.timestamp_strings.cast(dt.date)

self.multikey_group_by_with_mutate = t.mutate(
dates=t.timestamps.cast('date')
).groupby(['low_card_key', 'dates']).aggregate(
avg_value=lambda t: t.value.mean()
)

self.simple_sort = t.sort_by([t.key])

self.simple_sort_projection = t[['key', 'value']].sort_by(['key'])

self.multikey_sort = t.sort_by(['low_card_key', 'key'])

self.multikey_sort_projection = t[[
'low_card_key', 'key', 'value'
]].sort_by(['low_card_key', 'key'])

def time_high_cardinality_group_by(self):
self.high_card_group_by.execute()

def time_cast_to_date(self):
self.cast_to_dates.execute()

def time_cast_to_date_from_string(self):
self.cast_to_dates_from_strings.execute()

def time_multikey_group_by_with_mutate(self):
self.multikey_group_by_with_mutate.execute()

def time_simple_sort(self):
self.simple_sort.execute()

def time_multikey_sort(self):
self.multikey_sort.execute()

def time_simple_sort_projection(self):
self.simple_sort_projection.execute()

def time_multikey_sort_projection(self):
self.multikey_sort_projection.execute()
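
These classes follow the asv (airspeed velocity) convention: ``setup`` builds the data and
expressions once and each ``time_*`` method is timed separately. A rough manual smoke test
outside of asv might look like this sketch (it assumes the module above is importable as
``benchmarks.benchmarks``):

    from benchmarks.benchmarks import PandasBackend

    suite = PandasBackend()
    suite.setup()                           # builds the 5-million-row test frame
    suite.time_high_cardinality_group_by()  # runs the group-by expression once
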
96 changes: 96 additions & 0 deletions ci/clickhouse_load.sql
@@ -0,0 +1,96 @@
DROP TABLE IF EXISTS diamonds;

CREATE TABLE diamonds (
`date` Date DEFAULT today(),
carat Float64,
cut String,
color String,
clarity String,
depth Float64,
`table` Float64,
price Int64,
x Float64,
y Float64,
z Float64
) ENGINE = MergeTree(date, (`carat`), 8192);

DROP TABLE IF EXISTS batting;

CREATE TABLE batting (
`date` Date DEFAULT today(),
`playerID` String,
`yearID` Int64,
stint Int64,
`teamID` String,
`lgID` String,
`G` Int64,
`AB` Int64,
`R` Int64,
`H` Int64,
`X2B` Int64,
`X3B` Int64,
`HR` Int64,
`RBI` Int64,
`SB` Int64,
`CS` Int64,
`BB` Int64,
`SO` Int64,
`IBB` Int64,
`HBP` Int64,
`SH` Int64,
`SF` Int64,
`GIDP` Int64
) ENGINE = MergeTree(date, (`playerID`), 8192);

DROP TABLE IF EXISTS awards_players;

CREATE TABLE awards_players (
`date` Date DEFAULT today(),
`playerID` String,
`awardID` String,
`yearID` Int64,
`lgID` String,
tie String,
notes String
) ENGINE = MergeTree(date, (`playerID`), 8192);

DROP TABLE IF EXISTS functional_alltypes;

CREATE TABLE functional_alltypes (
`date` Date DEFAULT toDate(timestamp_col),
`index` Int64,
`Unnamed_0` Int64,
id Int32,
bool_col UInt8,
tinyint_col Int8,
smallint_col Int16,
int_col Int32,
bigint_col Int64,
float_col Float32,
double_col Float64,
date_string_col String,
string_col String,
timestamp_col DateTime,
year Int32,
month Int32
) ENGINE = MergeTree(date, (`index`), 8192);

DROP TABLE IF EXISTS tzone;

CREATE TABLE tzone (
`date` Date DEFAULT today(),
ts DateTime,
key String,
value Float64
) ENGINE = MergeTree(date, (key), 8192);

DROP TABLE IF EXISTS array_types;

CREATE TABLE IF NOT EXISTS array_types (
`date` Date DEFAULT today(),
x Array(Int64),
y Array(String),
z Array(Float64),
grouper String,
scalar_column Float64
) ENGINE = MergeTree(date, (scalar_column), 8192);
286 changes: 286 additions & 0 deletions ci/datamgr.py
@@ -0,0 +1,286 @@
#!/usr/bin/env python

import os
import getpass
import tempfile
import tarfile
import operator

import sqlalchemy as sa

import numpy as np
import pandas as pd

import click

try:
import sh
except ImportError:
import pbs as sh


@click.group()
def cli():
pass


@cli.command()
@click.argument('tables', nargs=-1)
@click.option('-S', '--script', type=click.File('rt'), required=True)
@click.option(
'-d', '--database',
default=os.environ.get('IBIS_TEST_CLICKHOUSE_DB', 'ibis_testing')
)
@click.option(
'-D', '--data-directory',
default=tempfile.gettempdir(), type=click.Path(exists=True)
)
def clickhouse(script, tables, database, data_directory):
username = os.environ.get('IBIS_CLICKHOUSE_USER', 'default')
host = os.environ.get('IBIS_CLICKHOUSE_HOST', 'localhost')
password = os.environ.get('IBIS_CLICKHOUSE_PASS', '')

url = sa.engine.url.URL(
'clickhouse+native',
username=username,
host=host,
password=password,
)
engine = sa.create_engine(str(url))
engine.execute('DROP DATABASE IF EXISTS "{}"'.format(database))
engine.execute('CREATE DATABASE "{}"'.format(database))

url = sa.engine.url.URL(
'clickhouse+native',
username=username,
host=host,
password=password,
database=database,
)
engine = sa.create_engine(str(url))
script_text = script.read()

# missing stmt
# INSERT INTO array_types (x, y, z, grouper, scalar_column) VALUES
# ([1, 2, 3], ['a', 'b', 'c'], [1.0, 2.0, 3.0], 'a', 1.0),
# ([4, 5], ['d', 'e'], [4.0, 5.0], 'a', 2.0),
# ([6], ['f'], [6.0], 'a', 3.0),
# ([1], ['a'], [], 'b', 4.0),
# ([2, 3], ['b', 'c'], [], 'b', 5.0),
# ([4, 5], ['d', 'e'], [4.0, 5.0], 'c', 6.0);

with engine.begin() as con:
# doesn't support multiple statements
for stmt in script_text.split(';'):
if len(stmt.strip()):
con.execute(stmt)

table_paths = [
os.path.join(data_directory, '{}.csv'.format(table))
for table in tables
]
dtype = {'bool_col': np.bool_}
for table, path in zip(tables, table_paths):
# correct dtypes per table to be able to insert
# TODO: cleanup, kinda ugly
df = pd.read_csv(path, index_col=None, header=0, dtype=dtype)
if table == 'functional_alltypes':
df = df.rename(columns={'Unnamed: 0': 'Unnamed_0'})
cols = ['date_string_col', 'string_col']
df[cols] = df[cols].astype(str)
df.timestamp_col = df.timestamp_col.astype('datetime64[s]')
elif table == 'batting':
cols = ['playerID', 'teamID', 'lgID']
df[cols] = df[cols].astype(str)
cols = df.select_dtypes([float]).columns
df[cols] = df[cols].fillna(0).astype(int)
elif table == 'awards_players':
cols = ['playerID', 'awardID', 'lgID', 'tie', 'notes']
df[cols] = df[cols].astype(str)

df.to_sql(table, engine, index=False, if_exists='append')


@cli.command()
@click.argument('tables', nargs=-1)
@click.option('-S', '--script', type=click.File('rt'), required=True)
@click.option(
'-d', '--database',
default=os.environ.get(
'IBIS_TEST_POSTGRES_DB', os.environ.get('PGDATABASE', 'ibis_testing')
),
)
@click.option(
'-D', '--data-directory',
default=tempfile.gettempdir(), type=click.Path(exists=True)
)
def postgres(script, tables, database, data_directory):
username = os.environ.get(
'IBIS_POSTGRES_USER', os.environ.get('PGUSER', getpass.getuser())
)
host = os.environ.get('PGHOST', 'localhost')
password = os.environ.get('IBIS_POSTGRES_PASS', os.environ.get('PGPASS'))
url = sa.engine.url.URL(
'postgresql',
username=username,
host=host,
password=password,
)
engine = sa.create_engine(str(url), isolation_level='AUTOCOMMIT')
engine.execute('DROP DATABASE IF EXISTS "{}"'.format(database))
engine.execute('CREATE DATABASE "{}"'.format(database))

url = sa.engine.url.URL(
'postgresql',
username=username,
host=host,
password=password,
database=database,
)
engine = sa.create_engine(str(url))
script_text = script.read()
with engine.begin() as con:
con.execute(script_text)

table_paths = [
os.path.join(data_directory, '{}.csv'.format(table))
for table in tables
]
dtype = {'bool_col': np.bool_}
for table, path in zip(tables, table_paths):
df = pd.read_csv(path, index_col=None, header=0, dtype=dtype)
df.to_sql(table, engine, index=False, if_exists='append')
engine = sa.create_engine(str(url), isolation_level='AUTOCOMMIT')
engine.execute('VACUUM FULL ANALYZE')


@cli.command()
@click.argument('tables', nargs=-1)
@click.option('-S', '--script', type=click.File('rt'), required=True)
@click.option(
'-d', '--database',
default=os.environ.get('IBIS_TEST_SQLITE_DB_PATH', 'ibis_testing.db')
)
@click.option(
'-D', '--data-directory',
default=tempfile.gettempdir(), type=click.Path(exists=True)
)
def sqlite(script, tables, database, data_directory):
database = os.path.abspath(database)
if os.path.exists(database):
try:
os.remove(database)
except OSError:
pass
engine = sa.create_engine('sqlite:///{}'.format(database))
script_text = script.read()
with engine.begin() as con:
con.connection.connection.executescript(script_text)
table_paths = [
os.path.join(data_directory, '{}.csv'.format(table))
for table in tables
]
click.echo(tables)
click.echo(table_paths)
for table, path in zip(tables, table_paths):
df = pd.read_csv(path, index_col=None, header=0)
with engine.begin() as con:
df.to_sql(table, con, index=False, if_exists='append')
engine.execute('VACUUM')
engine.execute('VACUUM ANALYZE')


if os.environ.get('APPVEYOR', None) is not None:
curl = sh.Command('C:\\Tools\\curl\\bin\\curl.exe')
else:
curl = sh.curl


@cli.command()
@click.argument(
'base_url',
required=False,
default='https://storage.googleapis.com/ibis-ci-data' # noqa: E501
)
@click.option('-d', '--data', multiple=True)
@click.option('-D', '--directory', default='.', type=click.Path(exists=False))
def download(base_url, data, directory):
if not data:
data = 'ibis-testing-data.tar.gz',

if not os.path.exists(directory):
os.mkdir(directory)

for piece in data:
data_url = '{}/{}'.format(base_url, piece)
path = os.path.join(directory, piece)

curl(
data_url, o=path, L=True,
_out=click.get_binary_stream('stdout'),
_err=click.get_binary_stream('stderr'),
)

if piece.endswith(('.tar', '.gz', '.bz2', '.xz')):
with tarfile.open(path, mode='r|gz') as f:
f.extractall(path=directory)


def parse_env(ctx, param, values):
pairs = []
for envar in values:
try:
name, value = envar.split('=', 1)
except ValueError:
raise click.ClickException(
'Environment variables must be of the form NAME=VALUE. '
'{} is not in this format'.format(envar)
)
pairs.append((name, value))
return dict(pairs)


@cli.command()
@click.argument('data_directory', type=click.Path(exists=True))
@click.option('-e', '--environment', multiple=True, callback=parse_env)
def env(data_directory, environment):
envars = dict([
('IBIS_TEST_IMPALA_HOST', 'impala'),
('IBIS_TEST_NN_HOST', 'impala'),
('IBIS_TEST_IMPALA_POST', 21050),
('IBIS_TEST_WEBHDFS_PORT', 50070),
('IBIS_TEST_WEBHDFS_USER', 'ubuntu'),
(
'IBIS_TEST_SQLITE_DB_PATH',
os.path.join(data_directory, 'ibis_testing.db'),
),
(
'DIAMONDS_CSV',
os.path.join(data_directory, 'diamonds.csv')
),
(
'BATTING_CSV',
os.path.join(data_directory, 'batting.csv')
),
(
'AWARDS_PLAYERS_CSV',
os.path.join(data_directory, 'awards_players.csv')
),
(
'FUNCTIONAL_ALLTYPES_CSV',
os.path.join(data_directory, 'functional_alltypes.csv')
),
('IBIS_TEST_POSTGRES_DB', 'ibis_testing'),
('IBIS_POSTGRES_USER', getpass.getuser()),
('IBIS_POSTGRES_PASS', ''),
])
envars.update(environment)
string = '\n'.join(
'='.join((name, str(value)))
for name, value in sorted(envars.items(), key=operator.itemgetter(0))
)
click.echo(string)


if __name__ == '__main__':
cli()
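
Since the script is a standard ``click`` group, each subcommand can also be exercised
in-process, which is convenient when debugging the CI data load. A minimal sketch using
click's test runner (the data directory path is illustrative and must already exist):

    from click.testing import CliRunner

    # assumes: from datamgr import cli
    runner = CliRunner()
    result = runner.invoke(
        cli, ['env', '/tmp/ibis-testing-data', '-e', 'IBIS_TEST_NN_HOST=localhost']
    )
    print(result.output)  # sorted NAME=VALUE pairs, one per line
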
108 changes: 108 additions & 0 deletions ci/postgresql_load.sql
@@ -0,0 +1,108 @@
DROP TABLE IF EXISTS diamonds CASCADE;

CREATE TABLE diamonds (
carat FLOAT,
cut TEXT,
color TEXT,
clarity TEXT,
depth FLOAT,
"table" FLOAT,
price BIGINT,
x FLOAT,
y FLOAT,
z FLOAT
);

DROP TABLE IF EXISTS batting CASCADE;

CREATE TABLE batting (
"playerID" TEXT,
"yearID" BIGINT,
stint BIGINT,
"teamID" TEXT,
"lgID" TEXT,
"G" BIGINT,
"AB" BIGINT,
"R" BIGINT,
"H" BIGINT,
"X2B" BIGINT,
"X3B" BIGINT,
"HR" BIGINT,
"RBI" BIGINT,
"SB" BIGINT,
"CS" BIGINT,
"BB" BIGINT,
"SO" BIGINT,
"IBB" BIGINT,
"HBP" BIGINT,
"SH" BIGINT,
"SF" BIGINT,
"GIDP" BIGINT
);

DROP TABLE IF EXISTS awards_players CASCADE;

CREATE TABLE awards_players (
"playerID" TEXT,
"awardID" TEXT,
"yearID" BIGINT,
"lgID" TEXT,
tie TEXT,
notes TEXT
);

DROP TABLE IF EXISTS functional_alltypes CASCADE;

CREATE TABLE functional_alltypes (
"index" BIGINT,
"Unnamed: 0" BIGINT,
id INTEGER,
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INTEGER,
bigint_col BIGINT,
float_col REAL,
double_col DOUBLE PRECISION,
date_string_col TEXT,
string_col TEXT,
timestamp_col TIMESTAMP WITHOUT TIME ZONE,
year INTEGER,
month INTEGER
);

CREATE INDEX "ix_functional_alltypes_index" ON functional_alltypes ("index");

DROP TABLE IF EXISTS tzone CASCADE;

CREATE TABLE tzone (
ts TIMESTAMP WITH TIME ZONE,
key TEXT,
value DOUBLE PRECISION
);

INSERT INTO tzone
SELECT
CAST('2017-05-28 11:01:31.000400' AS TIMESTAMP WITH TIME ZONE) +
t * INTERVAL '1 day 1 microsecond' AS ts,
CHR(97 + t) AS key,
t + t / 10.0 AS value
FROM generate_series(0, 9) AS t;

DROP TABLE IF EXISTS array_types CASCADE;

CREATE TABLE IF NOT EXISTS array_types (
x BIGINT[],
y TEXT[],
z DOUBLE PRECISION[],
grouper TEXT,
scalar_column DOUBLE PRECISION
);

INSERT INTO array_types VALUES
(ARRAY[1, 2, 3], ARRAY['a', 'b', 'c'], ARRAY[1.0, 2.0, 3.0], 'a', 1.0),
(ARRAY[4, 5], ARRAY['d', 'e'], ARRAY[4.0, 5.0], 'a', 2.0),
(ARRAY[6, NULL], ARRAY['f', NULL], ARRAY[6.0, NULL], 'a', 3.0),
(ARRAY[NULL, 1, NULL], ARRAY[NULL, 'a', NULL], ARRAY[]::DOUBLE PRECISION[], 'b', 4.0),
(ARRAY[2, NULL, 3], ARRAY['b', NULL, 'c'], NULL, 'b', 5.0),
(ARRAY[4, NULL, NULL, 5], ARRAY['d', NULL, NULL, 'e'], ARRAY[4.0, NULL, NULL, 5.0], 'c', 6.0);
28 changes: 28 additions & 0 deletions ci/requirements-dev-2.7.yml
@@ -0,0 +1,28 @@
channels:
- conda-forge
dependencies:
- click
- cmake
- enum34
- flake8
- graphviz
- impyla>=0.13.7
- mock
- multipledispatch
- numpy=1.10.0
- pandas=0.18.1
- psycopg2
- pytest
- python=2.7
- python-graphviz
- sh
- six
- sqlalchemy>=1.0.0
- thrift<=0.9.3
- thriftpy<=0.3.9
- toolz
- clickhouse-driver>=0.0.8
- clickhouse-sqlalchemy
- pip:
- hdfs>=2.0.0
- google-cloud-bigquery
26 changes: 26 additions & 0 deletions ci/requirements-dev-3.4.yml
@@ -0,0 +1,26 @@
channels:
- conda-forge
dependencies:
- click
- cmake
- flake8
- graphviz
- impyla>=0.13.7
- multipledispatch
- numpy=1.11.0
- pandas=0.19.0
- psycopg2
- pytest
- python=3.4
- python-graphviz
- sh
- six
- sqlalchemy>=1.0.0
- thrift<=0.9.3
- thriftpy<=0.3.9
- toolz
- pip:
- hdfs>=2.0.0
- clickhouse-driver>=0.0.8
- clickhouse-sqlalchemy
- google-cloud-bigquery
26 changes: 26 additions & 0 deletions ci/requirements-dev-3.5.yml
@@ -0,0 +1,26 @@
channels:
- conda-forge
dependencies:
- click
- cmake
- flake8
- graphviz
- impyla>=0.13.7
- multipledispatch
- numpy=1.12.0
- pandas
- psycopg2
- pytest
- python=3.5
- python-graphviz
- six
- sh
- sqlalchemy>=1.0.0
- thrift<=0.9.3
- thriftpy<=0.3.9
- toolz
- clickhouse-driver>=0.0.8
- clickhouse-sqlalchemy
- pip:
- hdfs>=2.0.0
- google-cloud-bigquery
26 changes: 26 additions & 0 deletions ci/requirements-dev-3.6.yml
@@ -0,0 +1,26 @@
channels:
- conda-forge
dependencies:
- click
- cmake
- flake8
- graphviz
- impyla>=0.13.7
- multipledispatch
- numpy
- pandas
- psycopg2
- pytest
- python=3.6
- python-graphviz
- sh
- six
- sqlalchemy>=1.0.0
- thrift
- thriftpy<=0.3.9
- toolz
- clickhouse-driver>=0.0.8
- clickhouse-sqlalchemy
- pip:
- hdfs>=2.0.0
- google-cloud-bigquery
30 changes: 30 additions & 0 deletions ci/requirements-docs-3.6.yml
@@ -0,0 +1,30 @@
channels:
- conda-forge
dependencies:
- click
- cmake
- flake8
- graphviz
- impyla>=0.13.7
- ipython
- matplotlib
- multipledispatch
- numpy
- numpydoc
- pandas
- psycopg2
- pytest
- python=3.6
- python-graphviz
- sh
- six
- sphinx_rtd_theme
- sqlalchemy>=1.0.0
- thrift
- thriftpy<=0.3.9
- toolz
- clickhouse-driver>=0.0.8
- clickhouse-sqlalchemy
- pip:
- hdfs>=2.0.0
- google-cloud-bigquery
12 changes: 0 additions & 12 deletions ci/run.sh

This file was deleted.

67 changes: 67 additions & 0 deletions ci/sqlite_load.sql
@@ -0,0 +1,67 @@
CREATE TABLE functional_alltypes (
"index" BIGINT,
"Unnamed: 0" BIGINT,
id BIGINT,
bool_col BOOLEAN,
tinyint_col BIGINT,
smallint_col BIGINT,
int_col BIGINT,
bigint_col BIGINT,
float_col FLOAT,
double_col FLOAT,
date_string_col TEXT,
string_col TEXT,
timestamp_col TEXT,
year BIGINT,
month BIGINT,
CHECK (bool_col IN (0, 1))
);

CREATE INDEX ix_functional_alltypes_index ON "functional_alltypes" ("index");

CREATE TABLE awards_players (
"playerID" TEXT,
"awardID" TEXT,
"yearID" BIGINT,
"lgID" TEXT,
tie TEXT,
notes TEXT
);

CREATE TABLE batting (
"playerID" TEXT,
"yearID" BIGINT,
stint BIGINT,
"teamID" TEXT,
"lgID" TEXT,
"G" BIGINT,
"AB" BIGINT,
"R" BIGINT,
"H" BIGINT,
"X2B" BIGINT,
"X3B" BIGINT,
"HR" BIGINT,
"RBI" BIGINT,
"SB" BIGINT,
"CS" BIGINT,
"BB" BIGINT,
"SO" BIGINT,
"IBB" BIGINT,
"HBP" BIGINT,
"SH" BIGINT,
"SF" BIGINT,
"GIDP" BIGINT
);

CREATE TABLE diamonds (
carat FLOAT,
cut TEXT,
color TEXT,
clarity TEXT,
depth FLOAT,
"table" FLOAT,
price BIGINT,
x FLOAT,
y FLOAT,
z FLOAT
);
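
These DDL statements are executed by ``ci/datamgr.py sqlite`` before the CSVs are appended;
once that has run, the resulting database can be opened directly. A small sketch (the
environment variable matches the one used in CI above):

    import os
    import ibis

    con = ibis.sqlite.connect(os.environ['IBIS_TEST_SQLITE_DB_PATH'])
    print(con.table('functional_alltypes').count().execute())
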
90 changes: 0 additions & 90 deletions circle.yml

This file was deleted.

2 changes: 1 addition & 1 deletion conda-recipes/ibis-framework/build.sh
@@ -1,6 +1,6 @@
#!/bin/bash

$PYTHON setup.py install
$PYTHON setup.py install --single-version-externally-managed --record=installed-files.txt
$PYTHON -c "import ibis; print(ibis.__version__.replace('v', ''))" > ibis/.version

# Add more build steps here, if they are necessary.
24 changes: 12 additions & 12 deletions conda-recipes/ibis-framework/meta.yaml
@@ -13,38 +13,37 @@ source:
requirements:
build:
- enum34 # [py27]
- numpy >=1.7.0
- pandas >=0.12.0
- numpy >=1.10.0
- pandas >=0.18.1
- python
- setuptools
- six
- toolz
run:
- enum34 # [py27]
- numpy >=1.7.0
- pandas >=0.12.0
- numpy >=1.10.0
- pandas >=0.18.1
- python
- setuptools
- six
- toolz

test:
requires:
- graphviz
- graphviz # [not (py34 and win)]
- mock # [py27]
- multipledispatch
- pytest <3 # [py27]
- pytest >=3 # [py3k]
- python-graphviz
- pytest >=3
- python-graphviz # [not (py34 and win)]
imports:
- ibis
- ibis.expr
- ibis.expr.tests
- ibis.expr.visualize
- ibis.expr.visualize # [not (py34 and win)]
- ibis.hive
- ibis.hive.tests
- ibis.impala
- ibis.impala.tests
- ibis.impala # [linux]
- ibis.impala.tests # [linux]
- ibis.spark
- ibis.spark.tests
- ibis.sql
@@ -59,7 +58,8 @@ test:
- ibis.sql.vertica.tests
- ibis.tests
commands:
- py.test --tb=short --pyargs ibis -m 'not impala and not hdfs'
- pytest --version
- pytest --tb=short --pyargs ibis -m "not impala and not hdfs and not bigquery"

about:
license: Apache License, Version 2.0
2 changes: 2 additions & 0 deletions docs/requirements-docs.txt
@@ -1,2 +1,4 @@
sphinx_rtd_theme
numpydoc
ipython
matplotlib
64 changes: 32 additions & 32 deletions docs/source/api.rst
@@ -303,8 +303,8 @@ Generic value methods

.. _api.functions:

Scalar or array methods
~~~~~~~~~~~~~~~~~~~~~~~
Scalar or column methods
~~~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/
@@ -322,20 +322,12 @@ Scalar or array methods
ValueExpr.over
ValueExpr.typeof

ValueExpr.add
ValueExpr.sub
ValueExpr.mul
ValueExpr.div
ValueExpr.pow
ValueExpr.rdiv
ValueExpr.rsub

ValueExpr.case
ValueExpr.cases
ValueExpr.substitute

Array methods
~~~~~~~~~~~~~
Column methods
~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/
@@ -365,8 +357,8 @@ Array methods
General numeric methods
-----------------------

Scalar or array methods
~~~~~~~~~~~~~~~~~~~~~~~
Scalar or column methods
~~~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/
@@ -384,33 +376,41 @@ Scalar or array methods
NumericValue.round
NumericValue.nullifzero
NumericValue.zeroifnull
NumericValue.add
NumericValue.sub
NumericValue.mul
NumericValue.div
NumericValue.pow
NumericValue.rdiv
NumericValue.rsub


Array methods
~~~~~~~~~~~~~

Column methods
~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/

NumericArray.sum
NumericArray.mean
NumericColumn.sum
NumericColumn.mean

NumericArray.std
NumericArray.var
NumericColumn.std
NumericColumn.var

NumericArray.cumsum
NumericArray.cummean
NumericColumn.cumsum
NumericColumn.cummean

NumericArray.bottomk
NumericArray.topk
NumericArray.bucket
NumericArray.histogram
NumericColumn.bottomk
NumericColumn.topk
NumericColumn.bucket
NumericColumn.histogram

Integer methods
---------------

Scalar or array methods
~~~~~~~~~~~~~~~~~~~~~~~
Scalar or column methods
~~~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/
@@ -489,10 +489,10 @@ Boolean methods
.. autosummary::
:toctree: generated/

BooleanArray.any
BooleanArray.all
BooleanArray.cumany
BooleanArray.cumall
BooleanColumn.any
BooleanColumn.all
BooleanColumn.cumany
BooleanColumn.cumall

Category methods
----------------
16 changes: 8 additions & 8 deletions docs/source/conf.py
@@ -13,15 +13,11 @@
# serve to show the default.

import glob
import sys
import os

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.

sys.path.insert(0, os.path.abspath('../sphinxext'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
@@ -32,19 +28,19 @@
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.mathjax',
'sphinx.ext.autosummary',
'sphinx.ext.extlinks',
'sphinx.ext.mathjax',
'numpydoc',

'ipython_sphinxext.ipython_directive',
'ipython_sphinxext.ipython_console_highlighting',
'IPython.sphinxext.ipython_directive',
'IPython.sphinxext.ipython_console_highlighting',
]

autosummary_generate = glob.glob("*.rst")

# autosummary_generate = True

import numpydoc
numpydoc_show_class_members = False

# Add any paths that contain templates here, relative to this directory.
@@ -244,6 +240,10 @@
#latex_domain_indices = True


# extlinks alias
extlinks = {'issue': ('https://github.com/ibis-project/ibis/issues/%s', '#')}


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
128 changes: 119 additions & 9 deletions docs/source/developer.rst
@@ -8,19 +8,129 @@ For a primer on general open source contributions, see the `pandas contribution
guide <http://pandas.pydata.org/pandas-docs/stable/contributing.html>`_. The
project will be run much like pandas has been.

Test environment setup
----------------------
Linux Test Environment Setup
============================

If you do not have access to an Impala cluster, you may wish to set up the test
virtual machine. We've set up a Quickstart VM to get you up and running faster,
:ref:`see here <install.quickstart>`.
Conda Environment Setup
-----------------------

Unit tests and integration tests that use Impala require a test data load. See
``scripts/load_test_data.py`` in the source repository for the data loading
script.
#. **Install the latest version of miniconda**:

.. code:: sh
# Download the miniconda bash installer
curl -Ls -o $HOME/miniconda.sh \
https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
# Run the installer
bash $HOME/miniconda.sh -b -p $HOME/miniconda
# Put the conda command on your PATH
export PATH="$HOME/miniconda/bin:$PATH"
#. **Install the development environment of your choice (Python 3.6 in this
example), activate and install ibis in development mode**:

.. code:: sh
# Create a conda environment ready for ibis development
conda env create --name ibis36 --file=ci/requirements-dev-3.6.yml
# Activate the conda environment
source activate ibis36
# Install ibis
python setup.py develop
#. `Install docker <https://docs.docker.com/engine/installation/>`_
#. **Download the test data**:

.. code:: sh
DATA_DIR=$PWD
ci/datamgr.py download --directory=$DATA_DIR
Setting Up Test Databases
-------------------------

Impala (with UDFs)
^^^^^^^^^^^^^^^^^^

#. **Start the Impala docker image in another terminal**:

.. code:: sh
# Keep this running as long as you want to test ibis
docker run --tty --rm --hostname impala cpcloud86/impala:java8
#. **Load data and UDFs into impala**:

.. code:: sh
test_data_admin.py load --data --data-dir=$DATA_DIR
Clickhouse
^^^^^^^^^^

#. **Start the Clickhouse Server docker image in another terminal**:

.. code:: sh
# Keep this running as long as you want to test ibis
docker run --rm -p 9000:9000 --tty yandex/clickhouse-server
#. **Load data**:

.. code:: sh
ci/datamgr.py clickhouse \
--database $IBIS_TEST_CLICKHOUSE_DB \
--data-directory $DATA_DIR/ibis-testing-data \
--script ci/clickhouse_load.sql \
functional_alltypes batting diamonds awards_players
PostgreSQL
^^^^^^^^^^

PostgreSQL can be used from either the installation that resides on the Impala
docker image or from your machine directly.

Here's how to load test data into PostgreSQL:

.. code:: sh
ci/datamgr.py postgres \
--database $IBIS_TEST_POSTGRES_DB \
--data-directory $DATA_DIR/ibis-testing-data \
--script ci/postgresql_load.sql \
functional_alltypes batting diamonds awards_players
SQLite
^^^^^^

SQLite comes already installed on many systems. If you used the conda setup
instructions above, then SQLite will be available in the conda environment.

.. code:: sh
ci/datamgr.py sqlite \
--database $IBIS_TEST_SQLITE_DB_PATH \
--data-directory $DATA_DIR/ibis-testing-data \
--script ci/sqlite_load.sql \
functional_alltypes batting diamonds awards_players
Running Tests
-------------

You are now ready to run the full ibis test suite:

.. code:: sh
pytest ibis
Contribution Ideas
------------------
==================

Here are a few ideas to think about outside of participating in the primary
development roadmap:
98 changes: 75 additions & 23 deletions docs/source/getting-started.rst
@@ -43,59 +43,111 @@ Some platforms will require that you have Kerberos installed to build properly.

.. _install.impala:

Ibis Impala Quickstart
----------------------
`Impala <https://impala.apache.org/>`_ Quickstart
-------------------------------------------------

To install dependencies for Ibis's Impala dialect:
Install dependencies for Ibis's Impala dialect:

::

pip install ibis-framework[impala]

To create an Ibis client, you must first connect your services and assemble the
client using :func:`~ibis.impala.connect`:

.. code-block:: python
import ibis
hdfs = ibis.hdfs_connect(host=webhdfs_host, port=webhdfs_port)
con = ibis.impala.connect(host=impala_host, port=impala_port,
hdfs_client=hdfs)
Both method calls can take ``auth_mechanism='GSSAPI'`` or
``auth_mechanism='LDAP'`` to connect to Kerberos clusters. Depending on your
cluster setup, this may also include SSL. See the :ref:`API reference
<api.client>` for more, along with the Impala shell reference, as the
connection semantics are identical.
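
For example, a Kerberized cluster connection might look like the following
sketch (the host and port variables are placeholders, and ``use_https``
depends on how WebHDFS is exposed on your cluster):

.. code-block:: python
hdfs = ibis.hdfs_connect(host=webhdfs_host, port=webhdfs_port,
auth_mechanism='GSSAPI', use_https=True)
con = ibis.impala.connect(host=impala_host, port=impala_port,
hdfs_client=hdfs, auth_mechanism='GSSAPI')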

.. _install.sqlite:

Ibis SQLite Quickstart
----------------------
`SQLite <https://www.sqlite.org/>`_ Quickstart
----------------------------------------------

To install dependencies for Ibis's SQLite dialect:
Install dependencies for Ibis's SQLite dialect:

::

pip install ibis-framework[sqlite]

Create a client by passing a path to a SQLite database to
:func:`~ibis.sqlite.connect`:

.. code-block:: python
>>> ibis.sqlite.connect('path/to/my/sqlite.db')
See http://blog.ibis-project.org/sqlite-crunchbase-quickstart/ for a quickstart
using SQLite. Otherwise read on to try out Ibis on Impala.
using SQLite.

.. _install.postgres:

Ibis PostgreSQL Quickstart
--------------------------
`PostgreSQL <https://www.postgresql.org/>`_ Quickstart
------------------------------------------------------

To install dependencies for Ibis's PostgreSQL dialect:
Install dependencies for Ibis's PostgreSQL dialect:

::

pip install ibis-framework[postgres]

Creating a client
-----------------
Create a client by passing a connection string or individual parameters to
:func:`~ibis.postgres.connect`:

.. code-block:: python
>>> con = ibis.postgres.connect(
... 'postgresql://user:pass@host:port/my_database'
... )
>>> con = ibis.postgres.connect(
... user='bob', port=23569, database='ibis_testing'
... )
.. _install.bigquery:

`Clickhouse <https://clickhouse.yandex/>`_ Quickstart
-----------------------------------------------------

Install dependencies for Ibis's Clickhouse dialect:

::

pip install ibis-framework[clickhouse]

Create a client by passing in database connection parameters such as ``host``,
``port``, ``database``, and ``user`` to :func:`~ibis.clickhouse.connect`:

To create an Ibis "client", you must first connect your services and assemble
the client using ``ibis.impala.connect``:

.. code-block:: python
import ibis
>>> con = ibis.clickhouse.connect(host='localhost', port=9000)
hdfs = ibis.hdfs_connect(host=webhdfs_host, port=webhdfs_port)
con = ibis.impala.connect(host=impala_host, port=impala_port,
hdfs_client=hdfs)
`BigQuery <https://cloud.google.com/bigquery/>`_ Quickstart
-----------------------------------------------------------

Both method calls can take ``auth_mechanism='GSSAPI'`` or
``auth_mechanism='LDAP'`` to connect to Kerberos clusters. Depending on your
cluster setup, this may also include SSL. See the :ref:`API reference
<api.client>` for more, along with the Impala shell reference, as the
connection semantics are identical.
Install dependencies for Ibis's BigQuery dialect:

::

pip install ibis-framework[bigquery]

Create a client by passing in the project id and dataset id you wish to operate
with:


.. code-block:: python
>>> con = ibis.bigquery.connect(project_id='ibis-gbq', dataset_id='testing')
Learning resources
------------------
5 changes: 3 additions & 2 deletions docs/source/index.rst
@@ -48,8 +48,9 @@ At this time, Ibis offers some level of support for the following systems:
- `Apache Impala (incubating) <http://impala.io/>`_
- `Apache Kudu (incubating) <http://getkudu.io>`_
- Hadoop Distributed File System (HDFS)
- PostgreSQL (Experimental)
- PostgreSQL
- SQLite
- Google BigQuery (experimental)
- Direct execution of ibis expressions against pandas objects (Experimental)

Coming from SQL? Check out :ref:`Ibis for SQL Programmers <sql>`.
@@ -84,9 +85,9 @@ places, but this will improve as things progress.
impala
api
sql
release
developer
type-system
release
legal

Indices and tables
116 changes: 96 additions & 20 deletions docs/source/release.rst
@@ -2,10 +2,86 @@
Release Notes
=============

**Note**: These release notes will only include notable or major bug fixes
since most minor bug fixes tend to be esoteric and not generally
interesting. Point (minor, e.g. 0.5.1) releases will generally not be found
here and contain only bug fixes.
.. note::

These release notes will only include notable or major bug fixes since most
minor bug fixes tend to be esoteric and not generally interesting. Point
releases (e.g., ``0.5.1``) will generally not be found here and contain
only bug fixes.

v0.12.0 (October 28, 2017)
--------------------------

This release brings Clickhouse and BigQuery SQL support along with a number of
bug fixes and reliability enhancements. We recommend that all users upgrade
from earlier versions of Ibis.

New Backends
~~~~~~~~~~~~

* BigQuery backend (:issue:`1170`), thanks to @tsdlovell.
* Clickhouse backend (:issue:`1127`), thanks to @kszucs.

New Features
~~~~~~~~~~~~

* Add support for ``Binary`` data type (:issue:`1183`)
* Allow users of the BigQuery client to define their own API proxy classes
(:issue:`1188`)
* Add support for HAVING in the pandas backend (:issue:`1182`)
* Add struct field tab completion (:issue:`1178`)
* Add expressions for Map/Struct types and columns (:issue:`1166`)
* Support Table.asof_join (:issue:`1162`)
* Allow right side of arithmetic operations to take over (:issue:`1150`)
* Add a data_preload step in pandas backend (:issue:`1142`)
* Support expressions in join predicates in the pandas backend (:issue:`1138`)
* Scalar parameters (:issue:`1075`)
* Limited window function support for pandas (:issue:`1083`)
* Implement Time datatype (:issue:`1105`)
* Implement array ops for pandas (:issue:`1100`)
* support for passing multiple quantiles in ``.quantile()`` (:issue:`1094`)
* support for clip and quantile ops on DoubleColumns (:issue:`1090`)
* Enable unary math operations for pandas, sqlite (:issue:`1071`)
* Enable casting from strings to temporal types (:issue:`1076`)
* Allow selection of whole tables in pandas joins (:issue:`1072`)
* Implement comparison for string vs date and timestamp types (:issue:`1065`)
* Implement isnull and notnull for pandas (:issue:`1066`)
* Allow like operation to accept a list of conditions to match (:issue:`1061`)
* Add a pre_execute step in pandas backend (:issue:`1189`)

Bug Fixes
~~~~~~~~~

* Remove global expression caching to ensure repeatable code generation
(:issue:`1179`, :issue:`1181`)
* Fix ``ORDER BY`` generation without a ``GROUP BY`` (:issue:`1180`,
:issue:`1181`)
* Ensure that :class:`~ibis.expr.datatypes.DataType` and subclasses hash
properly (:issue:`1172`)
* Ensure that the pandas backend can deal with unary operations in groupby
  (:issue:`1182`)
* Incorrect impala code generated for NOT with complex argument (:issue:`1176`)
* BUG/CLN: Fix predicates on Selections on Joins (:issue:`1149`)
* Don't use SET LOCAL to allow redshift to work (:issue:`1163`)
* Allow empty arrays as arguments (:issue:`1154`)
* Fix column renaming in groupby keys (:issue:`1151`)
* Ensure that we only cast if timezone is not None (:issue:`1147`)
* Fix location of conftest.py (:issue:`1107`)
* TST/Make sure we drop tables during postgres testing (:issue:`1101`)
* Fix misleading join error message (:issue:`1086`)
* BUG/TST: Make hdfs an optional dependency (:issue:`1082`)
* Memoization should include expression name where available (:issue:`1080`)

Performance Enhancements
~~~~~~~~~~~~~~~~~~~~~~~~

* Speed up imports (:issue:`1074`)
* Fix execution perf of groupby and selection (:issue:`1073`)
* Use normalize for casting to dates in pandas (:issue:`1070`)
* Speed up pandas groupby (:issue:`1067`)

Contributors
~~~~~~~~~~~~

0.11.0 (June 28, 2017)
----------------------
@@ -14,7 +90,7 @@ This release brings initial Pandas backend support along with a number of
bug fixes and reliability enhancements. We recommend that all users upgrade
from earlier versions of Ibis.

New features
New Features
~~~~~~~~~~~~
* Experimental pandas backend to allow execution of ibis expression against
pandas DataFrames
Expand All @@ -32,16 +108,16 @@ New features
* Added a ``type`` parameter to ``ibis.literal`` to allow user specification of
literal types

Bug fixes
Bug Fixes
~~~~~~~~~
* Fix broken conda recipe
* Fix incorrectly typed fillna operation
* Fix postgres boolean summary operations
* Fix kudu support to reflect client API changes
* Fix kudu support to reflect client API Changes
* Fix equality of nested types and construction of nested types when the value
type is specified as a string

API changes
API Changes
~~~~~~~~~~~
* Deprecate passing integer values to the ``ibis.timestamp`` literal
constructor, this will be removed in 0.12.0
Expand All @@ -68,20 +144,20 @@ critical bug fixes and usability improvements. As several correctness bugs with
the SQL compiler were fixed, we recommend that all users upgrade from earlier
versions of Ibis.

New features
New Features
~~~~~~~~~~~~
* Initial PostgreSQL backend contributed by Phillip Cloud.
* Add ``groupby`` as an alias for ``group_by`` to table expressions

Bug fixes
Bug Fixes
~~~~~~~~~
* Fix an expression error when filtering based on a new field
* Fix Impala's SQL compilation of using ``OR`` with compound filters
* Various fixes with the ``having(...)`` function in grouped table expressions
* Fix CTE (``WITH``) extraction inside ``UNION ALL`` expressions.
* Fix ``ImportError`` on Python 2 when ``mock`` library not installed

API changes
API Changes
~~~~~~~~~~~
* The deprecated ``ibis.impala_connect`` and ``ibis.make_client`` APIs have
been removed
Expand All @@ -92,7 +168,7 @@ API changes
This release brings initial Kudu-Impala integration and improved Impala and
SQLite support, along with several critical bug fixes.

New features
New Features
~~~~~~~~~~~~
* Apache Kudu (incubating) integration for Impala users. See the `blog post <http://blog.ibis-project.org/kudu-impala-ibis>`_ for now. Will add some documentation here when possible.
* Add ``use_https`` option to ``ibis.hdfs_connect`` for WebHDFS connections in
Expand Down Expand Up @@ -131,7 +207,7 @@ The last expression now generates the correct Impala or SQLite SQL:
WHERE `flag` = '0'
) t1
Bug fixes
Bug Fixes
~~~~~~~~~
* ``CHAR(n)`` and ``VARCHAR(n)`` Impala types now correctly map to Ibis string
expressions
Expand Down Expand Up @@ -173,7 +249,7 @@ release.
This release also includes bug fixes affecting generated SQL correctness. All
users should upgrade as soon as possible.

New features
New Features
~~~~~~~~~~~~

* New integrated Impala functionality. See :ref:`Ibis for Impala Users
@@ -239,7 +315,7 @@ Highlights in this release are the SQLite, Python 3, Impala UDA support, and an
asynchronous execution API. There are also many usability improvements, bug
fixes, and other new features.

New features
New Features
~~~~~~~~~~~~
* SQLite client and built-in function support
* Ibis now supports Python 3.4 as well as 2.6 and 2.7
@@ -261,7 +337,7 @@ New features
and ability to compile (since many operations are unavailable in SQLite, for
example)

API changes
API Changes
~~~~~~~~~~~
* Impala Ibis client creation now uses only ``ibis.impala.connect``, and
``ibis.make_client`` has been deprecated
@@ -278,7 +354,7 @@ Contributors
0.4 (August 14, 2015)
---------------------

New features
New Features
~~~~~~~~~~~~
* Add tooling to use Impala C++ scalar UDFs within Ibis (#262, #195)
* Support and testing for Kerberos-enabled secure HDFS clusters
@@ -322,7 +398,7 @@ Contributors

First public release. See http://ibis-project.org for more.

New features
New Features
~~~~~~~~~~~~
* Implement window / analytic function support
* Enable non-equijoins (join clauses with operations other than ``==``).
@@ -358,7 +434,7 @@ Contributors
0.2 (June 16, 2015)
-------------------

New features
New Features
~~~~~~~~~~~~
* ``insert`` method on Ibis client for inserting data into existing tables.
* ``parquet_file``, ``delimited_file``, and ``avro_file`` client methods for
@@ -408,7 +484,7 @@ API Changes
* New Ibis client and Impala connection workflow. Client now combined from an
Impala connection and an optional HDFS connection

Bug fixes
Bug Fixes
~~~~~~~~~
* Numerous expression API bug fixes and rough edges fixed

4 changes: 0 additions & 4 deletions docs/sphinxext/ipython_sphinxext/LICENSE

This file was deleted.

116 changes: 0 additions & 116 deletions docs/sphinxext/ipython_sphinxext/ipython_console_highlighting.py

This file was deleted.

1,089 changes: 0 additions & 1,089 deletions docs/sphinxext/ipython_sphinxext/ipython_directive.py

This file was deleted.

13 changes: 13 additions & 0 deletions ibis/__init__.py
@@ -40,7 +40,20 @@
pass

try:
import ibis.clickhouse.api as clickhouse
except ImportError: # pip install ibis-framework[clickhouse]
pass

try:
import ibis.bigquery.api as bigquery
except ImportError: # pip install ibis-framework[bigquery]
pass

try:
from multipledispatch import halt_ordering, restart_ordering
halt_ordering()
import ibis.pandas.api as pandas
restart_ordering()
except ImportError: # pip install ibis-framework[pandas]
pass

File renamed without changes.
44 changes: 44 additions & 0 deletions ibis/bigquery/api.py
@@ -0,0 +1,44 @@
import ibis.common as com
from ibis.config import options # noqa: F401
from ibis.bigquery.client import BigQueryClient


def compile(expr):
"""
Force compilation of expression as though it were an expression depending
on BigQuery. Note you can also call expr.compile()
Returns
-------
compiled : string
"""
from .compiler import to_sql
return to_sql(expr)


def verify(expr):
"""
Determine if expression can be successfully translated to execute on
BigQuery
"""
try:
compile(expr)
return True
except com.TranslationError:
return False


def connect(project_id, dataset_id):
"""Create a BigQueryClient for use with Ibis
Parameters
----------
project_id: str
dataset_id: str
Returns
-------
BigQueryClient
"""

return BigQueryClient(project_id, dataset_id)
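
For reference, a client built with this function is used like any other ibis backend
client; a minimal sketch (the project and dataset names are illustrative and assume
Google credentials are already configured):

    import ibis

    con = ibis.bigquery.connect(project_id='ibis-gbq', dataset_id='testing')
    print(con.list_tables())                    # tables in the default dataset
    expr = con.table('functional_alltypes').float_col.sum()
    print(ibis.bigquery.compile(expr))          # BigQuery standard SQL string
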
210 changes: 210 additions & 0 deletions ibis/bigquery/client.py
@@ -0,0 +1,210 @@
import re

import pandas as pd

import ibis
import ibis.expr.types as ir
import ibis.expr.datatypes as dt
from ibis.client import Database, Query, SQLClient
from ibis.bigquery import compiler as comp
import google.cloud.bigquery


def _ensure_split(table_id, dataset_id):
split = table_id.split('.')
if len(split) > 1:
assert len(split) == 2
if dataset_id:
raise ValueError(
"Can't pass a fully qualified table name *AND* a dataset_id"
)
(dataset_id, table_id) = split
return (table_id, dataset_id)


class BigQueryCursor(object):
"""Cursor to allow the BigQuery client to reuse machinery in ibis/client.py
"""

def __init__(self, query):
self.query = query

def fetchall(self):
return list(self.query.fetch_data())

@property
def columns(self):
return [field.name for field in self.query.schema]

def __enter__(self):
# For compatibility when constructed from Query.execute()
return self

def __exit__(self, exc_type, exc_value, traceback):
pass


class BigQuery(Query):

def _fetch(self, cursor):
return pd.DataFrame(cursor.fetchall(), columns=cursor.columns)


class BigQueryAPIProxy(object):

def __init__(self, project_id):
self._client = google.cloud.bigquery.Client(project_id)

@property
def client(self):
return self._client

@property
def project_id(self):
return self.client.project

def get_datasets(self):
return list(self.client.list_datasets())

def get_dataset(self, dataset_id):
return self.client.dataset(dataset_id)

def get_table(self, table_id, dataset_id, reload=True):
(table_id, dataset_id) = _ensure_split(table_id, dataset_id)
table = self.client.dataset(dataset_id).table(table_id)
if reload:
table.reload()
return table

def get_schema(self, table_id, dataset_id):
return self.get_table(table_id, dataset_id).schema


class BigQueryDatabase(Database):
pass


class BigQueryClient(SQLClient):

sync_query = BigQuery
database_class = BigQueryDatabase
proxy_class = BigQueryAPIProxy

def __init__(self, project_id, dataset_id):
self._proxy = self.__class__.proxy_class(project_id)
self._dataset_id = dataset_id

@property
def project_id(self):
return self._proxy.project_id

@property
def dataset_id(self):
return self._dataset_id

@property
def _table_expr_klass(self):
return ir.TableExpr

def _build_ast(self, expr, params=None):
return comp.build_ast(expr, params=params)

def _fully_qualified_name(self, name, database):
dataset_id = database or self.dataset_id
return dataset_id + '.' + name

def _get_table_schema(self, qualified_name):
return self.get_schema(qualified_name)

def _execute(self, stmt, results=True):
# TODO(phillipc): Allow **kwargs in calls to execute
query = self._proxy.client.run_sync_query(stmt)
query.use_legacy_sql = False
query.run()
return BigQueryCursor(query)

def database(self, name=None):
if name is None:
name = self.dataset_id
return self.database_class(name, self)

@property
def current_database(self):
return self.database(self.dataset_id)

def set_database(self, name):
self._dataset_id = name

def exists_database(self, name):
return self._proxy.get_dataset(name).exists()

def list_databases(self, like=None):
results = [dataset.name
for dataset in self._proxy.get_datasets()]
if like:
results = [
dataset_name for dataset_name in results
if re.match(like, dataset_name)
]
return results

def exists_table(self, name, database=None):
(table_id, dataset_id) = _ensure_split(name, database)
return self._proxy.get_table(table_id, dataset_id).exists()

def list_tables(self, like=None, database=None):
dataset = self._proxy.get_dataset(database or self.dataset_id)
result = [table.name for table in dataset.list_tables()]
if like:
result = [
table_name for table_name in result
if re.match(like, table_name)
]
return result

def get_schema(self, name, database=None):
(table_id, dataset_id) = _ensure_split(name, database)
bq_table = self._proxy.get_table(table_id, dataset_id)
return bigquery_table_to_ibis_schema(bq_table)


_DTYPE_TO_IBIS_TYPE = {
'INT64': dt.int64,
'FLOAT64': dt.double,
'BOOL': dt.boolean,
'STRING': dt.string,
'DATE': dt.date,
# FIXME: enforce no tz info
'DATETIME': dt.timestamp,
'TIME': dt.time,
'TIMESTAMP': dt.timestamp,
'BYTES': dt.binary,
}


_LEGACY_TO_STANDARD = {
'INTEGER': 'INT64',
'FLOAT': 'FLOAT64',
'BOOLEAN': 'BOOL',
}


def _discover_type(field):
typ = field.field_type
if typ == 'RECORD':
fields = field.fields
assert fields
names = [el.name for el in fields]
ibis_types = [_discover_type(el) for el in fields]
ibis_type = dt.Struct(names, ibis_types)
else:
ibis_type = _LEGACY_TO_STANDARD.get(typ, typ)
ibis_type = _DTYPE_TO_IBIS_TYPE.get(ibis_type, ibis_type)
if field.mode == 'REPEATED':
ibis_type = dt.Array(ibis_type)
return ibis_type


def bigquery_table_to_ibis_schema(table):
pairs = ((el.name, _discover_type(el)) for el in table.schema)
return ibis.schema(pairs)
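
As a small illustration of the schema conversion above (the field definitions are made
up and assume ``google-cloud-bigquery`` is installed):

    from google.cloud.bigquery import SchemaField

    fields = [
        SchemaField('id', 'INTEGER'),                    # legacy INTEGER -> int64
        SchemaField('tags', 'STRING', mode='REPEATED'),  # REPEATED -> Array(string)
    ]
    print([_discover_type(field) for field in fields])   # [int64, Array(string)]
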
154 changes: 154 additions & 0 deletions ibis/bigquery/compiler.py
@@ -0,0 +1,154 @@
import ibis.sql.compiler as comp
import ibis.expr.operations as ops
from ibis.impala.compiler import ImpalaSelect
from ibis.impala import compiler as impala_compiler


class BigQuerySelectBuilder(comp.SelectBuilder):

@property
def _select_class(self):
return BigQuerySelect


class BigQueryQueryBuilder(comp.QueryBuilder):

select_builder = BigQuerySelectBuilder

def __init__(self, expr, context=None, params=None):
super(BigQueryQueryBuilder, self).__init__(
expr, context=context, params=params
)

def _make_context(self):
return BigQueryContext()

@property
def _union_class(self):
# return BigQueryUnion
raise NotImplementedError()


def build_ast(expr, context=None, params=None):
builder = BigQueryQueryBuilder(expr, context=context, params=params)
return builder.get_result()


def _get_query(expr, context, params=None):
ast = build_ast(expr, context, params=params)
(query, rest) = (ast.queries[0], ast.queries[1:])
assert not rest
return query


def to_sql(expr, context=None, params=None):
query = _get_query(expr, context, params=params)
compiled = query.compile()
return compiled


class BigQueryContext(comp.QueryContext):

def _to_sql(self, expr, ctx):
return to_sql(expr, context=ctx)


class BigQuerySelect(ImpalaSelect):

@property
def translator(self):
return BigQueryExprTranslator


def _extract_field(sql_attr):
def extract_field_formatter(translator, expr):
op = expr.op()
arg = translator.translate(op.args[0])
return "extract({0!s} from {1!s})".format(sql_attr, arg)
return extract_field_formatter


def _ifnull(translator, expr):
(a, b) = (translator.translate(arg) for arg in expr.op().args)
return ('CASE WHEN {0!s} IS NULL THEN {1!s} ELSE {0!s} END'
.format(a, b))


_sql_type_names = {
'int8': 'int64',
'int16': 'int64',
'int32': 'int64',
'int64': 'int64',
'float': 'float64',
'double': 'float64',
'string': 'string',
'boolean': 'boolean',
'timestamp': 'timestamp',
}


def _cast(translator, expr):
op = expr.op()
arg, target_type = op.args
arg_formatted = translator.translate(arg)
sql_type = _sql_type_names[target_type.name.lower()]
return 'CAST({0!s} AS {1!s})'.format(arg_formatted, sql_type)


def _struct_field(translator, expr):
arg, field = expr.op().args
arg_formatted = translator.translate(arg)
return '{}.`{}`'.format(arg_formatted, field)


def _array_collect(translator, expr):
return 'ARRAY_AGG({})'.format(*map(translator.translate, expr.op().args))


def _array_concat(translator, expr):
return 'ARRAY_CONCAT({})'.format(
', '.join(map(translator.translate, expr.op().args))
)


def _array_index(translator, expr):
# SAFE_OFFSET returns NULL if out of bounds
return '{}[SAFE_OFFSET({})]'.format(
*map(translator.translate, expr.op().args)
)


def _array_length(translator, expr):
return 'ARRAY_LENGTH({})'.format(
*map(translator.translate, expr.op().args)
)


_operation_registry = impala_compiler._operation_registry.copy()
_operation_registry.update({
ops.ExtractYear: _extract_field('year'),
ops.ExtractMonth: _extract_field('month'),
ops.ExtractDay: _extract_field('day'),
ops.ExtractHour: _extract_field('hour'),
ops.ExtractMinute: _extract_field('minute'),
ops.ExtractSecond: _extract_field('second'),
ops.ExtractMillisecond: _extract_field('millisecond'),

ops.IfNull: _ifnull,
ops.Cast: _cast,

ops.StructField: _struct_field,

ops.ArrayCollect: _array_collect,
ops.ArrayConcat: _array_concat,
ops.ArrayIndex: _array_index,
ops.ArrayLength: _array_length,

# BigQuery doesn't have these operations built in.
# ops.ArrayRepeat: _array_repeat,
# ops.ArraySlice: _array_slice,
})


class BigQueryExprTranslator(impala_compiler.ImpalaExprTranslator):
_registry = _operation_registry
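
The translator follows the usual ibis compiler pattern: every registry entry maps an
operation class to a formatter taking ``(translator, expr)`` and returning a SQL
fragment. Purely as an illustration of that pattern (this rule is hypothetical and may
already be covered by the inherited Impala registry):

    def _string_length(translator, expr):
        arg, = expr.op().args
        return 'LENGTH({})'.format(translator.translate(arg))

    _operation_registry.update({
        ops.StringLength: _string_length,
    })
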
File renamed without changes.
34 changes: 34 additions & 0 deletions ibis/bigquery/tests/conftest.py
@@ -0,0 +1,34 @@
import os

import pytest

import ibis


PROJECT_ID = os.environ.get('GOOGLE_BIGQUERY_PROJECT_ID')
DATASET_ID = 'testing'


@pytest.fixture(scope='session')
def client():
ga = pytest.importorskip('google.auth')

try:
return ibis.bigquery.connect(PROJECT_ID, DATASET_ID)
except ga.exceptions.DefaultCredentialsError:
pytest.skip("no credentials found, skipping")


@pytest.fixture(scope='session')
def alltypes(client):
return client.table('functional_alltypes')


@pytest.fixture(scope='session')
def df(alltypes):
return alltypes.execute()


@pytest.fixture(scope='session')
def struct_table(client):
return client.table('struct_table')
144 changes: 144 additions & 0 deletions ibis/bigquery/tests/test_client.py
@@ -0,0 +1,144 @@
import pytest

import numpy as np
import pandas as pd
import pandas.util.testing as tm

import ibis
import ibis.expr.types as ir


pytestmark = pytest.mark.bigquery
pytest.importorskip('google.cloud.bigquery')


def test_table(alltypes):
assert isinstance(alltypes, ir.TableExpr)


def test_column_execute(alltypes, df):
col_name = 'float_col'
expr = alltypes[col_name]
result = expr.execute()
expected = df[col_name]
tm.assert_series_equal(result, expected)


def test_literal_execute(client):
expected = '1234'
expr = ibis.literal(expected)
result = client.execute(expr)
assert result == expected


def test_simple_aggregate_execute(alltypes, df):
col_name = 'float_col'
expr = alltypes[col_name].sum()
result = expr.execute()
expected = df[col_name].sum()
np.testing.assert_allclose(result, expected)


def test_list_tables(client):
assert len(client.list_tables(like='functional_alltypes')) == 1


def test_current_database(client):
assert client.current_database.name == 'testing'
assert client.current_database.name == client.dataset_id
assert client.current_database.tables == client.list_tables()


def test_database(client):
database = client.database(client.dataset_id)
assert database.list_tables() == client.list_tables()


def test_database_layer(client):
bq_dataset = client._proxy.get_dataset(client.dataset_id)
actual = client.list_tables()
expected = [el.name for el in bq_dataset.list_tables()]
assert sorted(actual) == sorted(expected)


def test_compile_verify(alltypes):
column = alltypes['string_col']
unsupported_expr = column.replace('foo', 'bar')
supported_expr = column.lower()
assert not unsupported_expr.verify()
assert supported_expr.verify()


def test_compile_toplevel():
t = ibis.table([('foo', 'double')], name='t0')

# it works!
expr = t.foo.sum()
result = ibis.bigquery.compile(expr)
    # FIXME: remove quotes because BigQuery can't use anything that needs
    # quoting?
expected = """\
SELECT sum(`foo`) AS `sum`
FROM t0""" # noqa
assert str(result) == expected


def test_struct_field_access(struct_table):
expr = struct_table.struct_col.string_field
result = expr.execute()
expected = pd.Series([None, 'a'], name='tmp')
tm.assert_series_equal(result, expected)


def test_array_index(struct_table):
expr = struct_table.array_of_structs_col[1]
result = expr.execute()
expected = pd.Series(
[
{'int_field': None, 'string_field': None},
{'int_field': None, 'string_field': 'hijklmnop'}
],
name='tmp'
)
tm.assert_series_equal(result, expected)


def test_array_concat(struct_table):
c = struct_table.array_of_structs_col
expr = c + c
result = expr.execute()
expected = pd.Series(
[
[
{'int_field': 12345, 'string_field': 'abcdefg'},
{'int_field': None, 'string_field': None},
{'int_field': 12345, 'string_field': 'abcdefg'},
{'int_field': None, 'string_field': None},
],
[
{'int_field': 12345, 'string_field': 'abcdefg'},
{'int_field': None, 'string_field': 'hijklmnop'},
{'int_field': 12345, 'string_field': 'abcdefg'},
{'int_field': None, 'string_field': 'hijklmnop'},
],
],
name='tmp',
)
tm.assert_series_equal(result, expected)


def test_array_length(struct_table):
expr = struct_table.array_of_structs_col.length()
result = expr.execute()
expected = pd.Series([2, 2], name='tmp')
tm.assert_series_equal(result, expected)


@pytest.mark.xfail
def test_array_collect(struct_table):
key = struct_table.array_of_structs_col[0].string_field
expr = struct_table.groupby(key).aggregate(
foo=lambda t: t.array_of_structs_col[0].int_field.collect()
)
result = expr.execute()
assert result == -1
Empty file added ibis/clickhouse/__init__.py
Empty file.
81 changes: 81 additions & 0 deletions ibis/clickhouse/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import ibis.common as com

from ibis.config import options
from ibis.clickhouse.client import ClickhouseClient


def compile(expr):
"""
Force compilation of expression as though it were an expression depending
on Clickhouse. Note you can also call expr.compile()
Returns
-------
compiled : string
"""
from .compiler import to_sql
return to_sql(expr)


def verify(expr):
"""
    Determine whether the expression can be successfully translated to
    execute on Clickhouse.
"""
try:
compile(expr)
return True
except com.TranslationError:
return False


def connect(host='localhost', port=9000, database='default', user='default',
password='', client_name='ibis', compression=False):
"""Create an ClickhouseClient for use with Ibis.
Parameters
----------
host : str, optional
Host name of the clickhouse server
port : int, optional
Clickhouse server's port
database : str, optional
Default database when executing queries
user : str, optional
User to authenticate with
password : str, optional
Password to authenticate with
client_name: str, optional
This will appear in clickhouse server logs
    compression: bool or str, optional
        Whether or not to use compression. Default is False.
        Possible choices: lz4, lz4hc, quicklz, zstd.
        True is equivalent to 'lz4'.

Examples
--------
>>> import ibis
>>> import os
>>> clickhouse_host = os.environ.get('IBIS_TEST_CLICKHOUSE_HOST',
... 'localhost')
>>> clickhouse_port = int(os.environ.get('IBIS_TEST_CLICKHOUSE_PORT',
... 9000))
>>> client = ibis.clickhouse.connect(
... host=clickhouse_host,
... port=clickhouse_port
... )
>>> client # doctest: +ELLIPSIS
<ibis.clickhouse.client.ClickhouseClient object at 0x...>

    Returns
-------
ClickhouseClient
"""

client = ClickhouseClient(host, port=port, database=database, user=user,
password=password, client_name=client_name,
compression=compression)
if options.default_backend is None:
options.default_backend = client

return client
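
A minimal sketch of compile and verify on an unbound table, assuming clickhouse_driver is installed so the imports above resolve (the table and column names are placeholders, and no running Clickhouse server is needed just to compile):

import ibis
from ibis.clickhouse.api import compile, verify

t = ibis.table([('a', 'int64')], name='t0')
expr = t.a.sum()
print(compile(expr))   # roughly: SELECT sum(`a`) AS `sum` FROM t0
assert verify(expr)    # True only when every operation translates
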
304 changes: 304 additions & 0 deletions ibis/clickhouse/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@
import re
import pandas as pd

import ibis.common as com
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir

from ibis.config import options
from ibis.compat import zip as czip
from ibis.client import Query, Database, DatabaseEntity, SQLClient
from ibis.clickhouse.compiler import build_ast
from ibis.util import log
from ibis.sql.compiler import DDL

from clickhouse_driver.client import Client as _DriverClient

from .types import clickhouse_to_pandas, clickhouse_to_ibis


fully_qualified_re = re.compile(r"(.*)\.(?:`(.*)`|(.*))")


class ClickhouseDatabase(Database):
pass


class ClickhouseQuery(Query):

def execute(self):
# synchronous by default
cursor = self.client._execute(self.compiled_ddl)
result = self._fetch(cursor)
return self._wrap_result(result)

def _fetch(self, cursor):
data, columns = cursor
names, types = czip(*columns)

cols = {}
for (col, name, db_type) in czip(data, names, types):
dtype = self._db_type_to_dtype(db_type, name)
try:
cols[name] = pd.Series(col, dtype=dtype)
except TypeError:
cols[name] = pd.Series(col)

return pd.DataFrame(cols, columns=names)

def _db_type_to_dtype(self, db_type, column):
return clickhouse_to_pandas[db_type]


class ClickhouseClient(SQLClient):
"""An Ibis client interface that uses Clickhouse"""

database_class = ClickhouseDatabase
sync_query = ClickhouseQuery

def __init__(self, *args, **kwargs):
self.con = _DriverClient(*args, **kwargs)

def _build_ast(self, expr, params=None):
return build_ast(expr, params=params)

@property
def current_database(self):
# might be better to use driver.Connection instead of Client
return self.con.connection.database

@property
def _table_expr_klass(self):
return ClickhouseTable

def log(self, msg):
log(msg)

def close(self):
"""Close Clickhouse connection and drop any temporary objects"""
self.con.disconnect()

def _execute(self, query):
if isinstance(query, DDL):
query = query.compile()
self.log(query)

return self.con.execute(query, columnar=True, with_column_types=True)

def _fully_qualified_name(self, name, database):
        if fully_qualified_re.search(name):
return name

database = database or self.current_database
return '{0}.`{1}`'.format(database, name)

def list_tables(self, like=None, database=None):
"""
List tables in the current (or indicated) database. Like the SHOW
TABLES command in the clickhouse-shell.

        Parameters
----------
like : string, default None
e.g. 'foo*' to match all tables starting with 'foo'
database : string, default None
If not passed, uses the current/default database

        Returns
-------
tables : list of strings
"""
statement = 'SHOW TABLES'
if database:
statement += " FROM `{0}`".format(database)
if like:
m = fully_qualified_re.match(like)
if m:
database, quoted, unquoted = m.groups()
like = quoted or unquoted
return self.list_tables(like=like, database=database)
statement += " LIKE '{0}'".format(like)

return self._execute(statement)

def set_database(self, name):
"""
Set the default database scope for client
"""
self.con.database = name

def exists_database(self, name):
"""
        Check whether a given database exists.

        Parameters
----------
name : string
Database name

        Returns
-------
if_exists : boolean
"""
return len(self.list_databases(like=name)) > 0

def list_databases(self, like=None):
"""
List databases in the Clickhouse cluster.
Like the SHOW DATABASES command in the clickhouse-shell.

        Parameters
----------
like : string, default None
e.g. 'foo*' to match all tables starting with 'foo'

        Returns
-------
databases : list of strings
"""
statement = 'SELECT name FROM system.databases'
if like:
statement += " WHERE name LIKE '{0}'".format(like)

return self._execute(statement)

def get_schema(self, table_name, database=None):
"""
        Return a Schema object for the indicated table and database.

        Parameters
----------
table_name : string
May be fully qualified
database : string, default None

        Returns
-------
schema : ibis Schema
"""
qualified_name = self._fully_qualified_name(table_name, database)
query = 'DESC {0}'.format(qualified_name)
data, _ = self._execute(query)

names, types = data[:2]
ibis_types = map(clickhouse_to_ibis.get, types)

return dt.Schema(names, ibis_types)

@property
def client_options(self):
return self.con.options

def set_options(self, options):
self.con.set_options(options)

def reset_options(self):
# Must nuke all cursors
raise NotImplementedError

def exists_table(self, name, database=None):
"""
        Determine whether the indicated table or view exists.

        Parameters
----------
name : string
database : string, default None

        Returns
-------
if_exists : boolean
"""
return len(self.list_tables(like=name, database=database)) > 0

def _ensure_temp_db_exists(self):
        name = options.clickhouse.temp_db
if not self.exists_database(name):
self.create_database(name, force=True)

def _get_table_schema(self, tname):
return self.get_schema(tname)

def _get_schema_using_query(self, query):
_, types = self._execute(query)
names, clickhouse_types = zip(*types)
ibis_types = map(clickhouse_to_ibis.get, clickhouse_types)
return dt.Schema(names, ibis_types)

def _exec_statement(self, stmt, adapter=None):
query = ClickhouseQuery(self, stmt)
result = query.execute()
if adapter is not None:
result = adapter(result)
return result

def _table_command(self, cmd, name, database=None):
qualified_name = self._fully_qualified_name(name, database)
return '{0} {1}'.format(cmd, qualified_name)


class ClickhouseTable(ir.TableExpr, DatabaseEntity):
"""References a physical table in Clickhouse"""

@property
def _qualified_name(self):
return self.op().args[0]

@property
def _unqualified_name(self):
return self._match_name()[1]

@property
def _client(self):
return self.op().args[2]

def _match_name(self):
m = fully_qualified_re.match(self._qualified_name)
if not m:
raise com.IbisError('Cannot determine database name from {0}'
.format(self._qualified_name))
db, quoted, unquoted = m.groups()
return db, quoted or unquoted

@property
def _database(self):
return self._match_name()[0]

def invalidate_metadata(self):
self._client.invalidate_metadata(self._qualified_name)

def metadata(self):
"""
        Return parsed results of a DESCRIBE FORMATTED statement.

        Returns
-------
meta : TableMetadata
"""
return self._client.describe_formatted(self._qualified_name)

describe_formatted = metadata

@property
def name(self):
return self.op().name

def _execute(self, stmt):
return self._client._execute(stmt)


class ClickhouseTemporaryTable(ops.DatabaseTable):

def __del__(self):
try:
self.drop()
except com.IbisError:
pass

def drop(self):
try:
self.source.drop_table(self.name)
except Exception: # ClickhouseError
# database might have been dropped
pass
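
Pulling the client pieces together, a hedged usage sketch against a local server; the host, port, and table name are placeholders borrowed from the connect() docstring and the test data, not guarantees about any particular deployment:

from ibis.clickhouse.api import connect

client = connect(host='localhost', port=9000, database='default')
client.list_databases()                   # mirrors SHOW DATABASES
client.list_tables(like='functional*')    # mirrors SHOW TABLES ... LIKE
t = client.table('functional_alltypes')   # a ClickhouseTable expression
print(t.count().execute())                # compiled and run via ClickhouseQuery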