Expand Up @@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Additional Analytics Tools"
"# Advanced Topics: Analytics Tools"
]
},
{
Expand All @@ -20,13 +20,12 @@
"metadata": {},
"outputs": [],
"source": [
"import ibis\n",
"import os\n",
"hdfs_port = os.environ.get('IBIS_WEBHDFS_PORT', 50070)\n",
"hdfs = ibis.hdfs_connect(host='impala', port=hdfs_port)\n",
"con = ibis.impala.connect(host='impala', database='ibis_testing',\n",
" hdfs_client=hdfs)\n",
"ibis.options.interactive = True"
"import ibis\n",
"\n",
"ibis.options.interactive = True\n",
"\n",
"connection = ibis.sqlite.connect(os.path.join('data', 'geography.db'))"
]
},
{
Expand All @@ -44,13 +43,8 @@
"metadata": {},
"outputs": [],
"source": [
"lineitem = con.table('tpch_lineitem')\n",
"orders = con.table('tpch_orders')\n",
"\n",
"items = (orders.join(lineitem, orders.o_orderkey == lineitem.l_orderkey)\n",
" [lineitem, orders])\n",
"\n",
"items.o_orderpriority.value_counts()"
"countries = connection.table('countries')\n",
"countries.continent.value_counts()"
]
},
{
Expand All @@ -66,9 +60,9 @@
"metadata": {},
"outputs": [],
"source": [
"freq = (items.group_by(items.o_orderpriority)\n",
" .aggregate([items.count().name('nrows'),\n",
" items.l_extendedprice.sum().name('total $')]))\n",
"freq = (countries.group_by(countries.continent)\n",
" .aggregate([countries.count().name('# countries'),\n",
" countries.population.sum().name('total population')]))\n",
"freq"
]
},
Expand All @@ -81,25 +75,16 @@
"\n",
"Numeric array expressions (columns with numeric type and other array expressions) have `bucket` and `histogram` methods which produce different kinds of binning. These produce category values (the computed bins) that can be used in grouping and other analytics.\n",
"\n",
"Let's have a look at a few examples\n",
"Some backends implement the `.summary()` method, which can be used to see the general distribution of a column.\n",
"\n",
"I'll use the `summary` function to see the general distribution of lineitem prices in the order data joined above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"items.l_extendedprice.summary()"
"Let's have a look at a few examples."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Alright then, now suppose we want to split the item prices up into some buckets of our choosing:"
"Alright then, now suppose we want to split the countries up into some buckets of our choosing for their population:"
]
},
{
Expand All @@ -108,7 +93,7 @@
"metadata": {},
"outputs": [],
"source": [
"buckets = [0, 5000, 10000, 50000, 100000]"
"buckets = [0, 1e6, 1e7, 1e8, 1e9]"
]
},
{
Expand All @@ -124,7 +109,7 @@
"metadata": {},
"outputs": [],
"source": [
"bucketed = items.l_extendedprice.bucket(buckets).name('bucket')"
"bucketed = countries.population.bucket(buckets).name('bucket')"
]
},
{
Expand Down Expand Up @@ -156,7 +141,7 @@
"metadata": {},
"outputs": [],
"source": [
"bucketed = (items.l_extendedprice\n",
"bucketed = (countries.population\n",
" .bucket(buckets, include_over=True)\n",
" .name('bucket'))\n",
"bucketed.value_counts()"
Expand Down Expand Up @@ -196,8 +181,7 @@
"bucket_counts = bucketed.value_counts()\n",
"\n",
"labeled_bucket = (bucket_counts.bucket\n",
" .label(['0 to 5000', '5000 to 10000', '10000 to 50000',\n",
" '50000 to 100000', 'Over 100000'])\n",
" .label(['< 1M', '> 1M', '> 10M', '> 100M', '> 1B'])\n",
" .name('bucket_name'))\n",
"\n",
"expr = (bucket_counts[labeled_bucket, bucket_counts]\n",
Expand All @@ -211,60 +195,7 @@
"source": [
"Nice, huh?\n",
"\n",
"`histogram` is a linear (fixed size bin) equivalent:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t = con.table('functional_alltypes')\n",
"\n",
"d = t.double_col\n",
"\n",
"tier = d.histogram(10).name('hist_bin')\n",
"expr = (t.group_by(tier)\n",
" .aggregate([d.min(), d.max(), t.count()])\n",
" .sort_by('hist_bin'))\n",
"expr"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filtering in aggregations\n",
"\n",
"\n",
"Suppose that you want to compute an aggregation with a subset of the data for _only one_ of the metrics / aggregates in question, and the complete data set with the other aggregates. Most aggregation functions are thus equipped with a `where` argument. Let me show it to you in action:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t = con.table('functional_alltypes')\n",
"\n",
"d = t.double_col\n",
"s = t.string_col\n",
"\n",
"cond = s.isin(['3', '5', '7'])\n",
"\n",
"metrics = [t.count().name('# rows total'), \n",
" cond.sum().name('# selected'),\n",
" d.sum().name('total'),\n",
" d.sum(where=cond).name('selected total')]\n",
"\n",
"color = (t.float_col\n",
" .between(3, 7)\n",
" .ifelse('red', 'blue')\n",
" .name('color'))\n",
"\n",
"t.group_by(color).aggregate(metrics)"
"Some backends implement `histogram(num_bins)`, a linear (fixed size bin) equivalent."
]
}
],
Expand All @@ -284,9 +215,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}
359 changes: 0 additions & 359 deletions docs/source/tutorial/07-Advanced-Topics-ComplexFiltering.ipynb

This file was deleted.

74 changes: 70 additions & 4 deletions docs/source/tutorial/data/Create-geography-database.ipynb
Expand Up @@ -10,12 +10,14 @@
"\n",
"The source of the `countries` table has been obtained from [GeoNames](https://www.geonames.org/countries/).\n",
"\n",
"The data for the `gdp` data has been obtained from the [World Bank website](https://data.worldbank.org/indicator/NY.GDP.MKTP.CD)."
"The data for the `gdp` data has been obtained from the [World Bank website](https://data.worldbank.org/indicator/NY.GDP.MKTP.CD).\n",
"\n",
"The `independence` days table has been obtained from [Wikipedia](https://en.wikipedia.org/wiki/List_of_national_independence_days)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -53,15 +55,79 @@
"cursor.executemany('INSERT INTO gdp VALUES (?, ?, ?)',\n",
" data['gdp'])\n",
"\n",
"cursor.execute('''\n",
"CREATE TABLE independence (\n",
" country_code TEXT,\n",
" independence_date DATE,\n",
" independence_from TEXT);\n",
"''')\n",
"cursor.executemany('INSERT INTO independence VALUES (?, ?, ?)',\n",
" data['independence'])\n",
"\n",
"conn.commit()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This code is not currently being used, but it creates the tutorials database (same as above) in PostgreSQL:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"import json\n",
"import psycopg2\n",
"\n",
"HOST = 'localhost'\n",
"DBNAME = 'ibis'\n",
"USER = 'ibis'\n",
"\n",
"with open('geography.json') as f:\n",
" data = json.load(f)\n",
"\n",
"conn = psycopg2.connect(f\"host={HOST} dbname={DBANME} user={USER}\")\n",
"cursor = conn.cursor()\n",
"\"\"\"\n",
"cursor.execute('''\n",
"CREATE TABLE countries (\n",
" iso_alpha2 CHAR(2),\n",
" iso_alpha3 CHAR(3),\n",
" iso_numeric INTEGER,\n",
" fips VARCHAR(10),\n",
" name VARCHAR(300),\n",
" capital VARCHAR(300),\n",
" area_km2 REAL,\n",
" population INTEGER,\n",
" continent CHAR(2));\n",
"''')\n",
"cursor.executemany('INSERT INTO countries VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)',\n",
" data['countries'])\n",
"\"\"\"\n",
"cursor.execute('''\n",
"CREATE TABLE gdp (\n",
" country_code CHAR(3),\n",
" year INTEGER,\n",
" value REAL);\n",
"''')\n",
"cursor.executemany('INSERT INTO gdp VALUES (%s, %s, %s)',\n",
" data['gdp'])\n",
"\n",
"cursor.execute('''\n",
"CREATE TABLE independence (\n",
" country_code CHAR(2),\n",
" independence_date DATE,\n",
" independence_from VARCHAR(300));\n",
"''')\n",
"cursor.executemany('INSERT INTO independence VALUES (%s, %s, %s)',\n",
" data['independence'])\n",
"\n",
"conn.commit()"
]
}
],
"metadata": {
Expand All @@ -80,7 +146,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.9.1"
}
},
"nbformat": 4,
Expand Down
Binary file modified docs/source/tutorial/data/geography.db
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/source/tutorial/data/geography.json

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions docs/source/tutorial/index.rst
Expand Up @@ -13,6 +13,5 @@ Here we show Jupyter notebooks that take you through various tasks using ibis.
03-Expressions-Lazy-Mode-Logging.ipynb
04-More-Value-Expressions.ipynb
05-IO-Create-Insert-External-Data.ipynb
06-Advanced-Topics-TopK-SelfJoins.ipynb
07-Advanced-Topics-ComplexFiltering.ipynb
08-More-Analytics-Helpers.ipynb
06-Advanced-Topics-ComplexFiltering.ipynb
07-Advanced-Topics-Analytics-Tools.ipynb
2 changes: 1 addition & 1 deletion docs/source/user_guide/configuration.rst
Expand Up @@ -80,7 +80,7 @@ This method also takes arguments to configure SSL (``use_ssl``, ``ca_cert``).
See the documentation for the Impala shell for more details.

Ibis also includes functionality that communicates directly with HDFS, using
the WebHDFS REST API. When calling ``ibis.hdfs_connect(...)``, also pass
the WebHDFS REST API. When calling ``ibis.impala.hdfs_connect(...)``, also pass
``auth_mechanism='GSSAPI'`` or ``auth_mechanism='LDAP'``, and ensure that you
are connecting to the correct port, which is likely an SSL-secured WebHDFS
port. Also note that you can pass ``verify=False`` to avoid verifying SSL
Expand Down
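
A rough sketch of such a connection (the host names and port below are
placeholders, and a Kerberos-secured cluster is assumed; they are not values
taken from this guide):

.. code-block:: python

    import ibis

    # Placeholder host/port for a Kerberized cluster exposing WebHDFS over SSL
    hdfs = ibis.impala.hdfs_connect(
        host='hdfs-namenode.example.com',
        port=50470,
        auth_mechanism='GSSAPI',
        verify=False,  # skip SSL certificate verification, as noted above
    )
    con = ibis.impala.connect(
        host='impala.example.com',
        hdfs_client=hdfs,
        auth_mechanism='GSSAPI',
    )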
146 changes: 44 additions & 102 deletions docs/source/user_guide/extending/extending_elementwise_expr.ipynb
Expand Up @@ -11,21 +11,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"There are two parts of ibis that users typically want to extend:\n",
"This notebook will show you how to add a new elementwise operation to an existing backend.\n",
"\n",
"1. Expressions (for example, by adding a new operation)\n",
"1. Backends\n",
"We are going to add `julianday`, a function supported by the SQLite database, to the SQLite Ibis backend.\n",
"\n",
"This notebook will show you how to add a new elementwise operation--`sha1`--to an existing backend (PostgreSQL)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Description\n",
"\n",
"We're going to add a **`sha1`** method to ibis. [SHA1](https://en.wikipedia.org/wiki/SHA-1) is a hash algorithm, employed in systems such as git."
"The Julian day of a date, is the number of days since January 1st, 4713 BC. For more information check the [Julian day](https://en.wikipedia.org/wiki/Julian_day) wikipedia page."
]
},
{
Expand All @@ -39,10 +29,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's define the `sha` operation as a function that takes one string input argument and returns a hexidecimal string.\n",
"Let's define the `julianday` operation as a function that takes one string input argument and returns a float.\n",
"\n",
"```haskell\n",
"sha1 :: String -> String\n",
"```python\n",
"def julianday(date: str) -> float:\n",
" \"\"\"Julian date\"\"\"\n",
"```"
]
},
Expand All @@ -58,16 +49,16 @@
"from ibis.expr.operations import ValueOp, Arg\n",
"\n",
"\n",
"class SHA1(ValueOp):\n",
"class JulianDay(ValueOp):\n",
" arg = Arg(rlz.string)\n",
" output_type = rlz.shape_like('arg', 'string')"
" output_type = rlz.shape_like('arg', 'float')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We just defined a `SHA1` class that takes one argument of type string or binary, and returns a binary. This matches the description of the function provided by BigQuery."
"We just defined a `JulianDay` class that takes one argument of type string or binary, and returns a float."
]
},
{
Expand All @@ -81,7 +72,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Because we know the output type of the operation, to make an expression out of ``SHA1`` we simply need to construct it and call its `ibis.expr.types.Node.to_expr` method.\n",
"Because we know the output type of the operation, to make an expression out of ``JulianDay`` we simply need to construct it and call its `ibis.expr.types.Node.to_expr` method.\n",
"\n",
"We still need to add a method to `StringValue` and `BinaryValue` (this needs to work on both scalars and columns).\n",
"\n",
Expand All @@ -100,11 +91,11 @@
"from ibis.expr.types import StringValue, BinaryValue\n",
"\n",
"\n",
"def sha1(string_value):\n",
" return SHA1(string_value).to_expr()\n",
"def julianday(string_value):\n",
" return JulianDay(string_value).to_expr()\n",
"\n",
"\n",
"StringValue.sha1 = sha1"
"StringValue.julianday = julianday"
]
},
{
Expand All @@ -120,25 +111,11 @@
"metadata": {},
"outputs": [],
"source": [
"import ibis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t = ibis.table([('string_col', 'string')], name='t')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t.string_col.sha1()"
"import ibis\n",
"\n",
"t = ibis.table([('string_col', 'string')], name='t')\n",
"\n",
"t.string_col.julianday()"
]
},
{
Expand All @@ -157,16 +134,16 @@
"import sqlalchemy as sa\n",
"\n",
"\n",
"@ibis.postgres.compiles(SHA1)\n",
"def compile_sha1(translator, expr):\n",
"@ibis.sqlite.add_operation(JulianDay)\n",
"def _julianday(translator, expr):\n",
" # pull out the arguments to the expression\n",
" arg, = expr.op().args\n",
" \n",
" # compile the argument\n",
" compiled_arg = translator.translate(arg)\n",
" \n",
" # return a SQLAlchemy expression that calls into the PostgreSQL pgcrypto extension\n",
" return sa.func.encode(sa.func.digest(compiled_arg, 'sha1'), 'hex')"
" # return a SQLAlchemy expression that calls into the SQLite julianday function\n",
" return sa.func.julianday(compiled_arg)"
]
},
{
Expand All @@ -176,62 +153,25 @@
"## Step 4: Putting it all Together"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Connect to the `ibis_testing` database"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**NOTE:**\n",
"\n",
"To be able to execute the rest of this notebook you need to run the following command from your ibis clone:\n",
"\n",
"```sh\n",
"make init\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pathlib\n",
"import ibis\n",
"con = ibis.postgres.connect(\n",
" database='ibis_testing', user='postgres', host='postgres', password='postgres')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Register the pgcrypto extension\n",
"\n",
"See https://www.postgresql.org/docs/10/static/pgcrypto.html for details about this extension"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# the output here is an AlchemyProxy instance that cannot iterate\n",
"# (because there's no output from the database) so we hide it with a semicolon\n",
"con.raw_sql('CREATE EXTENSION IF NOT EXISTS pgcrypto');"
"db_fname = str(pathlib.Path().resolve().parent.parent / 'tutorial' / 'data' / 'geography.db')\n",
"\n",
"con = ibis.sqlite.connect(db_fname)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create and execute a `sha1` expression"
"### Create and execute a `julianday` expression"
]
},
{
Expand All @@ -240,8 +180,8 @@
"metadata": {},
"outputs": [],
"source": [
"t = con.table('functional_alltypes')\n",
"t"
"independence = con.table('independence')\n",
"independence"
]
},
{
Expand All @@ -250,8 +190,8 @@
"metadata": {},
"outputs": [],
"source": [
"sha1_expr = t.string_col.sha1()\n",
"sha1_expr"
"day = independence.independence_date.cast('string')\n",
"day"
]
},
{
Expand All @@ -260,8 +200,8 @@
"metadata": {},
"outputs": [],
"source": [
"sql_expr = sha1_expr.compile()\n",
"print(sql_expr)"
"julianday_expr = day.julianday()\n",
"julianday_expr"
]
},
{
Expand All @@ -270,7 +210,8 @@
"metadata": {},
"outputs": [],
"source": [
"result = sha1_expr.execute()"
"sql_expr = julianday_expr.compile()\n",
"print(sql_expr)"
]
},
{
Expand All @@ -279,6 +220,7 @@
"metadata": {},
"outputs": [],
"source": [
"result = julianday_expr.execute()\n",
"result.head()"
]
},
Expand All @@ -295,8 +237,8 @@
"metadata": {},
"outputs": [],
"source": [
"string_scalar = ibis.literal('abcdefg')\n",
"string_scalar"
"scalar = ibis.literal('2010-03-14')\n",
"scalar"
]
},
{
Expand All @@ -305,7 +247,7 @@
"metadata": {},
"outputs": [],
"source": [
"sha1_scalar = string_scalar.sha1()"
"julianday_scalar = scalar.julianday()"
]
},
{
Expand All @@ -314,7 +256,7 @@
"metadata": {},
"outputs": [],
"source": [
"con.execute(sha1_scalar)"
"con.execute(julianday_scalar)"
]
}
],
Expand All @@ -334,9 +276,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
"nbformat_minor": 4
}
140 changes: 46 additions & 94 deletions docs/source/user_guide/extending/extending_reduce_expr.ipynb
Expand Up @@ -11,7 +11,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook will show you how to add a new *reduction* operation (`bitwise_and`) to an existing backend (PostgreSQL).\n",
"This notebook will show you how to add a new *reduction* operation `last_date` to the existing backend SQLite.\n",
"\n",
"A reduction operation is a function that maps $N$ rows to 1 row, for example the `sum` function."
]
Expand All @@ -22,18 +22,7 @@
"source": [
"## Description\n",
"\n",
"We're going to add a **`bitwise_and`** function to ibis. `bitwise_and` computes the logical `AND` of the individual bits of an integer.\n",
"\n",
"For example,\n",
"\n",
"```\n",
" 0101\n",
" 0111\n",
" 0011\n",
"& 1101\n",
"------\n",
" 0001\n",
"```"
"We're going to add a **`last_date`** function to ibis. `last_date` simply returns the latest date of a list of dates."
]
},
{
Expand All @@ -47,10 +36,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's define the `bitwise_and` operation as a function that takes any integer typed column as input and returns an integer\n",
"Let's define the `last_date` operation as a function that takes any date column as input and returns a date:\n",
"\n",
"```python\n",
"import datetime\n",
"import typing\n",
"\n",
"```haskell\n",
"bitwise_and :: Column Int -> Int\n",
"def last_date(dates: typing.List[datetime.date]) -> datetime.date:\n",
" \"\"\"Latest date\"\"\"\n",
"```"
]
},
Expand All @@ -66,8 +59,8 @@
"from ibis.expr.operations import Reduction, Arg\n",
"\n",
"\n",
"class BitwiseAnd(Reduction):\n",
" arg = Arg(rlz.column(rlz.integer))\n",
"class LastDate(Reduction):\n",
" arg = Arg(rlz.column(rlz.date))\n",
" where = Arg(rlz.boolean, default=None)\n",
" output_type = rlz.scalar_like('arg')"
]
Expand All @@ -76,7 +69,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We just defined a `BitwiseAnd` class that takes one integer column as input, and returns a scalar output of the same type as the input. This matches both the requirements of a reduction and the spepcifics of the function that we want to implement.\n",
"We just defined a `LastDate` class that takes one date column as input, and returns a scalar output of the same type as the input. This matches both the requirements of a reduction and the spepcifics of the function that we want to implement.\n",
"\n",
"**Note**: It is very important that you write the correct argument rules and output type here. The expression *will not work* otherwise."
]
Expand All @@ -92,7 +85,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Because every reduction in ibis has the ability to filter out values during aggregation (a typical feature in databases and analytics tools), to make an expression out of ``BitwiseAnd`` we need to pass an additional argument: `where` to our `BitwiseAnd` constructor."
"Because every reduction in ibis has the ability to filter out values during aggregation (a typical feature in databases and analytics tools), to make an expression out of ``LastDate`` we need to pass an additional argument: `where` to our `LastDate` constructor."
]
},
{
Expand All @@ -101,21 +94,21 @@
"metadata": {},
"outputs": [],
"source": [
"from ibis.expr.types import IntegerColumn # not IntegerValue! reductions are only valid on columns\n",
"from ibis.expr.types import DateColumn # not DateValue! reductions are only valid on columns\n",
"\n",
"\n",
"def bitwise_and(integer_column, where=None):\n",
" return BitwiseAnd(integer_column, where=where).to_expr()\n",
"def last_date(date_column, where=None):\n",
" return LastDate(date_column, where=where).to_expr()\n",
"\n",
"\n",
"IntegerColumn.bitwise_and = bitwise_and"
"DateColumn.last_date = last_date"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Interlude: Create some expressions using `bitwise_and`"
"## Interlude: Create some expressions using `last_date`"
]
},
{
Expand All @@ -133,7 +126,7 @@
"metadata": {},
"outputs": [],
"source": [
"t = ibis.table([('bigint_col', 'int64'), ('string_col', 'string')], name='t')"
"people = ibis.table([('name', 'string'), ('country', 'string'), ('date_of_birth', 'date')], name='people')"
]
},
{
Expand All @@ -142,7 +135,7 @@
"metadata": {},
"outputs": [],
"source": [
"t.bigint_col.bitwise_and()"
"people.date_of_birth.last_date()"
]
},
{
Expand All @@ -151,7 +144,7 @@
"metadata": {},
"outputs": [],
"source": [
"t.bigint_col.bitwise_and(t.string_col == '1')"
"people.date_of_birth.last_date(people.country == 'Indonesia')"
]
},
{
Expand All @@ -170,16 +163,16 @@
"import sqlalchemy as sa\n",
"\n",
"\n",
"@ibis.postgres.compiles(BitwiseAnd)\n",
"def compile_sha1(translator, expr):\n",
"@ibis.sqlite.add_operation(LastDate)\n",
"def _last_date(translator, expr):\n",
" # pull out the arguments to the expression\n",
" arg, where = expr.op().args\n",
" \n",
" # compile the argument\n",
" compiled_arg = translator.translate(arg)\n",
" \n",
" # call the appropriate postgres function\n",
" agg = sa.func.bit_and(compiled_arg)\n",
" # call the appropriate SQLite function (`max` for the latest/maximum date)\n",
" agg = sa.func.max(compiled_arg)\n",
" \n",
" # handle a non-None filter clause\n",
" if where is not None:\n",
Expand All @@ -194,26 +187,6 @@
"## Step 4: Putting it all Together"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Connect to the `ibis_testing` database"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**NOTE:**\n",
"\n",
"To be able to execute the rest of this notebook you need to run the following command from your ibis clone:\n",
"\n",
"```sh\n",
"make init\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -222,12 +195,12 @@
},
"outputs": [],
"source": [
"con = ibis.postgres.connect(\n",
" user='postgres',\n",
" host='postgres',\n",
" password='postgres',\n",
" database='ibis_testing'\n",
")"
"import pathlib\n",
"import ibis\n",
"\n",
"db_fname = str(pathlib.Path().resolve().parent.parent / 'tutorial' / 'data' / 'geography.db')\n",
"\n",
"con = ibis.sqlite.connect(db_fname)"
]
},
{
Expand All @@ -243,8 +216,15 @@
"metadata": {},
"outputs": [],
"source": [
"t = con.table('functional_alltypes')\n",
"t"
"independence = con.table('independence')\n",
"independence"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Last country to gain independence in our database:"
]
},
{
Expand All @@ -253,7 +233,7 @@
"metadata": {},
"outputs": [],
"source": [
"expr = t.bigint_col.bitwise_and()\n",
"expr = independence.independence_date.last_date()\n",
"expr"
]
},
Expand All @@ -280,7 +260,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's see what a `bitwise_and` call looks like with a `where` argument"
"Last country to gain independence from the Spanish Empire, using the `where` parameter:"
]
},
{
Expand All @@ -289,7 +269,7 @@
"metadata": {},
"outputs": [],
"source": [
"expr = t.bigint_col.bitwise_and(where=(t.bigint_col == 10) | (t.bigint_col == 40))\n",
"expr = independence.independence_date.last_date(where=independence.independence_from == 'Spanish Empire')\n",
"expr"
]
},
Expand All @@ -302,34 +282,6 @@
"result = expr.execute()\n",
"result"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's confirm that taking bitwise `AND` of 10 and 40 is in fact 8"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"10 & 40"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(' {:0>8b}'.format(10))\n",
"print('& {:0>8b}'.format(40))\n",
"print('-' * 10)\n",
"print(' {:0>8b}'.format(10 & 40))"
]
}
],
"metadata": {
Expand All @@ -348,9 +300,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
"nbformat_minor": 4
}
59 changes: 35 additions & 24 deletions docs/source/user_guide/extending/index.rst
Expand Up @@ -16,16 +16,6 @@ Below we provide notebooks showing how to extend ibis in each of these ways.
Adding a New Expression
-----------------------

.. note::

Make sure you've run the following commands before executing the notebook

.. code-block:: sh
docker-compose up -d --no-build postgres dns
docker-compose run waiter
docker-compose run ibis ci/load-data.sh postgres
Here we show how to add a ``sha1`` method to the PostgreSQL backend as well as
how to add a new ``bitwise_and`` reduction operation:

Expand All @@ -39,23 +29,44 @@ how to add a new ``bitwise_and`` reduction operation:
Adding a New Backend
--------------------

Ibis backends are accessed by users calling ``ibis.backend_name``, for example
in ``ibis.sqlite.connect(fname)``.

Whether you are adding a new backend to the Ibis repo or creating a third-party
backend, you should define an entry point in the group ``ibis.backends``, with the
name of your backend and the module where it is implemented. This is defined in
the ``setup.py`` file. The code to set up the sqlite backend could be:

.. code-block:: python
setup(name='ibis-sqlite',
...
entry_points={'ibis.backends': 'sqlite = ibis_sqlite'}
)
In the code above, the name of the backend will be ``sqlite``, as defined on the
left of the assignment ``sqlite = ibis_sqlite``, and the code should be available
in the module ``ibis_sqlite`` (the file ``ibis_sqlite/__init__.py`` will define
the ``connect`` method, as well as any other method available to users as
``ibis.sqlite.<method>``).

For third party packages it is recommended that the name of the Python package
is ``ibis-<backend>``, since Ibis will recommend users to run ``pip install ibis-<backend>``
when a backend is not found.
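
When ``ibis.<backend-name>`` is first accessed, Ibis resolves the entry point and
instantiates a ``Backend`` class exposed by that module (see the lazy loading code
in ``ibis/__init__.py``). A minimal sketch of such a module, with the class body
and the ``connect`` signature as illustrative assumptions only (a real backend must
also implement the abstract methods of ``BaseBackend``), could be:

.. code-block:: python

    # ibis_sqlite/__init__.py -- hypothetical minimal backend module
    from ibis.backends.base import BaseBackend


    class Backend(BaseBackend):
        """Object returned by ``ibis.sqlite`` once the entry point is resolved."""

        def connect(self, path):
            # Open the database referenced by ``path`` and return a backend
            # instance ready to create table expressions with ``.table(...)``.
            ...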


Run test suite for separate Backend
-----------------------------------
.. note::
By following the steps below, you get the opportunity to run tests with one
command: `make test BACKEND='[your added backend]'`

1) you need to add a new backend to `BACKENDS` variable in `Makefile`.
To run the tests for specific backends you can use:

.. code:: shell
2) if backend needs to start services (implemented as docker containers and
added into `docker-compose.yml` file) then add the services to `SERVICES`
variable in `Makefile`, add case for switch-case construction inside
`./ci/dockerize.sh` for proper waiting the services.
PYTEST_BACKENDS="sqlite pandas" python -m pytest ibis/tests
3) if backend needs to load some data then add the backend to `LOADS` variable
in `Makefile` and implement necessary functionality in `./ci/load-data.sh`
Some backends may require a running database server. The CI file
`.github/workflows/main.yml` contains the configuration to run
servers for all backends using Docker images.

4) the necessary markers for `pytest` will be generated inside
`./ci/backends-markers.sh`. By default, a marker will be generated that
matches the name of the backend (you can manually correct the generated
name for the marker inside the file)
The backends may need data to be loaded; run or check `ci/setup.py` to
see how the data is loaded in the CI and for your local containers.
2 changes: 2 additions & 0 deletions docs/source/user_guide/index.rst
Expand Up @@ -16,6 +16,8 @@ the information is available in the :ref:`api`.

configuration
sql
topk
self_joins
udf
geospatial_analysis
design
Expand Down
137 changes: 137 additions & 0 deletions docs/source/user_guide/self_joins.rst
@@ -0,0 +1,137 @@
.. _self_joins:

**********
Self joins
**********

If you’re a relational data guru, you may have wondered how it’s
possible to join tables with themselves, because join clauses involve
column references back to the original table.

Consider the SQL

.. code:: sql
SELECT t1.key, sum(t1.value - t2.value) AS metric
FROM my_table t1
JOIN my_table t2
ON t1.key = t2.subkey
GROUP BY 1
Here, we have an unambiguous way to refer to each of the tables through
aliasing.

Let’s consider the TPC-H database, and suppose we want to compute
year-over-year change in total order amounts by region using joins.

.. code:: python
>>> region = con.table('tpch_region')
>>> nation = con.table('tpch_nation')
>>> customer = con.table('tpch_customer')
>>> orders = con.table('tpch_orders')
>>> orders.limit(5)
o_orderkey o_custkey o_orderstatus o_totalprice o_orderdate \
0 1 36901 O 173665.47 1996-01-02
1 2 78002 O 46929.18 1996-12-01
2 3 123314 F 193846.25 1993-10-14
3 4 136777 O 32151.78 1995-10-11
4 5 44485 F 144659.20 1994-07-30
o_orderpriority o_clerk o_shippriority \
0 5-LOW Clerk#000000951 0
1 1-URGENT Clerk#000000880 0
2 5-LOW Clerk#000000955 0
3 5-LOW Clerk#000000124 0
4 5-LOW Clerk#000000925 0
o_comment
0 nstructions sleep furiously among
1 foxes. pending accounts at the pending, silen...
2 sly final accounts boost. carefully regular id...
3 sits. slyly regular warthogs cajole. regular, ...
4 quickly. bold deposits sleep slyly. packages u...
First, let’s join all the things and select the fields we care about:

.. code:: python
>>> fields_of_interest = [region.r_name.name('region'),
... nation.n_name.name('nation'),
... orders.o_totalprice.name('amount'),
... orders.o_orderdate.cast('timestamp').name('odate') # these are strings
... ]
>>> joined_all = (region.join(nation, region.r_regionkey == nation.n_regionkey)
... .join(customer, customer.c_nationkey == nation.n_nationkey)
... .join(orders, orders.o_custkey == customer.c_custkey)
... [fields_of_interest])
Okay, great, let’s have a look:

.. code:: python
>>> joined_all.limit(5)
region nation amount odate
0 AMERICA UNITED STATES 160843.35 1992-06-22
1 MIDDLE EAST IRAN 78307.91 1996-04-19
2 EUROPE FRANCE 103237.90 1994-10-12
3 EUROPE FRANCE 201463.59 1997-09-12
4 ASIA JAPAN 166098.86 1995-09-12
Sweet, now let’s aggregate by year and region:

.. code:: python
>>> year = joined_all.odate.year().name('year')
>>> total = joined_all.amount.sum().cast('double').name('total')
>>> annual_amounts = (joined_all
... .group_by(['region', year])
... .aggregate(total))
>>> annual_amounts.limit(5)
region year total
0 EUROPE 1994 6.979473e+09
1 EUROPE 1996 7.015421e+09
2 ASIA 1997 6.910663e+09
3 ASIA 1998 4.058824e+09
4 EUROPE 1992 6.926705e+09
Looking good so far. Now, we need to join this table on itself, by
subtracting 1 from one of the year columns.

We do this by creating a “joinable” view of a table that is considered a
distinct object within Ibis. To do this, use the ``view`` function:

.. code:: python
>>> current = annual_amounts
>>> prior = annual_amounts.view()
>>> yoy_change = (current.total - prior.total).name('yoy_change')
>>> results = (current.join(prior, ((current.region == prior.region) &
... (current.year == (prior.year - 1))))
... [current.region, current.year, yoy_change])
>>> df = results.execute()
.. code:: python
>>> df['yoy_pretty'] = df.yoy_change.map(lambda x: '$%.2fmm' % (x / 1000000.))
If you’re being fastidious and want to consider the first year occurring
in the dataset for each region to have 0 for the prior year, you will
instead need to do an outer join and treat nulls in the prior side of
the join as zero:

.. code:: python
>>> yoy_change = (current.total - prior.total.zeroifnull()).name('yoy_change')
>>> results = (current.outer_join(prior, ((current.region == prior.region) &
... (current.year == (prior.year - 1))))
... [current.region, current.year, current.total,
... prior.total.zeroifnull().name('prior_total'),
... yoy_change])
>>> results.limit(5)
region year total prior_total yoy_change
0 ASIA 1998 4.058824e+09 0.000000e+00 4.058824e+09
1 AFRICA 1994 6.837587e+09 6.908429e+09 -7.084172e+07
2 AMERICA 1996 6.883057e+09 6.922465e+09 -3.940791e+07
3 AFRICA 1996 6.878112e+09 6.848983e+09 2.912979e+07
4 AFRICA 1992 6.873319e+09 6.859733e+09 1.358699e+07
96 changes: 96 additions & 0 deletions docs/source/user_guide/topk.rst
@@ -0,0 +1,96 @@
.. _topk:

*****************
“Top-K” Filtering
*****************

A common analytical pattern involves subsetting based on some method of
ranking. For example, “the 5 most frequently occurring widgets in a
dataset”. By choosing the right metric, you can obtain the most
important or least important items from some dimension, for some
definition of important.

Carrying out the pattern by hand involves the following steps:

- Choose a ranking metric
- Aggregate, computing the ranking metric, by the target dimension
- Order by the ranking metric and take the highest K values
- Use those values as a set filter (either with ``semi_join`` or
``isin``) in your next query

For example, let’s look at the TPC-H tables and find the 5 or 10
customers who placed the most orders over their lifetime:

.. code:: python
>>> orders = con.table('tpch_orders')
>>> top_orders = (orders
... .group_by('o_custkey')
... .size()
... .sort_by(('count', False))
... .limit(5))
>>> top_orders
o_custkey count
0 3451 41
1 102022 41
2 102004 41
3 79300 40
4 117082 40
Now, we could use these customer keys as a filter in some other
analysis:

.. code:: python
>>> # Among the top 5 most frequent customers, what's the histogram of their order statuses?
>>> analysis = (orders[orders.o_custkey.isin(top_orders.o_custkey)]
... .group_by('o_orderstatus')
... .size())
>>> analysis
o_orderstatus count
0 P 5
1 F 85
2 O 113
This is such a common pattern that Ibis supports a high level primitive
``topk`` operation, which can be used immediately as a filter:

.. code:: python
>>> top_orders = orders.o_custkey.topk(5)
>>> orders[top_orders].group_by('o_orderstatus').size()
o_orderstatus count
0 P 5
1 F 85
2 O 113
This goes a little further. Suppose now we want to rank customers by
their total spending instead of the number of orders, perhaps a more
meaningful metric:

.. code:: python
>>> total_spend = orders.o_totalprice.sum().name('total')
>>> top_spenders = (orders
... .group_by('o_custkey')
... .aggregate(total_spend)
... .sort_by(('total', False))
... .limit(5))
>>> top_spenders
o_custkey total
0 143500 7012696.48
1 95257 6563511.23
2 87115 6457526.26
3 131113 6311428.86
4 103834 6306524.23
To use another metric, just pass it to the ``by`` argument in ``topk``:

.. code:: python
>>> top_spenders = orders.o_custkey.topk(5, by=total_spend)
>>> orders[top_spenders].group_by('o_orderstatus').size()
o_orderstatus count
0 P 1
1 F 78
2 O 98
2 changes: 1 addition & 1 deletion docs/source/user_guide/udf.rst
Expand Up @@ -18,4 +18,4 @@ The next backends provide UDF support:

- :ref:`udf.impala`
- :ref:`udf.pandas`
- :ref:`udf.bigquery`
- BigQuery
4 changes: 2 additions & 2 deletions docs/web/config.yml
Expand Up @@ -51,7 +51,7 @@ layout:
url: https://twitter.com/IbisData
- font_awesome: github
url: https://github.com/ibis-project/ibis/
footer_note: "© Copyright 2020, Ibis developers"
footer_note: "© Copyright 2014-2021, Ibis developers"
google_analytics: ""

team:
Expand All @@ -60,9 +60,9 @@ team:
members:
- jreback
- datapythonista
- cpcloud
- name: "Former maintainers"
kind: github
members:
- wesm
- cpcloud
- kszucs
65 changes: 63 additions & 2 deletions docs/web/contribute.md
Expand Up @@ -128,5 +128,66 @@ for more information.

### Releasing

Access the [Ibis "Releasing" wiki](https://github.com/ibis-project/ibis/wiki/Releasing-Ibis) page
for more information.
Ibis is released in two places:

- [PyPI](https://pypi.org/) (the **PY**thon **P**ackage **I**ndex), to enable `pip install ibis-framework`
- [Conda Forge](https://conda-forge.org/), to enable `conda install ibis-framework`

Steps to release:

#### Create a new version

In the `master` branch, after the last commit to include in the release, create a tag:

- `git tag <version>` (e.g. `git tag 2.0.0`)

Originally, Ibis used a version like `v0.0.1`, but the `v` was eventually dropped, and recently we
have been using just the `0.0.1` format.

Push the tag to the remote branch:

- `git push --tags upstream master`

The remote `upstream` is assumed to be the main Ibis repo (i.e. https://github.com/ibis-project/ibis).

#### Release to PyPI

Just after creating the tag (without pulling new commits from master), build the Python package:

- `python setup.py sdist bdist_wheel`

This requires `twine` and `wheel` installed, which you should have if you created your environment
with the repo `environment.yml` file.

The package will be built in the `dist/` directory. To upload it to the PyPI server, use:

- `twine upload dist/*`

This will create the new release, which will be available immediately via `pip install ibis-framework`.

#### Release to conda-forge

The conda-forge package is released using the conda-forge feedstock repository: https://github.com/conda-forge/ibis-framework-feedstock/

We need to update its recipe in a pull request, and the new version will be automatically released.
After cloning the feedstock repository, update its recipe with the one in the main Ibis repository:

- `cp <ibis-repo>/ci/recipe/meta.yaml <feedstock-repo>/recipe/meta.yaml`

Remove the comment at the header of the `meta.yaml` file, and update the following YAML values:

- Add at the beginning `{% set version = "2.0.0" %}` (replace `2.0.0` with the version being released)
- Set `number` in the `build` section to `0` (unless this is an additional build of the same release, in which case increase it by one)
- Add the `sha256` value in the `source` section. Run `sha256sum` on the `tar.gz` file in `dist/` to obtain the value.

Once the recipe is final, run:

- `conda smithy rerender`

This will update the azure configuration files in the feedstock repository, and possibly other files. Open a pull request with all the changes.

The conda-forge package should be ready not long after the pull request is merged, and it can
be installed with `conda install -c conda-forge ibis-framework`.

Finally, if the recipe required extra changes besides the version, build and sha256
mentioned before, copy the recipe back to the Ibis repository, keeping the header.
2 changes: 1 addition & 1 deletion docs/web/getting_started.md
Expand Up @@ -4,7 +4,7 @@

The next steps provides the easiest and recommended way to set up your
environment to use {{ ibis.project_name }}. Other installation options can be found in
the [advanced installation page]({{ base_url}}/docs/getting_started/install.html).
the [advanced installation page]({{ base_url}}/docs/index.html#installation).

1. Download [Anaconda](https://www.anaconda.com/distribution/) for your operating system and
the latest Python version, run the installer, and follow the steps. Detailed instructions
Expand Down
7 changes: 3 additions & 4 deletions docs/web/index.md
Expand Up @@ -22,8 +22,8 @@ multiple engines.

- Standard DBMS: [PostgreSQL](/docs/backends/postgres.html), [MySQL](/docs/backends/mysql.html), [SQLite](/docs/backends/sqlite.html)
- Analytical DBMS: [OmniSciDB](/docs/backends/omnisci.html), [ClickHouse](/docs/backends/clickhouse.html)
- Distributed platforms: [Impala](/docs/backends/impala.html), [Spark](/docs/backends/spark.html), [BigQuery](/docs/backends/bigquery.html)
- In memory execution: [pandas](/docs/backends/pandas.html)
- Distributed platforms: [Impala](/docs/backends/impala.html), [PySpark](/docs/backends/pyspark.html), [BigQuery](/docs/backends/bigquery.html)
- In memory execution: [pandas](/docs/backends/pandas.html), [Dask](/docs/backends/dask.html)

## Example

Expand Down Expand Up @@ -76,5 +76,4 @@ representations.
Dask provides advanced parallelism, and can distribute pandas jobs. Ibis can
process data in a similar way, but for a number of different backends. For
example, given a Spark cluster, Ibis lets you perform analytics on it,
with a familiar Python syntax. Ibis plans to add support for a Dask backend
in the future.
with a familiar Python syntax. Ibis supports Dask as a backend.
2 changes: 1 addition & 1 deletion docs/web/static/css/ibis.css
Expand Up @@ -22,7 +22,7 @@ h3 a {
}
h4 {
font-size: 1rem;
font-weight: 500;
font-weight: 600;
color: #444;
}
a {
Expand Down
88 changes: 44 additions & 44 deletions environment.yml
@@ -1,59 +1,59 @@
# This file should have all the dependencies for development excluding the specific to the backends.
name: ibis-dev
channels:
- conda-forge
name: ibis
dependencies:
# Ibis hard dependencies
- multipledispatch>=0.6.0
- numpy>=1.19
- pandas>=0.25 # XXX pymapd does not support pandas 1.0
- pytz>=2020.1
- regex>=2020.7
- toolz>=0.10
- multipledispatch
- numpy
- pandas
- pytz
- regex
- toolz
- cached_property # for 3.7 compat, functools.cached_property is for >=3.8
- setuptools
- parsy

# Ibis soft dependencies
# TODO This section is probably not very accurate right now (some dependencies should probably be in the backends files)
- sqlalchemy>=1.3
- graphviz>=2.38
- openjdk=8
- pytables>=3.6
- python-graphviz>=0.14
- python-hdfs>=2.0.16 # XXX this verison can probably be increased
- sqlalchemy
- python-graphviz

# Dev tools
- asv>=0.4.2
- asv
- black=19.10b0
- click>=7.1 # few scripts in ci/
- click # few scripts in ci/
- flake8
- flake8-comprehensions # used by flake8, linting of unnecessary comprehensions
- isort
- jinja2<3 # feedstock
- mypy
- plumbum # few scripts in ci/ and dev/
- pydocstyle
- pytest
- pytest-cov
- pytest-mock

# Release
- twine
- wheel
- conda-build # feedstock
- cmake>=3.17
- flake8>=3.8
- isort>=5.3
- jinja2>=2.11 # feedstock
- mypy>=0.782
- plumbum>=1.6 # few scripts in ci/ and dev/
- pre-commit>=2.6
- pydocstyle>=4.0
- pygit2>=1.2 # dev/genrelease.py
- pytest>=5.4
- pytest-cov>=2.10
- pytest-mock>=3.1
- ruamel.yaml>=0.16 # feedstock
- libiconv>=1.15 # bug in repo2docker, see https://github.com/jupyter/repo2docker/issues/758
- xorg-libxpm>=3.5
- xorg-libxrender>=0.9
- ruamel.yaml # feedstock
- pygit2 # dev/genrelease.py

# Docs
- ipython>=7.17
- jupyter>=1.0
- matplotlib>=2 # XXX test if this can be bumped
- pip
- pip:
- pysuerga
- pytest-randomly
- ipython
- ipykernel
- nbconvert
- nbsphinx>=0.7
- nbsphinx
- nomkl
- pyarrow>=0.12 # must pin again otherwise strange things happen
- semantic_version=2.6 # https://github.com/ibis-project/ibis/issues/2027
- sphinx>=2.0.1
- sphinx
- sphinx-releases
- sphinx_rtd_theme>=0.5
- pip
- pip:
- pysuerga
- sphinx_rtd_theme

# Type annotations
- types-setuptools
- types-pytz
- types-python-dateutil
157 changes: 84 additions & 73 deletions ibis/__init__.py
@@ -1,84 +1,95 @@
"""Initialize Ibis module."""
import warnings
from contextlib import suppress

import ibis.config_init # noqa: F401
import ibis.expr.api as api # noqa: F401
import ibis.expr.types as ir # noqa: F401
import ibis.util as util # noqa: F401

# pandas backend is mandatory
from ibis.backends import pandas # noqa: F401
from ibis.common.exceptions import IbisError # noqa: F401
from ibis.config import options # noqa: F401
import pkg_resources

# Converting an Ibis schema to a pandas DataFrame requires registering
# some type conversions that are currently registered in the pandas backend
import ibis.backends.pandas
import ibis.config
import ibis.expr.types as ir
from ibis import util
from ibis.backends.base import BaseBackend
from ibis.common.exceptions import IbisError
from ibis.config import options
from ibis.expr import api
from ibis.expr.api import * # noqa: F401,F403

from ._version import get_versions # noqa: E402

with suppress(ImportError):
# pip install ibis-framework[csv]
from ibis.backends import csv # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[parquet]
from ibis.backends import parquet # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[hdf5]
from ibis.backends import hdf5 # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[impala]
from ibis.backends import impala # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[sqlite]
from ibis.backends import sqlite # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[postgres]
from ibis.backends import postgres # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[mysql]
from ibis.backends import mysql # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[clickhouse]
from ibis.backends import clickhouse # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[bigquery]
from ibis.backends import bigquery # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[omniscidb]
from ibis.backends import omniscidb # noqa: F401

with suppress(ImportError):
# pip install ibis-framework[spark]
from ibis.backends import spark # noqa: F401

with suppress(ImportError):
from ibis.backends import pyspark # noqa: F401

__all__ = ['api', 'ir', 'util', 'IbisError', 'options']
__all__ += api.__all__


ibis.config.register_option(
'interactive', False, validator=ibis.config.is_bool
)
ibis.config.register_option('verbose', False, validator=ibis.config.is_bool)
ibis.config.register_option('verbose_log', None)
ibis.config.register_option(
'graphviz_repr',
True,
"""\
Whether to render expressions as GraphViz PNGs when repr-ing in a Jupyter
notebook.
""",
validator=ibis.config.is_bool,
)
ibis.config.register_option('default_backend', None)
with ibis.config.config_prefix('context_adjustment'):
ibis.config.register_option(
'time_col',
'time',
'Name of the timestamp col for execution with a timecontext'
'See ibis.expr.timecontext for details.',
validator=ibis.config.is_str,
)
with ibis.config.config_prefix('sql'):
ibis.config.register_option(
'default_limit',
10_000,
'Number of rows to be retrieved for an unlimited table expression',
)

__version__ = get_versions()['version']
del get_versions


def __getattr__(name):
if name in ('HDFS', 'WebHDFS', 'hdfs_connect'):
warnings.warn(
f'`ibis.{name}` has been deprecated and will be removed in a '
f'future version, use `ibis.impala.{name}` instead',
FutureWarning,
stacklevel=2,
def __getattr__(name: str) -> BaseBackend:
"""Load backends in a lazy way with `ibis.<backend-name>`.
This also registers the backend options.
Examples
--------
>>> import ibis
>>> con = ibis.sqlite.connect(...)
When accessing the `sqlite` attribute of the `ibis` module, this function
is called, and it tries to load a backend with the name `sqlite` from
the `ibis.backends` entry points. If successful, the `ibis.sqlite`
attribute is "cached", so this function is only called the first time.
"""
entry_points = list(
pkg_resources.iter_entry_points(group='ibis.backends', name=name)
)
if len(entry_points) == 0:
raise AttributeError(
f"module 'ibis' has no attribute '{name}'. "
f"If you are trying to access the '{name}' backend, "
f"try installing it first with `pip install ibis-{name}`"
)
if 'impala' in globals():
return getattr(impala, name)
else:
raise AttributeError(
f'`ibis.{name}` requires impala backend to be installed'
)
raise AttributeError
elif len(entry_points) > 1:
raise RuntimeError(
f"{len(entry_points)} packages found for backend '{name}'. "
"There should be only one, please uninstall the unused packages "
"and just leave the one that needs to be used."
)

backend = entry_points[0].resolve().Backend()

# The first time a backend is loaded, we register its options, and we set
# it as an attribute of `ibis`, so `__getattr__` is not called again for it
with ibis.config.config_prefix(name):
backend.register_options()

setattr(ibis, name, backend)
return backend
6 changes: 3 additions & 3 deletions ibis/_version.py
Expand Up @@ -177,11 +177,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
Expand All @@ -190,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
Expand Down
477 changes: 477 additions & 0 deletions ibis/backends/base/__init__.py

Large diffs are not rendered by default.

176 changes: 176 additions & 0 deletions ibis/backends/base/file/__init__.py
@@ -0,0 +1,176 @@
import abc
import warnings
from pathlib import Path

import pandas as pd

import ibis
import ibis.expr.types as ir
from ibis.backends.base import BaseBackend, Database
from ibis.backends.pandas.core import execute_and_reset

# Load options of pandas backend
ibis.pandas


class FileDatabase(Database):
def __init__(self, name, client):
super().__init__(name, client)
self.path = client.path

def __str__(self):
return '{0.__class__.__name__}({0.name})'.format(self)

def __dir__(self):
dbs = self.list_databases(path=self.path)
tables = self.list_tables(path=self.path)
return sorted(set(dbs).union(set(tables)))

def __getattr__(self, name):
try:
return self.table(name, path=self.path)
except AttributeError:
return self.database(name, path=self.path)

def table(self, name, path):
return self.client.table(name, path=path)

def database(self, name=None, path=None):
return self.client.database(name=name, path=path)

def list_databases(self, path=None):
if path is None:
path = self.path
return sorted(self.client.list_databases(path=path))

def list_tables(self, path=None, database=None):
if path is None:
path = self.path
return sorted(self.client.list_tables(path=path, database=database))


class BaseFileBackend(BaseBackend):
"""
Base backend class for pandas pseudo-backends for file formats.
"""

database_class = FileDatabase

def connect(self, path):
"""Create a Client for use with Ibis
Parameters
----------
path : str or pathlib.Path
Returns
-------
Backend
"""
new_backend = self.__class__()
new_backend.path = new_backend.root = Path(path)
new_backend.dictionary = {}
return new_backend

@property
def version(self) -> str:
return pd.__version__

def list_tables(
self, path: Path = None, like: str = None, database: str = None
):
# For file backends, we return files in the `path` directory.

def is_valid(path):
return path.is_file() and path.suffix == '.' + self.extension

path = path or self.path

if path.is_dir():
tables = [f.stem for f in path.iterdir() if is_valid(f)]
elif is_valid(path):
tables = [path.stem]
else:
tables = []

return self._filter_with_like(tables, like)

@property
def current_database(self):
# Databases for the file backend are a bit confusing
# `list_databases()` will return the directories in the current path
# The `current_database` is not in that list. Probably we want to
# rethink this eventually. For now we just return `None` here, as if
# databases were not supported
return '.'

def compile(self, expr, *args, **kwargs):
return expr

def _list_databases_dirs(self, path=None):
tables = []
if path.is_dir():
for d in path.iterdir():
if d.is_dir():
tables.append(d.name)
return tables

def _list_tables_files(self, path=None):
# tables are files in a dir
if path is None:
path = self.root

tables = []
if path.is_dir():
for d in path.iterdir():
if d.is_file():
if str(d).endswith(self.extension):
tables.append(d.stem)
elif path.is_file():
if str(path).endswith(self.extension):
tables.append(path.stem)
return tables

def list_databases(self, path=None, like=None):
if path is None:
path = self.path
else:
warnings.warn(
'The `path` argument of `list_databases` is deprecated and '
'will be removed in a future version of Ibis. Connect to a '
'different path with the `connect()` method instead.',
FutureWarning,
)
databases = ['.'] + self._list_databases_dirs(path)
return self._filter_with_like(databases, like)

@abc.abstractmethod
def insert(self, path, expr, **kwargs):
pass

@abc.abstractmethod
def table(self, name, path):
pass

def database(self, name=None, path=None):
if name is None:
self.path = path or self.path
return super().database(name)

if path is None:
path = self.root
if name not in self.list_databases(path):
raise AttributeError(name)

new_name = f"{name}.{self.extension}"
if (self.root / name).is_dir():
path /= name
elif not str(path).endswith(new_name):
path /= new_name

self.path = path
return super().database(name)

def execute(self, expr, params=None, **kwargs): # noqa
assert isinstance(expr, ir.Expr)
return execute_and_reset(expr, params=params, **kwargs)
216 changes: 216 additions & 0 deletions ibis/backends/base/sql/__init__.py
@@ -0,0 +1,216 @@
import abc
from typing import Optional

import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
import ibis.util as util
from ibis.backends.base import BaseBackend
from ibis.expr.typing import TimeContext

from .compiler import Compiler

__all__ = ('BaseSQLBackend',)


class BaseSQLBackend(BaseBackend):
"""
Base backend class for backends that compile to SQL.
"""

compiler = Compiler
table_class = ops.DatabaseTable
table_expr_class = ir.TableExpr

def table(self, name, database=None):
"""Create a table expression.
Create a table expression that references a particular table in the
database.
Parameters
----------
name : string
database : string, optional
Returns
-------
table : TableExpr
"""
qualified_name = self._fully_qualified_name(name, database)
schema = self.get_schema(qualified_name)
node = self.table_class(qualified_name, schema, self)
return self.table_expr_class(node)

def _fully_qualified_name(self, name, database):
# XXX
return name

def sql(self, query):
"""Convert a SQL query to an Ibis table expression.
Parameters
----------
query : string
Returns
-------
table : TableExpr
"""
# Get the schema by adding a LIMIT 0 on to the end of the query. If
# there is already a limit in the query, we find and remove it
limited_query = f'SELECT * FROM ({query}) t0 LIMIT 0'
schema = self._get_schema_using_query(limited_query)
return ops.SQLQueryResult(query, schema, self).to_expr()

def raw_sql(self, query: str, results=False):
"""Execute a given query string.
Could have unexpected results if the query modifies the behavior of
the session in a way unknown to Ibis; be careful.
Parameters
----------
query : string
DML or DDL statement
Returns
-------
Backend cursor
"""
# TODO results is unused, it can be removed
# (requires updating Impala tests)
# TODO `self.con` is assumed to be defined in subclasses, but there
# is nothing that enforces it. We should find a way to make sure
# `self.con` is always a DBAPI2 connection, or raise an error
cursor = self.con.execute(query) # type: ignore
if cursor:
return cursor
cursor.release()
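
    # Example usage (hedged sketch; statements that cannot be expressed as
    # Ibis expressions, such as DDL, go through `raw_sql` directly):
    #
    #     >>> cur = con.raw_sql('CREATE TABLE t AS SELECT 1 AS x')
    #     >>> if cur is not None:
    #     ...     cur.release()  # the caller releases any returned cursor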

def execute(self, expr, params=None, limit='default', **kwargs):
"""Compile and execute the given Ibis expression.
Compile and execute Ibis expression using this backend client
interface, returning results in-memory in the appropriate object type
Parameters
----------
expr : Expr
limit : int, default None
For expressions yielding result yets; retrieve at most this number of
values/rows. Overrides any limit already set on the expression.
params : not yet implemented
kwargs : Backends can receive extra params. For example, clickhouse
uses this to receive external_tables as dataframes.
Returns
-------
output : input type dependent
Table expressions: pandas.DataFrame
Array expressions: pandas.Series
Scalar expressions: Python scalar value
"""
# TODO Reconsider having `kwargs` here. It's needed to support
# `external_tables` in clickhouse, but better to deprecate that
# feature than all this magic.
# we don't want to pass `timecontext` to `raw_sql`
kwargs.pop('timecontext', None)
query_ast = self.compiler.to_ast_ensure_limit(
expr, limit, params=params
)
sql = query_ast.compile()
self._log(sql)
cursor = self.raw_sql(sql, **kwargs)
schema = self.ast_schema(query_ast, **kwargs)
result = self.fetch_from_cursor(cursor, schema)

        dml = getattr(query_ast, 'dml', query_ast)
        if hasattr(dml, 'result_handler'):
            result = dml.result_handler(result)

return result
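
    # Example usage (hedged; `countries` is assumed to be a TableExpr bound
    # to this backend):
    #
    #     >>> df = con.execute(countries, limit=10)            # pandas.DataFrame
    #     >>> total = con.execute(countries.population.sum())  # Python scalar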

@abc.abstractmethod
def fetch_from_cursor(self, cursor, schema):
"""Fetch data from cursor."""

    def ast_schema(self, query_ast, **kwargs):
        """Return the schema of the expression.

        Returns
        -------
        Schema

        Raises
        ------
        ValueError
            if the expression does not have a schema.
        """
dml = getattr(query_ast, 'dml', query_ast)
expr = getattr(dml, 'parent_expr', getattr(dml, 'table_set', None))

if isinstance(expr, (ir.TableExpr, ir.ExprList, sch.HasSchema)):
return expr.schema()
elif isinstance(expr, ir.ValueExpr):
return sch.schema([(expr.get_name(), expr.type())])
else:
raise ValueError(
'Expression with type {} does not have a '
                'schema'.format(type(expr))
)

def _log(self, sql):
"""Log the SQL, usually to the standard output.
This method can be implemented by subclasses. The logging happens
when `ibis.options.verbose` is `True`.
"""
pass

def compile(
self,
expr,
limit=None,
params=None,
timecontext: Optional[TimeContext] = None,
):
"""Translate expression.
Translate expression to one or more queries according to
backend target.
Returns
-------
output : single query or list of queries
"""
return self.compiler.to_ast_ensure_limit(
expr, limit, params=params
).compile()
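
    # Example usage (hedged; the emitted text depends on the backend's
    # compiler, so only the general shape is indicated):
    #
    #     >>> sql = con.compile(countries.continent.value_counts())
    #     >>> print(sql)  # a backend-specific SELECT ... GROUP BY statement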

def explain(self, expr, params=None):
"""Explain expression.
Query for and return the query plan associated with the indicated
expression or SQL query.
Returns
-------
plan : string
"""
if isinstance(expr, ir.Expr):
context = self.compiler.make_context(params=params)
query_ast = self.compiler.to_ast(expr, context)
if len(query_ast.queries) > 1:
raise Exception('Multi-query expression')

query = query_ast.queries[0].compile()
else:
query = expr

statement = f'EXPLAIN {query}'

cur = self.raw_sql(statement)
result = self._get_list(cur)
cur.release()

return '\n'.join(['Query:', util.indent(query, 2), '', *result])
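
    # Example usage (hedged; only backends whose engine supports EXPLAIN and
    # whose cursors implement `release()`, such as Impala, can run this):
    #
    #     >>> print(con.explain(countries.limit(5)))
    #     Query:
    #       SELECT ...
    #     <backend-specific plan lines>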
445 changes: 445 additions & 0 deletions ibis/backends/base/sql/alchemy/__init__.py

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions ibis/backends/base/sql/alchemy/database.py
@@ -0,0 +1,23 @@
import ibis.expr.operations as ops
import ibis.expr.schema as sch
from ibis.backends.base import Database


class AlchemyDatabase(Database):
"""
Attributes
----------
client : AlchemyClient
"""

def table(self, name, schema=None):
return self.client.table(name, schema=schema)


class AlchemyTable(ops.DatabaseTable):
def __init__(self, table, source, schema=None):
schema = sch.infer(table, schema=schema)
super().__init__(table.name, schema, source)
self.sqla_table = table
292 changes: 292 additions & 0 deletions ibis/backends/base/sql/alchemy/datatypes.py
@@ -0,0 +1,292 @@
from typing import Optional

import sqlalchemy as sa
from sqlalchemy.dialects import mysql, postgresql, sqlite
from sqlalchemy.dialects.mysql.base import MySQLDialect
from sqlalchemy.dialects.postgresql.base import PGDialect
from sqlalchemy.dialects.sqlite.base import SQLiteDialect
from sqlalchemy.engine.interfaces import Dialect

import ibis.expr.datatypes as dt
import ibis.expr.schema as sch

from .geospatial import geospatial_supported

if geospatial_supported:
import geoalchemy2 as ga


def table_from_schema(name, meta, schema, database: Optional[str] = None):
# Convert Ibis schema to SQLA table
columns = []

for colname, dtype in zip(schema.names, schema.types):
satype = to_sqla_type(dtype)
column = sa.Column(colname, satype, nullable=dtype.nullable)
columns.append(column)

    return sa.Table(name, meta, *columns, schema=database)
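
# Hedged example of turning an Ibis schema into a SQLAlchemy ``Table``
# (the names below are illustrative, not part of this diff):
#
#     >>> import ibis
#     >>> import sqlalchemy as sa
#     >>> meta = sa.MetaData()
#     >>> s = ibis.schema([('name', 'string'), ('population', 'int64')])
#     >>> t = table_from_schema('countries', meta, s)
#     >>> [c.name for c in t.columns]
#     ['name', 'population']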


# TODO(cleanup)
ibis_type_to_sqla = {
dt.Null: sa.types.NullType,
dt.Date: sa.Date,
dt.Time: sa.Time,
dt.Boolean: sa.Boolean,
dt.Binary: sa.LargeBinary,
dt.String: sa.Text,
dt.Decimal: sa.NUMERIC,
# Mantissa-based
dt.Float: sa.Float(precision=24),
dt.Double: sa.Float(precision=53),
dt.Int8: sa.SmallInteger,
dt.Int16: sa.SmallInteger,
dt.Int32: sa.Integer,
dt.Int64: sa.BigInteger,
}


def to_sqla_type(itype, type_map=None):
if type_map is None:
type_map = ibis_type_to_sqla
if isinstance(itype, dt.Decimal):
return sa.types.NUMERIC(itype.precision, itype.scale)
elif isinstance(itype, dt.Date):
return sa.Date()
elif isinstance(itype, dt.Timestamp):
# SQLAlchemy DateTimes do not store the timezone, just whether the db
# supports timezones.
return sa.TIMESTAMP(bool(itype.timezone))
elif isinstance(itype, dt.Array):
ibis_type = itype.value_type
if not isinstance(ibis_type, (dt.Primitive, dt.String)):
raise TypeError(
'Type {} is not a primitive type or string type'.format(
ibis_type
)
)
return sa.ARRAY(to_sqla_type(ibis_type, type_map=type_map))
elif geospatial_supported and isinstance(itype, dt.GeoSpatial):
if itype.geotype == 'geometry':
return ga.Geometry
elif itype.geotype == 'geography':
return ga.Geography
else:
return ga.types._GISType
else:
return type_map[type(itype)]
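
# Hedged usage sketch for the Ibis -> SQLAlchemy type mapping above:
#
#     >>> import ibis.expr.datatypes as dt
#     >>> to_sqla_type(dt.int64)            # -> sa.BigInteger (from the map)
#     >>> to_sqla_type(dt.Decimal(12, 2))   # -> sa.NUMERIC(12, 2), special-cased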


@dt.dtype.register(Dialect, sa.types.NullType)
def sa_null(_, satype, nullable=True):
return dt.null


@dt.dtype.register(Dialect, sa.types.Boolean)
def sa_boolean(_, satype, nullable=True):
return dt.Boolean(nullable=nullable)


@dt.dtype.register(MySQLDialect, mysql.NUMERIC)
def sa_mysql_numeric(_, satype, nullable=True):
# https://dev.mysql.com/doc/refman/8.0/en/fixed-point-types.html
return dt.Decimal(
satype.precision or 10, satype.scale or 0, nullable=nullable
)


@dt.dtype.register(PGDialect, postgresql.NUMERIC)
def sa_postgres_numeric(_, satype, nullable=True):
# PostgreSQL allows any precision for numeric values if not specified,
# up to the implementation limit. Here, default to the maximum value that
# can be specified by the user. The scale defaults to zero.
# https://www.postgresql.org/docs/10/datatype-numeric.html
return dt.Decimal(
satype.precision or 1000, satype.scale or 0, nullable=nullable
)


@dt.dtype.register(Dialect, sa.types.Numeric)
@dt.dtype.register(SQLiteDialect, sqlite.NUMERIC)
def sa_numeric(_, satype, nullable=True):
return dt.Decimal(satype.precision, satype.scale, nullable=nullable)


@dt.dtype.register(Dialect, sa.types.SmallInteger)
def sa_smallint(_, satype, nullable=True):
return dt.Int16(nullable=nullable)


@dt.dtype.register(Dialect, sa.types.Integer)
def sa_integer(_, satype, nullable=True):
return dt.Int32(nullable=nullable)


@dt.dtype.register(Dialect, mysql.TINYINT)
def sa_mysql_tinyint(_, satype, nullable=True):
return dt.Int8(nullable=nullable)


@dt.dtype.register(Dialect, sa.types.BigInteger)
def sa_bigint(_, satype, nullable=True):
return dt.Int64(nullable=nullable)


@dt.dtype.register(Dialect, sa.types.Float)
def sa_float(_, satype, nullable=True):
return dt.Float(nullable=nullable)


@dt.dtype.register(SQLiteDialect, sa.types.Float)
@dt.dtype.register(PGDialect, postgresql.DOUBLE_PRECISION)
def sa_double(_, satype, nullable=True):
return dt.Double(nullable=nullable)


@dt.dtype.register(PGDialect, postgresql.UUID)
def sa_uuid(_, satype, nullable=True):
return dt.UUID(nullable=nullable)


@dt.dtype.register(PGDialect, postgresql.MACADDR)
def sa_macaddr(_, satype, nullable=True):
return dt.MACADDR(nullable=nullable)


@dt.dtype.register(PGDialect, postgresql.INET)
def sa_inet(_, satype, nullable=True):
return dt.INET(nullable=nullable)


@dt.dtype.register(PGDialect, postgresql.JSON)
def sa_json(_, satype, nullable=True):
return dt.JSON(nullable=nullable)


@dt.dtype.register(PGDialect, postgresql.JSONB)
def sa_jsonb(_, satype, nullable=True):
return dt.JSONB(nullable=nullable)


if geospatial_supported:

@dt.dtype.register(Dialect, (ga.Geometry, ga.types._GISType))
def ga_geometry(_, gatype, nullable=True):
t = gatype.geometry_type
if t == 'POINT':
return dt.Point(nullable=nullable)
if t == 'LINESTRING':
return dt.LineString(nullable=nullable)
if t == 'POLYGON':
return dt.Polygon(nullable=nullable)
if t == 'MULTILINESTRING':
return dt.MultiLineString(nullable=nullable)
if t == 'MULTIPOINT':
return dt.MultiPoint(nullable=nullable)
if t == 'MULTIPOLYGON':
return dt.MultiPolygon(nullable=nullable)
if t == 'GEOMETRY':
return dt.Geometry(nullable=nullable)
else:
raise ValueError(f"Unrecognized geometry type: {t}")


POSTGRES_FIELD_TO_IBIS_UNIT = {
"YEAR": "Y",
"MONTH": "M",
"DAY": "D",
"HOUR": "h",
"MINUTE": "m",
"SECOND": "s",
"YEAR TO MONTH": "M",
"DAY TO HOUR": "h",
"DAY TO MINUTE": "m",
"DAY TO SECOND": "s",
"HOUR TO MINUTE": "m",
"HOUR TO SECOND": "s",
"MINUTE TO SECOND": "s",
}


@dt.dtype.register(PGDialect, postgresql.INTERVAL)
def sa_postgres_interval(_, satype, nullable=True):
field = satype.fields.upper()
unit = POSTGRES_FIELD_TO_IBIS_UNIT.get(field, None)
if unit is None:
raise ValueError(f"Unknown PostgreSQL interval field {field!r}")
elif unit in {"Y", "M"}:
raise ValueError(
"Variable length timedeltas are not yet supported with PostgreSQL"
)
return dt.Interval(unit=unit, nullable=nullable)


@dt.dtype.register(MySQLDialect, mysql.DOUBLE)
def sa_mysql_double(_, satype, nullable=True):
# TODO: handle asdecimal=True
return dt.Double(nullable=nullable)


@dt.dtype.register(Dialect, sa.types.String)
def sa_string(_, satype, nullable=True):
return dt.String(nullable=nullable)


@dt.dtype.register(Dialect, sa.LargeBinary)
def sa_binary(_, satype, nullable=True):
return dt.Binary(nullable=nullable)


@dt.dtype.register(Dialect, sa.Time)
def sa_time(_, satype, nullable=True):
return dt.Time(nullable=nullable)


@dt.dtype.register(Dialect, sa.Date)
def sa_date(_, satype, nullable=True):
return dt.Date(nullable=nullable)


@dt.dtype.register(Dialect, sa.DateTime)
def sa_datetime(_, satype, nullable=True, default_timezone='UTC'):
timezone = default_timezone if satype.timezone else None
return dt.Timestamp(timezone=timezone, nullable=nullable)


@dt.dtype.register(Dialect, sa.ARRAY)
def sa_array(dialect, satype, nullable=True):
dimensions = satype.dimensions
if dimensions is not None and dimensions != 1:
raise NotImplementedError('Nested array types not yet supported')

value_dtype = dt.dtype(dialect, satype.item_type)
return dt.Array(value_dtype, nullable=nullable)


@sch.infer.register(sa.Table)
def schema_from_table(table, schema=None):
"""Retrieve an ibis schema from a SQLAlchemy ``Table``.
Parameters
----------
table : sa.Table
Returns
-------
schema : ibis.expr.datatypes.Schema
An ibis schema corresponding to the types of the columns in `table`.
"""
schema = schema if schema is not None else {}
pairs = []
for name, column in zip(table.columns.keys(), table.columns):
if name in schema:
dtype = dt.dtype(schema[name])
else:
dtype = dt.dtype(
getattr(table.bind, 'dialect', Dialect()),
column.type,
nullable=column.nullable,
)
pairs.append((name, dtype))
return sch.schema(pairs)
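
# Hedged example of inferring an Ibis schema from a SQLAlchemy table (an
# unbound, hand-built table here; real tables would usually come from
# reflection):
#
#     >>> import sqlalchemy as sa
#     >>> import ibis.expr.schema as sch
#     >>> meta = sa.MetaData()
#     >>> t = sa.Table('countries', meta,
#     ...              sa.Column('name', sa.Text),
#     ...              sa.Column('population', sa.BigInteger))
#     >>> sch.infer(t)  # -> schema with name: string, population: int64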
8 changes: 8 additions & 0 deletions ibis/backends/base/sql/alchemy/geospatial.py
@@ -0,0 +1,8 @@
try:
import geoalchemy2 # noqa F401
import geoalchemy2.shape # noqa F401
import geopandas # noqa F401
except ImportError:
geospatial_supported = False
else:
geospatial_supported = True