@@ -0,0 +1,29 @@
import sqlalchemy as sa

# Lightweight table constructs; only the columns referenced below are declared.
c = sa.table(
    "companies",
    sa.column("permalink"),
    sa.column("status"),
)
i = sa.table(
    "investments",
    sa.column("investor_name"),
    sa.column("company_permalink"),
)

# Per-investor counts of investments and of successful exits (IPO or
# acquisition), using the legacy (pre-1.4) list-based select()/case() API.
a = (
    sa.select(
        [
            sa.case(
                [(i.c.investor_name.is_(None), "NO INVESTOR")],
                else_=i.c.investor_name,
            ).label("investor_name"),
            sa.func.count(c.c.permalink.distinct()).label("num_investments"),
            sa.func.count(
                sa.case(
                    [(c.c.status.in_(("ipo", "acquired")), c.c.permalink)]
                ).distinct()
            ).label("acq_ipos"),
        ]
    )
    .select_from(
        c.join(
            i, onclause=c.c.permalink == i.c.company_permalink, isouter=True
        )
    )
    .group_by(1)
    .order_by(sa.desc(2))
)

# Acquisition/IPO rate per investor, selecting from the query above.
expr = sa.select([(a.c.acq_ipos / a.c.num_investments).label("acq_rate")])

@@ -0,0 +1,65 @@
:root {
  --md-admonition-icon--experimental: url('data:image/svg+xml;charset=utf-8,<svg aria-hidden="true" focusable="false" data-prefix="fas" data-icon="flask" class="svg-inline--fa fa-flask fa-w-14" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><path fill="currentColor" d="M437.2 403.5L320 215V64h8c13.3 0 24-10.7 24-24V24c0-13.3-10.7-24-24-24H120c-13.3 0-24 10.7-24 24v16c0 13.3 10.7 24 24 24h8v151L10.8 403.5C-18.5 450.6 15.3 512 70.9 512h306.2c55.7 0 89.4-61.5 60.1-108.5zM137.9 320l48.2-77.6c3.7-5.2 5.8-11.6 5.8-18.4V64h64v160c0 6.9 2.2 13.2 5.8 18.4l48.2 77.6h-172z"></path></svg>');
}

.md-typeset .admonition.experimental,
.md-typeset details.experimental {
  border-color: rgb(43, 155, 70);
}

.md-typeset .experimental > .admonition-title,
.md-typeset .experimental > summary {
  background-color: rgba(43, 155, 70, 0.1);
  border-color: rgb(43, 155, 70);
}

.md-typeset .experimental > .admonition-title::before,
.md-typeset .experimental > summary::before {
  background-color: rgb(43, 155, 70);
  -webkit-mask-image: var(--md-admonition-icon--experimental);
  mask-image: var(--md-admonition-icon--experimental);
}

.verified {
  color: #00c853;
}

.unverified {
  color: #ff9100;
}

.bug {
  color: #f50057;
}

.cancel {
  color: #ff5252;
}

.download-button {
  text-align: center;
}

.support-matrix .md-typeset__table {
  display: table;
  min-width: 100%;
}

.support-matrix .md-typeset table:not([class]) {
  display: table;
  min-width: 100%;
}

body
  > div.md-container
  > main
  > div
  > div.md-content
  > article
  > div.md-typeset__scrollwrap
  > div
  > table
  > thead
  > tr
  > th:nth-child(1) {
  min-width: 9.8rem;
}

@@ -0,0 +1,59 @@
# Configuring Ibis

Ibis configuration happens through the `ibis.options` attribute. Attributes
can be read and set like class attributes.

## Interactive mode

Out of the box, Ibis is in _developer mode_: expressions display their
internal details when printed to the console. For a better interactive
experience, set the `interactive` option:

```python
import ibis

ibis.options.interactive = True
```

This causes expressions to be executed immediately when printed to the
console.

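For example (a sketch; the connection and table name here are hypothetical),
printing a limited table expression now fetches and displays rows instead of
the expression tree:

```python
con = ibis.sqlite.connect("example.db")  # hypothetical database
t = con.table("my_table")                # hypothetical table

t.limit(2)  # with interactive mode on, this executes and prints rows
```
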
## SQL Query Execution

If an Ibis table expression has no row limit set using the `limit` API, a
default one is applied to prevent too much data from being retrieved from the
query engine. The default is currently 10000 rows, but this can be configured
with the `sql.default_limit` option:

```python
ibis.options.sql.default_limit = 100
```

Set this to `None` to retrieve all rows in all queries.

!!! warning "Be careful with `None`"

    Setting the default limit to `None` will result in *all* rows from a query
    coming back to the client from the backend.

```python
ibis.options.sql.default_limit = None
```

## Verbose option and Logging

To see all internal Ibis activity (like queries being executed) set
`ibis.options.verbose`:

```python
ibis.options.verbose = True
```

By default this information is sent to `sys.stdout`, but you can set some
other logging function:

```python
def cowsay(msg):
    print(f"Cow says: {msg}")


ibis.options.verbose_log = cowsay
```

@@ -0,0 +1,175 @@
# Design

## Primary Goals

1. Type safety
1. Expressiveness
1. Composability
1. Familiarity

## Flow of Execution

1. User writes expression
1. Each method or function call builds a new expression
1. Expressions are type-checked as you create them
1. Expressions have some optimizations that happen as the user builds them
1. Backend specific rewrites
1. Expressions are compiled
1. The SQL string generated by the compiler is sent to the database and
   executed (this step is skipped for the pandas backend)
1. The database returns some data that is then turned into a pandas DataFrame
   by ibis

## Expressions

The main user-facing component of ibis is expressions. The base class of all
expressions in ibis is the [ibis.expr.types.Expr][] class.

Expressions provide the user facing API, most of which is defined in
`ibis/expr/api.py`.

### Type System

Ibis's type system consists of a set of rules for specifying the types of
inputs to `ibis.expr.types.Node` subclasses. Upon construction of a `Node`
subclass, ibis performs validation of every input to the node based on the
rule that was used to declare the input.

Rules are defined in `ibis.expr.rules`.

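As a brief illustration (a sketch; the exact exception type is an assumption),
rule validation rejects invalid inputs when the expression is *constructed*,
long before anything is executed:

```python
import ibis

t = ibis.table([('a', 'double')], name='t')

# The `base` argument must be a double scalar or column; a string fails
# rule validation and raises an error (an IbisTypeError, we assume) here,
# not at query time.
t.a.log(base='not a number')
```
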
<!-- prettier-ignore-start -->
### The [`Expr`][ibis.expr.types.Expr] class
<!-- prettier-ignore-end -->

Expressions are a thin but important abstraction over operations, containing
only type information and shape information, i.e., whether they are tables,
columns, or scalars.

<!-- prettier-ignore-start -->
Examples of expression types include
[`StringValue`][ibis.expr.types.StringValue] and
[`TableExpr`][ibis.expr.types.TableExpr].
<!-- prettier-ignore-end -->

<!-- prettier-ignore-start -->
### The `ibis.expr.types.Node` Class
<!-- prettier-ignore-end -->

`Node` subclasses make up the core set of operations of ibis. Each node
corresponds to a particular operation.

Most nodes are defined in the `ibis.expr.operations` module.

Examples of nodes include `ibis.expr.operations.Add` and
`ibis.expr.operations.Sum`.

Nodes (transitively) inherit from a class that allows node authors to define
their node's input arguments directly in the class body.

Additionally, the `output_dtype` and `output_shape` members of the class are
rules that define the element type and the shape (scalar or column) of the
operation's output.

An example of usage is a node that represents a logarithm operation:

```python
import ibis.expr.rules as rlz
from ibis.expr.operations import ValueOp


class Log(ValueOp):
    # A double scalar or column
    arg = rlz.double
    # Optional argument, defaults to None
    base = rlz.optional(rlz.double)
    # Output expression's datatype will correspond to arg's datatype
    output_dtype = rlz.dtype_like('arg')
    # Output expression will be scalar if arg is scalar, column otherwise
    output_shape = rlz.shape_like('arg')
```

This class describes an operation called `Log` that takes one required
argument `arg`, a double scalar or column, and one optional argument `base`,
a double scalar or column that defaults to `None` when not provided, so that
the expression behaves as the underlying database does.

Similar objects are instantiated when you use ibis APIs:

```python
import ibis

t = ibis.table([('a', 'float')], name='t')
log_1p = (1 + t.a).log()  # an Add and a Log are instantiated here
```

### Expressions vs Operations: Why are they different?

Separating expressions from their underlying operations makes it easy to
generically describe and validate the inputs to particular nodes. In the log
example, it doesn't matter what _operation_ (node) the double-valued arguments
come from; they must only satisfy the requirement denoted by the rule.

Separation of the `ibis.expr.types.Node` and `ibis.expr.types.Expr` classes
also allows the API to be tied to the physical type of the expression rather
than the particular operation, making it easy to define the API in terms of
types rather than specific operations.

Furthermore, operations often have an output type that depends on the input
type. An example of this is the `greatest` function, which takes the maximum
of all of its arguments. Another example is `CASE` statements, whose `THEN`
expressions determine the output type of the expression.

This allows ibis to provide **only** the APIs that make sense for a particular
type, even when an operation yields a different output type depending on its
input. Concretely, this means that you cannot perform operations that don't
make sense, like computing the average of a string column.

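For example (a sketch; the exact error is an assumption), a string column
simply does not expose numeric reductions:

```python
import ibis

t = ibis.table([('s', 'string')], name='t')

# StringColumn has no `mean` method, so this fails immediately
# (with an AttributeError, we assume) instead of producing invalid SQL.
t.s.mean()
```
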
## Compilation

The next major component of ibis is the compilers.

The first few versions of ibis directly generated strings, but the compiler
infrastructure was generalized to support compilation of
[SQLAlchemy](https://docs.sqlalchemy.org/en/latest/core/tutorial.html) based
expressions.

The compiler works by translating each piece of a SQL expression into a
string or SQLAlchemy expression.

The main pieces of a `SELECT` statement are:

1. The set of column expressions (`select_set`)
1. `WHERE` clauses (`where`)
1. `GROUP BY` clauses (`group_by`)
1. `HAVING` clauses (`having`)
1. `LIMIT` clauses (`limit`)
1. `ORDER BY` clauses (`order_by`)
1. `DISTINCT` clauses (`distinct`)

Each of these pieces is translated into a SQL string and finally assembled by
an instance of the `ibis.sql.compiler.ExprTranslator` subclass specific to the
backend being compiled. For example, the
`ibis.impala.compiler.ImpalaExprTranslator` is one of the subclasses that will
perform this translation.

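As a quick sketch (assuming the Impala backend is installed and exposes a
`compile` function; the exact formatting of the output may differ), you can
inspect the SQL the compiler produces for an unbound expression:

```python
import ibis

t = ibis.table([('a', 'int64')], name='t')
expr = t.a.sum().name('total')

# Compile to an Impala SQL string, roughly:
#   SELECT sum(`a`) AS `total` FROM t
print(ibis.impala.compile(expr))
```
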
!!! note "Ibis can target other systems besides SQL"

    While ibis was designed with an explicit goal of first-class SQL support,
    ibis can target other systems such as pandas.

## Execution

Presumably we want to _do_ something with our compiled expressions. This is
where execution comes in.

This is the least complex part of ibis, mostly only requiring ibis to
correctly handle whatever the database hands back.

By and large, the execution of compiled SQL is handled by the database to
which SQL is sent from ibis.

However, once the data arrives from the database we need to convert that
data to a pandas DataFrame.

The `Query` class, with its `ibis.sql.client.Query._fetch` method, provides a
way for `ibis.sql.client.SQLClient` objects to do any additional processing
necessary after the database returns results to the client.

@@ -0,0 +1,133 @@
# Self Joins

If you’re a relational data guru, you may have wondered how it’s possible to
join tables with themselves, because join clauses involve column references
back to the original table.

Consider the SQL:

```sql
SELECT t1.key, sum(t1.value - t2.value) AS metric
FROM my_table t1
  JOIN my_table t2
    ON t1.key = t2.subkey
GROUP BY 1
```

Here, we have an unambiguous way to refer to each of the tables through
aliasing.

Let’s consider the TPC-H database, and suppose we want to compute
year-over-year change in total order amounts by region using joins.

```python
>>> region = con.table('tpch_region')
>>> nation = con.table('tpch_nation')
>>> customer = con.table('tpch_customer')
>>> orders = con.table('tpch_orders')
>>> orders.limit(5)
   o_orderkey  o_custkey o_orderstatus  o_totalprice o_orderdate \
0           1      36901             O     173665.47  1996-01-02
1           2      78002             O      46929.18  1996-12-01
2           3     123314             F     193846.25  1993-10-14
3           4     136777             O      32151.78  1995-10-11
4           5      44485             F     144659.20  1994-07-30

  o_orderpriority          o_clerk  o_shippriority \
0           5-LOW  Clerk#000000951               0
1        1-URGENT  Clerk#000000880               0
2           5-LOW  Clerk#000000955               0
3           5-LOW  Clerk#000000124               0
4           5-LOW  Clerk#000000925               0

                                           o_comment
0                 nstructions sleep furiously among
1  foxes. pending accounts at the pending, silen...
2  sly final accounts boost. carefully regular id...
3  sits. slyly regular warthogs cajole. regular, ...
4     quickly. bold deposits sleep slyly. packages u...
```

First, let’s join all the things and select the fields we care about:

```python
>>> fields_of_interest = [region.r_name.name('region'),
...                       nation.n_name.name('nation'),
...                       orders.o_totalprice.name('amount'),
...                       orders.o_orderdate.cast('timestamp').name('odate')  # these are strings
... ]
>>> joined_all = (region.join(nation, region.r_regionkey == nation.n_regionkey)
...               .join(customer, customer.c_nationkey == nation.n_nationkey)
...               .join(orders, orders.o_custkey == customer.c_custkey)
...               [fields_of_interest])
```

Okay, great, let’s have a look:

```python
>>> joined_all.limit(5)
        region         nation     amount      odate
0      AMERICA  UNITED STATES  160843.35 1992-06-22
1  MIDDLE EAST           IRAN   78307.91 1996-04-19
2       EUROPE         FRANCE  103237.90 1994-10-12
3       EUROPE         FRANCE  201463.59 1997-09-12
4         ASIA          JAPAN  166098.86 1995-09-12
```

Sweet, now let’s aggregate by year and region:

```python
>>> year = joined_all.odate.year().name('year')
>>> total = joined_all.amount.sum().cast('float').name('total')
>>> annual_amounts = (joined_all
...                   .group_by(['region', year])
...                   .aggregate(total))
>>> annual_amounts.limit(5)
   region  year         total
0  EUROPE  1994  6.979473e+09
1  EUROPE  1996  7.015421e+09
2    ASIA  1997  6.910663e+09
3    ASIA  1998  4.058824e+09
4  EUROPE  1992  6.926705e+09
```

Looking good so far. Now, we need to join this table to itself by subtracting
1 from one of the year columns.

We do this by creating a “joinable” view of the table, which Ibis treats as a
distinct object. To create one, use the `view` method:

```python
>>> current = annual_amounts
>>> prior = annual_amounts.view()
>>> yoy_change = (current.total - prior.total).name('yoy_change')
>>> results = (current.join(prior, ((current.region == prior.region) &
...                                 (current.year == (prior.year - 1))))
...            [current.region, current.year, yoy_change])
>>> df = results.execute()
```

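With the result pulled into a DataFrame, we can attach a column that formats
the change in millions of dollars:
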
```python
>>> df['yoy_pretty'] = df.yoy_change.map(lambda x: '$%.2fmm' % (x / 1000000.))
```

If you’re being fastidious and want to consider the first year occurring
in the dataset for each region to have 0 for the prior year, you will
instead need to do an outer join and treat nulls in the prior side of
the join as zero:

```python
>>> yoy_change = (current.total - prior.total.zeroifnull()).name('yoy_change')
>>> results = (current.outer_join(prior, ((current.region == prior.region) &
...                                       (current.year == (prior.year - 1))))
...            [current.region, current.year, current.total,
...             prior.total.zeroifnull().name('prior_total'),
...             yoy_change])
>>> results.limit(5)
    region  year         total   prior_total    yoy_change
0     ASIA  1998  4.058824e+09  0.000000e+00  4.058824e+09
1   AFRICA  1994  6.837587e+09  6.908429e+09 -7.084172e+07
2  AMERICA  1996  6.883057e+09  6.922465e+09 -3.940791e+07
3   AFRICA  1996  6.878112e+09  6.848983e+09  2.912979e+07
4   AFRICA  1992  6.873319e+09  6.859733e+09  1.358699e+07
```

@@ -0,0 +1,88 @@
# Top-K Filtering

A common analytical pattern involves subsetting based on some method of
ranking. For example, “the 5 most frequently occurring widgets in a dataset”.
By choosing the right metric, you can obtain the most important or least
important items from some dimension, for some definition of important.

Carrying out this pattern by hand involves the following steps:

- Choose a ranking metric
- Aggregate, computing the ranking metric, by the target dimension
- Order by the ranking metric and take the highest K values
- Use those values as a set filter (either with `semi_join` or `isin`) in
  your next query

For example, let’s look at the TPC-H tables and find the 5 customers who
placed the most orders over their lifetime:

```python
>>> orders = con.table('tpch_orders')
>>> top_orders = (orders
...               .group_by('o_custkey')
...               .size()
...               .sort_by(('count', False))
...               .limit(5))
>>> top_orders
   o_custkey  count
0       3451     41
1     102022     41
2     102004     41
3      79300     40
4     117082     40
```

Now, we could use these customer keys as a filter in some other analysis:

```python
>>> # Among the top 5 most frequent customers, what's the histogram of their order statuses?
>>> analysis = (orders[orders.o_custkey.isin(top_orders.o_custkey)]
...             .group_by('o_orderstatus')
...             .size())
>>> analysis
  o_orderstatus  count
0             P      5
1             F     85
2             O    113
```

This is such a common pattern that Ibis supports a high-level `topk`
operation, which can be used immediately as a filter:

```python
>>> top_orders = orders.o_custkey.topk(5)
>>> orders[top_orders].group_by('o_orderstatus').size()
  o_orderstatus  count
0             P      5
1             F     85
2             O    113
```

Let’s go a little further: suppose now we want to rank customers by their
total spending instead of the number of orders, perhaps a more meaningful
metric:

```python
>>> total_spend = orders.o_totalprice.sum().name('total')
>>> top_spenders = (orders
...                 .group_by('o_custkey')
...                 .aggregate(total_spend)
...                 .sort_by(('total', False))
...                 .limit(5))
>>> top_spenders
   o_custkey       total
0     143500  7012696.48
1      95257  6563511.23
2      87115  6457526.26
3     131113  6311428.86
4     103834  6306524.23
```

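Again, `topk` collapses this into a single step; the ranking metric is passed
via the `by` argument:
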
```python
>>> top_spenders = orders.o_custkey.topk(5, by=total_spend)
>>> orders[top_spenders].group_by('o_orderstatus').size()
  o_orderstatus  count
0             P      1
1             F     78
2             O     98
```

@@ -0,0 +1,20 @@
# User Defined Functions

!!! experimental "UD(A)Fs are unstable"

    The user-defined elementwise and aggregate function APIs are provisional
    and subject to change.

Ibis has mechanisms for writing custom scalar and aggregate functions, with
varying levels of support depending on the backend.

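As a minimal sketch, assuming the vectorized UDF decorators live in
`ibis.udf.vectorized` in this version of Ibis (the module path is an
assumption), an elementwise UDF for the pandas backend looks roughly like
this:

```python
import ibis.expr.datatypes as dt
import ibis.udf.vectorized


# Receives a pandas Series of doubles, returns a pandas Series of doubles.
@ibis.udf.vectorized.elementwise(input_type=[dt.double], output_type=dt.double)
def add_one(x):
    return x + 1.0
```

Once defined, the function can be applied to a column of a pandas-backend
table expression, e.g. `add_one(t.a)`.
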
User-defined functions are a complex and interesting topic. Please get
involved if you're interested in working on them!

The following backends provide some level of support for user-defined
functions:

- [Google BigQuery](https://github.com/ibis-project/ibis-bigquery)
- [Pandas](../backends/Pandas.md)
- [PostgreSQL](../backends/PostgreSQL.md)
- [Datafusion](../backends/Datafusion.md)
- [Impala](../backends/Impala.md)

@@ -0,0 +1,65 @@
from pathlib import Path

import pandas as pd
import tomli

import ibis
import ibis.expr.operations as ops


def get_backends():
    """Return (name, module) pairs for every registered backend."""
    pyproject = tomli.loads(Path("pyproject.toml").read_text())
    backends = pyproject["tool"]["poetry"]["plugins"]["ibis.backends"]
    # exclude the "spark" entry point from the matrix
    del backends["spark"]
    return [
        (backend, getattr(ibis, backend))
        for backend in sorted(backends.keys())
    ]


def get_leaf_classes(op):
    """Recursively yield the leaf subclasses of an operation class."""
    for child_class in op.__subclasses__():
        if not child_class.__subclasses__():
            yield child_class
        else:
            yield from get_leaf_classes(child_class)


ICONS = {
    True: ":material-check-decagram:{ .verified }",
    False: ":material-cancel:{ .cancel }",
}


def main():
    possible_ops = frozenset(get_leaf_classes(ops.ValueOp))

    # One row per operation, one boolean column per backend
    support = {
        "operation": [f"`{op.__name__}`" for op in possible_ops],
    }
    support.update(
        (name, list(map(backend.has_operation, possible_ops)))
        for name, backend in get_backends()
    )

    df = pd.DataFrame(support).set_index("operation").sort_index()

    # Order backend columns by how many operations they support
    counts = df.sum().sort_values(ascending=False)
    num_ops = len(possible_ops)
    coverage = (
        counts.map(lambda n: f"_{n} ({round(100 * n / num_ops)}%)_")
        .to_frame(name="**API Coverage**")
        .T
    )

    # Render booleans as mkdocs-material icons and prepend the coverage row
    ops_table = df.loc[:, counts.index].replace(ICONS)
    table = pd.concat([coverage, ops_table])
    dst = Path(__file__).parent.joinpath(
        "docs",
        "backends",
        "support_matrix.csv",
    )
    table.to_csv(dst, index_label="Backends")


if __name__ == "__main__":
    main()