@@ -94,3 +94,4 @@ spark-warehouse
docs/backends/support_matrix.csv
__pycache__
tags
.DS_Store
@@ -0,0 +1,91 @@
"use strict";

module.exports = {
  branches: ["master"],
  tagFormat: "${version}",
  preset: "conventionalcommits",
  plugins: [
    [
      "@semantic-release/commit-analyzer",
      {
        // deprecations are patch releases
        releaseRules: [{ type: "depr", release: "patch" }],
        preset: "conventionalcommits",
      },
    ],
    [
      "@semantic-release/release-notes-generator",
      {
        preset: "conventionalcommits",
        presetConfig: {
          types: [
            { type: "feat", section: "Features" },
            { type: "fix", section: "Bug Fixes" },
            { type: "chore", hidden: true },
            { type: "docs", section: "Documentation" },
            { type: "style", hidden: true },
            { type: "refactor", hidden: true },
            { type: "perf", section: "Performance" },
            { type: "test", hidden: true },
            { type: "depr", section: "Deprecations" },
          ],
        },
      },
    ],
    [
      "@semantic-release/changelog",
      {
        changelogTitle: "Release Notes\n---",
        changelogFile: "docs/release_notes.md",
      },
    ],
    [
      "@google/semantic-release-replace-plugin",
      {
        replacements: [
          {
            files: ["ibis/__init__.py"],
            from: '__version__ = ".*"',
            to: '__version__ = "${nextRelease.version}"',
            results: [
              {
                file: "ibis/__init__.py",
                hasChanged: true,
                numMatches: 1,
                numReplacements: 1,
              },
            ],
            countMatches: true,
          },
        ],
      },
    ],
    [
      "@semantic-release/exec",
      {
        verifyConditionsCmd: "ci/release/verify.sh ${options.dryRun}",
        prepareCmd: "ci/release/prepare.sh ${nextRelease.version}",
        publishCmd: "ci/release/publish.sh",
      },
    ],
    [
      "@semantic-release/github",
      {
        successComment: false,
        assets: ["dist/*.whl"],
      },
    ],
    [
      "@semantic-release/git",
      {
        assets: [
          "pyproject.toml",
          "docs/release_notes.md",
          "setup.py",
          "ibis/__init__.py",
        ],
        message: "chore(release): ${nextRelease.version}",
      },
    ],
  ],
};
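To illustrate what this configuration does, here is a hedged sketch, in plain Python rather than any real semantic-release API, of how the configured commit types map to semver bumps. The `BUMPS` mapping and `bump_for` helper are hypothetical; the values combine the custom `releaseRules` entry above with the conventionalcommits defaults (`feat` is minor, `fix`/`perf` are patch):

```python
# Hypothetical illustration of the configured release rules; not a real
# semantic-release API. Breaking-change detection is omitted for brevity.
BUMPS = {
    "feat": "minor",  # Features section
    "fix": "patch",   # Bug Fixes section
    "perf": "patch",  # Performance section
    "depr": "patch",  # custom rule above: deprecations are patch releases
}

def bump_for(commit_subject: str) -> str | None:
    """Return the semver bump a commit subject would trigger, if any."""
    type_, _, _ = commit_subject.partition(":")
    type_ = type_.split("(")[0].rstrip("!")  # strip scope and `!` marker
    return BUMPS.get(type_)

assert bump_for("depr: remove ibis.negate") == "patch"
assert bump_for("feat(duckdb): add register") == "minor"
assert bump_for("docs: fix typo") is None  # docs commits don't cut a release
```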
@@ -1,15 +1,21 @@
#!/usr/bin/env nix-shell
#!nix-shell -I nixpkgs=channel:nixos-unstable-small --pure --keep POETRY_PYPI_TOKEN_PYPI -p dyff git poetry yj -i bash
# shellcheck shell=bash

set -euo pipefail

dry_run="${1:-false}"

# verify pyproject.toml
poetry check

# verify that the lock file is up to date
#
# go through the rigamarole of yj and dyff because poetry is sensitive to
# PYTHONHASHSEED
bash ./dev/lockfile_diff.sh

# verify that we have a token available to push to pypi using set -u
if [ "${dry_run}" = "false" ]; then
  : "${POETRY_PYPI_TOKEN_PYPI}"
fi
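The final check relies on `set -u`: expanding an unset variable aborts the script, so `: "${POETRY_PYPI_TOKEN_PYPI}"` fails fast when the PyPI token is missing. For readers less familiar with that shell idiom, a rough Python equivalent (only the variable name is taken from the script):

```python
import os
import sys

# Mirror `: "${POETRY_PYPI_TOKEN_PYPI}"` under `set -u`: fail fast when the
# PyPI token is absent, before any publishing work happens.
if "POETRY_PYPI_TOKEN_PYPI" not in os.environ:
    sys.exit("POETRY_PYPI_TOKEN_PYPI is not set")
```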
@@ -0,0 +1,21 @@
#!/usr/bin/env nix-shell
#!nix-shell -I nixpkgs=channel:nixos-unstable-small --pure -p dyff git poetry yj -i bash
# shellcheck shell=bash

set -euo pipefail

old="$(mktemp --suffix=".yaml")"
new="$(mktemp --suffix=".yaml")"

# verify that the lock file is up to date
#
# go through the rigamarole of yj and dyff because poetry is sensitive to
# PYTHONHASHSEED
yj -ty < poetry.lock > "$old"
PYTHONHASHSEED=0 poetry lock --no-update
yj -ty < poetry.lock > "$new"

if ! dyff between "$old" "$new" --ignore-order-changes --omit-header --set-exit-code; then
  git checkout poetry.lock
  exit 1
fi
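Here `yj` converts the TOML lockfile to YAML so `dyff` can compare the two versions structurally rather than textually. A minimal Python sketch of the same idea, assuming Python 3.11+ for `tomllib`; parsing both lockfiles and comparing the resulting dictionaries is likewise insensitive to formatting noise (though unlike `dyff --ignore-order-changes`, plain equality is order-sensitive for arrays):

```python
import os
import subprocess
import tomllib

def parse_lockfile(path: str = "poetry.lock") -> dict:
    # tomllib requires a binary file handle
    with open(path, "rb") as f:
        return tomllib.load(f)

old = parse_lockfile()
subprocess.run(
    ["poetry", "lock", "--no-update"],
    check=True,
    env={**os.environ, "PYTHONHASHSEED": "0"},
)
new = parse_lockfile()

if old != new:
    # restore the committed lockfile, as the script above does
    subprocess.run(["git", "checkout", "poetry.lock"], check=True)
    raise SystemExit("poetry.lock is out of date")
```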
@@ -0,0 +1,14 @@
#!/usr/bin/env nix-shell
#!nix-shell -I nixpkgs=channel:nixos-unstable-small --pure -p poetry nix -i bash
# shellcheck shell=bash
set -euo pipefail

export PYTHONHASHSEED=0

TOP="${1:-$(dirname "$(dirname "$(readlink -f "$0")")")}"

pushd "${TOP}" > /dev/null || exit 1
poetry lock --no-update
poetry export --dev --without-hashes --no-ansi --extras all > "${TOP}/requirements.txt"
"${TOP}/dev/poetry2setup" -o "${TOP}/setup.py"
popd > /dev/null || exit 1
@@ -1,2 +1,2 @@
FROM postgis/postgis:14-3.2-alpine
RUN apk add postgresql14-plpython3
@@ -0,0 +1,50 @@
# Code of Conduct

## Summary

Ibis is governed by the
[NumFOCUS code of conduct](https://numfocus.org/code-of-conduct):

> Be kind to others. Do not insult or put down others. Behave professionally.
> Remember that harassment and sexist, racist, or exclusionary jokes are not
> appropriate for Ibis.
>
> All communication should be appropriate for a professional audience including
> people of many different backgrounds. Sexual language and imagery is not
> appropriate.
>
> Ibis is dedicated to providing a harassment-free
> community for everyone, regardless of gender, sexual orientation, gender
> identity, and expression, disability, physical appearance, body size, race,
> or religion. We do not tolerate harassment of community members in any form.
>
> Thank you for helping make this a welcoming, friendly community for all.

## Reporting Violations and Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the Ibis Code of Conduct committee at
ibis-conduct@googlegroups.com. You can also report violations using the online
form.

The committee currently consists of:

- Phillip Cloud
- Wes McKinney
- Krisztián Szűcs
- Jeff Reback

All complaints will be reviewed and investigated and will result in a response
that is deemed necessary and appropriate to the circumstances. The committee is
obligated to maintain confidentiality with regard to the reporter of an
incident. In addition, the online form allows you to submit a report
anonymously. Further details of specific enforcement policies may be posted
separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

Parts of this CoC are adapted from the [Dask code of
conduct](https://github.com/dask/governance/blob/main/code-of-conduct.md).
@@ -0,0 +1,11 @@
# Contributing to Ibis

We love new contributors!

To get started:

1. [Set up a development environment](https://ibis-project.org/docs/latest/contribute/01_environment/)
1. [Learn about the commit workflow](https://ibis-project.org/docs/latest/contribute/02_workflow/)
1. [Review the code style guidelines](https://ibis-project.org/docs/latest/contribute/03_style/)
1. [Learn how to run the backend test suite](https://ibis-project.org/docs/latest/contribute/04_backend_tests/)
1. [Dig into the nitty gritty of being a maintainer](https://ibis-project.org/docs/latest/contribute/05_maintainers_guide/)
@@ -1,11 +1,11 @@
# Data Types

This module contains classes for handling the different logical types that
occur in databases.

<!-- prettier-ignore-start -->
All data type constructors take a `nullable: bool` parameter whose default
value is [`True`][True].
<!-- prettier-ignore-end -->

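For example (a quick sketch, using the `dt` import alias conventional in the Ibis codebase):

```python
import ibis.expr.datatypes as dt

# nullable defaults to True
assert dt.Int64().nullable
# ...and can be turned off per type
assert not dt.String(nullable=False).nullable
```
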
::: ibis.expr.datatypes.core
@@ -1,14 +1,7 @@
# Complex Type Expressions

These APIs are available on arrays, maps and structs.

::: ibis.expr.types.arrays.ArrayValue
::: ibis.expr.types.structs.StructValue
::: ibis.expr.types.maps.MapValue
@@ -1,6 +1,15 @@
# Numeric and Boolean Expressions

These APIs are available on numeric and boolean expressions.

::: ibis.expr.types.numeric.NumericValue
::: ibis.expr.types.numeric.NumericColumn

::: ibis.expr.types.numeric.IntegerValue
::: ibis.expr.types.numeric.IntegerColumn

::: ibis.expr.types.numeric.FloatingValue

::: ibis.expr.types.numeric.DecimalValue

::: ibis.expr.types.logical.BooleanValue
@@ -2,4 +2,4 @@

All string operations are valid for both scalars and columns.

::: ibis.expr.types.strings.StringValue
@@ -1,5 +1,9 @@
# Temporal Expression APIs

All temporal operations are valid for both scalars and columns.

::: ibis.expr.types.temporal.TemporalValue
::: ibis.expr.types.temporal.TimestampValue
::: ibis.expr.types.temporal.DateValue
::: ibis.expr.types.temporal.TimeValue
::: ibis.expr.types.temporal.IntervalValue
@@ -0,0 +1,5 @@
# Schemas

This module contains APIs for interacting with table schemas.

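For instance, a brief sketch; `ibis.schema` accepts name/type pairs, and the printed repr shown is approximate:

```python
import ibis

# construct a schema from name/type pairs
sch = ibis.schema([("name", "string"), ("age", "int64")])
print(sch)
# ibis.Schema {
#   name  string
#   age   int64
# }
```
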
::: ibis.expr.schema.Schema
@@ -1,6 +1,6 @@
# Ibis v3.1.0

**by Marlene Mhangami**

25 July 2022
@@ -0,0 +1,112 @@
# Use `register` to load external data files with the DuckDB backend

<!-- prettier-ignore-start -->
Here we use the [`register`][ibis.backends.duckdb.Backend.register] method to load external data files and join them.
<!-- prettier-ignore-end -->

We're going to download one month of [NYC Taxi
data](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) in
`parquet` format and also download the "Taxi Zone Lookup Table", which is a
`csv` file:

https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-01.parquet
https://d37ci6vzurychx.cloudfront.net/misc/taxi+_zone_lookup.csv

Create an in-memory DuckDB connection via `ibis`:

```python
>>> import ibis
>>> con = ibis.duckdb.connect()  # in-memory database
>>> con.list_tables()
[]
```

Now we call `register` with the filepath (the `table_name` argument is
optional; if it isn't specified, Ibis will use the filename minus the
extension):

```python
>>> con.register("taxi+_zone_lookup.csv", table_name="taxi_zone_lookup")
AlchemyTable: taxi+_zone_lookup
  LocationID   int32
  Borough      string
  Zone         string
  service_zone string

>>> con.register("green_tripdata_2022-01.parquet", table_name="tripdata")
AlchemyTable: green_tripdata_2022_01
  VendorID              int64
  lpep_pickup_datetime  timestamp
  lpep_dropoff_datetime timestamp
  store_and_fwd_flag    string
  RatecodeID            float64
  PULocationID          int64
  DOLocationID          int64
  passenger_count       float64
  trip_distance         float64
  fare_amount           float64
  extra                 float64
  mta_tax               float64
  tip_amount            float64
  tolls_amount          float64
  ehail_fee             int32
  improvement_surcharge float64
  total_amount          float64
  payment_type          float64
  trip_type             float64
  congestion_surcharge  float64

>>> con.list_tables()
['tripdata', 'taxi_zone_lookup']
```

Ibis has now parsed a schema from each file, and the corresponding tables
(actually lazily-loaded `views`) are available.

We can interact with these tables just like a table or view in any other
backend connection:

```python
>>> lookup = con.table("taxi_zone_lookup")
>>> tripdata = con.table("tripdata")

>>> tripdata.columns
['VendorID', 'lpep_pickup_datetime', 'lpep_dropoff_datetime', 'store_and_fwd_flag', 'RatecodeID', 'PULocationID', 'DOLocationID', 'passenger_count', 'trip_distance', 'fare_amount', 'extra', 'mta_tax', 'tip_amount', 'tolls_amount', 'ehail_fee', 'improvement_surcharge', 'total_amount', 'payment_type', 'trip_type', 'congestion_surcharge']

>>> lookup.columns
['LocationID', 'Borough', 'Zone', 'service_zone']
```

We can grab a small subset of the `tripdata` columns and then join them to the
`lookup` table to get human-readable values for the pickup locations:

```python
>>> ibis.options.interactive = True

>>> tripdata = tripdata[["lpep_pickup_datetime", "PULocationID"]]

>>> tripdata.head()
  lpep_pickup_datetime  PULocationID
0  2022-01-01 00:14:21            42
1  2022-01-01 00:20:55           116
2  2022-01-01 00:57:02            41
3  2022-01-01 00:07:42           181
4  2022-01-01 00:07:50            33

>>> tripdata.join(lookup, tripdata.PULocationID == lookup.LocationID).head()
  lpep_pickup_datetime  PULocationID  LocationID    Borough                  Zone service_zone
0  2022-01-01 00:14:21            42          42  Manhattan  Central Harlem North    Boro Zone
1  2022-01-01 00:20:55           116         116  Manhattan      Hamilton Heights    Boro Zone
2  2022-01-01 00:57:02            41          41  Manhattan        Central Harlem    Boro Zone
3  2022-01-01 00:07:42           181         181   Brooklyn            Park Slope    Boro Zone
4  2022-01-01 00:07:50            33          33   Brooklyn      Brooklyn Heights    Boro Zone
```

That's it!

Ibis+DuckDB currently supports registering `parquet`, `csv`, and `csv.gz`
files.

You can pass in a filename, in which case the filetype is inferred from the
extension, or you can specify the filetype explicitly using a file URI, e.g.

```python
con.register("csv://some_csv_file_without_an_extension")
con.register("csv.gz://a_compressed_csv_file.csv")
con.register("parquet://a_parquet_file_with_truncated_extension.parq")
```
@@ -0,0 +1,87 @@
# How to `ffill` and `bfill` using Window Functions

If you have gaps in your data and need to fill them in using a simple forward
fill (given an order, null values are replaced by the preceding value) or
backward fill (given an order, null values are replaced by the following
value), then you can do this in Ibis:

=== "`ffill`"

    ~~~python
    # Create a window that orders your series, default ascending
    win = ibis.window(order_by=data.measured_on, following=0)
    # Create a grouping that is a rolling count of non-null values
    # This creates a partition where each set has no more than one non-null value
    grouped = data.mutate(grouper=data.measurement.count().over(win))
    # Group by your newly-created grouping and, in each set,
    # set all values to the one non-null value in that set (if it exists)
    result = (
        grouped
        .group_by([grouped.grouper])
        .mutate(ffill=grouped.measurement.max())
    )
    # execute to get a pandas dataframe, sort values in case your backend shuffles
    result.execute().sort_values(by=['measured_on'])
    ~~~

=== "`bfill`"

    ~~~python
    # Create a window that orders your series (use ibis.desc to get descending order)
    win = ibis.window(order_by=ibis.desc(data.measured_on), following=0)
    # Create a grouping that is a rolling count of non-null values
    # This creates a partition where each set has no more than one non-null value
    grouped = data.mutate(grouper=data.measurement.count().over(win))
    # Group by your newly-created grouping and, in each set,
    # set all values to the one non-null value in that set (if it exists)
    result = (
        grouped
        .group_by([grouped.grouper])
        .mutate(bfill=grouped.measurement.max())
    )
    # execute to get a pandas dataframe, sort values in case your backend shuffles
    result.execute().sort_values(by=['measured_on'])
    ~~~
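
To see why the rolling count produces usable partitions, here is a small worked example of what the `grouper` column looks like for a forward fill. It is a sketch in plain pandas with made-up `measurement` values, not part of the Ibis API:

```python
import pandas as pd

df = pd.DataFrame({"measurement": [1.0, None, None, 5.0, None]})
# cumulative count of non-null values, mirroring count().over(win)
df["grouper"] = df.measurement.notna().cumsum()
print(df)
#    measurement  grouper
# 0          1.0        1
# 1          NaN        1
# 2          NaN        1
# 3          5.0        2
# 4          NaN        2
# Each grouper value contains at most one non-null measurement (the first
# row of its group), so max() over the group recovers that value.
df["ffill"] = df.groupby("grouper").measurement.transform("max")
```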

If you have an event partition, meaning another segment that must be respected
in your `ffill` or `bfill` operations, you can handle that as well:

=== "`ffill` with event partition"

    ~~~python
    # Group your data by your event partition and then order your series (default ascending)
    win = ibis.window(group_by=data.event_id, order_by=data.measured_on, following=0)
    # Create a grouping that is a rolling count of non-null values within each event
    # This creates a partition where each set has no more than one non-null value
    grouped = data.mutate(grouper=data.measurement.count().over(win))
    # Group by your newly-created grouping and, in each set,
    # set all values to the one non-null value in that set (if it exists)
    result = (
        grouped
        .group_by([grouped.event_id, grouped.grouper])
        .mutate(ffill=grouped.measurement.max())
    )
    # execute to get a pandas dataframe, sort values in case your backend shuffles
    result.execute().sort_values(by=['event_id', 'measured_on'])
    ~~~

=== "`bfill` with event partition"

    ~~~python
    # Group your data by your event partition and then order your series (use ibis.desc for desc)
    win = ibis.window(group_by=data.event_id, order_by=ibis.desc(data.measured_on), following=0)
    # Create a grouping that is a rolling count of non-null values within each event
    # This creates a partition where each set has no more than one non-null value
    grouped = data.mutate(grouper=data.measurement.count().over(win))
    # Group by your newly-created grouping and, in each set,
    # set all values to the one non-null value in that set (if it exists)
    result = (
        grouped
        .group_by([grouped.event_id, grouped.grouper])
        .mutate(bfill=grouped.measurement.max())
    )
    # execute to get a pandas dataframe, sort values in case your backend shuffles
    result.execute().sort_values(by=['event_id', 'measured_on'])
    ~~~

For a deeper dive into how this works, see [the post on the ibis-project
blog](../blog/ffill-and-bfill-using-ibis.md).
@@ -0,0 +1,37 @@
# Compute the Top K Records

<!-- prettier-ignore-start -->
Here we use the [`topk`][ibis.expr.types.Column.topk] method to compute the top
5 customers for some generated TPC-H data by:
<!-- prettier-ignore-end -->

- count (the default)
- sum of order totals

```python
>>> import ibis

>>> ibis.options.interactive = True

>>> con = ibis.duckdb.connect()  # in-memory duckdb

>>> con.raw_sql("CALL dbgen(sf=0.1)")

>>> orders = con.table("orders")

>>> orders.o_custkey.topk(5)  # top 5 most frequent customers
   o_custkey  count
0      11998     36
1       8761     36
2       3151     35
3        388     35
4       8362     35

>>> orders.o_custkey.topk(5, by=orders.o_totalprice.sum())  # top 5 largest spending customers
   o_custkey         sum
0       8362  5793605.05
1       6958  5370682.19
2       9454  5354381.81
3        346  5323350.43
4      10354  5227957.24
```
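
Conceptually, `topk` is shorthand for a group-by, aggregate, sort, and limit. A hedged sketch of the roughly equivalent long-form expression, assuming the same `orders` table; the aggregate name `count` is chosen to match the output above:

```python
# roughly what orders.o_custkey.topk(5) expands to
expr = (
    orders.group_by("o_custkey")
    .aggregate(count=orders.count())
    .sort_by(ibis.desc("count"))
    .limit(5)
)
```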
@@ -1,64 +1,45 @@
"""Execution rules for ops.Where operations"""

import dask.dataframe as dd

import ibis.expr.operations as ops
from ibis.backends.dask.dispatch import execute_node
from ibis.backends.pandas.core import boolean_types, scalar_types, simple_types
from ibis.backends.pandas.execution.generic import pd_where


@execute_node.register(
    ops.Where, (dd.Series, *boolean_types), dd.Series, dd.Series
)
@execute_node.register(
    ops.Where, (dd.Series, *boolean_types), dd.Series, simple_types
)
@execute_node.register(
    ops.Where, (dd.Series, *boolean_types), simple_types, dd.Series
)
@execute_node.register(
    ops.Where, (dd.Series, *boolean_types), type(None), type(None)
)
def execute_node_where(op, cond, true, false, **kwargs):
    if any(
        isinstance(x, (dd.Series, dd.core.Scalar)) for x in (cond, true, false)
    ):
        return dd.map_partitions(pd_where, cond, true, false)
    # All are immediate scalars, handle locally
    return true if cond else false


# For true/false as scalars, we only support identical type pairs + None to
# limit the size of the dispatch table and not have to worry about type
# promotion.
for typ in (str, *scalar_types):
    for cond_typ in (dd.Series, *boolean_types):
        execute_node.register(ops.Where, cond_typ, typ, typ)(execute_node_where)
        execute_node.register(ops.Where, cond_typ, type(None), typ)(
            execute_node_where
        )
        execute_node.register(ops.Where, cond_typ, typ, type(None))(
            execute_node_where
        )
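
For context, the elementwise semantics dispatched per partition here match pandas' `Series.where`: keep values where the condition holds and substitute the alternative elsewhere. A small self-contained illustration of those semantics, not the actual `pd_where` source:

```python
import pandas as pd

cond = pd.Series([True, False, True])
true = pd.Series([1, 2, 3])
false = pd.Series([10, 20, 30])

# keep `true` where cond is True, take `other` elsewhere -> [1, 20, 3]
result = true.where(cond, other=false)
```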