Skip to content

Commit

Permalink
feat(polars): implement ops.RegexSplit using pyarrow UDF
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and gforsyth committed Dec 19, 2023
1 parent 37b6b7f commit a3bed10
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
19 changes: 19 additions & 0 deletions ibis/backends/polars/compiler.py
Expand Up @@ -1202,6 +1202,25 @@ def execute_agg_udf(op, **kw):
return getattr(first, op.__func_name__)(*rest)


@translate.register(ops.RegexSplit)
def execute_regex_split(op, **kw):
    """Translate ``ops.RegexSplit`` into a Polars ``map_batches`` expression.

    The string argument and the pattern are each translated to Polars
    expressions and handed to a batch function that performs the split with
    ``pyarrow.compute.split_pattern_regex``.

    Parameters
    ----------
    op
        The regex-split operation; ``op.arg``, ``op.pattern`` and ``op.dtype``
        are read.
    **kw
        Forwarded unchanged to ``translate`` for both sub-expressions.

    Returns
    -------
    The expression produced by ``pl.map_batches``, with its return dtype set
    from ``dtype_to_polars(op.dtype)``.

    Raises
    ------
    com.IbisError
        Raised from inside the batch function when the evaluated pattern does
        not have exactly one element.
    """
    import pyarrow.compute as pc

    def _split_batch(series_pair):
        # map_batches passes the evaluated inputs in the same order as `exprs`.
        strings, patterns = series_pair
        if len(patterns) == 1:
            arrow_parts = pc.split_pattern_regex(strings.to_arrow(), patterns[0])
            return pl.from_arrow(arrow_parts)
        raise com.IbisError(
            "Only a single scalar pattern is supported for Polars re_split"
        )

    return pl.map_batches(
        exprs=(translate(op.arg, **kw), translate(op.pattern, **kw)),
        function=_split_batch,
        return_dtype=dtype_to_polars(op.dtype),
    )


@translate.register(ops.IntegerRange)
def execute_integer_range(op, **kw):
if not isinstance(op.step, ops.Literal):
Expand Down
1 change: 0 additions & 1 deletion ibis/backends/tests/test_string.py
Expand Up @@ -1113,7 +1113,6 @@ def test_non_match_regex_search_is_false(con):
"exasol",
"pandas",
"bigquery",
"polars",
],
raises=com.OperationNotDefinedError,
)
Expand Down

0 comments on commit a3bed10

Please sign in to comment.