Skip to content

Commit

Permalink
feat(api): define RegexSplit operation and re_split API
Browse files: browse the repository at this point in the history
  • Loading branch information
cpcloud authored and gforsyth committed Dec 19, 2023
1 parent e18d725 commit 07beaed
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 0 deletions.
12 changes: 12 additions & 0 deletions ibis/expr/operations/strings.py
Expand Up @@ -207,6 +207,18 @@ class RegexExtract(Value):
dtype = dt.string


@public
class RegexSplit(Value):
    """Split a string value by a regular-expression pattern.

    The result is typed as an array of strings.
    """

    # String expression to be split.
    arg: Value[dt.String]
    # Regular-expression pattern to split by.
    pattern: Value[dt.String]

    # Output type: array of strings.
    dtype = dt.Array(dt.string)

    @attribute
    def shape(self):
        """Shape derived from both operands via `rlz.highest_precedence_shape`."""
        operands = (self.arg, self.pattern)
        return rlz.highest_precedence_shape(operands)


@public
class RegexReplace(Value):
arg: Value[dt.String]
Expand Down
46 changes: 46 additions & 0 deletions ibis/expr/types/strings.py
Expand Up @@ -1078,6 +1078,52 @@ def re_extract(
"""
return ops.RegexExtract(self, pattern, index).to_expr()

@util.backend_sensitive(
    why="Different backends support different regular expression syntax."
)
def re_split(self, pattern: str | StringValue) -> ir.ArrayValue:
    r"""Split a string by a regular expression `pattern`.

    Parameters
    ----------
    pattern
        Regular expression string to split by

    Returns
    -------
    ArrayValue
        Array of strings from splitting by `pattern`

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable(dict(s=["a.b", "b.....c", "c.........a", "def"]))
    >>> t.s
    ┏━━━━━━━━━━━━━┓
    ┃ s           ┃
    ┡━━━━━━━━━━━━━┩
    │ string      │
    ├─────────────┤
    │ a.b         │
    │ b.....c     │
    │ c.........a │
    │ def         │
    └─────────────┘
    >>> t.s.re_split(r"\.+").name("splits")
    ┏━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ splits               ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━┩
    │ array<string>        │
    ├──────────────────────┤
    │ ['a', 'b']           │
    │ ['b', 'c']           │
    │ ['c', 'a']           │
    │ ['def']              │
    └──────────────────────┘
    """
    # The docstring is raw (r-prefixed) so the `\.` in the example is kept
    # literally instead of being parsed as an invalid escape sequence.
    return ops.RegexSplit(self, pattern).to_expr()

@util.backend_sensitive(
why="Different backends support different regular expression syntax."
)
Expand Down

0 comments on commit 07beaed

Please sign in to comment.