128 changes: 90 additions & 38 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def param(type: dt.DataType) -> ir.Scalar:
predicates:
r0.timestamp_col >= $(date)
r0.timestamp_col <= $(date)
sum: Sum(r1.value)
Sum(value): Sum(r1.value)
"""
return ops.ScalarParameter(type).to_expr()

Expand Down Expand Up @@ -269,7 +269,7 @@ def schema(
>>> sc = schema(names=['foo', 'bar', 'baz'],
... types=['string', 'int64', 'boolean'])
>>> sc = schema(dict(foo="string"))
>>> sc = schema(Schema(['foo'], ['string'])) # no-op
>>> sc = schema(Schema(dict(foo="string"))) # no-op
Returns
-------
Expand Down Expand Up @@ -304,9 +304,12 @@ def table(
--------
Create a table with no data backing it
>>> t = ibis.table(schema=dict(a="int", b="string"))
>>> import ibis
>>> ibis.options.interactive
False
>>> t = ibis.table(schema=dict(a="int", b="string"), name="t")
>>> t
UnboundTable: unbound_table_0
UnboundTable: t
a int64
b string
"""
Expand Down Expand Up @@ -453,18 +456,20 @@ def desc(expr: ir.Column | str) -> ir.Value:
Examples
--------
>>> import ibis
>>> t = ibis.table(dict(g='string'), name='t')
>>> t.group_by('g').size('count').order_by(ibis.desc('count'))
r0 := UnboundTable: t
g string
r1 := Aggregation[r0]
metrics:
count: Count(t)
by:
g: r0.g
Selection[r1]
sort_keys:
desc|r1.count
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t[["species", "year"]].order_by(ibis.desc("year")).head()
┏━━━━━━━━━┳━━━━━━━┓
┃ species ┃ year ┃
┡━━━━━━━━━╇━━━━━━━┩
│ string │ int64 │
├─────────┼───────┤
│ Adelie │ 2009 │
│ Adelie │ 2009 │
│ Adelie │ 2009 │
│ Adelie │ 2009 │
│ Adelie │ 2009 │
└─────────┴───────┘
Returns
-------
Expand All @@ -485,18 +490,20 @@ def asc(expr: ir.Column | str) -> ir.Value:
Examples
--------
>>> import ibis
>>> t = ibis.table(dict(g='string'), name='t')
>>> t.group_by('g').size('count').order_by(ibis.asc('count'))
r0 := UnboundTable: t
g string
r1 := Aggregation[r0]
metrics:
count: Count(t)
by:
g: r0.g
Selection[r1]
sort_keys:
asc|r1.count
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t[["species", "year"]].order_by(ibis.asc("year")).head()
┏━━━━━━━━━┳━━━━━━━┓
┃ species ┃ year ┃
┡━━━━━━━━━╇━━━━━━━┩
│ string │ int64 │
├─────────┼───────┤
│ Adelie │ 2007 │
│ Adelie │ 2007 │
│ Adelie │ 2007 │
│ Adelie │ 2007 │
│ Adelie │ 2007 │
└─────────┴───────┘
Returns
-------
Expand Down Expand Up @@ -794,7 +801,8 @@ def case() -> bl.SearchedCaseBuilder:
>>> cond1 = ibis.literal(1) == 1
>>> cond2 = ibis.literal(2) == 1
>>> expr = ibis.case().when(cond1, 3).when(cond2, 4).end()
SearchedCase(cases=(1 == 1, 2 == 1), results=(3, 4)), default=Cast(None, to=int8))
>>> expr
SearchedCase(...)
Returns
-------
Expand Down Expand Up @@ -849,7 +857,27 @@ def read_csv(sources: str | Path | Sequence[str | Path], **kwargs: Any) -> ir.Ta
Examples
--------
>>> batting = ibis.read_csv("ci/ibis-testing-data/batting.csv")
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.Batting_raw.fetch()
>>> t
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━┓
┃ playerID ┃ yearID ┃ stint ┃ teamID ┃ lgID ┃ G ┃ AB ┃ R ┃ … ┃
┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━┩
│ string │ int64 │ int64 │ string │ string │ int64 │ int64 │ int64 │ … │
├───────────┼────────┼───────┼────────┼────────┼───────┼───────┼───────┼───┤
│ abercda01 │ 1871 │ 1 │ TRO │ NA │ 1 │ 4 │ 0 │ … │
│ addybo01 │ 1871 │ 1 │ RC1 │ NA │ 25 │ 118 │ 30 │ … │
│ allisar01 │ 1871 │ 1 │ CL1 │ NA │ 29 │ 137 │ 28 │ … │
│ allisdo01 │ 1871 │ 1 │ WS3 │ NA │ 27 │ 133 │ 28 │ … │
│ ansonca01 │ 1871 │ 1 │ RC1 │ NA │ 25 │ 120 │ 29 │ … │
│ armstbo01 │ 1871 │ 1 │ FW1 │ NA │ 12 │ 49 │ 9 │ … │
│ barkeal01 │ 1871 │ 1 │ RC1 │ NA │ 1 │ 4 │ 0 │ … │
│ barnero01 │ 1871 │ 1 │ BS1 │ NA │ 31 │ 157 │ 66 │ … │
│ barrebi01 │ 1871 │ 1 │ FW1 │ NA │ 1 │ 5 │ 1 │ … │
│ barrofr01 │ 1871 │ 1 │ BS1 │ NA │ 18 │ 86 │ 13 │ … │
│ … │ … │ … │ … │ … │ … │ … │ … │ … │
└───────────┴────────┴───────┴────────┴────────┴───────┴───────┴───────┴───┘
"""
from ibis.config import _default_backend

Expand Down Expand Up @@ -886,9 +914,9 @@ def read_json(sources: str | Path | Sequence[str | Path], **kwargs: Any) -> ir.T
... {"a": 2, "b": null}
... {"a": null, "b": "f"}
... '''
>>> with open("lines.json", mode="w") as f:
... f.write(lines)
>>> t = ibis.read_json("lines.json")
>>> with open("/tmp/lines.json", mode="w") as f:
... _ = f.write(lines)
>>> t = ibis.read_json("/tmp/lines.json")
>>> t
┏━━━━━━━━┳━━━━━━━━┓
┃ a ┃ b ┃
Expand Down Expand Up @@ -929,7 +957,27 @@ def read_parquet(sources: str | Path | Sequence[str | Path], **kwargs: Any) -> i
Examples
--------
>>> batting = ibis.read_parquet("ci/ibis-testing-data/parquet/batting/batting.parquet")
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.Batting_raw.fetch()
>>> t
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━┓
┃ playerID ┃ yearID ┃ stint ┃ teamID ┃ lgID ┃ G ┃ AB ┃ R ┃ … ┃
┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━┩
│ string │ int64 │ int64 │ string │ string │ int64 │ int64 │ int64 │ … │
├───────────┼────────┼───────┼────────┼────────┼───────┼───────┼───────┼───┤
│ abercda01 │ 1871 │ 1 │ TRO │ NA │ 1 │ 4 │ 0 │ … │
│ addybo01 │ 1871 │ 1 │ RC1 │ NA │ 25 │ 118 │ 30 │ … │
│ allisar01 │ 1871 │ 1 │ CL1 │ NA │ 29 │ 137 │ 28 │ … │
│ allisdo01 │ 1871 │ 1 │ WS3 │ NA │ 27 │ 133 │ 28 │ … │
│ ansonca01 │ 1871 │ 1 │ RC1 │ NA │ 25 │ 120 │ 29 │ … │
│ armstbo01 │ 1871 │ 1 │ FW1 │ NA │ 12 │ 49 │ 9 │ … │
│ barkeal01 │ 1871 │ 1 │ RC1 │ NA │ 1 │ 4 │ 0 │ … │
│ barnero01 │ 1871 │ 1 │ BS1 │ NA │ 31 │ 157 │ 66 │ … │
│ barrebi01 │ 1871 │ 1 │ FW1 │ NA │ 1 │ 5 │ 1 │ … │
│ barrofr01 │ 1871 │ 1 │ BS1 │ NA │ 18 │ 86 │ 13 │ … │
│ … │ … │ … │ … │ … │ … │ … │ … │ … │
└───────────┴────────┴───────┴────────┴────────┴───────┴───────┴───────┴───┘
"""
from ibis.config import _default_backend

Expand All @@ -947,13 +995,17 @@ def set_backend(backend: str | BaseBackend) -> None:
Examples
--------
May pass the backend as a name:
You can pass the backend as a name:
>>> import ibis
>>> ibis.set_backend("polars")
Or as a URI:
>>> ibis.set_backend("postgres://user:password@hostname:5432")
Or as a URI:
>>> ibis.set_backend("postgres://user:password@hostname:5432") # doctest: +SKIP
Or as an existing backend instance
Or as an existing backend instance:
>>> ibis.set_backend(ibis.duckdb.connect())
"""
import ibis
Expand Down
6 changes: 3 additions & 3 deletions ibis/expr/operations/logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,9 @@ class _UnresolvedSubquery(Value, _Negatable):
-----
Consider the following ibis expressions
>>> t = ibis.table(dict(a="string"))
>>> s = ibis.table(dict(a="string"))
>>> cond = (t.a == s.a).any()
>>> t = ibis.table(dict(a="string")) # doctest: +SKIP
>>> s = ibis.table(dict(a="string")) # doctest: +SKIP
>>> cond = (t.a == s.a).any() # doctest: +SKIP
Without knowing the table to use as the outer query there are two ways to
turn this expression into a SQL `EXISTS` predicate depending on which of
Expand Down
8 changes: 4 additions & 4 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ def merge(self, other: Schema) -> Schema:
Examples
--------
>>> import ibis
>>> first = ibis.Schema.from_dict({"a": "int", "b": "string"})
>>> second = ibis.Schema.from_dict({"c": "float", "d": "int16"})
>>> first.merge(second)
>>> first = ibis.Schema({"a": "int", "b": "string"})
>>> second = ibis.Schema({"c": "float", "d": "int16"})
>>> first.merge(second) # doctest: +SKIP
ibis.Schema {
a int64
b string
Expand Down Expand Up @@ -206,7 +206,7 @@ def name_at_position(self, i: int) -> str:
Examples
--------
>>> import ibis
>>> sch = ibis.Schema.from_dict({"a": "int", "b": "string"})
>>> sch = ibis.Schema({"a": "int", "b": "string"})
>>> sch.name_at_position(0)
'a'
>>> sch.name_at_position(1)
Expand Down
55 changes: 26 additions & 29 deletions ibis/expr/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@
Without selectors this becomes quite verbose and tedious to write:
```python
>>> t.select([t[c] for c in t.columns if t[c].type().is_numeric()])
>>> t.select([t[c] for c in t.columns if t[c].type().is_numeric()]) # doctest: +SKIP
```
Compare that to the [`numeric`][ibis.expr.selectors.numeric] selector:
```python
>>> t.select(s.numeric())
>>> t.select(s.numeric()) # doctest: +SKIP
```
When there are multiple properties to check it gets worse:
```python
>>> t.select(
>>> t.select( # doctest: +SKIP
... [
... t[c] for c in t.columns
... if t[c].type().is_numeric()
Expand All @@ -36,7 +36,7 @@
Using a composition of selectors this is much less tiresome:
```python
>>> t.select(s.numeric() & s.contains(("a", "cd")))
>>> t.select(s.numeric() & s.contains(("a", "cd"))) # doctest: +SKIP
```
"""

Expand Down Expand Up @@ -133,14 +133,12 @@ def where(predicate: Callable[[ir.Value], bool]) -> Predicate:
Examples
--------
>>> import ibis
>>> import ibis.expr.selectors as s
>>> t = ibis.table(dict(a="float32"), name="t")
>>> t.select(s.where(lambda col: col.get_name() == "a"))
r0 := UnboundTable: t
a float32
<BLANKLINE>
Selection[r0]
selections:
a: r0.a
>>> expr = t.select(s.where(lambda col: col.get_name() == "a"))
>>> expr.columns
['a']
"""
return Predicate(predicate=predicate)

Expand All @@ -151,22 +149,17 @@ def numeric() -> Predicate:
Examples
--------
>>> import ibis.selectors as s
>>> import ibis
>>> import ibis.expr.selectors as s
>>> t = ibis.table(dict(a="int", b="string", c="array<string>"), name="t")
>>> t
r0 := UnboundTable: t
UnboundTable: t
a int64
b string
c array<string>
>>> t.select(s.numeric()) # `a` has integer type, so it's numeric
r0 := UnboundTable: t
a int64
b string
c array<string>
<BLANKLINE>
Selection[r0]
selections:
a: r0.a
>>> expr = t.select(s.numeric()) # `a` has integer type, so it's numeric
>>> expr.columns
['a']
See Also
--------
Expand All @@ -188,15 +181,15 @@ def of_type(dtype: dt.DataType | str | type[dt.DataType]) -> Predicate:
--------
Select according to a specific `DataType` instance
>>> t.select(s.of_type(dt.Array(dt.string)))
>>> t.select(s.of_type(dt.Array(dt.string))) # doctest: +SKIP
Strings are also accepted
>>> t.select(s.of_type("map<string, float>"))
>>> t.select(s.of_type("map<string, float>")) # doctest: +SKIP
Select by category of `DataType` by passing the `DataType` class
>>> t.select(s.of_type(dt.Struct)) # all struct columns, regardless of field types
>>> t.select(s.of_type(dt.Struct)) # doctest: +SKIP
See Also
--------
Expand All @@ -221,8 +214,12 @@ def startswith(prefixes: str | tuple[str, ...]) -> Predicate:
Examples
--------
>>> import ibis
>>> import ibis.expr.selectors as s
>>> t = ibis.table(dict(apples="int", oranges="float", bananas="bool"), name="t")
>>> t.select(s.startswith(("a", "b")))
>>> expr = t.select(s.startswith(("a", "b")))
>>> expr.columns
['apples', 'bananas']
See Also
--------
Expand Down Expand Up @@ -264,11 +261,11 @@ def contains(
--------
Select columns that contain either `"a"` or `"b"`
>>> t.select(s.contains(("a", "b")))
>>> t.select(s.contains(("a", "b"))) # doctest: +SKIP
Select columns that contain all of `"a"` and `"b"`
>>> t.select(s.contains(("a", "b"), how=all))
>>> t.select(s.contains(("a", "b"), how=all)) # doctest: +SKIP
See Also
--------
Expand All @@ -293,7 +290,7 @@ def matches(regex: str | re.Pattern) -> Selector:
Examples
--------
>>> t.select(s.matches(r"ab+"))
>>> t.select(s.matches(r"ab+")) # doctest: +SKIP
See Also
--------
Expand Down
1 change: 1 addition & 0 deletions ibis/expr/types/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def __getitem__(self, index: int | ir.IntegerValue | slice) -> ir.Value:
Extract a range of elements
>>> t = ibis.memtable({"a": [[7, 42, 72], [3] * 5, None]})
>>> t
┏━━━━━━━━━━━━━━━━━━━━━━┓
┃ a ┃
┡━━━━━━━━━━━━━━━━━━━━━━┩
Expand Down
6 changes: 4 additions & 2 deletions ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def equals(self, other):
Examples
--------
>>> import ibis
>>> t1 = ibis.table(dict(a="int"), name="t")
>>> t2 = ibis.table(dict(a="int"), name="t")
>>> t1.equals(t2)
Expand Down Expand Up @@ -183,7 +184,7 @@ def pipe(self, f, *args: Any, **kwargs: Any) -> Expr:
>>> g = lambda a: (a * 2).name('a')
>>> result1 = t.a.pipe(f).pipe(g)
>>> result1
r0 := UnboundTable[t]
r0 := UnboundTable: t
a int64
b string
a: r0.a + 1 * 2
Expand Down Expand Up @@ -487,10 +488,11 @@ def _binop(
Examples
--------
>>> import ibis
>>> import ibis.expr.operations as ops
>>> expr = _binop(ops.TimeAdd, ibis.time("01:00"), ibis.interval(hours=1))
>>> expr
datetime.time(1, 0) + 1
TimeAdd(datetime.time(1, 0), 1): datetime.time(1, 0) + 1 h
>>> _binop(ops.TimeAdd, 1, ibis.interval(hours=1))
NotImplemented
"""
Expand Down
81 changes: 55 additions & 26 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def name(self, name):
Examples
--------
>>> import ibis
>>> t = ibis.table(dict(a="int64"))
>>> t = ibis.table(dict(a="int64"), name="t")
>>> t.a.name("b")
r0 := UnboundTable[unbound_table_...]
r0 := UnboundTable: t
a int64
b: r0.a
"""
Expand Down Expand Up @@ -120,8 +120,8 @@ def coalesce(self, *args: Value) -> Value:
Examples
--------
>>> import ibis
>>> ibis.coalesce(None, 4, 5)
Coalesce((None, 4, 5))
>>> ibis.coalesce(None, 4, 5).name("x")
x: Coalesce(...)
"""
return ops.Coalesce((self, *args)).to_expr()

Expand Down Expand Up @@ -178,17 +178,44 @@ def fillna(self, fill_value: Scalar) -> Value:
Examples
--------
>>> import ibis
>>> table = ibis.table(dict(col='int64', other_col='int64'))
>>> result = table.col.fillna(5)
r0 := UnboundTable: unbound_table_0
col int64
other_col int64
IfNull(r0.col, ifnull_expr=5)
>>> table.col.fillna(table.other_col * 3)
r0 := UnboundTable: unbound_table_0
col int64
other_col int64
IfNull(r0.col, ifnull_expr=r0.other_col * 3)
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t.sex
┏━━━━━━━━┓
┃ sex ┃
┡━━━━━━━━┩
│ string │
├────────┤
│ male │
│ female │
│ female │
│ ∅ │
│ female │
│ male │
│ female │
│ male │
│ ∅ │
│ ∅ │
│ … │
└────────┘
>>> t.sex.fillna("unrecorded").name("sex")
┏━━━━━━━━━━━━┓
┃ sex ┃
┡━━━━━━━━━━━━┩
│ string │
├────────────┤
│ male │
│ female │
│ female │
│ unrecorded │
│ female │
│ male │
│ female │
│ male │
│ unrecorded │
│ unrecorded │
│ … │
└────────────┘
Returns
-------
Expand Down Expand Up @@ -254,21 +281,21 @@ def isin(self, values: Value | Sequence[Value]) -> ir.BooleanValue:
Check whether a column's values are contained in a sequence
>>> import ibis
>>> table = ibis.table(dict(string_col='string'))
>>> table = ibis.table(dict(string_col='string'), name="t")
>>> table.string_col.isin(['foo', 'bar', 'baz'])
r0 := UnboundTable: unbound_table_1
r0 := UnboundTable: t
string_col string
Contains(value=r0.string_col, options=('foo', 'bar', 'baz'))
Contains(string_col): Contains(...)
Check whether a column's values are contained in another table's column
>>> table2 = ibis.table(dict(other_string_col='string'))
>>> table2 = ibis.table(dict(other_string_col='string'), name="t2")
>>> table.string_col.isin(table2.other_string_col)
r0 := UnboundTable: unbound_table_3
other_string_col string
r1 := UnboundTable: unbound_table_1
r0 := UnboundTable: t
string_col string
Contains(value=r1.string_col, options=r0.other_string_col)
r1 := UnboundTable: t2
other_string_col string
Contains(string_col, other_string_col): Contains(...)
"""
return ops.Contains(self, values).to_expr()

Expand Down Expand Up @@ -410,9 +437,9 @@ def case(self):
... .else_('null or (not a and not b)')
... .end())
>>> case_expr
r0 := UnboundTable[t]
r0 := UnboundTable: t
string_col string
SimpleCase(base=r0.string_col, cases=[List(values=['a', 'b'])], results=[List(values=['an a', 'a b'])], default='null or (not a and not b)')
SimpleCase(...)
"""
import ibis.expr.builders as bl

Expand Down Expand Up @@ -479,7 +506,7 @@ def collect(self, where: ir.BooleanValue | None = None) -> ir.ArrayScalar:
>>> t.value.collect()
[1, 2, 3, 4, 5]
>>> type(t.value.collect())
ibis.expr.types.arrays.ArrayScalar
<class 'ibis.expr.types.arrays.ArrayScalar'>
Collect elements per group
Expand Down Expand Up @@ -637,6 +664,8 @@ def as_table(self) -> ir.Table:
--------
Promote an aggregation to a table
>>> import ibis
>>> import ibis.expr.types as ir
>>> t = ibis.table(dict(a="str"), name="t")
>>> expr = t.a.length().sum().name("len").as_table()
>>> isinstance(expr, ir.Table)
Expand Down
8 changes: 4 additions & 4 deletions ibis/expr/types/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,23 +172,23 @@ def mutate(self, *exprs: ir.Value | Sequence[ir.Value], **kwexprs: ir.Value):
... ('baz', 'double'),
... ], name='t')
>>> t
UnboundTable[t]
UnboundTable: t
foo string
bar string
baz float64
>>> expr = (t.group_by('foo')
... .order_by(ibis.desc('bar'))
... .mutate(qux=lambda x: x.baz.lag(), qux2=t.baz.lead()))
>>> print(expr)
r0 := UnboundTable[t]
r0 := UnboundTable: t
foo string
bar string
baz float64
Selection[r0]
selections:
r0
qux: Window(Lag(r0.baz), window=Window(group_by=[r0.foo], order_by=[desc|r0.bar], how='rows'))
qux2: Window(Lead(r0.baz), window=Window(group_by=[r0.foo], order_by=[desc|r0.bar], how='rows'))
qux: WindowFunction(...)
qux2: WindowFunction(...)
Returns
-------
Expand Down
20 changes: 10 additions & 10 deletions ibis/expr/types/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@ def __getitem__(
>>> import json, ibis
>>> ibis.options.interactive = True
>>> rows = [{"js": json.dumps({"a": [i, 1]})} for i in range(2)]
>>> t = ibis.memtable(rows, schema=ibis.schema(js="json"))
>>> t = ibis.memtable(rows, schema=ibis.schema(dict(js="json")))
>>> t
┏━━━━━━━━━━━━━━━┓
┃ js ┃
┡━━━━━━━━━━━━━━━┩
│ json │
├───────────────┤
│ {'a': [0, 1]}
│ {'a': [1, 1]}
└───────────────┘
┏━━━━━━━━━━━━━━━━━━━━━━
┃ js
┡━━━━━━━━━━━━━━━━━━━━━━
│ json
├──────────────────────
│ {'a': [...]}
│ {'a': [...]}
└──────────────────────
Extract the `"a"` field
Expand Down Expand Up @@ -74,7 +74,7 @@ def __getitem__(
Extract a non-existent field
>>> t.js.a["foo"]
>>> t.js["a"]["foo"]
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ JSONGetItem(JSONGetItem(js, 'a'), 'foo') ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
Expand Down
6 changes: 3 additions & 3 deletions ibis/expr/types/logical.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ def ifelse(
>>> import ibis
>>> t = ibis.table([("is_person", "boolean")], name="t")
>>> expr = t.is_person.ifelse("yes", "no")
>>> print(ibis.impala.compile(expr))
SELECT CASE WHEN `is_person` THEN 'yes' ELSE 'no' END AS `tmp`
FROM t
>>> print(ibis.impala.compile(expr.name("tmp")))
SELECT if(t0.`is_person`, 'yes', 'no') AS `tmp`
FROM t t0
"""
# Result will be the result of promotion of true/false exprs. These
# might be conflicting types; same type resolution as case expressions
Expand Down
22 changes: 11 additions & 11 deletions ibis/expr/types/maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ def get(
>>> import ibis
>>> m = ibis.map({"a": 1, "b": 2})
>>> m.get("a")
MapGet(frozendict({'a': 1, 'b': 2}), key='a', default=None)
MapGet(...)
>>> m.get("c", 3)
MapGet(frozendict({'a': 1, 'b': 2}), key='c', default=3)
MapGet(...)
>>> m.get("d")
MapGet(frozendict({'a': 1, 'b': 2}), key='d', default=None)
MapGet(...)
"""

return ops.MapGet(self, key, default).to_expr()
Expand All @@ -61,7 +61,7 @@ def length(self) -> ir.IntegerValue:
>>> import ibis
>>> m = ibis.map({"a": 1, "b": 2})
>>> m.length()
MapLength(frozendict({'a': 1, 'b': 2}))
MapLength(...)
"""
return ops.MapLength(self).to_expr()

Expand All @@ -88,9 +88,9 @@ def __getitem__(self, key: ir.Value) -> ir.Value:
>>> import ibis
>>> m = ibis.map({"a": 1, "b": 2})
>>> m["a"]
MapValueForKey(frozendict({'a': 1, 'b': 2}), key='a')
MapGet(...)
>>> m["c"] # note that this does not fail on construction
MapValueForKey(frozendict({'a': 1, 'b': 2}), key='c')
MapGet(...)
"""
return ops.MapGet(self, key).to_expr()

Expand Down Expand Up @@ -124,7 +124,7 @@ def keys(self) -> ir.ArrayValue:
>>> import ibis
>>> m = ibis.map({"a": 1, "b": 2})
>>> m.keys()
MapKeys(frozendict({'a': 1, 'b': 2}))
MapKeys(...)
"""
return ops.MapKeys(self).to_expr()

Expand All @@ -140,8 +140,8 @@ def values(self) -> ir.ArrayValue:
--------
>>> import ibis
>>> m = ibis.map({"a": 1, "b": 2})
>>> m.keys()
MapKeys(frozendict({'a': 1, 'b': 2}))
>>> m.values()
MapValues(...)
"""
return ops.MapValues(self).to_expr()

Expand All @@ -164,7 +164,7 @@ def __add__(self, other: MapValue) -> MapValue:
>>> m1 = ibis.map({"a": 1, "b": 2})
>>> m2 = ibis.map({"c": 3, "d": 4})
>>> m1 + m2
MapConcat(left=frozendict({'a': 1, 'b': 2}), right=frozendict({'c': 3, 'd': 4}))
MapMerge(...)
"""
return ops.MapMerge(self, other).to_expr()

Expand All @@ -187,7 +187,7 @@ def __radd__(self, other: MapValue) -> MapValue:
>>> m1 = ibis.map({"a": 1, "b": 2})
>>> m2 = ibis.map({"c": 3, "d": 4})
>>> m1 + m2
MapConcat(left=frozendict({'a': 1, 'b': 2}), right=frozendict({'c': 3, 'd': 4}))
MapMerge(...)
"""
return ops.MapMerge(self, other).to_expr()

Expand Down
1,451 changes: 1,194 additions & 257 deletions ibis/expr/types/relations.py

Large diffs are not rendered by default.

23 changes: 12 additions & 11 deletions ibis/expr/types/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __getitem__(self, key: slice | int | ir.IntegerScalar) -> StringValue:
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"food": ["bread", "cheese", "rice"], "idx": [1, 2, 4]})
>>> t
┏━━━━━━━━┳━━━━━━━┓
┃ food ┃ idx ┃
┡━━━━━━━━╇━━━━━━━┩
Expand Down Expand Up @@ -313,7 +314,7 @@ def strip(self) -> StringValue:
return ops.Strip(self).to_expr()

def lstrip(self) -> StringValue:
"""Remove whitespace from the left side of string.
r"""Remove whitespace from the left side of string.
Returns
-------
Expand Down Expand Up @@ -349,7 +350,7 @@ def lstrip(self) -> StringValue:
return ops.LStrip(self).to_expr()

def rstrip(self) -> StringValue:
"""Remove whitespace from the right side of string.
r"""Remove whitespace from the right side of string.
Returns
-------
Expand Down Expand Up @@ -1052,15 +1053,15 @@ def re_extract(
Extract a specific group
>>> t.s.re_extract(r"^(a)bc", 1)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ RegexExtract(s, '^(a)', 1) ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ string │
├────────────────────────────┤
│ a │
│ ~ │
│ ~ │
└────────────────────────────┘
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
┃ RegexExtract(s, '^(a)bc', 1) ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│ string
├──────────────────────────────
│ a
│ ~
│ ~
└──────────────────────────────
Extract the entire match
Expand Down
47 changes: 30 additions & 17 deletions ibis/expr/types/structs.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from __future__ import annotations

import collections

from keyword import iskeyword
from typing import TYPE_CHECKING, Iterable, Mapping, Sequence

from public import public

import ibis.expr.operations as ops

from ibis.expr.types.generic import Column, Scalar, Value, literal
from ibis.expr.types.typing import V

Expand Down Expand Up @@ -88,7 +86,7 @@ def __getitem__(self, name: str) -> ir.Value:
>>> import ibis
>>> s = ibis.struct(dict(fruit="pear", weight=0))
>>> s['fruit']
fruit: StructField(frozendict({'fruit': 'pear', 'weight': 0}), field='fruit')
fruit: StructField(...)
"""
return ops.StructField(self, name).to_expr()

Expand Down Expand Up @@ -130,21 +128,36 @@ def lift(self) -> ir.Table:
Examples
--------
>>> schema = dict(a="struct<b: float, c: string>", d="string")
>>> t = ibis.table(schema, name="t")
>>> import ibis
>>> ibis.options.interactive = True
>>> lines = '''
... {"pos": {"lat": 10.1, "lon": 30.3}}
... {"pos": {"lat": 10.2, "lon": 30.2}}
... {"pos": {"lat": 10.3, "lon": 30.1}}
... '''
>>> with open("/tmp/lines.json", "w") as f:
... _ = f.write(lines)
>>> t = ibis.read_json("/tmp/lines.json")
>>> t
UnboundTable: t
a struct<b: float64, c: string>
d string
>>> t.a.lift()
r0 := UnboundTable: t
a struct<b: float64, c: string>
d string
Selection[r0]
selections:
b: StructField(r0.a, field='b')
c: StructField(r0.a, field='c')
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ pos ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ struct<lat: float64, lon: float64> │
├────────────────────────────────────┤
│ {'lat': 10.1, 'lon': 30.3} │
│ {'lat': 10.2, 'lon': 30.2} │
│ {'lat': 10.3, 'lon': 30.1} │
└────────────────────────────────────┘
>>> t.pos.lift()
┏━━━━━━━━━┳━━━━━━━━━┓
┃ lat ┃ lon ┃
┡━━━━━━━━━╇━━━━━━━━━┩
│ float64 │ float64 │
├─────────┼─────────┤
│ 10.1 │ 30.3 │
│ 10.2 │ 30.2 │
│ 10.3 │ 30.1 │
└─────────┴─────────┘
See Also
--------
Expand Down
27 changes: 12 additions & 15 deletions ibis/udf/vectorized.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,28 +133,25 @@ def _coerce_to_dataframe(
Examples
--------
>>> _coerce_to_dataframe(pd.DataFrame({'a': [1, 2, 3]}), dt.Struct([('b', 'int32')])) # noqa: E501
>>> import pandas as pd
>>> _coerce_to_dataframe(pd.DataFrame({'a': [1, 2, 3]}), dt.Struct(dict(b="int32"))) # noqa: E501
b
0 1
1 2
2 3
dtype: int32
>>> _coerce_to_dataframe(pd.Series([[1, 2, 3]]), dt.Struct([('a', 'int32'), ('b', 'int32'), ('c', 'int32')])) # noqa: E501
>>> _coerce_to_dataframe(pd.Series([[1, 2, 3]]), dt.Struct(dict.fromkeys('abc', 'int32'))) # noqa: E501
a b c
0 1 2 3
dtypes: [int32, int32, int32]
>>> _coerce_to_dataframe(pd.Series([range(3), range(3)]), dt.Struct([('a', 'int32'), ('b', 'int32'), ('c', 'int32')])) # noqa: E501
>>> _coerce_to_dataframe(pd.Series([range(3), range(3)]), dt.Struct(dict.fromkeys('abc', 'int32'))) # noqa: E501
a b c
0 0 1 2
1 0 1 2
dtypes: [int32, int32, int32]
>>> _coerce_to_dataframe([pd.Series(x) for x in [1, 2, 3]], dt.Struct([('a', 'int32'), ('b', 'int32'), ('c', 'int32')])) # noqa: E501
>>> _coerce_to_dataframe([pd.Series(x) for x in [1, 2, 3]], dt.Struct(dict.fromkeys('abc', 'int32'))) # noqa: E501
a b c
0 1 2 3
>>> _coerce_to_dataframe([1, 2, 3], dt.Struct([('a', 'int32'), ('b', 'int32'), ('c', 'int32')])) # noqa: E501
>>> _coerce_to_dataframe([1, 2, 3], dt.Struct(dict.fromkeys('abc', 'int32'))) # noqa: E501
a b c
0 1 2 3
dtypes: [int32, int32, int32]
"""
import pandas as pd

Expand Down Expand Up @@ -285,7 +282,7 @@ def analytic(input_type, output_type):
>>> @analytic(
... input_type=[dt.double],
... output_type=dt.Struct(['demean', 'zscore'], [dt.double, dt.double])
... output_type=dt.Struct(dict(demean="double", zscore="double")),
... )
... def demean_and_zscore(v):
... mean = v.mean()
Expand All @@ -294,7 +291,7 @@ def analytic(input_type, output_type):
>>>
>>> win = ibis.window(preceding=None, following=None, group_by='key')
>>> # add two columns "demean" and "zscore"
>>> table = table.mutate(
>>> table = table.mutate( # doctest: +SKIP
... demean_and_zscore(table['v']).over(win).destructure()
... )
"""
Expand Down Expand Up @@ -332,13 +329,13 @@ def elementwise(input_type, output_type):
>>> @elementwise(
... input_type=[dt.string],
... output_type=dt.Struct(['year', 'monthday'], [dt.string, dt.string])
... output_type=dt.Struct(dict(year=dt.string, monthday=dt.string))
... )
... def year_monthday(date):
... return date.str.slice(0, 4), date.str.slice(4, 8)
>>>
>>> # add two columns "year" and "monthday"
>>> table = table.mutate(year_monthday(table['date']).destructure())
>>> table = table.mutate(year_monthday(table['date']).destructure()) # doctest: +SKIP
"""
return _udf_decorator(ElementWiseVectorizedUDF, input_type, output_type)

Expand Down Expand Up @@ -368,13 +365,13 @@ def reduction(input_type, output_type):
>>> @reduction(
... input_type=[dt.double],
... output_type=dt.Struct(['mean', 'std'], [dt.double, dt.double])
... output_type=dt.Struct(dict(mean="double", std="double"))
... )
... def mean_and_std(v):
... return v.mean(), v.std()
>>>
>>> # create aggregation columns "mean" and "std"
>>> table = table.group_by('key').aggregate(
>>> table = table.group_by('key').aggregate( # doctest: +SKIP
... mean_and_std(table['v']).destructure()
... )
"""
Expand Down
4 changes: 2 additions & 2 deletions ibis/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,11 +510,11 @@ def import_object(qualname: str) -> Any:
Examples
--------
>>> out = import_object("foo.bar.baz")
>>> ex = import_object("ibis.examples")
Is the same as
>>> from foo.bar import baz
>>> from ibis import examples as ex
"""
mod_name, name = qualname.rsplit(".", 1)
mod = importlib.import_module(mod_name)
Expand Down
15 changes: 15 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@ test +backends:

pytest "${pytest_args[@]}"

# run doctests
doctest *args:
#!/usr/bin/env bash
# TODO(cpcloud): why doesn't pytest --ignore-glob=test_*.py work?
# Build the list of modules to doctest by hand: every .py file under ibis/
# except paths matching the test, __init__ and gen_ patterns below.
# NOTE: -wholename globs against the full path, so '*test*.py' also drops
# files that merely live in a directory whose name contains "test".
mapfile -t doctest_modules < <(
find \
ibis \
-wholename '*.py' \
-and -not -wholename '*test*.py' \
-and -not -wholename '*__init__*' \
-and -not -wholename '*gen_*.py'
)
# Run the collected files as doctest modules; any extra recipe arguments are
# forwarded to pytest ahead of the file list.
pytest --doctest-modules {{ args }} "${doctest_modules[@]}"

# download testing data
download-data owner="ibis-project" repo="testing-data" rev="master":
#!/usr/bin/env bash
Expand Down
36 changes: 20 additions & 16 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -226,14 +226,30 @@ doctest_optionflags = [
]
xfail_strict = true
addopts = [
"--ignore=site-packages",
"--ignore=dist-packages",
"--strict-markers",
"--strict-config",
"--benchmark-disable",
"--benchmark-group-by=name",
"--benchmark-sort=name",
]
norecursedirs = [
"**/snapshots",
".benchmarks",
".direnv",
".git",
".github",
".hypothesis",
".pytest_cache",
".streamlit",
"LICENSES",
"ci",
"conda-lock",
"dev",
"docker",
"docs",
"nix",
"result*",
]
filterwarnings = [
# fail on any warnings that are not explicitly matched below
"error",
Expand Down Expand Up @@ -281,22 +297,10 @@ filterwarnings = [
"ignore:Deprecated call to `pkg_resources\\.declare_namespace\\('google.*'\\):DeprecationWarning",
# pyspark on python 3.11
"ignore:typing\\.io is deprecated:DeprecationWarning",
# warnings from google's use of the cgi module
"ignore:'cgi' is deprecated and slated for removal in Python 3\\.13:DeprecationWarning",
]
empty_parameter_set_mark = "fail_at_collect"
norecursedirs = [
".benchmarks",
".direnv",
".git",
".github",
"LICENSES",
"ci",
"conda-lock",
"dev",
"docker",
"nix",
"result",
"result-*",
]
markers = [
"backend: tests specific to a backend",
"benchmark: benchmarks",
Expand Down