Skip to content

Commit

Permalink
fix(expr): fix formatting of table info using tabulate
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Apr 13, 2022
1 parent e250afc commit b110636
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 50 deletions.
8 changes: 0 additions & 8 deletions ibis/backends/impala/tests/test_exprs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from decimal import Decimal
from io import StringIO

import pandas as pd
import pandas.testing as tm
Expand All @@ -20,13 +19,6 @@ def test_embedded_identifier_quoting(alltypes):
expr.execute()


def test_table_info(alltypes):
    """Check that ``info`` writes some output to the supplied buffer."""
    buf = StringIO()
    alltypes.info(buf=buf)

    # ``StringIO.getvalue`` always returns a ``str`` (never ``None``), so an
    # ``is not None`` check cannot fail; require actual content instead.
    assert buf.getvalue()


def test_summary_execute(alltypes):
table = alltypes

Expand Down
11 changes: 11 additions & 0 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import decimal
import io

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -402,3 +403,13 @@ def test_dropna_table(backend, alltypes, how, subset):
def test_select_sort_sort(alltypes):
    """Build a two-column projection and apply two successive sorts to it."""
    projection = alltypes[alltypes.year, alltypes.bool_col]
    sorted_by_year = projection.sort_by(projection.year)
    # The second sort key is taken from the unsorted projection.
    sorted_by_year.sort_by(projection.bool_col)


def test_table_info(alltypes):
    """Check that ``info`` output has a nulls column and every column type."""
    buf = io.StringIO()
    alltypes.info(buf=buf)

    info_str = buf.getvalue()
    # ``StringIO.getvalue`` always returns a ``str``, so check for content
    # rather than comparing against ``None``.
    assert info_str
    assert "Nulls" in info_str
    # ``dtype`` rather than ``type`` so the builtin is not shadowed.
    assert all(str(dtype) in info_str for dtype in alltypes.schema().types)
31 changes: 21 additions & 10 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import IO, TYPE_CHECKING, Any, Iterable, Literal, Mapping, Sequence

import numpy as np
import tabulate
from cached_property import cached_property
from public import public

Expand Down Expand Up @@ -850,19 +851,29 @@ def info(self, buf: IO[str] | None = None) -> None:
buf
A writable buffer, defaults to stdout
"""
metrics = [self.count().name("nrows")]
for col in self.columns:
metrics.append(self[col].count().name(col))
metrics = [self[col].count().name(col) for col in self.columns]
metrics.append(self.count().name("nrows"))

metrics = self.aggregate(metrics).execute().loc[0]
schema = self.schema()

names = ["Column", "------"] + self.columns
types = ["Type", "----"] + [repr(x) for x in self.schema().types]
counts = ["Non-null #", "----------"] + [str(x) for x in metrics[1:]]
col_metrics = util.adjoin(2, names, types, counts)
result = f"Table rows: {metrics[0]}\n\n{col_metrics}"
*items, (_, n) = self.aggregate(metrics).execute().squeeze().items()

print(result, file=buf)
tabulated = tabulate.tabulate(
[
(
column,
schema[column],
f"{n - non_nulls} ({100 * (1.0 - non_nulls / n):>3.3g}%)",
)
for column, non_nulls in items
],
headers=["Column", "Type", "Nulls (%)"],
colalign=("left", "left", "right"),
)
width = tabulated[tabulated.index("\n") + 1 :].index("\n")
row_count = f"Rows: {n}".center(width)
footer_line = "-" * width
print("\n".join([tabulated, footer_line, row_count]), file=buf)

def set_column(self, name: str, expr: ir.ValueExpr) -> TableExpr:
"""Replace an existing column with a new expression.
Expand Down
28 changes: 0 additions & 28 deletions ibis/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
TYPE_CHECKING,
Any,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Expand Down Expand Up @@ -159,33 +158,6 @@ def is_function(v: Any) -> bool:
return isinstance(v, (types.FunctionType, types.LambdaType))


def adjoin(space: int, *lists: Iterable[str]) -> str:
    """Lay out columns of strings side by side, one output line per row.

    Every column except the last is left-justified to the width of its
    longest entry plus `space` padding characters; the last column is
    left-justified to the width of its longest entry. Columns with fewer
    entries than the longest column are filled with blank cells.

    Parameters
    ----------
    space : int
        Number of padding characters added to every column but the last.
    lists : iterables of str
        The columns to glue together, left to right.

    Returns
    -------
    str
        The rows joined by newlines, or the empty string when no columns
        are given.
    """
    # Materialize each column: it is measured first and emitted afterwards,
    # which a one-shot iterable would not survive.
    columns = [list(column) for column in lists]
    if not columns:
        return ''

    *leading, last = columns
    # ``default=0`` keeps an empty column from raising inside ``max``.
    widths = [max(map(len, column), default=0) + space for column in leading]
    widths.append(max(map(len, last), default=0))

    n_rows = max(map(len, columns))
    # Pad eagerly, while ``width`` still belongs to the current column. A lazy
    # generator here would only read ``width`` after the loop had finished and
    # would pad every cell to the last column's width.
    padded = [
        [cell.ljust(width) for cell in column]
        + [' ' * width] * (n_rows - len(column))
        for column, width in zip(columns, widths)
    ]
    return '\n'.join(map(''.join, zip(*padded)))


def log(msg: str) -> None:
"""Log `msg` using ``options.verbose_log`` if set, otherwise ``print``.
Expand Down
4 changes: 4 additions & 0 deletions poetry-overrides.nix
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,8 @@ self: super:
self.poetry-core
];
});

tabulate = super.tabulate.overridePythonAttrs (_: {
TABULATE_INSTALL = "lib-only";
});
}
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ multipledispatch = ">=0.6,<0.7"
numpy = ">=1,<2"
pandas = ">=1.2.5,<2"
parsy = ">=1.3.0,<2"
poetry-dynamic-versioning = ">=0.14.0,<1"
pydantic = ">=1.9.0,<2"
regex = ">=2021.7.6"
tabulate = ">=0.8.9,<1"
toolz = ">=0.11,<0.12"
clickhouse-cityhash = { version = ">=1.0.2,<2", optional = true }
clickhouse-driver = { version = ">=0.1,<0.3", optional = true }
Expand All @@ -59,7 +61,6 @@ pyspark = { version = ">=3,<4", optional = true }
requests = { version = ">=2,<3", optional = true }
Shapely = { version = ">=1.6,<1.8.1", optional = true }
sqlalchemy = { version = ">=1.4,<2.0", optional = true }
poetry-dynamic-versioning = "^0.14.0"

[tool.poetry.dev-dependencies]
black = ">=22.1.0,<23"
Expand Down
3 changes: 2 additions & 1 deletion setup.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b110636

Please sign in to comment.