Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 44 additions & 7 deletions bigframes/display/anywidget.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ class TableWidget(WIDGET_BASE):

page = traitlets.Int(0).tag(sync=True)
page_size = traitlets.Int(0).tag(sync=True)
row_count = traitlets.Int(0).tag(sync=True)
row_count = traitlets.Union(
[traitlets.Int(), traitlets.Instance(type(None))],
default_value=None,
allow_none=True,
).tag(sync=True)
table_html = traitlets.Unicode().tag(sync=True)
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
_batches: Optional[blocks.PandasBatches] = None
Expand Down Expand Up @@ -94,12 +98,17 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
# SELECT COUNT(*) query. It is a must have however.
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
# before we get here so that the count might already be cached.
# TODO(b/452747934): Allow row_count to be None and check to see if
# there are multiple pages and show "page 1 of many" in this case
self._reset_batches_for_new_page_size()
if self._batches is None or self._batches.total_rows is None:
self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
self.row_count = 0

if self._batches is None:
self._error_message = "Could not retrieve data batches. Data might be unavailable or an error occurred."
self.row_count = None
elif self._batches.total_rows is None:
# The total row count is unknown; this is an expected state.
# TODO(b/461536343): Cheaply discover if we have exactly 1 page.
# There are cases where total rows is not set, but there are no additional
# pages. We could disable the "next" button in these cases.
self.row_count = None
else:
self.row_count = self._batches.total_rows

Expand Down Expand Up @@ -131,11 +140,22 @@ def _validate_page(self, proposal: Dict[str, Any]) -> int:
Returns:
The validated and clamped page number as an integer.
"""

value = proposal["value"]

if value < 0:
raise ValueError("Page number cannot be negative.")

# If the table is known to be empty or the page size is invalid, stay on page 0.
# This check runs before the unknown-row-count branch below, so a page_size of 0
# is rejected (avoiding division by zero) even when row_count is None.
if self.row_count == 0 or self.page_size == 0:
return 0

# If row count is unknown, allow any non-negative page. The previous check
# ensures that invalid page_size (0) is already handled.
if self.row_count is None:
return value

# Calculate the zero-indexed maximum page number.
max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)

Expand Down Expand Up @@ -229,6 +249,23 @@ def _set_table_html(self) -> None:
# Get the data for the current page
page_data = cached_data.iloc[start:end]

# Handle the case where the user navigated past the available data while the row count is unknown
is_unknown_count = self.row_count is None
is_beyond_data = self._all_data_loaded and len(page_data) == 0 and self.page > 0
if is_unknown_count and is_beyond_data:
# Calculate the last valid page (zero-indexed)
total_rows = len(cached_data)
if total_rows > 0:
last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
# Navigate back to the last valid page
self.page = last_valid_page
# Recursively call to display the correct page
return self._set_table_html()
else:
# If no data at all, stay on page 0 with empty display
self.page = 0
return self._set_table_html()

# Generate HTML table
self.table_html = bigframes.display.html.render_html(
dataframe=page_data,
Expand Down
23 changes: 15 additions & 8 deletions bigframes/display/table_widget.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,21 @@ function render({ model, el }) {
const rowCount = model.get(ModelProperty.ROW_COUNT);
const pageSize = model.get(ModelProperty.PAGE_SIZE);
const currentPage = model.get(ModelProperty.PAGE);
const totalPages = Math.ceil(rowCount / pageSize);

rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`;
paginationLabel.textContent = `Page ${(
currentPage + 1
).toLocaleString()} of ${(totalPages || 1).toLocaleString()}`;
prevPage.disabled = currentPage === 0;
nextPage.disabled = currentPage >= totalPages - 1;

if (rowCount === null) {
// Unknown total rows
rowCountLabel.textContent = "Total rows unknown";
paginationLabel.textContent = `Page ${(currentPage + 1).toLocaleString()} of many`;
prevPage.disabled = currentPage === 0;
nextPage.disabled = false; // Allow navigation until we hit the end
} else {
// Known total rows
const totalPages = Math.ceil(rowCount / pageSize);
rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`;
paginationLabel.textContent = `Page ${(currentPage + 1).toLocaleString()} of ${rowCount.toLocaleString()}`;
prevPage.disabled = currentPage === 0;
nextPage.disabled = currentPage >= totalPages - 1;
}
pageSizeSelect.value = pageSize;
}

Expand Down
135 changes: 97 additions & 38 deletions notebooks/dataframes/anywidget_mode.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,7 @@
"execution_count": 2,
"id": "ca22f059",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/google/api_core/_python_version_support.py:266: FutureWarning: You are using a Python version (3.10.15) which Google will stop supporting in new releases of google.api_core once it reaches its end of life (2026-10-04). Please upgrade to the latest Python version, or at least Python 3.11, to continue receiving updates for google.api_core past that date.\n",
" warnings.warn(message, FutureWarning)\n"
]
}
],
"outputs": [],
"source": [
"import bigframes.pandas as bpd"
]
Expand Down Expand Up @@ -151,7 +142,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "47795eaa10f149aeb99574232c0936eb",
"model_id": "8fcad7b7e408422cae71d519cd2d4980",
"version_major": 2,
"version_minor": 1
},
Expand All @@ -175,7 +166,7 @@
}
],
"source": [
"df"
"df.set_index(\"name\")"
]
},
{
Expand Down Expand Up @@ -214,7 +205,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8354ce0f82d3495a9b630dfc362f73ee",
"model_id": "06cb98c577514d5c9654a7792d93f8e6",
"version_major": 2,
"version_minor": 1
},
Expand Down Expand Up @@ -293,27 +284,8 @@
{
"data": {
"text/html": [
"\n",
" Query started with request ID bigframes-dev:US.c45952fb-01b4-409c-9da4-f7c5bfc0d47d.<details><summary>SQL</summary><pre>SELECT\n",
"`state` AS `state`,\n",
"`gender` AS `gender`,\n",
"`year` AS `year`,\n",
"`name` AS `name`,\n",
"`number` AS `number`\n",
"FROM\n",
"(SELECT\n",
" *\n",
"FROM (\n",
" SELECT\n",
" `state`,\n",
" `gender`,\n",
" `year`,\n",
" `name`,\n",
" `number`\n",
" FROM `bigquery-public-data.usa_names.usa_1910_2013` FOR SYSTEM_TIME AS OF TIMESTAMP(&#x27;2025-10-30T21:48:48.979701+00:00&#x27;)\n",
") AS `t0`)\n",
"ORDER BY `name` ASC NULLS LAST ,`year` ASC NULLS LAST ,`state` ASC NULLS LAST\n",
"LIMIT 5</pre></details>\n",
"✅ Completed. \n",
" Query processed 171.4 MB in a moment of slot time.\n",
" "
],
"text/plain": [
Expand All @@ -333,7 +305,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "59461286a17d4a42b6be6d9d9c7bf7e3",
"model_id": "1672f826f7a347e38539dbb5fb72cd43",
"version_major": 2,
"version_minor": 1
},
Expand Down Expand Up @@ -373,7 +345,7 @@
"data": {
"text/html": [
"✅ Completed. \n",
" Query processed 85.9 kB in 14 seconds of slot time.\n",
" Query processed 85.9 kB in 12 seconds of slot time.\n",
" "
],
"text/plain": [
Expand All @@ -387,7 +359,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:969: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
"instead of using `db_dtypes` in the future when available in pandas\n",
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
Expand All @@ -408,7 +380,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d1794b42579542a8980bd158e521bd3e",
"model_id": "127a2e356b834c18b6f07c58ee2c4228",
"version_major": 2,
"version_minor": 1
},
Expand Down Expand Up @@ -443,6 +415,93 @@
" LIMIT 5;\n",
"\"\"\")"
]
},
{
"cell_type": "markdown",
"id": "multi-index-display-markdown",
"metadata": {},
"source": [
"## Display Multi-Index DataFrame in anywidget mode\n",
"This section demonstrates how BigFrames can display a DataFrame with multiple levels of indexing (a \"multi-index\") when using the `anywidget` display mode."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "ad7482aa",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"✅ Completed. \n",
" Query processed 483.3 GB in 51 minutes of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:3eace7c0-7776-48d6-925c-965be33d8738&page=queryresults\">Job bigframes-dev:US.3eace7c0-7776-48d6-925c-965be33d8738 details</a>]\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"✅ Completed. \n",
" Query processed 124.4 MB in 7 seconds of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS&page=queryresults\">Job bigframes-dev:US.job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS details</a>]\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3f9652b5fdc0441eac2b05ab36d571d0",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
"TableWidget(page_size=10, row_count=3967869, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [],
"text/plain": [
"Computation deferred. Computation will process 513.5 GB"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import datetime\n",
"\n",
" # Read the PyPI downloads dataset\n",
"pypi_df = bpd.read_gbq(\"bigquery-public-data.pypi.file_downloads\")\n",
"\n",
"# Filter for the last 7 days to reduce the data size for this example\n",
"seven_days_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=7)\n",
"pypi_df_recent = pypi_df[pypi_df[\"timestamp\"] > seven_days_ago]\n",
" \n",
"# Create a multi-index by grouping by date and project\n",
"pypi_df_recent['date'] = pypi_df_recent['timestamp'].dt.date\n",
"multi_index_df = pypi_df_recent.groupby([\"date\", \"project\"]).size().to_frame(\"downloads\")\n",
" \n",
"# Display the DataFrame with the multi-index\n",
"multi_index_df"
]
}
],
"metadata": {
Expand Down
Loading