From 25d049c078693466905a19cc0954fafcac6c414c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 31 May 2024 13:35:19 -0500 Subject: [PATCH] feat: `merge` only generates a default index if both inputs already have an index (#733) * feat: `merge` only generates a default index if both inputs already have an index * add tests for merge with null index --- bigframes/_config/__init__.py | 6 +- bigframes/core/blocks.py | 22 +- bigframes/exceptions.py | 4 + notebooks/dataframes/index_col_null.ipynb | 1409 +++++++++++++++++++++ tests/system/conftest.py | 2 +- tests/system/small/test_empty_index.py | 227 ---- tests/system/small/test_null_index.py | 288 +++++ 7 files changed, 1723 insertions(+), 235 deletions(-) create mode 100644 notebooks/dataframes/index_col_null.ipynb delete mode 100644 tests/system/small/test_empty_index.py create mode 100644 tests/system/small/test_null_index.py diff --git a/bigframes/_config/__init__.py b/bigframes/_config/__init__.py index bf33420e6..4729532e9 100644 --- a/bigframes/_config/__init__.py +++ b/bigframes/_config/__init__.py @@ -61,10 +61,12 @@ def _init_bigquery_thread_local(self): @property def bigquery(self) -> bigquery_options.BigQueryOptions: """Options to use with the BigQuery engine.""" - if self._local.bigquery_options is not None: + if ( + bigquery_options := getattr(self._local, "bigquery_options", None) + ) is not None: # The only way we can get here is if someone called # _init_bigquery_thread_local. - return self._local.bigquery_options + return bigquery_options return self._bigquery_options diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 010eb96f7..9c567555f 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -124,7 +124,7 @@ def __init__( if len(index_columns) == 0: warnings.warn( "Creating object with Null Index. 
Null Index is a preview feature.", - category=bigframes.exceptions.PreviewWarning, + category=bigframes.exceptions.NullIndexPreviewWarning, ) self._index_columns = tuple(index_columns) # Index labels don't need complicated hierarchical access so can store as tuple @@ -1930,10 +1930,22 @@ def merge( coalesce_labels=matching_join_labels, suffixes=suffixes, ) - # Constructs default index - offset_index_id = guid.generate_guid() - expr = joined_expr.promote_offsets(offset_index_id) - return Block(expr, index_columns=[offset_index_id], column_labels=labels) + + # Construct a default index only if this object and the other both have + # indexes. In other words, joining anything to a NULL index object + # keeps everything as a NULL index. + # + # This keeps us from generating an index if the user joins a large + # BigQuery table against small local data, for example. + if len(self._index_columns) > 0 and len(other._index_columns) > 0: + offset_index_id = guid.generate_guid() + expr = joined_expr.promote_offsets(offset_index_id) + index_columns = [offset_index_id] + else: + expr = joined_expr + index_columns = [] + + return Block(expr, index_columns=index_columns, column_labels=labels) def join( self, diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index 1162217fc..bae239b6d 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -39,6 +39,10 @@ class PreviewWarning(Warning): """The feature is in preview.""" +class NullIndexPreviewWarning(PreviewWarning): + """Null index feature is in preview.""" + + class NullIndexError(ValueError): """Object has no index.""" diff --git a/notebooks/dataframes/index_col_null.ipynb b/notebooks/dataframes/index_col_null.ipynb new file mode 100644 index 000000000..de373050f --- /dev/null +++ b/notebooks/dataframes/index_col_null.ipynb @@ -0,0 +1,1409 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "eeec3428", + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2023 Google 
LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "47439dbd-4e54-4954-8b16-edc4bcd4f855", + "metadata": {}, + "source": [ + "# Operations with a \"NULL index\" DataFrame\n", + "\n", + "**Note**: This notebook describes a feature that is currently in [preview](https://cloud.google.com/blog/products/gcp/google-cloud-gets-simplified-product-launch-stages). There may be breaking changes to the functionality when using \"NULL index\" objects.\n", + "\n", + "Use the \"NULL\" index for more efficient query generation, but\n", + "some pandas-compatible methods may not be possible without an index."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "96757c59-fc22-420e-a42f-c6cb956110ec", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "import bigframes.enums\n", + "import bigframes.exceptions\n", + "import bigframes.pandas as bpd\n", + "\n", + "# Explicitly opt-in to the NULL index preview feature.\n", + "warnings.simplefilter(\n", + " \"ignore\",\n", + " bigframes.exceptions.NullIndexPreviewWarning,\n", + ")\n", + "\n", + "df = bpd.read_gbq(\n", + " \"bigquery-public-data.baseball.schedules\",\n", + " index_col=bigframes.enums.DefaultIndexKind.NULL,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d15688e1", + "metadata": {}, + "source": [ + "Use `peek()` to view an arbitrary selection of rows from the DataFrame. This is much more efficient than `head()`, which requires a total ordering for determinism." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c93949fb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 1b8726ce-c4ea-47fe-a47c-d6fae50d8fb0 is DONE. 582.8 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdgameNumberseasonIdyeartypedayNightdurationduration_minuteshomeTeamIdhomeTeamNameawayTeamIdawayTeamNamestartTimeattendancestatuscreated
0e14b6493-9e7f-404f-840a-8a680cc364bf1565de4be-dc80-4849-a7e1-54bc79156cc82016REGD3:0718703556285-bdbb-4576-a06d-42f71f46ddc5Marlins55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-26 17:10:00+00:0027318closed2016-10-06 06:25:15+00:00
11f32b347-cbcb-4c31-a145-0e685306d1681565de4be-dc80-4849-a7e1-54bc79156cc82016REGD3:0918903556285-bdbb-4576-a06d-42f71f46ddc5Marlins55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-25 20:10:00+00:0029457closed2016-10-06 06:25:15+00:00
20c2292d1-7398-48be-bf8e-b41dad5e1a431565de4be-dc80-4849-a7e1-54bc79156cc82016REGD2:4516512079497-e414-450a-8bf2-29f91de646bfBraves55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-11 20:10:00+00:0043114closed2016-10-06 06:25:15+00:00
38fbec734-a15a-42ab-8d51-60790de7750b1565de4be-dc80-4849-a7e1-54bc79156cc82016REGD3:4222212079497-e414-450a-8bf2-29f91de646bfBraves55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-12 17:35:00+00:0031625closed2016-10-06 06:25:15+00:00
489e514d5-fbf5-4b9d-bdac-6ca45bfd18dd1565de4be-dc80-4849-a7e1-54bc79156cc82016REGD2:441642142e1ba-3b40-445c-b8bb-f1f8b1054220Phillies55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-08 17:05:00+00:0028650closed2016-10-06 06:25:15+00:00
\n", + "
" + ], + "text/plain": [ + " gameId gameNumber \\\n", + "0 e14b6493-9e7f-404f-840a-8a680cc364bf 1 \n", + "1 1f32b347-cbcb-4c31-a145-0e685306d168 1 \n", + "2 0c2292d1-7398-48be-bf8e-b41dad5e1a43 1 \n", + "3 8fbec734-a15a-42ab-8d51-60790de7750b 1 \n", + "4 89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd 1 \n", + "\n", + " seasonId year type dayNight duration \\\n", + "0 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 3:07 \n", + "1 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 3:09 \n", + "2 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 2:45 \n", + "3 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 3:42 \n", + "4 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 2:44 \n", + "\n", + " duration_minutes homeTeamId homeTeamName \\\n", + "0 187 03556285-bdbb-4576-a06d-42f71f46ddc5 Marlins \n", + "1 189 03556285-bdbb-4576-a06d-42f71f46ddc5 Marlins \n", + "2 165 12079497-e414-450a-8bf2-29f91de646bf Braves \n", + "3 222 12079497-e414-450a-8bf2-29f91de646bf Braves \n", + "4 164 2142e1ba-3b40-445c-b8bb-f1f8b1054220 Phillies \n", + "\n", + " awayTeamId awayTeamName \\\n", + "0 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "1 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "2 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "3 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "4 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "\n", + " startTime attendance status created \n", + "0 2016-06-26 17:10:00+00:00 27318 closed 2016-10-06 06:25:15+00:00 \n", + "1 2016-06-25 20:10:00+00:00 29457 closed 2016-10-06 06:25:15+00:00 \n", + "2 2016-06-11 20:10:00+00:00 43114 closed 2016-10-06 06:25:15+00:00 \n", + "3 2016-06-12 17:35:00+00:00 31625 closed 2016-10-06 06:25:15+00:00 \n", + "4 2016-06-08 17:05:00+00:00 28650 closed 2016-10-06 06:25:15+00:00 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.peek()" + ] + }, + { + "cell_type": "markdown", + "id": "78e3d27d", + "metadata": {}, + "source": [ + "# Inspect the properties of 
the DataFrame\n", + "\n", + "Some properties, such as `dtypes`, can be retrieved without executing a query job." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "38f566c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "gameId string[pyarrow]\n", + "gameNumber Int64\n", + "seasonId string[pyarrow]\n", + "year Int64\n", + "type string[pyarrow]\n", + "dayNight string[pyarrow]\n", + "duration string[pyarrow]\n", + "duration_minutes Int64\n", + "homeTeamId string[pyarrow]\n", + "homeTeamName string[pyarrow]\n", + "awayTeamId string[pyarrow]\n", + "awayTeamName string[pyarrow]\n", + "startTime timestamp[us, tz=UTC][pyarrow]\n", + "attendance Int64\n", + "status string[pyarrow]\n", + "created timestamp[us, tz=UTC][pyarrow]\n", + "dtype: object" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "id": "38a59ecc", + "metadata": {}, + "source": [ + "Other properties, such as `shape` require a query. In this case, `shape` runs a `COUNT(1)` query." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e3b43d37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 0f85f12c-227c-4001-b851-6e9b9087ab7e is DONE. 0 Bytes processed. 
Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(2431, 16)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "13861abc-120c-4db6-ad0c-e414b85d3443", + "metadata": {}, + "source": [ + "### Select a subset of the DataFrame\n", + "\n", + "Filter columns by selecting a list of columns from the DataFrame.\n", + "\n", + "**Note**: Even with `index_col=bigframes.enums.DefaultIndexKind.NULL`, it is more efficient to do this selection in `read_gbq` / `read_gbq_table` except in cases where the total ordering ID columns can be pruned." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "05cb36e9-bb75-4f6f-8eb6-e4219df6e1d2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job efa6b4be-cf60-4951-9125-7d77fb6b6b44 is DONE. 174.4 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdyearhomeTeamNameawayTeamNameduration_minutes
0e14b6493-9e7f-404f-840a-8a680cc364bf2016MarlinsCubs187
11f32b347-cbcb-4c31-a145-0e685306d1682016MarlinsCubs189
20c2292d1-7398-48be-bf8e-b41dad5e1a432016BravesCubs165
38fbec734-a15a-42ab-8d51-60790de7750b2016BravesCubs222
489e514d5-fbf5-4b9d-bdac-6ca45bfd18dd2016PhilliesCubs164
\n", + "
" + ], + "text/plain": [ + " gameId year homeTeamName awayTeamName \\\n", + "0 e14b6493-9e7f-404f-840a-8a680cc364bf 2016 Marlins Cubs \n", + "1 1f32b347-cbcb-4c31-a145-0e685306d168 2016 Marlins Cubs \n", + "2 0c2292d1-7398-48be-bf8e-b41dad5e1a43 2016 Braves Cubs \n", + "3 8fbec734-a15a-42ab-8d51-60790de7750b 2016 Braves Cubs \n", + "4 89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd 2016 Phillies Cubs \n", + "\n", + " duration_minutes \n", + "0 187 \n", + "1 189 \n", + "2 165 \n", + "3 222 \n", + "4 164 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "column_filtered = df[[\"gameId\", \"year\", \"homeTeamName\", \"awayTeamName\", \"duration_minutes\"]]\n", + "column_filtered.peek()" + ] + }, + { + "cell_type": "markdown", + "id": "d4d52c41", + "metadata": {}, + "source": [ + "Filter by rows using a boolean Series. This Series must be derived from the DataFrame being filtered so that the NULL index can still align correctly." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a6b8b3ac-1df8-46ff-ac4f-d6e7657fc80c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 0be8e44d-854a-45ca-950b-269280e3de41 is DONE. 582.8 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdgameNumberseasonIdyeartypedayNightdurationduration_minuteshomeTeamIdhomeTeamNameawayTeamIdawayTeamNamestartTimeattendancestatuscreated
063f14670-c28e-432b-84ee-1a2c6ac295271565de4be-dc80-4849-a7e1-54bc79156cc82016REGN2:4316303556285-bdbb-4576-a06d-42f71f46ddc5Marlins55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-23 23:10:00+00:0025291closed2016-10-06 06:25:15+00:00
1bf4e80d1-3125-44fa-8a89-de93d039d4651565de4be-dc80-4849-a7e1-54bc79156cc82016REGN3:2420403556285-bdbb-4576-a06d-42f71f46ddc5Marlins55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-24 23:10:00+00:0024385closed2016-10-06 06:25:15+00:00
2e8af534c-36ed-4ff9-8511-780825fdd0411565de4be-dc80-4849-a7e1-54bc79156cc82016REGN2:5117112079497-e414-450a-8bf2-29f91de646bfBraves55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-10 23:35:00+00:0030547closed2016-10-06 06:25:15+00:00
3e599c525-ac42-4b54-928d-7ee5fbe67dd91565de4be-dc80-4849-a7e1-54bc79156cc82016REGN2:451652142e1ba-3b40-445c-b8bb-f1f8b1054220Phillies55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-07 23:05:00+00:0027381closed2016-10-06 06:25:15+00:00
4d80ffb65-57a4-42c9-ae1c-2c51d06503361565de4be-dc80-4849-a7e1-54bc79156cc82016REGN3:051852142e1ba-3b40-445c-b8bb-f1f8b1054220Phillies55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-06 23:05:00+00:0022162closed2016-10-06 06:25:15+00:00
\n", + "
" + ], + "text/plain": [ + " gameId gameNumber \\\n", + "0 63f14670-c28e-432b-84ee-1a2c6ac29527 1 \n", + "1 bf4e80d1-3125-44fa-8a89-de93d039d465 1 \n", + "2 e8af534c-36ed-4ff9-8511-780825fdd041 1 \n", + "3 e599c525-ac42-4b54-928d-7ee5fbe67dd9 1 \n", + "4 d80ffb65-57a4-42c9-ae1c-2c51d0650336 1 \n", + "\n", + " seasonId year type dayNight duration \\\n", + "0 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 2:43 \n", + "1 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 3:24 \n", + "2 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 2:51 \n", + "3 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 2:45 \n", + "4 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 3:05 \n", + "\n", + " duration_minutes homeTeamId homeTeamName \\\n", + "0 163 03556285-bdbb-4576-a06d-42f71f46ddc5 Marlins \n", + "1 204 03556285-bdbb-4576-a06d-42f71f46ddc5 Marlins \n", + "2 171 12079497-e414-450a-8bf2-29f91de646bf Braves \n", + "3 165 2142e1ba-3b40-445c-b8bb-f1f8b1054220 Phillies \n", + "4 185 2142e1ba-3b40-445c-b8bb-f1f8b1054220 Phillies \n", + "\n", + " awayTeamId awayTeamName \\\n", + "0 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "1 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "2 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "3 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "4 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", + "\n", + " startTime attendance status created \n", + "0 2016-06-23 23:10:00+00:00 25291 closed 2016-10-06 06:25:15+00:00 \n", + "1 2016-06-24 23:10:00+00:00 24385 closed 2016-10-06 06:25:15+00:00 \n", + "2 2016-06-10 23:35:00+00:00 30547 closed 2016-10-06 06:25:15+00:00 \n", + "3 2016-06-07 23:05:00+00:00 27381 closed 2016-10-06 06:25:15+00:00 \n", + "4 2016-06-06 23:05:00+00:00 22162 closed 2016-10-06 06:25:15+00:00 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "night_games = df[df['dayNight'] == 'N']\n", + "night_games.peek()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + 
"id": "80e9a2e2-c4c9-4c17-bbd0-06882d7657fe", + "metadata": {}, + "source": [ + "### Join two DataFrames\n", + "\n", + "Even though pandas usually joins by the index, NULL index objects can still be manually joined by a column using the `on` parameter in `merge`." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "3f09ff32-ef43-4fab-a86b-8868afc34363", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 5d2c69d2-33fe-4513-923b-fd64f4da098b is DONE. 113.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdhomeTeamName
0e14b6493-9e7f-404f-840a-8a680cc364bfMarlins
11f32b347-cbcb-4c31-a145-0e685306d168Marlins
20c2292d1-7398-48be-bf8e-b41dad5e1a43Braves
38fbec734-a15a-42ab-8d51-60790de7750bBraves
489e514d5-fbf5-4b9d-bdac-6ca45bfd18ddPhillies
\n", + "
" + ], + "text/plain": [ + " gameId homeTeamName\n", + "0 e14b6493-9e7f-404f-840a-8a680cc364bf Marlins\n", + "1 1f32b347-cbcb-4c31-a145-0e685306d168 Marlins\n", + "2 0c2292d1-7398-48be-bf8e-b41dad5e1a43 Braves\n", + "3 8fbec734-a15a-42ab-8d51-60790de7750b Braves\n", + "4 89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd Phillies" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 = df[[\"gameId\", \"homeTeamName\"]]\n", + "df1.peek()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5331d2c8-7912-4d96-8da1-f64b57374df3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job b6b70d6d-a490-44d6-ba74-0ee32b4f0a1a is DONE. 582.8 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 68acd168-8b42-44f8-8702-99618935991e is DONE. 94 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdawayTeamName
0af72a0b9-65f7-49fb-9b30-d505068bdf6dBrewers
1d60c6036-0ce1-4c90-8dd9-de3b403c92a8Brewers
\n", + "
" + ], + "text/plain": [ + " gameId awayTeamName\n", + "0 af72a0b9-65f7-49fb-9b30-d505068bdf6d Brewers\n", + "1 d60c6036-0ce1-4c90-8dd9-de3b403c92a8 Brewers" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = df[[\"gameId\", \"awayTeamName\"]].head(2)\n", + "df2.peek()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a574ad3e-a219-454c-8bb5-c5ed6627f2c6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 0ac171dd-3859-4589-b7ff-59fd81ec3c3a is DONE. 582.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 034f8807-c128-444a-8033-0c95f34b0e32 is DONE. 111 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdhomeTeamNameawayTeamName
0af72a0b9-65f7-49fb-9b30-d505068bdf6dRedsBrewers
1d60c6036-0ce1-4c90-8dd9-de3b403c92a8NationalsBrewers
\n", + "
" + ], + "text/plain": [ + " gameId homeTeamName awayTeamName\n", + "0 af72a0b9-65f7-49fb-9b30-d505068bdf6d Reds Brewers\n", + "1 d60c6036-0ce1-4c90-8dd9-de3b403c92a8 Nationals Brewers" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged = df1.merge(df2, on=\"gameId\", how=\"inner\")\n", + "merged.peek()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "288e7a95-a077-46c4-8fe6-802474c01f8b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 30fd5a60-772c-4ef0-a151-5ab390ff4322 is DONE. 582.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 701fa9a8-1ec6-49b9-ac41-228cb34d4c8c is DONE. 114.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdhomeTeamNameawayTeamName
0039bb40e-7613-4674-a653-584b93e9b21bAmerican League<NA>
178000e12-2ef3-4246-adc1-c8a4d157631cAngels<NA>
2de5555dc-9228-4f7c-88ae-4451e3ffb980Angels<NA>
3f29a2754-004b-436c-91fe-3d86c0bb17a8Angels<NA>
48e5af008-8a07-4f9a-90cb-336ca4c84c71Angels<NA>
\n", + "
" + ], + "text/plain": [ + " gameId homeTeamName awayTeamName\n", + "0 039bb40e-7613-4674-a653-584b93e9b21b American League \n", + "1 78000e12-2ef3-4246-adc1-c8a4d157631c Angels \n", + "2 de5555dc-9228-4f7c-88ae-4451e3ffb980 Angels \n", + "3 f29a2754-004b-436c-91fe-3d86c0bb17a8 Angels \n", + "4 8e5af008-8a07-4f9a-90cb-336ca4c84c71 Angels " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged = df1.merge(df2, on=\"gameId\", how=\"outer\")\n", + "merged.peek()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "7ee87a01-2ff5-4021-855d-44b71cf2a225", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job e3d8168c-48e9-4ba9-a916-10259ad9c0ea is DONE. 582.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 82d2a5e4-66a8-4478-92de-57d3f806aa76 is DONE. 114.0 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdhomeTeamNameawayTeamName
0039bb40e-7613-4674-a653-584b93e9b21bAmerican League<NA>
1f6fcd83c-e130-487c-a0cc-d00b2712d08bAngels<NA>
2fe401dd2-089c-4822-8657-4d510d460f38Angels<NA>
3c894bdee-5dda-49f4-87c8-53b9b9bfcd3bAngels<NA>
4bbda59d9-fd52-4bed-bcfb-2ceed4be997cAngels<NA>
\n", + "
" + ], + "text/plain": [ + " gameId homeTeamName awayTeamName\n", + "0 039bb40e-7613-4674-a653-584b93e9b21b American League \n", + "1 f6fcd83c-e130-487c-a0cc-d00b2712d08b Angels \n", + "2 fe401dd2-089c-4822-8657-4d510d460f38 Angels \n", + "3 c894bdee-5dda-49f4-87c8-53b9b9bfcd3b Angels \n", + "4 bbda59d9-fd52-4bed-bcfb-2ceed4be997c Angels " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged = df1.merge(df2, on=\"gameId\", how=\"left\")\n", + "merged.peek()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "330ed69c-f122-4af9-bf5e-96e309d3fa0c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 518ed511-606a-42b2-a28d-61a601eccfa7 is DONE. 582.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job bc381640-74e0-4885-9c32-87805a49f357 is DONE. 111 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdhomeTeamNameawayTeamName
0af72a0b9-65f7-49fb-9b30-d505068bdf6dRedsBrewers
1d60c6036-0ce1-4c90-8dd9-de3b403c92a8NationalsBrewers
\n", + "
" + ], + "text/plain": [ + " gameId homeTeamName awayTeamName\n", + "0 af72a0b9-65f7-49fb-9b30-d505068bdf6d Reds Brewers\n", + "1 d60c6036-0ce1-4c90-8dd9-de3b403c92a8 Nationals Brewers" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged = df1.merge(df2, on=\"gameId\", how=\"right\")\n", + "merged.peek()" + ] + }, + { + "cell_type": "markdown", + "id": "162eede7", + "metadata": {}, + "source": [ + "### Download the result as (in-memory) pandas DataFrame\n", + "\n", + "Use the `ordered=False` argument for more efficient query execution." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ab429fa5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 2d4fbd55-ba6a-46d2-87ae-5da416ad3642 is DONE. 159 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gameIdhomeTeamNameawayTeamName
0d60c6036-0ce1-4c90-8dd9-de3b403c92a8NationalsBrewers
1af72a0b9-65f7-49fb-9b30-d505068bdf6dRedsBrewers
\n", + "
" + ], + "text/plain": [ + " gameId homeTeamName awayTeamName\n", + "0 d60c6036-0ce1-4c90-8dd9-de3b403c92a8 Nationals Brewers\n", + "1 af72a0b9-65f7-49fb-9b30-d505068bdf6d Reds Brewers" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfp = merged.to_pandas(ordered=False)\n", + "dfp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "896212ab", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/system/conftest.py b/tests/system/conftest.py index f7fbd5f4b..250169308 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -395,7 +395,7 @@ def scalars_df_index( @pytest.fixture(scope="session") -def scalars_df_empty_index( +def scalars_df_null_index( scalars_table_id: str, session: bigframes.Session ) -> bigframes.dataframe.DataFrame: """DataFrame pointing at test data.""" diff --git a/tests/system/small/test_empty_index.py b/tests/system/small/test_empty_index.py deleted file mode 100644 index 3216264a8..000000000 --- a/tests/system/small/test_empty_index.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import pandas as pd -import pytest - -import bigframes.exceptions -import bigframes.pandas as bpd -from tests.system.utils import skip_legacy_pandas - - -def test_empty_index_materialize( - scalars_df_empty_index, scalars_pandas_df_default_index -): - bf_result = scalars_df_empty_index.to_pandas() - pd.testing.assert_frame_equal( - bf_result, scalars_pandas_df_default_index, check_index_type=False - ) - - -def test_empty_index_series_repr( - scalars_df_empty_index, scalars_pandas_df_default_index -): - bf_result = scalars_df_empty_index["int64_too"].head(5).__repr__() - pd_result = ( - scalars_pandas_df_default_index["int64_too"] - .head(5) - .to_string(dtype=True, index=False, length=False, name=True) - ) - assert bf_result == pd_result - - -def test_empty_index_dataframe_repr( - scalars_df_empty_index, scalars_pandas_df_default_index -): - bf_result = scalars_df_empty_index[["int64_too", "int64_col"]].head(5).__repr__() - pd_result = ( - scalars_pandas_df_default_index[["int64_too", "int64_col"]] - .head(5) - .to_string(index=False) - ) - assert bf_result == pd_result + "\n\n[5 rows x 2 columns]" - - -def test_empty_index_reset_index( - scalars_df_empty_index, scalars_pandas_df_default_index -): - bf_result = scalars_df_empty_index.reset_index().to_pandas() - pd_result = scalars_pandas_df_default_index.reset_index(drop=True) - pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) - - -def test_empty_index_set_index(scalars_df_empty_index, scalars_pandas_df_default_index): - bf_result = 
scalars_df_empty_index.set_index("int64_col").to_pandas() - pd_result = scalars_pandas_df_default_index.set_index("int64_col") - pd.testing.assert_frame_equal(bf_result, pd_result) - - -def test_empty_index_concat(scalars_df_empty_index, scalars_pandas_df_default_index): - bf_result = bpd.concat( - [scalars_df_empty_index, scalars_df_empty_index], axis=0 - ).to_pandas() - pd_result = pd.concat( - [scalars_pandas_df_default_index, scalars_pandas_df_default_index], axis=0 - ) - pd.testing.assert_frame_equal(bf_result, pd_result.reset_index(drop=True)) - - -def test_empty_index_aggregate(scalars_df_empty_index, scalars_pandas_df_default_index): - bf_result = scalars_df_empty_index.count().to_pandas() - pd_result = scalars_pandas_df_default_index.count() - - pd_result.index = pd_result.index.astype("string[pyarrow]") - - pd.testing.assert_series_equal( - bf_result, pd_result, check_dtype=False, check_index_type=False - ) - - -def test_empty_index_groupby_aggregate( - scalars_df_empty_index, scalars_pandas_df_default_index -): - bf_result = scalars_df_empty_index.groupby("int64_col").count().to_pandas() - pd_result = scalars_pandas_df_default_index.groupby("int64_col").count() - - pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) - - -@skip_legacy_pandas -def test_empty_index_analytic(scalars_df_empty_index, scalars_pandas_df_default_index): - bf_result = scalars_df_empty_index["int64_col"].cumsum().to_pandas() - pd_result = scalars_pandas_df_default_index["int64_col"].cumsum() - pd.testing.assert_series_equal( - bf_result, pd_result.reset_index(drop=True), check_dtype=False - ) - - -def test_empty_index_groupby_analytic( - scalars_df_empty_index, scalars_pandas_df_default_index -): - bf_result = ( - scalars_df_empty_index.groupby("bool_col")["int64_col"].cummax().to_pandas() - ) - pd_result = scalars_pandas_df_default_index.groupby("bool_col")[ - "int64_col" - ].cummax() - pd.testing.assert_series_equal( - bf_result, 
pd_result.reset_index(drop=True), check_dtype=False - ) - - -@skip_legacy_pandas -def test_empty_index_stack(scalars_df_empty_index, scalars_pandas_df_default_index): - stacking_cols = ["int64_col", "int64_too"] - bf_result = scalars_df_empty_index[stacking_cols].stack().to_pandas() - pd_result = ( - scalars_pandas_df_default_index[stacking_cols] - .stack(future_stack=True) - .droplevel(level=0, axis=0) - ) - pd_result.index = pd_result.index.astype(bf_result.index.dtype) - pd.testing.assert_series_equal( - bf_result, - pd_result, - check_dtype=False, - ) - - -def test_empty_index_series_self_aligns( - scalars_df_empty_index, scalars_pandas_df_default_index -): - bf_result = ( - scalars_df_empty_index["int64_col"] + scalars_df_empty_index["int64_too"] - ) - pd_result = ( - scalars_pandas_df_default_index["int64_col"] - + scalars_pandas_df_default_index["int64_too"] - ) - pd.testing.assert_series_equal( - bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False - ) - - -def test_empty_index_df_self_aligns( - scalars_df_empty_index, scalars_pandas_df_default_index -): - bf_result = ( - scalars_df_empty_index[["int64_col", "float64_col"]] - + scalars_df_empty_index[["int64_col", "float64_col"]] - ) - pd_result = ( - scalars_pandas_df_default_index[["int64_col", "float64_col"]] - + scalars_pandas_df_default_index[["int64_col", "float64_col"]] - ) - pd.testing.assert_frame_equal( - bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False - ) - - -def test_empty_index_setitem(scalars_df_empty_index, scalars_pandas_df_default_index): - bf_result = scalars_df_empty_index.copy() - bf_result["new_col"] = ( - scalars_df_empty_index["int64_col"] + scalars_df_empty_index["float64_col"] - ) - pd_result = scalars_pandas_df_default_index.copy() - pd_result["new_col"] = ( - scalars_pandas_df_default_index["int64_col"] - + scalars_pandas_df_default_index["float64_col"] - ) - pd.testing.assert_frame_equal( - bf_result.to_pandas(), 
pd_result.reset_index(drop=True), check_dtype=False - ) - - -def test_empty_index_df_concat(scalars_df_empty_index, scalars_pandas_df_default_index): - bf_result = bpd.concat([scalars_df_empty_index, scalars_df_empty_index]) - pd_result = pd.concat( - [scalars_pandas_df_default_index, scalars_pandas_df_default_index] - ) - pd.testing.assert_frame_equal( - bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False - ) - - -def test_empty_index_align_error(scalars_df_empty_index): - with pytest.raises(bigframes.exceptions.NullIndexError): - _ = ( - scalars_df_empty_index["int64_col"] - + scalars_df_empty_index["int64_col"].cumsum() - ) - - -def test_empty_index_loc_error(scalars_df_empty_index): - with pytest.raises(bigframes.exceptions.NullIndexError): - scalars_df_empty_index["int64_col"].loc[1] - - -def test_empty_index_at_error(scalars_df_empty_index): - with pytest.raises(bigframes.exceptions.NullIndexError): - scalars_df_empty_index["int64_col"].at[1] - - -def test_empty_index_idxmin_error(scalars_df_empty_index): - with pytest.raises(bigframes.exceptions.NullIndexError): - scalars_df_empty_index[["int64_col", "int64_too"]].idxmin() - - -def test_empty_index_index_property(scalars_df_empty_index): - with pytest.raises(bigframes.exceptions.NullIndexError): - _ = scalars_df_empty_index.index - - -def test_empty_index_transpose(scalars_df_empty_index): - with pytest.raises(bigframes.exceptions.NullIndexError): - _ = scalars_df_empty_index.T diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py new file mode 100644 index 000000000..27a3d8dff --- /dev/null +++ b/tests/system/small/test_null_index.py @@ -0,0 +1,288 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pandas as pd +import pytest + +import bigframes.exceptions +import bigframes.pandas as bpd +from tests.system.utils import skip_legacy_pandas + + +def test_null_index_materialize(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = scalars_df_null_index.to_pandas() + pd.testing.assert_frame_equal( + bf_result, scalars_pandas_df_default_index, check_index_type=False + ) + + +def test_null_index_series_repr(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = scalars_df_null_index["int64_too"].head(5).__repr__() + pd_result = ( + scalars_pandas_df_default_index["int64_too"] + .head(5) + .to_string(dtype=True, index=False, length=False, name=True) + ) + assert bf_result == pd_result + + +def test_null_index_dataframe_repr( + scalars_df_null_index, scalars_pandas_df_default_index +): + bf_result = scalars_df_null_index[["int64_too", "int64_col"]].head(5).__repr__() + pd_result = ( + scalars_pandas_df_default_index[["int64_too", "int64_col"]] + .head(5) + .to_string(index=False) + ) + assert bf_result == pd_result + "\n\n[5 rows x 2 columns]" + + +def test_null_index_reset_index(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = scalars_df_null_index.reset_index().to_pandas() + pd_result = scalars_pandas_df_default_index.reset_index(drop=True) + pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + + +def test_null_index_set_index(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = scalars_df_null_index.set_index("int64_col").to_pandas() + pd_result = 
scalars_pandas_df_default_index.set_index("int64_col") + pd.testing.assert_frame_equal(bf_result, pd_result) + + +def test_null_index_concat(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = bpd.concat( + [scalars_df_null_index, scalars_df_null_index], axis=0 + ).to_pandas() + pd_result = pd.concat( + [scalars_pandas_df_default_index, scalars_pandas_df_default_index], axis=0 + ) + pd.testing.assert_frame_equal(bf_result, pd_result.reset_index(drop=True)) + + +def test_null_index_aggregate(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = scalars_df_null_index.count().to_pandas() + pd_result = scalars_pandas_df_default_index.count() + + pd_result.index = pd_result.index.astype("string[pyarrow]") + + pd.testing.assert_series_equal( + bf_result, pd_result, check_dtype=False, check_index_type=False + ) + + +def test_null_index_groupby_aggregate( + scalars_df_null_index, scalars_pandas_df_default_index +): + bf_result = scalars_df_null_index.groupby("int64_col").count().to_pandas() + pd_result = scalars_pandas_df_default_index.groupby("int64_col").count() + + pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + + +@skip_legacy_pandas +def test_null_index_analytic(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = scalars_df_null_index["int64_col"].cumsum().to_pandas() + pd_result = scalars_pandas_df_default_index["int64_col"].cumsum() + pd.testing.assert_series_equal( + bf_result, pd_result.reset_index(drop=True), check_dtype=False + ) + + +def test_null_index_groupby_analytic( + scalars_df_null_index, scalars_pandas_df_default_index +): + bf_result = ( + scalars_df_null_index.groupby("bool_col")["int64_col"].cummax().to_pandas() + ) + pd_result = scalars_pandas_df_default_index.groupby("bool_col")[ + "int64_col" + ].cummax() + pd.testing.assert_series_equal( + bf_result, pd_result.reset_index(drop=True), check_dtype=False + ) + + +def test_null_index_merge_left_null_index_object( + 
scalars_df_null_index, scalars_df_default_index, scalars_pandas_df_default_index +): + df1 = scalars_df_null_index[scalars_df_null_index["int64_col"] > 0] + df1_pd = scalars_pandas_df_default_index[ + scalars_pandas_df_default_index["int64_col"] > 0 + ] + assert not df1._has_index + df2 = scalars_df_default_index[scalars_df_default_index["int64_col"] <= 55555] + df2_pd = scalars_pandas_df_default_index[ + scalars_pandas_df_default_index["int64_col"] <= 55555 + ] + assert df2._has_index + + got = df1.merge(df2, how="inner", on="bool_col") + expected = df1_pd.merge(df2_pd, how="inner", on="bool_col") + + # Combining any NULL index object should result in a NULL index. + # This keeps us from generating an index if the user joins a large + # BigQuery table against small local data, for example. + assert not got._has_index + assert got.shape == expected.shape + + +def test_null_index_merge_right_null_index_object( + scalars_df_null_index, scalars_df_default_index, scalars_pandas_df_default_index +): + df1 = scalars_df_default_index[scalars_df_default_index["int64_col"] > 0] + df1_pd = scalars_pandas_df_default_index[ + scalars_pandas_df_default_index["int64_col"] > 0 + ] + assert df1._has_index + df2 = scalars_df_null_index[scalars_df_null_index["int64_col"] <= 55555] + df2_pd = scalars_pandas_df_default_index[ + scalars_pandas_df_default_index["int64_col"] <= 55555 + ] + assert not df2._has_index + + got = df1.merge(df2, how="left", on="bool_col") + expected = df1_pd.merge(df2_pd, how="left", on="bool_col") + + # Combining any NULL index object should result in a NULL index. + # This keeps us from generating an index if the user joins a large + # BigQuery table against small local data, for example. 
+ assert not got._has_index + assert got.shape == expected.shape + + +def test_null_index_merge_two_null_index_objects( + scalars_df_null_index, scalars_pandas_df_default_index +): + df1 = scalars_df_null_index[scalars_df_null_index["int64_col"] > 0] + df1_pd = scalars_pandas_df_default_index[ + scalars_pandas_df_default_index["int64_col"] > 0 + ] + assert not df1._has_index + df2 = scalars_df_null_index[scalars_df_null_index["int64_col"] <= 55555] + df2_pd = scalars_pandas_df_default_index[ + scalars_pandas_df_default_index["int64_col"] <= 55555 + ] + assert not df2._has_index + + got = df1.merge(df2, how="outer", on="bool_col") + expected = df1_pd.merge(df2_pd, how="outer", on="bool_col") + + assert not got._has_index + assert got.shape == expected.shape + + +@skip_legacy_pandas +def test_null_index_stack(scalars_df_null_index, scalars_pandas_df_default_index): + stacking_cols = ["int64_col", "int64_too"] + bf_result = scalars_df_null_index[stacking_cols].stack().to_pandas() + pd_result = ( + scalars_pandas_df_default_index[stacking_cols] + .stack(future_stack=True) + .droplevel(level=0, axis=0) + ) + pd_result.index = pd_result.index.astype(bf_result.index.dtype) + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_dtype=False, + ) + + +def test_null_index_series_self_aligns( + scalars_df_null_index, scalars_pandas_df_default_index +): + bf_result = scalars_df_null_index["int64_col"] + scalars_df_null_index["int64_too"] + pd_result = ( + scalars_pandas_df_default_index["int64_col"] + + scalars_pandas_df_default_index["int64_too"] + ) + pd.testing.assert_series_equal( + bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False + ) + + +def test_null_index_df_self_aligns( + scalars_df_null_index, scalars_pandas_df_default_index +): + bf_result = ( + scalars_df_null_index[["int64_col", "float64_col"]] + + scalars_df_null_index[["int64_col", "float64_col"]] + ) + pd_result = ( + scalars_pandas_df_default_index[["int64_col", 
"float64_col"]] + + scalars_pandas_df_default_index[["int64_col", "float64_col"]] + ) + pd.testing.assert_frame_equal( + bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False + ) + + +def test_null_index_setitem(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = scalars_df_null_index.copy() + bf_result["new_col"] = ( + scalars_df_null_index["int64_col"] + scalars_df_null_index["float64_col"] + ) + pd_result = scalars_pandas_df_default_index.copy() + pd_result["new_col"] = ( + scalars_pandas_df_default_index["int64_col"] + + scalars_pandas_df_default_index["float64_col"] + ) + pd.testing.assert_frame_equal( + bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False + ) + + +def test_null_index_df_concat(scalars_df_null_index, scalars_pandas_df_default_index): + bf_result = bpd.concat([scalars_df_null_index, scalars_df_null_index]) + pd_result = pd.concat( + [scalars_pandas_df_default_index, scalars_pandas_df_default_index] + ) + pd.testing.assert_frame_equal( + bf_result.to_pandas(), pd_result.reset_index(drop=True), check_dtype=False + ) + + +def test_null_index_align_error(scalars_df_null_index): + with pytest.raises(bigframes.exceptions.NullIndexError): + _ = ( + scalars_df_null_index["int64_col"] + + scalars_df_null_index["int64_col"].cumsum() + ) + + +def test_null_index_loc_error(scalars_df_null_index): + with pytest.raises(bigframes.exceptions.NullIndexError): + scalars_df_null_index["int64_col"].loc[1] + + +def test_null_index_at_error(scalars_df_null_index): + with pytest.raises(bigframes.exceptions.NullIndexError): + scalars_df_null_index["int64_col"].at[1] + + +def test_null_index_idxmin_error(scalars_df_null_index): + with pytest.raises(bigframes.exceptions.NullIndexError): + scalars_df_null_index[["int64_col", "int64_too"]].idxmin() + + +def test_null_index_index_property(scalars_df_null_index): + with pytest.raises(bigframes.exceptions.NullIndexError): + _ = scalars_df_null_index.index + + +def 
test_null_index_transpose(scalars_df_null_index): + with pytest.raises(bigframes.exceptions.NullIndexError): + _ = scalars_df_null_index.T