From 95cccca47e12fb2cdb75b05e3ada8259f5eab2d1 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:07:09 -0600 Subject: [PATCH 01/20] docs: scaffold Sphinx documentation site --- .gitignore | 11 +++++ docs/Makefile | 31 ++++++++++++++ docs/README.md | 49 ++++++++++++++++++++++ docs/build.sh | 31 ++++++++++++++ docs/make.bat | 35 ++++++++++++++++ docs/requirements.txt | 20 +++++++++ docs/source/_static/.gitkeep | 0 docs/source/_templates/.gitkeep | 0 docs/source/conf.py | 56 ++++++++++++++++++++++++++ docs/source/contributor-guide/index.md | 22 ++++++++++ docs/source/index.md | 29 +++++++++++++ docs/source/user-guide/index.md | 22 ++++++++++ 12 files changed, 306 insertions(+) create mode 100644 .gitignore create mode 100644 docs/Makefile create mode 100644 docs/README.md create mode 100755 docs/build.sh create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/source/_static/.gitkeep create mode 100644 docs/source/_templates/.gitkeep create mode 100644 docs/source/conf.py create mode 100644 docs/source/contributor-guide/index.md create mode 100644 docs/source/index.md create mode 100644 docs/source/user-guide/index.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..009119e --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +target/ +*.class +.idea/ +.vscode/ +*.iml +.DS_Store +tpch-data/ +.claude +docs/superpowers +docs/build/ +docs/venv/ diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..714088d --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Minimal makefile for Sphinx documentation + +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..83c5f37 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,49 @@ + + +# Apache DataFusion Java Documentation + +This directory contains the Sphinx source for the Apache DataFusion Java +documentation site. + +## Build + +Building the docs requires Python 3.9 or newer. A virtual environment under +`docs/venv/` is the recommended workflow. + +```sh +cd docs +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +./build.sh +``` + +The generated site is written to `docs/build/html/`. Open +`docs/build/html/index.html` in a browser to preview. + +Subsequent builds need only: + +```sh +cd docs +source venv/bin/activate +./build.sh +``` + +`./build.sh` runs `sphinx-build` with `-W` so warnings fail the build. diff --git a/docs/build.sh b/docs/build.sh new file mode 100755 index 0000000..c487a09 --- /dev/null +++ b/docs/build.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +cd "$(dirname "$0")" + +rm -rf build + +if [ -d venv ]; then + # shellcheck disable=SC1091 + source venv/bin/activate +fi + +sphinx-build -b html -W --keep-going source build/html diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..dc1312a --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..6ac8d0e --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +sphinx>=7.0,<8.0 +myst-parser>=2.0,<4.0 +pydata-sphinx-theme>=0.16.1,<0.17.0 diff --git a/docs/source/_static/.gitkeep b/docs/source/_static/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/source/_templates/.gitkeep b/docs/source/_templates/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..cf86b40 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+"""Sphinx configuration for the Apache DataFusion Java documentation."""
+
+project = "Apache DataFusion Java"
+copyright = "2026, Apache Software Foundation"
+author = "Apache Software Foundation"
+
+extensions = [
+    "sphinx.ext.mathjax",
+    "sphinx.ext.napoleon",
+    "myst_parser",
+]
+
+source_suffix = {
+    ".md": "markdown",
+}
+
+templates_path = ["_templates"]
+exclude_patterns = []
+
+html_theme = "pydata_sphinx_theme"
+html_theme_options = {
+    "use_edit_page_button": False,
+    "show_toc_level": 2,
+}
+
+html_context = {
+    "github_user": "apache",
+    "github_repo": "datafusion-java",
+    "github_version": "main",
+    "doc_path": "docs/source",
+}
+
+html_static_path = ["_static"]
+
+# Auto-generate anchor links for headings h1, h2, h3.
+myst_heading_anchors = 3
+
+# Enable nice rendering of GitHub-style task lists.
+myst_enable_extensions = ["tasklist"]
diff --git a/docs/source/contributor-guide/index.md b/docs/source/contributor-guide/index.md
new file mode 100644
index 0000000..bac4c60
--- /dev/null
+++ b/docs/source/contributor-guide/index.md
@@ -0,0 +1,22 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Contributor Guide
+
+This guide is under construction.
diff --git a/docs/source/index.md b/docs/source/index.md
new file mode 100644
index 0000000..afbc75a
--- /dev/null
+++ b/docs/source/index.md
@@ -0,0 +1,29 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Apache DataFusion Java
+
+```{toctree}
+:maxdepth: 1
+:caption: Documentation
+:hidden:
+
+User Guide <user-guide/index>
+Contributor Guide <contributor-guide/index>
+```
diff --git a/docs/source/user-guide/index.md b/docs/source/user-guide/index.md
new file mode 100644
index 0000000..62fd3fd
--- /dev/null
+++ b/docs/source/user-guide/index.md
@@ -0,0 +1,22 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# User Guide
+
+This guide is under construction.
From 0dcab135e5ea600d7af4d319096bcb2851ecfbd7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:10:54 -0600 Subject: [PATCH 02/20] docs: add ASF license header to make.bat --- docs/make.bat | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/make.bat b/docs/make.bat index dc1312a..08805d0 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,5 +1,22 @@ @ECHO OFF +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + pushd %~dp0 REM Command file for Sphinx documentation From 19d166648de34b71bfb226e1e391fb7c7d327421 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:11:55 -0600 Subject: [PATCH 03/20] docs: write landing page with toctree --- docs/source/index.md | 72 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/docs/source/index.md b/docs/source/index.md index afbc75a..675021e 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -19,11 +19,77 @@ under the License. # Apache DataFusion Java +Java bindings for [Apache DataFusion]. Queries run in native Rust and results +return to the JVM as [Apache Arrow] batches via the Arrow C Data Interface. 
+ +[Apache DataFusion]: https://datafusion.apache.org/ +[Apache Arrow]: https://arrow.apache.org/ + +> Early development: no releases yet, API will change. Bug reports and +> contributions welcome. + +## Quickstart + +```java +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.datafusion.DataFrame; +import org.apache.datafusion.SessionContext; + +try (var allocator = new RootAllocator(); + var ctx = new SessionContext()) { + + ctx.registerParquet("orders", "/path/to/orders.parquet"); + + try (DataFrame df = ctx.sql( + "SELECT o_orderpriority, COUNT(*) AS n " + + "FROM orders GROUP BY o_orderpriority"); + ArrowReader reader = df.collect(allocator)) { + while (reader.loadNextBatch()) { + var batch = reader.getVectorSchemaRoot(); + // ... + } + } +} +``` + +See the [User Guide](user-guide/index.md) for installation, the DataFrame and +SQL APIs, and Parquet ingestion. See the [Contributor Guide](contributor-guide/index.md) +for build, test, and release workflows. 
+
+```{toctree}
+:maxdepth: 1
+:caption: Links
+:hidden:
+
+GitHub Repository <https://github.com/apache/datafusion-java>
+Issue Tracker <https://github.com/apache/datafusion-java/issues>
+Apache DataFusion <https://datafusion.apache.org/>
+Code of Conduct <https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md>
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: User Guide
+:hidden:
+
+user-guide/index
+user-guide/installation
+user-guide/quickstart
+user-guide/sessioncontext
+user-guide/dataframe
+user-guide/parquet
+user-guide/project-status
+```
+
 ```{toctree}
 :maxdepth: 1
-:caption: Documentation
+:caption: Contributor Guide
 :hidden:
 
-User Guide <user-guide/index>
-Contributor Guide <contributor-guide/index>
+contributor-guide/index
+contributor-guide/development
+contributor-guide/code-style
+contributor-guide/releasing
+contributor-guide/updating-datafusion-version
 ```

From da500e7801fcab12b369d4ae01a2fd844e6803ba Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Wed, 13 May 2026 07:13:53 -0600
Subject: [PATCH 04/20] docs: add user guide installation page

---
 docs/source/user-guide/installation.md | 52 ++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 docs/source/user-guide/installation.md

diff --git a/docs/source/user-guide/installation.md b/docs/source/user-guide/installation.md
new file mode 100644
index 0000000..3141355
--- /dev/null
+++ b/docs/source/user-guide/installation.md
@@ -0,0 +1,52 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Installation
+
+Apache DataFusion Java has not yet published a release. Until the first
+release, the only way to use the library is to build from source.
+
+## Requirements
+
+- **JDK 17 or newer.** Set `JAVA_HOME` to point at it.
+- **Rust toolchain (stable).** Install via [rustup].
+
+[rustup]: https://rustup.rs/
+
+## Build from source
+
+```sh
+git clone https://github.com/apache/datafusion-java.git
+cd datafusion-java
+make test
+```
+
+`make test` compiles the native Rust crate, then runs the JUnit tests
+against it. The native library must be built before the JVM tests can
+run.
+
+The first build in a fresh checkout reaches out to
+`raw.githubusercontent.com` to fetch the DataFusion `.proto` files used to
+generate the `datafusion-proto` Java classes.
Subsequent builds are +offline; the `download-maven-plugin` cache under +`~/.m2/repository/.cache/` satisfies them. + +For development workflow details — running individual tests, the TPC-H +integration test data, code style, and how to update the underlying +DataFusion version — see the [Contributor Guide](../contributor-guide/development.md). From dbc55e35bf1cf05685efc3113eba60109f399254 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:15:11 -0600 Subject: [PATCH 05/20] docs: write user guide landing page --- docs/source/user-guide/index.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/source/user-guide/index.md b/docs/source/user-guide/index.md index 62fd3fd..d5cbeb9 100644 --- a/docs/source/user-guide/index.md +++ b/docs/source/user-guide/index.md @@ -19,4 +19,23 @@ under the License. # User Guide -This guide is under construction. +Apache DataFusion Java is a thin Java binding over the +[Apache DataFusion](https://datafusion.apache.org/) query engine. SQL and +DataFrame queries execute in native Rust; results return to the JVM as +[Apache Arrow](https://arrow.apache.org/) record batches over the Arrow C +Data Interface. + +This guide covers installation, the `SessionContext` and `DataFrame` APIs, +and Parquet ingestion. + +- [Installation](installation.md) — JDK and Rust prerequisites, building + from source. +- [Quickstart](quickstart.md) — a complete example, walked through. +- [SessionContext](sessioncontext.md) — lifecycle and threading. +- [DataFrame and SQL](dataframe.md) — building and executing queries. +- [Parquet](parquet.md) — registering files and reading them with + `ParquetReadOptions`. +- [Project status](project-status.md) — snapshot of what works today. + +> Early development: no releases yet, API will change. Bug reports and +> contributions welcome. 
From 86eb8fbb564bd81a68ea03aa268e6ff83995fa2f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:16:18 -0600 Subject: [PATCH 06/20] docs: add user guide quickstart --- docs/source/user-guide/quickstart.md | 74 ++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 docs/source/user-guide/quickstart.md diff --git a/docs/source/user-guide/quickstart.md b/docs/source/user-guide/quickstart.md new file mode 100644 index 0000000..7d0df38 --- /dev/null +++ b/docs/source/user-guide/quickstart.md @@ -0,0 +1,74 @@ + + +# Quickstart + +This page walks through a complete query end-to-end. + +## The full example + +```java +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.datafusion.DataFrame; +import org.apache.datafusion.SessionContext; + +try (var allocator = new RootAllocator(); + var ctx = new SessionContext()) { + + ctx.registerParquet("orders", "/path/to/orders.parquet"); + + try (DataFrame df = ctx.sql( + "SELECT o_orderpriority, COUNT(*) AS n " + + "FROM orders GROUP BY o_orderpriority"); + ArrowReader reader = df.collect(allocator)) { + while (reader.loadNextBatch()) { + var batch = reader.getVectorSchemaRoot(); + // ... + } + } +} +``` + +## Walkthrough + +**Allocator.** `RootAllocator` is the Arrow off-heap memory allocator. Every +JVM-side Arrow buffer is tracked under an allocator; when the allocator is +closed, leaked buffers are reported. Use one allocator per query (or one +per application) and close it in a `try`-with-resources. + +**Session context.** `SessionContext` is the entry point into DataFusion. It +holds the catalog of registered tables and the query planner. It is +`AutoCloseable` and **not thread-safe** — use one per thread, or guard +access externally. + +**Registering data.** `registerParquet(name, path)` reads the file's footer +on call and exposes it under the given table name. See +[Parquet](parquet.md) for the options form. 
+ +**SQL.** `ctx.sql("...")` plans the query and returns a `DataFrame`. The +query is not executed until results are pulled. + +**Collecting results.** `df.collect(allocator)` starts native execution and +returns an `ArrowReader`. Each `loadNextBatch()` call pulls the next +`VectorSchemaRoot`; iterate until it returns `false`. + +**Cleanup.** Both `SessionContext` and `DataFrame` are `AutoCloseable`. Use +`try`-with-resources so native resources and Arrow buffers are released +even on exception. From b5e0c96e3fb34dcd1d8f103009f0af8cff5d0994 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:17:08 -0600 Subject: [PATCH 07/20] docs: add user guide sessioncontext page --- docs/source/user-guide/sessioncontext.md | 48 ++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 docs/source/user-guide/sessioncontext.md diff --git a/docs/source/user-guide/sessioncontext.md b/docs/source/user-guide/sessioncontext.md new file mode 100644 index 0000000..14111b8 --- /dev/null +++ b/docs/source/user-guide/sessioncontext.md @@ -0,0 +1,48 @@ + + +# SessionContext + +`SessionContext` is the entry point into DataFusion from Java. It owns the +catalog of registered tables and the query planner. + +## Lifecycle + +```java +try (SessionContext ctx = new SessionContext()) { + // register tables, build queries... +} +``` + +`SessionContext` is `AutoCloseable`. Closing it releases the underlying +native context. Use `try`-with-resources so the native side is freed even +on exception. + +## Threading + +A `SessionContext` is **not thread-safe**. Do not share one across threads +without external synchronization. The simplest pattern is one context per +thread. + +## What's configurable today + +Today, `SessionContext` exposes only data-source registration and query +construction. Tuning knobs that DataFusion offers natively +(`SessionConfig`, `RuntimeEnv`) are not yet wired through the Java API. 
+See [Project status](project-status.md) for the current shape of the API. From abbaf0917a845bec54f621748aa86ce4b391c653 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:17:59 -0600 Subject: [PATCH 08/20] docs: add user guide dataframe page --- docs/source/user-guide/dataframe.md | 90 +++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 docs/source/user-guide/dataframe.md diff --git a/docs/source/user-guide/dataframe.md b/docs/source/user-guide/dataframe.md new file mode 100644 index 0000000..e1ca33d --- /dev/null +++ b/docs/source/user-guide/dataframe.md @@ -0,0 +1,90 @@ + + +# DataFrame and SQL + +DataFusion Java supports two query interfaces: SQL strings via +`SessionContext.sql(String)`, and a programmatic DataFrame API. + +## SQL + +```java +try (DataFrame df = ctx.sql("SELECT a, b FROM t WHERE a > 10")) { + df.show(); +} +``` + +`sql(String)` plans the query and returns a `DataFrame`. Execution does +not start until you pull results. + +## DataFrame transformations + +The DataFrame API exposes `select` and `filter` today. Other +transformations are TBD — see [Project status](project-status.md). + +```java +try (DataFrame df = ctx.readParquet("/path/to/orders.parquet")) { + try (DataFrame filtered = df.filter("o_orderpriority = '1-URGENT'")) { + filtered.show(); + } +} +``` + +Each transformation returns a new `DataFrame` that must be closed. + +## Pulling results + +Three patterns are available: + +**Stream as Arrow batches.** Use `collect(allocator)` to pull the result +set as Arrow record batches via the [Arrow C Data Interface]: + +```java +try (DataFrame df = ctx.sql("SELECT ..."); + ArrowReader reader = df.collect(allocator)) { + while (reader.loadNextBatch()) { + var batch = reader.getVectorSchemaRoot(); + // process batch... 
+ } +} +``` + +[Arrow C Data Interface]: https://arrow.apache.org/docs/format/CDataInterface.html + +**Count rows.** `df.count()` returns the row count without materializing +the rows in the JVM. + +**Print for inspection.** `df.show()` and `df.show(int n)` print results +to standard output. Useful for exploration; not appropriate for +production code paths. + +## Schema introspection + +To get the schema of a registered table without running a query: + +```java +Schema schema = ctx.tableSchema("orders"); +``` + +## Plan input + +A DataFusion logical plan can be deserialized from `datafusion-proto` +bytes via `SessionContext.fromProto(byte[])`. The `datafusion-proto` Java +classes are generated by the Maven build. This is useful for accepting +plans produced by other DataFusion-aware tooling. From b7b692f48507e49949a2bd88687bd66420e7bd5c Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:18:55 -0600 Subject: [PATCH 09/20] docs: add user guide parquet page --- docs/source/user-guide/parquet.md | 69 +++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 docs/source/user-guide/parquet.md diff --git a/docs/source/user-guide/parquet.md b/docs/source/user-guide/parquet.md new file mode 100644 index 0000000..7febbdd --- /dev/null +++ b/docs/source/user-guide/parquet.md @@ -0,0 +1,69 @@ + + +# Parquet + +DataFusion Java reads Parquet through two entry points on `SessionContext`: +`registerParquet` to expose a file as a named table, and `readParquet` to +get a `DataFrame` directly. + +## Register a table + +```java +ctx.registerParquet("orders", "/path/to/orders.parquet"); + +try (DataFrame df = ctx.sql("SELECT * FROM orders LIMIT 10")) { + df.show(); +} +``` + +The file's footer is read at registration time. The table remains in the +catalog for the lifetime of the `SessionContext`. 
+ +## Read a DataFrame directly + +```java +try (DataFrame df = ctx.readParquet("/path/to/orders.parquet")) { + df.show(); +} +``` + +`readParquet` skips the catalog and hands back a `DataFrame` straight +away. + +## ParquetReadOptions + +Both entry points accept a `ParquetReadOptions` to tune the underlying +read. Construct one with the builder: + +```java +ParquetReadOptions opts = ParquetReadOptions.builder() + .fileExtension(".parquet") + .build(); + +ctx.registerParquet("orders", "/path/to/orders.parquet", opts); +// or +try (DataFrame df = ctx.readParquet("/path/to/orders.parquet", opts)) { + df.show(); +} +``` + +The supported options track what DataFusion exposes on its Rust +`ParquetReadOptions` builder. Inspect the class on the Java side for the +exact setters available in the version you are using. From f2767008cea3164f923e530451bb13975a9e6d44 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:19:45 -0600 Subject: [PATCH 10/20] docs: add user guide project status page --- docs/source/user-guide/project-status.md | 48 ++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 docs/source/user-guide/project-status.md diff --git a/docs/source/user-guide/project-status.md b/docs/source/user-guide/project-status.md new file mode 100644 index 0000000..aacb19f --- /dev/null +++ b/docs/source/user-guide/project-status.md @@ -0,0 +1,48 @@ + + +# Project status + +A snapshot of what works today. The library is in early development; the +API will change before the first release. + +## Query interfaces + +- [x] SQL: `SessionContext.sql(String)` +- [x] DataFrame: `select`, `filter` (other transformations TBD) +- [x] DataFusion-Proto `LogicalPlanNode`: `SessionContext.fromProto(byte[])`. + The `datafusion-proto` Java classes are generated by the build. 
+ +## Data sources + +- [x] Parquet via `registerParquet` / `readParquet`, with `ParquetReadOptions` +- [ ] CSV, JSON, Avro +- [ ] Custom catalog and table providers + +## Results + +- [x] `DataFrame.collect(allocator)` — Arrow C Data Interface stream +- [x] `DataFrame.count()`, `show()`, `show(int)` +- [x] `SessionContext.tableSchema(String)` + +## Not yet + +- [ ] `SessionConfig` / `RuntimeEnv` knobs +- [ ] Java UDFs +- [ ] `write_*` outputs From 4552569217f18e612c5df0292bee1a37c18ecd88 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:20:48 -0600 Subject: [PATCH 11/20] docs: write contributor guide landing page --- docs/source/contributor-guide/index.md | 28 +++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/docs/source/contributor-guide/index.md b/docs/source/contributor-guide/index.md index bac4c60..8afa315 100644 --- a/docs/source/contributor-guide/index.md +++ b/docs/source/contributor-guide/index.md @@ -19,4 +19,30 @@ under the License. # Contributor Guide -This guide is under construction. +Bug reports, design discussion, and patches are welcome. This project follows +the Apache DataFusion contribution model. + +## Filing issues and discussing changes + +- File bugs and feature requests on + [GitHub issues](https://github.com/apache/datafusion-java/issues). +- For larger or design-level discussion, the mailing list is + [dev@datafusion.apache.org](mailto:dev@datafusion.apache.org). +- Please open an issue before sending a PR for any significant change so + the approach can be agreed on first. + +## Development workflow + +Branch from `main`, write changes with +[conventional commit](https://www.conventionalcommits.org/) messages in +the imperative mood (e.g. `feat: add foo`, `fix(native): handle bar`), +and open a pull request targeting `main`. + +## Topics + +- [Development](development.md) — build prerequisites, running tests, + TPC-H test data, repo layout. 
+- [Code style](code-style.md) — formatters and license headers. +- [Releasing](releasing.md) — Apache release process (placeholder). +- [Updating DataFusion / protobuf version](updating-datafusion-version.md) — + step-by-step recipe. From 056d4876ae645695b4dfec229340affa4758ecfe Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:21:45 -0600 Subject: [PATCH 12/20] docs: add contributor guide development page --- docs/source/contributor-guide/development.md | 74 ++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 docs/source/contributor-guide/development.md diff --git a/docs/source/contributor-guide/development.md b/docs/source/contributor-guide/development.md new file mode 100644 index 0000000..e98b87d --- /dev/null +++ b/docs/source/contributor-guide/development.md @@ -0,0 +1,74 @@ + + +# Development + +## Build prerequisites + +- JDK 17 or newer. +- Rust toolchain (stable, installed via [rustup]). +- [`tpchgen-cli`] — only needed to generate test data for the Parquet + integration test (`cargo install tpchgen-cli`). + +Maven is bundled via the `./mvnw` wrapper; no separate Maven install is +required. + +[rustup]: https://rustup.rs/ +[`tpchgen-cli`]: https://github.com/clflushopt/tpchgen-rs + +## Build and test + +```sh +make test +``` + +This builds the native Rust crate and runs the JUnit tests. The steps can +be run individually: + +```sh +cd native && cargo build +./mvnw test +``` + +The native library must be built before running JVM tests. + +The first build in a fresh checkout reaches out to +`raw.githubusercontent.com` to fetch the DataFusion `.proto` files used +to generate the `datafusion-proto` Java classes. Subsequent builds are +offline; the `download-maven-plugin` cache under +`~/.m2/repository/.cache/` satisfies them. + +## Test data + +The Parquet integration test reads TPC-H SF1 data (~345 MB across 8 tables +in Snappy-compressed Parquet). 
Generate it once with: + +```sh +make tpch-data +``` + +Tests that need this data skip cleanly if it is missing. `make clean` +does **not** remove `tpch-data/` — delete it manually to reclaim the +disk space. + +## Repository layout + +- `src/` — Java sources and tests. +- `native/` — Rust crate (JNI + Arrow C Data Interface). +- `docs/` — Sphinx documentation source and build scripts. From a947eab03850bde8ba8e9cd49f83bfa4cd8d7793 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:22:29 -0600 Subject: [PATCH 13/20] docs: add contributor guide code style page --- docs/source/contributor-guide/code-style.md | 46 +++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 docs/source/contributor-guide/code-style.md diff --git a/docs/source/contributor-guide/code-style.md b/docs/source/contributor-guide/code-style.md new file mode 100644 index 0000000..3c6020c --- /dev/null +++ b/docs/source/contributor-guide/code-style.md @@ -0,0 +1,46 @@ + + +# Code style + +## Java + +Run the Spotless formatter before committing. CI fails the build if +formatting drifts: + +```sh +./mvnw spotless:apply +``` + +## Rust + +Run inside `native/`: + +```sh +cargo fmt +cargo clippy --all-targets -- -D warnings +``` + +`-D warnings` turns clippy warnings into build failures, matching CI. + +## License headers + +New source files need the Apache 2.0 license header. Apache RAT enforces +this during `verify` — `./mvnw verify` will fail if a tracked file is +missing the header. 
From b3e7de83a8aaca6b3956111850a16df2a16f522d Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Wed, 13 May 2026 07:23:17 -0600
Subject: [PATCH 14/20] docs: add contributor guide releasing placeholder

---
 docs/source/contributor-guide/releasing.md | 28 ++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 docs/source/contributor-guide/releasing.md

diff --git a/docs/source/contributor-guide/releasing.md b/docs/source/contributor-guide/releasing.md
new file mode 100644
index 0000000..8210d2b
--- /dev/null
+++ b/docs/source/contributor-guide/releasing.md
@@ -0,0 +1,28 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Releasing
+
+There are no releases of Apache DataFusion Java yet. Once the first
+release approaches, this page will document the Apache release process
+the project follows.
+
+In the meantime, refer to the
+[Apache DataFusion release process](https://datafusion.apache.org/contributor-guide/release/index.html)
+for the broader pattern used by sibling subprojects.

From 0c05dc7c9c9215e5f8ba762193e229f78d267a5e Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Wed, 13 May 2026 07:24:08 -0600
Subject: [PATCH 15/20] docs: add contributor guide datafusion bump recipe

---
 .../updating-datafusion-version.md | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 docs/source/contributor-guide/updating-datafusion-version.md

diff --git a/docs/source/contributor-guide/updating-datafusion-version.md b/docs/source/contributor-guide/updating-datafusion-version.md
new file mode 100644
index 0000000..56d50dc
--- /dev/null
+++ b/docs/source/contributor-guide/updating-datafusion-version.md
@@ -0,0 +1,62 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Updating the DataFusion / protobuf schema version
+
+Three things must move together when bumping DataFusion:
+
+1. `native/Cargo.toml` — the `datafusion` crate dependency.
+2. `pom.xml` — the `<datafusion.version>` Maven property. **Must equal
+   the Cargo version**; a mismatch means JVM-built protobuf plans won't
+   deserialize on the native side.
+3. 
`pom.xml` — the `<sha512>` checksums on the two `download-maven-plugin`
+   executions. These pin the downloaded `.proto` files; the build fails
+   if upstream silently re-tags them, which is the desired behavior.
+
+## Recipe
+
+```sh
+# 1. Bump the Cargo dep
+$EDITOR native/Cargo.toml # set datafusion = "<new version>"
+(cd native && cargo update -p datafusion)
+
+# 2. Bump the Maven property to match
+$EDITOR pom.xml # set <datafusion.version>
+
+# 3. Compute the new SHA-512 hashes for both `.proto` files from the
+# upstream tag you just set in step 2, then paste them into the two
+# <sha512> elements in pom.xml.
+NEW=$(grep -m1 -oE '<datafusion.version>[^<]+' pom.xml | cut -d'>' -f2)
+curl -sL "https://raw.githubusercontent.com/apache/datafusion/$NEW/datafusion/proto-common/proto/datafusion_common.proto" | shasum -a 512 | awk '{print $1}'
+curl -sL "https://raw.githubusercontent.com/apache/datafusion/$NEW/datafusion/proto/proto/datafusion.proto" | shasum -a 512 | awk '{print $1}'
+$EDITOR pom.xml # paste the two hashes into the <sha512> elements
+
+# Drop the local download cache so the next build re-downloads against
+# the new hashes.
+rm -rf ~/.m2/repository/.cache/download-maven-plugin target/proto
+
+# 4. Verify
+make && make test
+```
+
+## Why the protobuf runtime version is separate
+
+The protobuf runtime version (`<protobuf.version>` in `pom.xml`) tracks
+the Java ecosystem (security and JDK compatibility), not DataFusion.
+Bump it independently when there is a reason.

From 7cf3531dd96e0350cf0c72cb3ae650d693549f98 Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Wed, 13 May 2026 07:25:48 -0600
Subject: [PATCH 16/20] docs: trim README and link to docs site

---
 README.md | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9635952..8085e8d 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,62 @@
-# DataFusion Java Bindings
+# Apache DataFusion Java
+
+Java bindings for [Apache DataFusion]. 
Queries run in native Rust and results +return to the JVM as [Apache Arrow] batches via the Arrow C Data Interface. + +[Apache DataFusion]: https://datafusion.apache.org/ +[Apache Arrow]: https://arrow.apache.org/ + +> Early development: no releases yet, API will change. Bug reports and +> contributions welcome. + +## Quickstart + +```java +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.datafusion.DataFrame; +import org.apache.datafusion.SessionContext; + +try (var allocator = new RootAllocator(); + var ctx = new SessionContext()) { + + ctx.registerParquet("orders", "/path/to/orders.parquet"); + + try (DataFrame df = ctx.sql( + "SELECT o_orderpriority, COUNT(*) AS n " + + "FROM orders GROUP BY o_orderpriority"); + ArrowReader reader = df.collect(allocator)) { + while (reader.loadNextBatch()) { + var batch = reader.getVectorSchemaRoot(); + // ... + } + } +} +``` + +`SessionContext` and `DataFrame` are `AutoCloseable` and not thread-safe. + +## Documentation + +The full documentation lives under [`docs/source/`](docs/source/index.md) +and is built with Sphinx (see [`docs/README.md`](docs/README.md) for the +build steps): + +- [User guide](docs/source/user-guide/index.md) — installation, the + DataFrame and SQL APIs, Parquet ingestion, project status. +- [Contributor guide](docs/source/contributor-guide/index.md) — build, + test, code style, and how to bump the DataFusion version. + +## Requirements + +JDK 17+. Building from source: see +[`docs/source/contributor-guide/development.md`](docs/source/contributor-guide/development.md). + +## Contributing + +Open an issue to discuss non-trivial changes before sending a PR. See the +[contributor guide](docs/source/contributor-guide/index.md). + +## License + +Apache License 2.0. See [LICENSE.txt](LICENSE.txt) and [NOTICE.txt](NOTICE.txt). 
From 62216f800ca7e67286417cd024b68e1bbd3f80e9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:28:09 -0600 Subject: [PATCH 17/20] docs: trim CONTRIBUTING and link to docs site --- CONTRIBUTING.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..48688d8 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,33 @@ + + +# Contributing to Apache DataFusion Java + +Bug reports, design discussion, and patches are welcome. This project follows +the Apache DataFusion contribution model. + +- File bugs and feature requests on + [GitHub issues](https://github.com/apache/datafusion-java/issues). +- For larger or design-level discussion, the mailing list is + [dev@datafusion.apache.org](mailto:dev@datafusion.apache.org). +- Please open an issue before sending a PR for any significant change so + the approach can be agreed on first. + +For build, test, code style, and version-bump workflows, see the +[contributor guide](docs/source/contributor-guide/index.md). From 9cb7dc35424124f3489e7030f51ada69cba8cc44 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:33:30 -0600 Subject: [PATCH 18/20] docs: fix incorrect ParquetReadOptions API and tighten development page --- docs/source/contributor-guide/development.md | 3 +++ docs/source/user-guide/dataframe.md | 2 +- docs/source/user-guide/parquet.md | 9 ++++----- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/source/contributor-guide/development.md b/docs/source/contributor-guide/development.md index e98b87d..c6818f6 100644 --- a/docs/source/contributor-guide/development.md +++ b/docs/source/contributor-guide/development.md @@ -69,6 +69,9 @@ disk space. ## Repository layout +- `pom.xml` — Maven build descriptor. +- `Makefile` — top-level build orchestration (`make test`, `make tpch-data`). +- `mvnw`, `mvnw.cmd` — bundled Maven wrapper. 
- `src/` — Java sources and tests. - `native/` — Rust crate (JNI + Arrow C Data Interface). - `docs/` — Sphinx documentation source and build scripts. diff --git a/docs/source/user-guide/dataframe.md b/docs/source/user-guide/dataframe.md index e1ca33d..e91eab7 100644 --- a/docs/source/user-guide/dataframe.md +++ b/docs/source/user-guide/dataframe.md @@ -79,7 +79,7 @@ production code paths. To get the schema of a registered table without running a query: ```java -Schema schema = ctx.tableSchema("orders"); +org.apache.arrow.vector.types.pojo.Schema schema = ctx.tableSchema("orders"); ``` ## Plan input diff --git a/docs/source/user-guide/parquet.md b/docs/source/user-guide/parquet.md index 7febbdd..73b74c8 100644 --- a/docs/source/user-guide/parquet.md +++ b/docs/source/user-guide/parquet.md @@ -50,12 +50,11 @@ away. ## ParquetReadOptions Both entry points accept a `ParquetReadOptions` to tune the underlying -read. Construct one with the builder: +read. Construct one directly and chain setters: ```java -ParquetReadOptions opts = ParquetReadOptions.builder() - .fileExtension(".parquet") - .build(); +ParquetReadOptions opts = new ParquetReadOptions() + .fileExtension(".parquet"); ctx.registerParquet("orders", "/path/to/orders.parquet", opts); // or @@ -64,6 +63,6 @@ try (DataFrame df = ctx.readParquet("/path/to/orders.parquet", opts)) { } ``` -The supported options track what DataFusion exposes on its Rust +The supported setters track what DataFusion exposes on its Rust `ParquetReadOptions` builder. Inspect the class on the Java side for the exact setters available in the version you are using. 
From a691900b6a535e87ab60821ce3fdb37447f3aac4 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:39:58 -0600 Subject: [PATCH 19/20] docs: nest toctrees in section index pages for sidebar nav --- docs/source/contributor-guide/index.md | 16 ++++++++-------- docs/source/index.md | 25 ++++--------------------- docs/source/user-guide/index.md | 18 ++++++++++-------- 3 files changed, 22 insertions(+), 37 deletions(-) diff --git a/docs/source/contributor-guide/index.md b/docs/source/contributor-guide/index.md index 8afa315..de7f22a 100644 --- a/docs/source/contributor-guide/index.md +++ b/docs/source/contributor-guide/index.md @@ -38,11 +38,11 @@ Branch from `main`, write changes with the imperative mood (e.g. `feat: add foo`, `fix(native): handle bar`), and open a pull request targeting `main`. -## Topics - -- [Development](development.md) — build prerequisites, running tests, - TPC-H test data, repo layout. -- [Code style](code-style.md) — formatters and license headers. -- [Releasing](releasing.md) — Apache release process (placeholder). -- [Updating DataFusion / protobuf version](updating-datafusion-version.md) — - step-by-step recipe. +```{toctree} +:maxdepth: 1 + +development +code-style +releasing +updating-datafusion-version +``` diff --git a/docs/source/index.md b/docs/source/index.md index 675021e..0ee9519 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -69,27 +69,10 @@ Code of Conduct +Contributor Guide ``` diff --git a/docs/source/user-guide/index.md b/docs/source/user-guide/index.md index d5cbeb9..13728a5 100644 --- a/docs/source/user-guide/index.md +++ b/docs/source/user-guide/index.md @@ -28,14 +28,16 @@ Data Interface. This guide covers installation, the `SessionContext` and `DataFrame` APIs, and Parquet ingestion. -- [Installation](installation.md) — JDK and Rust prerequisites, building - from source. -- [Quickstart](quickstart.md) — a complete example, walked through. 
-- [SessionContext](sessioncontext.md) — lifecycle and threading. -- [DataFrame and SQL](dataframe.md) — building and executing queries. -- [Parquet](parquet.md) — registering files and reading them with - `ParquetReadOptions`. -- [Project status](project-status.md) — snapshot of what works today. +```{toctree} +:maxdepth: 1 + +installation +quickstart +sessioncontext +dataframe +parquet +project-status +``` > Early development: no releases yet, API will change. Bug reports and > contributions welcome. From c8018e48ff560cfe5cd6925a5cb5b1db5823aad2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 13 May 2026 07:48:05 -0600 Subject: [PATCH 20/20] docs: add user guide page on building plans via datafusion-proto --- docs/source/user-guide/index.md | 1 + docs/source/user-guide/proto-plans.md | 201 ++++++++++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 docs/source/user-guide/proto-plans.md diff --git a/docs/source/user-guide/index.md b/docs/source/user-guide/index.md index 13728a5..289eafa 100644 --- a/docs/source/user-guide/index.md +++ b/docs/source/user-guide/index.md @@ -36,6 +36,7 @@ quickstart sessioncontext dataframe parquet +proto-plans project-status ``` diff --git a/docs/source/user-guide/proto-plans.md b/docs/source/user-guide/proto-plans.md new file mode 100644 index 0000000..513018b --- /dev/null +++ b/docs/source/user-guide/proto-plans.md @@ -0,0 +1,201 @@ + + +# Logical plans via datafusion-proto + +`SessionContext.fromProto(byte[])` accepts a serialized DataFusion +`LogicalPlanNode` and returns a lazy `DataFrame`. This is useful when you +already have a plan produced by another DataFusion-aware tool, or when +you want to construct the plan programmatically with finer-grained +control than the `sql` or DataFrame APIs. 
+ +The protobuf Java classes are generated by the build into the +`org.apache.datafusion.protobuf` (plan and expression nodes) and +`datafusion_common` (scalar values, schema, column references, file +formats) packages. The Maven build downloads pinned `.proto` files from +the matching upstream DataFusion tag on first build, then generates the +Java classes locally — see the +[Contributor Guide](../contributor-guide/updating-datafusion-version.md) +for how to bump the version. + +## A minimal plan + +The smallest interesting plan is a projection of a literal over an +empty input. It is useful as a sanity check and exercises serialization +end-to-end without touching any storage. + +```java +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.datafusion.DataFrame; +import org.apache.datafusion.SessionContext; +import org.apache.datafusion.protobuf.EmptyRelationNode; +import org.apache.datafusion.protobuf.LogicalExprNode; +import org.apache.datafusion.protobuf.LogicalPlanNode; +import org.apache.datafusion.protobuf.ProjectionNode; + +import datafusion_common.DatafusionCommon; + +LogicalPlanNode plan = + LogicalPlanNode.newBuilder() + .setProjection( + ProjectionNode.newBuilder() + .setInput( + LogicalPlanNode.newBuilder() + .setEmptyRelation( + EmptyRelationNode.newBuilder().setProduceOneRow(true).build()) + .build()) + .addExpr( + LogicalExprNode.newBuilder() + .setLiteral( + DatafusionCommon.ScalarValue.newBuilder().setInt32Value(1).build()) + .build()) + .build()) + .build(); + +try (var allocator = new RootAllocator(); + SessionContext ctx = new SessionContext(); + DataFrame df = ctx.fromProto(plan.toByteArray()); + ArrowReader reader = df.collect(allocator)) { + reader.loadNextBatch(); + VectorSchemaRoot batch = reader.getVectorSchemaRoot(); + IntVector col = (IntVector) batch.getVector(0); + 
System.out.println(col.get(0)); // 1 +} +``` + +`fromProto` performs the same logical-planning, optimization, and +physical-planning pipeline as `sql`; the result is a lazy +[`DataFrame`](dataframe.md) that only executes when you pull results. + +## Scanning a Parquet file via ListingTableScanNode + +A `ListingTableScanNode` reads one or more files of the same format +from disk. Unlike `registerParquet`, it does not require the table to +be in the catalog — the scan node carries everything DataFusion needs: +the file paths, the schema, the projection, the file format, and the +target partition count. + +The scan node's `schema` field is a `datafusion_common.Schema`, not an +Arrow `Schema`. Convert between the two with the helper in +`org.apache.datafusion.proto.SchemaConverter`: + +```java +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.datafusion.proto.SchemaConverter; + +Schema arrow = ctx.tableSchema("lineitem"); +DatafusionCommon.Schema schemaProto = SchemaConverter.toProto(arrow); +``` + +The full example: register the file once to introspect its schema, +then build a plan that scans the same file, sorts by `l_orderkey`, +and fetches the first row. This is equivalent to +`SELECT l_orderkey FROM lineitem ORDER BY l_orderkey LIMIT 1`. 
+ +```java +import org.apache.datafusion.protobuf.BareTableReference; +import org.apache.datafusion.protobuf.ListingTableScanNode; +import org.apache.datafusion.protobuf.ProjectionColumns; +import org.apache.datafusion.protobuf.SortExprNode; +import org.apache.datafusion.protobuf.SortNode; +import org.apache.datafusion.protobuf.TableReference; + +String path = "/path/to/lineitem.parquet"; + +try (var allocator = new RootAllocator(); + SessionContext ctx = new SessionContext()) { + + ctx.registerParquet("lineitem", path); + DatafusionCommon.Schema schemaProto = + SchemaConverter.toProto(ctx.tableSchema("lineitem")); + + LogicalExprNode orderKeyCol = + LogicalExprNode.newBuilder() + .setColumn(DatafusionCommon.Column.newBuilder().setName("l_orderkey").build()) + .build(); + + LogicalPlanNode plan = + LogicalPlanNode.newBuilder() + .setSort( + SortNode.newBuilder() + .setInput( + LogicalPlanNode.newBuilder() + .setListingScan( + ListingTableScanNode.newBuilder() + .setTableName( + TableReference.newBuilder() + .setBare( + BareTableReference.newBuilder() + .setTable("lineitem") + .build()) + .build()) + .addPaths(path) + .setFileExtension(".parquet") + .setSchema(schemaProto) + .setProjection( + ProjectionColumns.newBuilder() + .addColumns("l_orderkey") + .build()) + .setParquet( + DatafusionCommon.ParquetFormat.getDefaultInstance()) + .setTargetPartitions(1) + .build()) + .build()) + .addExpr( + SortExprNode.newBuilder() + .setExpr(orderKeyCol) + .setAsc(true) + .setNullsFirst(false) + .build()) + .setFetch(1) + .build()) + .build(); + + try (DataFrame df = ctx.fromProto(plan.toByteArray()); + ArrowReader reader = df.collect(allocator)) { + reader.loadNextBatch(); + // ... + } +} +``` + +## When to use proto plans + +The `sql` and DataFrame APIs are the right choice for most workloads. +Reach for `fromProto` when you need one of: + +- **Cross-tool interop.** Accept plans produced by another + DataFusion-based system (a planner, a scheduler, a query frontend). 
+- **Programmatic plan construction.** Build the plan node tree directly + instead of going through SQL parsing, useful for tools that compile + their own surface language to DataFusion. +- **Plan persistence.** Serialize a plan to bytes, store or transmit + it, and execute it later — possibly in a different process or on a + different machine. + +## Schema conversion support + +`SchemaConverter.toProto` and `SchemaConverter.fromProto` support the +primitive Arrow types this project's tests exercise: `Bool`, signed and +unsigned integer types 8 through 64 bits, `Float32`, `Float64`, `Utf8`, +`Utf8View`, `LargeUtf8`, `Date32`, and `Decimal128`. Anything else +raises `UnsupportedOperationException` naming the offending type.