From faf447b7abdd10b2738cd58a13eecd265ba5a413 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 30 Aug 2025 09:19:50 -0400 Subject: [PATCH 1/8] Set fail on warning for documentation generation --- docs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Makefile b/docs/Makefile index e65c8e250..49ebae372 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -35,4 +35,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) --fail-on-warning \ No newline at end of file From 1a8a7a44ee1fa6b362f2cfec4c486e5bd41feea9 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 30 Aug 2025 09:33:42 -0400 Subject: [PATCH 2/8] Avoid building the wheel if possible during documentation generation --- .github/workflows/docs.yaml | 87 +++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 14 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index c24fa5ade..724fbe64a 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -7,19 +7,25 @@ on: pull_request: branches: - main + # Run after build workflow completes so we can get the built artifact + workflow_run: + workflows: ["Python Release Build"] + types: + - completed name: Deploy DataFusion Python site jobs: - debug-github-context: + debug-github-context: name: Print github context runs-on: ubuntu-latest steps: - - name: Dump GitHub context - env: - GITHUB_CONTEXT: ${{ toJson(github) }} - run: | - echo "$GITHUB_CONTEXT" + - name: Dump GitHub context + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + run: | + echo "$GITHUB_CONTEXT" + build-docs: name: Build docs runs-on: ubuntu-latest @@ -37,8 +43,10 @@ jobs: echo "Unsupported input: ${{ github.ref }} / ${{ github.ref_type }}" exit 1 fi + - name: Checkout docs sources uses: actions/checkout@v5 + - name: Checkout docs target branch if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') uses: actions/checkout@v5 @@ -46,24 +54,75 @@ jobs: fetch-depth: 0 ref: ${{ steps.target-branch.outputs.value }} path: docs-target + - name: Setup Python uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Install Protoc - uses: arduino/setup-protoc@v3 + - name: Install dependencies + uses: astral-sh/setup-uv@v6 with: - version: '27.4' - repo-token: ${{ secrets.GITHUB_TOKEN }} + enable-cache: true - - name: Install dependencies and build - uses: astral-sh/setup-uv@v6 + # Try to download pre-built wheel from the build workflow + - name: Download wheel from build workflow + id: download-wheel + continue-on-error: true + uses: actions/download-artifact@v5 with: - enable-cache: true + name: dist + path: wheels/ + # For workflow_run events, get artifacts from the triggering workflow + run-id: ${{ github.event.workflow_run.id || github.run_id }} - - name: Build repo + # Check if we have a compatible wheel + - name: Check for compatible wheel + id: check-wheel run: | + set -x + if [ -d "wheels/" ] && [ "$(ls -A wheels/)" ]; then + echo "Available wheels:" + ls -la wheels/ + + # Find a compatible wheel for Linux x86_64 (the docs runner) + WHEEL=$(find wheels/ -name "*linux_x86_64*.whl" -o -name "*manylinux*x86_64*.whl" | head -1) + if [ -n "$WHEEL" ]; then + echo "Found compatible wheel: $WHEEL" + echo 
"wheel-found=true" >> "$GITHUB_OUTPUT" + echo "wheel-path=$WHEEL" >> "$GITHUB_OUTPUT" + else + echo "No compatible wheel found for Linux x86_64" + echo "wheel-found=false" >> "$GITHUB_OUTPUT" + fi + else + echo "No wheels directory or wheels found" + echo "wheel-found=false" >> "$GITHUB_OUTPUT" + fi + + # Install from pre-built wheel if available + - name: Install from pre-built wheel + if: steps.check-wheel.outputs.wheel-found == 'true' + run: | + set -x + uv venv + # Install documentation dependencies + uv sync --dev --no-install-package datafusion --group docs + # Install the pre-built wheel + uv pip install "${{ steps.check-wheel.outputs.wheel-path }}" + echo "Installed datafusion from pre-built wheel" + + # Fallback: Build from source if no wheel is available + - name: Build from source (fallback) + if: steps.check-wheel.outputs.wheel-found != 'true' + run: | + set -x + echo "No compatible pre-built wheel found, building from source" + + # Install Protoc for building from source + sudo apt-get update + sudo apt-get install -y protobuf-compiler + uv venv uv sync --dev --no-install-package datafusion --group docs uv run --no-project maturin develop --uv From 31d5cf8e7af0b252416ec894925cfdb5085e7fef Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 30 Aug 2025 09:48:04 -0400 Subject: [PATCH 3/8] Revert "Avoid building the wheel if possible during documentation generation" This reverts commit 1a8a7a44ee1fa6b362f2cfec4c486e5bd41feea9. --- .github/workflows/docs.yaml | 87 ++++++------------------------------- 1 file changed, 14 insertions(+), 73 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 724fbe64a..c24fa5ade 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -7,25 +7,19 @@ on: pull_request: branches: - main - # Run after build workflow completes so we can get the built artifact - workflow_run: - workflows: ["Python Release Build"] - types: - - completed name: Deploy DataFusion Python site jobs: - debug-github-context: + debug-github-context: name: Print github context runs-on: ubuntu-latest steps: - - name: Dump GitHub context - env: - GITHUB_CONTEXT: ${{ toJson(github) }} - run: | - echo "$GITHUB_CONTEXT" - + - name: Dump GitHub context + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + run: | + echo "$GITHUB_CONTEXT" build-docs: name: Build docs runs-on: ubuntu-latest @@ -43,10 +37,8 @@ jobs: echo "Unsupported input: ${{ github.ref }} / ${{ github.ref_type }}" exit 1 fi - - name: Checkout docs sources uses: actions/checkout@v5 - - name: Checkout docs target branch if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') uses: actions/checkout@v5 @@ -54,75 +46,24 @@ jobs: fetch-depth: 0 ref: ${{ steps.target-branch.outputs.value }} path: docs-target - - name: Setup Python uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Install dependencies - uses: astral-sh/setup-uv@v6 + - name: Install Protoc + uses: arduino/setup-protoc@v3 with: - enable-cache: true + version: '27.4' + repo-token: ${{ secrets.GITHUB_TOKEN }} - # Try to download pre-built wheel from the build workflow - - name: Download wheel from build workflow - id: download-wheel - continue-on-error: true - uses: actions/download-artifact@v5 + - name: Install dependencies and build + uses: astral-sh/setup-uv@v6 with: - name: dist - path: wheels/ - # For workflow_run events, get artifacts from the triggering workflow - run-id: ${{ github.event.workflow_run.id || github.run_id }} + enable-cache: true - # 
Check if we have a compatible wheel - - name: Check for compatible wheel - id: check-wheel + - name: Build repo run: | - set -x - if [ -d "wheels/" ] && [ "$(ls -A wheels/)" ]; then - echo "Available wheels:" - ls -la wheels/ - - # Find a compatible wheel for Linux x86_64 (the docs runner) - WHEEL=$(find wheels/ -name "*linux_x86_64*.whl" -o -name "*manylinux*x86_64*.whl" | head -1) - if [ -n "$WHEEL" ]; then - echo "Found compatible wheel: $WHEEL" - echo "wheel-found=true" >> "$GITHUB_OUTPUT" - echo "wheel-path=$WHEEL" >> "$GITHUB_OUTPUT" - else - echo "No compatible wheel found for Linux x86_64" - echo "wheel-found=false" >> "$GITHUB_OUTPUT" - fi - else - echo "No wheels directory or wheels found" - echo "wheel-found=false" >> "$GITHUB_OUTPUT" - fi - - # Install from pre-built wheel if available - - name: Install from pre-built wheel - if: steps.check-wheel.outputs.wheel-found == 'true' - run: | - set -x - uv venv - # Install documentation dependencies - uv sync --dev --no-install-package datafusion --group docs - # Install the pre-built wheel - uv pip install "${{ steps.check-wheel.outputs.wheel-path }}" - echo "Installed datafusion from pre-built wheel" - - # Fallback: Build from source if no wheel is available - - name: Build from source (fallback) - if: steps.check-wheel.outputs.wheel-found != 'true' - run: | - set -x - echo "No compatible pre-built wheel found, building from source" - - # Install Protoc for building from source - sudo apt-get update - sudo apt-get install -y protobuf-compiler - uv venv uv sync --dev --no-install-package datafusion --group docs uv run --no-project maturin develop --uv From b7c1eedad9bfc1a823f0bf49a0939b6a16577a8c Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 30 Aug 2025 09:49:25 -0400 Subject: [PATCH 4/8] Move documentation into build workflow so that it is guaranteed to run after wheel build --- .github/workflows/build.yml | 95 ++++++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bc770bbc9..97ece3d42 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -271,7 +271,100 @@ jobs: with: name: dist pattern: dist-* - + + # Documentation build job that runs after wheels are built + build-docs: + name: Build docs + runs-on: ubuntu-latest + needs: [build-manylinux-x86_64] # Only need the Linux wheel for docs + # Only run docs on main branch pushes, tags, or PRs + if: github.event_name == 'push' || github.event_name == 'pull_request' + steps: + - name: Set target branch + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') + id: target-branch + run: | + set -x + if test '${{ github.ref }}' = 'refs/heads/main'; then + echo "value=asf-staging" >> "$GITHUB_OUTPUT" + elif test '${{ github.ref_type }}' = 'tag'; then + echo "value=asf-site" >> "$GITHUB_OUTPUT" + else + echo "Unsupported input: ${{ github.ref }} / ${{ github.ref_type }}" + exit 1 + fi + + - name: Checkout docs sources + uses: actions/checkout@v5 + + - name: Checkout docs target branch + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') + uses: actions/checkout@v5 + with: + fetch-depth: 0 + ref: ${{ steps.target-branch.outputs.value }} + path: docs-target + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + uses: astral-sh/setup-uv@v6 + with: + enable-cache: true + + # Download the Linux wheel built 
in the previous job + - name: Download pre-built Linux wheel + uses: actions/download-artifact@v5 + with: + name: dist-manylinux-x86_64 + path: wheels/ + + # Install from the pre-built wheel + - name: Install from pre-built wheel + run: | + set -x + uv venv + # Install documentation dependencies + uv sync --dev --no-install-package datafusion --group docs + # Install the pre-built wheel + WHEEL=$(find wheels/ -name "*.whl" | head -1) + if [ -n "$WHEEL" ]; then + echo "Installing wheel: $WHEEL" + uv pip install "$WHEEL" + else + echo "ERROR: No wheel found!" + exit 1 + fi + + - name: Build docs + run: | + set -x + cd docs + curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv + curl -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet + uv run --no-project make html + + - name: Copy & push the generated HTML + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') + run: | + set -x + cd docs-target + # delete anything but: 1) '.'; 2) '..'; 3) .git/ + find ./ | grep -vE "^./$|^../$|^./.git" | xargs rm -rf + cp ../.asf.yaml . + cp -r ../docs/build/html/* . + git status --porcelain + if [ "$(git status --porcelain)" != "" ]; then + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add --all + git commit -m 'Publish built docs triggered by ${{ github.sha }}' + git push || git push --force + fi + # NOTE: PyPI publish needs to be done manually for now after release passed the vote # release: # name: Publish in PyPI From a9d421feacf705837b40f6af35a421d77d2a3759 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 30 Aug 2025 09:49:51 -0400 Subject: [PATCH 5/8] Remove redundant documentatino build --- .github/workflows/docs.yaml | 95 ------------------------------------- 1 file changed, 95 deletions(-) delete mode 100644 .github/workflows/docs.yaml diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml deleted file mode 100644 index c24fa5ade..000000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,95 +0,0 @@ -on: - push: - branches: - - main - tags-ignore: - - "**-rc**" - pull_request: - branches: - - main - -name: Deploy DataFusion Python site - -jobs: - debug-github-context: - name: Print github context - runs-on: ubuntu-latest - steps: - - name: Dump GitHub context - env: - GITHUB_CONTEXT: ${{ toJson(github) }} - run: | - echo "$GITHUB_CONTEXT" - build-docs: - name: Build docs - runs-on: ubuntu-latest - steps: - - name: Set target branch - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') - id: target-branch - run: | - set -x - if test '${{ github.ref }}' = 'refs/heads/main'; then - echo "value=asf-staging" >> "$GITHUB_OUTPUT" - elif test '${{ github.ref_type }}' = 'tag'; then - echo "value=asf-site" >> "$GITHUB_OUTPUT" - else - echo "Unsupported input: ${{ github.ref }} / ${{ github.ref_type }}" - exit 1 - fi - - name: Checkout docs sources - uses: actions/checkout@v5 - - name: Checkout docs target branch - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') - uses: actions/checkout@v5 - with: - fetch-depth: 0 - ref: ${{ steps.target-branch.outputs.value }} - path: docs-target - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - name: Install Protoc - uses: arduino/setup-protoc@v3 - with: - version: '27.4' - 
repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Install dependencies and build - uses: astral-sh/setup-uv@v6 - with: - enable-cache: true - - - name: Build repo - run: | - uv venv - uv sync --dev --no-install-package datafusion --group docs - uv run --no-project maturin develop --uv - - - name: Build docs - run: | - set -x - cd docs - curl -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv - curl -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet - uv run --no-project make html - - - name: Copy & push the generated HTML - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') - run: | - set -x - cd docs-target - # delete anything but: 1) '.'; 2) '..'; 3) .git/ - find ./ | grep -vE "^./$|^../$|^./.git" | xargs rm -rf - cp ../.asf.yaml . - cp -r ../docs/build/html/* . - git status --porcelain - if [ "$(git status --porcelain)" != "" ]; then - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - git add --all - git commit -m 'Publish built docs triggered by ${{ github.sha }}' - git push || git push --force - fi From 615e2a123f5d89c75433296de3ec7a1e1dc6b903 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 30 Aug 2025 10:39:28 -0400 Subject: [PATCH 6/8] Move parameters into init method to fix documentation error --- python/datafusion/dataframe.py | 181 +++++++++++++++++---------------- 1 file changed, 94 insertions(+), 87 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 61cb09438..e8c26ba33 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -119,68 +119,8 @@ class ParquetWriterOptions: """Advanced parquet writer options. Allows settings the writer options that apply to the entire file. Some options can - also be set on a column by column basis, with the field `column_specific_options` - (see `ParquetColumnOptions`). - - Attributes: - data_pagesize_limit: Sets best effort maximum size of data page in bytes. - write_batch_size: Sets write_batch_size in bytes. - writer_version: Sets parquet writer version. Valid values are `1.0` and - `2.0`. - skip_arrow_metadata: Skip encoding the embedded arrow metadata in the - KV_meta. - compression: Compression type to use. Default is "zstd(3)". - Available compression types are - - "uncompressed": No compression. - - "snappy": Snappy compression. - - "gzip(n)": Gzip compression with level n. - - "brotli(n)": Brotli compression with level n. - - "lz4": LZ4 compression. - - "lz4_raw": LZ4_RAW compression. - - "zstd(n)": Zstandard compression with level n. - dictionary_enabled: Sets if dictionary encoding is enabled. If None, uses - the default parquet writer setting. - dictionary_page_size_limit: Sets best effort maximum dictionary page size, - in bytes. - statistics_enabled: Sets if statistics are enabled for any column Valid - values are `none`, `chunk`, and `page`. If None, uses the default - parquet writer setting. - max_row_group_size: Target maximum number of rows in each row group - (defaults to 1M rows). Writing larger row groups requires more memory to - write, but can get better compression and be faster to read. - created_by: Sets "created by" property. - column_index_truncate_length: Sets column index truncate length. - statistics_truncate_length: Sets statistics truncate length. If None, uses - the default parquet writer setting. 
- data_page_row_count_limit: Sets best effort maximum number of rows in a data - page. - encoding: Sets default encoding for any column. Valid values are `plain`, - `plain_dictionary`, `rle`, `bit_packed`, `delta_binary_packed`, - `delta_length_byte_array`, `delta_byte_array`, `rle_dictionary`, and - `byte_stream_split`. If None, uses the default parquet writer setting. - bloom_filter_on_write: Write bloom filters for all columns when creating - parquet files. - bloom_filter_fpp: Sets bloom filter false positive probability. If None, - uses the default parquet writer setting - bloom_filter_ndv: Sets bloom filter number of distinct values. If None, uses - the default parquet writer setting. - allow_single_file_parallelism: Controls whether DataFusion will attempt to - speed up writing parquet files by serializing them in parallel. Each - column in each row group in each output file are serialized in parallel - leveraging a maximum possible core count of n_files * n_row_groups * - n_columns. - maximum_parallel_row_group_writers: By default parallel parquet writer is - tuned for minimum memory usage in a streaming execution plan. You may - see a performance benefit when writing large parquet files by increasing - `maximum_parallel_row_group_writers` and - `maximum_buffered_record_batches_per_stream` if your system has idle - cores and can tolerate additional memory usage. Boosting these values is - likely worthwhile when writing out already in-memory data, such as from - a cached data frame. - maximum_buffered_record_batches_per_stream: See - `maximum_parallel_row_group_writers`. - column_specific_options: Overrides options for specific columns. If a column - is not a part of this dictionary, it will use the parameters provided here. + also be set on a column by column basis, with the field ``column_specific_options`` + (see ``ParquetColumnOptions``). """ def __init__( @@ -208,7 +148,72 @@ def __init__( maximum_buffered_record_batches_per_stream: int = 2, column_specific_options: Optional[dict[str, ParquetColumnOptions]] = None, ) -> None: - """Initialize the ParquetWriterOptions.""" + """Initialize the ParquetWriterOptions. + + Args: + data_pagesize_limit: Sets best effort maximum size of data page in bytes. + write_batch_size: Sets write_batch_size in bytes. + writer_version: Sets parquet writer version. Valid values are ``1.0`` and + ``2.0``. + skip_arrow_metadata: Skip encoding the embedded arrow metadata in the + KV_meta. + compression: Compression type to use. Default is ``zstd(3)``. + Available compression types are + + - ``uncompressed``: No compression. + - ``snappy``: Snappy compression. + - ``gzip(n)``: Gzip compression with level n. + - ``brotli(n)``: Brotli compression with level n. + - ``lz4``: LZ4 compression. + - ``lz4_raw``: LZ4_RAW compression. + - ``zstd(n)``: Zstandard compression with level n. + compression_level: Compression level to set. + dictionary_enabled: Sets if dictionary encoding is enabled. If ``None``, + uses the default parquet writer setting. + dictionary_page_size_limit: Sets best effort maximum dictionary page size, + in bytes. + statistics_enabled: Sets if statistics are enabled for any column Valid + values are ``none``, ``chunk``, and ``page``. If ``None``, uses the + default parquet writer setting. + max_row_group_size: Target maximum number of rows in each row group + (defaults to 1M rows). Writing larger row groups requires more memory + to write, but can get better compression and be faster to read. + created_by: Sets "created by" property. 
+ column_index_truncate_length: Sets column index truncate length. + statistics_truncate_length: Sets statistics truncate length. If ``None``, + uses the default parquet writer setting. + data_page_row_count_limit: Sets best effort maximum number of rows in a data + page. + encoding: Sets default encoding for any column. Valid values are ``plain``, + ``plain_dictionary``, ``rle``, ``bit_packed``, ``delta_binary_packed``, + ``delta_length_byte_array``, ``delta_byte_array``, ``rle_dictionary``, + and ``byte_stream_split``. If ``None``, uses the default parquet writer + setting. + bloom_filter_on_write: Write bloom filters for all columns when creating + parquet files. + bloom_filter_fpp: Sets bloom filter false positive probability. If ``None``, + uses the default parquet writer setting + bloom_filter_ndv: Sets bloom filter number of distinct values. If ``None``, + uses the default parquet writer setting. + allow_single_file_parallelism: Controls whether DataFusion will attempt to + speed up writing parquet files by serializing them in parallel. Each + column in each row group in each output file are serialized in parallel + leveraging a maximum possible core count of + ``n_files * n_row_groups * n_columns``. + maximum_parallel_row_group_writers: By default parallel parquet writer is + tuned for minimum memory usage in a streaming execution plan. You may + see a performance benefit when writing large parquet files by increasing + ``maximum_parallel_row_group_writers`` and + ``maximum_buffered_record_batches_per_stream`` if your system has idle + cores and can tolerate additional memory usage. Boosting these values is + likely worthwhile when writing out already in-memory data, such as from + a cached data frame. + maximum_buffered_record_batches_per_stream: See + ``maximum_parallel_row_group_writers``. + column_specific_options: Overrides options for specific columns. If a column + is not a part of this dictionary, it will use the parameters provided + here. + """ self.data_pagesize_limit = data_pagesize_limit self.write_batch_size = write_batch_size self.writer_version = writer_version @@ -241,29 +246,7 @@ class ParquetColumnOptions: """Parquet options for individual columns. Contains the available options that can be applied for an individual Parquet column, - replacing the global options in `ParquetWriterOptions`. - - Attributes: - encoding: Sets encoding for the column path. Valid values are: `plain`, - `plain_dictionary`, `rle`, `bit_packed`, `delta_binary_packed`, - `delta_length_byte_array`, `delta_byte_array`, `rle_dictionary`, and - `byte_stream_split`. These values are not case-sensitive. If `None`, uses - the default parquet options - dictionary_enabled: Sets if dictionary encoding is enabled for the column path. - If `None`, uses the default parquet options - compression: Sets default parquet compression codec for the column path. Valid - values are `uncompressed`, `snappy`, `gzip(level)`, `lzo`, `brotli(level)`, - `lz4`, `zstd(level)`, and `lz4_raw`. These values are not case-sensitive. If - `None`, uses the default parquet options. - statistics_enabled: Sets if statistics are enabled for the column Valid values - are: `none`, `chunk`, and `page` These values are not case sensitive. If - `None`, uses the default parquet options. - bloom_filter_enabled: Sets if bloom filter is enabled for the column path. If - `None`, uses the default parquet options. - bloom_filter_fpp: Sets bloom filter false positive probability for the column - path. 
If `None`, uses the default parquet options. - bloom_filter_ndv: Sets bloom filter number of distinct values. If `None`, uses - the default parquet options. + replacing the global options in ``ParquetWriterOptions``. """ def __init__( @@ -276,7 +259,31 @@ def __init__( bloom_filter_fpp: Optional[float] = None, bloom_filter_ndv: Optional[int] = None, ) -> None: - """Initialize the ParquetColumnOptions.""" + """Initialize the ParquetColumnOptions. + + Args: + encoding: Sets encoding for the column path. Valid values are: ``plain``, + ``plain_dictionary``, ``rle``, ``bit_packed``, ``delta_binary_packed``, + ``delta_length_byte_array``, ``delta_byte_array``, ``rle_dictionary``, + and ``byte_stream_split``. These values are not case-sensitive. If + ``None``, uses the default parquet options + dictionary_enabled: Sets if dictionary encoding is enabled for the column + path. If `None`, uses the default parquet options + compression: Sets default parquet compression codec for the column path. + Valid values are ``uncompressed``, ``snappy``, ``gzip(level)``, ``lzo``, + ``brotli(level)``, ``lz4``, ``zstd(level)``, and ``lz4_raw``. These + values are not case-sensitive. If ``None``, uses the default parquet + options. + statistics_enabled: Sets if statistics are enabled for the column Valid + values are: ``none``, ``chunk``, and ``page`` These values are not case + sensitive. If ``None``, uses the default parquet options. + bloom_filter_enabled: Sets if bloom filter is enabled for the column path. + If ``None``, uses the default parquet options. + bloom_filter_fpp: Sets bloom filter false positive probability for the + column path. If ``None``, uses the default parquet options. + bloom_filter_ndv: Sets bloom filter number of distinct values. If ``None``, + uses the default parquet options. + """ self.encoding = encoding self.dictionary_enabled = dictionary_enabled self.compression = compression From 0d06c0a196079ffa3e6b16ecd6c46aa0fbdb76f7 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 30 Aug 2025 11:03:27 -0400 Subject: [PATCH 7/8] Whitespace correction --- python/datafusion/dataframe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index e8c26ba33..078bf3d2c 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -272,8 +272,8 @@ def __init__( compression: Sets default parquet compression codec for the column path. Valid values are ``uncompressed``, ``snappy``, ``gzip(level)``, ``lzo``, ``brotli(level)``, ``lz4``, ``zstd(level)``, and ``lz4_raw``. These - values are not case-sensitive. If ``None``, uses the default parquet - options. + values are not case-sensitive. If ``None``, uses the default parquet + options. statistics_enabled: Sets if statistics are enabled for the column Valid values are: ``none``, ``chunk``, and ``page`` These values are not case sensitive. If ``None``, uses the default parquet options. 
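For readers of the docstring changes in patch 6 (and the whitespace fix above), a minimal usage sketch of the two options classes follows. It assumes ParquetWriterOptions and ParquetColumnOptions are imported from datafusion.dataframe, the module patched here; the column name "id" is purely illustrative, and the call that hands the finished object to the DataFrame parquet writer is not part of these patches, so it is not shown.

from datafusion.dataframe import ParquetColumnOptions, ParquetWriterOptions

# Per-column override for a hypothetical "id" column: plain encoding and no
# bloom filter, regardless of the file-level settings chosen below.
id_options = ParquetColumnOptions(
    encoding="plain",
    bloom_filter_enabled=False,
)

# File-level settings; anything left unset falls back to the defaults described
# in the ParquetWriterOptions docstring (e.g. compression defaults to "zstd(3)").
writer_options = ParquetWriterOptions(
    compression="snappy",
    max_row_group_size=500_000,
    bloom_filter_on_write=True,
    column_specific_options={"id": id_options},
)
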
From dc72fb0c7a2ac6126be277ff92a2dee3da50bb76 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 30 Aug 2025 12:45:31 -0400 Subject: [PATCH 8/8] Documentation test will occur in the build docs section and will fail now that the setting is correct to turn warnings into errors --- .github/workflows/test.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6ff30ac4d..ce50d1bb6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -80,13 +80,6 @@ jobs: with: enable-cache: true - - name: Check documentation - if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - run: | - uv sync --dev --group docs --no-install-package datafusion - uv run --no-project maturin develop --uv - uv run --no-project docs/build.sh - - name: Run tests env: RUST_BACKTRACE: 1