diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 7953a5b4e2913..f5ce368df724e 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -60,6 +60,7 @@ datafusion/proto-common/src/generated/prost.rs .github/ISSUE_TEMPLATE/bug_report.yml .github/ISSUE_TEMPLATE/feature_request.yml .github/workflows/docs.yaml +docs/source/llms.txt **/node_modules/* datafusion/wasmtest/pkg/* clippy.toml diff --git a/docs/source/conf.py b/docs/source/conf.py index 03dcfb5bfa61b..c8027fc71bd54 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -109,6 +109,10 @@ # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] +# Copy agent-facing files (llms.txt) verbatim to the site root so they +# resolve at the conventional URL `https://datafusion.apache.org/llms.txt`. +html_extra_path = ["llms.txt"] + html_logo = "_static/images/2x_bgwhite_original.png" html_css_files = ["theme_overrides.css"] diff --git a/docs/source/llms.txt b/docs/source/llms.txt new file mode 100644 index 0000000000000..5d738107c8d33 --- /dev/null +++ b/docs/source/llms.txt @@ -0,0 +1,26 @@ +# Apache DataFusion + +> Apache DataFusion is an extensible query engine written in Rust that uses Apache Arrow as its in-memory format. This file is a directory of agent-facing entry points for the DataFusion ecosystem — the Rust core query engine and its subprojects. Subproject `llms.txt` files contain the project-specific guidance for writing code against each one. + +## Core DataFusion (Rust) + +- [User guide](https://datafusion.apache.org/user-guide/introduction.html): install, example usage, SQL, DataFrame, expressions, configuration, explain plans. +- [Library user guide](https://datafusion.apache.org/library-user-guide/index.html): embedding DataFusion, extending SQL, custom table providers, building logical plans, the query optimizer. 
+- [Contributor guide](https://datafusion.apache.org/contributor-guide/index.html): development environment, architecture, testing, release management, governance. +- [Rust API docs (`docs.rs`)](https://docs.rs/datafusion/latest/datafusion/): generated reference for the `datafusion` crate. +- [GitHub repository](https://github.com/apache/datafusion): source, issues, pull requests. + +## Subprojects + +Each subproject may expose its own `llms.txt` at `<subproject-url>/llms.txt` — agents following the [llmstxt.org](https://llmstxt.org) convention can probe these paths for project-specific guidance. + +- [DataFusion Python](https://datafusion.apache.org/python/): Python bindings — SQL and lazy DataFrame API over Apache Arrow. +- [DataFusion Ballista](https://datafusion.apache.org/ballista/): distributed execution extension for DataFusion. +- [DataFusion Comet](https://datafusion.apache.org/comet/): Apache Spark accelerator built on DataFusion. + +## Optional + +- [Blog](https://datafusion.apache.org/blog/): release notes and ecosystem updates. +- [crates.io `datafusion`](https://crates.io/crates/datafusion): published crate. +- [Code of conduct](https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md) +- [Apache Software Foundation](https://apache.org)