diff --git a/datafusion-cli/README.md b/datafusion-cli/README.md index ca796b525fa1..b34aa770374d 100644 --- a/datafusion-cli/README.md +++ b/datafusion-cli/README.md @@ -19,12 +19,15 @@ -# DataFusion Command-line Interface +# Apache DataFusion Command-line Interface -[DataFusion](https://datafusion.apache.org/) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. DataFusion CLI (`datafusion-cli`) is a small command line utility that runs SQL queries using the DataFusion engine. +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ + # Frequently Asked Questions ## Where can I find more information? diff --git a/datafusion/catalog-listing/Cargo.toml b/datafusion/catalog-listing/Cargo.toml index b88461e7ebcb..38b843dec50b 100644 --- a/datafusion/catalog-listing/Cargo.toml +++ b/datafusion/catalog-listing/Cargo.toml @@ -18,11 +18,11 @@ [package] name = "datafusion-catalog-listing" description = "datafusion-catalog-listing" +readme = "README.md" authors.workspace = true edition.workspace = true homepage.workspace = true license.workspace = true -readme.workspace = true repository.workspace = true rust-version.workspace = true version.workspace = true diff --git a/datafusion/catalog-listing/README.md b/datafusion/catalog-listing/README.md index c8d1cf13b4ff..81a7c7b1da3a 100644 --- a/datafusion/catalog-listing/README.md +++ b/datafusion/catalog-listing/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion catalog-listing +# Apache DataFusion Catalog Listing -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. 
This crate is a submodule of DataFusion with [ListingTable], an implementation of [TableProvider] based on files in a directory (either locally or on remote @@ -29,8 +29,8 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [listingtable]: https://docs.rs/datafusion/latest/datafusion/datasource/listing/struct.ListingTable.html [tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/trait.TableProvider.html [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/catalog/Cargo.toml b/datafusion/catalog/Cargo.toml index 5edb1c4a6878..a1db45654be0 100644 --- a/datafusion/catalog/Cargo.toml +++ b/datafusion/catalog/Cargo.toml @@ -18,11 +18,11 @@ [package] name = "datafusion-catalog" description = "datafusion-catalog" +readme = "README.md" authors.workspace = true edition.workspace = true homepage.workspace = true license.workspace = true -readme.workspace = true repository.workspace = true rust-version.workspace = true version.workspace = true diff --git a/datafusion/catalog/README.md b/datafusion/catalog/README.md index d4870e28f338..48c61b43c025 100644 --- a/datafusion/catalog/README.md +++ b/datafusion/catalog/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Catalog +# Apache DataFusion Catalog -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that provides catalog management functionality, including catalogs, schemas, and tables. 
@@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/common-runtime/README.md b/datafusion/common-runtime/README.md index bd0d4954b845..ff44e6c3e209 100644 --- a/datafusion/common-runtime/README.md +++ b/datafusion/common-runtime/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Common Runtime +# Apache DataFusion Common Runtime -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that provides common utilities. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/common/README.md b/datafusion/common/README.md index e4d6b772658d..4948c8c581be 100644 --- a/datafusion/common/README.md +++ b/datafusion/common/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Common +# Apache DataFusion Common -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. 
+[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that provides common data types and utilities. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/core/README.md b/datafusion/core/README.md index b5501087d264..859fcb9c0dff 100644 --- a/datafusion/core/README.md +++ b/datafusion/core/README.md @@ -17,15 +17,12 @@ under the License. --> -# DataFusion Core + -DataFusion is an extensible query execution framework, written in Rust, -that uses Apache Arrow as its in-memory format. +# Apache DataFusion Core This crate contains the main entry points and high level DataFusion APIs such as `SessionContext`, `DataFrame` and `ListingTable`. 
- -For more information, please see: - -- [DataFusion Website](https://datafusion.apache.org) -- [DataFusion API Docs](https://docs.rs/datafusion/latest/datafusion/) diff --git a/datafusion/datasource-avro/Cargo.toml b/datafusion/datasource-avro/Cargo.toml index ab04c68513d5..e013e8a3d093 100644 --- a/datafusion/datasource-avro/Cargo.toml +++ b/datafusion/datasource-avro/Cargo.toml @@ -18,11 +18,11 @@ [package] name = "datafusion-datasource-avro" description = "datafusion-datasource-avro" +readme = "README.md" authors.workspace = true edition.workspace = true homepage.workspace = true license.workspace = true -readme.workspace = true repository.workspace = true rust-version.workspace = true version.workspace = true diff --git a/datafusion/datasource-avro/README.md b/datafusion/datasource-avro/README.md index 3436d4a85ad0..e9b8affe60e3 100644 --- a/datafusion/datasource-avro/README.md +++ b/datafusion/datasource-avro/README.md @@ -17,15 +17,17 @@ under the License. --> -# DataFusion datasource +# Apache DataFusion Avro DataSource -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. -This crate is a submodule of DataFusion that defines a Avro based file source. +This crate is a submodule of DataFusion that defines an [Apache Avro] based file source. Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[apache avro]: https://avro.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/datasource-csv/Cargo.toml b/datafusion/datasource-csv/Cargo.toml index 978cfb5efe27..209cea403896 100644 --- a/datafusion/datasource-csv/Cargo.toml +++ b/datafusion/datasource-csv/Cargo.toml @@ -18,11 +18,11 @@ [package] name = "datafusion-datasource-csv" description = "datafusion-datasource-csv" +readme = "README.md" authors.workspace = true edition.workspace = true homepage.workspace = true license.workspace = true -readme.workspace = true repository.workspace = true rust-version.workspace = true version.workspace = true diff --git a/datafusion/datasource-csv/README.md b/datafusion/datasource-csv/README.md index 0ebddb538663..8bdadd0fe2c1 100644 --- a/datafusion/datasource-csv/README.md +++ b/datafusion/datasource-csv/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion datasource +# Apache DataFusion CSV DataSource -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that defines a CSV based file source. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/datasource-json/Cargo.toml b/datafusion/datasource-json/Cargo.toml index bc4a624c74e7..987ab60c70b7 100644 --- a/datafusion/datasource-json/Cargo.toml +++ b/datafusion/datasource-json/Cargo.toml @@ -18,11 +18,11 @@ [package] name = "datafusion-datasource-json" description = "datafusion-datasource-json" +readme = "README.md" authors.workspace = true edition.workspace = true homepage.workspace = true license.workspace = true -readme.workspace = true repository.workspace = true rust-version.workspace = true version.workspace = true diff --git a/datafusion/datasource-json/README.md b/datafusion/datasource-json/README.md index ac0b73b78e69..ca2771b9d67e 100644 --- a/datafusion/datasource-json/README.md +++ b/datafusion/datasource-json/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion datasource +# Apache DataFusion JSON DataSource -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that defines a JSON based file source. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/datasource-parquet/Cargo.toml b/datafusion/datasource-parquet/Cargo.toml index 690995a63177..1f866ffd6cc2 100644 --- a/datafusion/datasource-parquet/Cargo.toml +++ b/datafusion/datasource-parquet/Cargo.toml @@ -18,11 +18,11 @@ [package] name = "datafusion-datasource-parquet" description = "datafusion-datasource-parquet" +readme = "README.md" authors.workspace = true edition.workspace = true homepage.workspace = true license.workspace = true -readme.workspace = true repository.workspace = true rust-version.workspace = true version.workspace = true diff --git a/datafusion/datasource-parquet/README.md b/datafusion/datasource-parquet/README.md index 9ac472a9f4f0..833fc74a258b 100644 --- a/datafusion/datasource-parquet/README.md +++ b/datafusion/datasource-parquet/README.md @@ -17,15 +17,17 @@ under the License. --> -# DataFusion datasource +# Apache DataFusion Parquet DataSource -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. -This crate is a submodule of DataFusion that defines a Parquet based file source. +This crate is a submodule of DataFusion that defines an [Apache Parquet] based file source. Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[apache parquet]: https://parquet.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/datasource/Cargo.toml b/datafusion/datasource/Cargo.toml index 8df722a28a59..3f207d4e6555 100644 --- a/datafusion/datasource/Cargo.toml +++ b/datafusion/datasource/Cargo.toml @@ -18,11 +18,11 @@ [package] name = "datafusion-datasource" description = "datafusion-datasource" +readme = "README.md" authors.workspace = true edition.workspace = true homepage.workspace = true license.workspace = true -readme.workspace = true repository.workspace = true rust-version.workspace = true version.workspace = true diff --git a/datafusion/datasource/README.md b/datafusion/datasource/README.md index 5d743bc83063..cf0bb7547c07 100644 --- a/datafusion/datasource/README.md +++ b/datafusion/datasource/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion datasource +# Apache DataFusion DataSource -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that defines common DataSource related components like FileScanConfig, FileCompression etc. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/doc/Cargo.toml b/datafusion/doc/Cargo.toml index fa316348a6da..b8324565a0c6 100644 --- a/datafusion/doc/Cargo.toml +++ b/datafusion/doc/Cargo.toml @@ -19,6 +19,7 @@ name = "datafusion-doc" description = "Documentation module for DataFusion query engine" keywords = ["datafusion", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } homepage = { workspace = true } diff --git a/datafusion/doc/README.md b/datafusion/doc/README.md index c81a8e78c603..f137a273e31a 100644 --- a/datafusion/doc/README.md +++ b/datafusion/doc/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Execution +# Apache DataFusion Documentation -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that provides structures and macros for documenting user defined functions. @@ -28,5 +28,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/execution/README.md b/datafusion/execution/README.md index dd82e206e6d5..5b1528b0daab 100644 --- a/datafusion/execution/README.md +++ b/datafusion/execution/README.md @@ -17,9 +17,9 @@ under the License. 
--> -# DataFusion Execution +# Apache DataFusion Execution -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that provides execution runtime such as the memory pools and disk manager. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/expr-common/Cargo.toml b/datafusion/expr-common/Cargo.toml index 14717dd78135..db85f3207921 100644 --- a/datafusion/expr-common/Cargo.toml +++ b/datafusion/expr-common/Cargo.toml @@ -19,6 +19,7 @@ name = "datafusion-expr-common" description = "Logical plan and expression representation for DataFusion query engine" keywords = ["datafusion", "logical", "plan", "expressions"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } homepage = { workspace = true } diff --git a/datafusion/expr-common/README.md b/datafusion/expr-common/README.md index 5f95627ca0d4..97006702542a 100644 --- a/datafusion/expr-common/README.md +++ b/datafusion/expr-common/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Logical Plan and Expressions +# Apache DataFusion Common Logical Plan and Expressions -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. 
This crate is a submodule of DataFusion that provides common logical expressions @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/expr/README.md b/datafusion/expr/README.md index 860c36769ee5..b3ab9a383dbb 100644 --- a/datafusion/expr/README.md +++ b/datafusion/expr/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Logical Plan and Expressions +# Apache DataFusion Logical Plan and Expressions -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that provides data types and utilities for logical plans and expressions. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/ffi/README.md b/datafusion/ffi/README.md index 48283f4cfdc1..72070984f931 100644 --- a/datafusion/ffi/README.md +++ b/datafusion/ffi/README.md @@ -17,10 +17,10 @@ under the License. 
--> -# `datafusion-ffi`: Apache DataFusion Foreign Function Interface +# Apache DataFusion Foreign Function Interface -This crate contains code to allow interoperability of Apache [DataFusion] with -functions from other libraries and/or [DataFusion] versions using a stable +This crate contains code to allow interoperability of [Apache DataFusion] with +functions from other libraries and/or DataFusion versions using a stable interface. One of the limitations of the Rust programming language is that there is no @@ -28,10 +28,10 @@ stable [Rust ABI] (Application Binary Interface). If a library is compiled with one version of the Rust compiler and you attempt to use that library with a program compiled by a different Rust compiler, there is no guarantee that you can access the data structures. In order to share code between libraries loaded -at runtime, you need to use Rust's [FFI](Foreign Function Interface (FFI)). +at runtime, you need to use Rust's [FFI] (Foreign Function Interface). -The purpose of this crate is to define interfaces between [DataFusion] libraries -that will remain stable across different versions of [DataFusion]. This allows +The purpose of this crate is to define interfaces between DataFusion libraries +that will remain stable across different versions of DataFusion. This allows users to write libraries that can interface between each other at runtime rather than require compiling all of the code into a single executable. @@ -46,7 +46,7 @@ See [API Docs] for details and examples. Two use cases have been identified for this crate, but they are not intended to be all inclusive. -1. `datafusion-python` which will use the FFI to provide external services such +1. [`datafusion-python`] which will use the FFI to provide external services such as a `TableProvider` without needing to re-export the entire `datafusion-python` code base. With `datafusion-ffi` these packages do not need `datafusion-python` as a dependency at all. 
@@ -68,8 +68,8 @@ stable interfaces that closely mirror the Rust native approach. To learn more about this approach see the [abi_stable] and [async-ffi] crates. If you have a library in another language that you wish to interface to -[DataFusion] the recommendation is to create a Rust wrapper crate to interface -with your library and then to connect it to [DataFusion] using this crate. +DataFusion the recommendation is to create a Rust wrapper crate to interface +with your library and then to connect it to DataFusion using this crate. Alternatively, you could use [bindgen] to interface directly to the [FFI] provided by this crate, but that is currently not supported. @@ -101,12 +101,12 @@ In this crate we have a variety of structs which closely mimic the behavior of their internal counterparts. To see detailed notes about how to use them, see the example in `FFI_TableProvider`. -[datafusion]: https://datafusion.apache.org +[apache datafusion]: https://datafusion.apache.org/ [api docs]: http://docs.rs/datafusion-ffi/latest [rust abi]: https://doc.rust-lang.org/reference/abi.html [ffi]: https://doc.rust-lang.org/nomicon/ffi.html [abi_stable]: https://crates.io/crates/abi_stable [async-ffi]: https://crates.io/crates/async-ffi [bindgen]: https://crates.io/crates/bindgen -[datafusion-python]: https://datafusion.apache.org/python/ +[`datafusion-python`]: https://datafusion.apache.org/python/ [datafusion-contrib]: https://github.com/datafusion-contrib diff --git a/datafusion/functions-aggregate-common/Cargo.toml b/datafusion/functions-aggregate-common/Cargo.toml index cf065ca1cb17..a6e0a1fc2f8b 100644 --- a/datafusion/functions-aggregate-common/Cargo.toml +++ b/datafusion/functions-aggregate-common/Cargo.toml @@ -19,6 +19,7 @@ name = "datafusion-functions-aggregate-common" description = "Utility functions for implementing aggregate functions for the DataFusion query engine" keywords = ["datafusion", "logical", "plan", "expressions"] +readme = "README.md" version = { 
workspace = true } edition = { workspace = true } homepage = { workspace = true } diff --git a/datafusion/functions-aggregate-common/README.md b/datafusion/functions-aggregate-common/README.md index 61a81e8085a4..3d52aa722033 100644 --- a/datafusion/functions-aggregate-common/README.md +++ b/datafusion/functions-aggregate-common/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Aggregate Function Library +# Apache DataFusion Aggregate Function Common Library -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains common functionality for implementation aggregate and window functions. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/functions-aggregate/README.md b/datafusion/functions-aggregate/README.md index 244112d4fd7a..aa50eaeedae0 100644 --- a/datafusion/functions-aggregate/README.md +++ b/datafusion/functions-aggregate/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Aggregate Function Library +# Apache DataFusion Aggregate Function Library -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains implementations of aggregate functions. 
@@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/functions-nested/README.md b/datafusion/functions-nested/README.md index 0fa93619b97b..6ab456edb192 100644 --- a/datafusion/functions-nested/README.md +++ b/datafusion/functions-nested/README.md @@ -17,16 +17,18 @@ under the License. --> -# DataFusion Nested Type Function Library +# Apache DataFusion Nested Type Function Library -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains functions for working with arrays, maps and structs, such as `array_append` that work with -`ListArray`, `LargeListArray` and `FixedListArray` types from the `arrow` crate. +`ListArray`, `LargeListArray` and `FixedListArray` types from the [`arrow`] crate. Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[`arrow`]: https://crates.io/crates/arrow [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/functions-table/README.md b/datafusion/functions-table/README.md index 485abe560dad..89f589a9584c 100644 --- a/datafusion/functions-table/README.md +++ b/datafusion/functions-table/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Table Function Library +# Apache DataFusion Table Function Library -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains table functions that can be used in DataFusion queries. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/functions-window-common/README.md b/datafusion/functions-window-common/README.md index 9f64c9dc8298..f2e45880724e 100644 --- a/datafusion/functions-window-common/README.md +++ b/datafusion/functions-window-common/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Window Function Common Library +# Apache DataFusion Window Function Common Library -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. 
This crate contains common functions for implementing window functions. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/functions-window/README.md b/datafusion/functions-window/README.md index 746d625b4f8e..f2bb9f53f530 100644 --- a/datafusion/functions-window/README.md +++ b/datafusion/functions-window/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Window Function Library +# Apache DataFusion Window Function Library -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains window function definitions. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/functions/README.md b/datafusion/functions/README.md index 27dc4afc76bb..dee133042272 100644 --- a/datafusion/functions/README.md +++ b/datafusion/functions/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Function Library +# Apache DataFusion Function Library -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. 
+[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains packages of functions that can be used to customize the functionality of DataFusion. @@ -28,5 +28,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/macros/Cargo.toml b/datafusion/macros/Cargo.toml index d42cd7153f30..16be6f5c6741 100644 --- a/datafusion/macros/Cargo.toml +++ b/datafusion/macros/Cargo.toml @@ -19,6 +19,7 @@ name = "datafusion-macros" description = "Procedural macros for DataFusion query engine" keywords = ["datafusion", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } homepage = { workspace = true } diff --git a/datafusion/macros/README.md b/datafusion/macros/README.md index c78c02f1ca3a..c45bba1423fc 100644 --- a/datafusion/macros/README.md +++ b/datafusion/macros/README.md @@ -17,15 +17,14 @@ under the License. --> -# DataFusion Window Function Common Library +# Apache DataFusion Macros -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains common macros used in DataFusion. -Most projects should use the [`datafusion`] crate directly, which re-exports -this module. If you are already using the [`datafusion`] crate, there is no -reason to use this crate directly in your project as well. +Most projects should use the [`datafusion`] crate directly.
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md index 1c9b37e09fc8..a95ec4828b35 100644 --- a/datafusion/optimizer/README.md +++ b/datafusion/optimizer/README.md @@ -17,7 +17,9 @@ under the License. --> -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +# Apache DataFusion Optimizer + +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains the DataFusion logical optimizer. Please see [Query Optimizer] in the Library User Guide for more information. @@ -26,6 +28,7 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion [query optimizer]: https://datafusion.apache.org/library-user-guide/query-optimizer.html diff --git a/datafusion/physical-expr-adapter/README.md b/datafusion/physical-expr-adapter/README.md index beecd53875f9..02bc144c16f3 100644 --- a/datafusion/physical-expr-adapter/README.md +++ b/datafusion/physical-expr-adapter/README.md @@ -1,4 +1,25 @@ -# DataFusion Physical Expression Adapter + + +# Apache DataFusion Physical Expression Adapter + +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate provides utilities for adapting physical expressions to different schemas in DataFusion. 
@@ -6,3 +27,12 @@ It handles schema differences in file scans by rewriting expressions to match th including type casting, missing columns, and partition values. For detailed documentation, see the [`PhysicalExprAdapter`] trait documentation. + +Most projects should use the [`datafusion`] crate directly, which re-exports +this module. If you are already using the [`datafusion`] crate, there is no +reason to use this crate directly in your project as well. + +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[`datafusion`]: https://crates.io/crates/datafusion +[`physicalexpradapter`]: https://docs.rs/datafusion/latest/datafusion/physical_expr_adapter/trait.PhysicalExprAdapter.html diff --git a/datafusion/physical-expr-common/README.md b/datafusion/physical-expr-common/README.md index fab03fb49752..c318e7468183 100644 --- a/datafusion/physical-expr-common/README.md +++ b/datafusion/physical-expr-common/README.md @@ -17,16 +17,19 @@ under the License. --> -# DataFusion Core Physical Expressions +# Apache DataFusion Core Physical Expressions -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that provides shared APIs for implementing -physical expressions such as `PhysicalExpr` and `PhysicalSortExpr`. +physical expressions such as [`PhysicalExpr`] and [`PhysicalSortExpr`]. Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion +[`physicalexpr`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/trait.PhysicalExpr.html +[`physicalsortexpr`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/struct.PhysicalSortExpr.html diff --git a/datafusion/physical-expr/README.md b/datafusion/physical-expr/README.md index b99f3c4946ce..4c79223b09b8 100644 --- a/datafusion/physical-expr/README.md +++ b/datafusion/physical-expr/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Physical Expressions +# Apache DataFusion Physical Expressions -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that provides data types and utilities for physical expressions. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/physical-optimizer/README.md b/datafusion/physical-optimizer/README.md index 374351b802c8..3efbc19d2e72 100644 --- a/datafusion/physical-optimizer/README.md +++ b/datafusion/physical-optimizer/README.md @@ -17,10 +17,9 @@ under the License. --> -# DataFusion Physical Optimizer +# Apache DataFusion Physical Optimizer -DataFusion is an extensible query execution framework, written in Rust, -that uses Apache Arrow as its in-memory format. 
+[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate contains the physical optimizer for DataFusion. @@ -28,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/physical-plan/README.md b/datafusion/physical-plan/README.md index 37cc1658015c..3a33100f2f35 100644 --- a/datafusion/physical-plan/README.md +++ b/datafusion/physical-plan/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Physical Plan +# Apache DataFusion Physical Plan -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate is a submodule of DataFusion that contains the `ExecutionPlan` trait and the various implementations of that trait for built in operators such as filters, projections, joins, aggregations, etc. @@ -28,5 +28,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/proto-common/Cargo.toml b/datafusion/proto-common/Cargo.toml index 3e2bf6aef8ea..c67c8892a3de 100644 --- a/datafusion/proto-common/Cargo.toml +++ b/datafusion/proto-common/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-proto-common" description = "Protobuf serialization of DataFusion common types" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } diff --git a/datafusion/proto-common/README.md b/datafusion/proto-common/README.md index 67b3b2787006..9c4aa707b0ea 100644 --- a/datafusion/proto-common/README.md +++ b/datafusion/proto-common/README.md @@ -17,17 +17,21 @@ under the License. --> -# `datafusion-proto-common`: Apache DataFusion Protobuf Serialization / Deserialization +# Apache DataFusion Protobuf Common Serialization / Deserialization -This crate contains code to convert Apache [DataFusion] primitive types to and from -bytes, which can be useful for sending data over the network. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. + +This crate contains code to convert DataFusion primitive types to and from +bytes using [Protocol Buffers], which can be useful for sending data over the network. See [API Docs] for details and examples. Most projects should use the [`datafusion-proto`] crate directly, which re-exports -this module. If you are already using the [`datafusion-protp`] crate, there is no +this module. If you are already using the [`datafusion-proto`] crate, there is no reason to use this crate directly in your project as well. 
+[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[protocol buffers]: https://protobuf.dev/ [`datafusion-proto`]: https://crates.io/crates/datafusion-proto -[datafusion]: https://datafusion.apache.org [api docs]: http://docs.rs/datafusion-proto/latest diff --git a/datafusion/proto/README.md b/datafusion/proto/README.md index f8930779db89..c1382c5b8f8f 100644 --- a/datafusion/proto/README.md +++ b/datafusion/proto/README.md @@ -17,13 +17,17 @@ under the License. --> -# `datafusion-proto`: Apache DataFusion Protobuf Serialization / Deserialization +# Apache DataFusion Protobuf Serialization / Deserialization -This crate contains code to convert [Apache DataFusion] plans to and from -bytes, which can be useful for sending plans over the network, for example -when building a distributed query engine. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. + +This crate contains code to convert DataFusion plans to and from bytes using [Protocol Buffers], +which can be useful for sending plans over the network, for example when building a distributed +query engine. See [API Docs] for details and examples. 
-[apache datafusion]: https://datafusion.apache.org +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[protocol buffers]: https://protobuf.dev/ [api docs]: http://docs.rs/datafusion-proto/latest diff --git a/datafusion/pruning/Cargo.toml b/datafusion/pruning/Cargo.toml index 095a5b692eb0..2429123bdf96 100644 --- a/datafusion/pruning/Cargo.toml +++ b/datafusion/pruning/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "datafusion-pruning" description = "DataFusion Pruning Logic" +readme = "README.md" version = { workspace = true } edition = { workspace = true } homepage = { workspace = true } diff --git a/datafusion/pruning/README.md b/datafusion/pruning/README.md new file mode 100644 index 000000000000..4db509193d17 --- /dev/null +++ b/datafusion/pruning/README.md @@ -0,0 +1,34 @@ + + +# Apache DataFusion Pruning Logic + +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. + +This crate is a submodule of DataFusion that contains pruning logic, to analyze filter expressions with +statistics such as min/max values and null counts, proving files / large subsections of files can be skipped +without reading the actual data. + +Most projects should use the [`datafusion`] crate directly, which re-exports +this module. If you are already using the [`datafusion`] crate, there is no +reason to use this crate directly in your project as well. 
+ +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/session/Cargo.toml b/datafusion/session/Cargo.toml index 09bfbe1c4842..0489da61eed8 100644 --- a/datafusion/session/Cargo.toml +++ b/datafusion/session/Cargo.toml @@ -18,11 +18,11 @@ [package] name = "datafusion-session" description = "datafusion-session" +readme = "README.md" authors.workspace = true edition.workspace = true homepage.workspace = true license.workspace = true -readme.workspace = true repository.workspace = true rust-version.workspace = true version.workspace = true diff --git a/datafusion/session/README.md b/datafusion/session/README.md index f029c797366f..4bb605b1e199 100644 --- a/datafusion/session/README.md +++ b/datafusion/session/README.md @@ -17,9 +17,9 @@ under the License. --> -# DataFusion Session +# Apache DataFusion Session -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. This crate provides **session-related abstractions** used in the DataFusion query engine. A _session_ represents the runtime context for query execution, including configuration, runtime environment, function registry, and planning. @@ -27,5 +27,6 @@ Most projects should use the [`datafusion`] crate directly, which re-exports this module. If you are already using the [`datafusion`] crate, there is no reason to use this crate directly in your project as well. 
-[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion diff --git a/datafusion/spark/README.md b/datafusion/spark/README.md index c92ada0ab477..7cb24084cd22 100644 --- a/datafusion/spark/README.md +++ b/datafusion/spark/README.md @@ -17,9 +17,15 @@ specific language governing permissions and limitations under the License. --> -# datafusion-spark: Spark-compatible Expressions +# Apache DataFusion Spark-compatible Expressions -This crate provides Apache Spark-compatible expressions for use with DataFusion. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. + +This crate is a submodule of DataFusion that provides [Apache Spark] compatible expressions for use with DataFusion. + +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[apache spark]: https://spark.apache.org/ ## Testing Guide @@ -29,12 +35,15 @@ or `coerce_types`) is not applied. Therefore, direct invocation tests should only be used to verify that the function is correctly implemented. Please be sure to add additional tests beyond direct invocation. -For more detailed testing guidelines, refer to -the [Spark SQLLogicTest README](../sqllogictest/test_files/spark/README.md). +For more detailed testing guidelines, refer to the [Spark SQLLogicTest README]. ## Implementation References When implementing Spark-compatible functions, you can check if there are existing implementations in -the [Sail](https://github.com/lakehq/sail) or [Comet](https://github.com/apache/datafusion-comet) projects first. +the [Sail] or [Comet] projects first. If you do port functionality from these sources, make sure to port over the corresponding tests too, to ensure correctness and compatibility. 
+ +[spark sqllogictest readme]: ../sqllogictest/test_files/spark/README.md +[sail]: https://github.com/lakehq/sail +[comet]: https://github.com/apache/datafusion-comet diff --git a/datafusion/sql/README.md b/datafusion/sql/README.md index d5ef3114c14e..d0e5e498e514 100644 --- a/datafusion/sql/README.md +++ b/datafusion/sql/README.md @@ -17,10 +17,10 @@ under the License. --> -# DataFusion SQL Query Planner +# Apache DataFusion SQL Query Planner This crate provides a general purpose SQL query planner that can parse SQL and translate queries into logical -plans. Although this crate is used by the [DataFusion][df] query engine, it was designed to be easily usable from any +plans. Although this crate is used by the [Apache DataFusion] query engine, it was designed to be easily usable from any project that requires a SQL query planner and does not make any assumptions about how the resulting logical plan will be translated to a physical plan. For example, there is no concept of row-based versus columnar execution in the logical plan. @@ -29,12 +29,12 @@ Note that the [`datafusion`] crate re-exports this module. If you are already using the [`datafusion`] crate in your project, there is no reason to use this crate directly in your project as well. -[df]: https://crates.io/crates/datafusion +[apache datafusion]: https://datafusion.apache.org/ [`datafusion`]: https://crates.io/crates/datafusion ## Example Usage -See the [examples](examples) directory for fully working examples. +See the [examples] directory for fully working examples. Here is an example of producing a logical plan from a SQL string. @@ -69,8 +69,8 @@ fn main() { ``` This is the logical plan that is produced from this example. Note that this is an **unoptimized** -logical plan. The [datafusion-optimizer](https://crates.io/crates/datafusion-optimizer) crate provides a query -optimizer that can be applied to plans produced by this crate. +logical plan. 
The [datafusion-optimizer] crate provides a query optimizer that can be applied to +plans produced by this crate. ``` Sort: state_tax DESC NULLS FIRST @@ -87,4 +87,5 @@ Sort: state_tax DESC NULLS FIRST TableScan: orders ``` -[df]: https://crates.io/crates/datafusion +[examples]: examples +[datafusion-optimizer]: https://crates.io/crates/datafusion-optimizer diff --git a/datafusion/sqllogictest/README.md b/datafusion/sqllogictest/README.md index 3fdb29c9d5cd..a389ae1ef60e 100644 --- a/datafusion/sqllogictest/README.md +++ b/datafusion/sqllogictest/README.md @@ -17,23 +17,29 @@ under the License. --> -# DataFusion sqllogictest +# Apache DataFusion sqllogictest -[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. -This crate is a submodule of DataFusion that contains an implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). +This crate is a submodule of DataFusion that contains an implementation of [sqllogictest]. -[df]: https://crates.io/crates/datafusion +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[sqllogictest]: https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki ## Overview -This crate uses [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) to parse and run `.slt` files in the -[`test_files`](test_files) directory of this crate or the [`data/sqlite`](https://github.com/apache/datafusion-testing/tree/main/data/sqlite) -directory of the [datafusion-testing](https://github.com/apache/datafusion-testing) crate. +This crate uses [sqllogictest-rs] to parse and run `.slt` files in the [`test_files`] directory of +this crate or the [`data/sqlite`] directory of the [datafusion-testing] repository. 
+ +[sqllogictest-rs]: https://github.com/risinglightdb/sqllogictest-rs +[`test_files`]: test_files +[`data/sqlite`]: https://github.com/apache/datafusion-testing/tree/main/data/sqlite +[datafusion-testing]: https://github.com/apache/datafusion-testing ## Testing setup -1. `rustup update stable` DataFusion uses the latest stable release of rust +1. `rustup update stable` DataFusion uses the latest stable release of Rust 2. `git submodule init` 3. `git submodule update --init --remote --recursive` diff --git a/datafusion/substrait/README.md b/datafusion/substrait/README.md index 8e7f99b7df38..d18d7bda5e3b 100644 --- a/datafusion/substrait/README.md +++ b/datafusion/substrait/README.md @@ -19,9 +19,12 @@ # Apache DataFusion Substrait -This crate contains a [Substrait] producer and consumer for [Apache DataFusion] +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. + +This crate is a submodule of DataFusion that provides a [Substrait] producer and consumer for DataFusion plans. See [API Docs] for details and examples. +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ [substrait]: https://substrait.io -[apache datafusion]: https://datafusion.apache.org [api docs]: https://docs.rs/datafusion-substrait/latest