From dd846d70a4cac882a4e1c33d7bf353b563b6d095 Mon Sep 17 00:00:00 2001 From: Sergey Zhukov Date: Sun, 16 Nov 2025 09:29:33 +0300 Subject: [PATCH 1/2] Consolidate sql operations examples (#18142) --- datafusion-examples/README.md | 8 +- .../{sql_analysis.rs => sql_ops/analysis.rs} | 271 +++++++++--------- .../{sql_dialect.rs => sql_ops/dialect.rs} | 3 +- .../{sql_frontend.rs => sql_ops/frontend.rs} | 2 +- datafusion-examples/examples/sql_ops/main.rs | 105 +++++++ .../{sql_query.rs => sql_ops/query.rs} | 3 +- 6 files changed, 248 insertions(+), 144 deletions(-) rename datafusion-examples/examples/{sql_analysis.rs => sql_ops/analysis.rs} (98%) rename datafusion-examples/examples/{sql_dialect.rs => sql_ops/dialect.rs} (98%) rename datafusion-examples/examples/{sql_frontend.rs => sql_ops/frontend.rs} (99%) create mode 100644 datafusion-examples/examples/sql_ops/main.rs rename datafusion-examples/examples/{sql_query.rs => sql_ops/query.rs} (99%) diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index 1fd6d9f48273..937fb779ec5c 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -90,10 +90,10 @@ cargo run --example dataframe - [`examples/udf/simple_udf.rs`](examples/udf/simple_udf.rs): Define and invoke a User Defined Scalar Function (UDF) - [`examples/udf/simple_udtf.rs`](examples/udf/simple_udtf.rs): Define and invoke a User Defined Table Function (UDTF) - [`examples/udf/simple_udfw.rs`](examples/udf/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF) -- [`sql_analysis.rs`](examples/sql_analysis.rs): Analyse SQL queries with DataFusion structures -- [`sql_frontend.rs`](examples/sql_frontend.rs): Create LogicalPlans (only) from sql strings -- [`sql_dialect.rs`](examples/sql_dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser` -- [`sql_query.rs`](examples/memtable.rs): Query data using SQL (in memory `RecordBatches`, local Parquet files) +- 
[`examples/sql_ops/analysis.rs`](examples/sql_ops/analysis.rs): Analyse SQL queries with DataFusion structures +- [`examples/sql_ops/frontend.rs`](examples/sql_ops/frontend.rs): Create LogicalPlans (only) from sql strings +- [`examples/sql_ops/dialect.rs`](examples/sql_ops/dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser` +- [`examples/sql_ops/query.rs`](examples/sql_ops/query.rs): Query data using SQL (in memory `RecordBatches`, local Parquet files) ## Distributed diff --git a/datafusion-examples/examples/sql_analysis.rs b/datafusion-examples/examples/sql_ops/analysis.rs similarity index 98% rename from datafusion-examples/examples/sql_analysis.rs rename to datafusion-examples/examples/sql_ops/analysis.rs index 4ff669faf1d0..7d1cd0092e5b 100644 --- a/datafusion-examples/examples/sql_analysis.rs +++ b/datafusion-examples/examples/sql_ops/analysis.rs @@ -32,141 +32,9 @@ use datafusion::{ }; use test_utils::tpcds::tpcds_schemas; -/// Counts the total number of joins in a plan -fn total_join_count(plan: &LogicalPlan) -> usize { - let mut total = 0; - - // We can use the TreeNode API to walk over a LogicalPlan. - plan.apply(|node| { - // if we encounter a join we update the running count - if matches!(node, LogicalPlan::Join(_)) { - total += 1; - } - Ok(TreeNodeRecursion::Continue) - }) - .unwrap(); - - total -} - -/// Counts the total number of joins in a plan and collects every join tree in -/// the plan with their respective join count. 
-/// -/// Join Tree Definition: the largest subtree consisting entirely of joins -/// -/// For example, this plan: -/// -/// ```text -/// JOIN -/// / \ -/// A JOIN -/// / \ -/// B C -/// ``` -/// -/// has a single join tree `(A-B-C)` which will result in `(2, [2])` -/// -/// This plan: -/// -/// ```text -/// JOIN -/// / \ -/// A GROUP -/// | -/// JOIN -/// / \ -/// B C -/// ``` -/// -/// Has two join trees `(A-, B-C)` which will result in `(2, [1, 1])` -fn count_trees(plan: &LogicalPlan) -> (usize, Vec<usize>) { - // this works the same way as `total_count`, but now when we encounter a Join - // we try to collect it's entire tree - let mut to_visit = vec![plan]; - let mut total = 0; - let mut groups = vec![]; - - while let Some(node) = to_visit.pop() { - // if we encounter a join, we know were at the root of the tree - // count this tree and recurse on it's inputs - if matches!(node, LogicalPlan::Join(_)) { - let (group_count, inputs) = count_tree(node); - total += group_count; - groups.push(group_count); - to_visit.extend(inputs); - } else { - to_visit.extend(node.inputs()); - } - } - - (total, groups) -} - -/// Count the entire join tree and return its inputs using TreeNode API -/// -/// For example, if this function receives following plan: -/// -/// ```text -/// JOIN -/// / \ -/// A GROUP -/// | -/// JOIN -/// / \ -/// B C -/// ``` -/// -/// It will return `(1, [A, GROUP])` -fn count_tree(join: &LogicalPlan) -> (usize, Vec<&LogicalPlan>) { - let mut inputs = Vec::new(); - let mut total = 0; - - join.apply(|node| { - // Some extra knowledge: - // - // optimized plans have their projections pushed down as far as - // possible, which sometimes results in a projection going in between 2 - // subsequent joins giving the illusion these joins are not "related", - // when in fact they are. 
- // - // This plan: - // JOIN - // / \ - // A PROJECTION - // | - // JOIN - // / \ - // B C - // - // is the same as: - // - // JOIN - // / \ - // A JOIN - // / \ - // B C - // we can continue the recursion in this case - if let LogicalPlan::Projection(_) = node { - return Ok(TreeNodeRecursion::Continue); - } - - // any join we count - if matches!(node, LogicalPlan::Join(_)) { - total += 1; - Ok(TreeNodeRecursion::Continue) - } else { - inputs.push(node); - // skip children of input node - Ok(TreeNodeRecursion::Jump) - } - }) - .unwrap(); - - (total, inputs) -} - -#[tokio::main] -async fn main() -> Result<()> { +/// Demonstrates how to analyze a SQL query by counting JOINs and identifying +/// join-trees using DataFusion’s `LogicalPlan` and `TreeNode` API. +pub async fn analysis() -> Result<()> { // To show how we can count the joins in a sql query we'll be using query 88 // from the TPC-DS benchmark. // @@ -310,3 +178,136 @@ from Ok(()) } + +/// Counts the total number of joins in a plan +fn total_join_count(plan: &LogicalPlan) -> usize { + let mut total = 0; + + // We can use the TreeNode API to walk over a LogicalPlan. + plan.apply(|node| { + // if we encounter a join we update the running count + if matches!(node, LogicalPlan::Join(_)) { + total += 1; + } + Ok(TreeNodeRecursion::Continue) + }) + .unwrap(); + + total +} + +/// Counts the total number of joins in a plan and collects every join tree in +/// the plan with their respective join count. 
+/// +/// Join Tree Definition: the largest subtree consisting entirely of joins +/// +/// For example, this plan: +/// +/// ```text +/// JOIN +/// / \ +/// A JOIN +/// / \ +/// B C +/// ``` +/// +/// has a single join tree `(A-B-C)` which will result in `(2, [2])` +/// +/// This plan: +/// +/// ```text +/// JOIN +/// / \ +/// A GROUP +/// | +/// JOIN +/// / \ +/// B C +/// ``` +/// +/// Has two join trees `(A-, B-C)` which will result in `(2, [1, 1])` +fn count_trees(plan: &LogicalPlan) -> (usize, Vec<usize>) { + // this works the same way as `total_count`, but now when we encounter a Join + // we try to collect it's entire tree + let mut to_visit = vec![plan]; + let mut total = 0; + let mut groups = vec![]; + + while let Some(node) = to_visit.pop() { + // if we encounter a join, we know were at the root of the tree + // count this tree and recurse on it's inputs + if matches!(node, LogicalPlan::Join(_)) { + let (group_count, inputs) = count_tree(node); + total += group_count; + groups.push(group_count); + to_visit.extend(inputs); + } else { + to_visit.extend(node.inputs()); + } + } + + (total, groups) +} + +/// Count the entire join tree and return its inputs using TreeNode API +/// +/// For example, if this function receives following plan: +/// +/// ```text +/// JOIN +/// / \ +/// A GROUP +/// | +/// JOIN +/// / \ +/// B C +/// ``` +/// +/// It will return `(1, [A, GROUP])` +fn count_tree(join: &LogicalPlan) -> (usize, Vec<&LogicalPlan>) { + let mut inputs = Vec::new(); + let mut total = 0; + + join.apply(|node| { + // Some extra knowledge: + // + // optimized plans have their projections pushed down as far as + // possible, which sometimes results in a projection going in between 2 + // subsequent joins giving the illusion these joins are not "related", + // when in fact they are. 
+ // + // This plan: + // JOIN + // / \ + // A PROJECTION + // | + // JOIN + // / \ + // B C + // + // is the same as: + // + // JOIN + // / \ + // A JOIN + // / \ + // B C + // we can continue the recursion in this case + if let LogicalPlan::Projection(_) = node { + return Ok(TreeNodeRecursion::Continue); + } + + // any join we count + if matches!(node, LogicalPlan::Join(_)) { + total += 1; + Ok(TreeNodeRecursion::Continue) + } else { + inputs.push(node); + // skip children of input node + Ok(TreeNodeRecursion::Jump) + } + }) + .unwrap(); + + (total, inputs) +} diff --git a/datafusion-examples/examples/sql_dialect.rs b/datafusion-examples/examples/sql_ops/dialect.rs similarity index 98% rename from datafusion-examples/examples/sql_dialect.rs rename to datafusion-examples/examples/sql_ops/dialect.rs index 20b515506f3b..986b3e31407e 100644 --- a/datafusion-examples/examples/sql_dialect.rs +++ b/datafusion-examples/examples/sql_ops/dialect.rs @@ -26,8 +26,7 @@ use datafusion::sql::{ /// This example demonstrates how to use the DFParser to parse a statement in a custom way /// /// This technique can be used to implement a custom SQL dialect, for example. -#[tokio::main] -async fn main() -> Result<()> { +pub async fn dialect() -> Result<()> { let mut my_parser = MyParser::new("COPY source_table TO 'file.fasta' STORED AS FASTA")?; diff --git a/datafusion-examples/examples/sql_frontend.rs b/datafusion-examples/examples/sql_ops/frontend.rs similarity index 99% rename from datafusion-examples/examples/sql_frontend.rs rename to datafusion-examples/examples/sql_ops/frontend.rs index 1fc9ce24ecbb..432af968bcf3 100644 --- a/datafusion-examples/examples/sql_frontend.rs +++ b/datafusion-examples/examples/sql_ops/frontend.rs @@ -44,7 +44,7 @@ use std::sync::Arc; /// /// In this example, we demonstrate how to use the lower level APIs directly, /// which only requires the `datafusion-sql` dependency. 
-pub fn main() -> Result<()> { +pub fn frontend() -> Result<()> { // First, we parse the SQL string. Note that we use the DataFusion // Parser, which wraps the `sqlparser-rs` SQL parser and adds DataFusion // specific syntax such as `CREATE EXTERNAL TABLE` diff --git a/datafusion-examples/examples/sql_ops/main.rs b/datafusion-examples/examples/sql_ops/main.rs new file mode 100644 index 000000000000..d67b16bf5c20 --- /dev/null +++ b/datafusion-examples/examples/sql_ops/main.rs @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # SQL Examples +//! +//! These examples demonstrate SQL operations in DataFusion. +//! +//! ## Usage +//! ```bash +//! cargo run --example sql_ops -- [analysis|dialect|frontend|query] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `analysis` — analyse SQL queries with DataFusion structures +//! - `dialect` — implementing a custom SQL dialect on top of DFParser +//! - `frontend` — create LogicalPlans (only) from sql strings +//! 
- `query` — query data using SQL (in memory RecordBatches, local Parquet files) + +mod analysis; +mod dialect; +mod frontend; +mod query; + +use std::str::FromStr; + +use datafusion::error::{DataFusionError, Result}; + +enum ExampleKind { + Analysis, + Dialect, + Frontend, + Query, +} + +impl AsRef<str> for ExampleKind { + fn as_ref(&self) -> &str { + match self { + Self::Analysis => "analysis", + Self::Dialect => "dialect", + Self::Frontend => "frontend", + Self::Query => "query", + } + } +} + +impl FromStr for ExampleKind { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result<Self> { + match s { + "analysis" => Ok(Self::Analysis), + "dialect" => Ok(Self::Dialect), + "frontend" => Ok(Self::Frontend), + "query" => Ok(Self::Query), + _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), + } + } +} + +impl ExampleKind { + const ALL: [Self; 4] = [Self::Analysis, Self::Dialect, Self::Frontend, Self::Query]; + + const EXAMPLE_NAME: &str = "sql_ops"; + + fn variants() -> Vec<&'static str> { + Self::ALL.iter().map(|x| x.as_ref()).collect() + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::variants().join("|") + ); + + let arg = std::env::args().nth(1).ok_or_else(|| { + eprintln!("{usage}"); + DataFusionError::Execution("Missing argument".to_string()) + })?; + + match arg.parse::<ExampleKind>()? 
{ + ExampleKind::Analysis => analysis::analysis().await?, + ExampleKind::Dialect => dialect::dialect().await?, + ExampleKind::Frontend => frontend::frontend()?, + ExampleKind::Query => query::query().await?, + } + + Ok(()) +} diff --git a/datafusion-examples/examples/sql_query.rs b/datafusion-examples/examples/sql_ops/query.rs similarity index 99% rename from datafusion-examples/examples/sql_query.rs rename to datafusion-examples/examples/sql_ops/query.rs index 4da07d33d03d..372b5ef51428 100644 --- a/datafusion-examples/examples/sql_query.rs +++ b/datafusion-examples/examples/sql_ops/query.rs @@ -32,8 +32,7 @@ use std::sync::Arc; /// /// [`query_memtable`]: a simple query against a [`MemTable`] /// [`query_parquet`]: a simple query against a directory with multiple Parquet files -#[tokio::main] -async fn main() -> Result<()> { +pub async fn query() -> Result<()> { query_memtable().await?; query_parquet().await?; Ok(()) From ef936e47fd10b77b7ed45672df68a0a591af9c8d Mon Sep 17 00:00:00 2001 From: Sergey Zhukov Date: Tue, 18 Nov 2025 12:49:14 +0300 Subject: [PATCH 2/2] Add comment how to run example in module level --- datafusion-examples/examples/builtin_functions/date_time.rs | 2 ++ .../examples/builtin_functions/function_factory.rs | 2 ++ datafusion-examples/examples/builtin_functions/regexp.rs | 2 ++ .../examples/custom_data_source/csv_json_opener.rs | 2 ++ .../examples/custom_data_source/csv_sql_streaming.rs | 2 ++ .../examples/custom_data_source/custom_datasource.rs | 2 ++ .../examples/custom_data_source/custom_file_casts.rs | 2 ++ .../examples/custom_data_source/custom_file_format.rs | 2 ++ .../examples/custom_data_source/file_stream_provider.rs | 2 ++ datafusion-examples/examples/data_io/catalog.rs | 2 ++ datafusion-examples/examples/data_io/json_shredding.rs | 2 ++ datafusion-examples/examples/data_io/parquet_advanced_index.rs | 2 ++ datafusion-examples/examples/data_io/parquet_embedded_index.rs | 2 ++ datafusion-examples/examples/data_io/parquet_encrypted.rs 
| 2 ++ .../examples/data_io/parquet_encrypted_with_kms.rs | 2 ++ datafusion-examples/examples/data_io/parquet_exec_visitor.rs | 2 ++ datafusion-examples/examples/data_io/parquet_index.rs | 2 ++ datafusion-examples/examples/data_io/query_http_csv.rs | 2 ++ datafusion-examples/examples/data_io/remote_catalog.rs | 2 ++ datafusion-examples/examples/flight/client.rs | 2 ++ datafusion-examples/examples/flight/server.rs | 2 ++ datafusion-examples/examples/flight/sql_server.rs | 2 ++ datafusion-examples/examples/query_planning/analyzer_rule.rs | 2 ++ datafusion-examples/examples/query_planning/expr_api.rs | 2 ++ datafusion-examples/examples/query_planning/optimizer_rule.rs | 2 ++ datafusion-examples/examples/query_planning/parse_sql_expr.rs | 2 ++ datafusion-examples/examples/query_planning/plan_to_sql.rs | 2 ++ datafusion-examples/examples/query_planning/planner_api.rs | 2 ++ datafusion-examples/examples/query_planning/pruning.rs | 2 ++ datafusion-examples/examples/query_planning/thread_pools.rs | 2 ++ datafusion-examples/examples/sql_ops/analysis.rs | 2 ++ datafusion-examples/examples/sql_ops/dialect.rs | 2 ++ datafusion-examples/examples/sql_ops/frontend.rs | 2 ++ datafusion-examples/examples/sql_ops/query.rs | 2 ++ datafusion-examples/examples/udf/advanced_udaf.rs | 2 ++ datafusion-examples/examples/udf/advanced_udf.rs | 2 ++ datafusion-examples/examples/udf/advanced_udwf.rs | 2 ++ datafusion-examples/examples/udf/async_udf.rs | 2 ++ datafusion-examples/examples/udf/simple_udaf.rs | 2 ++ datafusion-examples/examples/udf/simple_udf.rs | 2 ++ datafusion-examples/examples/udf/simple_udtf.rs | 2 ++ datafusion-examples/examples/udf/simple_udwf.rs | 2 ++ 42 files changed, 84 insertions(+) diff --git a/datafusion-examples/examples/builtin_functions/date_time.rs b/datafusion-examples/examples/builtin_functions/date_time.rs index 178cba979cb9..7fb2c25f560f 100644 --- a/datafusion-examples/examples/builtin_functions/date_time.rs +++ 
b/datafusion-examples/examples/builtin_functions/date_time.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::sync::Arc; use arrow::array::{Date32Array, Int32Array}; diff --git a/datafusion-examples/examples/builtin_functions/function_factory.rs b/datafusion-examples/examples/builtin_functions/function_factory.rs index 5d41e7a26071..164c5b892a78 100644 --- a/datafusion-examples/examples/builtin_functions/function_factory.rs +++ b/datafusion-examples/examples/builtin_functions/function_factory.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::datatypes::DataType; use datafusion::common::tree_node::{Transformed, TreeNode}; use datafusion::common::{exec_datafusion_err, exec_err, internal_err, DataFusionError}; diff --git a/datafusion-examples/examples/builtin_functions/regexp.rs b/datafusion-examples/examples/builtin_functions/regexp.rs index 13c078693028..b8e15431603d 100644 --- a/datafusion-examples/examples/builtin_functions/regexp.rs +++ b/datafusion-examples/examples/builtin_functions/regexp.rs @@ -16,6 +16,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::common::{assert_batches_eq, assert_contains}; use datafusion::error::Result; use datafusion::prelude::*; diff --git a/datafusion-examples/examples/custom_data_source/csv_json_opener.rs b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs index 4205bbcdf86a..6fad8f4d5824 100644 --- a/datafusion-examples/examples/custom_data_source/csv_json_opener.rs +++ b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema}; diff --git a/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs b/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs index aca63c4f35c2..554382ea9549 100644 --- a/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs +++ b/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::common::test_util::datafusion_test_data; use datafusion::error::Result; use datafusion::prelude::*; diff --git a/datafusion-examples/examples/custom_data_source/custom_datasource.rs b/datafusion-examples/examples/custom_data_source/custom_datasource.rs index 2213d50fccda..72c05c1a231e 100644 --- a/datafusion-examples/examples/custom_data_source/custom_datasource.rs +++ b/datafusion-examples/examples/custom_data_source/custom_datasource.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::any::Any; use std::collections::{BTreeMap, HashMap}; use std::fmt::{self, Debug, Formatter}; diff --git a/datafusion-examples/examples/custom_data_source/custom_file_casts.rs b/datafusion-examples/examples/custom_data_source/custom_file_casts.rs index 31ec2845c611..6d8fd358607d 100644 --- a/datafusion-examples/examples/custom_data_source/custom_file_casts.rs +++ b/datafusion-examples/examples/custom_data_source/custom_file_casts.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use std::sync::Arc; use arrow::array::{record_batch, RecordBatch}; diff --git a/datafusion-examples/examples/custom_data_source/custom_file_format.rs b/datafusion-examples/examples/custom_data_source/custom_file_format.rs index 510fa53c593f..05a34d3a83d9 100644 --- a/datafusion-examples/examples/custom_data_source/custom_file_format.rs +++ b/datafusion-examples/examples/custom_data_source/custom_file_format.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::{any::Any, sync::Arc}; use arrow::{ diff --git a/datafusion-examples/examples/custom_data_source/file_stream_provider.rs b/datafusion-examples/examples/custom_data_source/file_stream_provider.rs index 55d2cc8cc0af..b1f471b5b344 100644 --- a/datafusion-examples/examples/custom_data_source/file_stream_provider.rs +++ b/datafusion-examples/examples/custom_data_source/file_stream_provider.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + /// Demonstrates how to use [`FileStreamProvider`] and [`StreamTable`] to stream data /// from a file-like source (FIFO) into DataFusion for continuous querying. /// diff --git a/datafusion-examples/examples/data_io/catalog.rs b/datafusion-examples/examples/data_io/catalog.rs index 11614d074703..e68e48bdeaba 100644 --- a/datafusion-examples/examples/data_io/catalog.rs +++ b/datafusion-examples/examples/data_io/catalog.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! Simple example of a catalog/schema implementation. 
use async_trait::async_trait; use datafusion::{ diff --git a/datafusion-examples/examples/data_io/json_shredding.rs b/datafusion-examples/examples/data_io/json_shredding.rs index d93b2167c1ab..baa0a2e6602e 100644 --- a/datafusion-examples/examples/data_io/json_shredding.rs +++ b/datafusion-examples/examples/data_io/json_shredding.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::any::Any; use std::sync::Arc; diff --git a/datafusion-examples/examples/data_io/parquet_advanced_index.rs b/datafusion-examples/examples/data_io/parquet_advanced_index.rs index af1e03fe4ddb..304e490bd63b 100644 --- a/datafusion-examples/examples/data_io/parquet_advanced_index.rs +++ b/datafusion-examples/examples/data_io/parquet_advanced_index.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::any::Any; use std::collections::{HashMap, HashSet}; use std::fs::File; diff --git a/datafusion-examples/examples/data_io/parquet_embedded_index.rs b/datafusion-examples/examples/data_io/parquet_embedded_index.rs index 0275a3141f7d..88a052546b5d 100644 --- a/datafusion-examples/examples/data_io/parquet_embedded_index.rs +++ b/datafusion-examples/examples/data_io/parquet_embedded_index.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! Embedding and using a custom index in Parquet files //! //! # Background diff --git a/datafusion-examples/examples/data_io/parquet_encrypted.rs b/datafusion-examples/examples/data_io/parquet_encrypted.rs index 3f26c436a9c7..e3070cdddeac 100644 --- a/datafusion-examples/examples/data_io/parquet_encrypted.rs +++ b/datafusion-examples/examples/data_io/parquet_encrypted.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! 
See `main.rs` for how to run it. + use datafusion::common::DataFusionError; use datafusion::config::{ConfigFileEncryptionProperties, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; diff --git a/datafusion-examples/examples/data_io/parquet_encrypted_with_kms.rs b/datafusion-examples/examples/data_io/parquet_encrypted_with_kms.rs index 3dca002db452..1a9bf56c09b3 100644 --- a/datafusion-examples/examples/data_io/parquet_encrypted_with_kms.rs +++ b/datafusion-examples/examples/data_io/parquet_encrypted_with_kms.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow_schema::SchemaRef; use async_trait::async_trait; diff --git a/datafusion-examples/examples/data_io/parquet_exec_visitor.rs b/datafusion-examples/examples/data_io/parquet_exec_visitor.rs index e0611fa144cd..925c202eac45 100644 --- a/datafusion-examples/examples/data_io/parquet_exec_visitor.rs +++ b/datafusion-examples/examples/data_io/parquet_exec_visitor.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::sync::Arc; use datafusion::datasource::file_format::parquet::ParquetFormat; diff --git a/datafusion-examples/examples/data_io/parquet_index.rs b/datafusion-examples/examples/data_io/parquet_index.rs index 4cca73b1f9be..7c708046f8a8 100644 --- a/datafusion-examples/examples/data_io/parquet_index.rs +++ b/datafusion-examples/examples/data_io/parquet_index.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use arrow::array::{ Array, ArrayRef, AsArray, BooleanArray, Int32Array, RecordBatch, StringArray, UInt64Array, diff --git a/datafusion-examples/examples/data_io/query_http_csv.rs b/datafusion-examples/examples/data_io/query_http_csv.rs index be27f8ad21d8..71421e6270cc 100644 --- a/datafusion-examples/examples/data_io/query_http_csv.rs +++ b/datafusion-examples/examples/data_io/query_http_csv.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::error::Result; use datafusion::prelude::*; use object_store::http::HttpBuilder; diff --git a/datafusion-examples/examples/data_io/remote_catalog.rs b/datafusion-examples/examples/data_io/remote_catalog.rs index e4898e19d4c9..3e1568fb4143 100644 --- a/datafusion-examples/examples/data_io/remote_catalog.rs +++ b/datafusion-examples/examples/data_io/remote_catalog.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! /// This example shows how to implement the DataFusion [`CatalogProvider`] API /// for catalogs that are remote (require network access) and/or offer only /// asynchronous APIs such as [Polaris], [Unity], and [Hive]. diff --git a/datafusion-examples/examples/flight/client.rs b/datafusion-examples/examples/flight/client.rs index 031beea47d57..484576975a6f 100644 --- a/datafusion-examples/examples/flight/client.rs +++ b/datafusion-examples/examples/flight/client.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use std::collections::HashMap; use std::sync::Arc; use tonic::transport::Endpoint; diff --git a/datafusion-examples/examples/flight/server.rs b/datafusion-examples/examples/flight/server.rs index dc75287cf2e2..e4c05a13e8eb 100644 --- a/datafusion-examples/examples/flight/server.rs +++ b/datafusion-examples/examples/flight/server.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::ipc::writer::{CompressionContext, DictionaryTracker, IpcDataGenerator}; use std::sync::Arc; diff --git a/datafusion-examples/examples/flight/sql_server.rs b/datafusion-examples/examples/flight/sql_server.rs index d86860f9d436..529296fac869 100644 --- a/datafusion-examples/examples/flight/sql_server.rs +++ b/datafusion-examples/examples/flight/sql_server.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{ArrayRef, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use arrow::ipc::writer::IpcWriteOptions; diff --git a/datafusion-examples/examples/query_planning/analyzer_rule.rs b/datafusion-examples/examples/query_planning/analyzer_rule.rs index b6c97679cb43..df1ea4c1d75a 100644 --- a/datafusion-examples/examples/query_planning/analyzer_rule.rs +++ b/datafusion-examples/examples/query_planning/analyzer_rule.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use datafusion::common::config::ConfigOptions; use datafusion::common::tree_node::{Transformed, TreeNode}; diff --git a/datafusion-examples/examples/query_planning/expr_api.rs b/datafusion-examples/examples/query_planning/expr_api.rs index 236ac4319bb6..225be0ee13c6 100644 --- a/datafusion-examples/examples/query_planning/expr_api.rs +++ b/datafusion-examples/examples/query_planning/expr_api.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::collections::HashMap; use std::sync::Arc; diff --git a/datafusion-examples/examples/query_planning/optimizer_rule.rs b/datafusion-examples/examples/query_planning/optimizer_rule.rs index 4af5ef50b3df..34aef85c6496 100644 --- a/datafusion-examples/examples/query_planning/optimizer_rule.rs +++ b/datafusion-examples/examples/query_planning/optimizer_rule.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow::datatypes::DataType; use datafusion::common::tree_node::{Transformed, TreeNode}; diff --git a/datafusion-examples/examples/query_planning/parse_sql_expr.rs b/datafusion-examples/examples/query_planning/parse_sql_expr.rs index 44e6b3cf5f67..376120de9d49 100644 --- a/datafusion-examples/examples/query_planning/parse_sql_expr.rs +++ b/datafusion-examples/examples/query_planning/parse_sql_expr.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use arrow::datatypes::{DataType, Field, Schema}; use datafusion::common::DFSchema; use datafusion::logical_expr::{col, lit}; diff --git a/datafusion-examples/examples/query_planning/plan_to_sql.rs b/datafusion-examples/examples/query_planning/plan_to_sql.rs index f23f083acd4a..95e04907d9be 100644 --- a/datafusion-examples/examples/query_planning/plan_to_sql.rs +++ b/datafusion-examples/examples/query_planning/plan_to_sql.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::common::DFSchemaRef; use datafusion::error::Result; use datafusion::logical_expr::sqlparser::ast::Statement; diff --git a/datafusion-examples/examples/query_planning/planner_api.rs b/datafusion-examples/examples/query_planning/planner_api.rs index dd3643471ead..9b8aa1c2fe64 100644 --- a/datafusion-examples/examples/query_planning/planner_api.rs +++ b/datafusion-examples/examples/query_planning/planner_api.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::error::Result; use datafusion::logical_expr::LogicalPlan; use datafusion::physical_plan::displayable; diff --git a/datafusion-examples/examples/query_planning/pruning.rs b/datafusion-examples/examples/query_planning/pruning.rs index 7c42d151c05b..d7562139c2b6 100644 --- a/datafusion-examples/examples/query_planning/pruning.rs +++ b/datafusion-examples/examples/query_planning/pruning.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use std::collections::HashSet; use std::sync::Arc; diff --git a/datafusion-examples/examples/query_planning/thread_pools.rs b/datafusion-examples/examples/query_planning/thread_pools.rs index 768638d11a1d..6fc7d51e91c1 100644 --- a/datafusion-examples/examples/query_planning/thread_pools.rs +++ b/datafusion-examples/examples/query_planning/thread_pools.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example shows how to use separate thread pools (tokio [`Runtime`]))s to //! run the IO and CPU intensive parts of DataFusion plans. //! diff --git a/datafusion-examples/examples/sql_ops/analysis.rs b/datafusion-examples/examples/sql_ops/analysis.rs index 7d1cd0092e5b..266c6507724a 100644 --- a/datafusion-examples/examples/sql_ops/analysis.rs +++ b/datafusion-examples/examples/sql_ops/analysis.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example shows how to use the structures that DataFusion provides to perform //! Analysis on SQL queries and their plans. //! diff --git a/datafusion-examples/examples/sql_ops/dialect.rs b/datafusion-examples/examples/sql_ops/dialect.rs index 986b3e31407e..62a9f3fa5604 100644 --- a/datafusion-examples/examples/sql_ops/dialect.rs +++ b/datafusion-examples/examples/sql_ops/dialect.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use std::fmt::Display; use datafusion::error::{DataFusionError, Result}; diff --git a/datafusion-examples/examples/sql_ops/frontend.rs b/datafusion-examples/examples/sql_ops/frontend.rs index 432af968bcf3..2cfb9de05c7b 100644 --- a/datafusion-examples/examples/sql_ops/frontend.rs +++ b/datafusion-examples/examples/sql_ops/frontend.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::common::{plan_err, TableReference}; use datafusion::config::ConfigOptions; diff --git a/datafusion-examples/examples/sql_ops/query.rs b/datafusion-examples/examples/sql_ops/query.rs index 372b5ef51428..3e052f0823b9 100644 --- a/datafusion-examples/examples/sql_ops/query.rs +++ b/datafusion-examples/examples/sql_ops/query.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::arrow::array::{UInt64Array, UInt8Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::arrow::record_batch::RecordBatch; diff --git a/datafusion-examples/examples/udf/advanced_udaf.rs b/datafusion-examples/examples/udf/advanced_udaf.rs index 81e227bfacee..b4916e5bd091 100644 --- a/datafusion-examples/examples/udf/advanced_udaf.rs +++ b/datafusion-examples/examples/udf/advanced_udaf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. 
+ use arrow::datatypes::{Field, Schema}; use datafusion::physical_expr::NullState; use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; diff --git a/datafusion-examples/examples/udf/advanced_udf.rs b/datafusion-examples/examples/udf/advanced_udf.rs index bb5a68e90cbb..7459501a1c1c 100644 --- a/datafusion-examples/examples/udf/advanced_udf.rs +++ b/datafusion-examples/examples/udf/advanced_udf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::any::Any; use std::sync::Arc; diff --git a/datafusion-examples/examples/udf/advanced_udwf.rs b/datafusion-examples/examples/udf/advanced_udwf.rs index 86f215e019c7..37b6671639e4 100644 --- a/datafusion-examples/examples/udf/advanced_udwf.rs +++ b/datafusion-examples/examples/udf/advanced_udwf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::datatypes::Field; use arrow::{ array::{ArrayRef, AsArray, Float64Array}, diff --git a/datafusion-examples/examples/udf/async_udf.rs b/datafusion-examples/examples/udf/async_udf.rs index 475775a599f6..c55650223cd4 100644 --- a/datafusion-examples/examples/udf/async_udf.rs +++ b/datafusion-examples/examples/udf/async_udf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. +//! //! This example shows how to create and use "Async UDFs" in DataFusion. //! //! Async UDFs allow you to perform asynchronous operations, such as diff --git a/datafusion-examples/examples/udf/simple_udaf.rs b/datafusion-examples/examples/udf/simple_udaf.rs index e9f905e72099..42ea0054b759 100644 --- a/datafusion-examples/examples/udf/simple_udaf.rs +++ b/datafusion-examples/examples/udf/simple_udaf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! 
See `main.rs` for how to run it. +//! /// In this example we will declare a single-type, single return type UDAF that computes the geometric mean. /// The geometric mean is described here: https://en.wikipedia.org/wiki/Geometric_mean use datafusion::arrow::{ diff --git a/datafusion-examples/examples/udf/simple_udf.rs b/datafusion-examples/examples/udf/simple_udf.rs index 7d4f3588e313..e8d6c9c8173a 100644 --- a/datafusion-examples/examples/udf/simple_udf.rs +++ b/datafusion-examples/examples/udf/simple_udf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use datafusion::{ arrow::{ array::{ArrayRef, Float32Array, Float64Array}, diff --git a/datafusion-examples/examples/udf/simple_udtf.rs b/datafusion-examples/examples/udf/simple_udtf.rs index a03b157134ae..12ee74fc52ee 100644 --- a/datafusion-examples/examples/udf/simple_udtf.rs +++ b/datafusion-examples/examples/udf/simple_udtf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use arrow::csv::reader::Format; use arrow::csv::ReaderBuilder; use async_trait::async_trait; diff --git a/datafusion-examples/examples/udf/simple_udwf.rs b/datafusion-examples/examples/udf/simple_udwf.rs index 2cf1df8d8ed8..a4f6d59a7d6d 100644 --- a/datafusion-examples/examples/udf/simple_udwf.rs +++ b/datafusion-examples/examples/udf/simple_udwf.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! See `main.rs` for how to run it. + use std::sync::Arc; use arrow::{