From 61b1a1632ffe40e33dc252f1d9b7b9c7253af7dc Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Mon, 10 Oct 2022 16:58:48 -0600
Subject: [PATCH 1/7] Optimizer example and docs

---
 datafusion/optimizer/README.md                | 140 ++++++++++++++-
 datafusion/optimizer/examples/rewrite_expr.rs | 163 ++++++++++++++++++
 2 files changed, 300 insertions(+), 3 deletions(-)
 create mode 100644 datafusion/optimizer/examples/rewrite_expr.rs
diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md
index 39d28a8fae37..fc2b740d3295 100644
--- a/datafusion/optimizer/README.md
+++ b/datafusion/optimizer/README.md
@@ -17,10 +17,144 @@
   under the License.
 -->
 
-# DataFusion Query Optimizer Rules
+# DataFusion Query Optimizer
 
-[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
+[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory
+format.
 
-This crate is a submodule of DataFusion that provides query optimizer rules.
+DataFusion has modular design, allowing individual crates to be re-used in other projects.
+
+This crate is a submodule of DataFusion that provides a query optimizer for logical plans.
+
+## Running the Optimizer
+
+The following code demonstrates the basic flow of creating the optimizer with a default set of optimization rules
+and applying it to a logical plan to produce an optimized logical plan.
+
+```rust
+
+// We need a logical plan as the starting point. There are many ways to build a logical plan:
+//
+// The `datafusion-expr` crate provides a LogicalPlanBuilder
+// The `datafusion-sql` crate provides a SQL query planner that can create a LogicalPlan from SQL
+// The `datafusion` crate provides a DataFrame API that can create a LogicalPlan
+let logical_plan = ...
+
+let mut config = OptimizerConfig::default();
+let optimizer = Optimizer::new(&config);
+let optimized_plan = optimizer.optimize(&logical_plan, &mut config, observe)?;
+
+fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) {
+    println!(
+        "After applying rule '{}':\n{}",
+        rule.name(),
+        plan.display_indent()
+    )
+}
+```
+
+## Providing Custom Rules
+
+The optimizer can be created with a custom set of rules.
+
+```rust
+let optimizer = Optimizer::with_rules(vec![
+    Arc::new(MyRule {})
+]);
+```
+
+## Writing Optimization Rules
+
+Please refer to the [examples](examples) to learn more about the general approach to writing optimizer rules and
+then move onto studying the existing rules.
+
+All rules must implement the `OptimizerRule` trait.
+
+```rust
+/// `OptimizerRule` transforms one ['LogicalPlan'] into another which
+/// computes the same results, but in a potentially more efficient
+/// way.
+pub trait OptimizerRule {
+    /// Rewrite `plan` to an optimized form
+    fn optimize(
+        &self,
+        plan: &LogicalPlan,
+        optimizer_config: &mut OptimizerConfig,
+    ) -> Result<LogicalPlan>;
+
+    /// A human readable name for this optimizer rule
+    fn name(&self) -> &str;
+}
+```
+
+### General Guidelines
+
+Rules typical walk the logical plan and walk the expression trees inside operators and selectively mutate
+individual operators or expressions.
+
+Sometimes there is an initial pass that visits the plan and builds state that is used in a second pass that performs
+the actual optimization. This approach is used in projection push down and filter push down.
+
+### Utilities
+
+There are a number of utility methods provided that take care of some common tasks.
+
+### ExprVisitor
+
+TBD
+
+### Rewriting Expressions
+
+The `MyExprRewriter` trait can be implemented to provide a way to rewrite expressions. This rule can then be applied
+to an expression by calling `Expr::rewrite` (from the `ExprRewritable` trait).
+
+The `rewrite` method will perform a depth first walk of the expression and its children to rewrite an expression,
+consuming `self` producing a new expression.
+
+```rust
+let mut expr_rewriter = MyExprRewriter {};
+let expr = expr.rewrite(&mut expr_rewriter)?;
+```
+
+Here is an example implementation which will rewrite `expr BETWEEN a AND b` as `expr >= a AND expr <= b`. Note that the
+implementation does not need to perform any recursion since this is handled by the `rewrite` method.
+
+```rust
+struct MyExprRewriter {}
+
+impl ExprRewriter for MyExprRewriter {
+    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        match expr {
+            Expr::Between {
+                negated,
+                expr,
+                low,
+                high,
+            } => {
+                let expr: Expr = expr.as_ref().clone();
+                let low: Expr = low.as_ref().clone();
+                let high: Expr = high.as_ref().clone();
+                if negated {
+                    Ok(expr.clone().lt(low).or(expr.clone().gt(high)))
+                } else {
+                    Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high)))
+                }
+            }
+            _ => Ok(expr.clone()),
+        }
+    }
+}
+```
+
+### optimize_children
+
+TBD
+
+### Writing Tests
+
+There should be unit tests in the same file as the new rule that test the effect of the rule being applied to a plan
+in isolation (without any other rule being applied).
+
+There should also be a test in `integration-tests.rs` that tests the rule as part of the overall optimization process.
 
 [df]: https://crates.io/crates/datafusion
diff --git a/datafusion/optimizer/examples/rewrite_expr.rs b/datafusion/optimizer/examples/rewrite_expr.rs
new file mode 100644
index 000000000000..7d2c312fb03a
--- /dev/null
+++ b/datafusion/optimizer/examples/rewrite_expr.rs
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter};
+use datafusion_expr::{AggregateUDF, Expr, Filter, LogicalPlan, ScalarUDF, TableSource};
+use datafusion_optimizer::optimizer::Optimizer;
+use datafusion_optimizer::{utils, OptimizerConfig, OptimizerRule};
+use datafusion_sql::planner::{ContextProvider, SqlToRel};
+use datafusion_sql::sqlparser::dialect::PostgreSqlDialect;
+use datafusion_sql::sqlparser::parser::Parser;
+use datafusion_sql::TableReference;
+use std::any::Any;
+use std::sync::Arc;
+
+pub fn main() -> Result<()> {
+    // produce a logical plan using the datafusion-sql crate
+    let dialect = PostgreSqlDialect {};
+    let sql = "SELECT * FROM person WHERE age BETWEEN 21 AND 32";
+    let statements = Parser::parse_sql(&dialect, sql)?;
+
+    // produce a logical plan using the datafusion-sql crate
+    let context_provider = MyContextProvider {};
+    let sql_to_rel = SqlToRel::new(&context_provider);
+    let logical_plan = sql_to_rel.sql_statement_to_plan(statements[0].clone())?;
+    println!(
+        "Unoptimized Logical Plan:\n\n{}\n",
+        logical_plan.display_indent()
+    );
+
+    // now run the optimizer with our custom rule
+    let optimizer = Optimizer::with_rules(vec![Arc::new(MyRule {})]);
+    let mut optimizer_config = OptimizerConfig::default().with_skip_failing_rules(false);
+    let optimized_plan =
+        optimizer.optimize(&logical_plan, &mut optimizer_config, observe)?;
+    println!(
+        "Optimized Logical Plan:\n\n{}\n",
+        optimized_plan.display_indent()
+    );
+
+    Ok(())
+}
+
+fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) {
+    println!(
+        "After applying rule '{}':\n{}\n",
+        rule.name(),
+        plan.display_indent()
+    )
+}
+
+struct MyRule {}
+
+impl OptimizerRule for MyRule {
+    fn name(&self) -> &str {
+        "my_rule"
+    }
+
+    fn optimize(
+        &self,
+        plan: &LogicalPlan,
+        _config: &mut OptimizerConfig,
+    ) -> Result<LogicalPlan> {
+        // recurse down and optimize children first
+        let plan = utils::optimize_children(self, plan, _config)?;
+
+        match plan {
+            LogicalPlan::Filter(filter) => {
+                let mut expr_rewriter = MyExprRewriter {};
+                let predicate = filter.predicate.clone();
+                let predicate = predicate.rewrite(&mut expr_rewriter)?;
+                Ok(LogicalPlan::Filter(Filter {
+                    predicate,
+                    input: filter.input.clone(),
+                }))
+            }
+            _ => Ok(plan.clone()),
+        }
+    }
+}
+
+struct MyExprRewriter {}
+
+impl ExprRewriter for MyExprRewriter {
+    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        match expr {
+            Expr::Between {
+                negated,
+                expr,
+                low,
+                high,
+            } => {
+                let expr: Expr = expr.as_ref().clone();
+                let low: Expr = low.as_ref().clone();
+                let high: Expr = high.as_ref().clone();
+                if negated {
+                    Ok(expr.clone().lt(low).or(expr.clone().gt(high)))
+                } else {
+                    Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high)))
+                }
+            }
+            _ => Ok(expr.clone()),
+        }
+    }
+}
+
+struct MyContextProvider {}
+
+impl ContextProvider for MyContextProvider {
+    fn get_table_provider(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
+        if name.table() == "person" {
+            Ok(Arc::new(MyTableSource {
+                schema: Arc::new(Schema::new(vec![
+                    Field::new("name", DataType::Utf8, false),
+                    Field::new("age", DataType::UInt8, false),
+                ])),
+            }))
+        } else {
+            Err(DataFusionError::Plan("table not found".to_string()))
+        }
+    }
+
+    fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
+        None
+    }
+
+    fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
+        None
+    }
+
+    fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
+        None
+    }
+}
+
+struct MyTableSource {
+    schema: SchemaRef,
+}
+
+impl TableSource for MyTableSource {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+}

From 795afc35f132b840e088e4edf071ac6d243e3fbe Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Mon, 10 Oct 2022 18:38:24 -0600
Subject: [PATCH 2/7] Add regression tests to benchmark

---
 benchmarks/expected-plans/q1.txt  |   6 ++
 benchmarks/expected-plans/q10.txt |  12 +++
 benchmarks/expected-plans/q11.txt |  19 ++++
 benchmarks/expected-plans/q12.txt |   7 ++
 benchmarks/expected-plans/q13.txt |  11 +++
 benchmarks/expected-plans/q14.txt |   7 ++
 benchmarks/expected-plans/q15.txt |  11 +++
 benchmarks/expected-plans/q16.txt |  13 +++
 benchmarks/expected-plans/q17.txt |  11 +++
 benchmarks/expected-plans/q18.txt |  13 +++
 benchmarks/expected-plans/q19.txt |   9 ++
 benchmarks/expected-plans/q2.txt  |  25 +++++
 benchmarks/expected-plans/q20.txt |  19 ++++
 benchmarks/expected-plans/q21.txt |  21 ++++
 benchmarks/expected-plans/q22.txt |  15 +++
 benchmarks/expected-plans/q3.txt  |  11 +++
 benchmarks/expected-plans/q4.txt  |   8 ++
 benchmarks/expected-plans/q5.txt  |  16 ++++
 benchmarks/expected-plans/q6.txt  |   5 +
 benchmarks/expected-plans/q7.txt  |  20 ++++
 benchmarks/expected-plans/q8.txt  |  25 +++++
 benchmarks/expected-plans/q9.txt  |  17 ++++
 benchmarks/src/bin/tpch.rs        | 153 ++++++++++++++++++++++++++++++
 dev/release/rat_exclude_files.txt |   1 +
 24 files changed, 455 insertions(+)
 create mode 100644 benchmarks/expected-plans/q1.txt
 create mode 100644 benchmarks/expected-plans/q10.txt
 create mode 100644 benchmarks/expected-plans/q11.txt
 create mode 100644 benchmarks/expected-plans/q12.txt
 create mode 100644 benchmarks/expected-plans/q13.txt
 create mode 100644 benchmarks/expected-plans/q14.txt
 create mode 100644 benchmarks/expected-plans/q15.txt
 create mode 100644 benchmarks/expected-plans/q16.txt
 create mode 100644 benchmarks/expected-plans/q17.txt
 create mode 100644 benchmarks/expected-plans/q18.txt
 create mode 100644 benchmarks/expected-plans/q19.txt
 create mode 100644 benchmarks/expected-plans/q2.txt
 create mode 100644 benchmarks/expected-plans/q20.txt
 create mode 100644 benchmarks/expected-plans/q21.txt
 create mode 100644 benchmarks/expected-plans/q22.txt
 create mode 100644 benchmarks/expected-plans/q3.txt
 create mode 100644 benchmarks/expected-plans/q4.txt
 create mode 100644 benchmarks/expected-plans/q5.txt
 create mode 100644 benchmarks/expected-plans/q6.txt
 create mode 100644 benchmarks/expected-plans/q7.txt
 create mode 100644 benchmarks/expected-plans/q8.txt
 create mode 100644 benchmarks/expected-plans/q9.txt

diff --git a/benchmarks/expected-plans/q1.txt b/benchmarks/expected-plans/q1.txt
new file mode 100644
index 000000000000..1a9d4b7c98cc
--- /dev/null
+++ b/benchmarks/expected-plans/q1.txt
@@ -0,0 +1,6 @@
+Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST
+  Projection: lineitem.l_returnflag, lineitem.l_linestatus, SUM(lineitem.l_quantity) AS sum_qty, SUM(lineitem.l_extendedprice) AS sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, AVG(lineitem.l_quantity) AS avg_qty, AVG(lineitem.l_extendedprice) AS avg_price, AVG(lineitem.l_discount) AS avg_disc, COUNT(UInt8(1)) AS count_order
+    Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(CAST(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount AS Decimal128(38, 6)) * CAST(Decimal128(Some(100),23,2) + CAST(lineitem.l_tax AS Decimal128(23, 2)) AS Decimal128(38, 6))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))]]
+      Projection: CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus
+        Filter: lineitem.l_shipdate <= Date32("10471")
+          TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q10.txt b/benchmarks/expected-plans/q10.txt
new file mode 100644
index 000000000000..25189feb61e1
--- /dev/null
+++ b/benchmarks/expected-plans/q10.txt
@@ -0,0 +1,12 @@
+Sort: revenue DESC NULLS FIRST
+  Projection: customer.c_custkey, customer.c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment
+    Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+      Inner Join: customer.c_nationkey = nation.n_nationkey
+        Inner Join: orders.o_orderkey = lineitem.l_orderkey
+          Inner Join: customer.c_custkey = orders.o_custkey
+            TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment]
+            Filter: orders.o_orderdate >= Date32("8674") AND orders.o_orderdate < Date32("8766")
+              TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate]
+          Filter: lineitem.l_returnflag = Utf8("R")
+            TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag]
+        TableScan: nation projection=[n_nationkey, n_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q11.txt b/benchmarks/expected-plans/q11.txt
new file mode 100644
index 000000000000..b408340a32a0
--- /dev/null
+++ b/benchmarks/expected-plans/q11.txt
@@ -0,0 +1,19 @@
+Sort: value DESC NULLS FIRST
+  Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value
+    Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 17)) > __sq_1.__value
+      CrossJoin:
+        Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
+          Inner Join: supplier.s_nationkey = nation.n_nationkey
+            Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+              TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost]
+              TableScan: supplier projection=[s_suppkey, s_nationkey]
+            Filter: nation.n_name = Utf8("GERMANY")
+              TableScan: nation projection=[n_nationkey, n_name]
+        Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 17)) * Decimal128(Some(10000000000000),38,17) AS __value, alias=__sq_1
+          Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]]
+            Inner Join: supplier.s_nationkey = nation.n_nationkey
+              Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+                TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost]
+                TableScan: supplier projection=[s_suppkey, s_nationkey]
+              Filter: nation.n_name = Utf8("GERMANY")
+                TableScan: nation projection=[n_nationkey, n_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q12.txt b/benchmarks/expected-plans/q12.txt
new file mode 100644
index 000000000000..ef3bab4b7096
--- /dev/null
+++ b/benchmarks/expected-plans/q12.txt
@@ -0,0 +1,7 @@
+Sort: lineitem.l_shipmode ASC NULLS LAST
+  Projection: lineitem.l_shipmode, SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS high_line_count, SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS low_line_count
+    Aggregate: groupBy=[[lineitem.l_shipmode]], aggr=[[SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)]]
+      Inner Join: lineitem.l_orderkey = orders.o_orderkey
+        Filter: lineitem.l_shipmode IN ([Utf8("MAIL"), Utf8("SHIP")]) AND lineitem.l_commitdate < lineitem.l_receiptdate AND lineitem.l_shipdate < lineitem.l_commitdate AND lineitem.l_receiptdate >= Date32("8766") AND lineitem.l_receiptdate < Date32("9131")
+          TableScan: lineitem projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode]
+        TableScan: orders projection=[o_orderkey, o_orderpriority]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q13.txt b/benchmarks/expected-plans/q13.txt
new file mode 100644
index 000000000000..12a9bf1dac24
--- /dev/null
+++ b/benchmarks/expected-plans/q13.txt
@@ -0,0 +1,11 @@
+Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST
+  Projection: c_orders.c_count, COUNT(UInt8(1)) AS custdist
+    Aggregate: groupBy=[[c_orders.c_count]], aggr=[[COUNT(UInt8(1))]]
+      Projection: c_orders.COUNT(orders.o_orderkey) AS c_count, alias=c_orders
+        Projection: c_orders.COUNT(orders.o_orderkey), alias=c_orders
+          Projection: COUNT(orders.o_orderkey), alias=c_orders
+            Aggregate: groupBy=[[customer.c_custkey]], aggr=[[COUNT(orders.o_orderkey)]]
+              Left Join: customer.c_custkey = orders.o_custkey
+                TableScan: customer projection=[c_custkey]
+                Filter: orders.o_comment NOT LIKE Utf8("%special%requests%")
+                  TableScan: orders projection=[o_orderkey, o_custkey, o_comment]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q14.txt b/benchmarks/expected-plans/q14.txt
new file mode 100644
index 000000000000..c410363a5821
--- /dev/null
+++ b/benchmarks/expected-plans/q14.txt
@@ -0,0 +1,7 @@
+Projection: CAST(Decimal128(Some(1000000000000000000000),38,19) * CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%")  THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Decimal128(38, 19)) AS Decimal128(38, 38)) / CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Decimal128(38, 38)) AS promo_revenue
+  Aggregate: groupBy=[[]], aggr=[[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%")  THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+    Projection: CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice, part.p_type
+      Inner Join: lineitem.l_partkey = part.p_partkey
+        Filter: lineitem.l_shipdate >= Date32("9374") AND lineitem.l_shipdate < Date32("9404")
+          TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate]
+        TableScan: part projection=[p_partkey, p_type]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q15.txt b/benchmarks/expected-plans/q15.txt
new file mode 100644
index 000000000000..e2f59dc5ca0f
--- /dev/null
+++ b/benchmarks/expected-plans/q15.txt
@@ -0,0 +1,11 @@
+EmptyRelation
+Sort: supplier.s_suppkey ASC NULLS LAST
+  Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue
+    Inner Join: revenue0.total_revenue = __sq_1.__value
+      Inner Join: supplier.s_suppkey = revenue0.supplier_no
+        TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone]
+        TableScan: revenue0 projection=[supplier_no, total_revenue]
+      Projection: MAX(revenue0.total_revenue) AS __value, alias=__sq_1
+        Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]]
+          TableScan: revenue0 projection=[total_revenue]
+EmptyRelation
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q16.txt b/benchmarks/expected-plans/q16.txt
new file mode 100644
index 000000000000..11943cf2477b
--- /dev/null
+++ b/benchmarks/expected-plans/q16.txt
@@ -0,0 +1,13 @@
+Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type ASC NULLS LAST, part.p_size ASC NULLS LAST
+  Projection: part.p_brand, part.p_type, part.p_size, COUNT(DISTINCT partsupp.ps_suppkey) AS supplier_cnt
+    Projection: group_alias_0 AS p_brand, group_alias_1 AS p_type, group_alias_2 AS p_size, COUNT(alias1) AS COUNT(DISTINCT partsupp.ps_suppkey)
+      Aggregate: groupBy=[[group_alias_0, group_alias_1, group_alias_2]], aggr=[[COUNT(alias1)]]
+        Aggregate: groupBy=[[part.p_brand AS group_alias_0, part.p_type AS group_alias_1, part.p_size AS group_alias_2, partsupp.ps_suppkey AS alias1]], aggr=[[]]
+          Anti Join: partsupp.ps_suppkey = __sq_1.s_suppkey
+            Inner Join: partsupp.ps_partkey = part.p_partkey
+              TableScan: partsupp projection=[ps_partkey, ps_suppkey]
+              Filter: part.p_brand != Utf8("Brand#45") AND part.p_type NOT LIKE Utf8("MEDIUM POLISHED%") AND part.p_size IN ([Int32(49), Int32(14), Int32(23), Int32(45), Int32(19), Int32(3), Int32(36), Int32(9)])
+                TableScan: part projection=[p_partkey, p_brand, p_type, p_size]
+            Projection: supplier.s_suppkey AS s_suppkey, alias=__sq_1
+              Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%")
+                TableScan: supplier projection=[s_suppkey, s_comment]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q17.txt b/benchmarks/expected-plans/q17.txt
new file mode 100644
index 000000000000..17b8e969879a
--- /dev/null
+++ b/benchmarks/expected-plans/q17.txt
@@ -0,0 +1,11 @@
+Projection: CAST(SUM(lineitem.l_extendedprice) AS Decimal128(38, 33)) / Decimal128(Some(7000000000000000195487369212723200),38,33) AS avg_yearly
+  Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]]
+    Filter: CAST(lineitem.l_quantity AS Decimal128(38, 21)) < __sq_1.__value
+      Inner Join: part.p_partkey = __sq_1.l_partkey
+        Inner Join: lineitem.l_partkey = part.p_partkey
+          TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice]
+          Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX")
+            TableScan: part projection=[p_partkey, p_brand, p_container]
+        Projection: lineitem.l_partkey, Decimal128(Some(200000000000000000000),38,21) * CAST(AVG(lineitem.l_quantity) AS Decimal128(38, 21)) AS __value, alias=__sq_1
+          Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]]
+            TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q18.txt b/benchmarks/expected-plans/q18.txt
new file mode 100644
index 000000000000..ebc22ea5d886
--- /dev/null
+++ b/benchmarks/expected-plans/q18.txt
@@ -0,0 +1,13 @@
+Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST
+  Projection: customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice, SUM(lineitem.l_quantity)
+    Aggregate: groupBy=[[customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]], aggr=[[SUM(lineitem.l_quantity)]]
+      Semi Join: orders.o_orderkey = __sq_1.l_orderkey
+        Inner Join: orders.o_orderkey = lineitem.l_orderkey
+          Inner Join: customer.c_custkey = orders.o_custkey
+            TableScan: customer projection=[c_custkey, c_name]
+            TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate]
+          TableScan: lineitem projection=[l_orderkey, l_quantity]
+        Projection: lineitem.l_orderkey AS l_orderkey, alias=__sq_1
+          Filter: SUM(lineitem.l_quantity) > Decimal128(Some(30000),25,2)
+            Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[SUM(lineitem.l_quantity)]]
+              TableScan: lineitem projection=[l_orderkey, l_quantity]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q19.txt b/benchmarks/expected-plans/q19.txt
new file mode 100644
index 000000000000..902893ea9012
--- /dev/null
+++ b/benchmarks/expected-plans/q19.txt
@@ -0,0 +1,9 @@
+Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue
+  Aggregate: groupBy=[[]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+    Projection: lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") AS lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON")Utf8("DELIVER IN PERSON")lineitem.l_shipinstruct, lineitem.l_shipmode IN ([Utf8("AIR"), Utf8("AIR REG")]) AS lineitem.l_shipmode IN ([Utf8("AIR"), Utf8("AIR REG")])Utf8("AIR REG")Utf8("AIR")lineitem.l_shipmode, part.p_size >= Int32(1) AS part.p_size >= Int32(1)Int32(1)part.p_size, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, part.p_brand, part.p_size, part.p_container
+      Filter: part.p_brand = Utf8("Brand#12") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2) AND part.p_size <= Int32(15)
+        Inner Join: lineitem.l_partkey = part.p_partkey
+          Filter: lineitem.l_shipmode IN ([Utf8("AIR"), Utf8("AIR REG")]) AND lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON")
+            TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode]
+          Filter: part.p_size >= Int32(1)
+            TableScan: part projection=[p_partkey, p_brand, p_size, p_container]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q2.txt b/benchmarks/expected-plans/q2.txt
new file mode 100644
index 000000000000..10d68cd374db
--- /dev/null
+++ b/benchmarks/expected-plans/q2.txt
@@ -0,0 +1,25 @@
+Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST
+  Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment
+    Filter: partsupp.ps_supplycost = __sq_1.__value
+      Inner Join: part.p_partkey = __sq_1.ps_partkey
+        Inner Join: nation.n_regionkey = region.r_regionkey
+          Inner Join: supplier.s_nationkey = nation.n_nationkey
+            Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+              Inner Join: part.p_partkey = partsupp.ps_partkey
+                Filter: part.p_size = Int32(15) AND part.p_type LIKE Utf8("%BRASS")
+                  TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size]
+                TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
+              TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
+            TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
+          Filter: region.r_name = Utf8("EUROPE")
+            TableScan: region projection=[r_regionkey, r_name]
+        Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value, alias=__sq_1
+          Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]]
+            Inner Join: nation.n_regionkey = region.r_regionkey
+              Inner Join: supplier.s_nationkey = nation.n_nationkey
+                Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+                  TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
+                  TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment]
+                TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
+              Filter: region.r_name = Utf8("EUROPE")
+                TableScan: region projection=[r_regionkey, r_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q20.txt b/benchmarks/expected-plans/q20.txt
new file mode 100644
index 000000000000..6d3ef1f6cc75
--- /dev/null
+++ b/benchmarks/expected-plans/q20.txt
@@ -0,0 +1,19 @@
+Sort: supplier.s_name ASC NULLS LAST
+  Projection: supplier.s_name, supplier.s_address
+    Semi Join: supplier.s_suppkey = __sq_2.ps_suppkey
+      Inner Join: supplier.s_nationkey = nation.n_nationkey
+        TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey]
+        Filter: nation.n_name = Utf8("CANADA")
+          TableScan: nation projection=[n_nationkey, n_name]
+      Projection: partsupp.ps_suppkey AS ps_suppkey, alias=__sq_2
+        Filter: CAST(partsupp.ps_availqty AS Decimal128(38, 17)) > __sq_3.__value
+          Inner Join: partsupp.ps_partkey = __sq_3.l_partkey, partsupp.ps_suppkey = __sq_3.l_suppkey
+            Semi Join: partsupp.ps_partkey = __sq_1.p_partkey
+              TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty]
+              Projection: part.p_partkey AS p_partkey, alias=__sq_1
+                Filter: part.p_name LIKE Utf8("forest%")
+                  TableScan: part projection=[p_partkey, p_name]
+            Projection: lineitem.l_partkey, lineitem.l_suppkey, Decimal128(Some(50000000000000000),38,17) * CAST(SUM(lineitem.l_quantity) AS Decimal128(38, 17)) AS __value, alias=__sq_3
+              Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]]
+                Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131")
+                  TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q21.txt b/benchmarks/expected-plans/q21.txt
new file mode 100644
index 000000000000..5689fafc2eb4
--- /dev/null
+++ b/benchmarks/expected-plans/q21.txt
@@ -0,0 +1,21 @@
+Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST
+  Projection: supplier.s_name, COUNT(UInt8(1)) AS numwait
+    Aggregate: groupBy=[[supplier.s_name]], aggr=[[COUNT(UInt8(1))]]
+      Anti Join: l1.l_orderkey = l3.l_orderkey Filter: l3.l_suppkey != l1.l_suppkey
+        Semi Join: l1.l_orderkey = l2.l_orderkey Filter: l2.l_suppkey != l1.l_suppkey
+          Inner Join: supplier.s_nationkey = nation.n_nationkey
+            Inner Join: l1.l_orderkey = orders.o_orderkey
+              Inner Join: supplier.s_suppkey = l1.l_suppkey
+                TableScan: supplier projection=[s_suppkey, s_name, s_nationkey]
+                Filter: l1.l_receiptdate > l1.l_commitdate AND l1.l_receiptdate > l1.l_commitdate
+                  SubqueryAlias: l1
+                    TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate]
+              Filter: orders.o_orderstatus = Utf8("F") AND orders.o_orderstatus = Utf8("F")
+                TableScan: orders projection=[o_orderkey, o_orderstatus]
+            Filter: nation.n_name = Utf8("SAUDI ARABIA") AND nation.n_name = Utf8("SAUDI ARABIA")
+              TableScan: nation projection=[n_nationkey, n_name]
+          SubqueryAlias: l2
+            TableScan: lineitem projection=[l_orderkey, l_suppkey]
+        Filter: l3.l_receiptdate > l3.l_commitdate
+          SubqueryAlias: l3
+            TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q22.txt b/benchmarks/expected-plans/q22.txt
new file mode 100644
index 000000000000..4b0c1a59c00b
--- /dev/null
+++ b/benchmarks/expected-plans/q22.txt
@@ -0,0 +1,15 @@
+Sort: custsale.cntrycode ASC NULLS LAST
+  Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust, SUM(custsale.c_acctbal) AS totacctbal
+    Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]]
+      Projection: custsale.cntrycode, custsale.c_acctbal, alias=custsale
+        Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal, alias=custsale
+          Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __sq_1.__value
+            CrossJoin:
+              Anti Join: customer.c_custkey = orders.o_custkey
+                Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
+                  TableScan: customer projection=[c_custkey, c_phone, c_acctbal]
+                TableScan: orders projection=[o_custkey]
+              Projection: AVG(customer.c_acctbal) AS __value, alias=__sq_1
+                Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]]
+                  Filter: CAST(customer.c_acctbal AS Decimal128(30, 15)) > Decimal128(Some(0),30,15) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])
+                    TableScan: customer projection=[c_phone, c_acctbal]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q3.txt b/benchmarks/expected-plans/q3.txt
new file mode 100644
index 000000000000..7cd69b92a556
--- /dev/null
+++ b/benchmarks/expected-plans/q3.txt
@@ -0,0 +1,11 @@
+Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST
+  Projection: lineitem.l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority
+    Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+      Inner Join: orders.o_orderkey = lineitem.l_orderkey
+        Inner Join: customer.c_custkey = orders.o_custkey
+          Filter: customer.c_mktsegment = Utf8("BUILDING")
+            TableScan: customer projection=[c_custkey, c_mktsegment]
+          Filter: orders.o_orderdate < Date32("9204")
+            TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority]
+        Filter: lineitem.l_shipdate > Date32("9204")
+          TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q4.txt b/benchmarks/expected-plans/q4.txt
new file mode 100644
index 000000000000..a4339732e68f
--- /dev/null
+++ b/benchmarks/expected-plans/q4.txt
@@ -0,0 +1,8 @@
+Sort: orders.o_orderpriority ASC NULLS LAST
+  Projection: orders.o_orderpriority, COUNT(UInt8(1)) AS order_count
+    Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[COUNT(UInt8(1))]]
+      Semi Join: orders.o_orderkey = lineitem.l_orderkey
+        Filter: orders.o_orderdate >= Date32("8582") AND orders.o_orderdate < Date32("8674")
+          TableScan: orders projection=[o_orderkey, o_orderdate, o_orderpriority]
+        Filter: lineitem.l_commitdate < lineitem.l_receiptdate
+          TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q5.txt b/benchmarks/expected-plans/q5.txt
new file mode 100644
index 000000000000..0c2e0c131805
--- /dev/null
+++ b/benchmarks/expected-plans/q5.txt
@@ -0,0 +1,16 @@
+Sort: revenue DESC NULLS FIRST
+  Projection: nation.n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue
+    Aggregate: groupBy=[[nation.n_name]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+      Inner Join: nation.n_regionkey = region.r_regionkey
+        Inner Join: supplier.s_nationkey = nation.n_nationkey
+          Inner Join: lineitem.l_suppkey = supplier.s_suppkey, customer.c_nationkey = supplier.s_nationkey
+            Inner Join: orders.o_orderkey = lineitem.l_orderkey
+              Inner Join: customer.c_custkey = orders.o_custkey
+                TableScan: customer projection=[c_custkey, c_nationkey]
+                Filter: orders.o_orderdate >= Date32("8766") AND orders.o_orderdate < Date32("9131")
+                  TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate]
+              TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount]
+            TableScan: supplier projection=[s_suppkey, s_nationkey]
+          TableScan: nation projection=[n_nationkey, n_name, n_regionkey]
+        Filter: region.r_name = Utf8("ASIA")
+          TableScan: region projection=[r_regionkey, r_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q6.txt b/benchmarks/expected-plans/q6.txt
new file mode 100644
index 000000000000..ad27ba2b9aeb
--- /dev/null
+++ b/benchmarks/expected-plans/q6.txt
@@ -0,0 +1,5 @@
+Projection: SUM(lineitem.l_extendedprice * lineitem.l_discount) AS revenue
+  Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice * lineitem.l_discount)]]
+    Projection: CAST(lineitem.l_discount AS Decimal128(30, 15)) AS CAST(lineitem.l_discount AS Decimal128(30, 15))lineitem.l_discount, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate
+      Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount >= Decimal128(Some(49999999999999),30,15) AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount <= Decimal128(Some(69999999999999),30,15) AND lineitem.l_quantity < Decimal128(Some(2400),15,2)
+        TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q7.txt b/benchmarks/expected-plans/q7.txt
new file mode 100644
index 000000000000..4a2866a42df7
--- /dev/null
+++ b/benchmarks/expected-plans/q7.txt
@@ -0,0 +1,20 @@
+Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, shipping.l_year ASC NULLS LAST
+  Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, SUM(shipping.volume) AS revenue
+    Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[SUM(shipping.volume)]]
+      Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, shipping.volume, alias=shipping
+        Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, datepart(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS volume, alias=shipping
+          Filter: n1.n_name = Utf8("FRANCE") AND n2.n_name = Utf8("GERMANY") OR n1.n_name = Utf8("GERMANY") AND n2.n_name = Utf8("FRANCE")
+            Inner Join: customer.c_nationkey = n2.n_nationkey
+              Inner Join: supplier.s_nationkey = n1.n_nationkey
+                Inner Join: orders.o_custkey = customer.c_custkey
+                  Inner Join: lineitem.l_orderkey = orders.o_orderkey
+                    Inner Join: supplier.s_suppkey = lineitem.l_suppkey
+                      TableScan: supplier projection=[s_suppkey, s_nationkey]
+                      Filter: lineitem.l_shipdate >= Date32("9131") AND lineitem.l_shipdate <= Date32("9861")
+                        TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate]
+                    TableScan: orders projection=[o_orderkey, o_custkey]
+                  TableScan: customer projection=[c_custkey, c_nationkey]
+                SubqueryAlias: n1
+                  TableScan: nation projection=[n_nationkey, n_name]
+              SubqueryAlias: n2
+                TableScan: nation projection=[n_nationkey, n_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q8.txt b/benchmarks/expected-plans/q8.txt
new file mode 100644
index 000000000000..20452d4bde93
--- /dev/null
+++ b/benchmarks/expected-plans/q8.txt
@@ -0,0 +1,25 @@
+Sort: all_nations.o_year ASC NULLS LAST
+  Projection: all_nations.o_year, SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END) / SUM(all_nations.volume) AS mkt_share
+    Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)]]
+      Projection: all_nations.o_year, all_nations.volume, all_nations.nation, alias=all_nations
+        Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS volume, n2.n_name AS nation, alias=all_nations
+          Inner Join: n1.n_regionkey = region.r_regionkey
+            Inner Join: supplier.s_nationkey = n2.n_nationkey
+              Inner Join: customer.c_nationkey = n1.n_nationkey
+                Inner Join: orders.o_custkey = customer.c_custkey
+                  Inner Join: lineitem.l_orderkey = orders.o_orderkey
+                    Inner Join: lineitem.l_suppkey = supplier.s_suppkey
+                      Inner Join: part.p_partkey = lineitem.l_partkey
+                        Filter: part.p_type = Utf8("ECONOMY ANODIZED STEEL")
+                          TableScan: part projection=[p_partkey, p_type]
+                        TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount]
+                      TableScan: supplier projection=[s_suppkey, s_nationkey]
+                    Filter: orders.o_orderdate >= Date32("9131") AND orders.o_orderdate <= Date32("9861")
+                      TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate]
+                  TableScan: customer projection=[c_custkey, c_nationkey]
+                SubqueryAlias: n1
+                  TableScan: nation projection=[n_nationkey, n_regionkey]
+              SubqueryAlias: n2
+                TableScan: nation projection=[n_nationkey, n_name]
+            Filter: region.r_name = Utf8("AMERICA")
+              TableScan: region projection=[r_regionkey, r_name]
\ No newline at end of file
diff --git a/benchmarks/expected-plans/q9.txt b/benchmarks/expected-plans/q9.txt
new file mode 100644
index 000000000000..954c28a35bb1
--- /dev/null
+++ b/benchmarks/expected-plans/q9.txt
@@ -0,0 +1,17 @@
+Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST
+  Projection: profit.nation, profit.o_year, SUM(profit.amount) AS sum_profit
+    Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[SUM(profit.amount)]]
+      Projection: profit.nation, profit.o_year, profit.amount, alias=profit
+        Projection: nation.n_name AS nation, datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) - CAST(partsupp.ps_supplycost * lineitem.l_quantity AS Decimal128(38, 4)) AS amount, alias=profit
+          Inner Join: supplier.s_nationkey = nation.n_nationkey
+            Inner Join: lineitem.l_orderkey = orders.o_orderkey
+              Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey
+                Inner Join: lineitem.l_suppkey = supplier.s_suppkey
+                  Inner Join: part.p_partkey = lineitem.l_partkey
+                    Filter: part.p_name LIKE Utf8("%green%")
+                      TableScan: part projection=[p_partkey, p_name]
+                    TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount]
+                  TableScan: supplier projection=[s_suppkey, s_nationkey]
+                TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
+              TableScan: orders projection=[o_orderkey, o_orderdate]
+            TableScan: nation projection=[n_nationkey, n_name]
\ No newline at end of file
diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs
index d3cab89a2f03..b08539eaf7af 100644
--- a/benchmarks/src/bin/tpch.rs
+++ b/benchmarks/src/bin/tpch.rs
@@ -598,6 +598,7 @@ mod tests {
     use datafusion::logical_expr::Expr;
     use datafusion::logical_expr::Expr::Cast;
     use datafusion::logical_expr::Expr::ScalarFunction;
+    use datafusion::sql::TableReference;
 
     const QUERY_LIMIT: [Option<usize>; 22] = [
         None,
@@ -624,6 +625,158 @@ mod tests {
         None,
     ];
 
+    #[tokio::test]
+    async fn q1_expected_plan() -> Result<()> {
+        expected_plan(1).await
+    }
+
+    #[tokio::test]
+    async fn q2_expected_plan() -> Result<()> {
+        expected_plan(2).await
+    }
+
+    #[tokio::test]
+    async fn q3_expected_plan() -> Result<()> {
+        expected_plan(3).await
+    }
+
+    #[tokio::test]
+    async fn q4_expected_plan() -> Result<()> {
+        expected_plan(4).await
+    }
+
+    #[tokio::test]
+    async fn q5_expected_plan() -> Result<()> {
+        expected_plan(5).await
+    }
+
+    #[tokio::test]
+    async fn q6_expected_plan() -> Result<()> {
+        expected_plan(6).await
+    }
+
+    #[tokio::test]
+    async fn q7_expected_plan() -> Result<()> {
+        expected_plan(7).await
+    }
+
+    #[tokio::test]
+    async fn q8_expected_plan() -> Result<()> {
+        expected_plan(8).await
+    }
+
+    #[tokio::test]
+    async fn q9_expected_plan() -> Result<()> {
+        expected_plan(9).await
+    }
+
+    #[tokio::test]
+    async fn q10_expected_plan() -> Result<()> {
+        expected_plan(10).await
+    }
+
+    #[tokio::test]
+    async fn q11_expected_plan() -> Result<()> {
+        expected_plan(11).await
+    }
+
+    #[tokio::test]
+    async fn q12_expected_plan() -> Result<()> {
+        expected_plan(12).await
+    }
+
+    #[tokio::test]
+    async fn q13_expected_plan() -> Result<()> {
+        expected_plan(13).await
+    }
+
+    #[tokio::test]
+    async fn q14_expected_plan() -> Result<()> {
+        expected_plan(14).await
+    }
+
+    #[tokio::test]
+    async fn q15_expected_plan() -> Result<()> {
+        expected_plan(15).await
+    }
+
+    #[tokio::test]
+    async fn q16_expected_plan() -> Result<()> {
+        expected_plan(16).await
+    }
+
+    #[tokio::test]
+    async fn q17_expected_plan() -> Result<()> {
+        expected_plan(17).await
+    }
+
+    #[tokio::test]
+    async fn q18_expected_plan() -> Result<()> {
+        expected_plan(18).await
+    }
+
+    #[tokio::test]
+    async fn q19_expected_plan() -> Result<()> {
+        expected_plan(19).await
+    }
+
+    #[tokio::test]
+    async fn q20_expected_plan() -> Result<()> {
+        expected_plan(20).await
+    }
+
+    #[tokio::test]
+    async fn q21_expected_plan() -> Result<()> {
+        expected_plan(21).await
+    }
+
+    #[tokio::test]
+    async fn q22_expected_plan() -> Result<()> {
+        expected_plan(22).await
+    }
+
+    async fn expected_plan(query: usize) -> Result<()> {
+        let ctx = SessionContext::new();
+        for table in TABLES {
+            let table = table.to_string();
+            let schema = get_schema(&table);
+            let mem_table = MemTable::try_new(Arc::new(schema), vec![])?;
+            ctx.register_table(
+                TableReference::from(table.as_str()),
+                Arc::new(mem_table),
+            )?;
+        }
+
+        let mut actual = String::new();
+        let sql = get_query_sql(query)?;
+        for sql in &sql {
+            let df = ctx.sql(sql.as_str()).await?;
+            let plan = df.to_logical_plan()?;
+            if !actual.is_empty() {
+                actual += "\n";
+            }
+            actual += &format!("{}", plan.display_indent());
+        }
+
+        let possibilities = vec![
+            format!("expected-plans/q{}.txt", query),
+            format!("benchmarks/expected-plans/q{}.txt", query),
+        ];
+
+        let mut found = false;
+        for path in &possibilities {
+            let path = Path::new(&path);
+            if let Some(expected) = fs::read_to_string(path).ok() {
+                assert_eq!(expected, actual);
+                found = true;
+                break;
+            }
+        }
+        assert!(found);
+
+        Ok(())
+    }
+
     #[tokio::test]
     async fn q1() -> Result<()> {
         verify_query(1).await
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 98e0c6a38493..91ccef60f3fa 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -113,6 +113,7 @@ python/rust-toolchain
 python/requirements*.txt
 **/testdata/*
 benchmarks/queries/*
+benchmarks/expected-plans/*
 benchmarks/data/*
 ci/*
 **/*.svg

From d5f193ce7bf62c9465674f8229cebaf2778e40d7 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Mon, 10 Oct 2022 18:41:01 -0600
Subject: [PATCH 3/7] revert a file

---
 datafusion/optimizer/examples/rewrite_expr.rs | 163 ------------------
 1 file changed, 163 deletions(-)
 delete mode 100644 datafusion/optimizer/examples/rewrite_expr.rs

diff --git a/datafusion/optimizer/examples/rewrite_expr.rs b/datafusion/optimizer/examples/rewrite_expr.rs
deleted file mode 100644
index 7d2c312fb03a..000000000000
--- a/datafusion/optimizer/examples/rewrite_expr.rs
+++ /dev/null
@@ -1,163 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
-use datafusion_common::{DataFusionError, Result};
-use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter};
-use datafusion_expr::{AggregateUDF, Expr, Filter, LogicalPlan, ScalarUDF, TableSource};
-use datafusion_optimizer::optimizer::Optimizer;
-use datafusion_optimizer::{utils, OptimizerConfig, OptimizerRule};
-use datafusion_sql::planner::{ContextProvider, SqlToRel};
-use datafusion_sql::sqlparser::dialect::PostgreSqlDialect;
-use datafusion_sql::sqlparser::parser::Parser;
-use datafusion_sql::TableReference;
-use std::any::Any;
-use std::sync::Arc;
-
-pub fn main() -> Result<()> {
-    // produce a logical plan using the datafusion-sql crate
-    let dialect = PostgreSqlDialect {};
-    let sql = "SELECT * FROM person WHERE age BETWEEN 21 AND 32";
-    let statements = Parser::parse_sql(&dialect, sql)?;
-
-    // produce a logical plan using the datafusion-sql crate
-    let context_provider = MyContextProvider {};
-    let sql_to_rel = SqlToRel::new(&context_provider);
-    let logical_plan = sql_to_rel.sql_statement_to_plan(statements[0].clone())?;
-    println!(
-        "Unoptimized Logical Plan:\n\n{}\n",
-        logical_plan.display_indent()
-    );
-
-    // now run the optimizer with our custom rule
-    let optimizer = Optimizer::with_rules(vec![Arc::new(MyRule {})]);
-    let mut optimizer_config = OptimizerConfig::default().with_skip_failing_rules(false);
-    let optimized_plan =
-        optimizer.optimize(&logical_plan, &mut optimizer_config, observe)?;
-    println!(
-        "Optimized Logical Plan:\n\n{}\n",
-        optimized_plan.display_indent()
-    );
-
-    Ok(())
-}
-
-fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) {
-    println!(
-        "After applying rule '{}':\n{}\n",
-        rule.name(),
-        plan.display_indent()
-    )
-}
-
-struct MyRule {}
-
-impl OptimizerRule for MyRule {
-    fn name(&self) -> &str {
-        "my_rule"
-    }
-
-    fn optimize(
-        &self,
-        plan: &LogicalPlan,
-        _config: &mut OptimizerConfig,
-    ) -> Result<LogicalPlan> {
-        // recurse down and optimize children first
-        let plan = utils::optimize_children(self, plan, _config)?;
-
-        match plan {
-            LogicalPlan::Filter(filter) => {
-                let mut expr_rewriter = MyExprRewriter {};
-                let predicate = filter.predicate.clone();
-                let predicate = predicate.rewrite(&mut expr_rewriter)?;
-                Ok(LogicalPlan::Filter(Filter {
-                    predicate,
-                    input: filter.input.clone(),
-                }))
-            }
-            _ => Ok(plan.clone()),
-        }
-    }
-}
-
-struct MyExprRewriter {}
-
-impl ExprRewriter for MyExprRewriter {
-    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
-        match expr {
-            Expr::Between {
-                negated,
-                expr,
-                low,
-                high,
-            } => {
-                let expr: Expr = expr.as_ref().clone();
-                let low: Expr = low.as_ref().clone();
-                let high: Expr = high.as_ref().clone();
-                if negated {
-                    Ok(expr.clone().lt(low).or(expr.clone().gt(high)))
-                } else {
-                    Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high)))
-                }
-            }
-            _ => Ok(expr.clone()),
-        }
-    }
-}
-
-struct MyContextProvider {}
-
-impl ContextProvider for MyContextProvider {
-    fn get_table_provider(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
-        if name.table() == "person" {
-            Ok(Arc::new(MyTableSource {
-                schema: Arc::new(Schema::new(vec![
-                    Field::new("name", DataType::Utf8, false),
-                    Field::new("age", DataType::UInt8, false),
-                ])),
-            }))
-        } else {
-            Err(DataFusionError::Plan("table not found".to_string()))
-        }
-    }
-
-    fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
-        None
-    }
-
-    fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
-        None
-    }
-
-    fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
-        None
-    }
-}
-
-struct MyTableSource {
-    schema: SchemaRef,
-}
-
-impl TableSource for MyTableSource {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}

From 17d1de8d5210dd144b944119566aecf979822840 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Mon, 10 Oct 2022 18:41:47 -0600
Subject: [PATCH 4/7] revert a file

---
 datafusion/optimizer/README.md | 140 +--------------------------------
 1 file changed, 3 insertions(+), 137 deletions(-)

diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md
index fc2b740d3295..39d28a8fae37 100644
--- a/datafusion/optimizer/README.md
+++ b/datafusion/optimizer/README.md
@@ -17,144 +17,10 @@
   under the License.
 -->
 
-# DataFusion Query Optimizer
+# DataFusion Query Optimizer Rules
 
-[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory
-format.
+[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
 
-DataFusion has modular design, allowing individual crates to be re-used in other projects.
-
-This crate is a submodule of DataFusion that provides a query optimizer for logical plans.
-
-## Running the Optimizer
-
-The following code demonstrates the basic flow of creating the optimizer with a default set of optimization rules
-and applying it to a logical plan to produce an optimized logical plan.
-
-```rust
-
-// We need a logical plan as the starting point. There are many ways to build a logical plan:
-//
-// The `datafusion-expr` crate provides a LogicalPlanBuilder
-// The `datafusion-sql` crate provides a SQL query planner that can create a LogicalPlan from SQL
-// The `datafusion` crate provides a DataFrame API that can create a LogicalPlan
-let logical_plan = ...
-
-let mut config = OptimizerConfig::default();
-let optimizer = Optimizer::new(&config);
-let optimized_plan = optimizer.optimize(&logical_plan, &mut config, observe)?;
-
-fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) {
-    println!(
-        "After applying rule '{}':\n{}",
-        rule.name(),
-        plan.display_indent()
-    )
-}
-```
-
-## Providing Custom Rules
-
-The optimizer can be created with a custom set of rules.
-
-```rust
-let optimizer = Optimizer::with_rules(vec![
-    Arc::new(MyRule {})
-]);
-```
-
-## Writing Optimization Rules
-
-Please refer to the [examples](examples) to learn more about the general approach to writing optimizer rules and
-then move onto studying the existing rules.
-
-All rules must implement the `OptimizerRule` trait.
-
-```rust
-/// `OptimizerRule` transforms one ['LogicalPlan'] into another which
-/// computes the same results, but in a potentially more efficient
-/// way.
-pub trait OptimizerRule {
-    /// Rewrite `plan` to an optimized form
-    fn optimize(
-        &self,
-        plan: &LogicalPlan,
-        optimizer_config: &mut OptimizerConfig,
-    ) -> Result<LogicalPlan>;
-
-    /// A human readable name for this optimizer rule
-    fn name(&self) -> &str;
-}
-```
-
-### General Guidelines
-
-Rules typical walk the logical plan and walk the expression trees inside operators and selectively mutate
-individual operators or expressions.
-
-Sometimes there is an initial pass that visits the plan and builds state that is used in a second pass that performs
-the actual optimization. This approach is used in projection push down and filter push down.
-
-### Utilities
-
-There are a number of utility methods provided that take care of some common tasks.
-
-### ExprVisitor
-
-TBD
-
-### Rewriting Expressions
-
-The `MyExprRewriter` trait can be implemented to provide a way to rewrite expressions. This rule can then be applied
-to an expression by calling `Expr::rewrite` (from the `ExprRewritable` trait).
-
-The `rewrite` method will perform a depth first walk of the expression and its children to rewrite an expression,
-consuming `self` producing a new expression.
-
-```rust
-let mut expr_rewriter = MyExprRewriter {};
-let expr = expr.rewrite(&mut expr_rewriter)?;
-```
-
-Here is an example implementation which will rewrite `expr BETWEEN a AND b` as `expr >= a AND expr <= b`. Note that the
-implementation does not need to perform any recursion since this is handled by the `rewrite` method.
-
-```rust
-struct MyExprRewriter {}
-
-impl ExprRewriter for MyExprRewriter {
-    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
-        match expr {
-            Expr::Between {
-                negated,
-                expr,
-                low,
-                high,
-            } => {
-                let expr: Expr = expr.as_ref().clone();
-                let low: Expr = low.as_ref().clone();
-                let high: Expr = high.as_ref().clone();
-                if negated {
-                    Ok(expr.clone().lt(low).or(expr.clone().gt(high)))
-                } else {
-                    Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high)))
-                }
-            }
-            _ => Ok(expr.clone()),
-        }
-    }
-}
-```
-
-### optimize_children
-
-TBD
-
-### Writing Tests
-
-There should be unit tests in the same file as the new rule that test the effect of the rule being applied to a plan
-in isolation (without any other rule being applied).
-
-There should also be a test in `integration-tests.rs` that tests the rule as part of the overall optimization process.
+This crate is a submodule of DataFusion that provides query optimizer rules.
 
 [df]: https://crates.io/crates/datafusion

From 0a249086ff3271c029f89cab985ebbb5fb08e819 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Mon, 10 Oct 2022 21:13:55 -0600
Subject: [PATCH 5/7] fix tests on windows

---
 benchmarks/src/bin/tpch.rs | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs
index b08539eaf7af..2341fc5b423d 100644
--- a/benchmarks/src/bin/tpch.rs
+++ b/benchmarks/src/bin/tpch.rs
@@ -590,6 +590,7 @@ struct QueryResult {
 mod tests {
     use super::*;
     use std::env;
+    use std::io::{BufRead, BufReader};
     use std::ops::{Div, Mul};
     use std::sync::Arc;
 
@@ -766,7 +767,7 @@ mod tests {
         let mut found = false;
         for path in &possibilities {
             let path = Path::new(&path);
-            if let Some(expected) = fs::read_to_string(path).ok() {
+            if let Some(expected) = read_text_file(path).ok() {
                 assert_eq!(expected, actual);
                 found = true;
                 break;
@@ -777,6 +778,21 @@ mod tests {
         Ok(())
     }
 
+    /// we need to read line by line and add \n so tests work on Windows
+    fn read_text_file(path: &Path) -> Result<String> {
+        let file = File::open(path)?;
+        let reader = BufReader::new(file);
+        let mut str = String::new();
+        for line in reader.lines() {
+            let line = line?;
+            if !str.is_empty() {
+                str += "\n";
+            }
+            str += &line;
+        }
+        Ok(str)
+    }
+
     #[tokio::test]
     async fn q1() -> Result<()> {
         verify_query(1).await

From 4608073992691fc038445b03223545b2e6e76e4a Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Mon, 10 Oct 2022 21:42:00 -0600
Subject: [PATCH 6/7] ignore one test

---
 benchmarks/src/bin/tpch.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs
index 2341fc5b423d..d5c10ce503c2 100644
--- a/benchmarks/src/bin/tpch.rs
+++ b/benchmarks/src/bin/tpch.rs
@@ -706,7 +706,17 @@ mod tests {
         expected_plan(16).await
     }
 
-    #[tokio::test]
+    /// This query produces different plans depending on operating system. The difference is
+    /// due to re-writing the following expression:
+    ///
+    /// `sum(l_extendedprice) / 7.0 as avg_yearly`
+    ///
+    /// Linux:   Decimal128(Some(7000000000000000195487369212723200),38,33)
+    /// Windows: Decimal128(Some(6999999999999999042565864605876224),38,33)
+    ///
+    /// See https://github.com/apache/arrow-datafusion/issues/3791
+    #[tokio::test]
+    #[ignore]
     async fn q17_expected_plan() -> Result<()> {
         expected_plan(17).await
     }

From b0ec2e2fd4226223a136062d94f16a2c5c72a2c7 Mon Sep 17 00:00:00 2001
From: Andy Grove <andygrove73@gmail.com>
Date: Tue, 11 Oct 2022 05:40:09 -0600
Subject: [PATCH 7/7] clippy

---
 benchmarks/src/bin/tpch.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs
index d5c10ce503c2..5205f37c114f 100644
--- a/benchmarks/src/bin/tpch.rs
+++ b/benchmarks/src/bin/tpch.rs
@@ -777,7 +777,7 @@ mod tests {
         let mut found = false;
         for path in &possibilities {
             let path = Path::new(&path);
-            if let Some(expected) = read_text_file(path).ok() {
+            if let Ok(expected) = read_text_file(path) {
                 assert_eq!(expected, actual);
                 found = true;
                 break;