From 124ec3fa494b55ffc362d73b255a0d0478df8152 Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Thu, 27 May 2021 15:50:09 -0400 Subject: [PATCH 1/4] test: display of each plan --- datafusion/tests/sql.rs | 196 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index eb50661b42e6..021d9d51446a 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -1555,6 +1555,8 @@ fn create_join_context_qualified() -> Result { #[tokio::test] async fn csv_explain() { + // This test uses the execute function that create full plan cycle: logical, optimized logical, and physical, + // then execute the physical plan and return the final explain results let mut ctx = ExecutionContext::new(); register_aggregate_csv_by_sql(&mut ctx).await; let sql = "EXPLAIN SELECT c1 FROM aggregate_test_100 where c2 > 10"; @@ -1573,6 +1575,101 @@ async fn csv_explain() { assert_eq!(expected, actual); } +#[tokio::test] +async fn csv_explain_plans() { + // This test verify the look of each plan in its full cycle plan creation + + let mut ctx = ExecutionContext::new(); + register_aggregate_csv_by_sql(&mut ctx).await; + let sql = "EXPLAIN SELECT c1 FROM aggregate_test_100 where c2 > 10"; + + // Logical plan + // Create plan + let msg = format!("Creating logical plan for '{}'", sql); + let plan = ctx.create_logical_plan(&sql).expect(&msg); + let logical_schema = plan.schema(); + // + println!("SQL: {}", sql); + // Verify schema + let expected = "Explain [plan_type:Utf8, plan:Utf8]"; + let actual = format!("{}", plan.display_indent_schema()); + assert_eq!(expected, actual); + // Verify the text format of the plan + let expected = "Explain"; + let actual = format!("{}", plan.display_indent()); + assert_eq!(expected, actual); + // verify the grahviz format of the plan + let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n + digraph {\n + subgraph cluster_1\n + {\n + 
graph[label=\"LogicalPlan\"]\n + 2[shape=box label=\"Explain\"]\n + }\n + subgraph cluster_3\n + {\n + graph[label=\"Detailed LogicalPlan\"]\n + 4[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n + }\n + }\n + // End DataFusion GraphViz Plan"; + let actual = format!("{}", plan.display_graphviz()); + assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + // Optimized logical plan + // + let msg = format!("Optimizing logical plan for '{}': {:?}", sql, plan); + let plan = ctx.optimize(&plan).expect(&msg); + let optimized_logical_schema = plan.schema(); + // Both schema has to be the same + assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref()); + // Verify schema + let expected = "Explain [plan_type:Utf8, plan:Utf8]"; + let actual = format!("{}", plan.display_indent_schema()); + assert_eq!(expected, actual); + // Verify the text format of the plan + let expected = "Explain"; + let actual = format!("{}", plan.display_indent()); + assert_eq!(expected, actual); + // verify the grahviz format of the plan + let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n + digraph {\n + subgraph cluster_1\n + {\n + graph[label=\"LogicalPlan\"]\n + 2[shape=box label=\"Explain\"]\n + }\n + subgraph cluster_3\n + {\n + graph[label=\"Detailed LogicalPlan\"]\n + 4[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n + }\n + }\n + // End DataFusion GraphViz Plan"; + let actual = format!("{}", plan.display_graphviz()); + assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + + // Physical plan + // Create plan + let msg = format!("Creating physical plan for '{}': {:?}", sql, plan); + let plan = ctx.create_physical_plan(&plan).expect(&msg); + // Verify the text format of the plan + let expected = "ExplainExec"; + let actual = format!("{}", 
displayable(plan.as_ref()).indent()); + assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + + // Execute plan + let msg = format!("Executing physical plan for '{}': {:?}", sql, plan); + let results = collect(plan).await.expect(&msg); + // Compare final explain result from execution output + let expected = vec![ + vec!["logical_plan", + "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=None"]]; + let actual = result_vec(&results); + assert_eq!(expected, actual); +} + + + #[tokio::test] async fn csv_explain_verbose() { let mut ctx = ExecutionContext::new(); @@ -1591,6 +1688,105 @@ async fn csv_explain_verbose() { assert!(actual.contains("#c2 Gt Int64(10)"), "Actual: '{}'", actual); } +#[tokio::test] +async fn csv_explain_verbose_plans() { + // This test verify the look of each plan in its full cycle plan creation + + let mut ctx = ExecutionContext::new(); + register_aggregate_csv_by_sql(&mut ctx).await; + let sql = "EXPLAIN VERBOSE SELECT c1 FROM aggregate_test_100 where c2 > 10"; + + // Logical plan + // Create plan + let msg = format!("Creating logical plan for '{}'", sql); + let plan = ctx.create_logical_plan(&sql).expect(&msg); + let logical_schema = plan.schema(); + // + println!("SQL: {}", sql); + // Verify schema + let expected = "Explain [plan_type:Utf8, plan:Utf8]"; + let actual = format!("{}", plan.display_indent_schema()); + assert_eq!(expected, actual); + // Verify the text format of the plan + let expected = "Explain"; + let actual = format!("{}", plan.display_indent()); + assert_eq!(expected, actual); + // verify the grahviz format of the plan + let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n + digraph {\n + subgraph cluster_1\n + {\n + graph[label=\"LogicalPlan\"]\n + 2[shape=box label=\"Explain\"]\n + }\n + subgraph cluster_3\n + {\n + graph[label=\"Detailed LogicalPlan\"]\n + 4[shape=box 
label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n + }\n + }\n + // End DataFusion GraphViz Plan"; + let actual = format!("{}", plan.display_graphviz()); + assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + // Optimized logical plan + // + let msg = format!("Optimizing logical plan for '{}': {:?}", sql, plan); + let plan = ctx.optimize(&plan).expect(&msg); + let optimized_logical_schema = plan.schema(); + // Both schema has to be the same + assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref()); + // Verify schema + let expected = "Explain [plan_type:Utf8, plan:Utf8]"; + let actual = format!("{}", plan.display_indent_schema()); + assert_eq!(expected, actual); + // Verify the text format of the plan + let expected = "Explain"; + let actual = format!("{}", plan.display_indent()); + assert_eq!(expected, actual); + // verify the grahviz format of the plan + let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n + digraph {\n + subgraph cluster_1\n + {\n + graph[label=\"LogicalPlan\"]\n + 2[shape=box label=\"Explain\"]\n + }\n + subgraph cluster_3\n + {\n + graph[label=\"Detailed LogicalPlan\"]\n + 4[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n + }\n + }\n + // End DataFusion GraphViz Plan"; + let actual = format!("{}", plan.display_graphviz()); + assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + + // Physical plan + // Create plan + let msg = format!("Creating physical plan for '{}': {:?}", sql, plan); + let plan = ctx.create_physical_plan(&plan).expect(&msg); + // Verify the text format of the plan + let expected = "ExplainExec"; + let actual = format!("{}", displayable(plan.as_ref()).indent()); + assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + + // 
Execute plan + let msg = format!("Executing physical plan for '{}': {:?}", sql, plan); + let results = collect(plan).await.expect(&msg); + // Compare final explain result from execution output + let expected = vec![ + vec!["logical_plan", + "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=None"], + vec!["logical_plan after projection_push_down", + "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=Some([0, 1])"], + vec!["physical_plan", + "ProjectionExec: expr=[c1]\n FilterExec: CAST(c2 AS Int64) > 10\n CsvExec: source=Path(/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv: [/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv]), has_header=true\n"] + + ]; + let actual = result_vec(&results); + assert_eq!(expected, actual); +} + fn aggr_test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("c1", DataType::Utf8, false), From 19265912320bfae23a5022b57e24fe84e7f83294 Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Fri, 28 May 2021 15:10:01 -0400 Subject: [PATCH 2/4] fix: Fix debug display of explain and potential fix of predicate pushdown --- datafusion/src/logical_plan/plan.rs | 9 +- datafusion/src/optimizer/filter_push_down.rs | 4 + datafusion/tests/sql.rs | 312 ++++++++++++++----- 3 files changed, 238 insertions(+), 87 deletions(-) diff --git a/datafusion/src/logical_plan/plan.rs b/datafusion/src/logical_plan/plan.rs index 8b9aac9ea73b..f8217323a7f9 100644 --- a/datafusion/src/logical_plan/plan.rs +++ b/datafusion/src/logical_plan/plan.rs @@ -329,11 +329,12 @@ impl LogicalPlan { LogicalPlan::Limit { input, .. } => vec![input], LogicalPlan::Extension { node } => node.inputs(), LogicalPlan::Union { inputs, .. } => inputs.iter().collect(), + LogicalPlan::Explain { plan, .. } => vec![plan], // plans without inputs LogicalPlan::TableScan { .. 
} | LogicalPlan::EmptyRelation { .. } - | LogicalPlan::CreateExternalTable { .. } - | LogicalPlan::Explain { .. } => vec![], + //| LogicalPlan::Explain { .. } + | LogicalPlan::CreateExternalTable { .. } => vec![], } } } @@ -438,11 +439,11 @@ impl LogicalPlan { } true } + LogicalPlan::Explain { plan, .. } => plan.accept(visitor)?, // plans without inputs LogicalPlan::TableScan { .. } | LogicalPlan::EmptyRelation { .. } - | LogicalPlan::CreateExternalTable { .. } - | LogicalPlan::Explain { .. } => true, + | LogicalPlan::CreateExternalTable { .. } => true, }; if !recurse { return Ok(false); diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs index 4c248e2b6483..6f6a1e8cad53 100644 --- a/datafusion/src/optimizer/filter_push_down.rs +++ b/datafusion/src/optimizer/filter_push_down.rs @@ -234,6 +234,10 @@ fn split_members<'a>(predicate: &'a Expr, predicates: &mut Vec<&'a Expr>) { fn optimize(plan: &LogicalPlan, mut state: State) -> Result { match plan { + LogicalPlan::Explain { .. 
} => { + // push the optimization to the plan of this explain + push_down(&state, plan) + } LogicalPlan::Filter { input, predicate } => { let mut predicates = vec![]; split_members(predicate, &mut predicates); diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 021d9d51446a..617ee3eaebce 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -1556,7 +1556,7 @@ fn create_join_context_qualified() -> Result { #[tokio::test] async fn csv_explain() { // This test uses the execute function that create full plan cycle: logical, optimized logical, and physical, - // then execute the physical plan and return the final explain results + // then execute the physical plan and return the final explain results let mut ctx = ExecutionContext::new(); register_aggregate_csv_by_sql(&mut ctx).await; let sql = "EXPLAIN SELECT c1 FROM aggregate_test_100 where c2 > 10"; @@ -1588,33 +1588,72 @@ async fn csv_explain_plans() { let msg = format!("Creating logical plan for '{}'", sql); let plan = ctx.create_logical_plan(&sql).expect(&msg); let logical_schema = plan.schema(); - // + // println!("SQL: {}", sql); + // // Verify schema - let expected = "Explain [plan_type:Utf8, plan:Utf8]"; + let expected = "Explain [plan_type:Utf8, plan:Utf8]\n + Projection: #c1 [c1:Utf8]\n + Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\n + TableScan: aggregate_test_100 projection=None [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]"; let actual = format!("{}", plan.display_indent_schema()); - assert_eq!(expected, actual); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // // Verify the text format of the plan - let expected = "Explain"; + let expected = "Explain\n 
Projection: #c1\n + Filter: #c2 Gt Int64(10)\n + TableScan: aggregate_test_100 projection=None"; let actual = format!("{}", plan.display_indent()); - assert_eq!(expected, actual); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // // verify the grahviz format of the plan let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n digraph {\n - subgraph cluster_1\n - {\n - graph[label=\"LogicalPlan\"]\n - 2[shape=box label=\"Explain\"]\n - }\n - subgraph cluster_3\n - {\n - graph[label=\"Detailed LogicalPlan\"]\n - 4[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n - }\n + subgraph cluster_1\n + {\n + graph[label=\"LogicalPlan\"]\n + 2[shape=box label=\"Explain\"]\n + 3[shape=box label=\"Projection: #c1\"]\n + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]\n + 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]\n + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]\n + 5[shape=box label=\"TableScan: aggregate_test_100 projection=None\"]\n + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]\n + }\n + subgraph cluster_6\n + {\n + graph[label=\"Detailed LogicalPlan\"]\n + 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n + 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]\n + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]\n + 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]\n + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]\n + 10[shape=box label=\"TableScan: aggregate_test_100 projection=None\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]\n + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]\n + }\n }\n - // End DataFusion GraphViz 
Plan"; + // End DataFusion GraphViz Plan\n"; let actual = format!("{}", plan.display_graphviz()); - assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // Optimized logical plan // let msg = format!("Optimizing logical plan for '{}': {:?}", sql, plan); @@ -1622,41 +1661,76 @@ async fn csv_explain_plans() { let optimized_logical_schema = plan.schema(); // Both schema has to be the same assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref()); + // // Verify schema - let expected = "Explain [plan_type:Utf8, plan:Utf8]"; + let expected = "Explain [plan_type:Utf8, plan:Utf8]\n + Projection: #c1 [c1:Utf8]\n + Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32]\n + TableScan: aggregate_test_100 projection=Some([0, 1]) [c1:Utf8, c2:Int32]"; let actual = format!("{}", plan.display_indent_schema()); - assert_eq!(expected, actual); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // // Verify the text format of the plan - let expected = "Explain"; + let expected = "Explain\n + Projection: #c1\n + Filter: #c2 Gt Int64(10)\n + TableScan: aggregate_test_100 projection=Some([0, 1])"; let actual = format!("{}", plan.display_indent()); - assert_eq!(expected, actual); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // // verify the grahviz format of the plan let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n digraph {\n - subgraph cluster_1\n - {\n - graph[label=\"LogicalPlan\"]\n - 2[shape=box label=\"Explain\"]\n - }\n - subgraph cluster_3\n - {\n - graph[label=\"Detailed LogicalPlan\"]\n - 4[shape=box 
label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n - }\n - }\n - // End DataFusion GraphViz Plan"; + subgraph cluster_1\n + {\n + graph[label=\"LogicalPlan\"]\n + 2[shape=box label=\"Explain\"]\n + 3[shape=box label=\"Projection: #c1\"]\n + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]\n + 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]\n + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]\n + 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\"]\n + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]\n + }\n + subgraph cluster_6\n + {\n + graph[label=\"Detailed LogicalPlan\"]\n + 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n + 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]\n + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]\n + 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32]\"]\n + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]\n + 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\\nSchema: [c1:Utf8, c2:Int32]\"]\n + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]\n + }\n + }\n + // End DataFusion GraphViz Plan\n"; let actual = format!("{}", plan.display_graphviz()); - assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); // Physical plan // Create plan let msg = format!("Creating physical plan for '{}': {:?}", sql, plan); let plan = ctx.create_physical_plan(&plan).expect(&msg); - // Verify the text format of the plan - let expected = "ExplainExec"; - let actual = format!("{}", displayable(plan.as_ref()).indent()); - assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); - + // // Execute plan let msg = format!("Executing 
physical plan for '{}': {:?}", sql, plan); let results = collect(plan).await.expect(&msg); @@ -1668,8 +1742,6 @@ async fn csv_explain_plans() { assert_eq!(expected, actual); } - - #[tokio::test] async fn csv_explain_verbose() { let mut ctx = ExecutionContext::new(); @@ -1701,75 +1773,150 @@ async fn csv_explain_verbose_plans() { let msg = format!("Creating logical plan for '{}'", sql); let plan = ctx.create_logical_plan(&sql).expect(&msg); let logical_schema = plan.schema(); - // + // println!("SQL: {}", sql); + // // Verify schema - let expected = "Explain [plan_type:Utf8, plan:Utf8]"; + let expected = "Explain [plan_type:Utf8, plan:Utf8]\n + Projection: #c1 [c1:Utf8]\n + Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\n + TableScan: aggregate_test_100 projection=None [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]"; let actual = format!("{}", plan.display_indent_schema()); - assert_eq!(expected, actual); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // // Verify the text format of the plan - let expected = "Explain"; + let expected = "Explain\n + Projection: #c1\n + Filter: #c2 Gt Int64(10)\n + TableScan: aggregate_test_100 projection=None"; let actual = format!("{}", plan.display_indent()); - assert_eq!(expected, actual); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // // verify the grahviz format of the plan let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n digraph {\n - subgraph cluster_1\n - {\n - graph[label=\"LogicalPlan\"]\n - 2[shape=box label=\"Explain\"]\n - }\n - subgraph cluster_3\n - {\n - 
graph[label=\"Detailed LogicalPlan\"]\n - 4[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n - }\n + subgraph cluster_1\n + {\n + graph[label=\"LogicalPlan\"]\n + 2[shape=box label=\"Explain\"]\n + 3[shape=box label=\"Projection: #c1\"]\n + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]\n + 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]\n + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]\n + 5[shape=box label=\"TableScan: aggregate_test_100 projection=None\"]\n + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]\n + }\n + subgraph cluster_6\n + {\n + graph[label=\"Detailed LogicalPlan\"]\n + 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n + 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]\n + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]\n + 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]\n + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]\n + 10[shape=box label=\"TableScan: aggregate_test_100 projection=None\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]\n + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]\n + }\n }\n - // End DataFusion GraphViz Plan"; + // End DataFusion GraphViz Plan\n"; let actual = format!("{}", plan.display_graphviz()); - assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); - // Optimized logical plan + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); // + // Optimized logical plan let msg = format!("Optimizing logical plan for '{}': {:?}", sql, plan); let plan = ctx.optimize(&plan).expect(&msg); let optimized_logical_schema = plan.schema(); + 
// // Both schema has to be the same assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref()); + // // Verify schema - let expected = "Explain [plan_type:Utf8, plan:Utf8]"; + let expected = "Explain [plan_type:Utf8, plan:Utf8]\n + Projection: #c1 [c1:Utf8]\n + Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32]\n + TableScan: aggregate_test_100 projection=Some([0, 1]) [c1:Utf8, c2:Int32]"; let actual = format!("{}", plan.display_indent_schema()); - assert_eq!(expected, actual); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // // Verify the text format of the plan - let expected = "Explain"; + let expected = "Explain\n + Projection: #c1\n + Filter: #c2 Gt Int64(10)\n + TableScan: aggregate_test_100 projection=Some([0, 1])"; let actual = format!("{}", plan.display_indent()); - assert_eq!(expected, actual); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); + // // verify the grahviz format of the plan let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n digraph {\n - subgraph cluster_1\n - {\n - graph[label=\"LogicalPlan\"]\n - 2[shape=box label=\"Explain\"]\n - }\n - subgraph cluster_3\n - {\n - graph[label=\"Detailed LogicalPlan\"]\n - 4[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n - }\n - }\n - // End DataFusion GraphViz Plan"; + subgraph cluster_1\n + {\n + graph[label=\"LogicalPlan\"]\n + 2[shape=box label=\"Explain\"]\n + 3[shape=box label=\"Projection: #c1\"]\n + 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]\n + 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]\n + 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]\n + 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\"]\n + 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]\n + }\n + subgraph cluster_6\n + {\n + 
graph[label=\"Detailed LogicalPlan\"]\n + 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n + 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]\n + 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]\n + 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32]\"]\n + 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]\n + 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\\nSchema: [c1:Utf8, c2:Int32]\"]\n + 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]\n + }\n + }\n + // End DataFusion GraphViz Plan\n"; let actual = format!("{}", plan.display_graphviz()); - assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); + assert_eq!( + expected + .replace("\t", "") + .replace("\n", "") + .replace(" ", ""), + actual.replace("\t", "").replace("\n", "").replace(" ", "") + ); // Physical plan // Create plan let msg = format!("Creating physical plan for '{}': {:?}", sql, plan); let plan = ctx.create_physical_plan(&plan).expect(&msg); - // Verify the text format of the plan - let expected = "ExplainExec"; - let actual = format!("{}", displayable(plan.as_ref()).indent()); - assert_eq!(expected.replace("\t","").replace("\n","").replace(" ",""), actual.replace("\t","").replace("\n","").replace(" ","")); - + // // Execute plan let msg = format!("Executing physical plan for '{}': {:?}", sql, plan); let results = collect(plan).await.expect(&msg); @@ -1781,7 +1928,6 @@ async fn csv_explain_verbose_plans() { "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=Some([0, 1])"], vec!["physical_plan", "ProjectionExec: expr=[c1]\n FilterExec: CAST(c2 AS Int64) > 10\n CsvExec: source=Path(/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv: 
[/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv]), has_header=true\n"] - ]; let actual = result_vec(&results); assert_eq!(expected, actual); From 9dbd9f1c201c89a1f941ae1b17ccd12b2f1a5c68 Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Tue, 1 Jun 2021 11:44:29 -0400 Subject: [PATCH 3/4] refactor: address Andrew's comments --- datafusion/src/logical_plan/plan.rs | 1 - datafusion/tests/sql.rs | 462 +++++++++++++++------------- 2 files changed, 245 insertions(+), 218 deletions(-) diff --git a/datafusion/src/logical_plan/plan.rs b/datafusion/src/logical_plan/plan.rs index f8217323a7f9..509859e97749 100644 --- a/datafusion/src/logical_plan/plan.rs +++ b/datafusion/src/logical_plan/plan.rs @@ -333,7 +333,6 @@ impl LogicalPlan { // plans without inputs LogicalPlan::TableScan { .. } | LogicalPlan::EmptyRelation { .. } - //| LogicalPlan::Explain { .. } | LogicalPlan::CreateExternalTable { .. } => vec![], } } diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 617ee3eaebce..d5bd42ebd4b5 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -1592,66 +1592,70 @@ async fn csv_explain_plans() { println!("SQL: {}", sql); // // Verify schema - let expected = "Explain [plan_type:Utf8, plan:Utf8]\n - Projection: #c1 [c1:Utf8]\n - Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\n - TableScan: aggregate_test_100 projection=None [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]"; - let actual = format!("{}", plan.display_indent_schema()); + let expected = vec![ + "Explain [plan_type:Utf8, plan:Utf8]", + " Projection: #c1 [c1:Utf8]", + " Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, 
c13:Utf8]", + " TableScan: aggregate_test_100 projection=None [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // // Verify the text format of the plan - let expected = "Explain\n Projection: #c1\n - Filter: #c2 Gt Int64(10)\n - TableScan: aggregate_test_100 projection=None"; - let actual = format!("{}", plan.display_indent()); + let expected = vec![ + "Explain", + " Projection: #c1", + " Filter: #c2 Gt Int64(10)", + " TableScan: aggregate_test_100 projection=None", + ]; + let formatted = plan.display_indent().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // // verify the grahviz format of the plan - let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n - digraph {\n - subgraph cluster_1\n - {\n - graph[label=\"LogicalPlan\"]\n - 2[shape=box label=\"Explain\"]\n - 3[shape=box label=\"Projection: #c1\"]\n - 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]\n - 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]\n - 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]\n - 5[shape=box label=\"TableScan: aggregate_test_100 projection=None\"]\n - 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]\n - }\n - subgraph cluster_6\n - {\n - graph[label=\"Detailed LogicalPlan\"]\n - 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, 
plan:Utf8]\"]\n - 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]\n - 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]\n - 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]\n - 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]\n - 10[shape=box label=\"TableScan: aggregate_test_100 projection=None\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]\n - 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]\n - }\n - }\n - // End DataFusion GraphViz Plan\n"; - let actual = format!("{}", plan.display_graphviz()); + let expected = vec![ + "// Begin DataFusion GraphViz Plan (see https://graphviz.org)", + "digraph {", + " subgraph cluster_1", + " {", + " graph[label=\"LogicalPlan\"]", + " 2[shape=box label=\"Explain\"]", + " 3[shape=box label=\"Projection: #c1\"]", + " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", + " 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]", + " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", + " 5[shape=box label=\"TableScan: aggregate_test_100 projection=None\"]", + " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", + " }", + " subgraph cluster_6", + " {", + " graph[label=\"Detailed LogicalPlan\"]", + " 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]", + " 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]", + " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", + " 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]", + " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", + " 10[shape=box label=\"TableScan: aggregate_test_100 projection=None\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, 
c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]", + " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", + " }", + "}", + "// End DataFusion GraphViz Plan", + ]; + let formatted = plan.display_graphviz().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // Optimized logical plan @@ -1663,67 +1667,70 @@ async fn csv_explain_plans() { assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref()); // // Verify schema - let expected = "Explain [plan_type:Utf8, plan:Utf8]\n - Projection: #c1 [c1:Utf8]\n - Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32]\n - TableScan: aggregate_test_100 projection=Some([0, 1]) [c1:Utf8, c2:Int32]"; - let actual = format!("{}", plan.display_indent_schema()); + let expected = vec![ + "Explain [plan_type:Utf8, plan:Utf8]", + " Projection: #c1 [c1:Utf8]", + " Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32]", + " TableScan: aggregate_test_100 projection=Some([0, 1]) [c1:Utf8, c2:Int32]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // // Verify the text format of the plan - let expected = "Explain\n - Projection: #c1\n - Filter: #c2 Gt Int64(10)\n - TableScan: aggregate_test_100 projection=Some([0, 1])"; - let actual = format!("{}", plan.display_indent()); + let expected = vec![ + "Explain", + " Projection: #c1", + " Filter: #c2 Gt Int64(10)", + " TableScan: aggregate_test_100 projection=Some([0, 1])", + ]; + let 
formatted = plan.display_indent().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // // verify the grahviz format of the plan - let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n - digraph {\n - subgraph cluster_1\n - {\n - graph[label=\"LogicalPlan\"]\n - 2[shape=box label=\"Explain\"]\n - 3[shape=box label=\"Projection: #c1\"]\n - 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]\n - 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]\n - 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]\n - 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\"]\n - 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]\n - }\n - subgraph cluster_6\n - {\n - graph[label=\"Detailed LogicalPlan\"]\n - 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n - 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]\n - 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]\n - 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32]\"]\n - 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]\n - 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\\nSchema: [c1:Utf8, c2:Int32]\"]\n - 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]\n - }\n - }\n - // End DataFusion GraphViz Plan\n"; - let actual = format!("{}", plan.display_graphviz()); + let expected = vec![ + "// Begin DataFusion GraphViz Plan (see https://graphviz.org)", + "digraph {", + " subgraph cluster_1", + " {", + " graph[label=\"LogicalPlan\"]", + " 2[shape=box label=\"Explain\"]", + " 3[shape=box label=\"Projection: #c1\"]", + " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", + " 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]", + " 3 -> 4 
[arrowhead=none, arrowtail=normal, dir=back]", + " 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\"]", + " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", + " }", + " subgraph cluster_6", + " {", + " graph[label=\"Detailed LogicalPlan\"]", + " 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]", + " 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]", + " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", + " 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32]\"]", + " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", + " 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\\nSchema: [c1:Utf8, c2:Int32]\"]", + " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", + " }", + "}", + "// End DataFusion GraphViz Plan", + ]; + let formatted = plan.display_graphviz().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // Physical plan @@ -1739,7 +1746,11 @@ async fn csv_explain_plans() { vec!["logical_plan", "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=None"]]; let actual = result_vec(&results); - assert_eq!(expected, actual); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual + ); } #[tokio::test] @@ -1775,141 +1786,148 @@ async fn csv_explain_verbose_plans() { let logical_schema = plan.schema(); // println!("SQL: {}", sql); + // // Verify schema - let expected = "Explain [plan_type:Utf8, plan:Utf8]\n - Projection: #c1 [c1:Utf8]\n - Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\n - TableScan: 
aggregate_test_100 projection=None [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]"; - let actual = format!("{}", plan.display_indent_schema()); + let expected = vec![ + "Explain [plan_type:Utf8, plan:Utf8]", + " Projection: #c1 [c1:Utf8]", + " Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]", + " TableScan: aggregate_test_100 projection=None [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // // Verify the text format of the plan - let expected = "Explain\n - Projection: #c1\n - Filter: #c2 Gt Int64(10)\n - TableScan: aggregate_test_100 projection=None"; - let actual = format!("{}", plan.display_indent()); + let expected = vec![ + "Explain", + " Projection: #c1", + " Filter: #c2 Gt Int64(10)", + " TableScan: aggregate_test_100 projection=None", + ]; + let formatted = plan.display_indent().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // // verify the grahviz format of the plan - let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n - digraph {\n - subgraph cluster_1\n - {\n - graph[label=\"LogicalPlan\"]\n - 2[shape=box 
label=\"Explain\"]\n - 3[shape=box label=\"Projection: #c1\"]\n - 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]\n - 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]\n - 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]\n - 5[shape=box label=\"TableScan: aggregate_test_100 projection=None\"]\n - 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]\n - }\n - subgraph cluster_6\n - {\n - graph[label=\"Detailed LogicalPlan\"]\n - 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n - 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]\n - 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]\n - 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]\n - 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]\n - 10[shape=box label=\"TableScan: aggregate_test_100 projection=None\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]\n - 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]\n - }\n - }\n - // End DataFusion GraphViz Plan\n"; - let actual = format!("{}", plan.display_graphviz()); + let expected = vec![ + "// Begin DataFusion GraphViz Plan (see https://graphviz.org)", + "digraph {", + " subgraph cluster_1", + " {", + " graph[label=\"LogicalPlan\"]", + " 2[shape=box label=\"Explain\"]", + " 3[shape=box label=\"Projection: #c1\"]", + " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", + " 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]", + " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", + " 5[shape=box label=\"TableScan: aggregate_test_100 projection=None\"]", + " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", + " }", + " subgraph cluster_6", + " {", + " graph[label=\"Detailed LogicalPlan\"]", + " 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]", + " 8[shape=box 
label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]", + " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", + " 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]", + " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", + " 10[shape=box label=\"TableScan: aggregate_test_100 projection=None\\nSchema: [c1:Utf8, c2:Int32, c3:Int16, c4:Int16, c5:Int32, c6:Int64, c7:Int16, c8:Int32, c9:Int64, c10:Utf8, c11:Float32, c12:Float64, c13:Utf8]\"]", + " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", + " }", + "}", + "// End DataFusion GraphViz Plan", + ]; + let formatted = plan.display_graphviz().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); - // + // Optimized logical plan + // let msg = format!("Optimizing logical plan for '{}': {:?}", sql, plan); let plan = ctx.optimize(&plan).expect(&msg); let optimized_logical_schema = plan.schema(); - // // Both schema has to be the same assert_eq!(logical_schema.as_ref(), optimized_logical_schema.as_ref()); // // Verify schema - let expected = "Explain [plan_type:Utf8, plan:Utf8]\n - Projection: #c1 [c1:Utf8]\n - Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32]\n - TableScan: aggregate_test_100 projection=Some([0, 1]) [c1:Utf8, c2:Int32]"; - let actual = format!("{}", plan.display_indent_schema()); + let expected = vec![ + "Explain [plan_type:Utf8, plan:Utf8]", + " Projection: #c1 [c1:Utf8]", + " Filter: #c2 Gt Int64(10) [c1:Utf8, c2:Int32]", + " TableScan: aggregate_test_100 projection=Some([0, 1]) [c1:Utf8, c2:Int32]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = 
formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // // Verify the text format of the plan - let expected = "Explain\n - Projection: #c1\n - Filter: #c2 Gt Int64(10)\n - TableScan: aggregate_test_100 projection=Some([0, 1])"; - let actual = format!("{}", plan.display_indent()); + let expected = vec![ + "Explain", + " Projection: #c1", + " Filter: #c2 Gt Int64(10)", + " TableScan: aggregate_test_100 projection=Some([0, 1])", + ]; + let formatted = plan.display_indent().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // // verify the grahviz format of the plan - let expected = "// Begin DataFusion GraphViz Plan (see https://graphviz.org)\n - digraph {\n - subgraph cluster_1\n - {\n - graph[label=\"LogicalPlan\"]\n - 2[shape=box label=\"Explain\"]\n - 3[shape=box label=\"Projection: #c1\"]\n - 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]\n - 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]\n - 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]\n - 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\"]\n - 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]\n - }\n - subgraph cluster_6\n - {\n - graph[label=\"Detailed LogicalPlan\"]\n - 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]\n - 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]\n - 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]\n - 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32]\"]\n - 8 -> 9 [arrowhead=none, arrowtail=normal, 
dir=back]\n - 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\\nSchema: [c1:Utf8, c2:Int32]\"]\n - 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]\n - }\n - }\n - // End DataFusion GraphViz Plan\n"; - let actual = format!("{}", plan.display_graphviz()); + let expected = vec![ + "// Begin DataFusion GraphViz Plan (see https://graphviz.org)", + "digraph {", + " subgraph cluster_1", + " {", + " graph[label=\"LogicalPlan\"]", + " 2[shape=box label=\"Explain\"]", + " 3[shape=box label=\"Projection: #c1\"]", + " 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]", + " 4[shape=box label=\"Filter: #c2 Gt Int64(10)\"]", + " 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]", + " 5[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\"]", + " 4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]", + " }", + " subgraph cluster_6", + " {", + " graph[label=\"Detailed LogicalPlan\"]", + " 7[shape=box label=\"Explain\\nSchema: [plan_type:Utf8, plan:Utf8]\"]", + " 8[shape=box label=\"Projection: #c1\\nSchema: [c1:Utf8]\"]", + " 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]", + " 9[shape=box label=\"Filter: #c2 Gt Int64(10)\\nSchema: [c1:Utf8, c2:Int32]\"]", + " 8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]", + " 10[shape=box label=\"TableScan: aggregate_test_100 projection=Some([0, 1])\\nSchema: [c1:Utf8, c2:Int32]\"]", + " 9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]", + " }", + "}", + "// End DataFusion GraphViz Plan", + ]; + let formatted = plan.display_graphviz().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); assert_eq!( - expected - .replace("\t", "") - .replace("\n", "") - .replace(" ", ""), - actual.replace("\t", "").replace("\n", "").replace(" ", "") + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual ); // Physical plan @@ -1922,15 +1940,25 @@ async fn csv_explain_verbose_plans() { let results = collect(plan).await.expect(&msg); // 
Compare final explain result from execution output let expected = vec![ - vec!["logical_plan", - "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=None"], - vec!["logical_plan after projection_push_down", - "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=Some([0, 1])"], - vec!["physical_plan", - "ProjectionExec: expr=[c1]\n FilterExec: CAST(c2 AS Int64) > 10\n CsvExec: source=Path(/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv: [/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv]), has_header=true\n"] + vec![ + "logical_plan", + "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=None", + ], + vec![ + "logical_plan after projection_push_down", + "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=Some([0, 1])", + ], + vec![ + "physical_plan", + "ProjectionExec: expr=[c1]\n FilterExec: CAST(c2 AS Int64) > 10\n CsvExec: source=Path(/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv: [/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv]), has_header=true\n", + ], ]; let actual = result_vec(&results); - assert_eq!(expected, actual); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected, actual + ); } fn aggr_test_schema() -> SchemaRef { From 373a347ee77bf2632621551b7b76fef3495212ee Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Tue, 1 Jun 2021 14:15:31 -0400 Subject: [PATCH 4/4] fix: avoid comparing file path that are test environmentally dependant --- datafusion/tests/sql.rs | 54 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs 
index d5bd42ebd4b5..12d40adc85a9 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -1741,15 +1741,16 @@ async fn csv_explain_plans() { // Execute plan let msg = format!("Executing physical plan for '{}': {:?}", sql, plan); let results = collect(plan).await.expect(&msg); - // Compare final explain result from execution output - let expected = vec![ - vec!["logical_plan", - "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=None"]]; let actual = result_vec(&results); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected, actual + // flatten to a single string + let actual = actual.into_iter().map(|r| r.join("\t")).collect::<String>(); + // Since the plan contains path that are environmentally dependant (e.g. full path of the test file), only verify important content + assert!(actual.contains("logical_plan"), "Actual: '{}'", actual); + assert!(actual.contains("Projection: #c1"), "Actual: '{}'", actual); + assert!( + actual.contains("Filter: #c2 Gt Int64(10)"), + "Actual: '{}'", + actual ); } @@ -1938,26 +1939,25 @@ async fn csv_explain_verbose_plans() { // Execute plan let msg = format!("Executing physical plan for '{}': {:?}", sql, plan); let results = collect(plan).await.expect(&msg); - // Compare final explain result from execution output - let expected = vec![ - vec![ - "logical_plan", - "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=None", - ], - vec![ - "logical_plan after projection_push_down", - "Projection: #c1\n Filter: #c2 Gt Int64(10)\n TableScan: aggregate_test_100 projection=Some([0, 1])", - ], - vec![ - "physical_plan", - "ProjectionExec: expr=[c1]\n FilterExec: CAST(c2 AS Int64) > 10\n CsvExec: source=Path(/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv: 
[/Users/nga/.cargo/git/checkouts/arrow-rs-3b86e19e889d5acc/4449ee9/arrow/../testing/data/csv/aggregate_test_100.csv]), has_header=true\n", - ], - ]; let actual = result_vec(&results); - assert_eq!( - expected, actual, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected, actual + // flatten to a single string + let actual = actual.into_iter().map(|r| r.join("\t")).collect::<String>(); + // Since the plan contains path that are environmentally dependant(e.g. full path of the test file), only verify important content + assert!( + actual.contains("logical_plan after projection_push_down"), + "Actual: '{}'", + actual + ); + assert!(actual.contains("physical_plan"), "Actual: '{}'", actual); + assert!( + actual.contains("FilterExec: CAST(c2 AS Int64) > 10"), + "Actual: '{}'", + actual + ); + assert!( + actual.contains("ProjectionExec: expr=[c1]"), + "Actual: '{}'", + actual ); }