diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs b/datafusion/core/src/physical_plan/joins/hash_join.rs index e75b4072e6ed..2c339996480c 100644 --- a/datafusion/core/src/physical_plan/joins/hash_join.rs +++ b/datafusion/core/src/physical_plan/joins/hash_join.rs @@ -1219,9 +1219,9 @@ impl HashJoinStream { self.join_metrics.output_rows.add(batch.num_rows()); Some(result) } - Err(_) => Some(Err(DataFusionError::Execution( - "Build left right indices error".to_string(), - ))), + Err(err) => Some(Err(DataFusionError::Execution(format!( + "Fail to build join indices in HashJoinExec, error:{err}", + )))), }; timer.done(); result diff --git a/datafusion/core/src/physical_plan/joins/nested_loop_join.rs b/datafusion/core/src/physical_plan/joins/nested_loop_join.rs index 3d4e64aa5463..e3834573eb2f 100644 --- a/datafusion/core/src/physical_plan/joins/nested_loop_join.rs +++ b/datafusion/core/src/physical_plan/joins/nested_loop_join.rs @@ -475,9 +475,9 @@ fn join_left_and_right_batch( let mut left_indices_builder = UInt64Builder::new(); let mut right_indices_builder = UInt32Builder::new(); let left_right_indices = match indices_result { - Err(_) => Err(DataFusionError::Execution( - "Build left right indices error".to_string(), - )), + Err(err) => Err(DataFusionError::Execution(format!( + "Fail to build join indices in NestedLoopJoinExec, error:{err}" + ))), Ok(indices) => { for (left_side, right_side) in indices { left_indices_builder diff --git a/datafusion/core/src/physical_plan/joins/utils.rs b/datafusion/core/src/physical_plan/joins/utils.rs index b01483f560bd..5803055efdb6 100644 --- a/datafusion/core/src/physical_plan/joins/utils.rs +++ b/datafusion/core/src/physical_plan/joins/utils.rs @@ -27,7 +27,7 @@ use arrow::array::{ }; use arrow::compute; use arrow::datatypes::{Field, Schema, UInt32Type, UInt64Type}; -use arrow::record_batch::RecordBatch; +use arrow::record_batch::{RecordBatch, RecordBatchOptions}; use datafusion_common::cast::as_boolean_array; use datafusion_common::ScalarValue; use datafusion_physical_expr::{EquivalentClass, PhysicalExpr}; @@ -789,6 +789,18 @@ pub(crate) fn build_batch_from_indices( right_indices: UInt32Array, column_indices: &[ColumnIndex], ) -> Result { + if schema.fields().is_empty() { + let options = RecordBatchOptions::new() + .with_match_field_names(true) + .with_row_count(Some(left_indices.len())); + + return Ok(RecordBatch::try_new_with_options( + Arc::new(schema.clone()), + vec![], + &options, + )?); + } + // build the columns of the new [RecordBatch]: // 1. pick whether the column is from the left or right // 2. based on the pick, `take` items from the different RecordBatches diff --git a/datafusion/core/tests/sqllogictests/test_files/join.slt b/datafusion/core/tests/sqllogictests/test_files/join.slt index e78925a7faab..8d5f573431bc 100644 --- a/datafusion/core/tests/sqllogictests/test_files/join.slt +++ b/datafusion/core/tests/sqllogictests/test_files/join.slt @@ -35,6 +35,12 @@ CREATE TABLE grades(grade INT, min INT, max INT) AS VALUES (4, 56, 79), (5, 80, 100); +statement ok +CREATE TABLE test1(a int, b int) as select 1 as a, 2 as b; + +statement ok +CREATE TABLE test2(a int, b int) as select 1 as a, 2 as b; + # Regression test: https://github.com/apache/arrow-datafusion/issues/4844 query TII SELECT s.*, g.grade FROM students s join grades g on s.mark between g.min and g.max WHERE grade > 2 ORDER BY s.mark DESC @@ -81,3 +87,9 @@ ON ( ---- 11 a 1 44 d 4 + +# issue: https://github.com/apache/arrow-datafusion/issues/5382 +query IIII rowsort +SELECT * FROM test2 FULL JOIN test1 ON true; +---- +1 2 1 2