diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs index 1bd1b4be823e..33cbeb12ca6b 100644 --- a/datafusion/src/datasource/csv.rs +++ b/datafusion/src/datasource/csv.rs @@ -71,8 +71,7 @@ impl CsvFile { let schema = Arc::new(match options.schema { Some(s) => s.clone(), None => { - let mut filenames: Vec = vec![]; - common::build_file_list(path, &mut filenames, options.file_extension)?; + let filenames = common::build_file_list(path, options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/physical_plan/common.rs b/datafusion/src/physical_plan/common.rs index 9de7ee2a32dd..f1ed3742340b 100644 --- a/datafusion/src/physical_plan/common.rs +++ b/datafusion/src/physical_plan/common.rs @@ -78,8 +78,19 @@ pub async fn collect(stream: SendableRecordBatchStream) -> Result, ext: &str) -> Result<()> { +/// Recursively builds a list of files in a directory with a given extension +pub fn build_file_list(dir: &str, ext: &str) -> Result> { + let mut filenames: Vec = Vec::new(); + build_file_list_recurse(dir, &mut filenames, ext)?; + Ok(filenames) +} + +/// Recursively build a list of files in a directory with a given extension with an accumulator list +fn build_file_list_recurse( + dir: &str, + filenames: &mut Vec, + ext: &str, +) -> Result<()> { let metadata = metadata(dir)?; if metadata.is_file() { if dir.ends_with(ext) { @@ -91,7 +102,7 @@ pub fn build_file_list(dir: &str, filenames: &mut Vec, ext: &str) -> Res let path = entry.path(); if let Some(path_name) = path.to_str() { if path.is_dir() { - build_file_list(path_name, filenames, ext)?; + build_file_list_recurse(path_name, filenames, ext)?; } else if path_name.ends_with(ext) { filenames.push(path_name.to_string()); } diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs index b96a702f2732..9ab817799954 100644 --- a/datafusion/src/physical_plan/csv.rs +++ b/datafusion/src/physical_plan/csv.rs @@ -199,8 +199,7 @@ impl CsvExec { ) -> Result { let file_extension = String::from(options.file_extension); - let mut filenames: Vec = vec![]; - common::build_file_list(path, &mut filenames, file_extension.as_str())?; + let filenames = common::build_file_list(path, file_extension.as_str())?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/physical_plan/hash_join.rs b/datafusion/src/physical_plan/hash_join.rs index 3398494e3c46..60d65b236160 100644 --- a/datafusion/src/physical_plan/hash_join.rs +++ b/datafusion/src/physical_plan/hash_join.rs @@ -389,7 +389,7 @@ impl ExecutionPlan for HashJoinExec { num_output_rows: 0, join_time: 0, random_state: self.random_state.clone(), - visited_left_side: visited_left_side, + visited_left_side, is_exhausted: false, })) } diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/parquet.rs index d41d6968fee0..09dd48df3ed5 100644 --- a/datafusion/src/physical_plan/parquet.rs +++ b/datafusion/src/physical_plan/parquet.rs @@ -118,8 +118,7 @@ impl ParquetExec { ) -> Result { // build a list of filenames from the specified path, which could be a single file or // a directory containing one or more parquet files - let mut filenames: Vec = vec![]; - common::build_file_list(path, &mut filenames, ".parquet")?; + let filenames = common::build_file_list(path, ".parquet")?; if filenames.is_empty() { Err(DataFusionError::Plan(format!( "No Parquet files found at path {}",