diff --git a/datafusion/datasource-parquet/src/metadata.rs b/datafusion/datasource-parquet/src/metadata.rs index 4de68793ce02..c8ee4d3b9f57 100644 --- a/datafusion/datasource-parquet/src/metadata.rs +++ b/datafusion/datasource-parquet/src/metadata.rs @@ -299,7 +299,6 @@ impl<'a> DFParquetMetadata<'a> { summarize_min_max_null_counts( &mut accumulators, idx, - num_rows, &stats_converter, row_groups_metadata, ) @@ -417,7 +416,6 @@ struct StatisticsAccumulators<'a> { fn summarize_min_max_null_counts( accumulators: &mut StatisticsAccumulators, arrow_schema_index: usize, - num_rows: usize, stats_converter: &StatisticsConverter, row_groups_metadata: &[RowGroupMetaData], ) -> Result<()> { @@ -449,11 +447,14 @@ fn summarize_min_max_null_counts( ); } - accumulators.null_counts_array[arrow_schema_index] = - Precision::Exact(match sum(&null_counts) { - Some(null_count) => null_count as usize, - None => num_rows, - }); + accumulators.null_counts_array[arrow_schema_index] = match sum(&null_counts) { + Some(null_count) => Precision::Exact(null_count as usize), + None => match null_counts.len() { + // sum() is None if the array is empty or every entry is null (no null-count stats) + 0 => Precision::Exact(0), + _ => Precision::Absent, + }, + }; Ok(()) } diff --git a/datafusion/sqllogictest/test_files/parquet.slt b/datafusion/sqllogictest/test_files/parquet.slt index e722005bf0f0..11942108ab2b 100644 --- a/datafusion/sqllogictest/test_files/parquet.slt +++ b/datafusion/sqllogictest/test_files/parquet.slt @@ -862,3 +862,30 @@ select part, k, v from t order by k statement ok DROP TABLE t; + +# Regression test for files with stats on some columns and not others +# See https://github.com/apache/datafusion/pull/18276 + +query I +COPY (SELECT 1::int AS a, 2::int as b) +TO 'test_files/scratch/parquet/mixed_stats.parquet' +STORED AS PARQUET OPTIONS ( + 'STATISTICS_ENABLED::b' 'none' +); +---- +1 + +statement ok +CREATE EXTERNAL TABLE t +STORED AS PARQUET +LOCATION 
'test_files/scratch/parquet/mixed_stats.parquet'; + +query I +SELECT b +FROM t +WHERE b = 2; +---- +2 + +statement ok +DROP TABLE t;