Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions datafusion/datasource-parquet/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,6 @@ impl<'a> DFParquetMetadata<'a> {
summarize_min_max_null_counts(
&mut accumulators,
idx,
num_rows,
&stats_converter,
row_groups_metadata,
)
Expand Down Expand Up @@ -417,7 +416,6 @@ struct StatisticsAccumulators<'a> {
fn summarize_min_max_null_counts(
accumulators: &mut StatisticsAccumulators,
arrow_schema_index: usize,
num_rows: usize,
stats_converter: &StatisticsConverter,
row_groups_metadata: &[RowGroupMetaData],
) -> Result<()> {
Expand Down Expand Up @@ -449,11 +447,14 @@ fn summarize_min_max_null_counts(
);
}

accumulators.null_counts_array[arrow_schema_index] =
Precision::Exact(match sum(&null_counts) {
Some(null_count) => null_count as usize,
None => num_rows,
});
accumulators.null_counts_array[arrow_schema_index] = match sum(&null_counts) {
Some(null_count) => Precision::Exact(null_count as usize),
None => match null_counts.len() {
// sum() returns None when `null_counts` is empty or when every entry in it is null (no null-count statistics), not when the column's values are all null
0 => Precision::Exact(0),
_ => Precision::Absent,
},
};

Ok(())
}
Expand Down
27 changes: 27 additions & 0 deletions datafusion/sqllogictest/test_files/parquet.slt
Original file line number Diff line number Diff line change
Expand Up @@ -862,3 +862,30 @@ select part, k, v from t order by k

statement ok
DROP TABLE t;

# Regression test for files with stats on some columns and not others
# See https://github.com/apache/datafusion/pull/18276

query I
COPY (SELECT 1::int AS a, 2::int as b)
TO 'test_files/scratch/parquet/mixed_stats.parquet'
STORED AS PARQUET OPTIONS (
'STATISTICS_ENABLED::b' 'none'
);
----
1

statement ok
CREATE EXTERNAL TABLE t
STORED AS PARQUET
LOCATION 'test_files/scratch/parquet/mixed_stats.parquet';

query I
SELECT b
FROM t
WHERE b = 2;
----
2
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On main this outputs 0 rows:

❯ datafusion-cli -f q.sql
DataFusion CLI v50.0.0
+-------+
| count |
+-------+
| 1     |
+-------+
1 row(s) fetched. 
Elapsed 0.002 seconds.

0 row(s) fetched. 
Elapsed 0.001 seconds.

+---+
| b |
+---+
+---+
0 row(s) fetched. 
Elapsed 0.003 seconds.


statement ok
DROP TABLE t;