From d3ba1bc44470780fe7da474cd80f67ef13a733bc Mon Sep 17 00:00:00 2001
From: Arnav Balyan
Date: Fri, 8 May 2026 18:44:06 +0530
Subject: [PATCH] [iceberg] Populate contains_nan in manifest partition
 summaries

result() in IcebergManifestFile.java hard-coded contains_nan to false
for every partition summary (marked with a TODO). For FLOAT and DOUBLE
partition fields it is now set to true when the collected min or max
statistic of the field is NaN; every other type keeps false.

Adds IcebergCompatibilityTest#testDoublePartitionContainsNan, which
writes a NaN value into a DOUBLE partition column and checks that the
manifest list of the current snapshot contains a partition summary
with contains_nan set to true.

---
NOTE(review): contains_nan is derived only from the min/max statistics,
so a NaN is reported only if the stats collector surfaces it as min or
max -- confirm how the collector orders NaN.
NOTE(review): min/max are still serialized as the summary bounds even
when they are NaN -- check this against the Iceberg spec's definition
of lower_bound/upper_bound for partition field summaries.
NOTE(review): isNaN is annotated with @Nullable but this patch adds no
import -- confirm IcebergManifestFile.java already imports it.
NOTE(review): the test matches on GenericRecord#toString() output,
which is presumably stable but brittle against Avro formatting changes.
NOTE(review): this patch was rebuilt from a whitespace-collapsed copy.
Indentation is reconstructed (apply with --ignore-whitespace if context
does not match) and the author's e-mail address is missing from the
From: header and must be restored before `git am`.

 .../iceberg/manifest/IcebergManifestFile.java | 21 ++++++++-
 .../iceberg/IcebergCompatibilityTest.java     | 45 +++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
index a717ca0da00d..46fd14390672 100644
--- a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
+++ b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
@@ -255,10 +255,19 @@ public IcebergManifestFileMeta result() throws IOException {
             for (int i = 0; i < stats.length; i++) {
                 SimpleColStats fieldStats = stats[i];
                 DataType type = partitionType.getTypeAt(i);
+                boolean containsNan = false;
+                switch (type.getTypeRoot()) {
+                    case FLOAT:
+                    case DOUBLE:
+                        containsNan = isNaN(fieldStats.min()) || isNaN(fieldStats.max());
+                        break;
+                    default:
+                        // contains_nan is only meaningful for FLOAT/DOUBLE per the Iceberg spec
+                }
                 partitionSummaries.add(
                         new IcebergPartitionSummary(
                                 Objects.requireNonNull(fieldStats.nullCount()) > 0,
-                                false, // TODO correct it?
+                                containsNan,
                                 toByteBuffer(type, fieldStats.min()).array(),
                                 toByteBuffer(type, fieldStats.max()).array()));
             }
@@ -278,5 +287,15 @@ public IcebergManifestFileMeta result() throws IOException {
                     deletedRowsCount,
                     partitionSummaries);
         }
+
+        private boolean isNaN(@Nullable Object value) {
+            if (value instanceof Float) {
+                return Float.isNaN((Float) value);
+            }
+            if (value instanceof Double) {
+                return Double.isNaN((Double) value);
+            }
+            return false;
+        }
     }
 }
diff --git a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
index 2b5e794a57ab..2e09a61df1bb 100644
--- a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
@@ -752,6 +752,51 @@ public void testNestedTypes() throws Exception {
                         "Record(2, {20=[Record(cherry, 200), Record(pear, 201)]})");
     }
 
+    @Test
+    public void testDoublePartitionContainsNan() throws Exception {
+        RowType rowType =
+                RowType.of(
+                        new DataType[] {DataTypes.DOUBLE(), DataTypes.INT()},
+                        new String[] {"value", "id"});
+        FileStoreTable table =
+                createPaimonTable(
+                        rowType, Collections.singletonList("value"), Collections.emptyList(), -1);
+
+        String commitUser = UUID.randomUUID().toString();
+        TableWriteImpl<?> write = table.newWrite(commitUser);
+        TableCommitImpl commit = table.newCommit(commitUser);
+
+        write.write(GenericRow.of(1.0, 100), 1);
+        write.write(GenericRow.of(2.0, 200), 1);
+        write.write(GenericRow.of(Double.NaN, 300), 1);
+        commit.commit(1, write.prepareCommit(false, 1));
+        write.close();
+        commit.close();
+
+        FileIO fileIO = table.fileIO();
+        IcebergMetadata metadata =
+                IcebergMetadata.fromPath(
+                        fileIO, new Path(table.location(), "metadata/v1.metadata.json"));
+
+        String currentSnapshotManifest = metadata.currentSnapshot().manifestList();
+        File snapShotAvroFile = new File(currentSnapshotManifest);
+
+        boolean sawNanPartitionSummary = false;
+        try (DataFileReader<GenericRecord> dataFileReader =
+                new DataFileReader<>(
+                        new SeekableFileInput(snapShotAvroFile), new GenericDatumReader<>())) {
+            while (dataFileReader.hasNext()) {
+                GenericRecord record = dataFileReader.next();
+                String partitionSummary = record.get("partitions").toString();
+                if (partitionSummary.contains("contains_nan\": true")) {
+                    sawNanPartitionSummary = true;
+                }
+            }
+        }
+
+        assertThat(sawNanPartitionSummary).isTrue();
+    }
+
     @Test
     public void testStringPartitionNullPadding() throws Exception {
         RowType rowType =