Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,19 @@ public IcebergManifestFileMeta result() throws IOException {
for (int i = 0; i < stats.length; i++) {
SimpleColStats fieldStats = stats[i];
DataType type = partitionType.getTypeAt(i);
boolean containsNan = false;
switch (type.getTypeRoot()) {
case FLOAT:
case DOUBLE:
containsNan = isNaN(fieldStats.min()) || isNaN(fieldStats.max());
break;
default:
// contains_nan is only meaningful for FLOAT/DOUBLE per the Iceberg spec
}
partitionSummaries.add(
new IcebergPartitionSummary(
Objects.requireNonNull(fieldStats.nullCount()) > 0,
false, // TODO correct it?
containsNan,
toByteBuffer(type, fieldStats.min()).array(),
toByteBuffer(type, fieldStats.max()).array()));
}
Expand All @@ -278,5 +287,15 @@ public IcebergManifestFileMeta result() throws IOException {
deletedRowsCount,
partitionSummaries);
}

/**
 * Tells whether the given boxed value is a floating-point NaN.
 *
 * @param value the value to inspect; may be {@code null}
 * @return {@code true} only when {@code value} is a {@link Float} or {@link Double} holding
 *     NaN; {@code false} for {@code null} and for every other type
 */
private boolean isNaN(@Nullable Object value) {
    if (value instanceof Double) {
        return ((Double) value).isNaN();
    }
    // instanceof is false for null, so null falls through to "not NaN".
    return value instanceof Float && ((Float) value).isNaN();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,51 @@ public void testNestedTypes() throws Exception {
"Record(2, {20=[Record(cherry, 200), Record(pear, 201)]})");
}

/**
 * Writes rows whose DOUBLE "value" column includes NaN, commits them, and checks that at least
 * one entry of the current snapshot's manifest list reports {@code contains_nan: true} in its
 * "partitions" field.
 */
@Test
public void testDoublePartitionContainsNan() throws Exception {
    DataType[] fieldTypes = {DataTypes.DOUBLE(), DataTypes.INT()};
    String[] fieldNames = {"value", "id"};
    RowType rowType = RowType.of(fieldTypes, fieldNames);
    // NOTE(review): the list arguments are presumably partition keys ("value") and primary
    // keys (none), and -1 the bucket setting -- confirm against createPaimonTable.
    FileStoreTable table =
            createPaimonTable(
                    rowType, Collections.singletonList("value"), Collections.emptyList(), -1);

    String commitUser = UUID.randomUUID().toString();
    TableWriteImpl<?> writer = table.newWrite(commitUser);
    TableCommitImpl committer = table.newCommit(commitUser);

    // Two ordinary values plus one NaN in the DOUBLE column.
    writer.write(GenericRow.of(1.0, 100), 1);
    writer.write(GenericRow.of(2.0, 200), 1);
    writer.write(GenericRow.of(Double.NaN, 300), 1);
    committer.commit(1, writer.prepareCommit(false, 1));
    writer.close();
    committer.close();

    Path metadataPath = new Path(table.location(), "metadata/v1.metadata.json");
    IcebergMetadata icebergMetadata = IcebergMetadata.fromPath(table.fileIO(), metadataPath);

    // The manifest list location is opened directly as a local Avro file.
    File manifestListFile = new File(icebergMetadata.currentSnapshot().manifestList());

    boolean nanSummaryFound = false;
    try (DataFileReader<GenericRecord> manifestListReader =
            new DataFileReader<>(
                    new SeekableFileInput(manifestListFile), new GenericDatumReader<>())) {
        while (manifestListReader.hasNext()) {
            String partitions = manifestListReader.next().get("partitions").toString();
            // Match on the rendered text of the record's "partitions" field.
            nanSummaryFound |= partitions.contains("contains_nan\": true");
        }
    }

    assertThat(nanSummaryFound).isTrue();
}

@Test
public void testStringPartitionNullPadding() throws Exception {
RowType rowType =
Expand Down