diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java index e9ebed2826f4..1727a2b7dd3c 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java @@ -252,7 +252,14 @@ private static Types.NestedField getPhysicalType( // Use FixedSizeBinaryVector for binary backed decimal type = Types.FixedType.ofLength(primitive.getTypeLength()); } - physicalType = Types.NestedField.from(logicalType).ofType(type).build(); + // drop initialDefault/writeDefault: they are typed for the logical (decimal) type and + // cannot be cast to the underlying physical type + physicalType = + Types.NestedField.from(logicalType) + .ofType(type) + .withInitialDefault(null) + .withWriteDefault(null) + .build(); } return physicalType; diff --git a/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/TestVectorizedDefaultValues.java b/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/TestVectorizedDefaultValues.java new file mode 100644 index 000000000000..5b50168d6167 --- /dev/null +++ b/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/TestVectorizedDefaultValues.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow.vectorized; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.File; +import java.math.BigDecimal; +import java.util.List; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.DataFiles; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.Files; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.data.parquet.GenericParquetWriter; +import org.apache.iceberg.expressions.Literal; +import org.apache.iceberg.hadoop.HadoopTables; +import org.apache.iceberg.io.FileAppender; +import org.apache.iceberg.parquet.Parquet; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Types; +import org.apache.parquet.hadoop.ParquetOutputFormat; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Vectorized-read tests focused on Iceberg field defaults. The reader has two paths that interact + * with defaults: + * + *
These tests exercise the second path. The bug only surfaces when the column is not
+ * dictionary-encoded: with dictionary encoding, {@code allocateDictEncodedVector} is used and
+ * {@code getPhysicalType} is bypassed. The Parquet file is therefore written with dictionary
+ * encoding disabled.
+ */
+public class TestVectorizedDefaultValues {
+
+ @TempDir private File tempDir;
+
+ @Test
+ public void testDecimalWithDefaultValueNotDictionaryEncoded() throws Exception {
+ Schema schema =
+ new Schema(
+ Types.NestedField.required("id").withId(1).ofType(Types.LongType.get()).build(),
+ Types.NestedField.optional("int_backed")
+ .withId(2)
+ .ofType(Types.DecimalType.of(5, 2))
+ .withInitialDefault(Literal.of(new BigDecimal("0.00")))
+ .withWriteDefault(Literal.of(new BigDecimal("0.00")))
+ .build(),
+ Types.NestedField.optional("long_backed")
+ .withId(3)
+ .ofType(Types.DecimalType.of(15, 2))
+ .withInitialDefault(Literal.of(new BigDecimal("0.00")))
+ .withWriteDefault(Literal.of(new BigDecimal("0.00")))
+ .build(),
+ Types.NestedField.optional("fixed_backed")
+ .withId(4)
+ .ofType(Types.DecimalType.of(25, 2))
+ .withInitialDefault(Literal.of(new BigDecimal("0.00")))
+ .withWriteDefault(Literal.of(new BigDecimal("0.00")))
+ .build());
+
+ HadoopTables tables = new HadoopTables();
+ Table table =
+ tables.create(
+ schema,
+ PartitionSpec.unpartitioned(),
+ ImmutableMap.of(TableProperties.FORMAT_VERSION, "3"),
+ tempDir.toURI().toString());
+
+ List