diff --git a/core/src/main/java/org/apache/iceberg/ColumnFileInfo.java b/core/src/main/java/org/apache/iceberg/ColumnFileInfo.java new file mode 100644 index 000000000000..1c930e0eb961 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/ColumnFileInfo.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.util.List; +import org.apache.iceberg.types.Types; + +/** Information about a column file. */ +interface ColumnFileInfo { + Types.NestedField FIELD_IDS = + Types.NestedField.required( + 159, + "field_ids", + Types.ListType.ofRequired(160, Types.IntegerType.get()), + "Field IDs this column file contains"); + Types.NestedField LOCATION = + Types.NestedField.required( + 161, "location", Types.StringType.get(), "Location of the column file"); + Types.NestedField FILE_SIZE_IN_BYTES = + Types.NestedField.required( + 162, "file_size_in_bytes", Types.LongType.get(), "Total column file size in bytes"); + Types.NestedField SEQUENCE_NUMBER = + Types.NestedField.optional( + 163, "sequence_number", Types.LongType.get(), "Sequence number of the column file"); + + static Types.StructType schema() { + return Types.StructType.of(FIELD_IDS, LOCATION, FILE_SIZE_IN_BYTES, SEQUENCE_NUMBER); + } + + /** Returns the field IDs contained in this column file. */ + List fieldIds(); + + /** Returns the location of the column file. */ + String location(); + + /** Returns the total size of the column file in bytes. */ + long fileSizeInBytes(); + + /** Returns the sequence number of the column file, or null if not set. */ + Long sequenceNumber(); + + /** Copies this column file info. */ + ColumnFileInfo copy(); +} diff --git a/core/src/main/java/org/apache/iceberg/ColumnFileInfoStruct.java b/core/src/main/java/org/apache/iceberg/ColumnFileInfoStruct.java new file mode 100644 index 000000000000..8b6a50f10107 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/ColumnFileInfoStruct.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.List; +import org.apache.iceberg.avro.SupportsIndexProjection; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.ArrayUtil; + +/** Mutable {@link StructLike} implementation of {@link ColumnFileInfo}. */ +class ColumnFileInfoStruct extends SupportsIndexProjection implements ColumnFileInfo, Serializable { + private static final Types.StructType BASE_TYPE = + Types.StructType.of( + ColumnFileInfo.FIELD_IDS, + ColumnFileInfo.LOCATION, + ColumnFileInfo.FILE_SIZE_IN_BYTES, + ColumnFileInfo.SEQUENCE_NUMBER); + + private int[] fieldIds = null; + private String location = null; + private long fileSizeInBytes = -1L; + private Long sequenceNumber = null; + + /** Used by internal readers to instantiate this class with a projection schema. */ + ColumnFileInfoStruct(Types.StructType projection) { + super(BASE_TYPE, projection); + } + + private ColumnFileInfoStruct( + int[] fieldIds, String location, long fileSizeInBytes, Long sequenceNumber) { + super(BASE_TYPE, BASE_TYPE); + this.fieldIds = fieldIds; + this.location = location; + this.fileSizeInBytes = fileSizeInBytes; + this.sequenceNumber = sequenceNumber; + } + + /** Copy constructor. */ + private ColumnFileInfoStruct(ColumnFileInfoStruct toCopy) { + super(toCopy); + this.fieldIds = + toCopy.fieldIds != null ? Arrays.copyOf(toCopy.fieldIds, toCopy.fieldIds.length) : null; + this.location = toCopy.location; + this.fileSizeInBytes = toCopy.fileSizeInBytes; + this.sequenceNumber = toCopy.sequenceNumber; + } + + @Override + public List fieldIds() { + return fieldIds != null ? ArrayUtil.toUnmodifiableIntList(fieldIds) : null; + } + + @Override + public String location() { + return location; + } + + @Override + public long fileSizeInBytes() { + return fileSizeInBytes; + } + + @Override + public Long sequenceNumber() { + return sequenceNumber; + } + + @Override + public ColumnFileInfoStruct copy() { + return new ColumnFileInfoStruct(this); + } + + @Override + protected T internalGet(int pos, Class javaClass) { + return javaClass.cast(getByPos(pos)); + } + + private Object getByPos(int pos) { + switch (pos) { + case 0: + return fieldIds(); + case 1: + return location; + case 2: + return fileSizeInBytes; + case 3: + return sequenceNumber; + default: + throw new UnsupportedOperationException("Unknown field ordinal: " + pos); + } + } + + @Override + @SuppressWarnings("unchecked") + protected void internalSet(int pos, T value) { + switch (pos) { + case 0: + this.fieldIds = ArrayUtil.toIntArray((List) value); + break; + case 1: + // always coerce to String for Serializable + this.location = value.toString(); + break; + case 2: + this.fileSizeInBytes = (Long) value; + break; + case 3: + this.sequenceNumber = (Long) value; + break; + default: + // ignore the object, it must be from a newer version of the format + } + } + + static Builder builder() { + return new Builder(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("field_ids", fieldIds == null ? "null" : fieldIds()) + .add("location", location) + .add("file_size_in_bytes", fileSizeInBytes) + .add("sequence_number", sequenceNumber == null ? "null" : sequenceNumber) + .toString(); + } + + static class Builder { + private int[] fieldIds = null; + private String location = null; + private long fileSizeInBytes = -1L; + private Long sequenceNumber = null; + + Builder fieldIds(List ids) { + this.fieldIds = ids != null ? ArrayUtil.toIntArray(ids) : null; + return this; + } + + Builder location(String columnFileLocation) { + this.location = columnFileLocation; + return this; + } + + Builder fileSizeInBytes(long size) { + this.fileSizeInBytes = size; + return this; + } + + Builder sequenceNumber(Long sequence) { + this.sequenceNumber = sequence; + return this; + } + + ColumnFileInfoStruct build() { + Preconditions.checkArgument(fieldIds != null, "Invalid field IDs: null"); + Preconditions.checkArgument(fieldIds.length > 0, "Invalid field IDs: empty"); + Preconditions.checkArgument(location != null, "Invalid location: null"); + Preconditions.checkArgument(!location.isEmpty(), "Invalid location: empty"); + Preconditions.checkArgument( + fileSizeInBytes >= 0, "Invalid file size in bytes: %s (must be >= 0)", fileSizeInBytes); + Preconditions.checkArgument( + sequenceNumber == null || sequenceNumber >= 0, + "Invalid sequence number: %s (must be >= 0)", + sequenceNumber); + return new ColumnFileInfoStruct(fieldIds, location, fileSizeInBytes, sequenceNumber); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/TrackedFile.java b/core/src/main/java/org/apache/iceberg/TrackedFile.java index 8a6335972888..125d9c9b4b60 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFile.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFile.java @@ -91,6 +91,12 @@ interface TrackedFile { "equality_ids", Types.ListType.ofRequired(136, Types.IntegerType.get()), "Field ids used to determine row equality in equality delete files"); + Types.NestedField COLUMN_FILES = + Types.NestedField.optional( + 157, + "column_files", + Types.ListType.ofRequired(158, ColumnFileInfo.schema()), + "Column update files"); static Types.StructType schemaWithContentStats( Types.StructType partitionType, Types.StructType contentStatsType) { @@ -110,7 +116,8 @@ static Types.StructType schemaWithContentStats( MANIFEST_INFO, KEY_METADATA, SPLIT_OFFSETS, - EQUALITY_IDS); + EQUALITY_IDS, + COLUMN_FILES); } /** Returns the tracking information for this entry. */ @@ -158,6 +165,9 @@ static Types.StructType schemaWithContentStats( /** Returns the set of field IDs used for equality comparison in equality delete files. */ List equalityIds(); + /** Returns the column files for this file. */ + List columnFiles(); + /** Copies this tracked file. */ TrackedFile copy(); diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java index 4830f69d6bf1..77ae4bc2032a 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java @@ -21,8 +21,10 @@ import java.io.Serializable; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.types.Type; @@ -65,7 +67,8 @@ public PartitionData copy() { TrackedFile.MANIFEST_INFO, TrackedFile.KEY_METADATA, TrackedFile.SPLIT_OFFSETS, - TrackedFile.EQUALITY_IDS); + TrackedFile.EQUALITY_IDS, + TrackedFile.COLUMN_FILES); private FileContent contentType = null; private String location = null; @@ -81,6 +84,7 @@ public PartitionData copy() { private Integer sortOrderId = null; private DeletionVector deletionVector = null; private ManifestInfo manifestInfo = null; + private List columnFiles = null; private byte[] keyMetadata = null; private long[] splitOffsets = null; private int[] equalityIds = null; @@ -155,6 +159,10 @@ private TrackedFileStruct(TrackedFileStruct toCopy, boolean withStats, Set equalityIds() { return equalityIds != null ? ArrayUtil.toUnmodifiableIntList(equalityIds) : null; } + @Override + public List columnFiles() { + return columnFiles != null ? Collections.unmodifiableList(columnFiles) : null; + } + @Override public TrackedFile copy() { return new TrackedFileStruct(this, true, null); @@ -279,6 +292,8 @@ private Object getByPos(int pos) { return splitOffsets(); case 14: return equalityIds(); + case 15: + return columnFiles; default: throw new UnsupportedOperationException("Unknown field ordinal: " + pos); } @@ -333,6 +348,11 @@ protected void internalSet(int pos, T value) { case 14: this.equalityIds = ArrayUtil.toIntArray((List) value); break; + case 15: + this.columnFiles = + ((List) value) + .stream().map(ColumnFileInfo::copy).collect(Collectors.toList()); + break; default: // ignore the object, it must be from a newer version of the format } @@ -356,6 +376,7 @@ public String toString() { .add("key_metadata", keyMetadata == null ? "null" : "(redacted)") .add("split_offsets", splitOffsets == null ? "null" : splitOffsets()) .add("equality_ids", equalityIds == null ? "null" : equalityIds()) + .add("column_files", columnFiles == null ? "null" : columnFiles) .toString(); } } diff --git a/core/src/test/java/org/apache/iceberg/TestColumnFileInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestColumnFileInfoStruct.java new file mode 100644 index 000000000000..8f173ebfcaf0 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestColumnFileInfoStruct.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.io.IOException; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +class TestColumnFileInfoStruct { + + private static final List FIELD_IDS = Lists.newArrayList(1, 2, 3); + private static final String LOCATION = "s3://bucket/data/column.parquet"; + + @Test + void testFieldAccess() { + ColumnFileInfoStruct columnFile = + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .location(LOCATION) + .fileSizeInBytes(1024L) + .sequenceNumber(7L) + .build(); + + assertThat(columnFile.fieldIds()).containsExactly(1, 2, 3); + assertThat(columnFile.location()).isEqualTo(LOCATION); + assertThat(columnFile.fileSizeInBytes()).isEqualTo(1024L); + assertThat(columnFile.sequenceNumber()).isEqualTo(7L); + } + + @Test + void testCopy() { + ColumnFileInfoStruct columnFile = + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .location(LOCATION) + .fileSizeInBytes(2048L) + .sequenceNumber(17L) + .build(); + + ColumnFileInfoStruct copy = columnFile.copy(); + + assertThat(copy.fieldIds()).containsExactly(1, 2, 3); + assertThat(copy.location()).isEqualTo(LOCATION); + assertThat(copy.fileSizeInBytes()).isEqualTo(2048L); + assertThat(copy.sequenceNumber()).isEqualTo(17L); + + // verify deep copy + assertThat(copy.fieldIds()).isNotSameAs(columnFile.fieldIds()); + } + + @Test + void testNullableFields() { + ColumnFileInfoStruct columnFile = + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .location(LOCATION) + .fileSizeInBytes(2048L) + .build(); + + assertThat(columnFile.sequenceNumber()).isNull(); + } + + @Test + void testStructLikeSize() { + ColumnFileInfoStruct columnFile = new ColumnFileInfoStruct(ColumnFileInfo.schema()); + assertThat(columnFile.size()).isEqualTo(4); + } + + @Test + void testStructLikeGetSet() { + ColumnFileInfoStruct columnFile = new ColumnFileInfoStruct(ColumnFileInfo.schema()); + + columnFile.set(0, Lists.newArrayList(1, 2, 3, 4)); + columnFile.set(1, LOCATION); + columnFile.set(2, 128L); + columnFile.set(3, 5L); + + assertThat(columnFile.get(0, List.class)).containsExactly(1, 2, 3, 4); + assertThat(columnFile.get(1, String.class)).isEqualTo(LOCATION); + assertThat(columnFile.get(2, Long.class)).isEqualTo(128L); + assertThat(columnFile.get(3, Long.class)).isEqualTo(5L); + } + + @Test + void testProjectedStructLike() { + Types.StructType projection = + Types.StructType.of(ColumnFileInfo.LOCATION, ColumnFileInfo.FILE_SIZE_IN_BYTES); + + ColumnFileInfoStruct columnFile = new ColumnFileInfoStruct(projection); + assertThat(columnFile.size()).isEqualTo(2); + + // projected position 0 maps to internal position 1 (location) + // projected position 1 maps to internal position 2 (file_size_in_bytes) + columnFile.set(0, LOCATION); + columnFile.set(1, 1024L); + + assertThat(columnFile.location()).isEqualTo(LOCATION); + assertThat(columnFile.fileSizeInBytes()).isEqualTo(1024L); + assertThat(columnFile.get(0, String.class)).isEqualTo(LOCATION); + assertThat(columnFile.get(1, Long.class)).isEqualTo(1024L); + } + + @Test + void testBuilderValidation() { + assertThatThrownBy( + () -> + ColumnFileInfoStruct.builder() + .location(LOCATION) + .fileSizeInBytes(1024L) + .sequenceNumber(7L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid field IDs: null"); + + assertThatThrownBy( + () -> + ColumnFileInfoStruct.builder() + .fieldIds(Lists.newArrayList()) + .location(LOCATION) + .fileSizeInBytes(1024L) + .sequenceNumber(7L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid field IDs: empty"); + + assertThatThrownBy( + () -> + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .fileSizeInBytes(1024L) + .sequenceNumber(7L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: null"); + + assertThatThrownBy( + () -> + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .location("") + .fileSizeInBytes(1024L) + .sequenceNumber(7L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: empty"); + + assertThatThrownBy( + () -> + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .location(LOCATION) + .sequenceNumber(7L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid file size in bytes: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .location(LOCATION) + .fileSizeInBytes(1024L) + .sequenceNumber(-1L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid sequence number: -1 (must be >= 0)"); + } + + @Test + void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { + ColumnFileInfoStruct columnFile = + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .location(LOCATION) + .fileSizeInBytes(1024L) + .sequenceNumber(7L) + .build(); + + ColumnFileInfoStruct deserialized = TestHelpers.roundTripSerialize(columnFile); + + assertThat(deserialized.fieldIds()).containsExactly(1, 2, 3); + assertThat(deserialized.location()).isEqualTo(LOCATION); + assertThat(deserialized.fileSizeInBytes()).isEqualTo(1024L); + assertThat(deserialized.sequenceNumber()).isEqualTo(7L); + } + + @Test + void testKryoSerializationRoundTrip() throws IOException { + ColumnFileInfoStruct columnFile = + ColumnFileInfoStruct.builder() + .fieldIds(FIELD_IDS) + .location(LOCATION) + .fileSizeInBytes(1024L) + .sequenceNumber(7L) + .build(); + + ColumnFileInfoStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(columnFile); + + assertThat(deserialized.fieldIds()).containsExactly(1, 2, 3); + assertThat(deserialized.location()).isEqualTo(LOCATION); + assertThat(deserialized.fileSizeInBytes()).isEqualTo(1024L); + assertThat(deserialized.sequenceNumber()).isEqualTo(7L); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFile.java b/core/src/test/java/org/apache/iceberg/TestTrackedFile.java index 0d850ee4c886..9d6e05393580 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFile.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFile.java @@ -59,7 +59,8 @@ public void schemaWithContentStatsFieldOrder() { "manifest_info", "key_metadata", "split_offsets", - "equality_ids"); + "equality_ids", + "column_files"); } @Test @@ -69,7 +70,8 @@ public void schemaWithContentStatsFieldIds() { assertThat(fields) .extracting(Types.NestedField::fieldId) - .containsExactly(147, 134, 100, 101, 103, 104, 141, 102, 146, 140, 148, 150, 131, 132, 135); + .containsExactly( + 147, 134, 100, 101, 103, 104, 141, 102, 146, 140, 148, 150, 131, 132, 135, 157); } @Test diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index 3abb36aa51ff..9a06507812bc 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -34,6 +34,19 @@ class TestTrackedFileStruct { Types.StructType.of( Types.NestedField.optional(1000, "id_bucket", Types.IntegerType.get()), Types.NestedField.optional(1001, "category", Types.StringType.get())); + private static final ColumnFileInfoStruct COLUMN_FILE_1 = + ColumnFileInfoStruct.builder() + .fieldIds(ImmutableList.of(1, 2)) + .location("s3://bucket/data/col-1.parquet") + .fileSizeInBytes(256L) + .sequenceNumber(7L) + .build(); + private static final ColumnFileInfoStruct COLUMN_FILE_2 = + ColumnFileInfoStruct.builder() + .fieldIds(ImmutableList.of(3)) + .location("s3://bucket/data/col-2.parquet") + .fileSizeInBytes(128L) + .build(); @Test void testFieldAccess() { @@ -73,6 +86,7 @@ void testFieldAccess() { file.set(12, ByteBuffer.wrap(new byte[] {1, 2, 3})); file.set(13, ImmutableList.of(100L, 200L)); file.set(14, ImmutableList.of(1, 2, 3)); + file.set(15, ImmutableList.of(COLUMN_FILE_1, COLUMN_FILE_2)); assertThat(file.tracking()).isNotNull(); assertThat(file.tracking().status()).isEqualTo(EntryStatus.ADDED); @@ -89,6 +103,10 @@ void testFieldAccess() { assertThat(file.keyMetadata()).isEqualTo(ByteBuffer.wrap(new byte[] {1, 2, 3})); assertThat(file.splitOffsets()).containsExactly(100L, 200L); assertThat(file.equalityIds()).containsExactly(1, 2, 3); + assertThat(file.columnFiles()).hasSize(2); + verifyColumnFile(COLUMN_FILE_1, file.columnFiles().get(0)); + verifyColumnFile(COLUMN_FILE_2, file.columnFiles().get(1)); + // should return EMPTY_PARTITION_DATA assertThat(file.partition()).isNotNull(); assertThat(file.partition().size()).isEqualTo(0); @@ -175,6 +193,9 @@ void testCopy() { assertThat(copy.tracking().manifestLocation()).isEqualTo("s3://bucket/manifest.avro"); assertThat(copy.tracking().manifestPos()).isEqualTo(3L); assertThat(copy.partition()).isEqualTo(newPartition(7, "music")); + assertThat(copy.columnFiles()).hasSize(2); + verifyColumnFile(COLUMN_FILE_1, copy.columnFiles().get(0)); + verifyColumnFile(COLUMN_FILE_2, copy.columnFiles().get(1)); } @Test @@ -208,14 +229,18 @@ void testCopyIsDeep() { TrackedFile copy = file.copy(); - // keyMetadata should be a deep copy assertThat(copy.keyMetadata()).isNotSameAs(file.keyMetadata()); + assertThat(copy.columnFiles()).isNotSameAs(file.columnFiles()); + assertThat(copy.columnFiles()).hasSize(file.columnFiles().size()); + for (int i = 0; i < file.columnFiles().size(); ++i) { + assertThat(copy.columnFiles().get(i)).isNotSameAs(file.columnFiles().get(i)); + } } @Test void testStructLikeSize() { TrackedFileStruct file = new TrackedFileStruct(); - assertThat(file.size()).isEqualTo(15); + assertThat(file.size()).isEqualTo(16); } @Test @@ -230,6 +255,13 @@ void testStructLikeGetSet() { file.set(4, 999L); assertThat(file.get(4, Long.class)).isEqualTo(999L); + + file.set(15, ImmutableList.of(COLUMN_FILE_1, COLUMN_FILE_2)); + @SuppressWarnings("unchecked") + List roundTrippedColumnFiles = file.get(15, List.class); + assertThat(roundTrippedColumnFiles).hasSize(2); + verifyColumnFile(COLUMN_FILE_1, roundTrippedColumnFiles.get(0)); + verifyColumnFile(COLUMN_FILE_2, roundTrippedColumnFiles.get(1)); } @Test @@ -272,6 +304,12 @@ void testContentStatsNullWhenNotSet() { assertThat(file.contentStats()).isNull(); } + @Test + void testColumnFilesNullWhenNotSet() { + TrackedFileStruct file = new TrackedFileStruct(); + assertThat(file.columnFiles()).isNull(); + } + @Test void testAllFileContentTypesSupported() { for (FileContent content : FileContent.values()) { @@ -302,6 +340,9 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException assertThat(deserialized.tracking().manifestPos()).isEqualTo(3L); assertThat(deserialized.tracking().manifestLocation()).isEqualTo("s3://bucket/manifest.avro"); assertThat(deserialized.partition()).isEqualTo(newPartition(7, "music")); + assertThat(deserialized.columnFiles()).hasSize(2); + verifyColumnFile(COLUMN_FILE_1, deserialized.columnFiles().get(0)); + verifyColumnFile(COLUMN_FILE_2, deserialized.columnFiles().get(1)); } @Test @@ -325,6 +366,9 @@ void testKryoSerializationRoundTrip() throws IOException { assertThat(deserialized.tracking().manifestPos()).isEqualTo(3L); assertThat(deserialized.tracking().manifestLocation()).isEqualTo("s3://bucket/manifest.avro"); assertThat(deserialized.partition()).isEqualTo(newPartition(7, "music")); + assertThat(deserialized.columnFiles()).hasSize(2); + verifyColumnFile(COLUMN_FILE_1, deserialized.columnFiles().get(0)); + verifyColumnFile(COLUMN_FILE_2, deserialized.columnFiles().get(1)); } static TrackedFileStruct createFullTrackedFile() { @@ -359,6 +403,7 @@ static TrackedFileStruct createFullTrackedFile() { file.set(10, dv); file.set(12, ByteBuffer.wrap(new byte[] {1, 2, 3})); file.set(13, ImmutableList.of(50L)); + file.set(15, ImmutableList.of(COLUMN_FILE_1, COLUMN_FILE_2)); return file; } @@ -432,4 +477,11 @@ static TrackedFileStruct createTrackedFileWithStats() { return file; } + + private static void verifyColumnFile(ColumnFileInfo expected, ColumnFileInfo actual) { + assertThat(actual.fieldIds()).containsExactlyElementsOf(expected.fieldIds()); + assertThat(actual.location()).isEqualTo(expected.location()); + assertThat(actual.fileSizeInBytes()).isEqualTo(expected.fileSizeInBytes()); + assertThat(actual.sequenceNumber()).isEqualTo(expected.sequenceNumber()); + } }