From 0114ec6d6f1b7b95bf4fedfb56c5f71511938f45 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Tue, 19 Dec 2023 17:51:25 +0530 Subject: [PATCH 01/21] spark/data dir: update tests to AssertJ non-parameterized --- .../iceberg/spark/data/AvroDataTest.java | 8 +- .../iceberg/spark/data/GenericsHelpers.java | 140 ++++++------ .../iceberg/spark/data/TestHelpers.java | 201 ++++++++---------- .../iceberg/spark/data/TestOrcWrite.java | 17 +- .../spark/data/TestParquetAvroReader.java | 26 +-- .../spark/data/TestParquetAvroWriter.java | 16 +- .../spark/data/TestSparkAvroEnums.java | 16 +- .../spark/data/TestSparkAvroReader.java | 6 +- .../spark/data/TestSparkDateTimes.java | 10 +- .../data/TestSparkOrcReadMetadataColumns.java | 22 +- .../spark/data/TestSparkOrcReader.java | 16 +- .../TestSparkParquetReadMetadataColumns.java | 30 +-- .../spark/data/TestSparkParquetReader.java | 35 ++- .../spark/data/TestSparkParquetWriter.java | 19 +- .../data/TestSparkRecordOrcReaderWriter.java | 30 +-- ...rquetDictionaryEncodedVectorizedReads.java | 20 +- ...allbackToPlainEncodingVectorizedReads.java | 8 +- .../TestParquetVectorizedReads.java | 56 ++--- 18 files changed, 330 insertions(+), 346 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java index 95def56bcc62..3561a3b82429 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java @@ -22,6 +22,7 @@ import static org.apache.iceberg.types.Types.NestedField.required; import java.io.IOException; +import java.nio.file.Path; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import org.apache.iceberg.Schema; @@ -34,9 +35,8 @@ import org.apache.iceberg.types.Types.MapType; import org.apache.iceberg.types.Types.StructType; import org.apache.spark.sql.internal.SQLConf; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public abstract class AvroDataTest { @@ -63,7 +63,7 @@ public abstract class AvroDataTest { required(117, "dec_38_10", Types.DecimalType.of(38, 10)) // Spark's maximum precision ); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir public Path temp; @Test public void testSimpleStruct() throws IOException { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java index a94d6525a7cf..ecb8b24fa779 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java @@ -19,6 +19,7 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.spark.SparkSchemaUtil.convert; +import static org.assertj.core.api.Assertions.assertThat; import static scala.collection.JavaConverters.mapAsJavaMapConverter; import static scala.collection.JavaConverters.seqAsJavaListConverter; @@ -47,8 +48,6 @@ import org.apache.spark.sql.catalyst.util.MapData; import org.apache.spark.sql.types.Decimal; import org.apache.spark.unsafe.types.UTF8String; -import org.assertj.core.api.Assertions; -import org.junit.Assert; import scala.collection.Seq; public class GenericsHelpers { @@ -84,8 +83,9 
@@ private static void assertEqualsSafe( private static void assertEqualsSafe(Types.MapType map, Map expected, Map actual) { Type keyType = map.keyType(); Type valueType = map.valueType(); - Assert.assertEquals( - "Should have the same number of keys", expected.keySet().size(), actual.keySet().size()); + assertThat(actual.keySet()) + .as("Should have the same number of keys") + .hasSameSizeAs(expected.keySet()); for (Object expectedKey : expected.keySet()) { Object matchingKey = null; @@ -99,7 +99,7 @@ private static void assertEqualsSafe(Types.MapType map, Map expected, Map< } } - Assert.assertNotNull("Should have a matching key", matchingKey); + assertThat(matchingKey).as("Should have a matching key").isNotNull(); assertEqualsSafe(valueType, expected.get(expectedKey), actual.get(matchingKey)); } } @@ -116,88 +116,91 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) case LONG: case FLOAT: case DOUBLE: - Assert.assertEquals("Primitive value should be equal to expected", expected, actual); + assertThat(expected).as("Primitive value should be equal to expected").isEqualTo(actual); break; case DATE: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a LocalDate") .isInstanceOf(LocalDate.class); - Assertions.assertThat(actual).as("Should be a Date").isInstanceOf(Date.class); - Assert.assertEquals( - "ISO-8601 date should be equal", expected.toString(), actual.toString()); + assertThat(actual).as("Should be a Date").isInstanceOf(Date.class); + assertThat(actual.toString()) + .as("ISO-8601 date should be equal") + .isEqualTo(String.valueOf(expected)); break; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; if (timestampType.shouldAdjustToUTC()) { // Timestamptz - Assertions.assertThat(actual).as("Should be a Timestamp").isInstanceOf(Timestamp.class); + assertThat(actual).as("Should be a Timestamp").isInstanceOf(Timestamp.class); Timestamp ts = (Timestamp) actual; // milliseconds from nanos has already been added by getTime OffsetDateTime actualTs = EPOCH.plusNanos((ts.getTime() * 1_000_000) + (ts.getNanos() % 1_000_000)); - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect an OffsetDateTime") .isInstanceOf(OffsetDateTime.class); - Assert.assertEquals("Timestamp should be equal", expected, actualTs); + + assertThat(actualTs).as("Timestamp should be equal").isEqualTo(expected); } else { // Timestamp - Assertions.assertThat(actual) + assertThat(actual) .as("Should be a LocalDateTime") .isInstanceOf(LocalDateTime.class); - LocalDateTime ts = (LocalDateTime) actual; - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect an LocalDateTime") .isInstanceOf(LocalDateTime.class); - Assert.assertEquals("Timestamp should be equal", expected, ts); + + assertThat(actual).as("Timestamp should be equal").isEqualTo(expected); } break; case STRING: - Assertions.assertThat(actual).as("Should be a String").isInstanceOf(String.class); - Assert.assertEquals("Strings should be equal", String.valueOf(expected), actual); + assertThat(actual).as("Should be a String").isInstanceOf(String.class); + assertThat(actual.toString()).as("Strings should be equal").isEqualTo(String.valueOf(expected)); break; case UUID: - Assertions.assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); - Assertions.assertThat(actual).as("Should be a String").isInstanceOf(String.class); - Assert.assertEquals("UUID string representation should match", expected.toString(), actual); + 
assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); + assertThat(actual).as("Should be a String").isInstanceOf(String.class); + assertThat(actual.toString()) + .as("UUID string representation should match") + .isEqualTo(String.valueOf(expected)); break; case FIXED: - Assertions.assertThat(expected).as("Should expect a byte[]").isInstanceOf(byte[].class); - Assertions.assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - Assert.assertArrayEquals("Bytes should match", (byte[]) expected, (byte[]) actual); + assertThat(expected).as("Should expect a byte[]").isInstanceOf(byte[].class); + assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); + assertThat(actual).as("Bytes should match").isEqualTo(expected); break; case BINARY: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a ByteBuffer") .isInstanceOf(ByteBuffer.class); - Assertions.assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - Assert.assertArrayEquals( - "Bytes should match", ((ByteBuffer) expected).array(), (byte[]) actual); + assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); + assertThat((byte[]) actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a BigDecimal") .isInstanceOf(BigDecimal.class); - Assertions.assertThat(actual).as("Should be a BigDecimal").isInstanceOf(BigDecimal.class); - Assert.assertEquals("BigDecimals should be equal", expected, actual); + assertThat(actual).as("Should be a BigDecimal").isInstanceOf(BigDecimal.class); + assertThat(actual).as("BigDecimals should be equal").isEqualTo(expected); break; case STRUCT: - Assertions.assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); - Assertions.assertThat(actual).as("Should be a Row").isInstanceOf(Row.class); + assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); + assertThat(actual).as("Should be a Row").isInstanceOf(Row.class); assertEqualsSafe(type.asNestedType().asStructType(), (Record) expected, (Row) actual); break; case LIST: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a Collection") .isInstanceOf(Collection.class); - Assertions.assertThat(actual).as("Should be a Seq").isInstanceOf(Seq.class); + assertThat(actual).as("Should be a Seq").isInstanceOf(Seq.class); List asList = seqAsJavaListConverter((Seq) actual).asJava(); assertEqualsSafe(type.asNestedType().asListType(), (Collection) expected, asList); break; case MAP: - Assertions.assertThat(expected).as("Should expect a Collection").isInstanceOf(Map.class); - Assertions.assertThat(actual) + assertThat(expected).as("Should expect a Collection").isInstanceOf(Map.class); + assertThat(actual) .as("Should be a Map") .isInstanceOf(scala.collection.Map.class); Map asMap = @@ -264,84 +267,81 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual case LONG: case FLOAT: case DOUBLE: - Assert.assertEquals("Primitive value should be equal to expected", expected, actual); + assertThat(expected).as("Primitive value should be equal to expected").isEqualTo(actual); break; case DATE: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a LocalDate") .isInstanceOf(LocalDate.class); int expectedDays = (int) ChronoUnit.DAYS.between(EPOCH_DAY, (LocalDate) expected); - Assert.assertEquals("Primitive value should be equal to expected", expectedDays, 
actual); + assertThat(expectedDays).as("Primitive value should be equal to expected").isEqualTo(actual); break; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; if (timestampType.shouldAdjustToUTC()) { - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect an OffsetDateTime") .isInstanceOf(OffsetDateTime.class); long expectedMicros = ChronoUnit.MICROS.between(EPOCH, (OffsetDateTime) expected); - Assert.assertEquals( - "Primitive value should be equal to expected", expectedMicros, actual); + assertThat(expectedMicros).as("Primitive value should be equal to expected").isEqualTo(actual); } else { - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect an LocalDateTime") .isInstanceOf(LocalDateTime.class); long expectedMicros = ChronoUnit.MICROS.between(EPOCH, ((LocalDateTime) expected).atZone(ZoneId.of("UTC"))); - Assert.assertEquals( - "Primitive value should be equal to expected", expectedMicros, actual); + assertThat(expectedMicros).as("Primitive value should be equal to expected").isEqualTo(actual); } break; case STRING: - Assertions.assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); - Assert.assertEquals("Strings should be equal", expected, actual.toString()); + assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); + assertThat(actual.toString()).as("Strings should be equal").isEqualTo(String.valueOf(expected)); break; case UUID: - Assertions.assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); - Assertions.assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); - Assert.assertEquals( - "UUID string representation should match", expected.toString(), actual.toString()); + assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); + assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); + assertThat(actual.toString()).as("UUID string representation should match").isEqualTo(String.valueOf(expected)); break; case FIXED: - Assertions.assertThat(expected).as("Should expect a byte[]").isInstanceOf(byte[].class); - Assertions.assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - Assert.assertArrayEquals("Bytes should match", (byte[]) expected, (byte[]) actual); + assertThat(expected).as("Should expect a byte[]").isInstanceOf(byte[].class); + assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); + assertThat(actual).as("Bytes should match").isEqualTo(expected); break; case BINARY: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a ByteBuffer") .isInstanceOf(ByteBuffer.class); - Assertions.assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - Assert.assertArrayEquals( - "Bytes should match", ((ByteBuffer) expected).array(), (byte[]) actual); + assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); + assertThat((byte[]) actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a BigDecimal") .isInstanceOf(BigDecimal.class); - Assertions.assertThat(actual).as("Should be a Decimal").isInstanceOf(Decimal.class); - Assert.assertEquals( - "BigDecimals should be equal", expected, ((Decimal) actual).toJavaBigDecimal()); + assertThat(actual).as("Should be a Decimal").isInstanceOf(Decimal.class); + assertThat(((Decimal) actual).toJavaBigDecimal()) + .as("BigDecimals should be equal") + 
.isEqualTo(expected); break; case STRUCT: - Assertions.assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); - Assertions.assertThat(actual) + assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); + assertThat(actual) .as("Should be an InternalRow") .isInstanceOf(InternalRow.class); assertEqualsUnsafe( type.asNestedType().asStructType(), (Record) expected, (InternalRow) actual); break; case LIST: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a Collection") .isInstanceOf(Collection.class); - Assertions.assertThat(actual).as("Should be an ArrayData").isInstanceOf(ArrayData.class); + assertThat(actual).as("Should be an ArrayData").isInstanceOf(ArrayData.class); assertEqualsUnsafe( type.asNestedType().asListType(), (Collection) expected, (ArrayData) actual); break; case MAP: - Assertions.assertThat(expected).as("Should expect a Map").isInstanceOf(Map.class); - Assertions.assertThat(actual) + assertThat(expected).as("Should expect a Map").isInstanceOf(Map.class); + assertThat(actual) .as("Should be an ArrayBasedMapData") .isInstanceOf(MapData.class); assertEqualsUnsafe(type.asNestedType().asMapType(), (Map) expected, (MapData) actual); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java index 8e6b576ddffb..b1cd44966339 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java @@ -19,6 +19,7 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.spark.SparkSchemaUtil.convert; +import static org.assertj.core.api.Assertions.assertThat; import static scala.collection.JavaConverters.mapAsJavaMapConverter; import static scala.collection.JavaConverters.seqAsJavaListConverter; @@ -77,8 +78,6 @@ import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.unsafe.types.UTF8String; -import org.assertj.core.api.Assertions; -import org.junit.Assert; import scala.collection.Seq; public class TestHelpers { @@ -143,7 +142,7 @@ private static void assertEqualsSafe(Types.MapType map, Map expected, Map< } } - Assert.assertNotNull("Should have a matching key", matchingKey); + assertThat(matchingKey).as("Should have a matching key").isNotNull(); assertEqualsSafe(valueType, expected.get(expectedKey), actual.get(matchingKey)); } } @@ -163,28 +162,27 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) case LONG: case FLOAT: case DOUBLE: - Assert.assertEquals("Primitive value should be equal to expected", expected, actual); + assertThat(actual).as("Primitive value should be equal to expected").isEqualTo(expected); break; case DATE: - Assertions.assertThat(expected).as("Should be an int").isInstanceOf(Integer.class); - Assertions.assertThat(actual).as("Should be a Date").isInstanceOf(Date.class); - int daysFromEpoch = (Integer) expected; - LocalDate date = ChronoUnit.DAYS.addTo(EPOCH_DAY, daysFromEpoch); - Assert.assertEquals("ISO-8601 date should be equal", date.toString(), actual.toString()); + assertThat(expected).as("Should be an int").isInstanceOf(Integer.class); + assertThat(actual).as("Should be a Date").isInstanceOf(Date.class); + LocalDate date = ChronoUnit.DAYS.addTo(EPOCH_DAY, (Integer) expected); + assertThat(actual.toString()).as("ISO-8601 date should be 
equal").isEqualTo(String.valueOf(date)); break; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; - Assertions.assertThat(expected).as("Should be a long").isInstanceOf(Long.class); + assertThat(expected).as("Should be a long").isInstanceOf(Long.class); if (timestampType.shouldAdjustToUTC()) { - Assertions.assertThat(actual).as("Should be a Timestamp").isInstanceOf(Timestamp.class); + assertThat(actual).as("Should be a Timestamp").isInstanceOf(Timestamp.class); Timestamp ts = (Timestamp) actual; // milliseconds from nanos has already been added by getTime long tsMicros = (ts.getTime() * 1000) + ((ts.getNanos() / 1000) % 1000); - Assert.assertEquals("Timestamp micros should be equal", expected, tsMicros); + assertThat(tsMicros).as("Timestamp micros should be equal").isEqualTo(expected); } else { - Assertions.assertThat(actual) + assertThat(actual) .as("Should be a LocalDateTime") .isInstanceOf(LocalDateTime.class); @@ -192,57 +190,55 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) Instant instant = ts.toInstant(ZoneOffset.UTC); // milliseconds from nanos has already been added by getTime long tsMicros = (instant.toEpochMilli() * 1000) + ((ts.getNano() / 1000) % 1000); - Assert.assertEquals("Timestamp micros should be equal", expected, tsMicros); + assertThat(tsMicros).as("Timestamp micros should be equal").isEqualTo(expected); } break; case STRING: - Assertions.assertThat(actual).as("Should be a String").isInstanceOf(String.class); - Assert.assertEquals("Strings should be equal", String.valueOf(expected), actual); + assertThat(actual).as("Should be a String").isInstanceOf(String.class); + assertThat(actual).as("Strings should be equal").isEqualTo(String.valueOf(expected)); break; case UUID: - Assertions.assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); - Assertions.assertThat(actual).as("Should be a String").isInstanceOf(String.class); - Assert.assertEquals("UUID string representation should match", expected.toString(), actual); + assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); + assertThat(actual).as("Should be a String").isInstanceOf(String.class); + assertThat(actual.toString()).as("UUID string representation should match").isEqualTo(String.valueOf(expected)); break; case FIXED: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a Fixed") .isInstanceOf(GenericData.Fixed.class); - Assertions.assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - Assert.assertArrayEquals( - "Bytes should match", ((GenericData.Fixed) expected).bytes(), (byte[]) actual); + assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); + assertThat(actual).as("Bytes should match").isEqualTo(((GenericData.Fixed) expected).bytes()); break; case BINARY: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a ByteBuffer") .isInstanceOf(ByteBuffer.class); - Assertions.assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - Assert.assertArrayEquals( - "Bytes should match", ((ByteBuffer) expected).array(), (byte[]) actual); + assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); + assertThat(actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a BigDecimal") .isInstanceOf(BigDecimal.class); - Assertions.assertThat(actual).as("Should be a 
BigDecimal").isInstanceOf(BigDecimal.class); - Assert.assertEquals("BigDecimals should be equal", expected, actual); + assertThat(actual).as("Should be a BigDecimal").isInstanceOf(BigDecimal.class); + assertThat(actual).as("BigDecimals should be equal").isEqualTo(expected); break; case STRUCT: - Assertions.assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); - Assertions.assertThat(actual).as("Should be a Row").isInstanceOf(Row.class); + assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); + assertThat(actual).as("Should be a Row").isInstanceOf(Row.class); assertEqualsSafe(type.asNestedType().asStructType(), (Record) expected, (Row) actual); break; case LIST: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a Collection") .isInstanceOf(Collection.class); - Assertions.assertThat(actual).as("Should be a Seq").isInstanceOf(Seq.class); + assertThat(actual).as("Should be a Seq").isInstanceOf(Seq.class); List asList = seqAsJavaListConverter((Seq) actual).asJava(); assertEqualsSafe(type.asNestedType().asListType(), (Collection) expected, asList); break; case MAP: - Assertions.assertThat(expected).as("Should expect a Collection").isInstanceOf(Map.class); - Assertions.assertThat(actual) + assertThat(expected).as("Should expect a Collection").isInstanceOf(Map.class); + assertThat(actual) .as("Should be a Map") .isInstanceOf(scala.collection.Map.class); Map asMap = @@ -304,22 +300,21 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual switch (type.typeId()) { case LONG: - Assertions.assertThat(actual).as("Should be a long").isInstanceOf(Long.class); + assertThat(actual).as("Should be a long").isInstanceOf(Long.class); if (expected instanceof Integer) { - Assert.assertEquals("Values didn't match", ((Number) expected).longValue(), actual); + assertThat(actual).as("Values didn't match").isEqualTo(((Number) expected).longValue()); } else { - Assert.assertEquals("Primitive value should be equal to expected", expected, actual); + assertThat(actual).as("Primitive value should be equal to expected").isEqualTo(expected); } break; case DOUBLE: - Assertions.assertThat(actual).as("Should be a double").isInstanceOf(Double.class); + assertThat(actual).as("Should be a double").isInstanceOf(Double.class); if (expected instanceof Float) { - Assert.assertEquals( - "Values didn't match", - Double.doubleToLongBits(((Number) expected).doubleValue()), - Double.doubleToLongBits((double) actual)); + assertThat(Double.doubleToLongBits((double) actual)) + .as("Values didn't match") + .isEqualTo(Double.doubleToLongBits(((Number) expected).doubleValue())); } else { - Assert.assertEquals("Primitive value should be equal to expected", expected, actual); + assertThat(actual).as("Primitive value should be equal to expected").isEqualTo(expected); } break; case INTEGER: @@ -327,61 +322,57 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual case BOOLEAN: case DATE: case TIMESTAMP: - Assert.assertEquals("Primitive value should be equal to expected", expected, actual); + assertThat(actual).as("Primitive value should be equal to expected").isEqualTo(expected); break; case STRING: - Assertions.assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); - Assert.assertEquals("Strings should be equal", expected, actual.toString()); + assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); + assertThat(actual.toString()).as("Strings should be 
equal").isEqualTo(expected); break; case UUID: - Assertions.assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); - Assertions.assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); - Assert.assertEquals( - "UUID string representation should match", expected.toString(), actual.toString()); + assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); + assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); + assertThat(actual.toString()).as("UUID string representation should match").isEqualTo(String.valueOf(expected)); break; case FIXED: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a Fixed") .isInstanceOf(GenericData.Fixed.class); - Assertions.assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - Assert.assertArrayEquals( - "Bytes should match", ((GenericData.Fixed) expected).bytes(), (byte[]) actual); + assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); + assertThat((byte[]) actual).as("Bytes should match").isEqualTo(((GenericData.Fixed) expected).bytes()); break; case BINARY: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a ByteBuffer") .isInstanceOf(ByteBuffer.class); - Assertions.assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - Assert.assertArrayEquals( - "Bytes should match", ((ByteBuffer) expected).array(), (byte[]) actual); + assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); + assertThat((byte[]) actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a BigDecimal") .isInstanceOf(BigDecimal.class); - Assertions.assertThat(actual).as("Should be a Decimal").isInstanceOf(Decimal.class); - Assert.assertEquals( - "BigDecimals should be equal", expected, ((Decimal) actual).toJavaBigDecimal()); + assertThat(actual).as("Should be a Decimal").isInstanceOf(Decimal.class); + assertThat(((Decimal) actual).toJavaBigDecimal()).as("BigDecimals should be equal").isEqualTo(expected); break; case STRUCT: - Assertions.assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); - Assertions.assertThat(actual) + assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); + assertThat(actual) .as("Should be an InternalRow") .isInstanceOf(InternalRow.class); assertEqualsUnsafe( type.asNestedType().asStructType(), (Record) expected, (InternalRow) actual); break; case LIST: - Assertions.assertThat(expected) + assertThat(expected) .as("Should expect a Collection") .isInstanceOf(Collection.class); - Assertions.assertThat(actual).as("Should be an ArrayData").isInstanceOf(ArrayData.class); + assertThat(actual).as("Should be an ArrayData").isInstanceOf(ArrayData.class); assertEqualsUnsafe( type.asNestedType().asListType(), (Collection) expected, (ArrayData) actual); break; case MAP: - Assertions.assertThat(expected).as("Should expect a Map").isInstanceOf(Map.class); - Assertions.assertThat(actual) + assertThat(expected).as("Should expect a Map").isInstanceOf(Map.class); + assertThat(actual) .as("Should be an ArrayBasedMapData") .isInstanceOf(MapData.class); assertEqualsUnsafe(type.asNestedType().asMapType(), (Map) expected, (MapData) actual); @@ -403,7 +394,7 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual public static void assertEquals( String prefix, Types.StructType type, InternalRow expected, 
Row actual) { if (expected == null || actual == null) { - Assert.assertEquals(prefix, expected, actual); + assertThat(actual).as(prefix).isEqualTo(expected); } else { List fields = type.fields(); for (int c = 0; c < fields.size(); ++c) { @@ -419,10 +410,9 @@ public static void assertEquals( case DECIMAL: case DATE: case TIMESTAMP: - Assert.assertEquals( - prefix + "." + fieldName + " - " + childType, - getValue(expected, c, childType), - getPrimitiveValue(actual, c, childType)); + assertThat(getPrimitiveValue(actual, c, childType)) + .as(prefix + "." + fieldName + " - " + childType) + .isEqualTo(getValue(expected, c, childType)); break; case UUID: case FIXED: @@ -466,9 +456,9 @@ public static void assertEquals( private static void assertEqualsLists( String prefix, Types.ListType type, ArrayData expected, List actual) { if (expected == null || actual == null) { - Assert.assertEquals(prefix, expected, actual); + assertThat(actual).as(prefix).isEqualTo(expected); } else { - Assert.assertEquals(prefix + " length", expected.numElements(), actual.size()); + assertThat(actual.size()).as(prefix + "length").isEqualTo(expected.numElements()); Type childType = type.elementType(); for (int e = 0; e < expected.numElements(); ++e) { switch (childType.typeId()) { @@ -481,10 +471,9 @@ private static void assertEqualsLists( case DECIMAL: case DATE: case TIMESTAMP: - Assert.assertEquals( - prefix + ".elem " + e + " - " + childType, - getValue(expected, e, childType), - actual.get(e)); + assertThat(actual.get(e)) + .as(prefix + ".elem " + e + " - " + childType) + .isEqualTo(getValue(expected, e, childType)); break; case UUID: case FIXED: @@ -528,21 +517,20 @@ private static void assertEqualsLists( private static void assertEqualsMaps( String prefix, Types.MapType type, MapData expected, Map actual) { if (expected == null || actual == null) { - Assert.assertEquals(prefix, expected, actual); + assertThat(actual).as(prefix).isEqualTo(expected); } else { Type keyType = type.keyType(); Type valueType = type.valueType(); ArrayData expectedKeyArray = expected.keyArray(); ArrayData expectedValueArray = expected.valueArray(); - Assert.assertEquals(prefix + " length", expected.numElements(), actual.size()); + assertThat(actual.size()).as(prefix + " length").isEqualTo(expected.numElements()); for (int e = 0; e < expected.numElements(); ++e) { Object expectedKey = getValue(expectedKeyArray, e, keyType); Object actualValue = actual.get(expectedKey); if (actualValue == null) { - Assert.assertEquals( - prefix + ".key=" + expectedKey + " has null", - true, - expected.valueArray().isNullAt(e)); + assertThat(true) + .as(prefix + ".key=" + expectedKey + " has null") + .isEqualTo(expected.valueArray().isNullAt(e)); } else { switch (valueType.typeId()) { case BOOLEAN: @@ -554,10 +542,9 @@ private static void assertEqualsMaps( case DECIMAL: case DATE: case TIMESTAMP: - Assert.assertEquals( - prefix + ".key=" + expectedKey + " - " + valueType, - getValue(expectedValueArray, e, valueType), - actual.get(expectedKey)); + assertThat(actual.get(expectedKey)) + .as(prefix + ".key=" + expectedKey + " - " + valueType) + .isEqualTo(getValue(expectedValueArray, e, valueType)); break; case UUID: case FIXED: @@ -687,11 +674,7 @@ private static List toList(Seq val) { } private static void assertEqualBytes(String context, byte[] expected, byte[] actual) { - if (expected == null || actual == null) { - Assert.assertEquals(context, expected, actual); - } else { - Assert.assertArrayEquals(context, expected, actual); - } + 
assertThat(actual).as(context).isEqualTo(expected); } static void assertEquals(Schema schema, Object expected, Object actual) { @@ -704,28 +687,28 @@ private static void assertEquals(String context, DataType type, Object expected, } if (type instanceof StructType) { - Assertions.assertThat(expected) + assertThat(expected) .as("Expected should be an InternalRow: " + context) .isInstanceOf(InternalRow.class); - Assertions.assertThat(actual) + assertThat(actual) .as("Actual should be an InternalRow: " + context) .isInstanceOf(InternalRow.class); assertEquals(context, (StructType) type, (InternalRow) expected, (InternalRow) actual); } else if (type instanceof ArrayType) { - Assertions.assertThat(expected) + assertThat(expected) .as("Expected should be an ArrayData: " + context) .isInstanceOf(ArrayData.class); - Assertions.assertThat(actual) + assertThat(actual) .as("Actual should be an ArrayData: " + context) .isInstanceOf(ArrayData.class); assertEquals(context, (ArrayType) type, (ArrayData) expected, (ArrayData) actual); } else if (type instanceof MapType) { - Assertions.assertThat(expected) + assertThat(expected) .as("Expected should be a MapData: " + context) .isInstanceOf(MapData.class); - Assertions.assertThat(actual) + assertThat(actual) .as("Actual should be a MapData: " + context) .isInstanceOf(MapData.class); assertEquals(context, (MapType) type, (MapData) expected, (MapData) actual); @@ -733,13 +716,13 @@ private static void assertEquals(String context, DataType type, Object expected, } else if (type instanceof BinaryType) { assertEqualBytes(context, (byte[]) expected, (byte[]) actual); } else { - Assert.assertEquals("Value should match expected: " + context, expected, actual); + assertThat(actual).as("Value should match expected: " + context).isEqualTo(expected); } } private static void assertEquals( String context, StructType struct, InternalRow expected, InternalRow actual) { - Assert.assertEquals("Should have correct number of fields", struct.size(), actual.numFields()); + assertThat(actual.numFields()).as("Should have correct number of fields").isEqualTo(struct.size()); for (int i = 0; i < actual.numFields(); i += 1) { StructField field = struct.fields()[i]; DataType type = field.dataType(); @@ -753,8 +736,9 @@ private static void assertEquals( private static void assertEquals( String context, ArrayType array, ArrayData expected, ArrayData actual) { - Assert.assertEquals( - "Should have the same number of elements", expected.numElements(), actual.numElements()); + assertThat(actual.numElements()) + .as("Should have the same number of elements") + .isEqualTo(expected.numElements()); DataType type = array.elementType(); for (int i = 0; i < actual.numElements(); i += 1) { assertEquals( @@ -766,8 +750,9 @@ private static void assertEquals( } private static void assertEquals(String context, MapType map, MapData expected, MapData actual) { - Assert.assertEquals( - "Should have the same number of elements", expected.numElements(), actual.numElements()); + assertThat(actual.numElements()) + .as("Should have the same number of elements") + .isEqualTo(expected.numElements()); DataType keyType = map.keyType(); ArrayData expectedKeys = expected.keyArray(); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java index 1e51a088390e..d3a7dab61927 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java +++ 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java @@ -19,22 +19,23 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; +import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.orc.ORC; import org.apache.iceberg.types.Types; import org.apache.spark.sql.catalyst.InternalRow; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestOrcWrite { - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private static final Schema SCHEMA = new Schema( @@ -42,8 +43,8 @@ public class TestOrcWrite { @Test public void splitOffsets() throws IOException { - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); Iterable rows = RandomData.generateSpark(SCHEMA, 1, 0L); FileAppender writer = @@ -54,6 +55,6 @@ public void splitOffsets() throws IOException { writer.addAll(rows); writer.close(); - Assert.assertNotNull("Split offsets not present", writer.splitOffsets()); + assertThat(writer.splitOffsets()).as("Split offsets not present").isNotNull(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java index 657eebe00af7..2e135538ab11 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java @@ -20,9 +20,11 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.Iterator; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Files; @@ -34,14 +36,12 @@ import org.apache.iceberg.parquet.ParquetSchemaUtil; import org.apache.iceberg.types.Types; import org.apache.parquet.schema.MessageType; -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestParquetAvroReader { - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private static final Schema COMPLEX_SCHEMA = new Schema( @@ -87,7 +87,7 @@ public class TestParquetAvroReader { optional(2, "slide", Types.StringType.get()), required(25, "foo", Types.DecimalType.of(7, 5))); - @Ignore + @Disabled public void testStructSchema() throws IOException { Schema structSchema = new Schema( @@ -145,7 +145,7 @@ public void testStructSchema() throws IOException { double stddev = Math.sqrt((((double) sumSq) / trials) - (mean * mean)); } - @Ignore + @Disabled public void testWithOldReadPath() throws IOException { File testFile = 
writeTestData(COMPLEX_SCHEMA, 500_000, 1985); // RandomData uses the root record name "test", which must match for records to be equal @@ -194,8 +194,8 @@ public void testWithOldReadPath() throws IOException { public void testCorrectness() throws IOException { Iterable records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139); - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = Parquet.write(Files.localOutput(testFile)).schema(COMPLEX_SCHEMA).build()) { @@ -217,15 +217,15 @@ public void testCorrectness() throws IOException { Iterator iter = records.iterator(); for (Record actual : reader) { Record expected = iter.next(); - Assert.assertEquals("Record " + recordNum + " should match expected", expected, actual); + assertThat(actual).as("Record " + recordNum + " should match expected").isEqualTo(expected); recordNum += 1; } } } private File writeTestData(Schema schema, int numRecords, int seed) throws IOException { - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = Parquet.write(Files.localOutput(testFile)).schema(schema).build()) { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java index 15c6268da478..e8376426663d 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java @@ -20,9 +20,11 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.Iterator; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Files; @@ -35,13 +37,11 @@ import org.apache.iceberg.parquet.ParquetSchemaUtil; import org.apache.iceberg.types.Types; import org.apache.parquet.schema.MessageType; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestParquetAvroWriter { - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private static final Schema COMPLEX_SCHEMA = new Schema( @@ -90,8 +90,8 @@ public class TestParquetAvroWriter { public void testCorrectness() throws IOException { Iterable records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139); - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = Parquet.write(Files.localOutput(testFile)) @@ -115,7 +115,7 @@ public void testCorrectness() throws IOException { Iterator iter = records.iterator(); for (Record actual : reader) { Record expected = iter.next(); - Assert.assertEquals("Record " + recordNum + " should match expected", expected, actual); + assertThat(actual).as("Record " + recordNum + " should match 
expected").isEqualTo(expected); recordNum += 1; } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java index 6f05a9ed7c1f..438335f71ccf 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java @@ -17,9 +17,11 @@ * under the License. */ package org.apache.iceberg.spark.data; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.List; import org.apache.avro.SchemaBuilder; import org.apache.avro.file.DataFileWriter; @@ -34,14 +36,12 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.spark.sql.catalyst.InternalRow; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestSparkAvroEnums { - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; @Test public void writeAndValidateEnums() throws IOException { @@ -64,8 +64,8 @@ public void writeAndValidateEnums() throws IOException { Record enumRecord3 = new GenericData.Record(avroSchema); // null enum List expected = ImmutableList.of(enumRecord1, enumRecord2, enumRecord3); - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { writer.create(avroSchema, testFile); @@ -90,7 +90,7 @@ public void writeAndValidateEnums() throws IOException { expected.get(i).get("enumCol") == null ? null : expected.get(i).get("enumCol").toString(); String sparkString = rows.get(i).getUTF8String(0) == null ? 
null : rows.get(i).getUTF8String(0).toString(); - Assert.assertEquals(expectedEnumString, sparkString); + assertThat(sparkString).isEqualTo(expectedEnumString); } } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java index 6d1ef3db3657..c64ab0e283a1 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java @@ -19,6 +19,7 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.spark.data.TestHelpers.assertEqualsUnsafe; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; @@ -31,15 +32,14 @@ import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.spark.sql.catalyst.InternalRow; -import org.junit.Assert; public class TestSparkAvroReader extends AvroDataTest { @Override protected void writeAndValidate(Schema schema) throws IOException { List expected = RandomData.generateList(schema, 100, 0L); - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = Avro.write(Files.localOutput(testFile)).schema(schema).named("test").build()) { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkDateTimes.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkDateTimes.java index b31ea8fd277d..2d3716a04766 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkDateTimes.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkDateTimes.java @@ -18,14 +18,15 @@ */ package org.apache.iceberg.spark.data; +import static org.assertj.core.api.Assertions.assertThat; + import java.time.ZoneId; import java.util.TimeZone; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.types.Types; import org.apache.spark.sql.catalyst.util.DateTimeUtils; import org.apache.spark.sql.catalyst.util.TimestampFormatter; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSparkDateTimes { @Test @@ -47,7 +48,7 @@ public void testSparkDate() { public void checkSparkDate(String dateString) { Literal date = Literal.of(dateString).to(Types.DateType.get()); String sparkDate = DateTimeUtils.toJavaDate(date.value()).toString(); - Assert.assertEquals("Should be the same date (" + date.value() + ")", dateString, sparkDate); + assertThat(sparkDate).as("Should be the same date (" + date.value() + ")").isEqualTo(dateString); } @Test @@ -68,7 +69,6 @@ public void checkSparkTimestamp(String timestampString, String sparkRepr) { ZoneId zoneId = DateTimeUtils.getZoneId("UTC"); TimestampFormatter formatter = TimestampFormatter.getFractionFormatter(zoneId); String sparkTimestamp = formatter.format(ts.value()); - Assert.assertEquals( - "Should be the same timestamp (" + ts.value() + ")", sparkRepr, sparkTimestamp); + assertThat(sparkTimestamp).as("Should be the same timestamp (" + ts.value() + ")").isEqualTo(sparkRepr); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java index 3c9037adc393..746528c43c6d 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java @@ -19,11 +19,13 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; import java.util.Iterator; import java.util.List; +import java.util.UUID; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -50,11 +52,9 @@ import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.unsafe.types.UTF8String; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -100,7 +100,7 @@ public static Object[] parameters() { return new Object[] {false, true}; } - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir public java.nio.file.Path temp; private boolean vectorized; private File testFile; @@ -109,10 +109,10 @@ public TestSparkOrcReadMetadataColumns(boolean vectorized) { this.vectorized = vectorized; } - @Before + @BeforeEach public void writeFile() throws IOException { - testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = ORC.write(Files.localOutput(testFile)) @@ -201,10 +201,10 @@ private void readAndValidate( final Iterator actualRows = reader.iterator(); final Iterator expectedRows = expected.iterator(); while (expectedRows.hasNext()) { - Assert.assertTrue("Should have expected number of rows", actualRows.hasNext()); + assertThat(actualRows.hasNext()).as("Should have expected number of rows").isTrue(); TestHelpers.assertEquals(PROJECTION_SCHEMA, expectedRows.next(), actualRows.next()); } - Assert.assertFalse("Should not have extra rows", actualRows.hasNext()); + assertThat(actualRows.hasNext()).as("Should not have extra rows").isFalse(); } finally { if (reader != null) { reader.close(); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java index b23fe729a187..7b9787771c96 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java @@ -20,6 +20,7 @@ import static org.apache.iceberg.spark.data.TestHelpers.assertEquals; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; @@ -37,8 +38,7 @@ import org.apache.iceberg.types.Types; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.vectorized.ColumnarBatch; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; 
public class TestSparkOrcReader extends AvroDataTest { @Override @@ -62,8 +62,8 @@ public void writeAndValidateRepeatingRecords() throws IOException { private void writeAndValidateRecords(Schema schema, Iterable expected) throws IOException { - final File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = ORC.write(Files.localOutput(testFile)) @@ -81,10 +81,10 @@ private void writeAndValidateRecords(Schema schema, Iterable expect final Iterator actualRows = reader.iterator(); final Iterator expectedRows = expected.iterator(); while (expectedRows.hasNext()) { - Assert.assertTrue("Should have expected number of rows", actualRows.hasNext()); + assertThat(actualRows.hasNext()).as("Should have expected number of rows").isTrue(); assertEquals(schema, expectedRows.next(), actualRows.next()); } - Assert.assertFalse("Should not have extra rows", actualRows.hasNext()); + assertThat(actualRows.hasNext()).as("Should not have extra rows").isFalse(); } try (CloseableIterable reader = @@ -97,10 +97,10 @@ private void writeAndValidateRecords(Schema schema, Iterable expect final Iterator actualRows = batchesToRows(reader.iterator()); final Iterator expectedRows = expected.iterator(); while (expectedRows.hasNext()) { - Assert.assertTrue("Should have expected number of rows", actualRows.hasNext()); + assertThat(actualRows.hasNext()).as("Should have expected number of rows").isTrue(); assertEquals(schema, expectedRows.next(), actualRows.next()); } - Assert.assertFalse("Should not have extra rows", actualRows.hasNext()); + assertThat(actualRows.hasNext()).as("Should not have extra rows").isFalse(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java index 01b633da0f9e..d33116ac94e2 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java @@ -19,6 +19,8 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assumptions.assumeTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -57,12 +59,9 @@ import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.unsafe.types.UTF8String; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -119,7 +118,8 @@ public static Object[][] parameters() { return new Object[][] {new Object[] {false}, new Object[] {true}}; } - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir + public java.nio.file.Path temp; private final boolean vectorized; private File testFile; @@ -128,14 +128,14 @@ public TestSparkParquetReadMetadataColumns(boolean vectorized) { this.vectorized = vectorized; } - @Before + @BeforeEach public void 
writeFile() throws IOException { List fileSplits = Lists.newArrayList(); StructType struct = SparkSchemaUtil.convert(DATA_SCHEMA); Configuration conf = new Configuration(); - testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); ParquetFileWriter parquetFileWriter = new ParquetFileWriter( conf, @@ -144,8 +144,8 @@ public void writeFile() throws IOException { parquetFileWriter.start(); for (int i = 0; i < NUM_ROW_GROUPS; i += 1) { - File split = temp.newFile(); - Assert.assertTrue("Delete should succeed", split.delete()); + File split = temp.toFile(); + assertThat(split.delete()).as("Delete should succeed").isTrue(); fileSplits.add(new Path(split.getAbsolutePath())); try (FileAppender writer = Parquet.write(Files.localOutput(split)) @@ -171,7 +171,7 @@ public void testReadRowNumbers() throws IOException { @Test public void testReadRowNumbersWithDelete() throws IOException { - Assume.assumeTrue(vectorized); + assumeTrue(vectorized); List expectedRowsAfterDelete = Lists.newArrayList(); EXPECTED_ROWS.forEach(row -> expectedRowsAfterDelete.add(row.copy())); @@ -295,11 +295,11 @@ private void validate(List expected, Parquet.ReadBuilder builder) final Iterator actualRows = reader.iterator(); for (InternalRow internalRow : expected) { - Assert.assertTrue("Should have expected number of rows", actualRows.hasNext()); + assertThat(actualRows.hasNext()).as("Should have expected number of rows").isTrue(); TestHelpers.assertEquals(PROJECTION_SCHEMA, internalRow, actualRows.next()); } - Assert.assertFalse("Should not have extra rows", actualRows.hasNext()); + assertThat(actualRows.hasNext()).as("Should not have extra rows").isFalse(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index 024ce3a60c2b..4bbbea666d84 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -20,6 +20,8 @@ import static org.apache.iceberg.spark.data.TestHelpers.assertEqualsUnsafe; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.io.File; import java.io.IOException; @@ -56,25 +58,19 @@ import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSparkParquetReader extends AvroDataTest { @Override protected void writeAndValidate(Schema schema) throws IOException { - Assume.assumeTrue( - "Parquet Avro cannot write non-string map keys", - null - == TypeUtil.find( - schema, - type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())); + assumeTrue(null == TypeUtil.find(schema, + type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get()), + "Parquet Avro cannot write non-string map keys"); List expected = RandomData.generateList(schema, 100, 0L); - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + 
assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = Parquet.write(Files.localOutput(testFile)).schema(schema).named("test").build()) { @@ -88,10 +84,10 @@ protected void writeAndValidate(Schema schema) throws IOException { .build()) { Iterator rows = reader.iterator(); for (GenericData.Record record : expected) { - Assert.assertTrue("Should have expected number of rows", rows.hasNext()); + assertThat(rows.hasNext()).as("Should have expected number of rows").isTrue(); assertEqualsUnsafe(schema.asStruct(), record, rows.next()); } - Assert.assertFalse("Should not have extra rows", rows.hasNext()); + assertThat(rows.hasNext()).as("Should not have extra rows").isFalse(); } } @@ -112,7 +108,7 @@ protected Table tableFromInputFile(InputFile inputFile, Schema schema) throws IO schema, PartitionSpec.unpartitioned(), ImmutableMap.of(), - temp.newFolder().getCanonicalPath()); + java.nio.file.Files.createTempDirectory(temp, null).toFile().getCanonicalPath()); table .newAppend() @@ -131,7 +127,7 @@ protected Table tableFromInputFile(InputFile inputFile, Schema schema) throws IO @Test public void testInt96TimestampProducedBySparkIsReadCorrectly() throws IOException { String outputFilePath = - String.format("%s/%s", temp.getRoot().getAbsolutePath(), "parquet_int96.parquet"); + String.format("%s/%s", temp.toAbsolutePath(), "parquet_int96.parquet"); HadoopOutputFile outputFile = HadoopOutputFile.fromPath( new org.apache.hadoop.fs.Path(outputFilePath), new Configuration()); @@ -157,15 +153,16 @@ public void testInt96TimestampProducedBySparkIsReadCorrectly() throws IOExceptio InputFile parquetInputFile = Files.localInput(outputFilePath); List readRows = rowsFromFile(parquetInputFile, schema); - Assert.assertEquals(rows.size(), readRows.size()); - Assertions.assertThat(readRows).isEqualTo(rows); + + assertThat(readRows).hasSameSizeAs(rows); + assertThat(readRows).isEqualTo(rows); // Now we try to import that file as an Iceberg table to make sure Iceberg can read // Int96 end to end. 
Table int96Table = tableFromInputFile(parquetInputFile, schema); List tableRecords = Lists.newArrayList(IcebergGenerics.read(int96Table).build()); - Assert.assertEquals(rows.size(), tableRecords.size()); + assertThat(tableRecords).hasSameSizeAs(rows); for (int i = 0; i < tableRecords.size(); i++) { GenericsHelpers.assertEqualsUnsafe(schema.asStruct(), tableRecords.get(i), rows.get(i)); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java index 467d8a27a27c..1a358087cfc0 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java @@ -20,10 +20,13 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.Iterator; +import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.CloseableIterable; @@ -32,13 +35,11 @@ import org.apache.iceberg.spark.SparkSchemaUtil; import org.apache.iceberg.types.Types; import org.apache.spark.sql.catalyst.InternalRow; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestSparkParquetWriter { - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir public Path temp; private static final Schema COMPLEX_SCHEMA = new Schema( @@ -88,8 +89,8 @@ public void testCorrectness() throws IOException { int numRows = 50_000; Iterable records = RandomData.generateSpark(COMPLEX_SCHEMA, numRows, 19981); - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = Parquet.write(Files.localOutput(testFile)) @@ -110,10 +111,10 @@ public void testCorrectness() throws IOException { Iterator expected = records.iterator(); Iterator rows = reader.iterator(); for (int i = 0; i < numRows; i += 1) { - Assert.assertTrue("Should have expected number of rows", rows.hasNext()); + assertThat(rows.hasNext()).as("Should have expected number of rows").isTrue(); TestHelpers.assertEquals(COMPLEX_SCHEMA, expected.next(), rows.next()); } - Assert.assertFalse("Should not have extra rows", rows.hasNext()); + assertThat(rows.hasNext()).as("Should not have extra rows").isFalse(); } } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java index d10e7f5a19e3..7d6ac3cc4091 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java @@ -19,6 +19,7 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; @@ -38,15 +39,14 @@ import 
org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.apache.spark.sql.catalyst.InternalRow; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSparkRecordOrcReaderWriter extends AvroDataTest { private static final int NUM_RECORDS = 200; private void writeAndValidate(Schema schema, List expectedRecords) throws IOException { - final File originalFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", originalFile.delete()); + final File originalFile = temp.toFile(); + assertThat(originalFile.delete()).as("Delete should succeed").isTrue(); // Write few generic records into the original test file. try (FileAppender writer = @@ -68,8 +68,8 @@ private void writeAndValidate(Schema schema, List expectedRecords) throw assertEqualsUnsafe(schema.asStruct(), expectedRecords, reader, expectedRecords.size()); } - final File anotherFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", anotherFile.delete()); + final File anotherFile = temp.toFile(); + assertThat(anotherFile.delete()).as("Delete should succeed").isTrue(); // Write those spark InternalRows into a new file again. try (FileAppender writer = @@ -130,12 +130,12 @@ private static void assertRecordEquals( Iterator expectedIter = expected.iterator(); Iterator actualIter = actual.iterator(); for (int i = 0; i < size; i += 1) { - Assert.assertTrue("Expected iterator should have more rows", expectedIter.hasNext()); - Assert.assertTrue("Actual iterator should have more rows", actualIter.hasNext()); - Assert.assertEquals("Should have same rows.", expectedIter.next(), actualIter.next()); + assertThat(expectedIter.hasNext()).as("Expected iterator should have more rows").isTrue(); + assertThat(actualIter.hasNext()).as("Actual iterator should have more rows").isTrue(); + assertThat(actualIter.next()).as("Should have same rows.").isEqualTo(expectedIter.next()); } - Assert.assertFalse("Expected iterator should not have any extra rows.", expectedIter.hasNext()); - Assert.assertFalse("Actual iterator should not have any extra rows.", actualIter.hasNext()); + assertThat(expectedIter.hasNext()).as("Expected iterator should not have any extra rows.").isFalse(); + assertThat(actualIter.hasNext()).as("Actual iterator should not have any extra rows.").isFalse(); } private static void assertEqualsUnsafe( @@ -143,11 +143,11 @@ private static void assertEqualsUnsafe( Iterator expectedIter = expected.iterator(); Iterator actualIter = actual.iterator(); for (int i = 0; i < size; i += 1) { - Assert.assertTrue("Expected iterator should have more rows", expectedIter.hasNext()); - Assert.assertTrue("Actual iterator should have more rows", actualIter.hasNext()); + assertThat(expectedIter.hasNext()).as("Expected iterator should have more rows").isTrue(); + assertThat(actualIter.hasNext()).as("Actual iterator should have more rows").isTrue(); GenericsHelpers.assertEqualsUnsafe(struct, expectedIter.next(), actualIter.next()); } - Assert.assertFalse("Expected iterator should not have any extra rows.", expectedIter.hasNext()); - Assert.assertFalse("Actual iterator should not have any extra rows.", actualIter.hasNext()); + assertThat(expectedIter.hasNext()).as("Expected iterator should not have any extra rows.").isFalse(); + assertThat(actualIter.hasNext()).as("Actual iterator should not have any extra rows.").isFalse(); } } diff --git 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java index aeb18d43fd3d..2c9f4d0ebdec 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java @@ -19,6 +19,7 @@ package org.apache.iceberg.spark.data.parquet.vectorized; import static org.apache.iceberg.TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; @@ -32,9 +33,8 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.spark.data.RandomData; -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; public class TestParquetDictionaryEncodedVectorizedReads extends TestParquetVectorizedReads { @@ -52,14 +52,14 @@ Iterable generateData( @Test @Override - @Ignore // Ignored since this code path is already tested in TestParquetVectorizedReads + @Disabled // Ignored since this code path is already tested in TestParquetVectorizedReads public void testVectorizedReadsWithNewContainers() throws IOException {} @Test public void testMixedDictionaryNonDictionaryReads() throws IOException { Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - File dictionaryEncodedFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", dictionaryEncodedFile.delete()); + File dictionaryEncodedFile = temp.toFile(); + assertThat(dictionaryEncodedFile.delete()).as("Delete should succeed").isTrue(); Iterable dictionaryEncodableData = RandomData.generateDictionaryEncodableData( schema, 10000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE); @@ -68,8 +68,8 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException { writer.addAll(dictionaryEncodableData); } - File plainEncodingFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", plainEncodingFile.delete()); + File plainEncodingFile = temp.toFile(); + assertThat(plainEncodingFile.delete()).as("Delete should succeed").isTrue(); Iterable nonDictionaryData = RandomData.generate(schema, 10000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE); try (FileAppender writer = getParquetWriter(schema, plainEncodingFile)) { @@ -77,8 +77,8 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException { } int rowGroupSize = PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT; - File mixedFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", mixedFile.delete()); + File mixedFile = temp.toFile(); + assertThat(mixedFile.delete()).as("Delete should succeed").isTrue(); Parquet.concat( ImmutableList.of(dictionaryEncodedFile, plainEncodingFile, dictionaryEncodedFile), mixedFile, diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryFallbackToPlainEncodingVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryFallbackToPlainEncodingVectorizedReads.java index 42ea34936b5f..e6887c6f47b5 100644 --- 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryFallbackToPlainEncodingVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryFallbackToPlainEncodingVectorizedReads.java @@ -29,8 +29,8 @@ import org.apache.iceberg.relocated.com.google.common.base.Function; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.spark.data.RandomData; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; public class TestParquetDictionaryFallbackToPlainEncodingVectorizedReads extends TestParquetVectorizedReads { @@ -65,11 +65,11 @@ FileAppender getParquetWriter(Schema schema, File testFile) @Test @Override - @Ignore // Fallback encoding not triggered when data is mostly null + @Disabled // Fallback encoding not triggered when data is mostly null public void testMostlyNullsForOptionalFields() {} @Test @Override - @Ignore // Ignored since this code path is already tested in TestParquetVectorizedReads + @Disabled // Ignored since this code path is already tested in TestParquetVectorizedReads public void testVectorizedReadsWithNewContainers() throws IOException {} } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java index c763b7b7cc12..e2954c21f38a 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java @@ -20,6 +20,9 @@ import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; import java.io.IOException; @@ -46,11 +49,8 @@ import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.Type; import org.apache.spark.sql.vectorized.ColumnarBatch; -import org.assertj.core.api.Assertions; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; public class TestParquetVectorizedReads extends AvroDataTest { private static final int NUM_ROWS = 200_000; @@ -80,19 +80,19 @@ private void writeAndValidate( Function transform) throws IOException { // Write test data - Assume.assumeTrue( - "Parquet Avro cannot write non-string map keys", - null + assumeThat(null == TypeUtil.find( - schema, - type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())); + schema, + type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())) + .as("Parquet Avro cannot write non-string map keys") + .isTrue(); Iterable expected = generateData(schema, numRecords, seed, nullPercentage, transform); // write a test parquet file using iceberg writer - File testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = temp.toFile(); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = 
getParquetWriter(schema, testFile)) { writer.addAll(expected); @@ -157,49 +157,49 @@ void assertRecordsMatch( numRowsRead += batch.numRows(); TestHelpers.assertEqualsBatch(schema.asStruct(), expectedIter, batch); } - Assert.assertEquals(expectedSize, numRowsRead); + assertThat(numRowsRead).isEqualTo(expectedSize); } } @Override @Test - @Ignore + @Disabled public void testArray() {} @Override @Test - @Ignore + @Disabled public void testArrayOfStructs() {} @Override @Test - @Ignore + @Disabled public void testMap() {} @Override @Test - @Ignore + @Disabled public void testNumericMapKey() {} @Override @Test - @Ignore + @Disabled public void testComplexMapKey() {} @Override @Test - @Ignore + @Disabled public void testMapOfStructs() {} @Override @Test - @Ignore + @Disabled public void testMixedTypes() {} @Test @Override public void testNestedStruct() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> VectorizedSparkParquetReaders.buildReader( TypeUtil.assignIncreasingFreshIds( @@ -274,8 +274,8 @@ public void testReadsForTypePromotedColumns() throws Exception { optional(102, "float_data", Types.FloatType.get()), optional(103, "decimal_data", Types.DecimalType.of(10, 5))); - File dataFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", dataFile.delete()); + File dataFile = temp.toFile(); + assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(writeSchema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); try (FileAppender writer = getParquetWriter(writeSchema, dataFile)) { @@ -303,8 +303,8 @@ public void testSupportedReadsForParquetV2() throws Exception { optional(103, "double_data", Types.DoubleType.get()), optional(104, "decimal_data", Types.DecimalType.of(25, 5))); - File dataFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", dataFile.delete()); + File dataFile = temp.toFile(); + assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { @@ -318,14 +318,14 @@ public void testUnsupportedReadsForParquetV2() throws Exception { // Longs, ints, string types etc use delta encoding and which are not supported for vectorized // reads Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - File dataFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", dataFile.delete()); + File dataFile = temp.toFile(); + assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { writer.addAll(data); } - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE)) .isInstanceOf(UnsupportedOperationException.class) .hasMessageStartingWith("Cannot support vectorized reads for column") From b63609e5ee20d7d6753de2bf60a967732e6927e9 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Tue, 19 Dec 2023 18:15:59 +0530 Subject: [PATCH 02/21] remove redundant import --- .../test/java/org/apache/iceberg/spark/data/TestOrcWrite.java | 1 - .../iceberg/spark/data/TestSparkOrcReadMetadataColumns.java | 1 - .../org/apache/iceberg/spark/data/TestSparkParquetWriter.java | 1 - 3 files changed, 3 deletions(-) diff --git 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java index d3a7dab61927..e7694be21fca 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java @@ -24,7 +24,6 @@ import java.io.File; import java.io.IOException; import java.nio.file.Path; -import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.FileAppender; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java index 746528c43c6d..69c482ba1437 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.util.Iterator; import java.util.List; -import java.util.UUID; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java index 1a358087cfc0..38375e9082c9 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java @@ -26,7 +26,6 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Iterator; -import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.CloseableIterable; From 72063ecf98757c41b35432a8ff61da22602e810f Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Tue, 19 Dec 2023 20:09:04 +0530 Subject: [PATCH 03/21] update hasNext() and isExhausted() --- .../data/TestSparkOrcReadMetadataColumns.java | 4 ++-- .../iceberg/spark/data/TestSparkOrcReader.java | 8 ++++---- .../TestSparkParquetReadMetadataColumns.java | 4 ++-- .../spark/data/TestSparkParquetReader.java | 4 ++-- .../spark/data/TestSparkParquetWriter.java | 4 ++-- .../data/TestSparkRecordOrcReaderWriter.java | 16 ++++++++-------- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java index 69c482ba1437..2022c4f51606 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java @@ -200,10 +200,10 @@ private void readAndValidate( final Iterator actualRows = reader.iterator(); final Iterator expectedRows = expected.iterator(); while (expectedRows.hasNext()) { - assertThat(actualRows.hasNext()).as("Should have expected number of rows").isTrue(); + assertThat(actualRows).as("Should have expected number of rows").hasNext(); TestHelpers.assertEquals(PROJECTION_SCHEMA, expectedRows.next(), actualRows.next()); } - assertThat(actualRows.hasNext()).as("Should not have extra 
rows").isFalse(); + assertThat(actualRows).as("Should not have extra rows").isExhausted(); } finally { if (reader != null) { reader.close(); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java index 7b9787771c96..645eb20376bf 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java @@ -81,10 +81,10 @@ private void writeAndValidateRecords(Schema schema, Iterable expect final Iterator actualRows = reader.iterator(); final Iterator expectedRows = expected.iterator(); while (expectedRows.hasNext()) { - assertThat(actualRows.hasNext()).as("Should have expected number of rows").isTrue(); + assertThat(actualRows).as("Should have expected number of rows").hasNext(); assertEquals(schema, expectedRows.next(), actualRows.next()); } - assertThat(actualRows.hasNext()).as("Should not have extra rows").isFalse(); + assertThat(actualRows).as("Should not have extra rows").isExhausted(); } try (CloseableIterable reader = @@ -97,10 +97,10 @@ private void writeAndValidateRecords(Schema schema, Iterable expect final Iterator actualRows = batchesToRows(reader.iterator()); final Iterator expectedRows = expected.iterator(); while (expectedRows.hasNext()) { - assertThat(actualRows.hasNext()).as("Should have expected number of rows").isTrue(); + assertThat(actualRows).as("Should have expected number of rows").hasNext(); assertEquals(schema, expectedRows.next(), actualRows.next()); } - assertThat(actualRows.hasNext()).as("Should not have extra rows").isFalse(); + assertThat(actualRows).as("Should not have extra rows").isExhausted(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java index d33116ac94e2..f822a6a46f53 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java @@ -295,11 +295,11 @@ private void validate(List expected, Parquet.ReadBuilder builder) final Iterator actualRows = reader.iterator(); for (InternalRow internalRow : expected) { - assertThat(actualRows.hasNext()).as("Should have expected number of rows").isTrue(); + assertThat(actualRows).as("Should have expected number of rows").hasNext(); TestHelpers.assertEquals(PROJECTION_SCHEMA, internalRow, actualRows.next()); } - assertThat(actualRows.hasNext()).as("Should not have extra rows").isFalse(); + assertThat(actualRows).as("Should not have extra rows").isExhausted(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index 4bbbea666d84..fa55d4e5e1e2 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -84,10 +84,10 @@ protected void writeAndValidate(Schema schema) throws IOException { .build()) { Iterator rows = reader.iterator(); for (GenericData.Record record : expected) { - assertThat(rows.hasNext()).as("Should have expected number of rows").isTrue(); + 
assertThat(rows).as("Should have expected number of rows").hasNext(); assertEqualsUnsafe(schema.asStruct(), record, rows.next()); } - assertThat(rows.hasNext()).as("Should not have extra rows").isFalse(); + assertThat(rows).as("Should not have extra rows").isExhausted(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java index 38375e9082c9..7816becda3fa 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java @@ -110,10 +110,10 @@ public void testCorrectness() throws IOException { Iterator expected = records.iterator(); Iterator rows = reader.iterator(); for (int i = 0; i < numRows; i += 1) { - assertThat(rows.hasNext()).as("Should have expected number of rows").isTrue(); + assertThat(rows).as("Should have expected number of rows").hasNext(); TestHelpers.assertEquals(COMPLEX_SCHEMA, expected.next(), rows.next()); } - assertThat(rows.hasNext()).as("Should not have extra rows").isFalse(); + assertThat(rows).as("Should not have extra rows").isExhausted(); } } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java index 7d6ac3cc4091..e0027b45e2e6 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java @@ -130,12 +130,12 @@ private static void assertRecordEquals( Iterator expectedIter = expected.iterator(); Iterator actualIter = actual.iterator(); for (int i = 0; i < size; i += 1) { - assertThat(expectedIter.hasNext()).as("Expected iterator should have more rows").isTrue(); - assertThat(actualIter.hasNext()).as("Actual iterator should have more rows").isTrue(); + assertThat(expectedIter).as("Expected iterator should have more rows").hasNext(); + assertThat(actualIter).as("Actual iterator should have more rows").hasNext(); assertThat(actualIter.next()).as("Should have same rows.").isEqualTo(expectedIter.next()); } - assertThat(expectedIter.hasNext()).as("Expected iterator should not have any extra rows.").isFalse(); - assertThat(actualIter.hasNext()).as("Actual iterator should not have any extra rows.").isFalse(); + assertThat(expectedIter).as("Expected iterator should not have any extra rows.").isExhausted(); + assertThat(actualIter).as("Actual iterator should not have any extra rows.").isExhausted(); } private static void assertEqualsUnsafe( @@ -143,11 +143,11 @@ private static void assertEqualsUnsafe( Iterator expectedIter = expected.iterator(); Iterator actualIter = actual.iterator(); for (int i = 0; i < size; i += 1) { - assertThat(expectedIter.hasNext()).as("Expected iterator should have more rows").isTrue(); - assertThat(actualIter.hasNext()).as("Actual iterator should have more rows").isTrue(); + assertThat(expectedIter).as("Expected iterator should have more rows").hasNext(); + assertThat(actualIter).as("Actual iterator should have more rows").hasNext(); GenericsHelpers.assertEqualsUnsafe(struct, expectedIter.next(), actualIter.next()); } - assertThat(expectedIter.hasNext()).as("Expected iterator should not have any extra rows.").isFalse(); - assertThat(actualIter.hasNext()).as("Actual iterator 
should not have any extra rows.").isFalse(); + assertThat(expectedIter).as("Expected iterator should not have any extra rows.").isExhausted(); + assertThat(actualIter).as("Actual iterator should not have any extra rows.").isExhausted(); } } From d2750d6465e0251771ef204fb18bad0add1c60e3 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 07:50:11 +0530 Subject: [PATCH 04/21] update temp to private --- .../test/java/org/apache/iceberg/spark/data/AvroDataTest.java | 2 +- .../org/apache/iceberg/spark/data/TestSparkParquetWriter.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java index 3561a3b82429..baab12d4ccdb 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java @@ -63,7 +63,7 @@ public abstract class AvroDataTest { required(117, "dec_38_10", Types.DecimalType.of(38, 10)) // Spark's maximum precision ); - @TempDir public Path temp; + @TempDir private Path temp; @Test public void testSimpleStruct() throws IOException { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java index 7816becda3fa..9cb77ac51275 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java @@ -38,7 +38,7 @@ import org.junit.jupiter.api.io.TempDir; public class TestSparkParquetWriter { - @TempDir public Path temp; + @TempDir private Path temp; private static final Schema COMPLEX_SCHEMA = new Schema( From 2a116dec69a9c7fe7aeb2e8343ebf2f1b9ebb7b9 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 07:50:27 +0530 Subject: [PATCH 05/21] run spotlessApply --- .../iceberg/spark/data/GenericsHelpers.java | 96 +++++++-------- .../iceberg/spark/data/TestHelpers.java | 114 ++++++++---------- .../spark/data/TestSparkAvroEnums.java | 1 + .../spark/data/TestSparkDateTimes.java | 8 +- .../TestSparkParquetReadMetadataColumns.java | 3 +- .../spark/data/TestSparkParquetReader.java | 10 +- .../TestParquetVectorizedReads.java | 17 +-- 7 files changed, 119 insertions(+), 130 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java index ecb8b24fa779..7705001ca7e1 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java @@ -84,8 +84,8 @@ private static void assertEqualsSafe(Types.MapType map, Map expected, Map< Type keyType = map.keyType(); Type valueType = map.valueType(); assertThat(actual.keySet()) - .as("Should have the same number of keys") - .hasSameSizeAs(expected.keySet()); + .as("Should have the same number of keys") + .hasSameSizeAs(expected.keySet()); for (Object expectedKey : expected.keySet()) { Object matchingKey = null; @@ -119,13 +119,11 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) assertThat(expected).as("Primitive value should be 
equal to expected").isEqualTo(actual); break; case DATE: - assertThat(expected) - .as("Should expect a LocalDate") - .isInstanceOf(LocalDate.class); + assertThat(expected).as("Should expect a LocalDate").isInstanceOf(LocalDate.class); assertThat(actual).as("Should be a Date").isInstanceOf(Date.class); assertThat(actual.toString()) - .as("ISO-8601 date should be equal") - .isEqualTo(String.valueOf(expected)); + .as("ISO-8601 date should be equal") + .isEqualTo(String.valueOf(expected)); break; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; @@ -144,9 +142,7 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) assertThat(actualTs).as("Timestamp should be equal").isEqualTo(expected); } else { // Timestamp - assertThat(actual) - .as("Should be a LocalDateTime") - .isInstanceOf(LocalDateTime.class); + assertThat(actual).as("Should be a LocalDateTime").isInstanceOf(LocalDateTime.class); assertThat(expected) .as("Should expect an LocalDateTime") @@ -157,14 +153,16 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) break; case STRING: assertThat(actual).as("Should be a String").isInstanceOf(String.class); - assertThat(actual.toString()).as("Strings should be equal").isEqualTo(String.valueOf(expected)); + assertThat(actual.toString()) + .as("Strings should be equal") + .isEqualTo(String.valueOf(expected)); break; case UUID: assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); assertThat(actual).as("Should be a String").isInstanceOf(String.class); assertThat(actual.toString()) - .as("UUID string representation should match") - .isEqualTo(String.valueOf(expected)); + .as("UUID string representation should match") + .isEqualTo(String.valueOf(expected)); break; case FIXED: assertThat(expected).as("Should expect a byte[]").isInstanceOf(byte[].class); @@ -172,16 +170,14 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) assertThat(actual).as("Bytes should match").isEqualTo(expected); break; case BINARY: - assertThat(expected) - .as("Should expect a ByteBuffer") - .isInstanceOf(ByteBuffer.class); + assertThat(expected).as("Should expect a ByteBuffer").isInstanceOf(ByteBuffer.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat((byte[]) actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); + assertThat((byte[]) actual) + .as("Bytes should match") + .isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: - assertThat(expected) - .as("Should expect a BigDecimal") - .isInstanceOf(BigDecimal.class); + assertThat(expected).as("Should expect a BigDecimal").isInstanceOf(BigDecimal.class); assertThat(actual).as("Should be a BigDecimal").isInstanceOf(BigDecimal.class); assertThat(actual).as("BigDecimals should be equal").isEqualTo(expected); break; @@ -191,18 +187,14 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) assertEqualsSafe(type.asNestedType().asStructType(), (Record) expected, (Row) actual); break; case LIST: - assertThat(expected) - .as("Should expect a Collection") - .isInstanceOf(Collection.class); + assertThat(expected).as("Should expect a Collection").isInstanceOf(Collection.class); assertThat(actual).as("Should be a Seq").isInstanceOf(Seq.class); List asList = seqAsJavaListConverter((Seq) actual).asJava(); assertEqualsSafe(type.asNestedType().asListType(), (Collection) expected, asList); break; case MAP: assertThat(expected).as("Should expect a 
Collection").isInstanceOf(Map.class); - assertThat(actual) - .as("Should be a Map") - .isInstanceOf(scala.collection.Map.class); + assertThat(actual).as("Should be a Map").isInstanceOf(scala.collection.Map.class); Map asMap = mapAsJavaMapConverter((scala.collection.Map) actual).asJava(); assertEqualsSafe(type.asNestedType().asMapType(), (Map) expected, asMap); @@ -270,11 +262,11 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual assertThat(expected).as("Primitive value should be equal to expected").isEqualTo(actual); break; case DATE: - assertThat(expected) - .as("Should expect a LocalDate") - .isInstanceOf(LocalDate.class); + assertThat(expected).as("Should expect a LocalDate").isInstanceOf(LocalDate.class); int expectedDays = (int) ChronoUnit.DAYS.between(EPOCH_DAY, (LocalDate) expected); - assertThat(expectedDays).as("Primitive value should be equal to expected").isEqualTo(actual); + assertThat(expectedDays) + .as("Primitive value should be equal to expected") + .isEqualTo(actual); break; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; @@ -283,24 +275,32 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual .as("Should expect an OffsetDateTime") .isInstanceOf(OffsetDateTime.class); long expectedMicros = ChronoUnit.MICROS.between(EPOCH, (OffsetDateTime) expected); - assertThat(expectedMicros).as("Primitive value should be equal to expected").isEqualTo(actual); + assertThat(expectedMicros) + .as("Primitive value should be equal to expected") + .isEqualTo(actual); } else { assertThat(expected) .as("Should expect an LocalDateTime") .isInstanceOf(LocalDateTime.class); long expectedMicros = ChronoUnit.MICROS.between(EPOCH, ((LocalDateTime) expected).atZone(ZoneId.of("UTC"))); - assertThat(expectedMicros).as("Primitive value should be equal to expected").isEqualTo(actual); + assertThat(expectedMicros) + .as("Primitive value should be equal to expected") + .isEqualTo(actual); } break; case STRING: assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); - assertThat(actual.toString()).as("Strings should be equal").isEqualTo(String.valueOf(expected)); + assertThat(actual.toString()) + .as("Strings should be equal") + .isEqualTo(String.valueOf(expected)); break; case UUID: assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); - assertThat(actual.toString()).as("UUID string representation should match").isEqualTo(String.valueOf(expected)); + assertThat(actual.toString()) + .as("UUID string representation should match") + .isEqualTo(String.valueOf(expected)); break; case FIXED: assertThat(expected).as("Should expect a byte[]").isInstanceOf(byte[].class); @@ -308,42 +308,34 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual assertThat(actual).as("Bytes should match").isEqualTo(expected); break; case BINARY: - assertThat(expected) - .as("Should expect a ByteBuffer") - .isInstanceOf(ByteBuffer.class); + assertThat(expected).as("Should expect a ByteBuffer").isInstanceOf(ByteBuffer.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat((byte[]) actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); + assertThat((byte[]) actual) + .as("Bytes should match") + .isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: - assertThat(expected) - .as("Should expect a BigDecimal") - 
.isInstanceOf(BigDecimal.class); + assertThat(expected).as("Should expect a BigDecimal").isInstanceOf(BigDecimal.class); assertThat(actual).as("Should be a Decimal").isInstanceOf(Decimal.class); assertThat(((Decimal) actual).toJavaBigDecimal()) - .as("BigDecimals should be equal") - .isEqualTo(expected); + .as("BigDecimals should be equal") + .isEqualTo(expected); break; case STRUCT: assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); - assertThat(actual) - .as("Should be an InternalRow") - .isInstanceOf(InternalRow.class); + assertThat(actual).as("Should be an InternalRow").isInstanceOf(InternalRow.class); assertEqualsUnsafe( type.asNestedType().asStructType(), (Record) expected, (InternalRow) actual); break; case LIST: - assertThat(expected) - .as("Should expect a Collection") - .isInstanceOf(Collection.class); + assertThat(expected).as("Should expect a Collection").isInstanceOf(Collection.class); assertThat(actual).as("Should be an ArrayData").isInstanceOf(ArrayData.class); assertEqualsUnsafe( type.asNestedType().asListType(), (Collection) expected, (ArrayData) actual); break; case MAP: assertThat(expected).as("Should expect a Map").isInstanceOf(Map.class); - assertThat(actual) - .as("Should be an ArrayBasedMapData") - .isInstanceOf(MapData.class); + assertThat(actual).as("Should be an ArrayBasedMapData").isInstanceOf(MapData.class); assertEqualsUnsafe(type.asNestedType().asMapType(), (Map) expected, (MapData) actual); break; case TIME: diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java index b1cd44966339..1e2878f02995 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java @@ -168,7 +168,9 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) assertThat(expected).as("Should be an int").isInstanceOf(Integer.class); assertThat(actual).as("Should be a Date").isInstanceOf(Date.class); LocalDate date = ChronoUnit.DAYS.addTo(EPOCH_DAY, (Integer) expected); - assertThat(actual.toString()).as("ISO-8601 date should be equal").isEqualTo(String.valueOf(date)); + assertThat(actual.toString()) + .as("ISO-8601 date should be equal") + .isEqualTo(String.valueOf(date)); break; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; @@ -182,9 +184,7 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) long tsMicros = (ts.getTime() * 1000) + ((ts.getNanos() / 1000) % 1000); assertThat(tsMicros).as("Timestamp micros should be equal").isEqualTo(expected); } else { - assertThat(actual) - .as("Should be a LocalDateTime") - .isInstanceOf(LocalDateTime.class); + assertThat(actual).as("Should be a LocalDateTime").isInstanceOf(LocalDateTime.class); LocalDateTime ts = (LocalDateTime) actual; Instant instant = ts.toInstant(ZoneOffset.UTC); @@ -200,26 +200,24 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) case UUID: assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); assertThat(actual).as("Should be a String").isInstanceOf(String.class); - assertThat(actual.toString()).as("UUID string representation should match").isEqualTo(String.valueOf(expected)); + assertThat(actual.toString()) + .as("UUID string representation should match") + .isEqualTo(String.valueOf(expected)); break; case FIXED: - assertThat(expected) - 
.as("Should expect a Fixed") - .isInstanceOf(GenericData.Fixed.class); + assertThat(expected).as("Should expect a Fixed").isInstanceOf(GenericData.Fixed.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat(actual).as("Bytes should match").isEqualTo(((GenericData.Fixed) expected).bytes()); + assertThat(actual) + .as("Bytes should match") + .isEqualTo(((GenericData.Fixed) expected).bytes()); break; case BINARY: - assertThat(expected) - .as("Should expect a ByteBuffer") - .isInstanceOf(ByteBuffer.class); + assertThat(expected).as("Should expect a ByteBuffer").isInstanceOf(ByteBuffer.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); assertThat(actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: - assertThat(expected) - .as("Should expect a BigDecimal") - .isInstanceOf(BigDecimal.class); + assertThat(expected).as("Should expect a BigDecimal").isInstanceOf(BigDecimal.class); assertThat(actual).as("Should be a BigDecimal").isInstanceOf(BigDecimal.class); assertThat(actual).as("BigDecimals should be equal").isEqualTo(expected); break; @@ -229,18 +227,14 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) assertEqualsSafe(type.asNestedType().asStructType(), (Record) expected, (Row) actual); break; case LIST: - assertThat(expected) - .as("Should expect a Collection") - .isInstanceOf(Collection.class); + assertThat(expected).as("Should expect a Collection").isInstanceOf(Collection.class); assertThat(actual).as("Should be a Seq").isInstanceOf(Seq.class); List asList = seqAsJavaListConverter((Seq) actual).asJava(); assertEqualsSafe(type.asNestedType().asListType(), (Collection) expected, asList); break; case MAP: assertThat(expected).as("Should expect a Collection").isInstanceOf(Map.class); - assertThat(actual) - .as("Should be a Map") - .isInstanceOf(scala.collection.Map.class); + assertThat(actual).as("Should be a Map").isInstanceOf(scala.collection.Map.class); Map asMap = mapAsJavaMapConverter((scala.collection.Map) actual).asJava(); assertEqualsSafe(type.asNestedType().asMapType(), (Map) expected, asMap); @@ -311,8 +305,8 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual assertThat(actual).as("Should be a double").isInstanceOf(Double.class); if (expected instanceof Float) { assertThat(Double.doubleToLongBits((double) actual)) - .as("Values didn't match") - .isEqualTo(Double.doubleToLongBits(((Number) expected).doubleValue())); + .as("Values didn't match") + .isEqualTo(Double.doubleToLongBits(((Number) expected).doubleValue())); } else { assertThat(actual).as("Primitive value should be equal to expected").isEqualTo(expected); } @@ -331,50 +325,46 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual case UUID: assertThat(expected).as("Should expect a UUID").isInstanceOf(UUID.class); assertThat(actual).as("Should be a UTF8String").isInstanceOf(UTF8String.class); - assertThat(actual.toString()).as("UUID string representation should match").isEqualTo(String.valueOf(expected)); + assertThat(actual.toString()) + .as("UUID string representation should match") + .isEqualTo(String.valueOf(expected)); break; case FIXED: - assertThat(expected) - .as("Should expect a Fixed") - .isInstanceOf(GenericData.Fixed.class); + assertThat(expected).as("Should expect a Fixed").isInstanceOf(GenericData.Fixed.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat((byte[]) 
actual).as("Bytes should match").isEqualTo(((GenericData.Fixed) expected).bytes()); + assertThat((byte[]) actual) + .as("Bytes should match") + .isEqualTo(((GenericData.Fixed) expected).bytes()); break; case BINARY: - assertThat(expected) - .as("Should expect a ByteBuffer") - .isInstanceOf(ByteBuffer.class); + assertThat(expected).as("Should expect a ByteBuffer").isInstanceOf(ByteBuffer.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat((byte[]) actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); + assertThat((byte[]) actual) + .as("Bytes should match") + .isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: - assertThat(expected) - .as("Should expect a BigDecimal") - .isInstanceOf(BigDecimal.class); + assertThat(expected).as("Should expect a BigDecimal").isInstanceOf(BigDecimal.class); assertThat(actual).as("Should be a Decimal").isInstanceOf(Decimal.class); - assertThat(((Decimal) actual).toJavaBigDecimal()).as("BigDecimals should be equal").isEqualTo(expected); + assertThat(((Decimal) actual).toJavaBigDecimal()) + .as("BigDecimals should be equal") + .isEqualTo(expected); break; case STRUCT: assertThat(expected).as("Should expect a Record").isInstanceOf(Record.class); - assertThat(actual) - .as("Should be an InternalRow") - .isInstanceOf(InternalRow.class); + assertThat(actual).as("Should be an InternalRow").isInstanceOf(InternalRow.class); assertEqualsUnsafe( type.asNestedType().asStructType(), (Record) expected, (InternalRow) actual); break; case LIST: - assertThat(expected) - .as("Should expect a Collection") - .isInstanceOf(Collection.class); + assertThat(expected).as("Should expect a Collection").isInstanceOf(Collection.class); assertThat(actual).as("Should be an ArrayData").isInstanceOf(ArrayData.class); assertEqualsUnsafe( type.asNestedType().asListType(), (Collection) expected, (ArrayData) actual); break; case MAP: assertThat(expected).as("Should expect a Map").isInstanceOf(Map.class); - assertThat(actual) - .as("Should be an ArrayBasedMapData") - .isInstanceOf(MapData.class); + assertThat(actual).as("Should be an ArrayBasedMapData").isInstanceOf(MapData.class); assertEqualsUnsafe(type.asNestedType().asMapType(), (Map) expected, (MapData) actual); break; case TIME: @@ -411,8 +401,8 @@ public static void assertEquals( case DATE: case TIMESTAMP: assertThat(getPrimitiveValue(actual, c, childType)) - .as(prefix + "." + fieldName + " - " + childType) - .isEqualTo(getValue(expected, c, childType)); + .as(prefix + "." 
+ fieldName + " - " + childType) + .isEqualTo(getValue(expected, c, childType)); break; case UUID: case FIXED: @@ -472,8 +462,8 @@ private static void assertEqualsLists( case DATE: case TIMESTAMP: assertThat(actual.get(e)) - .as(prefix + ".elem " + e + " - " + childType) - .isEqualTo(getValue(expected, e, childType)); + .as(prefix + ".elem " + e + " - " + childType) + .isEqualTo(getValue(expected, e, childType)); break; case UUID: case FIXED: @@ -529,8 +519,8 @@ private static void assertEqualsMaps( Object actualValue = actual.get(expectedKey); if (actualValue == null) { assertThat(true) - .as(prefix + ".key=" + expectedKey + " has null") - .isEqualTo(expected.valueArray().isNullAt(e)); + .as(prefix + ".key=" + expectedKey + " has null") + .isEqualTo(expected.valueArray().isNullAt(e)); } else { switch (valueType.typeId()) { case BOOLEAN: @@ -543,8 +533,8 @@ private static void assertEqualsMaps( case DATE: case TIMESTAMP: assertThat(actual.get(expectedKey)) - .as(prefix + ".key=" + expectedKey + " - " + valueType) - .isEqualTo(getValue(expectedValueArray, e, valueType)); + .as(prefix + ".key=" + expectedKey + " - " + valueType) + .isEqualTo(getValue(expectedValueArray, e, valueType)); break; case UUID: case FIXED: @@ -674,7 +664,7 @@ private static List toList(Seq val) { } private static void assertEqualBytes(String context, byte[] expected, byte[] actual) { - assertThat(actual).as(context).isEqualTo(expected); + assertThat(actual).as(context).isEqualTo(expected); } static void assertEquals(Schema schema, Object expected, Object actual) { @@ -708,9 +698,7 @@ private static void assertEquals(String context, DataType type, Object expected, assertThat(expected) .as("Expected should be a MapData: " + context) .isInstanceOf(MapData.class); - assertThat(actual) - .as("Actual should be a MapData: " + context) - .isInstanceOf(MapData.class); + assertThat(actual).as("Actual should be a MapData: " + context).isInstanceOf(MapData.class); assertEquals(context, (MapType) type, (MapData) expected, (MapData) actual); } else if (type instanceof BinaryType) { @@ -722,7 +710,9 @@ private static void assertEquals(String context, DataType type, Object expected, private static void assertEquals( String context, StructType struct, InternalRow expected, InternalRow actual) { - assertThat(actual.numFields()).as("Should have correct number of fields").isEqualTo(struct.size()); + assertThat(actual.numFields()) + .as("Should have correct number of fields") + .isEqualTo(struct.size()); for (int i = 0; i < actual.numFields(); i += 1) { StructField field = struct.fields()[i]; DataType type = field.dataType(); @@ -737,8 +727,8 @@ private static void assertEquals( private static void assertEquals( String context, ArrayType array, ArrayData expected, ArrayData actual) { assertThat(actual.numElements()) - .as("Should have the same number of elements") - .isEqualTo(expected.numElements()); + .as("Should have the same number of elements") + .isEqualTo(expected.numElements()); DataType type = array.elementType(); for (int i = 0; i < actual.numElements(); i += 1) { assertEquals( @@ -751,8 +741,8 @@ private static void assertEquals( private static void assertEquals(String context, MapType map, MapData expected, MapData actual) { assertThat(actual.numElements()) - .as("Should have the same number of elements") - .isEqualTo(expected.numElements()); + .as("Should have the same number of elements") + .isEqualTo(expected.numElements()); DataType keyType = map.keyType(); ArrayData expectedKeys = expected.keyArray(); diff --git 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java index 438335f71ccf..75b8697323e1 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java @@ -17,6 +17,7 @@ * under the License. */ package org.apache.iceberg.spark.data; + import static org.assertj.core.api.Assertions.assertThat; import java.io.File; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkDateTimes.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkDateTimes.java index 2d3716a04766..6a06f9d5836d 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkDateTimes.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkDateTimes.java @@ -48,7 +48,9 @@ public void testSparkDate() { public void checkSparkDate(String dateString) { Literal date = Literal.of(dateString).to(Types.DateType.get()); String sparkDate = DateTimeUtils.toJavaDate(date.value()).toString(); - assertThat(sparkDate).as("Should be the same date (" + date.value() + ")").isEqualTo(dateString); + assertThat(sparkDate) + .as("Should be the same date (" + date.value() + ")") + .isEqualTo(dateString); } @Test @@ -69,6 +71,8 @@ public void checkSparkTimestamp(String timestampString, String sparkRepr) { ZoneId zoneId = DateTimeUtils.getZoneId("UTC"); TimestampFormatter formatter = TimestampFormatter.getFractionFormatter(zoneId); String sparkTimestamp = formatter.format(ts.value()); - assertThat(sparkTimestamp).as("Should be the same timestamp (" + ts.value() + ")").isEqualTo(sparkRepr); + assertThat(sparkTimestamp) + .as("Should be the same timestamp (" + ts.value() + ")") + .isEqualTo(sparkRepr); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java index f822a6a46f53..1b1c87a8099a 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java @@ -118,8 +118,7 @@ public static Object[][] parameters() { return new Object[][] {new Object[] {false}, new Object[] {true}}; } - @TempDir - public java.nio.file.Path temp; + @TempDir public java.nio.file.Path temp; private final boolean vectorized; private File testFile; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index fa55d4e5e1e2..e49a29411ea3 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -63,7 +63,10 @@ public class TestSparkParquetReader extends AvroDataTest { @Override protected void writeAndValidate(Schema schema) throws IOException { - assumeTrue(null == TypeUtil.find(schema, + assumeTrue( + null + == TypeUtil.find( + schema, type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get()), "Parquet Avro cannot write non-string map keys"); @@ -108,7 +111,7 @@ protected Table tableFromInputFile(InputFile inputFile, Schema schema) 
throws IO schema, PartitionSpec.unpartitioned(), ImmutableMap.of(), - java.nio.file.Files.createTempDirectory(temp, null).toFile().getCanonicalPath()); + java.nio.file.Files.createTempDirectory(temp, null).toFile().getCanonicalPath()); table .newAppend() @@ -126,8 +129,7 @@ protected Table tableFromInputFile(InputFile inputFile, Schema schema) throws IO @Test public void testInt96TimestampProducedBySparkIsReadCorrectly() throws IOException { - String outputFilePath = - String.format("%s/%s", temp.toAbsolutePath(), "parquet_int96.parquet"); + String outputFilePath = String.format("%s/%s", temp.toAbsolutePath(), "parquet_int96.parquet"); HadoopOutputFile outputFile = HadoopOutputFile.fromPath( new org.apache.hadoop.fs.Path(outputFilePath), new Configuration()); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java index e2954c21f38a..11560859eedd 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java @@ -80,12 +80,14 @@ private void writeAndValidate( Function transform) throws IOException { // Write test data - assumeThat(null - == TypeUtil.find( - schema, - type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())) - .as("Parquet Avro cannot write non-string map keys") - .isTrue(); + assumeThat( + null + == TypeUtil.find( + schema, + type -> + type.isMapType() && type.asMapType().keyType() != Types.StringType.get())) + .as("Parquet Avro cannot write non-string map keys") + .isTrue(); Iterable expected = generateData(schema, numRecords, seed, nullPercentage, transform); @@ -325,8 +327,7 @@ public void testUnsupportedReadsForParquetV2() throws Exception { try (FileAppender writer = getParquetV2Writer(schema, dataFile)) { writer.addAll(data); } - assertThatThrownBy( - () -> assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE)) + assertThatThrownBy(() -> assertRecordsMatch(schema, 30000, data, dataFile, false, BATCH_SIZE)) .isInstanceOf(UnsupportedOperationException.class) .hasMessageStartingWith("Cannot support vectorized reads for column") .hasMessageEndingWith("Disable vectorized reads to read this table/file"); From 4775c29f2722caf1c35a6fb66a6211acbdd5f317 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 13:58:14 +0530 Subject: [PATCH 06/21] fix (actual, expected) for assertj --- .../iceberg/spark/data/GenericsHelpers.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java index 7705001ca7e1..5bbd67c5e576 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java @@ -116,7 +116,7 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) case LONG: case FLOAT: case DOUBLE: - assertThat(expected).as("Primitive value should be equal to expected").isEqualTo(actual); + assertThat(actual).as("Primitive value should be equal to expected").isEqualTo(expected); break; case DATE: 
assertThat(expected).as("Should expect a LocalDate").isInstanceOf(LocalDate.class); @@ -259,14 +259,14 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual case LONG: case FLOAT: case DOUBLE: - assertThat(expected).as("Primitive value should be equal to expected").isEqualTo(actual); + assertThat(actual).as("Primitive value should be equal to expected").isEqualTo(expected); break; case DATE: assertThat(expected).as("Should expect a LocalDate").isInstanceOf(LocalDate.class); int expectedDays = (int) ChronoUnit.DAYS.between(EPOCH_DAY, (LocalDate) expected); - assertThat(expectedDays) + assertThat(actual) .as("Primitive value should be equal to expected") - .isEqualTo(actual); + .isEqualTo(expectedDays); break; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; @@ -275,18 +275,18 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual .as("Should expect an OffsetDateTime") .isInstanceOf(OffsetDateTime.class); long expectedMicros = ChronoUnit.MICROS.between(EPOCH, (OffsetDateTime) expected); - assertThat(expectedMicros) + assertThat(actual) .as("Primitive value should be equal to expected") - .isEqualTo(actual); + .isEqualTo(expectedMicros); } else { assertThat(expected) .as("Should expect an LocalDateTime") .isInstanceOf(LocalDateTime.class); long expectedMicros = ChronoUnit.MICROS.between(EPOCH, ((LocalDateTime) expected).atZone(ZoneId.of("UTC"))); - assertThat(expectedMicros) + assertThat(actual) .as("Primitive value should be equal to expected") - .isEqualTo(actual); + .isEqualTo(expectedMicros); } break; case STRING: From 9a0b90b808f5af8c6a58148a794e827ddb205559 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:44:40 +0530 Subject: [PATCH 07/21] revert file - modify this file with other parameterized files --- .../TestSparkParquetReadMetadataColumns.java | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java index 1b1c87a8099a..01b633da0f9e 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java @@ -19,8 +19,6 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.types.Types.NestedField.required; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assumptions.assumeTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -59,9 +57,12 @@ import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.unsafe.types.UTF8String; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -118,7 +119,7 @@ public static Object[][] parameters() { return new Object[][] {new Object[] {false}, new Object[] {true}}; } - @TempDir public java.nio.file.Path temp; + @Rule public TemporaryFolder temp =
new TemporaryFolder(); private final boolean vectorized; private File testFile; @@ -127,14 +128,14 @@ public TestSparkParquetReadMetadataColumns(boolean vectorized) { this.vectorized = vectorized; } - @BeforeEach + @Before public void writeFile() throws IOException { List fileSplits = Lists.newArrayList(); StructType struct = SparkSchemaUtil.convert(DATA_SCHEMA); Configuration conf = new Configuration(); - testFile = temp.toFile(); - assertThat(testFile.delete()).as("Delete should succeed").isTrue(); + testFile = temp.newFile(); + Assert.assertTrue("Delete should succeed", testFile.delete()); ParquetFileWriter parquetFileWriter = new ParquetFileWriter( conf, @@ -143,8 +144,8 @@ public void writeFile() throws IOException { parquetFileWriter.start(); for (int i = 0; i < NUM_ROW_GROUPS; i += 1) { - File split = temp.toFile(); - assertThat(split.delete()).as("Delete should succeed").isTrue(); + File split = temp.newFile(); + Assert.assertTrue("Delete should succeed", split.delete()); fileSplits.add(new Path(split.getAbsolutePath())); try (FileAppender writer = Parquet.write(Files.localOutput(split)) @@ -170,7 +171,7 @@ public void testReadRowNumbers() throws IOException { @Test public void testReadRowNumbersWithDelete() throws IOException { - assumeTrue(vectorized); + Assume.assumeTrue(vectorized); List expectedRowsAfterDelete = Lists.newArrayList(); EXPECTED_ROWS.forEach(row -> expectedRowsAfterDelete.add(row.copy())); @@ -294,11 +295,11 @@ private void validate(List expected, Parquet.ReadBuilder builder) final Iterator actualRows = reader.iterator(); for (InternalRow internalRow : expected) { - assertThat(actualRows).as("Should have expected number of rows").hasNext(); + Assert.assertTrue("Should have expected number of rows", actualRows.hasNext()); TestHelpers.assertEquals(PROJECTION_SCHEMA, internalRow, actualRows.next()); } - assertThat(actualRows).as("Should not have extra rows").isExhausted(); + Assert.assertFalse("Should not have extra rows", actualRows.hasNext()); } } From 221692a6480c59f77f08c371d3867584f28fe58b Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:51:00 +0530 Subject: [PATCH 08/21] fix assumeThat import and methods --- .../iceberg/spark/data/TestSparkParquetReader.java | 12 ++++++------ .../vectorized/TestParquetVectorizedReads.java | 10 ++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index e49a29411ea3..cda00cc4d1a3 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -21,7 +21,7 @@ import static org.apache.iceberg.spark.data.TestHelpers.assertEqualsUnsafe; import static org.apache.iceberg.types.Types.NestedField.required; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assumptions.assumeTrue; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; import java.io.IOException; @@ -63,12 +63,12 @@ public class TestSparkParquetReader extends AvroDataTest { @Override protected void writeAndValidate(Schema schema) throws IOException { - assumeTrue( - null - == TypeUtil.find( + assumeThat( + TypeUtil.find( schema, - type -> type.isMapType() && type.asMapType().keyType() != 
Types.StringType.get()), - "Parquet Avro cannot write non-string map keys"); + type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())) + .as("Parquet Avro cannot write non-string map keys") + .isNull(); List expected = RandomData.generateList(schema, 100, 0L); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java index 11560859eedd..30e7cb875398 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java @@ -81,13 +81,11 @@ private void writeAndValidate( throws IOException { // Write test data assumeThat( - null - == TypeUtil.find( - schema, - type -> - type.isMapType() && type.asMapType().keyType() != Types.StringType.get())) + TypeUtil.find( + schema, + type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())) .as("Parquet Avro cannot write non-string map keys") - .isTrue(); + .isNull(); Iterable expected = generateData(schema, numRecords, seed, nullPercentage, transform); From 436e518f1be927cf7b0158e5f1983b9415a04d95 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:51:22 +0530 Subject: [PATCH 09/21] remove byte[] cast --- .../org/apache/iceberg/spark/data/GenericsHelpers.java | 8 ++------ .../java/org/apache/iceberg/spark/data/TestHelpers.java | 6 ++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java index 5bbd67c5e576..501b46878bd2 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/GenericsHelpers.java @@ -172,9 +172,7 @@ private static void assertEqualsSafe(Type type, Object expected, Object actual) case BINARY: assertThat(expected).as("Should expect a ByteBuffer").isInstanceOf(ByteBuffer.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat((byte[]) actual) - .as("Bytes should match") - .isEqualTo(((ByteBuffer) expected).array()); + assertThat(actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: assertThat(expected).as("Should expect a BigDecimal").isInstanceOf(BigDecimal.class); @@ -310,9 +308,7 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual case BINARY: assertThat(expected).as("Should expect a ByteBuffer").isInstanceOf(ByteBuffer.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat((byte[]) actual) - .as("Bytes should match") - .isEqualTo(((ByteBuffer) expected).array()); + assertThat(actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: assertThat(expected).as("Should expect a BigDecimal").isInstanceOf(BigDecimal.class); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java index 1e2878f02995..c73ef630ac48 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java 
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestHelpers.java @@ -332,16 +332,14 @@ private static void assertEqualsUnsafe(Type type, Object expected, Object actual case FIXED: assertThat(expected).as("Should expect a Fixed").isInstanceOf(GenericData.Fixed.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat((byte[]) actual) + assertThat(actual) .as("Bytes should match") .isEqualTo(((GenericData.Fixed) expected).bytes()); break; case BINARY: assertThat(expected).as("Should expect a ByteBuffer").isInstanceOf(ByteBuffer.class); assertThat(actual).as("Should be a byte[]").isInstanceOf(byte[].class); - assertThat((byte[]) actual) - .as("Bytes should match") - .isEqualTo(((ByteBuffer) expected).array()); + assertThat(actual).as("Bytes should match").isEqualTo(((ByteBuffer) expected).array()); break; case DECIMAL: assertThat(expected).as("Should expect a BigDecimal").isInstanceOf(BigDecimal.class); From f723e786359029b52143defb41c4036828f099c8 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:52:09 +0530 Subject: [PATCH 10/21] update tempDir access for subclass --- .../test/java/org/apache/iceberg/spark/data/AvroDataTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java index baab12d4ccdb..8f90a51a6e30 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/AvroDataTest.java @@ -63,7 +63,7 @@ public abstract class AvroDataTest { required(117, "dec_38_10", Types.DecimalType.of(38, 10)) // Spark's maximum precision ); - @TempDir private Path temp; + @TempDir protected Path temp; @Test public void testSimpleStruct() throws IOException { From ca029fd90c7f632581b017d4dc4dadbb04bea31e Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 21:11:09 +0530 Subject: [PATCH 11/21] fix to clear CI --- .../iceberg/spark/source/TestDataFrameWrites.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java index d0f181177f9b..73558d2d8453 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java @@ -147,7 +147,7 @@ protected void writeAndValidate(Schema schema) throws IOException { @Test public void testWriteWithCustomDataLocation() throws IOException { File location = createTableFolder(); - File tablePropertyDataLocation = temp.newFolder("test-table-property-data-dir"); + File tablePropertyDataLocation = temp.resolve("test-table-property-data-dir").toFile(); Table table = createTable(new Schema(SUPPORTED_PRIMITIVES.fields()), location); table .updateProperties() @@ -157,7 +157,7 @@ public void testWriteWithCustomDataLocation() throws IOException { } private File createTableFolder() throws IOException { - File parent = temp.newFolder("parquet"); + File parent = temp.resolve("parquet").toFile(); File location = new File(parent, "test"); Assert.assertTrue("Mkdir should succeed", location.mkdirs()); return 
location; @@ -247,7 +247,7 @@ private void writeDataWithFailOnPartition( private Dataset createDataset(Iterable records, Schema schema) throws IOException { // this uses the SparkAvroReader to create a DataFrame from the list of records // it assumes that SparkAvroReader is correct - File testFile = temp.newFile(); + File testFile = temp.toFile(); Assert.assertTrue("Delete should succeed", testFile.delete()); try (FileAppender writer = @@ -285,7 +285,7 @@ public void testNullableWithWriteOption() throws IOException { Assume.assumeTrue( "Spark 3 rejects writing nulls to a required column", spark.version().startsWith("2")); - File location = new File(temp.newFolder("parquet"), "test"); + File location = new File(temp.resolve("parquet").toFile(), "test"); String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); @@ -338,7 +338,7 @@ public void testNullableWithSparkSqlOption() throws IOException { Assume.assumeTrue( "Spark 3 rejects writing nulls to a required column", spark.version().startsWith("2")); - File location = new File(temp.newFolder("parquet"), "test"); + File location = new File(temp.resolve("parquet").toFile(), "test"); String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); From 53722e264f767bff2ddcc616c082966b7ca4460c Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Wed, 20 Dec 2023 22:02:57 +0530 Subject: [PATCH 12/21] fix HiddenField error --- .../iceberg/spark/data/TestSparkOrcReadMetadataColumns.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java index 2022c4f51606..04f91a52389c 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java @@ -110,7 +110,7 @@ public TestSparkOrcReadMetadataColumns(boolean vectorized) { @BeforeEach public void writeFile() throws IOException { - File testFile = temp.toFile(); + testFile = temp.toFile(); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = From dd92bca504ba7111dc2f74e956bfafde6cb2e474 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 21 Dec 2023 10:42:24 +0530 Subject: [PATCH 13/21] reduce CI errors --- .../iceberg/spark/data/TestOrcWrite.java | 3 +- .../spark/data/TestParquetAvroReader.java | 5 +- .../spark/data/TestParquetAvroWriter.java | 3 +- .../spark/data/TestSparkAvroEnums.java | 3 +- .../spark/data/TestSparkAvroReader.java | 3 +- .../data/TestSparkOrcReadMetadataColumns.java | 27 ++--- .../spark/data/TestSparkOrcReader.java | 3 +- .../TestSparkParquetReadMetadataColumns.java | 56 +++++---- .../spark/data/TestSparkParquetReader.java | 3 +- .../spark/data/TestSparkParquetWriter.java | 3 +- .../data/TestSparkRecordOrcReaderWriter.java | 6 +- ...rquetDictionaryEncodedVectorizedReads.java | 8 +- .../TestParquetVectorizedReads.java | 9 +- .../iceberg/spark/source/TestAvroScan.java | 21 ++-- .../spark/source/TestDataFrameWrites.java | 109 +++++++++--------- 
.../iceberg/spark/source/TestParquetScan.java | 62 +++++----- 16 files changed, 171 insertions(+), 153 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java index e7694be21fca..a1c0a1be8e16 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java @@ -24,6 +24,7 @@ import java.io.File; import java.io.IOException; import java.nio.file.Path; +import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.FileAppender; @@ -42,7 +43,7 @@ public class TestOrcWrite { @Test public void splitOffsets() throws IOException { - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); Iterable rows = RandomData.generateSpark(SCHEMA, 1, 0L); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java index 2e135538ab11..db51f9e7c659 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Iterator; +import java.util.UUID; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; @@ -194,7 +195,7 @@ public void testWithOldReadPath() throws IOException { public void testCorrectness() throws IOException { Iterable records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139); - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = @@ -224,7 +225,7 @@ public void testCorrectness() throws IOException { } private File writeTestData(Schema schema, int numRecords, int seed) throws IOException { - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java index e8376426663d..444dbdff2d6d 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Iterator; +import java.util.UUID; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; @@ -90,7 +91,7 @@ public class TestParquetAvroWriter { public void testCorrectness() throws IOException { Iterable records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139); - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should 
succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java index 75b8697323e1..248065a1fbe6 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.nio.file.Path; import java.util.List; +import java.util.UUID; import org.apache.avro.SchemaBuilder; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericData; @@ -65,7 +66,7 @@ public void writeAndValidateEnums() throws IOException { Record enumRecord3 = new GenericData.Record(avroSchema); // null enum List expected = ImmutableList.of(enumRecord1, enumRecord2, enumRecord3); - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java index c64ab0e283a1..d7233dc90ce0 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java @@ -24,6 +24,7 @@ import java.io.File; import java.io.IOException; import java.util.List; +import java.util.UUID; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; @@ -38,7 +39,7 @@ public class TestSparkAvroReader extends AvroDataTest { protected void writeAndValidate(Schema schema) throws IOException { List expected = RandomData.generateList(schema, 100, 0L); - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java index 04f91a52389c..5d4d355baf1c 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java @@ -23,13 +23,19 @@ import java.io.File; import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.UUID; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.iceberg.Files; import org.apache.iceberg.MetadataColumns; +import org.apache.iceberg.Parameter; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; import org.apache.iceberg.Schema; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.expressions.Expression; @@ -53,11 +59,10 @@ import org.apache.spark.unsafe.types.UTF8String; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import 
org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.io.TempDir; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -@RunWith(Parameterized.class) +@ExtendWith(ParameterizedTestExtension.class) public class TestSparkOrcReadMetadataColumns { private static final Schema DATA_SCHEMA = new Schema( @@ -94,23 +99,19 @@ public class TestSparkOrcReadMetadataColumns { } } - @Parameterized.Parameters(name = "vectorized = {0}") - public static Object[] parameters() { - return new Object[] {false, true}; + @Parameters(name = "vectorized = {0}") + public static Collection parameters() { + return Arrays.asList(false, true); } - @TempDir public java.nio.file.Path temp; + @TempDir private java.nio.file.Path temp; - private boolean vectorized; + @Parameter private boolean vectorized; private File testFile; - public TestSparkOrcReadMetadataColumns(boolean vectorized) { - this.vectorized = vectorized; - } - @BeforeEach public void writeFile() throws IOException { - testFile = temp.toFile(); + testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java index 645eb20376bf..a93c768b1f5e 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java @@ -27,6 +27,7 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.CloseableIterable; @@ -62,7 +63,7 @@ public void writeAndValidateRepeatingRecords() throws IOException { private void writeAndValidateRecords(Schema schema, Iterable expected) throws IOException { - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java index 01b633da0f9e..d797be0d25d6 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java @@ -19,6 +19,8 @@ package org.apache.iceberg.spark.data; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assumptions.assumeThat; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -27,10 +29,14 @@ import java.util.Iterator; import java.util.List; import java.util.Set; +import java.util.UUID; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.iceberg.Files; import org.apache.iceberg.MetadataColumns; +import org.apache.iceberg.Parameter; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; import org.apache.iceberg.Schema; import org.apache.iceberg.data.DeleteFilter; import 
org.apache.iceberg.deletes.PositionDeleteIndex; @@ -57,16 +63,12 @@ import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.unsafe.types.UTF8String; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; + +@ExtendWith(ParameterizedTestExtension.class) public class TestSparkParquetReadMetadataColumns { private static final Schema DATA_SCHEMA = new Schema( @@ -114,28 +116,24 @@ public class TestSparkParquetReadMetadataColumns { } } - @Parameterized.Parameters(name = "vectorized = {0}") + @Parameters(name = "vectorized = {0}") public static Object[][] parameters() { return new Object[][] {new Object[] {false}, new Object[] {true}}; } - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir protected java.nio.file.Path temp; - private final boolean vectorized; + @Parameter private boolean vectorized; private File testFile; - public TestSparkParquetReadMetadataColumns(boolean vectorized) { - this.vectorized = vectorized; - } - - @Before + @BeforeEach public void writeFile() throws IOException { List fileSplits = Lists.newArrayList(); StructType struct = SparkSchemaUtil.convert(DATA_SCHEMA); Configuration conf = new Configuration(); - testFile = temp.newFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); ParquetFileWriter parquetFileWriter = new ParquetFileWriter( conf, @@ -144,8 +142,8 @@ public void writeFile() throws IOException { parquetFileWriter.start(); for (int i = 0; i < NUM_ROW_GROUPS; i += 1) { - File split = temp.newFile(); - Assert.assertTrue("Delete should succeed", split.delete()); + File split = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + assertThat(split.delete()).as("Delete should succeed").isTrue(); fileSplits.add(new Path(split.getAbsolutePath())); try (FileAppender writer = Parquet.write(Files.localOutput(split)) @@ -164,14 +162,14 @@ public void writeFile() throws IOException { .getKeyValueMetaData()); } - @Test + @TestTemplate public void testReadRowNumbers() throws IOException { readAndValidate(null, null, null, EXPECTED_ROWS); } - @Test + @TestTemplate public void testReadRowNumbersWithDelete() throws IOException { - Assume.assumeTrue(vectorized); + assumeThat(vectorized).isTrue(); List expectedRowsAfterDelete = Lists.newArrayList(); EXPECTED_ROWS.forEach(row -> expectedRowsAfterDelete.add(row.copy())); @@ -229,7 +227,7 @@ public boolean isEmpty() { } } - @Test + @TestTemplate public void testReadRowNumbersWithFilter() throws IOException { // current iceberg supports row group filter. 
for (int i = 1; i < 5; i += 1) { @@ -243,7 +241,7 @@ public void testReadRowNumbersWithFilter() throws IOException { } } - @Test + @TestTemplate public void testReadRowNumbersWithSplits() throws IOException { ParquetFileReader fileReader = new ParquetFileReader( @@ -295,11 +293,11 @@ private void validate(List expected, Parquet.ReadBuilder builder) final Iterator actualRows = reader.iterator(); for (InternalRow internalRow : expected) { - Assert.assertTrue("Should have expected number of rows", actualRows.hasNext()); + assertThat(actualRows).as("Should have expected number of rows").hasNext(); TestHelpers.assertEquals(PROJECTION_SCHEMA, internalRow, actualRows.next()); } - Assert.assertFalse("Should not have extra rows", actualRows.hasNext()); + assertThat(actualRows).as("Should not have extra rows").isExhausted(); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index cda00cc4d1a3..113abce99500 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -28,6 +28,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.avro.generic.GenericData; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.DataFiles; @@ -72,7 +73,7 @@ protected void writeAndValidate(Schema schema) throws IOException { List expected = RandomData.generateList(schema, 100, 0L); - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java index 9cb77ac51275..ffffb78e3f9e 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Iterator; +import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.CloseableIterable; @@ -88,7 +89,7 @@ public void testCorrectness() throws IOException { int numRows = 50_000; Iterable records = RandomData.generateSpark(COMPLEX_SCHEMA, numRows, 19981); - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java index e0027b45e2e6..6f6b6e2392b6 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java @@ -26,6 +26,7 @@ import java.math.BigDecimal; import java.util.Iterator; import java.util.List; +import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import 
org.apache.iceberg.data.GenericRecord; @@ -45,7 +46,8 @@ public class TestSparkRecordOrcReaderWriter extends AvroDataTest { private static final int NUM_RECORDS = 200; private void writeAndValidate(Schema schema, List expectedRecords) throws IOException { - final File originalFile = temp.toFile(); + final File originalFile = + File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(originalFile.delete()).as("Delete should succeed").isTrue(); // Write few generic records into the original test file. @@ -68,7 +70,7 @@ private void writeAndValidate(Schema schema, List expectedRecords) throw assertEqualsUnsafe(schema.asStruct(), expectedRecords, reader, expectedRecords.size()); } - final File anotherFile = temp.toFile(); + final File anotherFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(anotherFile.delete()).as("Delete should succeed").isTrue(); // Write those spark InternalRows into a new file again. diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java index 2c9f4d0ebdec..f5bf18e0bcd2 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java @@ -23,6 +23,7 @@ import java.io.File; import java.io.IOException; +import java.util.UUID; import org.apache.avro.generic.GenericData; import org.apache.iceberg.Schema; import org.apache.iceberg.io.FileAppender; @@ -58,7 +59,8 @@ public void testVectorizedReadsWithNewContainers() throws IOException {} @Test public void testMixedDictionaryNonDictionaryReads() throws IOException { Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - File dictionaryEncodedFile = temp.toFile(); + File dictionaryEncodedFile = + File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(dictionaryEncodedFile.delete()).as("Delete should succeed").isTrue(); Iterable dictionaryEncodableData = RandomData.generateDictionaryEncodableData( @@ -68,7 +70,7 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException { writer.addAll(dictionaryEncodableData); } - File plainEncodingFile = temp.toFile(); + File plainEncodingFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(plainEncodingFile.delete()).as("Delete should succeed").isTrue(); Iterable nonDictionaryData = RandomData.generate(schema, 10000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE); @@ -77,7 +79,7 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException { } int rowGroupSize = PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT; - File mixedFile = temp.toFile(); + File mixedFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(mixedFile.delete()).as("Delete should succeed").isTrue(); Parquet.concat( ImmutableList.of(dictionaryEncodedFile, plainEncodingFile, dictionaryEncodedFile), diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java index 30e7cb875398..85bb065b3b74 100644 --- 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java @@ -27,6 +27,7 @@ import java.io.File; import java.io.IOException; import java.util.Iterator; +import java.util.UUID; import org.apache.avro.generic.GenericData; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; @@ -91,7 +92,7 @@ private void writeAndValidate( generateData(schema, numRecords, seed, nullPercentage, transform); // write a test parquet file using iceberg writer - File testFile = temp.toFile(); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = getParquetWriter(schema, testFile)) { @@ -274,7 +275,7 @@ public void testReadsForTypePromotedColumns() throws Exception { optional(102, "float_data", Types.FloatType.get()), optional(103, "decimal_data", Types.DecimalType.of(10, 5))); - File dataFile = temp.toFile(); + File dataFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(writeSchema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); @@ -303,7 +304,7 @@ public void testSupportedReadsForParquetV2() throws Exception { optional(103, "double_data", Types.DoubleType.get()), optional(104, "decimal_data", Types.DecimalType.of(25, 5))); - File dataFile = temp.toFile(); + File dataFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); @@ -318,7 +319,7 @@ public void testUnsupportedReadsForParquetV2() throws Exception { // Longs, ints, string types etc use delta encoding and which are not supported for vectorized // reads Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - File dataFile = temp.toFile(); + File dataFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java index 9491adde4605..c14b33f58dbe 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java @@ -19,9 +19,11 @@ package org.apache.iceberg.spark.source; import static org.apache.iceberg.Files.localOutput; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.List; import java.util.UUID; import org.apache.avro.generic.GenericData.Record; @@ -41,25 +43,23 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.io.TempDir; public class 
TestAvroScan extends AvroDataTest { private static final Configuration CONF = new Configuration(); - @Rule public TemporaryFolder temp = new TemporaryFolder(); + @TempDir private Path temp; private static SparkSession spark = null; - @BeforeClass + @BeforeAll public static void startSpark() { TestAvroScan.spark = SparkSession.builder().master("local[2]").getOrCreate(); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestAvroScan.spark; TestAvroScan.spark = null; @@ -68,10 +68,11 @@ public static void stopSpark() { @Override protected void writeAndValidate(Schema schema) throws IOException { - File parent = temp.newFolder("avro"); + File parent = new File(temp.toFile(), "avro"); File location = new File(parent, "test"); File dataFolder = new File(location, "data"); dataFolder.mkdirs(); + // assertThat(dataFolder.mkdirs()).as("Mkdir should succeed").isTrue(); File avroFile = new File(dataFolder, FileFormat.AVRO.addExtension(UUID.randomUUID().toString())); @@ -102,7 +103,7 @@ protected void writeAndValidate(Schema schema) throws IOException { Dataset df = spark.read().format("iceberg").load(location.toString()); List rows = df.collectAsList(); - Assert.assertEquals("Should contain 100 rows", 100, rows.size()); + assertThat(rows).as("Should contain 100 rows").hasSize(100); for (int i = 0; i < expected.size(); i += 1) { TestHelpers.assertEqualsSafe(tableSchema.asStruct(), expected.get(i), rows.get(i)); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java index 73558d2d8453..5ad42f0c014b 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java @@ -21,18 +21,26 @@ import static org.apache.iceberg.spark.SparkSchemaUtil.convert; import static org.apache.iceberg.spark.data.TestHelpers.assertEqualsSafe; import static org.apache.iceberg.spark.data.TestHelpers.assertEqualsUnsafe; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; import java.io.IOException; import java.net.URI; import java.util.Arrays; +import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.UUID; import org.apache.avro.generic.GenericData.Record; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.Files; +import org.apache.iceberg.Parameter; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Snapshot; @@ -63,29 +71,21 @@ import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.InternalRow; -import org.assertj.core.api.Assertions; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + 
+@ExtendWith(ParameterizedTestExtension.class) public class TestDataFrameWrites extends AvroDataTest { private static final Configuration CONF = new Configuration(); - private final String format; - - @Parameterized.Parameters(name = "format = {0}") - public static Object[] parameters() { - return new Object[] {"parquet", "avro", "orc"}; + @Parameters(name = "format = {0}") + public static Collection parameters() { + return Arrays.asList("parquet", "avro", "orc"); } - public TestDataFrameWrites(String format) { - this.format = format; - } + @Parameter private String format = "parquet"; private static SparkSession spark = null; private static JavaSparkContext sc = null; @@ -123,13 +123,13 @@ public TestDataFrameWrites(String format) { "{\"optionalField\": \"d3\", \"requiredField\": \"bid_103\"}", "{\"optionalField\": \"d4\", \"requiredField\": \"bid_104\"}"); - @BeforeClass + @BeforeAll public static void startSpark() { TestDataFrameWrites.spark = SparkSession.builder().master("local[2]").getOrCreate(); TestDataFrameWrites.sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestDataFrameWrites.spark; TestDataFrameWrites.spark = null; @@ -144,10 +144,10 @@ protected void writeAndValidate(Schema schema) throws IOException { writeAndValidateWithLocations(table, location, new File(location, "data")); } - @Test + @TestTemplate public void testWriteWithCustomDataLocation() throws IOException { File location = createTableFolder(); - File tablePropertyDataLocation = temp.resolve("test-table-property-data-dir").toFile(); + File tablePropertyDataLocation = new File(temp.toFile(), "test-table-property-data-dir"); Table table = createTable(new Schema(SUPPORTED_PRIMITIVES.fields()), location); table .updateProperties() @@ -157,9 +157,9 @@ public void testWriteWithCustomDataLocation() throws IOException { } private File createTableFolder() throws IOException { - File parent = temp.resolve("parquet").toFile(); + File parent = new File(temp.toFile(), "parquet"); File location = new File(parent, "test"); - Assert.assertTrue("Mkdir should succeed", location.mkdirs()); + assertThat(location.mkdirs()).as("Mkdir should succeed").isTrue(); return location; } @@ -186,21 +186,24 @@ private void writeAndValidateWithLocations(Table table, File location, File expe while (expectedIter.hasNext() && actualIter.hasNext()) { assertEqualsSafe(tableSchema.asStruct(), expectedIter.next(), actualIter.next()); } - Assert.assertEquals( - "Both iterators should be exhausted", expectedIter.hasNext(), actualIter.hasNext()); + assertThat(actualIter.hasNext()) + .as("Both iterators should be exhausted") + .isEqualTo(expectedIter.hasNext()); table .currentSnapshot() .addedDataFiles(table.io()) .forEach( dataFile -> - Assert.assertTrue( - String.format( - "File should have the parent directory %s, but has: %s.", - expectedDataDir.getAbsolutePath(), dataFile.path()), - URI.create(dataFile.path().toString()) - .getPath() - .startsWith(expectedDataDir.getAbsolutePath()))); + assertThat( + URI.create(dataFile.path().toString()) + .getPath() + .startsWith(expectedDataDir.getAbsolutePath())) + .as( + String.format( + "File should have the parent directory %s, but has: %s.", + expectedDataDir.getAbsolutePath(), dataFile.path())) + .isTrue()); } private List readTable(String location) { @@ -247,8 +250,8 @@ private void writeDataWithFailOnPartition( private Dataset createDataset(Iterable records, Schema schema) throws IOException { // this uses 
the SparkAvroReader to create a DataFrame from the list of records // it assumes that SparkAvroReader is correct - File testFile = temp.toFile(); - Assert.assertTrue("Delete should succeed", testFile.delete()); + File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = Avro.write(Files.localOutput(testFile)).schema(schema).named("test").build()) { @@ -272,20 +275,22 @@ private Dataset createDataset(Iterable records, Schema schema) thro assertEqualsUnsafe(schema.asStruct(), recordIter.next(), row); rows.add(row); } - Assert.assertEquals( - "Both iterators should be exhausted", recordIter.hasNext(), readIter.hasNext()); + assertThat(readIter.hasNext()) + .as("Both iterators should be exhausted") + .isEqualTo(recordIter.hasNext()); } JavaRDD rdd = sc.parallelize(rows); return spark.internalCreateDataFrame(JavaRDD.toRDD(rdd), convert(schema), false); } - @Test + @TestTemplate public void testNullableWithWriteOption() throws IOException { - Assume.assumeTrue( - "Spark 3 rejects writing nulls to a required column", spark.version().startsWith("2")); + assumeThat(spark.version()) + .as("Spark 3 rejects writing nulls to a required column") + .startsWith("2"); - File location = new File(temp.resolve("parquet").toFile(), "test"); + File location = new File(new File(temp.toFile(), "parquet"), "test"); String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); @@ -330,15 +335,16 @@ public void testNullableWithWriteOption() throws IOException { // read all data List rows = spark.read().format("iceberg").load(targetPath).collectAsList(); - Assert.assertEquals("Should contain 6 rows", 6, rows.size()); + assumeThat(rows).as("Should contain 6 rows").hasSize(6); } - @Test + @TestTemplate public void testNullableWithSparkSqlOption() throws IOException { - Assume.assumeTrue( - "Spark 3 rejects writing nulls to a required column", spark.version().startsWith("2")); + assumeThat(spark.version()) + .as("Spark 3 rejects writing nulls to a required column") + .startsWith("2"); - File location = new File(temp.resolve("parquet").toFile(), "test"); + File location = new File(new File(temp.toFile(), "parquet"), "test"); String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); @@ -389,10 +395,10 @@ public void testNullableWithSparkSqlOption() throws IOException { // read all data List rows = newSparkSession.read().format("iceberg").load(targetPath).collectAsList(); - Assert.assertEquals("Should contain 6 rows", 6, rows.size()); + assumeThat(rows).as("Should contain 6 rows").hasSize(6); } - @Test + @TestTemplate public void testFaultToleranceOnWrite() throws IOException { File location = createTableFolder(); Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); @@ -408,8 +414,7 @@ public void testFaultToleranceOnWrite() throws IOException { Iterable records2 = RandomData.generate(schema, 100, 0L); - Assertions.assertThatThrownBy( - () -> writeDataWithFailOnPartition(records2, schema, location.toString())) + assertThatThrownBy(() -> writeDataWithFailOnPartition(records2, schema, location.toString())) .isInstanceOf(SparkException.class); table.refresh(); @@ -417,7 +422,7 @@ public void testFaultToleranceOnWrite() throws IOException { Snapshot 
snapshotAfterFailingWrite = table.currentSnapshot(); List resultAfterFailingWrite = readTable(location.toString()); - Assert.assertEquals(snapshotAfterFailingWrite, snapshotBeforeFailingWrite); - Assert.assertEquals(resultAfterFailingWrite, resultBeforeFailingWrite); + assertThat(snapshotBeforeFailingWrite).isEqualTo(snapshotAfterFailingWrite); + assertThat(resultBeforeFailingWrite).isEqualTo(resultAfterFailingWrite); } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java index a9daa0df4536..2c3e0dc09b34 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java @@ -23,9 +23,13 @@ import static org.apache.iceberg.types.Types.NestedField.required; import static org.apache.spark.sql.functions.monotonically_increasing_id; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assumptions.assumeThat; import java.io.File; import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.UUID; import org.apache.avro.generic.GenericData; @@ -33,6 +37,9 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.FileFormat; +import org.apache.iceberg.Parameter; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; @@ -48,56 +55,49 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; + +@ExtendWith(ParameterizedTestExtension.class) public class TestParquetScan extends AvroDataTest { private static final Configuration CONF = new Configuration(); private static SparkSession spark = null; - @BeforeClass + @BeforeAll public static void startSpark() { TestParquetScan.spark = SparkSession.builder().master("local[2]").getOrCreate(); } - @AfterClass + @AfterAll public static void stopSpark() { SparkSession currentSpark = TestParquetScan.spark; TestParquetScan.spark = null; currentSpark.stop(); } - @Rule public TemporaryFolder temp = new TemporaryFolder(); - - @Parameterized.Parameters(name = "vectorized = {0}") - public static Object[] parameters() { - return new Object[] {false, true}; - } + @TempDir private Path temp; - private final boolean vectorized; + @Parameter private boolean vectorized; - public TestParquetScan(boolean vectorized) { - this.vectorized = vectorized; + @Parameters(name = "vectorized = {0}") + public static Collection parameters() { + return Arrays.asList(false, true); } @Override protected void writeAndValidate(Schema schema) throws IOException { - Assume.assumeTrue( - "Cannot handle non-string map keys in parquet-avro", - null - == 
TypeUtil.find( + assumeThat( + TypeUtil.find( schema, - type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())); - + type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())) + .as("Cannot handle non-string map keys in parquet-avro") + .isNull(); + ; + assertThat(vectorized).as("should not be null").isNotNull(); Table table = createTable(schema); // Important: use the table's schema for the rest of the test @@ -110,14 +110,14 @@ protected void writeAndValidate(Schema schema) throws IOException { Dataset df = spark.read().format("iceberg").load(table.location()); List rows = df.collectAsList(); - Assert.assertEquals("Should contain 100 rows", 100, rows.size()); + assertThat(rows).as("Should contain 100 rows").hasSize(100); for (int i = 0; i < expected.size(); i += 1) { TestHelpers.assertEqualsSafe(table.schema().asStruct(), expected.get(i), rows.get(i)); } } - @Test + @TestTemplate public void testEmptyTableProjection() throws IOException { Types.StructType structType = Types.StructType.of( @@ -143,7 +143,7 @@ public void testEmptyTableProjection() throws IOException { } private Table createTable(Schema schema) throws IOException { - File parent = temp.newFolder("parquet"); + File parent = new File(temp.toFile(), "parquet"); File location = new File(parent, "test"); HadoopTables tables = new HadoopTables(CONF); return tables.create(schema, PartitionSpec.unpartitioned(), location.toString()); From 877c91c93a30708a0c4c6507b6ff1b14314d5569 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 21 Dec 2023 18:50:01 +0530 Subject: [PATCH 14/21] remove commented code --- .../test/java/org/apache/iceberg/spark/source/TestAvroScan.java | 1 - 1 file changed, 1 deletion(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java index c14b33f58dbe..3c36a5dfde12 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java @@ -72,7 +72,6 @@ protected void writeAndValidate(Schema schema) throws IOException { File location = new File(parent, "test"); File dataFolder = new File(location, "data"); dataFolder.mkdirs(); - // assertThat(dataFolder.mkdirs()).as("Mkdir should succeed").isTrue(); File avroFile = new File(dataFolder, FileFormat.AVRO.addExtension(UUID.randomUUID().toString())); From 1340faf896690cb355f7ce8783dcad0f2e23a51a Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 21 Dec 2023 20:35:46 +0530 Subject: [PATCH 15/21] remove UUID for filenames --- .../java/org/apache/iceberg/spark/data/TestOrcWrite.java | 3 +-- .../apache/iceberg/spark/data/TestParquetAvroReader.java | 5 ++--- .../apache/iceberg/spark/data/TestParquetAvroWriter.java | 2 +- .../apache/iceberg/spark/data/TestSparkAvroEnums.java | 2 +- .../apache/iceberg/spark/data/TestSparkAvroReader.java | 2 +- .../spark/data/TestSparkOrcReadMetadataColumns.java | 3 +-- .../apache/iceberg/spark/data/TestSparkOrcReader.java | 3 +-- .../spark/data/TestSparkParquetReadMetadataColumns.java | 5 ++--- .../iceberg/spark/data/TestSparkParquetReader.java | 3 +-- .../iceberg/spark/data/TestSparkParquetWriter.java | 3 +-- .../spark/data/TestSparkRecordOrcReaderWriter.java | 6 ++---- .../TestParquetDictionaryEncodedVectorizedReads.java | 8 +++----- 
.../parquet/vectorized/TestParquetVectorizedReads.java | 9 ++++----- .../apache/iceberg/spark/source/TestDataFrameWrites.java | 3 +-- 14 files changed, 22 insertions(+), 35 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java index a1c0a1be8e16..cbaad6543076 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestOrcWrite.java @@ -24,7 +24,6 @@ import java.io.File; import java.io.IOException; import java.nio.file.Path; -import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.FileAppender; @@ -43,7 +42,7 @@ public class TestOrcWrite { @Test public void splitOffsets() throws IOException { - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); Iterable rows = RandomData.generateSpark(SCHEMA, 1, 0L); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java index db51f9e7c659..3f9b4bb587ba 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroReader.java @@ -26,7 +26,6 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Iterator; -import java.util.UUID; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; @@ -195,7 +194,7 @@ public void testWithOldReadPath() throws IOException { public void testCorrectness() throws IOException { Iterable records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139); - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = @@ -225,7 +224,7 @@ public void testCorrectness() throws IOException { } private File writeTestData(Schema schema, int numRecords, int seed) throws IOException { - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java index 444dbdff2d6d..e03abdc6dff9 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java @@ -91,7 +91,7 @@ public class TestParquetAvroWriter { public void testCorrectness() throws IOException { Iterable records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139); - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender 
writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java index 248065a1fbe6..12b71ff9a7d0 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java @@ -66,7 +66,7 @@ public void writeAndValidateEnums() throws IOException { Record enumRecord3 = new GenericData.Record(avroSchema); // null enum List expected = ImmutableList.of(enumRecord1, enumRecord2, enumRecord3); - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java index d7233dc90ce0..4b0b17207f38 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java @@ -39,7 +39,7 @@ public class TestSparkAvroReader extends AvroDataTest { protected void writeAndValidate(Schema schema) throws IOException { List expected = RandomData.generateList(schema, 100, 0L); - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java index 5d4d355baf1c..eccfb807e2fc 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java @@ -27,7 +27,6 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; -import java.util.UUID; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -111,7 +110,7 @@ public static Collection parameters() { @BeforeEach public void writeFile() throws IOException { - testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java index a93c768b1f5e..5338eaf0855e 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReader.java @@ -27,7 +27,6 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; -import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.CloseableIterable; @@ -63,7 +62,7 @@ public void 
writeAndValidateRepeatingRecords() throws IOException { private void writeAndValidateRecords(Schema schema, Iterable expected) throws IOException { - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java index d797be0d25d6..044ea3d93c0b 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReadMetadataColumns.java @@ -29,7 +29,6 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -import java.util.UUID; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.iceberg.Files; @@ -132,7 +131,7 @@ public void writeFile() throws IOException { StructType struct = SparkSchemaUtil.convert(DATA_SCHEMA); Configuration conf = new Configuration(); - testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); ParquetFileWriter parquetFileWriter = new ParquetFileWriter( @@ -142,7 +141,7 @@ public void writeFile() throws IOException { parquetFileWriter.start(); for (int i = 0; i < NUM_ROW_GROUPS; i += 1) { - File split = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File split = File.createTempFile("junit", null, temp.toFile()); assertThat(split.delete()).as("Delete should succeed").isTrue(); fileSplits.add(new Path(split.getAbsolutePath())); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java index 113abce99500..ab0d45c3b7ca 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetReader.java @@ -28,7 +28,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.UUID; import org.apache.avro.generic.GenericData; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.DataFiles; @@ -73,7 +72,7 @@ protected void writeAndValidate(Schema schema) throws IOException { List expected = RandomData.generateList(schema, 100, 0L); - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java index ffffb78e3f9e..fa62d6228118 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkParquetWriter.java @@ -26,7 +26,6 @@ import java.io.IOException; import java.nio.file.Path; import 
java.util.Iterator; -import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.io.CloseableIterable; @@ -89,7 +88,7 @@ public void testCorrectness() throws IOException { int numRows = 50_000; Iterable records = RandomData.generateSpark(COMPLEX_SCHEMA, numRows, 19981); - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java index 6f6b6e2392b6..e9a7c1c07a5a 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkRecordOrcReaderWriter.java @@ -26,7 +26,6 @@ import java.math.BigDecimal; import java.util.Iterator; import java.util.List; -import java.util.UUID; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; import org.apache.iceberg.data.GenericRecord; @@ -46,8 +45,7 @@ public class TestSparkRecordOrcReaderWriter extends AvroDataTest { private static final int NUM_RECORDS = 200; private void writeAndValidate(Schema schema, List expectedRecords) throws IOException { - final File originalFile = - File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + final File originalFile = File.createTempFile("junit", null, temp.toFile()); assertThat(originalFile.delete()).as("Delete should succeed").isTrue(); // Write few generic records into the original test file. @@ -70,7 +68,7 @@ private void writeAndValidate(Schema schema, List expectedRecords) throw assertEqualsUnsafe(schema.asStruct(), expectedRecords, reader, expectedRecords.size()); } - final File anotherFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + final File anotherFile = File.createTempFile("junit", null, temp.toFile()); assertThat(anotherFile.delete()).as("Delete should succeed").isTrue(); // Write those spark InternalRows into a new file again. 
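For orientation, the hunks above all converge on the same temp-file idiom under JUnit 5's @TempDir: create a placeholder file with a plain "junit" prefix instead of a random UUID (createTempFile already guarantees a unique name), then delete the placeholder so the writer under test can create the file itself. A minimal, self-contained sketch of that idiom follows; the class name, test body, and trailing comment are illustrative and not code from any single test in this series:

    import static org.assertj.core.api.Assertions.assertThat;

    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Path;
    import org.junit.jupiter.api.Test;
    import org.junit.jupiter.api.io.TempDir;

    class TempFileIdiomSketch {
      // JUnit 5 injects a per-test temporary directory; no TemporaryFolder rule is needed.
      @TempDir Path temp;

      @Test
      void writesIntoTempDir() throws IOException {
        // The "junit" prefix replaces the earlier UUID.randomUUID().toString() prefix.
        File testFile = File.createTempFile("junit", null, temp.toFile());
        // Delete the placeholder so the appender under test can create the file itself.
        assertThat(testFile.delete()).as("Delete should succeed").isTrue();
        // ... build a FileAppender on testFile and write generated rows here ...
      }
    }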
diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java index f5bf18e0bcd2..eeed9d1a03ce 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetDictionaryEncodedVectorizedReads.java @@ -23,7 +23,6 @@ import java.io.File; import java.io.IOException; -import java.util.UUID; import org.apache.avro.generic.GenericData; import org.apache.iceberg.Schema; import org.apache.iceberg.io.FileAppender; @@ -59,8 +58,7 @@ public void testVectorizedReadsWithNewContainers() throws IOException {} @Test public void testMixedDictionaryNonDictionaryReads() throws IOException { Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - File dictionaryEncodedFile = - File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File dictionaryEncodedFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dictionaryEncodedFile.delete()).as("Delete should succeed").isTrue(); Iterable dictionaryEncodableData = RandomData.generateDictionaryEncodableData( @@ -70,7 +68,7 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException { writer.addAll(dictionaryEncodableData); } - File plainEncodingFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File plainEncodingFile = File.createTempFile("junit", null, temp.toFile()); assertThat(plainEncodingFile.delete()).as("Delete should succeed").isTrue(); Iterable nonDictionaryData = RandomData.generate(schema, 10000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE); @@ -79,7 +77,7 @@ public void testMixedDictionaryNonDictionaryReads() throws IOException { } int rowGroupSize = PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT; - File mixedFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File mixedFile = File.createTempFile("junit", null, temp.toFile()); assertThat(mixedFile.delete()).as("Delete should succeed").isTrue(); Parquet.concat( ImmutableList.of(dictionaryEncodedFile, plainEncodingFile, dictionaryEncodedFile), diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java index 85bb065b3b74..5c4b216aff94 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/parquet/vectorized/TestParquetVectorizedReads.java @@ -27,7 +27,6 @@ import java.io.File; import java.io.IOException; import java.util.Iterator; -import java.util.UUID; import org.apache.avro.generic.GenericData; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; @@ -92,7 +91,7 @@ private void writeAndValidate( generateData(schema, numRecords, seed, nullPercentage, transform); // write a test parquet file using iceberg writer - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = getParquetWriter(schema, testFile)) { @@ -275,7 
+274,7 @@ public void testReadsForTypePromotedColumns() throws Exception { optional(102, "float_data", Types.FloatType.get()), optional(103, "decimal_data", Types.DecimalType.of(10, 5))); - File dataFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(writeSchema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); @@ -304,7 +303,7 @@ public void testSupportedReadsForParquetV2() throws Exception { optional(103, "double_data", Types.DoubleType.get()), optional(104, "decimal_data", Types.DecimalType.of(25, 5))); - File dataFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); @@ -319,7 +318,7 @@ public void testUnsupportedReadsForParquetV2() throws Exception { // Longs, ints, string types etc use delta encoding and which are not supported for vectorized // reads Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields()); - File dataFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File dataFile = File.createTempFile("junit", null, temp.toFile()); assertThat(dataFile.delete()).as("Delete should succeed").isTrue(); Iterable data = generateData(schema, 30000, 0L, RandomData.DEFAULT_NULL_PERCENTAGE, IDENTITY); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java index 5ad42f0c014b..8409f8cc8056 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java @@ -34,7 +34,6 @@ import java.util.List; import java.util.Map; import java.util.Random; -import java.util.UUID; import org.apache.avro.generic.GenericData.Record; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.Files; @@ -250,7 +249,7 @@ private void writeDataWithFailOnPartition( private Dataset createDataset(Iterable records, Schema schema) throws IOException { // this uses the SparkAvroReader to create a DataFrame from the list of records // it assumes that SparkAvroReader is correct - File testFile = File.createTempFile(UUID.randomUUID().toString(), null, temp.toFile()); + File testFile = File.createTempFile("junit", null, temp.toFile()); assertThat(testFile.delete()).as("Delete should succeed").isTrue(); try (FileAppender writer = From 73b47a676ec56f1df5f0da4dd496c6ee783c98e2 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 21 Dec 2023 20:41:45 +0530 Subject: [PATCH 16/21] run spotlessApply --- .../org/apache/iceberg/spark/data/TestParquetAvroWriter.java | 1 - .../java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java | 1 - .../java/org/apache/iceberg/spark/data/TestSparkAvroReader.java | 1 - 3 files changed, 3 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java index e03abdc6dff9..83f8f7f168b1 100644 --- 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestParquetAvroWriter.java @@ -26,7 +26,6 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Iterator; -import java.util.UUID; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java index 12b71ff9a7d0..11e60187fdc3 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroEnums.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.nio.file.Path; import java.util.List; -import java.util.UUID; import org.apache.avro.SchemaBuilder; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericData; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java index 4b0b17207f38..3e5088258a49 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroReader.java @@ -24,7 +24,6 @@ import java.io.File; import java.io.IOException; import java.util.List; -import java.util.UUID; import org.apache.avro.generic.GenericData.Record; import org.apache.iceberg.Files; import org.apache.iceberg.Schema; From 9152f98e9bd18396fbba0a9f923601592483b6d0 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 21 Dec 2023 20:53:12 +0530 Subject: [PATCH 17/21] fix file create to temp.resolve() --- .../org/apache/iceberg/spark/source/TestAvroScan.java | 2 +- .../apache/iceberg/spark/source/TestDataFrameWrites.java | 8 ++++---- .../org/apache/iceberg/spark/source/TestParquetScan.java | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java index 3c36a5dfde12..8345a4e0a697 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestAvroScan.java @@ -68,7 +68,7 @@ public static void stopSpark() { @Override protected void writeAndValidate(Schema schema) throws IOException { - File parent = new File(temp.toFile(), "avro"); + File parent = temp.resolve("avro").toFile(); File location = new File(parent, "test"); File dataFolder = new File(location, "data"); dataFolder.mkdirs(); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java index 8409f8cc8056..b2731cd8f9b3 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java @@ -146,7 +146,7 @@ protected void writeAndValidate(Schema schema) throws IOException { @TestTemplate public void testWriteWithCustomDataLocation() throws IOException { File location = createTableFolder(); 
- File tablePropertyDataLocation = new File(temp.toFile(), "test-table-property-data-dir"); + File tablePropertyDataLocation = temp.resolve("test-table-property-data-dir").toFile(); Table table = createTable(new Schema(SUPPORTED_PRIMITIVES.fields()), location); table .updateProperties() @@ -156,7 +156,7 @@ public void testWriteWithCustomDataLocation() throws IOException { } private File createTableFolder() throws IOException { - File parent = new File(temp.toFile(), "parquet"); + File parent = temp.resolve("parquet").toFile(); File location = new File(parent, "test"); assertThat(location.mkdirs()).as("Mkdir should succeed").isTrue(); return location; @@ -289,7 +289,7 @@ public void testNullableWithWriteOption() throws IOException { .as("Spark 3 rejects writing nulls to a required column") .startsWith("2"); - File location = new File(new File(temp.toFile(), "parquet"), "test"); + File location = new File(temp.resolve("parquet").toFile(), "test"); String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); @@ -343,7 +343,7 @@ public void testNullableWithSparkSqlOption() throws IOException { .as("Spark 3 rejects writing nulls to a required column") .startsWith("2"); - File location = new File(new File(temp.toFile(), "parquet"), "test"); + File location = new File(temp.resolve("parquet").toFile(), "test"); String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java index 2c3e0dc09b34..012fe6f89c21 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java @@ -143,7 +143,7 @@ public void testEmptyTableProjection() throws IOException { } private Table createTable(Schema schema) throws IOException { - File parent = new File(temp.toFile(), "parquet"); + File parent = temp.resolve("parquet").toFile(); File location = new File(parent, "test"); HadoopTables tables = new HadoopTables(CONF); return tables.create(schema, PartitionSpec.unpartitioned(), location.toString()); From ddabe1a538692058940458aa21a77ec8a6359b4a Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 21 Dec 2023 21:09:28 +0530 Subject: [PATCH 18/21] chain temp.resolve() --- .../org/apache/iceberg/spark/source/TestDataFrameWrites.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java index b2731cd8f9b3..f7aadcac512f 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java @@ -289,7 +289,7 @@ public void testNullableWithWriteOption() throws IOException { .as("Spark 3 rejects writing nulls to a required column") .startsWith("2"); - File location = new File(temp.resolve("parquet").toFile(), "test"); + File location = temp.resolve("parquet").resolve("test").toFile(); String sourcePath = 
String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); @@ -343,7 +343,7 @@ public void testNullableWithSparkSqlOption() throws IOException { .as("Spark 3 rejects writing nulls to a required column") .startsWith("2"); - File location = new File(temp.resolve("parquet").toFile(), "test"); + File location = temp.resolve("parquet").resolve("test").toFile(); String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location.toString()); String targetPath = String.format("%s/nullable_poc/targetFolder/", location.toString()); From 28d2c78ba3627bf4d22e62c62a13313c723c99dc Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 21 Dec 2023 21:35:44 +0530 Subject: [PATCH 19/21] improve assert statement --- .../apache/iceberg/spark/source/TestDataFrameWrites.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java index f7aadcac512f..25d873721005 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java @@ -194,15 +194,12 @@ private void writeAndValidateWithLocations(Table table, File location, File expe .addedDataFiles(table.io()) .forEach( dataFile -> - assertThat( - URI.create(dataFile.path().toString()) - .getPath() - .startsWith(expectedDataDir.getAbsolutePath())) + assertThat(URI.create(dataFile.path().toString()).getPath()) .as( String.format( "File should have the parent directory %s, but has: %s.", expectedDataDir.getAbsolutePath(), dataFile.path())) - .isTrue()); + .startsWith(expectedDataDir.getAbsolutePath())); } private List readTable(String location) { From eaf5e67e80d9fea07ef6b3385ceb375082ca1820 Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Thu, 21 Dec 2023 18:26:34 +0100 Subject: [PATCH 20/21] fix test parameterization --- .../spark/data/ParameterizedAvroDataTest.java | 284 ++++++++++++++++++ .../data/TestSparkOrcReadMetadataColumns.java | 8 +- .../spark/source/TestDataFrameWrites.java | 6 +- .../iceberg/spark/source/TestParquetScan.java | 4 +- 4 files changed, 293 insertions(+), 9 deletions(-) create mode 100644 spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/ParameterizedAvroDataTest.java diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/ParameterizedAvroDataTest.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/ParameterizedAvroDataTest.java new file mode 100644 index 000000000000..85effe7d39a7 --- /dev/null +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/ParameterizedAvroDataTest.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.spark.data; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.iceberg.Schema; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.TypeUtil; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.ListType; +import org.apache.iceberg.types.Types.LongType; +import org.apache.iceberg.types.Types.MapType; +import org.apache.iceberg.types.Types.StructType; +import org.apache.spark.sql.internal.SQLConf; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.io.TempDir; + +/** + * Copy of {@link AvroDataTest} that marks tests with @{@link org.junit.jupiter.api.TestTemplate} + * instead of @{@link Test} to make the tests work in a parameterized environment. + */ +public abstract class ParameterizedAvroDataTest { + + protected abstract void writeAndValidate(Schema schema) throws IOException; + + protected static final StructType SUPPORTED_PRIMITIVES = + StructType.of( + required(100, "id", LongType.get()), + optional(101, "data", Types.StringType.get()), + required(102, "b", Types.BooleanType.get()), + optional(103, "i", Types.IntegerType.get()), + required(104, "l", LongType.get()), + optional(105, "f", Types.FloatType.get()), + required(106, "d", Types.DoubleType.get()), + optional(107, "date", Types.DateType.get()), + required(108, "ts", Types.TimestampType.withZone()), + required(110, "s", Types.StringType.get()), + required(111, "uuid", Types.UUIDType.get()), + required(112, "fixed", Types.FixedType.ofLength(7)), + optional(113, "bytes", Types.BinaryType.get()), + required(114, "dec_9_0", Types.DecimalType.of(9, 0)), // int encoded + required(115, "dec_11_2", Types.DecimalType.of(11, 2)), // long encoded + required(116, "dec_20_5", Types.DecimalType.of(20, 5)), // requires padding + required(117, "dec_38_10", Types.DecimalType.of(38, 10)) // Spark's maximum precision + ); + + @TempDir protected Path temp; + + @TestTemplate + public void testSimpleStruct() throws IOException { + writeAndValidate(TypeUtil.assignIncreasingFreshIds(new Schema(SUPPORTED_PRIMITIVES.fields()))); + } + + @TestTemplate + public void testStructWithRequiredFields() throws IOException { + writeAndValidate( + TypeUtil.assignIncreasingFreshIds( + new Schema( + Lists.transform(SUPPORTED_PRIMITIVES.fields(), Types.NestedField::asRequired)))); + } + + @TestTemplate + public void testStructWithOptionalFields() throws IOException { + writeAndValidate( + TypeUtil.assignIncreasingFreshIds( + new Schema( + Lists.transform(SUPPORTED_PRIMITIVES.fields(), Types.NestedField::asOptional)))); + } + + @TestTemplate + public void testNestedStruct() throws IOException { + writeAndValidate( + TypeUtil.assignIncreasingFreshIds(new Schema(required(1, "struct", 
SUPPORTED_PRIMITIVES)))); + } + + @TestTemplate + public void testArray() throws IOException { + Schema schema = + new Schema( + required(0, "id", LongType.get()), + optional(1, "data", ListType.ofOptional(2, Types.StringType.get()))); + + writeAndValidate(schema); + } + + @TestTemplate + public void testArrayOfStructs() throws IOException { + Schema schema = + TypeUtil.assignIncreasingFreshIds( + new Schema( + required(0, "id", LongType.get()), + optional(1, "data", ListType.ofOptional(2, SUPPORTED_PRIMITIVES)))); + + writeAndValidate(schema); + } + + @TestTemplate + public void testMap() throws IOException { + Schema schema = + new Schema( + required(0, "id", LongType.get()), + optional( + 1, + "data", + MapType.ofOptional(2, 3, Types.StringType.get(), Types.StringType.get()))); + + writeAndValidate(schema); + } + + @TestTemplate + public void testNumericMapKey() throws IOException { + Schema schema = + new Schema( + required(0, "id", LongType.get()), + optional(1, "data", MapType.ofOptional(2, 3, LongType.get(), Types.StringType.get()))); + + writeAndValidate(schema); + } + + @TestTemplate + public void testComplexMapKey() throws IOException { + Schema schema = + new Schema( + required(0, "id", LongType.get()), + optional( + 1, + "data", + MapType.ofOptional( + 2, + 3, + StructType.of( + required(4, "i", Types.IntegerType.get()), + optional(5, "s", Types.StringType.get())), + Types.StringType.get()))); + + writeAndValidate(schema); + } + + @TestTemplate + public void testMapOfStructs() throws IOException { + Schema schema = + TypeUtil.assignIncreasingFreshIds( + new Schema( + required(0, "id", LongType.get()), + optional( + 1, + "data", + MapType.ofOptional(2, 3, Types.StringType.get(), SUPPORTED_PRIMITIVES)))); + + writeAndValidate(schema); + } + + @TestTemplate + public void testMixedTypes() throws IOException { + StructType structType = + StructType.of( + required(0, "id", LongType.get()), + optional( + 1, + "list_of_maps", + ListType.ofOptional( + 2, MapType.ofOptional(3, 4, Types.StringType.get(), SUPPORTED_PRIMITIVES))), + optional( + 5, + "map_of_lists", + MapType.ofOptional( + 6, 7, Types.StringType.get(), ListType.ofOptional(8, SUPPORTED_PRIMITIVES))), + required( + 9, + "list_of_lists", + ListType.ofOptional(10, ListType.ofOptional(11, SUPPORTED_PRIMITIVES))), + required( + 12, + "map_of_maps", + MapType.ofOptional( + 13, + 14, + Types.StringType.get(), + MapType.ofOptional(15, 16, Types.StringType.get(), SUPPORTED_PRIMITIVES))), + required( + 17, + "list_of_struct_of_nested_types", + ListType.ofOptional( + 19, + StructType.of( + Types.NestedField.required( + 20, + "m1", + MapType.ofOptional( + 21, 22, Types.StringType.get(), SUPPORTED_PRIMITIVES)), + Types.NestedField.optional( + 23, "l1", ListType.ofRequired(24, SUPPORTED_PRIMITIVES)), + Types.NestedField.required( + 25, "l2", ListType.ofRequired(26, SUPPORTED_PRIMITIVES)), + Types.NestedField.optional( + 27, + "m2", + MapType.ofOptional( + 28, 29, Types.StringType.get(), SUPPORTED_PRIMITIVES)))))); + + Schema schema = + new Schema( + TypeUtil.assignFreshIds(structType, new AtomicInteger(0)::incrementAndGet) + .asStructType() + .fields()); + + writeAndValidate(schema); + } + + @TestTemplate + public void testTimestampWithoutZone() throws IOException { + Schema schema = + TypeUtil.assignIncreasingFreshIds( + new Schema( + required(0, "id", LongType.get()), + optional(1, "ts_without_zone", Types.TimestampType.withoutZone()))); + + writeAndValidate(schema); + } + + protected void withSQLConf(Map conf, Action action) throws 
IOException { + SQLConf sqlConf = SQLConf.get(); + + Map currentConfValues = Maps.newHashMap(); + conf.keySet() + .forEach( + confKey -> { + if (sqlConf.contains(confKey)) { + String currentConfValue = sqlConf.getConfString(confKey); + currentConfValues.put(confKey, currentConfValue); + } + }); + + conf.forEach( + (confKey, confValue) -> { + if (SQLConf.isStaticConfigKey(confKey)) { + throw new RuntimeException("Cannot modify the value of a static config: " + confKey); + } + sqlConf.setConfString(confKey, confValue); + }); + + try { + action.invoke(); + } finally { + conf.forEach( + (confKey, confValue) -> { + if (currentConfValues.containsKey(confKey)) { + sqlConf.setConfString(confKey, currentConfValues.get(confKey)); + } else { + sqlConf.unsetConf(confKey); + } + }); + } + } + + @FunctionalInterface + protected interface Action { + void invoke() throws IOException; + } +} diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java index eccfb807e2fc..9d725250d3d2 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcReadMetadataColumns.java @@ -57,7 +57,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.unsafe.types.UTF8String; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestTemplate; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.io.TempDir; @@ -126,18 +126,18 @@ public void writeFile() throws IOException { } } - @Test + @TestTemplate public void testReadRowNumbers() throws IOException { readAndValidate(null, null, null, EXPECTED_ROWS); } - @Test + @TestTemplate public void testReadRowNumbersWithFilter() throws IOException { readAndValidate( Expressions.greaterThanOrEqual("id", 500), null, null, EXPECTED_ROWS.subList(500, 1000)); } - @Test + @TestTemplate public void testReadRowNumbersWithSplits() throws IOException { Reader reader; try { diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java index 25d873721005..e702f1f01eed 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWrites.java @@ -54,7 +54,7 @@ import org.apache.iceberg.spark.SparkSQLProperties; import org.apache.iceberg.spark.SparkSchemaUtil; import org.apache.iceberg.spark.SparkWriteOptions; -import org.apache.iceberg.spark.data.AvroDataTest; +import org.apache.iceberg.spark.data.ParameterizedAvroDataTest; import org.apache.iceberg.spark.data.RandomData; import org.apache.iceberg.spark.data.SparkAvroReader; import org.apache.iceberg.types.Types; @@ -76,7 +76,7 @@ import org.junit.jupiter.api.extension.ExtendWith; @ExtendWith(ParameterizedTestExtension.class) -public class TestDataFrameWrites extends AvroDataTest { +public class TestDataFrameWrites extends ParameterizedAvroDataTest { private static final Configuration CONF = new Configuration(); @Parameters(name = "format = {0}") @@ -84,7 +84,7 @@ public static Collection parameters() { return Arrays.asList("parquet", "avro", "orc"); } - @Parameter private String format = "parquet"; + @Parameter private 
String format; private static SparkSession spark = null; private static JavaSparkContext sc = null; diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java index 012fe6f89c21..1d7e1288dcc4 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java @@ -47,7 +47,7 @@ import org.apache.iceberg.hadoop.HadoopTables; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.parquet.Parquet; -import org.apache.iceberg.spark.data.AvroDataTest; +import org.apache.iceberg.spark.data.ParameterizedAvroDataTest; import org.apache.iceberg.spark.data.RandomData; import org.apache.iceberg.spark.data.TestHelpers; import org.apache.iceberg.types.TypeUtil; @@ -62,7 +62,7 @@ import org.junit.jupiter.api.io.TempDir; @ExtendWith(ParameterizedTestExtension.class) -public class TestParquetScan extends AvroDataTest { +public class TestParquetScan extends ParameterizedAvroDataTest { private static final Configuration CONF = new Configuration(); private static SparkSession spark = null; From 041eca26ba6a5aeaa737069e5a1ac2e1f62886cb Mon Sep 17 00:00:00 2001 From: Eduard Tudenhoefner Date: Thu, 21 Dec 2023 18:31:40 +0100 Subject: [PATCH 21/21] remove semicolon --- .../java/org/apache/iceberg/spark/source/TestParquetScan.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java index 1d7e1288dcc4..ebeed62acce4 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestParquetScan.java @@ -96,7 +96,7 @@ protected void writeAndValidate(Schema schema) throws IOException { type -> type.isMapType() && type.asMapType().keyType() != Types.StringType.get())) .as("Cannot handle non-string map keys in parquet-avro") .isNull(); - ; + assertThat(vectorized).as("should not be null").isNotNull(); Table table = createTable(schema);
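The parameterization fix in the last two patches follows a single idiom: ParameterizedTestExtension supplies the values returned by the static @Parameters method, injects them into @Parameter fields, and runs each @TestTemplate method once per parameter set, which is why plain @Test methods and constructor parameters are replaced. A condensed sketch of that wiring is below; the class name, generic return type, and trivial test body are illustrative rather than code from any one file:

    import static org.assertj.core.api.Assertions.assertThat;

    import java.util.Arrays;
    import java.util.Collection;
    import org.apache.iceberg.Parameter;
    import org.apache.iceberg.ParameterizedTestExtension;
    import org.apache.iceberg.Parameters;
    import org.junit.jupiter.api.TestTemplate;
    import org.junit.jupiter.api.extension.ExtendWith;

    @ExtendWith(ParameterizedTestExtension.class)
    class VectorizedFlagSketch {

      @Parameters(name = "vectorized = {0}")
      public static Collection<Boolean> parameters() {
        return Arrays.asList(false, true);
      }

      // Populated by the extension before each @TestTemplate invocation.
      @Parameter private boolean vectorized;

      @TestTemplate
      public void runsOncePerParameter() {
        assertThat(vectorized).isIn(true, false);
      }
    }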