From d490b1a32210e16b5485d2b5e51f445d79861e4c Mon Sep 17 00:00:00 2001 From: SimhadriG Date: Wed, 15 May 2024 08:13:48 +0530 Subject: [PATCH] Address issues with UTC+14 and UTC-12 --- .../hive/common/type/TimestampTZUtil.java | 6 + .../io/parquet/timestamp/NanoTimeUtils.java | 53 ++++++-- ...stParquetTimestampsHive2Compatibility.java | 113 ++++++++++++------ 3 files changed, 126 insertions(+), 46 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java index 7a15c4abad76..adc4dde15e24 100644 --- a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java +++ b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java @@ -191,6 +191,12 @@ public static Timestamp convertTimestampToZone(Timestamp ts, ZoneId fromZone, Zo DateFormat formatter = getLegacyDateFormatter(); formatter.setTimeZone(TimeZone.getTimeZone(fromZone)); java.util.Date date = formatter.parse(ts.format(TIMESTAMP_FORMATTER)); + +// if(formatter.format(date).contains("02-29") && legacyJulianLeapYearCheck(ts.getYear())){ +// legacyLeapYearAdjustment = -2; +// ts.minusDays(legacyLeapYearAdjustment); +// date = formatter.parse(ts.format(TIMESTAMP_FORMATTER)); +// } // Set the formatter to use a different timezone formatter.setTimeZone(TimeZone.getTimeZone(toZone)); Timestamp result = Timestamp.valueOf(formatter.format(date)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java index 97b527ed366d..dc6c1d810c1b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java @@ -15,11 +15,14 @@ import java.time.DateTimeException; import java.time.LocalDateTime; +import java.time.Month; import java.time.ZoneId; import java.time.ZoneOffset; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; +import java.util.HashSet; +import java.util.Set; import java.util.TimeZone; import java.util.concurrent.TimeUnit; @@ -28,6 +31,7 @@ import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.threeten.extra.chrono.JulianChronology; /** * Utilities for converting from java.sql.Timestamp to parquet timestamp. @@ -105,18 +109,8 @@ public static Timestamp getTimestamp(NanoTime nt, ZoneId targetZone, boolean leg JulianDate jDateTime = JulianDate.of((double) julianDay); LocalDateTime localDateTime; - int leapYearDateAdjustment = 0; - - try { - localDateTime = jDateTime.toLocalDateTime(); - } catch (DateTimeException e) { - if (e.getMessage().contains(ErrorMsg.NOT_LEAP_YEAR.getMsg()) && legacyConversion) { - leapYearDateAdjustment = 1; - localDateTime = jDateTime.add(leapYearDateAdjustment).toLocalDateTime(); - } else { - throw e; - } - } + int leapYearDateAdjustment = legacyConversion ? julianLeapYearAdjustment(jDateTime) : 0; + localDateTime = jDateTime.add(leapYearDateAdjustment).toLocalDateTime(); Calendar calendar = getGMTCalendar(); calendar.set(Calendar.YEAR, localDateTime.getYear()); calendar.set(Calendar.MONTH, localDateTime.getMonth().getValue() - 1); //java calendar index starting at 1. @@ -139,4 +133,39 @@ public static Timestamp getTimestamp(NanoTime nt, ZoneId targetZone, boolean leg return ts; } + + private static int julianLeapYearAdjustment(JulianDate jDateTime) { + Set validDaysOfMonth = new HashSet<>(); + validDaysOfMonth.add(1); + validDaysOfMonth.add(2); + validDaysOfMonth.add(27); + validDaysOfMonth.add(28); + validDaysOfMonth.add(29); + LocalDateTime localDateTime; + try { + localDateTime = jDateTime.toLocalDateTime(); + } catch (DateTimeException e) { + if (e.getMessage().contains(ErrorMsg.NOT_LEAP_YEAR.getMsg())) { + return 2; + } else { + throw e; + } + } + + int year = localDateTime.getYear(); + int dayOfMonth = localDateTime.getDayOfMonth(); + boolean isJulianLeapYear = (year % 4 == 0 && year % 400 != 0 && year % 100 == 0); + + if (year < 1582 && isJulianLeapYear && validDaysOfMonth.contains(dayOfMonth)) { + switch (localDateTime.getMonth()) { + case FEBRUARY: + return -2; + case MARCH: + return 2; + default: + return 0; + } + } + return 0; + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java index 32fb612d4133..1fdb3dafe733 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java @@ -102,12 +102,11 @@ void testWriteHive2ReadHive4UsingLegacyConversionWithZone(String timestampString * Tests that timestamps written using Hive2 APIs on julian leap years are read correctly by Hive4 APIs when legacy * conversion is on. */ - @ParameterizedTest(name = "{0} - Zone: {1}") + @ParameterizedTest(name = "{0}") @MethodSource("generateTimestampsAndZoneIds") - void testWriteHive2ReadHive4UsingLegacyConversionWithJulianLeapYears(String timestampString) { + void testWriteHive2ReadHive4UsingLegacyConversionWithJulianLeapYears(String timestampString, String zoneId) { TimeZone original = TimeZone.getDefault(); try { - String zoneId = "Asia/Singapore"; TimeZone.setDefault(TimeZone.getTimeZone(zoneId)); NanoTime nt = writeHive2(timestampString); Timestamp ts = readHive4(nt, zoneId, true); @@ -117,6 +116,51 @@ void testWriteHive2ReadHive4UsingLegacyConversionWithJulianLeapYears(String time } } + @ParameterizedTest(name = "{0}") + @MethodSource("generateTimestampsAndZoneIds28thFeb") + void testWriteHive2ReadHive4UsingLegacyConversionWithJulianLeapYearsFor28thFeb(String timestampString, + String zoneId) { + TimeZone original = TimeZone.getDefault(); + try { + TimeZone.setDefault(TimeZone.getTimeZone(zoneId)); + NanoTime nt = writeHive2(timestampString); + Timestamp ts = readHive4(nt, zoneId, true); + assertEquals(timestampString, ts.toString()); + } finally { + TimeZone.setDefault(original); + } + } + + @ParameterizedTest(name = " - From: Zone {0}, timestamp: {2}, To: Zone:{1}, expected Timestamp {3}") + @MethodSource("julianLeapYearEdgeCases") + void testWriteHive2ReadHive4UsingLegacyConversionWithJulianLeapYearsEdgeCase(String fromZoneId, String toZoneId, + String timestampString, String expected) { + TimeZone original = TimeZone.getDefault(); + try { + TimeZone.setDefault(TimeZone.getTimeZone(fromZoneId)); + NanoTime nt = writeHive2(timestampString); + Timestamp ts = readHive4(nt, toZoneId, true); + assertEquals(expected, ts.toString()); + } finally { + TimeZone.setDefault(original); + } + } + + private static Stream julianLeapYearEdgeCases() { + return Stream.of(Arguments.of("Etc/GMT+12", "Pacific/Kiritimati", "0200-02-27 22:00:00.000000001", + "0200-03-01 00:00:00.000000001"), + Arguments.of("Pacific/Kiritimati", "Etc/GMT+12", "0200-03-01 00:00:00.000000001", + "0200-02-27 22:00:00.000000001"), + Arguments.of("Pacific/Kiritimati", "Etc/GMT+12", "0200-03-02 00:00:00.000000001", + "0200-02-28 22:00:00.000000001"), + Arguments.of("Etc/GMT+12", "Pacific/Kiritimati", "0200-03-02 00:00:00.000000001", + "0200-03-03 02:00:00.000000001"), + Arguments.of("Etc/GMT+12", "Etc/GMT-12", "0200-02-28 00:00:00.000000001", "0200-03-01 00:00:00.000000001"), + Arguments.of("Etc/GMT-12", "Etc/GMT+12", "0200-03-01 00:00:00.000000001", "0200-02-28 00:00:00.000000001"), + Arguments.of("Asia/Singapore", "Asia/Singapore", "0200-03-01 00:00:00.000000001", + "0200-03-01 00:00:00.000000001")); + } + /** * Tests that timestamps written using Hive4 APIs are read correctly by Hive4 APIs when legacy conversion is on. */ @@ -202,8 +246,14 @@ public String get() { * UTC-12 : Etc/GMT+12 along with few other zones */ private static Stream generateTimestampsAndZoneIds() { - return generateJulianLeapYearTimestamps() - .flatMap(timestampString -> Stream.of("Asia/Singapore", "Pacific/Kiritimati", "Etc/GMT+12", "Pacific/Niue") + return generateJulianLeapYearTimestamps().flatMap( + timestampString -> Stream.of("Asia/Singapore", "Pacific/Kiritimati", "Etc/GMT+12", "Pacific/Niue") + .map(zoneId -> Arguments.of(timestampString, zoneId))); + } + + private static Stream generateTimestampsAndZoneIds28thFeb() { + return generateJulianLeapYearTimestamps28thFeb().flatMap( + timestampString -> Stream.of("Asia/Singapore", "Pacific/Kiritimati", "Etc/GMT+12", "Pacific/Niue") .map(zoneId -> Arguments.of(timestampString, zoneId))); } @@ -214,42 +264,37 @@ private static Stream generateJulianLeapYearTimestamps() { @Override public String get() { StringBuilder sb = new StringBuilder(29); - int year = (i % 9999) + 1; + int year = ((i % 9999) + 1) * 100; + sb.append(zeros(4 - digits(year))); + sb.append(year); + sb.append("-03-01 00:00:00.000000001"); + i++; + return sb.toString(); + } + }) + // Exclude dates falling in the default Gregorian change date since legacy code does not handle that interval + // gracefully. It is expected that these do not work well when legacy APIs are in use. 0200-03-01 01:01:01.000000001 + .filter(s -> !s.startsWith("1582-10")).limit(3000), Stream.of("9999-12-31 23:59:59.999")); + } + + private static Stream generateJulianLeapYearTimestamps28thFeb() { + return Stream.concat(Stream.generate(new Supplier() { + int i = 0; + + @Override + public String get() { + StringBuilder sb = new StringBuilder(29); + int year = ((i % 9999) + 1) * 100; sb.append(zeros(4 - digits(year))); sb.append(year); - sb.append('-'); - int month = 3; - sb.append(zeros(2 - digits(month))); - sb.append(month); - sb.append('-'); - int day = 1; - sb.append(zeros(2 - digits(day))); - sb.append(day); - sb.append(' '); - int hour = i % 24; - sb.append(zeros(2 - digits(hour))); - sb.append(hour); - sb.append(':'); - int minute = i % 60; - sb.append(zeros(2 - digits(minute))); - sb.append(minute); - sb.append(':'); - int second = i % 60; - sb.append(zeros(2 - digits(second))); - sb.append(second); - sb.append('.'); - // Bitwise OR with one to avoid times with trailing zeros - int nano = (i % 1000000000) | 1; - sb.append(zeros(9 - digits(nano))); - sb.append(nano); + sb.append("-02-28 00:00:00.000000001"); i++; return sb.toString(); } }) // Exclude dates falling in the default Gregorian change date since legacy code does not handle that interval - // gracefully. It is expected that these do not work well when legacy APIs are in use. - .filter(s -> !s.startsWith("1582-10")) - .limit(3000), Stream.of("9999-12-31 23:59:59.999")); + // gracefully. It is expected that these do not work well when legacy APIs are in use. 0200-03-01 01:01:01.000000001 + .filter(s -> !s.startsWith("1582-10")).limit(3000), Stream.of("9999-12-31 23:59:59.999")); }