Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NIFI-8023 Added toLocalDate() and updated toDate() in DataTypeUtils #4734

Closed
wants to merge 6 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand All @@ -74,6 +81,8 @@
public class DataTypeUtils {
private static final Logger logger = LoggerFactory.getLogger(DataTypeUtils.class);

private static final String TIME_ZONE_PATTERN = "Z";

// Regexes for parsing Floating-Point numbers
private static final String OptionalSign = "[\\-\\+]?";
private static final String Infinity = "(Infinity)";
Expand Down Expand Up @@ -1040,6 +1049,93 @@ private static Object toEnum(Object value, EnumDataType dataType, String fieldNa
throw new IllegalTypeConversionException("Cannot convert value " + value + " of type " + dataType.toString() + " for field " + fieldName);
}

/**
* Convert value to Local Date with support for conversion from numbers or formatted strings
*
* @param value Value to be converted
* @param formatter Supplier for Date Time Formatter can be null values other than numeric strings
* @param fieldName Field Name for value to be converted
* @return Local Date or null when value to be converted is null
* @throws IllegalTypeConversionException Thrown when conversion from string fails or unsupported value provided
*/
public static LocalDate toLocalDate(final Object value, final Supplier<DateTimeFormatter> formatter, final String fieldName) {
LocalDate localDate;

if (value == null) {
return null;
} else if (value instanceof LocalDate) {
localDate = (LocalDate) value;
} else if (value instanceof java.util.Date) {
final java.util.Date date = (java.util.Date) value;
localDate = parseLocalDateEpochMillis(date.getTime());
} else if (value instanceof Number) {
final long epochMillis = ((Number) value).longValue();
localDate = parseLocalDateEpochMillis(epochMillis);
} else if (value instanceof String) {
try {
localDate = parseLocalDate((String) value, formatter);
} catch (final RuntimeException e) {
final String message = String.format("Failed Conversion of Field [%s] from String [%s] to LocalDate with Formatter [%s]", fieldName, value, formatter, e);
exceptionfactory marked this conversation as resolved.
Show resolved Hide resolved
throw new IllegalTypeConversionException(message);
}
} else {
final String message = String.format("Failed Conversion of Field [%s] from Value [%s] Type [%s] to LocalDate", fieldName, value, value.getClass());
throw new IllegalTypeConversionException(message);
}

return localDate;
}

/**
 * Parse Local Date from a String, preferring the supplied Date Time Formatter
 *
 * <p>The value is trimmed first. When no formatter is available, because the supplier is null or because it
 * supplies null, the trimmed value is read as a number of epoch milliseconds.
 *
 * @param value String not null containing either formatted string or number of epoch milliseconds
 * @param formatter Supplier for Date Time Formatter, can be null
 * @return Local Date or null when the trimmed value is empty
 */
private static LocalDate parseLocalDate(final String value, final Supplier<DateTimeFormatter> formatter) {
    final String trimmed = value.trim();
    if (trimmed.isEmpty()) {
        return null;
    }

    // The supplier is consulted only for non-empty input
    final DateTimeFormatter resolvedFormatter = formatter == null ? null : formatter.get();
    return resolvedFormatter == null
            ? parseLocalDateEpochMillis(trimmed)
            : LocalDate.parse(trimmed, resolvedFormatter);
}

/**
 * Convert a string holding a number of epoch milliseconds to a Local Date
 *
 * @param normalized Normalized String expected to contain epoch milliseconds as a parsable long
 * @return Local Date converted from the parsed epoch milliseconds
 */
private static LocalDate parseLocalDateEpochMillis(final String normalized) {
    // Long.parseLong rejects non-numeric input with NumberFormatException
    return parseLocalDateEpochMillis(Long.parseLong(normalized));
}

/**
 * Convert epoch milliseconds to the Local Date observed in the system default time zone
 *
 * @param epochMillis Epoch milliseconds
 * @return Local Date of the corresponding instant in the system default time zone
 */
private static LocalDate parseLocalDateEpochMillis(final long epochMillis) {
    return Instant.ofEpochMilli(epochMillis)
            .atZone(ZoneId.systemDefault())
            .toLocalDate();
}

public static java.sql.Date toDate(final Object value, final Supplier<DateFormat> format, final String fieldName) {
if (value == null) {
return null;
Expand Down Expand Up @@ -1074,8 +1170,7 @@ public static java.sql.Date toDate(final Object value, final Supplier<DateFormat
if (dateFormat == null) {
return new Date(Long.parseLong(string));
}
final java.util.Date utilDate = dateFormat.parse(string);
return new Date(utilDate.getTime());
return parseDate(string, dateFormat);
} catch (final ParseException | NumberFormatException e) {
throw new IllegalTypeConversionException("Could not convert value [" + value
+ "] of type java.lang.String to Date because the value is not in the expected date format: " + format + " for field " + fieldName);
Expand All @@ -1085,6 +1180,31 @@ public static java.sql.Date toDate(final Object value, final Supplier<DateFormat
throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Date for field " + fieldName);
}

/**
 * Parse a string with the provided Date Format and return the result as a Date
 *
 * <p>When isTimeZoneAdjustmentRequired() reports true for the Date Format, the raw offset of the system
 * default Time Zone is subtracted from the parsed instant before the Date is created.
 * NOTE(review): TimeZone.getRawOffset() does not include daylight saving time; confirm that is intended.
 *
 * @param string String to be parsed
 * @param dateFormat Date Format used for parsing
 * @return Date built from the parsed, and possibly adjusted, epoch milliseconds
 * @throws ParseException Thrown when the Date Format cannot parse the string
 */
private static Date parseDate(final String string, final DateFormat dateFormat) throws ParseException {
    final Instant unadjusted = dateFormat.parse(string).toInstant();
    if (!isTimeZoneAdjustmentRequired(dateFormat)) {
        return new Date(unadjusted.toEpochMilli());
    }

    // Shift by the system default raw offset when the format carries no time zone information
    final long rawOffsetMillis = TimeZone.getDefault().getRawOffset();
    final Instant adjusted = unadjusted.minus(rawOffsetMillis, ChronoUnit.MILLIS);
    return new Date(adjusted.toEpochMilli());
}

private static boolean isTimeZoneAdjustmentRequired(final DateFormat dateFormat) {
boolean adjustmentRequired = false;

if (dateFormat instanceof SimpleDateFormat) {
final SimpleDateFormat simpleDateFormat = (SimpleDateFormat) dateFormat;
final String pattern = simpleDateFormat.toPattern();
adjustmentRequired = !pattern.contains(TIME_ZONE_PATTERN);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe this is sufficient. According to https://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html, the timezone can be represented using z, Z, or X, each having a different meaning. I think we have to account for all of these.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I adjusted the check to use a regular expression pattern for all three characters.

}

return adjustmentRequired;
}

public static boolean isDateTypeCompatible(final Object value, final String format) {
if (value == null) {
return false;
Expand Down Expand Up @@ -1182,6 +1302,20 @@ public static DateFormat getDateFormat(final String format, final String timezon
return df;
}

/**
 * Build a Date Time Formatter for the pattern, bound to the given Zone Identifier
 *
 * @param pattern Date Format Pattern
 * @param zoneId Time Zone Identifier
 * @return Date Time Formatter, or null when either the pattern or the Zone Identifier is null
 */
public static DateTimeFormatter getDateTimeFormatter(final String pattern, final ZoneId zoneId) {
    final boolean configured = pattern != null && zoneId != null;
    return configured ? DateTimeFormatter.ofPattern(pattern).withZone(zoneId) : null;
}

public static boolean isTimeTypeCompatible(final Object value, final String format) {
return isDateTypeCompatible(value, format);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.sql.Types;
import java.text.DateFormat;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand All @@ -48,9 +55,22 @@
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;

public class TestDataTypeUtils {
// Local Date for epoch day zero, used as the expected result of the Local Date conversions
private static final LocalDate FIRST_EPOCH_DAY = LocalDate.ofEpochDay(0);

// First epoch day written as year-month-day
private static final String FIRST_EPOCH_DAY_FORMATTED = "1970-01-01";

// Pattern matching FIRST_EPOCH_DAY_FORMATTED
private static final String YEAR_MONTH_DAY_PATTERN = "yyyy-MM-dd";

// Pattern with a different field order, used to provoke parsing failures
private static final String MONTH_DAY_YEAR_PATTERN = "MM-dd-yyyy";

// 12 * 60 * 60 * 1000 milliseconds after the epoch
private static final long TWELVE_HOURS_EPOCH_MILLIS = 43200000;

// Field name passed to the conversion methods under test
private static final String FIELD_NAME = "RecordField";

/**
* This is a unit test to verify conversion java Date objects to Timestamps. Support for this was
* required in order to help the MongoDB packages handle date/time logical types in the Record API.
Expand All @@ -75,13 +95,84 @@ public void testDateToTimestamp() {
*/
@Test
public void testTimestampToDate() {
java.util.Date date = new java.util.Date();
java.sql.Date date = java.sql.Date.valueOf(FIRST_EPOCH_DAY_FORMATTED);
Timestamp ts = DataTypeUtils.toTimestamp(date, null, null);
assertNotNull(ts);

java.sql.Date output = DataTypeUtils.toDate(ts, null, null);
assertNotNull(output);
assertEquals("Timestamps didn't match", output.getTime(), ts.getTime());
assertEquals("Timestamps didn't match", date.getTime(), output.getTime());
}

/**
 * A date-time string without zone information is expected to convert to the epoch milliseconds of that
 * local date-time in the system default time zone.
 */
@Test
public void testLocalDateTimeStringToDate() {
    final String localDateTime = "1970-01-01T00:00:00";
    final long expectedEpochMillis = LocalDateTime.parse(localDateTime)
            .atZone(ZoneId.systemDefault())
            .toInstant()
            .toEpochMilli();

    final java.sql.Date converted = DataTypeUtils.toDate(
            localDateTime,
            () -> DataTypeUtils.getDateFormat("yyyy-MM-dd'T'HH:mm:ss"),
            FIELD_NAME);

    assertEquals(expectedEpochMillis, converted.getTime());
}

/**
 * An ISO instant string parsed with a pattern ending in a literal Z is expected to convert to the epoch
 * milliseconds of that instant.
 */
@Test
public void testInstantStringToDate() {
    final String instant = "1970-01-01T00:00:00Z";
    final long expectedEpochMillis = Instant.parse(instant).toEpochMilli();

    final java.sql.Date converted = DataTypeUtils.toDate(
            instant,
            () -> DataTypeUtils.getDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"),
            FIELD_NAME);

    assertEquals(expectedEpochMillis, converted.getTime());
}

/**
 * A number of epoch milliseconds is expected to convert to the first epoch day.
 */
@Test
public void testEpochMillisNumberToLocalDate() {
    assertEquals(
            FIRST_EPOCH_DAY,
            DataTypeUtils.toLocalDate(TWELVE_HOURS_EPOCH_MILLIS, null, FIELD_NAME));
}

/**
 * A string of epoch milliseconds without a formatter is expected to convert to the first epoch day.
 */
@Test
public void testEpochMillisStringToLocalDate() {
    final String epochMillis = String.valueOf(TWELVE_HOURS_EPOCH_MILLIS);
    final LocalDate converted = DataTypeUtils.toLocalDate(epochMillis, null, FIELD_NAME);
    assertEquals(FIRST_EPOCH_DAY, converted);
}

/**
 * A java.util.Date twelve hours after the epoch is expected to convert to the first epoch day.
 */
@Test
public void testUtilDateToLocalDate() {
    final java.util.Date utilDate = new java.util.Date(TWELVE_HOURS_EPOCH_MILLIS);
    final LocalDate converted = DataTypeUtils.toLocalDate(utilDate, null, FIELD_NAME);
    assertEquals(FIRST_EPOCH_DAY, converted);
}

/**
 * A year-month-day string with a matching formatter is expected to convert to the first epoch day.
 */
@Test
public void testFormattedStringToLocalDate() {
    final DateTimeFormatter yearMonthDay = DataTypeUtils.getDateTimeFormatter(YEAR_MONTH_DAY_PATTERN, ZoneId.systemDefault());
    final LocalDate converted = DataTypeUtils.toLocalDate(FIRST_EPOCH_DAY_FORMATTED, () -> yearMonthDay, FIELD_NAME);
    assertEquals(FIRST_EPOCH_DAY, converted);
}

/**
 * A formatted date string without a formatter is expected to be rejected.
 */
@Test
public void testFormattedStringToLocalDateNullFormatter() {
    assertThrows(IllegalTypeConversionException.class, () -> {
        DataTypeUtils.toLocalDate(FIRST_EPOCH_DAY_FORMATTED, null, FIELD_NAME);
    });
}

/**
 * A year-month-day string paired with a month-day-year formatter is expected to be rejected.
 */
@Test
public void testFormattedStringToLocalDateIncorrectFormatter() {
    final DateTimeFormatter monthDayYear = DataTypeUtils.getDateTimeFormatter(MONTH_DAY_YEAR_PATTERN, ZoneId.systemDefault());
    assertThrows(IllegalTypeConversionException.class, () -> {
        DataTypeUtils.toLocalDate(FIRST_EPOCH_DAY_FORMATTED, () -> monthDayYear, FIELD_NAME);
    });
}

/**
 * A value of an unsupported type, here an empty List, is expected to be rejected.
 */
@Test
public void testListToLocalDateException() {
    assertThrows(IllegalTypeConversionException.class, () -> {
        DataTypeUtils.toLocalDate(Collections.emptyList(), null, FIELD_NAME);
    });
}

/**
 * A date string parsed with a Date Format created without an explicit time zone is expected to produce a
 * Date whose string form equals the input.
 */
@Test
public void testFormattedStringToDateWithImplicitUniversalZoneId() {
    final DateFormat yearMonthDay = DataTypeUtils.getDateFormat(YEAR_MONTH_DAY_PATTERN);
    final java.sql.Date converted = DataTypeUtils.toDate(FIRST_EPOCH_DAY_FORMATTED, () -> yearMonthDay, FIELD_NAME);
    final String formatted = converted.toString();
    assertEquals(FIRST_EPOCH_DAY_FORMATTED, formatted);
}

/**
 * A date string parsed with a Date Format created for the UTC zone identifier is expected to produce a
 * Date whose string form equals the input.
 */
@Test
public void testFormattedStringToDateWithUniversalZoneId() {
    final String universalZoneId = ZoneOffset.UTC.getId();
    final DateFormat yearMonthDay = DataTypeUtils.getDateFormat(YEAR_MONTH_DAY_PATTERN, universalZoneId);
    final java.sql.Date converted = DataTypeUtils.toDate(FIRST_EPOCH_DAY_FORMATTED, () -> yearMonthDay, FIELD_NAME);
    final String formatted = converted.toString();
    assertEquals(FIRST_EPOCH_DAY_FORMATTED, formatted);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,44 @@ public void testConvertToAvroStreamForDateTimeAsLogicalType() throws SQLExceptio
);
}

/**
 * Writes a single DATE column through convertToAvroStream() with logical types enabled and checks that
 * every record read back carries the expected number of epoch days.
 */
@Test
public void testConvertToAvroStreamForDateLogicalType() throws SQLException, IOException {
    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder()
            .convertNames(true)
            .useLogicalTypes(true)
            .build();

    // Mocked metadata describing one DATE column
    final int columnNumber = 1;
    final String columnName = "DATE_COLUMN";
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    when(metadata.getColumnCount()).thenReturn(columnNumber);
    when(metadata.getTableName(anyInt())).thenReturn("TABLE");
    when(metadata.getColumnType(columnNumber)).thenReturn(Types.DATE);
    when(metadata.getColumnName(columnNumber)).thenReturn(columnName);

    // 1970-01-31 is thirty days after the epoch
    final int epochDays = 30;
    final String resultSetDateFormatted = "1970-01-31";
    final java.sql.Date resultSetDate = java.sql.Date.valueOf(resultSetDateFormatted);

    final ResultSet rs = JdbcCommonTestUtils.resultSetReturningMetadata(metadata);
    when(rs.getObject(columnNumber)).thenReturn(resultSetDate);

    final ByteArrayOutputStream avroOutputStream = new ByteArrayOutputStream();
    JdbcCommon.convertToAvroStream(rs, avroOutputStream, options, null);

    final byte[] serialized = avroOutputStream.toByteArray();
    final InputStream avroInputStream = new ByteArrayInputStream(serialized);
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(avroInputStream, datumReader)) {
        // The previous record instance is handed back to next() for reuse
        GenericRecord current = null;
        while (dataFileStream.hasNext()) {
            current = dataFileStream.next(current);
            final Object dateField = current.get(columnName);
            assertEquals(epochDays, dateField);
        }
    }
}

private void testConvertToAvroStreamForDateTime(
JdbcCommon.AvroConversionOptions options, BiConsumer<GenericRecord, java.sql.Date> assertDate,
BiConsumer<GenericRecord, Time> assertTime, BiConsumer<GenericRecord, Timestamp> assertTimeStamp)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
import java.sql.Time;
import java.sql.Timestamp;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.AbstractMap;
import java.util.ArrayList;
Expand Down Expand Up @@ -665,8 +666,9 @@ private static Object convertToAvroObject(final Object rawValue, final Schema fi

if (LOGICAL_TYPE_DATE.equals(logicalType.getName())) {
final String format = AvroTypeUtil.determineDataType(fieldSchema).getFormat();
final java.sql.Date date = DataTypeUtils.toDate(rawValue, () -> DataTypeUtils.getDateFormat(format), fieldName);
return (int) ChronoUnit.DAYS.between(Instant.EPOCH, Instant.ofEpochMilli(date.getTime()));
// Parse Local Date using System Default Time Zone since Local Date does not include Time Zone
final LocalDate localDate = DataTypeUtils.toLocalDate(rawValue, () -> DataTypeUtils.getDateTimeFormatter(format, ZoneOffset.systemDefault()), fieldName);
return (int) localDate.toEpochDay();
} else if (LOGICAL_TYPE_TIME_MILLIS.equals(logicalType.getName())) {
final String format = AvroTypeUtil.determineDataType(fieldSchema).getFormat();
final Time time = DataTypeUtils.toTime(rawValue, () -> DataTypeUtils.getDateFormat(format), fieldName);
Expand Down Expand Up @@ -987,7 +989,8 @@ private static Object normalizeValue(final Object value, final Schema avroSchema
final String logicalName = logicalType.getName();
if (LOGICAL_TYPE_DATE.equals(logicalName)) {
// date logical name means that the value is number of days since Jan 1, 1970
return new java.sql.Date(TimeUnit.DAYS.toMillis((int) value));
final LocalDate localDate = LocalDate.ofEpochDay((int) value);
return java.sql.Date.valueOf(localDate);
} else if (LOGICAL_TYPE_TIME_MILLIS.equals(logicalName)) {
// time-millis logical name means that the value is number of milliseconds since midnight.
return new java.sql.Time((int) value);
Expand Down