Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Hotfix] fix http source can not read yyyy-MM-dd HH:mm:ss format bug & Improve DateTime Utils #6601

Merged
merged 7 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,25 @@
package org.apache.seatunnel.common.utils;

import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.SignStyle;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalQueries;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import static java.time.temporal.ChronoField.DAY_OF_MONTH;
import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
import static java.time.temporal.ChronoField.YEAR;

public class DateTimeUtils {

Expand All @@ -48,6 +62,162 @@ public class DateTimeUtils {
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601.value));
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_SSS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSS_ISO8601.value));
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601.value));
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601.value));
}

// if the datatime string length is 19, find the DateTimeFormatter from this map
public static final Map<Pattern, DateTimeFormatter> YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP =
new LinkedHashMap<>();
public static Set<Map.Entry<Pattern, DateTimeFormatter>>
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

// if the datatime string length bigger than 19, find the DateTimeFormatter from this map
public static final Map<Pattern, DateTimeFormatter> YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP =
new LinkedHashMap<>();
public static Set<Map.Entry<Pattern, DateTimeFormatter>>
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

// if the datatime string length is 14, use this formatter
public static final DateTimeFormatter YYYY_MM_DD_HH_MM_SS_14_FORMATTER =
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_NO_SPLIT.value);

static {
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}.*"),
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(DateTimeFormatter.ISO_LOCAL_DATE)
.appendLiteral(' ')
.append(DateTimeFormatter.ISO_LOCAL_TIME)
.toFormatter());

YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_ISO8601.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}.*"),
DateTimeFormatter.ISO_LOCAL_DATE_TIME);

YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}/\\d{2}/\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SLASH.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}/\\d{2}/\\d{2}\\s\\d{2}:\\d{2}.*"),
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral('/')
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral('/')
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.appendLiteral(' ')
.append(DateTimeFormatter.ISO_LOCAL_TIME)
.toFormatter());

YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SPOT.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}.*"),
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral('.')
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral('.')
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.appendLiteral(' ')
.append(DateTimeFormatter.ISO_LOCAL_TIME)
.toFormatter());

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}年\\d{2}月\\d{2}日\\s\\d{2}时\\d{2}分\\d{2}秒"),
DateTimeFormatter.ofPattern("yyyy年MM月dd日 HH时mm分ss秒"));

YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET.addAll(
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.entrySet());
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET.addAll(
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.entrySet());
}

/**
* gave a datetime string and return the {@link DateTimeFormatter} which can be used to parse
* it.
*
* @param dateTime eg: 2020-02-03 12:12:10.101
* @return the DateTimeFormatter matched, will return null when not matched any pattern
*/
public static DateTimeFormatter matchDateTimeFormatter(String dateTime) {
if (dateTime.length() == 19) {
for (Map.Entry<Pattern, DateTimeFormatter> entry :
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET) {
if (entry.getKey().matcher(dateTime).matches()) {
return entry.getValue();
}
}
} else if (dateTime.length() > 19) {
for (Map.Entry<Pattern, DateTimeFormatter> entry :
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET) {
if (entry.getKey().matcher(dateTime).matches()) {
return entry.getValue();
}
}
} else if (dateTime.length() == 14) {
return YYYY_MM_DD_HH_MM_SS_14_FORMATTER;
}
return null;
}

public static LocalDateTime parse(String dateTime, DateTimeFormatter dateTimeFormatter) {
TemporalAccessor parsedTimestamp = dateTimeFormatter.parse(dateTime);
LocalTime localTime = parsedTimestamp.query(TemporalQueries.localTime());
LocalDate localDate = parsedTimestamp.query(TemporalQueries.localDate());
return LocalDateTime.of(localDate, localTime);
}

/**
* gave a datetime string and return {@link LocalDateTime}
*
* <p>Due to the need to determine the rules of the formatter through regular expressions, there
* will be a certain performance loss. When tested on 8c16g macos, the most significant
* performance decrease compared to directly passing the formatter is
* 'Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}\\s\\d{2}:\\d{2}.*")' has increased from 4.5
* seconds to 10 seconds in a scenario where 1000w calculations are performed.
*
* <p>Analysis shows that there are two main reasons: one is that the regular expression
* position in the map is 4, before this, three regular expression matches are required.
*
* <p>Another reason is to support the length of non fixed millisecond bits (minimum 0, maximum
* 9), we used {@link DateTimeFormatter#ISO_LOCAL_TIME}, which also increases the time for time
* conversion.
*
* @param dateTime eg: 2020-02-03 12:12:10.101
* @return {@link LocalDateTime}
*/
public static LocalDateTime parse(String dateTime) {
DateTimeFormatter dateTimeFormatter = matchDateTimeFormatter(dateTime);
return LocalDateTime.parse(dateTime, dateTimeFormatter);
}

public static LocalDateTime parse(String dateTime, Formatter formatter) {
Expand Down Expand Up @@ -78,7 +248,10 @@ public enum Formatter {
YYYY_MM_DD_HH_MM_SS_SPOT("yyyy.MM.dd HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_SLASH("yyyy/MM/dd HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_NO_SPLIT("yyyyMMddHHmmss"),
YYYY_MM_DD_HH_MM_SS_ISO8601("yyyy-MM-dd'T'HH:mm:ss");
YYYY_MM_DD_HH_MM_SS_ISO8601("yyyy-MM-dd'T'HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_SSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSS"),
YYYY_MM_DD_HH_MM_SS_SSSSSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSSSSS"),
YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS");

private final String value;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,15 @@

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.SignStyle;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import static java.time.temporal.ChronoField.DAY_OF_MONTH;
import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
import static java.time.temporal.ChronoField.YEAR;

public class DateUtils {
private static final Map<Formatter, DateTimeFormatter> FORMATTER_MAP = new HashMap<>();
Expand All @@ -36,6 +43,106 @@ public class DateUtils {
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_SLASH.value));
}

public static final Pattern[] PATTERN_ARRAY =
new Pattern[] {
Pattern.compile("\\d{4}-\\d{2}-\\d{2}"),
Pattern.compile("\\d{4}年\\d{2}月\\d{2}日"),
Pattern.compile("\\d{4}/\\d{2}/\\d{2}"),
Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}"),
Pattern.compile("\\d{8}")
};

public static final Map<Pattern, DateTimeFormatter> DATE_FORMATTER_MAP = new HashMap();

static {
DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[0],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(DateTimeFormatter.ISO_LOCAL_DATE)
.toFormatter());

DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[1],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral("年")
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral("月")
.appendValue(DAY_OF_MONTH, 2)
.appendLiteral("日")
.toFormatter())
.toFormatter());

DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[2],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral('/')
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral('/')
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.toFormatter());

DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[3],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral('.')
.appendValue(MONTH_OF_YEAR, 2)
.appendLiteral('.')
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.toFormatter());

DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[4],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(
new DateTimeFormatterBuilder()
.appendValue(YEAR, 4, 10, SignStyle.EXCEEDS_PAD)
.appendValue(MONTH_OF_YEAR, 2)
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.toFormatter());
}

/**
* gave a date string and return the {@link DateTimeFormatter} which can be used to parse it.
*
* @param dateTime eg: 2020-02-03
* @return the DateTimeFormatter matched, will return null when not matched any pattern in
* {@link #PATTERN_ARRAY}
*/
public static DateTimeFormatter matchDateFormatter(String dateTime) {
for (int j = 0; j < PATTERN_ARRAY.length; j++) {
if (PATTERN_ARRAY[j].matcher(dateTime).matches()) {
return DATE_FORMATTER_MAP.get(PATTERN_ARRAY[j]);
}
}
return null;
}

public static LocalDate parse(String date) {
DateTimeFormatter dateTimeFormatter = matchDateFormatter(date);
return parse(date, dateTimeFormatter);
}

public static LocalDate parse(String date, DateTimeFormatter dateTimeFormatter) {
return LocalDate.parse(date, dateTimeFormatter);
}

public static LocalDate parse(String date, Formatter formatter) {
return LocalDate.parse(date, FORMATTER_MAP.get(formatter));
}
Expand Down
Loading
Loading