From 1635d43c64cb1d1b870430c60d3d6ab895cb6955 Mon Sep 17 00:00:00 2001 From: Satyam Raj Date: Wed, 18 May 2022 16:07:06 +0530 Subject: [PATCH 1/2] add first changes --- .../pinot/spi/data/DateTimeFormatSpec.java | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java index 338bd6b985c5..4ee57e6f0516 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java @@ -20,6 +20,8 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; + +import java.sql.Time; import java.sql.Timestamp; import java.util.concurrent.TimeUnit; import org.apache.commons.lang3.StringUtils; @@ -36,16 +38,16 @@ public class DateTimeFormatSpec { public static final String NUMBER_REGEX = "[1-9][0-9]*"; - public static final String COLON_SEPARATOR = ":"; + public static final String COLON_SEPARATOR = "|"; /* DateTimeFieldSpec format is of format size:timeUnit:timeformat:pattern tz(timezone) * tz(timezone) is optional. If not specified, UTC timezone is used */ - public static final int FORMAT_SIZE_POSITION = 0; + public static final int FORMAT_SIZE_POSITION = 2; public static final int FORMAT_UNIT_POSITION = 1; - public static final int FORMAT_TIMEFORMAT_POSITION = 2; - public static final int FORMAT_PATTERN_POSITION = 3; - public static final int MIN_FORMAT_TOKENS = 3; - public static final int MAX_FORMAT_TOKENS = 4; + public static final int FORMAT_TIMEFORMAT_POSITION = 0; + public static final int FORMAT_PATTERN_POSITION = 2; + public static final int MIN_FORMAT_TOKENS = 1; + public static final int MAX_FORMAT_TOKENS = 3; private final String _format; private final int _size; @@ -181,22 +183,24 @@ public static void validateFormat(String format) { Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null"); String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS); Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS && formatTokens.length <= MAX_FORMAT_TOKENS, - "Incorrect format: %s. Must be of format 'size:timeunit:timeformat(:pattern)'", format); - Preconditions.checkState(formatTokens[FORMAT_SIZE_POSITION].matches(NUMBER_REGEX), - "Incorrect format size: %s in format: %s. Must be of format '[0-9]+::(:pattern)'", - formatTokens[FORMAT_SIZE_POSITION], format); + "Incorrect format: %s. Must be of format 'EPOCH|| or SIMPLE_DATE_FORMAT|| or TIMESTAMP'", format); - DateTimeFormatUnitSpec.validateUnitSpec(formatTokens[FORMAT_UNIT_POSITION]); + if(formatTokens[0].equals(TimeFormat.EPOCH.toString())) { + Preconditions.checkState(formatTokens[FORMAT_SIZE_POSITION].matches(NUMBER_REGEX), + "Incorrect format size: %s in format: %s. Must be of format 'EPOCH||[0-9]+'", + formatTokens[FORMAT_SIZE_POSITION], format); + DateTimeFormatUnitSpec.validateUnitSpec(formatTokens[FORMAT_UNIT_POSITION]); + } if (formatTokens.length == MIN_FORMAT_TOKENS) { - Preconditions.checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.EPOCH.toString()) - || formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.TIMESTAMP.toString()), - "Incorrect format type: %s in format: %s. Must be of '[0-9]+::EPOCH|TIMESTAMP'", + Preconditions.checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.TIMESTAMP.toString()), + "Incorrect format type: %s in format: %s. Must be of 'TIMESTAMP'", formatTokens[FORMAT_TIMEFORMAT_POSITION], format); } else { Preconditions - .checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString()), - "Incorrect format type: %s in format: %s. Must be of '[0-9]+::SIMPLE_DATE_FORMAT:pattern'", + .checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString()) || + formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.EPOCH.toString()), + "Incorrect format type: %s in format: %s. Must be of 'SIMPLE_DATE_FORMAT/EPOCH | / | ([0-9]+) / (timezone)'", formatTokens[FORMAT_TIMEFORMAT_POSITION], format); } } From c4c9ec7760f3a539fa2dba8044dba215f48b7a3a Mon Sep 17 00:00:00 2001 From: Satyam Raj Date: Thu, 19 May 2022 01:48:32 +0530 Subject: [PATCH 2/2] update DateTimeFormatSpec and tests --- .../common/data/DateTimeFormatSpecTest.java | 22 +++++------ .../spi/data/DateTimeFormatPatternSpec.java | 6 +-- .../pinot/spi/data/DateTimeFormatSpec.java | 37 +++++++++++++++---- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java index 22c359bcfa3d..c5e00d51a289 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java @@ -248,26 +248,26 @@ public Object[][] provideTestConstructFormatData() { List entries = new ArrayList<>(); - entries.add(new Object[]{1, TimeUnit.HOURS, "EPOCH", null, new DateTimeFormatSpec("1:HOURS:EPOCH"), null}); - entries.add(new Object[]{1, TimeUnit.HOURS, "EPOCH", "yyyyMMdd", new DateTimeFormatSpec("1:HOURS:EPOCH"), null}); - entries.add(new Object[]{5, TimeUnit.MINUTES, "EPOCH", null, new DateTimeFormatSpec("5:MINUTES:EPOCH"), null}); + entries.add(new Object[]{1, TimeUnit.HOURS, "EPOCH", null, new DateTimeFormatSpec("EPOCH|HOURS|1"), null}); + entries.add(new Object[]{1, TimeUnit.HOURS, "EPOCH", "yyyyMMdd", new DateTimeFormatSpec("EPOCH|HOURS|1"), null}); + entries.add(new Object[]{5, TimeUnit.MINUTES, "EPOCH", null, new DateTimeFormatSpec("EPOCH|MINUTES|5"), null}); entries.add(new Object[]{0, TimeUnit.HOURS, "EPOCH", null, null, null}); entries.add(new Object[]{1, null, "EPOCH", null, null, null}); entries.add(new Object[]{1, TimeUnit.HOURS, null, null, null, null}); entries.add(new Object[]{1, TimeUnit.HOURS, "DUMMY", "yyyyMMdd", null, null}); entries.add(new Object[]{ - 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMdd", null, - new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd") + 1, TimeUnit.DAYS, "SIMPLE_DATE_FORMAT", "yyyyMMdd tz(UTC)", null, + new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyyyMMdd|tz(UTC)") }); entries.add(new Object[]{ - 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMdd tz(America/Los_Angeles)", null, - new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(America/Los_Angeles)") + 1, TimeUnit.DAYS, "SIMPLE_DATE_FORMAT", "yyyyMMdd tz(America/Los_Angeles)", null, + new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyyyMMdd|tz(America/Los_Angeles)") }); - entries.add(new Object[]{1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", null, null, null}); - entries.add(new Object[]{-1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMDD", null, null}); + entries.add(new Object[]{1, TimeUnit.DAYS, "SIMPLE_DATE_FORMAT", null, null, null}); + entries.add(new Object[]{-1, TimeUnit.DAYS, "SIMPLE_DATE_FORMAT", "yyyyMMDD", null, null}); entries.add(new Object[]{ - 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "M/d/yyyy h:mm:ss a", null, - new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a") + 1, TimeUnit.DAYS, "SIMPLE_DATE_FORMAT", "M/d/yyyy h:mm:ss a tz(UTC)", null, + new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a|tz(UTC)") }); return entries.toArray(new Object[entries.size()][]); } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java index f26e52a82de8..a11c694cf394 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java @@ -33,9 +33,9 @@ public class DateTimeFormatPatternSpec { /** eg: yyyyMMdd tz(CST) or yyyyMMdd HH tz(GMT+0700) or yyyyMMddHH tz(America/Chicago) **/ - private static final Pattern SDF_PATTERN_WITH_TIMEZONE = Pattern.compile("^(.+)( tz[ ]*\\((.+)\\))[ ]*"); - private static final int SDF_PATTERN_GROUP = 1; - private static final int TIMEZONE_GROUP = 3; + public static final Pattern SDF_PATTERN_WITH_TIMEZONE = Pattern.compile("^(.+)( tz[ ]*\\((.+)\\))[ ]*"); + public static final int SDF_PATTERN_GROUP = 1; + public static final int TIMEZONE_GROUP = 2; public static final DateTimeZone DEFAULT_DATETIMEZONE = DateTimeZone.UTC; public static final Locale DEFAULT_LOCALE = Locale.ENGLISH; diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java index 4ee57e6f0516..a21b54aa379c 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java @@ -23,7 +23,10 @@ import java.sql.Time; import java.sql.Timestamp; +import java.util.TimeZone; import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; + import org.apache.commons.lang3.StringUtils; import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat; import org.apache.pinot.spi.utils.EqualityUtils; @@ -31,6 +34,9 @@ import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormatter; +import static org.apache.pinot.spi.data.DateTimeFormatPatternSpec.SDF_PATTERN_GROUP; +import static org.apache.pinot.spi.data.DateTimeFormatPatternSpec.TIMEZONE_GROUP; + /** * Class to represent format from {@link DateTimeFieldSpec} @@ -60,7 +66,7 @@ public DateTimeFormatSpec(String format) { String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS); if (formatTokens.length == MAX_FORMAT_TOKENS) { _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION], - formatTokens[FORMAT_PATTERN_POSITION]); + String.format("%s tz(%s)",formatTokens[FORMAT_UNIT_POSITION], formatTokens[FORMAT_PATTERN_POSITION])); } else { _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]); } @@ -68,7 +74,11 @@ public DateTimeFormatSpec(String format) { // TIMESTAMP type stores millis since epoch _size = 1; _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS"); - } else { + } else if (_patternSpec.getTimeFormat() == TimeFormat.SIMPLE_DATE_FORMAT) { + _size = 1; + _unitSpec = new DateTimeFormatUnitSpec("DAYS"); + } + else { _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]); _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]); } @@ -78,7 +88,11 @@ public DateTimeFormatSpec(String format) { * Constructs a dateTimeSpec format, given the components of a format */ public DateTimeFormatSpec(int columnSize, String columnUnit, String columnTimeFormat) { - _format = Joiner.on(COLON_SEPARATOR).join(columnSize, columnUnit, columnTimeFormat); + if(columnTimeFormat.equals(TimeFormat.TIMESTAMP.toString())) { + _format = "TIMESTAMP"; + } else { + _format = Joiner.on(COLON_SEPARATOR).join(columnTimeFormat, columnUnit, columnSize); + } validateFormat(_format); _size = columnSize; @@ -88,15 +102,24 @@ public DateTimeFormatSpec(int columnSize, String columnUnit, String columnTimeFo /** * Constructs a dateTimeSpec format, given the components of a format - * @param sdfPattern and tz + * @param sdfPatternWithTz and tz */ - public DateTimeFormatSpec(int columnSize, String columnUnit, String columnTimeFormat, String sdfPattern) { - _format = Joiner.on(COLON_SEPARATOR).join(columnSize, columnUnit, columnTimeFormat, sdfPattern); + public DateTimeFormatSpec(int columnSize, String columnUnit, String columnTimeFormat, String sdfPatternWithTz) { + String timeFormat = null; + String timeZoneString = null; + Matcher m = DateTimeFormatPatternSpec.SDF_PATTERN_WITH_TIMEZONE.matcher(sdfPatternWithTz); + if (m.find()) { + timeFormat = m.group(SDF_PATTERN_GROUP).trim(); + timeZoneString = m.group(TIMEZONE_GROUP).trim(); + } else { + timeFormat = sdfPatternWithTz; + } + _format = Joiner.on(COLON_SEPARATOR).join(columnTimeFormat, timeFormat, timeZoneString); validateFormat(_format); _size = columnSize; _unitSpec = new DateTimeFormatUnitSpec(columnUnit); - _patternSpec = new DateTimeFormatPatternSpec(columnTimeFormat, sdfPattern); + _patternSpec = new DateTimeFormatPatternSpec(columnTimeFormat, sdfPatternWithTz); } public String getFormat() {