Navigation Menu

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added epoch date formats to configure parsing of unix dates #11453

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/reference/mapping/date-format.asciidoc
Expand Up @@ -198,6 +198,11 @@ year.

|`year_month_day`|A formatter for a four digit year, two digit month of
year, and two digit day of month.

|`epoch_second`|A formatter for the number of seconds since the epoch.

|`epoch_millis`|A formatter for the number of milliseconds since
the epoch.
|=======================================================================

[float]
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/mapping/fields/timestamp-field.asciidoc
Expand Up @@ -79,7 +79,7 @@ format>> used to parse the provided timestamp value. For example:
}
--------------------------------------------------

Note, the default format is `dateOptionalTime`. The timestamp value will
Note, the default format is `epoch_millis||dateOptionalTime`. The timestamp value will
first be parsed as a number and if it fails the format will be tried.

[float]
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/mapping/types/core-types.asciidoc
Expand Up @@ -349,7 +349,7 @@ date type:
Defaults to the property/field name.

|`format` |The <<mapping-date-format,date
format>>. Defaults to `dateOptionalTime`.
format>>. Defaults to `epoch_millis||dateOptionalTime`.

|`store` |Set to `true` to store actual field in the index, `false` to not
store it. Defaults to `false` (note, the JSON document itself is stored,
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/mapping/types/root-object-type.asciidoc
Expand Up @@ -42,8 +42,8 @@ and will use the matching format as its format attribute. The date
format itself is explained
<<mapping-date-format,here>>.

The default formats are: `dateOptionalTime` (ISO) and
`yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z`.
The default formats are: `dateOptionalTime` (ISO),
`yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z` and `epoch_millis`.

*Note:* `dynamic_date_formats` are used *only* for dynamically added
date fields, not for `date` fields that you specify in your mapping.
Expand Down
Expand Up @@ -32,6 +32,11 @@ public TimestampParsingException(String timestamp) {
this.timestamp = timestamp;
}

/**
 * Creates an exception for a timestamp that could not be parsed, keeping the underlying failure.
 *
 * @param timestamp the raw timestamp text that failed to parse; it is embedded in the exception message
 * @param cause     the exception that made parsing fail, handed to the superclass as the cause
 */
public TimestampParsingException(String timestamp, Throwable cause) {
super("failed to parse timestamp [" + timestamp + "]", cause);
this.timestamp = timestamp;
}

/**
 * Returns the timestamp text stored in this exception.
 *
 * @return the value of the {@code timestamp} field
 */
public String timestamp() {
return timestamp;
}
Expand Down
Expand Up @@ -161,19 +161,11 @@ public int hashCode() {
public static class Timestamp {

public static String parseStringTimestamp(String timestampAsString, FormatDateTimeFormatter dateTimeFormatter) throws TimestampParsingException {
long ts;
try {
// if we manage to parse it, its a millisecond timestamp, just return the string as is
ts = Long.parseLong(timestampAsString);
return timestampAsString;
} catch (NumberFormatException e) {
try {
ts = dateTimeFormatter.parser().parseMillis(timestampAsString);
} catch (RuntimeException e1) {
throw new TimestampParsingException(timestampAsString);
}
return Long.toString(dateTimeFormatter.parser().parseMillis(timestampAsString));
} catch (RuntimeException e) {
throw new TimestampParsingException(timestampAsString, e);
}
return Long.toString(ts);
}


Expand Down
22 changes: 4 additions & 18 deletions src/main/java/org/elasticsearch/common/joda/DateMathParser.java
Expand Up @@ -19,14 +19,14 @@

package org.elasticsearch.common.joda;

import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.ElasticsearchParseException;
import org.joda.time.DateTimeZone;
import org.joda.time.MutableDateTime;
import org.joda.time.format.DateTimeFormatter;

import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;

import static com.google.common.base.Preconditions.checkNotNull;

/**
* A parser for date/time formatted text with optional date math.
Expand All @@ -38,13 +38,10 @@
public class DateMathParser {

private final FormatDateTimeFormatter dateTimeFormatter;
private final TimeUnit timeUnit;

public DateMathParser(FormatDateTimeFormatter dateTimeFormatter, TimeUnit timeUnit) {
if (dateTimeFormatter == null) throw new NullPointerException();
if (timeUnit == null) throw new NullPointerException();
public DateMathParser(FormatDateTimeFormatter dateTimeFormatter) {
checkNotNull(dateTimeFormatter);
this.dateTimeFormatter = dateTimeFormatter;
this.timeUnit = timeUnit;
}

public long parse(String text, Callable<Long> now) {
Expand Down Expand Up @@ -195,17 +192,6 @@ private long parseMath(String mathString, long time, boolean roundUp, DateTimeZo
}

private long parseDateTime(String value, DateTimeZone timeZone) {

// first check for timestamp
if (value.length() > 4 && StringUtils.isNumeric(value)) {
try {
long time = Long.parseLong(value);
return timeUnit.toMillis(time);
} catch (NumberFormatException e) {
throw new ElasticsearchParseException("failed to parse date field [" + value + "] as timestamp", e);
}
}

DateTimeFormatter parser = dateTimeFormatter.parser();
if (timeZone != null) {
parser = parser.withZone(timeZone);
Expand Down
51 changes: 51 additions & 0 deletions src/main/java/org/elasticsearch/common/joda/Joda.java
Expand Up @@ -27,6 +27,7 @@
import org.joda.time.format.*;

import java.util.Locale;
import java.util.regex.Pattern;

/**
*
Expand Down Expand Up @@ -133,6 +134,10 @@ public static FormatDateTimeFormatter forPattern(String input, Locale locale) {
formatter = ISODateTimeFormat.yearMonth();
} else if ("yearMonthDay".equals(input) || "year_month_day".equals(input)) {
formatter = ISODateTimeFormat.yearMonthDay();
} else if ("epoch_second".equals(input)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is confusing if we support camelCase in some of the options in this parser and not others (even if they are new). We should either support camelCase for all options or for none to be consistent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we plan to resolve this before 2.0? If so, I am fine leaving it as it is...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it will be resolved (rejecting camelCase), as it's a huge change across 100s of files (because most parsers use this style and not ParseField) and I don't see us making that change quickly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it matters. We should not force making huge changes to the entire codebase in order to not add things which will just be deprecated and/or confusing to the user.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I think this is confusing. If I have a format specified as yearMonthDay that works, then I would expect to be able to change it to epochSecond and it would work. Supporting some values in camelCase for date formats and not other values is very confusing to a user. I'm all for removing camelCase, but we should be consistent with it, especially when it's different values for the same setting (in this case, different values of format for date fields).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would also be fine with removing all the camelCase options for all formats in this PR to make it consistent.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already do not support camelCase for all settings, and I don't think there is any consistency even within the same query/field type/whatever.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would also be fine with removing all the camelCase options for all formats in this PR to make it consistent.

This is the kind of statement that stalls progress. Requiring huge changes just to make a small improvement should not be necessary.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am only talking about the date formats here, not across the whole codebase (I can see the above statement might have been a bit ambiguous on that). All the multi-word date format values above support both a camelCase and an underscored version. That should be consistent; whether that means supporting both for now or only supporting the underscored version, I don't have a strong opinion. But it's hardly a huge change to update the date format values to be consistent, and it's not a huge overhead to maintain an extra 2 camelCase options, given that any change to that policy would require a change to all the other date formats too.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized we aren't even talking about setting names, but the valid values for the format setting. This argument to use ParseValue does not make sense. We don't support camelCase in eg the index option. We should not do it here, it will just add more work for users if we allow them to start using a new value that will just go away in the future (and will require them to change the value to what they would have found in the first place if they had tried using camelCase and seen an error).

formatter = new DateTimeFormatterBuilder().append(new EpochTimeParser(false)).toFormatter();
} else if ("epoch_millis".equals(input)) {
formatter = new DateTimeFormatterBuilder().append(new EpochTimeParser(true)).toFormatter();
} else if (Strings.hasLength(input) && input.contains("||")) {
String[] formats = Strings.delimitedListToStringArray(input, "||");
DateTimeParser[] parsers = new DateTimeParser[formats.length];
Expand Down Expand Up @@ -192,4 +197,50 @@ public DateTimeField getField(Chronology chronology) {
return new OffsetDateTimeField(new DividedDateTimeField(new OffsetDateTimeField(chronology.monthOfYear(), -1), QuarterOfYear, 3), 1);
}
};

public static class EpochTimeParser implements DateTimeParser {

private static final Pattern MILLI_SECOND_PRECISION_PATTERN = Pattern.compile("^\\d{1,13}$");
private static final Pattern SECOND_PRECISION_PATTERN = Pattern.compile("^\\d{1,10}$");

Copy link

@tj tj Aug 22, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this might be busted by-design, JSON numerical values only have a float type, so "1.470417092e+09" (produced by Go's json encoder) will just result in IllegalArgumentException[Invalid format: "1.470417092e+09"];. Any recommended way around this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, pass the value as a json string.

Copy link

@tj tj Aug 22, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah I suppose that might be ok, in this case it's user-defined input so that's pretty awkward but it beats breaking.

private final boolean hasMilliSecondPrecision;
private final Pattern pattern;

/**
 * Builds a parser for numeric epoch timestamps.
 *
 * @param hasMilliSecondPrecision {@code true} to select the millisecond pattern (1 to 13 digits),
 *                                {@code false} to select the second pattern (1 to 10 digits)
 */
public EpochTimeParser(boolean hasMilliSecondPrecision) {
    this.hasMilliSecondPrecision = hasMilliSecondPrecision;
    if (hasMilliSecondPrecision) {
        this.pattern = MILLI_SECOND_PRECISION_PATTERN;
    } else {
        this.pattern = SECOND_PRECISION_PATTERN;
    }
}

/**
 * Reports the longest input, in characters, that this parser accepts.
 *
 * @return 13 when parsing millisecond timestamps, 10 when parsing second timestamps
 */
@Override
public int estimateParsedLength() {
    if (hasMilliSecondPrecision) {
        return 13;
    }
    return 10;
}

/**
 * Parses {@code text} as an epoch timestamp and stores the resulting UTC date fields in the bucket.
 * <p>
 * The whole of {@code text} is matched against the digit pattern; {@code position} is not consulted.
 * Second-precision input is multiplied by 1000 to obtain milliseconds.
 *
 * @param bucket   the bucket receiving the parsed fields; its zone must be {@link DateTimeZone#UTC}
 * @param text     the candidate timestamp, digits only
 * @param position the start offset supplied by the caller (unused here)
 * @return {@code text.length()} on success, or -1 when the text is too long, the bucket zone is not UTC,
 *         the text does not match the digit pattern, or storing the fields fails
 */
@Override
public int parseInto(DateTimeParserBucket bucket, String text, int position) {
    if (text.length() > estimateParsedLength()) {
        return -1;
    }
    // timestamps have to have UTC timezone
    if (bucket.getZone() != DateTimeZone.UTC) {
        return -1;
    }
    if (pattern.matcher(text).matches() == false) {
        return -1;
    }

    // seconds are scaled up so that everything below works in milliseconds
    final long toMillis = hasMilliSecondPrecision ? 1L : 1000L;
    try {
        final DateTime utc = new DateTime(Long.parseLong(text) * toMillis, DateTimeZone.UTC);
        bucket.saveField(DateTimeFieldType.year(), utc.getYear());
        bucket.saveField(DateTimeFieldType.monthOfYear(), utc.getMonthOfYear());
        bucket.saveField(DateTimeFieldType.dayOfMonth(), utc.getDayOfMonth());
        bucket.saveField(DateTimeFieldType.hourOfDay(), utc.getHourOfDay());
        bucket.saveField(DateTimeFieldType.minuteOfHour(), utc.getMinuteOfHour());
        bucket.saveField(DateTimeFieldType.secondOfMinute(), utc.getSecondOfMinute());
        bucket.saveField(DateTimeFieldType.millisOfSecond(), utc.getMillisOfSecond());
        bucket.setZone(DateTimeZone.UTC);
    } catch (Exception e) {
        // any failure while converting or storing is reported as "no match"
        return -1;
    }
    return text.length();
}
};
}
Expand Up @@ -46,12 +46,7 @@
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NumericDateAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MergeMappingException;
import org.elasticsearch.index.mapper.MergeResult;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.LongFieldMapper.CustomLongNumericField;
import org.elasticsearch.index.query.QueryParseContext;
import org.elasticsearch.search.internal.SearchContext;
Expand Down Expand Up @@ -223,7 +218,7 @@ public String toString(String s) {

protected FormatDateTimeFormatter dateTimeFormatter = Defaults.DATE_TIME_FORMATTER;
protected TimeUnit timeUnit = Defaults.TIME_UNIT;
protected DateMathParser dateMathParser = new DateMathParser(dateTimeFormatter, timeUnit);
protected DateMathParser dateMathParser = new DateMathParser(dateTimeFormatter);

public DateFieldType() {}

Expand All @@ -245,7 +240,7 @@ public FormatDateTimeFormatter dateTimeFormatter() {
public void setDateTimeFormatter(FormatDateTimeFormatter dateTimeFormatter) {
checkIfFrozen();
this.dateTimeFormatter = dateTimeFormatter;
this.dateMathParser = new DateMathParser(dateTimeFormatter, timeUnit);
this.dateMathParser = new DateMathParser(dateTimeFormatter);
}

public TimeUnit timeUnit() {
Expand All @@ -255,7 +250,7 @@ public TimeUnit timeUnit() {
public void setTimeUnit(TimeUnit timeUnit) {
checkIfFrozen();
this.timeUnit = timeUnit;
this.dateMathParser = new DateMathParser(dateTimeFormatter, timeUnit);
this.dateMathParser = new DateMathParser(dateTimeFormatter);
}

protected DateMathParser dateMathParser() {
Expand Down Expand Up @@ -365,9 +360,6 @@ private Query innerRangeQuery(Object lowerTerm, Object upperTerm, boolean includ
}

public long parseToMilliseconds(Object value, boolean inclusive, @Nullable DateTimeZone zone, @Nullable DateMathParser forcedDateParser) {
if (value instanceof Number) {
return ((Number) value).longValue();
}
DateMathParser dateParser = dateMathParser();
if (forcedDateParser != null) {
dateParser = forcedDateParser;
Expand Down Expand Up @@ -434,25 +426,20 @@ protected boolean customBoost() {
@Override
protected void innerParseCreateField(ParseContext context, List<Field> fields) throws IOException {
String dateAsString = null;
Long value = null;
float boost = this.fieldType.boost();
if (context.externalValueSet()) {
Object externalValue = context.externalValue();
if (externalValue instanceof Number) {
value = ((Number) externalValue).longValue();
} else {
dateAsString = (String) externalValue;
if (dateAsString == null) {
dateAsString = nullValue;
}
dateAsString = (String) externalValue;
if (dateAsString == null) {
dateAsString = nullValue;
}
} else {
XContentParser parser = context.parser();
XContentParser.Token token = parser.currentToken();
if (token == XContentParser.Token.VALUE_NULL) {
dateAsString = nullValue;
} else if (token == XContentParser.Token.VALUE_NUMBER) {
value = parser.longValue(coerce.value());
dateAsString = parser.text();
} else if (token == XContentParser.Token.START_OBJECT) {
String currentFieldName = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
Expand All @@ -462,8 +449,6 @@ protected void innerParseCreateField(ParseContext context, List<Field> fields) t
if ("value".equals(currentFieldName) || "_value".equals(currentFieldName)) {
if (token == XContentParser.Token.VALUE_NULL) {
dateAsString = nullValue;
} else if (token == XContentParser.Token.VALUE_NUMBER) {
value = parser.longValue(coerce.value());
} else {
dateAsString = parser.text();
}
Expand All @@ -479,14 +464,12 @@ protected void innerParseCreateField(ParseContext context, List<Field> fields) t
}
}

Long value = null;
if (dateAsString != null) {
assert value == null;
if (context.includeInAll(includeInAll, this)) {
context.allEntries().addText(fieldType.names().fullName(), dateAsString, boost);
}
value = fieldType().parseStringValue(dateAsString);
} else if (value != null) {
value = ((DateFieldType)fieldType).timeUnit().toMillis(value);
}

if (value != null) {
Expand Down
Expand Up @@ -58,7 +58,7 @@ public class TimestampFieldMapper extends DateFieldMapper implements RootMapper

public static final String NAME = "_timestamp";
public static final String CONTENT_TYPE = "_timestamp";
public static final String DEFAULT_DATE_TIME_FORMAT = "dateOptionalTime";
public static final String DEFAULT_DATE_TIME_FORMAT = "epoch_millis||dateOptionalTime";

public static class Defaults extends DateFieldMapper.Defaults {
public static final String NAME = "_timestamp";
Expand Down
Expand Up @@ -102,7 +102,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
} else if ("time_zone".equals(currentFieldName) || "timeZone".equals(currentFieldName)) {
timeZone = DateTimeZone.forID(parser.text());
} else if ("format".equals(currentFieldName)) {
forcedDateParser = new DateMathParser(Joda.forPattern(parser.text()), DateFieldMapper.Defaults.TIME_UNIT);
forcedDateParser = new DateMathParser(Joda.forPattern(parser.text()));
} else {
throw new QueryParsingException(parseContext, "[range] query does not support [" + currentFieldName + "]");
}
Expand All @@ -123,11 +123,6 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
FieldMapper mapper = parseContext.fieldMapper(fieldName);
if (mapper != null) {
if (mapper instanceof DateFieldMapper) {
if ((from instanceof Number || to instanceof Number) && timeZone != null) {
throw new QueryParsingException(parseContext,
"[range] time_zone when using ms since epoch format as it's UTC based can not be applied to [" + fieldName
+ "]");
}
query = ((DateFieldMapper) mapper).fieldType().rangeQuery(from, to, includeLower, includeUpper, timeZone, forcedDateParser, parseContext);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we still have this? It doesn't really make sense to set seconds or milliseconds since epoch in anything but UTC?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the problem here is, that a date like 2015121212 supplied as a number could be a date in the format yyyyDDMMHH which cannot be distinguished, so this check would trigger even though the number is valid and going to be parsed with a date that correctly owns a timezone. Not sure, when/where to postpone this check, need to think about it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually found a solution to this in my latest commit and do check this in the parser now

} else {
if (timeZone != null) {
Expand Down
Expand Up @@ -68,7 +68,7 @@ public static class DateTime extends Patternable<DateTime> {
public static final DateTime DEFAULT = new DateTime(DateFieldMapper.Defaults.DATE_TIME_FORMATTER.format(), ValueFormatter.DateTime.DEFAULT, ValueParser.DateMath.DEFAULT);

public static DateTime format(String format) {
return new DateTime(format, new ValueFormatter.DateTime(format), new ValueParser.DateMath(format, DateFieldMapper.Defaults.TIME_UNIT));
return new DateTime(format, new ValueFormatter.DateTime(format), new ValueParser.DateMath(format));
}

public static DateTime mapper(DateFieldMapper mapper) {
Expand Down
Expand Up @@ -32,7 +32,6 @@
import java.text.ParseException;
import java.util.Locale;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;

/**
*
Expand Down Expand Up @@ -81,12 +80,12 @@ public double parseDouble(String value, SearchContext searchContext) {
*/
static class DateMath implements ValueParser {

public static final DateMath DEFAULT = new ValueParser.DateMath(new DateMathParser(DateFieldMapper.Defaults.DATE_TIME_FORMATTER, DateFieldMapper.Defaults.TIME_UNIT));
public static final DateMath DEFAULT = new ValueParser.DateMath(new DateMathParser(DateFieldMapper.Defaults.DATE_TIME_FORMATTER));

private DateMathParser parser;

public DateMath(String format, TimeUnit timeUnit) {
this(new DateMathParser(Joda.forPattern(format), timeUnit));
public DateMath(String format) {
this(new DateMathParser(Joda.forPattern(format)));
}

public DateMath(DateMathParser parser) {
Expand All @@ -110,7 +109,7 @@ public double parseDouble(String value, SearchContext searchContext) {
}

public static DateMath mapper(DateFieldMapper mapper) {
return new DateMath(new DateMathParser(mapper.fieldType().dateTimeFormatter(), DateFieldMapper.Defaults.TIME_UNIT));
return new DateMath(new DateMathParser(mapper.fieldType().dateTimeFormatter()));
}
}

Expand Down