Skip to content

Commit

Permalink
[ML] Add earliest and latest timestamps to field stats (#42890)
Browse files Browse the repository at this point in the history
This change adds the earliest and latest timestamps to
the field stats for fields of type "date" in the output of
the ML find_file_structure endpoint. This will allow the
cards for date fields in the file data visualizer in the UI
to look more like the cards for date fields in the index
data visualizer in the UI.
  • Loading branch information
droberts195 committed Jun 6, 2019
1 parent 280a2c9 commit b202a59
Show file tree
Hide file tree
Showing 16 changed files with 365 additions and 100 deletions.
Expand Up @@ -38,12 +38,14 @@ public class FieldStats implements ToXContentObject {
public static final ParseField MAX_VALUE = new ParseField("max_value");
public static final ParseField MEAN_VALUE = new ParseField("mean_value");
public static final ParseField MEDIAN_VALUE = new ParseField("median_value");
public static final ParseField EARLIEST = new ParseField("earliest");
public static final ParseField LATEST = new ParseField("latest");
public static final ParseField TOP_HITS = new ParseField("top_hits");

@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", true,
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
(List<Map<String, Object>>) a[6]));
(String) a[6], (String) a[7], (List<Map<String, Object>>) a[8]));

static {
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
Expand All @@ -52,6 +54,8 @@ public class FieldStats implements ToXContentObject {
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), EARLIEST);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), LATEST);
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
}

Expand All @@ -61,16 +65,20 @@ public class FieldStats implements ToXContentObject {
private final Double maxValue;
private final Double meanValue;
private final Double medianValue;
private final String earliestTimestamp;
private final String latestTimestamp;
private final List<Map<String, Object>> topHits;

FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
List<Map<String, Object>> topHits) {
String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
this.count = count;
this.cardinality = cardinality;
this.minValue = minValue;
this.maxValue = maxValue;
this.meanValue = meanValue;
this.medianValue = medianValue;
this.earliestTimestamp = earliestTimestamp;
this.latestTimestamp = latestTimestamp;
this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits);
}

Expand Down Expand Up @@ -98,6 +106,14 @@ public Double getMedianValue() {
return medianValue;
}

/**
 * Returns the earliest timestamp recorded for the field, exactly as it was
 * supplied to the constructor (it is kept as a string, not parsed).
 *
 * @return the earliest timestamp, or {@code null} if none was supplied; the
 *         parser treats the "earliest" entry as optional
 */
public String getEarliestTimestamp() {
    return this.earliestTimestamp;
}

/**
 * Returns the latest timestamp recorded for the field, exactly as it was
 * supplied to the constructor (it is kept as a string, not parsed).
 *
 * @return the latest timestamp, or {@code null} if none was supplied; the
 *         parser treats the "latest" entry as optional
 */
public String getLatestTimestamp() {
    return this.latestTimestamp;
}

/**
 * Returns the top hits for the field, one map per hit.
 *
 * @return the top hits; never {@code null}, because the constructor stores an
 *         empty list when given {@code null} and otherwise wraps the supplied
 *         list with {@link Collections#unmodifiableList}
 */
public List<Map<String, Object>> getTopHits() {
    return this.topHits;
}
Expand All @@ -120,6 +136,12 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
if (medianValue != null) {
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
}
if (earliestTimestamp != null) {
builder.field(EARLIEST.getPreferredName(), earliestTimestamp);
}
if (latestTimestamp != null) {
builder.field(LATEST.getPreferredName(), latestTimestamp);
}
if (topHits.isEmpty() == false) {
builder.field(TOP_HITS.getPreferredName(), topHits);
}
Expand All @@ -140,7 +162,7 @@ static Number toIntegerIfInteger(double d) {
@Override
public int hashCode() {

return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}

@Override
Expand All @@ -161,6 +183,8 @@ public boolean equals(Object other) {
Objects.equals(this.maxValue, that.maxValue) &&
Objects.equals(this.meanValue, that.meanValue) &&
Objects.equals(this.medianValue, that.medianValue) &&
Objects.equals(this.earliestTimestamp, that.earliestTimestamp) &&
Objects.equals(this.latestTimestamp, that.latestTimestamp) &&
Objects.equals(this.topHits, that.topHits);
}
}
Expand Up @@ -43,6 +43,8 @@ static FieldStats createTestFieldStats() {
Double maxValue = null;
Double meanValue = null;
Double medianValue = null;
String earliestTimestamp = null;
String latestTimestamp = null;
boolean isMetric = randomBoolean();
if (isMetric) {
if (randomBoolean()) {
Expand All @@ -54,6 +56,12 @@ static FieldStats createTestFieldStats() {
}
meanValue = randomDouble();
medianValue = randomDouble();
} else {
boolean isDate = randomBoolean();
if (isDate) {
earliestTimestamp = randomAlphaOfLength(20);
latestTimestamp = randomAlphaOfLength(20);
}
}

List<Map<String, Object>> topHits = new ArrayList<>();
Expand All @@ -68,7 +76,7 @@ static FieldStats createTestFieldStats() {
topHits.add(topHit);
}

return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}

@Override
Expand Down
10 changes: 10 additions & 0 deletions docs/reference/ml/apis/find-file-structure.asciidoc
Expand Up @@ -445,6 +445,8 @@ If the request does not encounter errors, you receive the following result:
"release_date" : {
"count" : 24,
"cardinality" : 20,
"earliest" : "1932-06-01",
"latest" : "2011-06-02",
"top_hits" : [
{
"value" : "1985-06-01",
Expand Down Expand Up @@ -1152,6 +1154,8 @@ If the request does not encounter errors, you receive the following result:
"tpep_dropoff_datetime" : {
"count" : 19998,
"cardinality" : 9066,
"earliest" : "2018-05-31 06:18:15",
"latest" : "2018-06-02 02:25:44",
"top_hits" : [
{
"value" : "2018-06-01 01:12:12",
Expand Down Expand Up @@ -1198,6 +1202,8 @@ If the request does not encounter errors, you receive the following result:
"tpep_pickup_datetime" : {
"count" : 19998,
"cardinality" : 8760,
"earliest" : "2018-05-31 06:08:31",
"latest" : "2018-06-02 01:21:21",
"top_hits" : [
{
"value" : "2018-06-01 00:01:23",
Expand Down Expand Up @@ -1457,6 +1463,8 @@ this:
"timestamp" : {
"count" : 53,
"cardinality" : 28,
"earliest" : "2018-09-27T14:39:28,518",
"latest" : "2018-09-27T14:39:37,012",
"top_hits" : [
{
"value" : "2018-09-27T14:39:29,859",
Expand Down Expand Up @@ -1719,6 +1727,8 @@ this:
"timestamp" : {
"count" : 53,
"cardinality" : 28,
"earliest" : "2018-09-27T14:39:28,518",
"latest" : "2018-09-27T14:39:37,012",
"top_hits" : [
{
"value" : "2018-09-27T14:39:29,859",
Expand Down
Expand Up @@ -5,7 +5,9 @@
*/
package org.elasticsearch.xpack.core.ml.filestructurefinder;

import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
Expand All @@ -27,12 +29,14 @@ public class FieldStats implements ToXContentObject, Writeable {
static final ParseField MAX_VALUE = new ParseField("max_value");
static final ParseField MEAN_VALUE = new ParseField("mean_value");
static final ParseField MEDIAN_VALUE = new ParseField("median_value");
static final ParseField EARLIEST = new ParseField("earliest");
static final ParseField LATEST = new ParseField("latest");
static final ParseField TOP_HITS = new ParseField("top_hits");

@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<FieldStats, Void> PARSER = new ConstructingObjectParser<>("field_stats", false,
a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5],
(List<Map<String, Object>>) a[6]));
(String) a[6], (String) a[7], (List<Map<String, Object>>) a[8]));

static {
PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT);
Expand All @@ -41,6 +45,8 @@ public class FieldStats implements ToXContentObject, Writeable {
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE);
PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), EARLIEST);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), LATEST);
PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS);
}

Expand All @@ -50,20 +56,33 @@ public class FieldStats implements ToXContentObject, Writeable {
private final Double maxValue;
private final Double meanValue;
private final Double medianValue;
private final String earliestTimestamp;
private final String latestTimestamp;
private final List<Map<String, Object>> topHits;

public FieldStats(long count, int cardinality, List<Map<String, Object>> topHits) {
this(count, cardinality, null, null, null, null, topHits);
this(count, cardinality, null, null, null, null, null, null, topHits);
}

/**
 * Creates field stats that carry earliest/latest timestamps but no numeric
 * summary values.
 *
 * @param count             value stored as the count
 * @param cardinality       value stored as the cardinality
 * @param earliestTimestamp earliest timestamp as a string; may be {@code null}
 * @param latestTimestamp   latest timestamp as a string; may be {@code null}
 * @param topHits           top hits; {@code null} is treated as an empty list
 */
public FieldStats(long count, int cardinality, String earliestTimestamp, String latestTimestamp,
                  List<Map<String, Object>> topHits) {
    this(
        count,
        cardinality,
        null, null, null, null, // min, max, mean and median values are not supplied by this overload
        earliestTimestamp,
        latestTimestamp,
        topHits);
}

/**
 * Creates field stats that carry numeric summary values but no
 * earliest/latest timestamps.
 *
 * @param count       value stored as the count
 * @param cardinality value stored as the cardinality
 * @param minValue    minimum value; may be {@code null}
 * @param maxValue    maximum value; may be {@code null}
 * @param meanValue   mean value; may be {@code null}
 * @param medianValue median value; may be {@code null}
 * @param topHits     top hits; {@code null} is treated as an empty list
 */
public FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
                  List<Map<String, Object>> topHits) {
    this(
        count,
        cardinality,
        minValue,
        maxValue,
        meanValue,
        medianValue,
        null, null, // earliest and latest timestamps are not supplied by this overload
        topHits);
}

/**
 * Full constructor that every other overload delegates to; it is also the one
 * invoked by the parser.
 *
 * @param count             value stored as the count
 * @param cardinality       value stored as the cardinality
 * @param minValue          minimum value; may be {@code null}
 * @param maxValue          maximum value; may be {@code null}
 * @param meanValue         mean value; may be {@code null}
 * @param medianValue       median value; may be {@code null}
 * @param earliestTimestamp earliest timestamp as a string; may be {@code null}
 * @param latestTimestamp   latest timestamp as a string; may be {@code null}
 * @param topHits           top hits; {@code null} is stored as an empty list
 */
FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue,
           String earliestTimestamp, String latestTimestamp, List<Map<String, Object>> topHits) {
    this.count = count;
    this.cardinality = cardinality;

    // Numeric summary values
    this.minValue = minValue;
    this.maxValue = maxValue;
    this.meanValue = meanValue;
    this.medianValue = medianValue;

    // Timestamp range
    this.earliestTimestamp = earliestTimestamp;
    this.latestTimestamp = latestTimestamp;

    // Normalise a missing list to an empty one so getTopHits() never returns null;
    // otherwise expose the caller's list through an unmodifiable wrapper.
    if (topHits == null) {
        this.topHits = Collections.emptyList();
    } else {
        this.topHits = Collections.unmodifiableList(topHits);
    }
}

Expand All @@ -74,6 +93,13 @@ public FieldStats(StreamInput in) throws IOException {
maxValue = in.readOptionalDouble();
meanValue = in.readOptionalDouble();
medianValue = in.readOptionalDouble();
if (in.getVersion().onOrAfter(Version.V_7_3_0)) {
earliestTimestamp = in.readOptionalString();
latestTimestamp = in.readOptionalString();
} else {
earliestTimestamp = null;
latestTimestamp = null;
}
topHits = in.readList(StreamInput::readMap);
}

Expand All @@ -85,6 +111,10 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeOptionalDouble(maxValue);
out.writeOptionalDouble(meanValue);
out.writeOptionalDouble(medianValue);
if (out.getVersion().onOrAfter(Version.V_7_3_0)) {
out.writeOptionalString(earliestTimestamp);
out.writeOptionalString(latestTimestamp);
}
out.writeCollection(topHits, StreamOutput::writeMap);
}

Expand Down Expand Up @@ -112,6 +142,14 @@ public Double getMedianValue() {
return medianValue;
}

/**
 * Returns the earliest timestamp recorded for the field, kept as the string
 * that was passed to the constructor or read from the stream.
 *
 * @return the earliest timestamp, or {@code null} if none is present (for
 *         example when read from a stream whose version is before 7.3.0)
 */
public String getEarliestTimestamp() {
    return this.earliestTimestamp;
}

/**
 * Returns the latest timestamp recorded for the field, kept as the string
 * that was passed to the constructor or read from the stream.
 *
 * @return the latest timestamp, or {@code null} if none is present (for
 *         example when read from a stream whose version is before 7.3.0)
 */
public String getLatestTimestamp() {
    return this.latestTimestamp;
}

/**
 * Returns the top hits for the field, one map per hit.
 *
 * @return the top hits; the non-stream constructors store an empty list
 *         rather than {@code null} when no hits are supplied
 */
public List<Map<String, Object>> getTopHits() {
    return this.topHits;
}
Expand All @@ -134,6 +172,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (medianValue != null) {
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
}
if (earliestTimestamp != null) {
builder.field(EARLIEST.getPreferredName(), earliestTimestamp);
}
if (latestTimestamp != null) {
builder.field(LATEST.getPreferredName(), latestTimestamp);
}
if (topHits.isEmpty() == false) {
builder.field(TOP_HITS.getPreferredName(), topHits);
}
Expand All @@ -154,7 +198,7 @@ public static Number toIntegerIfInteger(double d) {
@Override
public int hashCode() {

return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}

@Override
Expand All @@ -175,6 +219,13 @@ public boolean equals(Object other) {
Objects.equals(this.maxValue, that.maxValue) &&
Objects.equals(this.meanValue, that.meanValue) &&
Objects.equals(this.medianValue, that.medianValue) &&
Objects.equals(this.earliestTimestamp, that.earliestTimestamp) &&
Objects.equals(this.latestTimestamp, that.latestTimestamp) &&
Objects.equals(this.topHits, that.topHits);
}

/**
 * Produces the string form of these stats by delegating to
 * {@code Strings.toString(this)}.
 * NOTE(review): presumably this renders the object through its
 * {@code toXContent} implementation as JSON; confirm against the
 * {@code Strings} helper.
 */
@Override
public String toString() {
    final String rendered = Strings.toString(this);
    return rendered;
}
}
Expand Up @@ -30,6 +30,8 @@ static FieldStats createTestFieldStats() {
Double maxValue = null;
Double meanValue = null;
Double medianValue = null;
String earliestTimestamp = null;
String latestTimestamp = null;
boolean isMetric = randomBoolean();
if (isMetric) {
if (randomBoolean()) {
Expand All @@ -41,6 +43,12 @@ static FieldStats createTestFieldStats() {
}
meanValue = randomDouble();
medianValue = randomDouble();
} else {
boolean isDate = randomBoolean();
if (isDate) {
earliestTimestamp = randomAlphaOfLength(20);
latestTimestamp = randomAlphaOfLength(20);
}
}

List<Map<String, Object>> topHits = new ArrayList<>();
Expand All @@ -55,7 +63,7 @@ static FieldStats createTestFieldStats() {
topHits.add(topHit);
}

return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits);
return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, earliestTimestamp, latestTimestamp, topHits);
}

@Override
Expand Down
Expand Up @@ -159,8 +159,7 @@ static DelimitedFileStructureFinder makeDelimitedFileStructureFinder(List<String

SortedMap<String, Object> mappings = mappingsAndFieldStats.v1();
if (timeField != null) {
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD,
Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "date"));
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
}

if (mappingsAndFieldStats.v2() != null) {
Expand Down

0 comments on commit b202a59

Please sign in to comment.