Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QL: constant_keyword support #53241

Merged
merged 6 commits into from
Mar 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions docs/reference/sql/language/data-types.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,22 @@ s|SQL precision

4+h| Core types

| <<null-value, `null`>> | null | NULL | 0
| <<boolean, `boolean`>> | boolean | BOOLEAN | 1
| <<number, `byte`>> | byte | TINYINT | 3
| <<number, `short`>> | short | SMALLINT | 5
| <<number, `integer`>> | integer | INTEGER | 10
| <<number, `long`>> | long | BIGINT | 19
| <<number, `double`>> | double | DOUBLE | 15
| <<number, `float`>> | float | REAL | 7
| <<number, `half_float`>> | half_float | FLOAT | 3
| <<number, `scaled_float`>> | scaled_float | DOUBLE | 15
| <<keyword, `keyword`>> | keyword | VARCHAR | 32,766
| <<text, `text`>> | text | VARCHAR | 2,147,483,647
| <<binary, `binary`>> | binary | VARBINARY | 2,147,483,647
| <<date, `date`>> | datetime | TIMESTAMP | 29
| <<ip, `ip`>> | ip | VARCHAR | 39
| <<null-value, `null`>> | null | NULL | 0
| <<boolean, `boolean`>> | boolean | BOOLEAN | 1
| <<number, `byte`>> | byte | TINYINT | 3
| <<number, `short`>> | short | SMALLINT | 5
| <<number, `integer`>> | integer | INTEGER | 10
| <<number, `long`>> | long | BIGINT | 19
| <<number, `double`>> | double | DOUBLE | 15
| <<number, `float`>> | float | REAL | 7
| <<number, `half_float`>> | half_float | FLOAT | 3
| <<number, `scaled_float`>> | scaled_float | DOUBLE | 15
| <<keyword, `keyword`>> | keyword | VARCHAR | 32,766
| <<constant-keyword, `constant_keyword`>> | constant_keyword | VARCHAR | 32,766
| <<text, `text`>> | text | VARCHAR | 2,147,483,647
| <<binary, `binary`>> | binary | VARBINARY | 2,147,483,647
| <<date, `date`>> | datetime | TIMESTAMP | 29
| <<ip, `ip`>> | ip | VARCHAR | 39

4+h| Complex types

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.Objects;
import java.util.StringJoiner;

import static org.elasticsearch.xpack.ql.type.DataTypes.CONSTANT_KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.DATETIME;
import static org.elasticsearch.xpack.ql.type.DataTypes.KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.SCALED_FLOAT;
Expand Down Expand Up @@ -213,6 +214,7 @@ protected Object unwrapMultiValue(Object values) {
protected boolean isFromDocValuesOnly(DataType dataType) {
return dataType == KEYWORD // because of ignore_above.
|| dataType == DATETIME
|| dataType == CONSTANT_KEYWORD // because a non-existent value is considered the constant value itself
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain please, I don't get it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot extract the value of a constant_keyword from _source because if there is no source for the field, Elasticsearch will still consider the field as having the constant value.
Take a look at the example in our docs. No value at indexing time means level is debug. So, for the second document in the sample, there is no _source for level, but the value is still debug in case there is a query filtering on that field.

|| dataType == SCALED_FLOAT; // because of scaling_factor
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.xpack.ql.QlIllegalArgumentException;
import org.elasticsearch.xpack.ql.type.ConstantKeywordEsField;
import org.elasticsearch.xpack.ql.type.DataType;
import org.elasticsearch.xpack.ql.type.DataTypeRegistry;
import org.elasticsearch.xpack.ql.type.DateEsField;
Expand Down Expand Up @@ -60,6 +61,7 @@
import static java.util.Collections.emptyMap;
import static java.util.Collections.emptySet;
import static org.elasticsearch.action.ActionListener.wrap;
import static org.elasticsearch.xpack.ql.type.DataTypes.CONSTANT_KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.DATETIME;
import static org.elasticsearch.xpack.ql.type.DataTypes.KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.OBJECT;
Expand Down Expand Up @@ -298,8 +300,13 @@ public static IndexResolution mergedMappings(DataTypeRegistry typeRegistry, Stri
StringBuilder errorMessage = new StringBuilder();

boolean hasUnmapped = types.containsKey(UNMAPPED);
// a keyword field and a constant_keyword field with the same name in two different indices are considered "compatible"
// since a common use case of constant_keyword field involves two indices with a field having the same name: one being
// a keyword, the other being a constant_keyword
boolean hasCompatibleKeywords = types.containsKey(KEYWORD.esType()) && types.containsKey(CONSTANT_KEYWORD.esType());
int allowedTypesCount = (hasUnmapped ? 2 : 1) + (hasCompatibleKeywords ? 1 : 0);

if (types.size() > (hasUnmapped ? 2 : 1)) {
if (types.size() > allowedTypesCount) {
// build the error message
// and create a MultiTypeField

Expand Down Expand Up @@ -344,6 +351,11 @@ public static IndexResolution mergedMappings(DataTypeRegistry typeRegistry, Stri
}
}

// if there are both a keyword and a constant_keyword type for this field, only keep the keyword as a common compatible type
if (hasCompatibleKeywords) {
types.remove(CONSTANT_KEYWORD.esType());
Copy link
Member

@costin costin Mar 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the type altogether might remove useful information.
For example, if a field a is mapped as keyword in index X and constant_keyword in index Y, the piece above will find the collision and remove the field from Y, resulting in a mapping only in X.
On one hand, it does indicate that there's only field a; however, each field has index information underneath, so in this case it would be just for index X. I'm not sure, though, whether we take that into account or not.
What happens if there's no removal?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This scenario is the one where two indices have the same named field with both keyword and constant_keyword types and their mappings need to be merged. For other scenarios (where the same name is used for two fields with different types) we tell the user that the field cannot be shown (because of the different mapping types), but in this very specific case and since these two field types are so similar I chose to display the field with the "common" data type as keyword.

The controversy for what to do in this specific case came from the scenario described here and is specifically tested here.

}

// everything checks
return null;
});
Expand Down Expand Up @@ -435,6 +447,9 @@ private static EsField createField(DataTypeRegistry typeRegistry, String fieldNa
if (esType == DATETIME) {
return new DateEsField(fieldName, props, isAggregateable);
}
if (esType == CONSTANT_KEYWORD) {
return new ConstantKeywordEsField(fieldName);
}
if (esType == UNSUPPORTED) {
return new UnsupportedEsField(fieldName, typeName, null, props);
}
Expand Down Expand Up @@ -501,14 +516,14 @@ private static List<EsIndex> buildIndices(DataTypeRegistry typeRegistry, String[

for (Entry<String, Map<String, FieldCapabilities>> entry : sortedFields) {
String fieldName = entry.getKey();
Map<String, FieldCapabilities> types = entry.getValue();

// ignore size added by the mapper plugin
if (FIELD_NAMES_BLACKLIST.contains(fieldName)) {
continue;
}

// apply verification
Map<String, FieldCapabilities> types = new LinkedHashMap<>(entry.getValue());
// apply verification and possibly remove the "duplicate" CONSTANT_KEYWORD field type
final InvalidMappedField invalidField = validityVerifier.apply(fieldName, types);

// filter meta fields and unmapped
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.ql.type;

import java.util.Collections;

import static org.elasticsearch.xpack.ql.type.DataTypes.CONSTANT_KEYWORD;

/**
 * Describes an index field whose mapping type is {@code constant_keyword}.
 * <p>
 * The field reuses everything {@link KeywordEsField} offers and only swaps the
 * reported data type for {@code CONSTANT_KEYWORD}; every other attribute is
 * pinned to the fixed values declared below, so a field name is all a caller
 * has to supply.
 */
public class ConstantKeywordEsField extends KeywordEsField {

    // Fixed attributes handed to the KeywordEsField constructor.
    private static final boolean HAS_DOC_VALUES = true;
    private static final int PRECISION = Short.MAX_VALUE;
    private static final boolean NORMALIZED = false;
    private static final boolean IS_ALIAS = false;

    /**
     * Creates the descriptor for the {@code constant_keyword} field with the given name.
     *
     * @param name the name of the field
     */
    public ConstantKeywordEsField(String name) {
        super(
            name,
            CONSTANT_KEYWORD,
            Collections.emptyMap(), // no sub-fields / properties
            HAS_DOC_VALUES,
            PRECISION,
            NORMALIZED,
            IS_ALIAS
        );
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import static org.elasticsearch.xpack.ql.type.DataTypes.BOOLEAN;
import static org.elasticsearch.xpack.ql.type.DataTypes.BYTE;
import static org.elasticsearch.xpack.ql.type.DataTypes.CONSTANT_KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.DATETIME;
import static org.elasticsearch.xpack.ql.type.DataTypes.DOUBLE;
import static org.elasticsearch.xpack.ql.type.DataTypes.FLOAT;
Expand Down Expand Up @@ -59,9 +60,12 @@ public static DataType commonType(DataType left, DataType right) {
return left;
}
if (isString(left) && isString(right)) {
if (left == TEXT) {
if (left == TEXT || right == TEXT) {
return TEXT;
}
if (left == KEYWORD) {
bpintea marked this conversation as resolved.
Show resolved Hide resolved
return KEYWORD;
}
return right;
}
if (left.isNumeric() && right.isNumeric()) {
Expand Down Expand Up @@ -120,7 +124,7 @@ public static Converter converterFor(DataType from, DataType to) {
return DefaultConverter.TO_NULL;
}
// proper converters
if (to == KEYWORD || to == TEXT) {
if (to == KEYWORD || to == TEXT || to == CONSTANT_KEYWORD) {
return conversionToString(from);
}
if (to == LONG) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,34 @@
public final class DataTypes {

// @formatter:off
public static final DataType UNSUPPORTED = new DataType("UNSUPPORTED", null, 0, false, false, false);
public static final DataType UNSUPPORTED = new DataType("UNSUPPORTED", null, 0, false, false, false);

public static final DataType NULL = new DataType("null", 0, false, false, false);
public static final DataType NULL = new DataType("null", 0, false, false, false);

public static final DataType BOOLEAN = new DataType("boolean", 1, false, false, false);
public static final DataType BOOLEAN = new DataType("boolean", 1, false, false, false);
// integer numeric
public static final DataType BYTE = new DataType("byte", Byte.BYTES, true, false, true);
public static final DataType SHORT = new DataType("short", Short.BYTES, true, false, true);
public static final DataType INTEGER = new DataType("integer", Integer.BYTES, true, false, true);
public static final DataType LONG = new DataType("long", Long.BYTES, true, false, true);
public static final DataType BYTE = new DataType("byte", Byte.BYTES, true, false, true);
public static final DataType SHORT = new DataType("short", Short.BYTES, true, false, true);
public static final DataType INTEGER = new DataType("integer", Integer.BYTES, true, false, true);
public static final DataType LONG = new DataType("long", Long.BYTES, true, false, true);
// decimal numeric
public static final DataType DOUBLE = new DataType("double", Double.BYTES, false, true, true);
public static final DataType FLOAT = new DataType("float", Float.BYTES, false, true, true);
public static final DataType HALF_FLOAT = new DataType("half_float", Float.BYTES, false, true, true);
public static final DataType SCALED_FLOAT = new DataType("scaled_float", Long.BYTES, false, true, true);
public static final DataType DOUBLE = new DataType("double", Double.BYTES, false, true, true);
public static final DataType FLOAT = new DataType("float", Float.BYTES, false, true, true);
public static final DataType HALF_FLOAT = new DataType("half_float", Float.BYTES, false, true, true);
public static final DataType SCALED_FLOAT = new DataType("scaled_float", Long.BYTES, false, true, true);
// string
public static final DataType KEYWORD = new DataType("keyword", Integer.MAX_VALUE, false, false, true);
public static final DataType TEXT = new DataType("text", Integer.MAX_VALUE, false, false, false);
public static final DataType KEYWORD = new DataType("keyword", Integer.MAX_VALUE, false, false, true);
public static final DataType TEXT = new DataType("text", Integer.MAX_VALUE, false, false, false);
public static final DataType CONSTANT_KEYWORD = new DataType("constant_keyword", Integer.MAX_VALUE, false, false, true);
// date
public static final DataType DATETIME = new DataType("DATETIME", "date", Long.BYTES, false, false, true);
public static final DataType DATETIME = new DataType("DATETIME", "date", Long.BYTES, false, false, true);
// ip
public static final DataType IP = new DataType("ip", 45, false, false, true);
// binary
public static final DataType BINARY = new DataType("binary", Integer.MAX_VALUE, false, false, true);
public static final DataType BINARY = new DataType("binary", Integer.MAX_VALUE, false, false, true);
// complex types
public static final DataType OBJECT = new DataType("object", 0, false, false, false);
public static final DataType NESTED = new DataType("nested", 0, false, false, false);
public static final DataType OBJECT = new DataType("object", 0, false, false, false);
public static final DataType NESTED = new DataType("nested", 0, false, false, false);
//@formatter:on

private static final Collection<DataType> TYPES = Arrays.asList(
Expand All @@ -61,6 +62,7 @@ public final class DataTypes {
SCALED_FLOAT,
KEYWORD,
TEXT,
CONSTANT_KEYWORD,
DATETIME,
IP,
BINARY,
Expand Down Expand Up @@ -132,7 +134,7 @@ public static boolean isUnsupported(DataType from) {
}

public static boolean isString(DataType t) {
return t == KEYWORD || t == TEXT;
return t == KEYWORD || t == TEXT || t == CONSTANT_KEYWORD;
}

public static boolean isPrimitive(DataType t) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@ public KeywordEsField(String name, Map<String, EsField> properties, boolean hasD

public KeywordEsField(String name, Map<String, EsField> properties, boolean hasDocValues, int precision,
boolean normalized, boolean isAlias) {
super(name, KEYWORD, properties, hasDocValues, isAlias);
this(name, KEYWORD, properties, hasDocValues, precision, normalized, isAlias);
}

protected KeywordEsField(String name, DataType esDataType, Map<String, EsField> properties, boolean hasDocValues, int precision,
boolean normalized, boolean isAlias) {
super(name, esDataType, properties, hasDocValues, isAlias);
this.precision = precision;
this.normalized = normalized;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.Map;
import java.util.function.Function;

import static org.elasticsearch.xpack.ql.type.DataTypes.CONSTANT_KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.TEXT;

Expand Down Expand Up @@ -44,7 +45,7 @@ public Exact getExactInfo() {
private Tuple<EsField, String> findExact() {
EsField field = null;
for (EsField property : getProperties().values()) {
if (property.getDataType() == KEYWORD && property.getExactInfo().hasExact()) {
if ((property.getDataType() == KEYWORD || property.getDataType() == CONSTANT_KEYWORD) && property.getExactInfo().hasExact()) {
if (field != null) {
return new Tuple<>(null, "Multiple exact keyword candidates available for [" + getName() +
"]; specify which one to use");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.Map.Entry;

import static java.util.Collections.emptyMap;
import static org.elasticsearch.xpack.ql.type.DataTypes.CONSTANT_KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.DATETIME;
import static org.elasticsearch.xpack.ql.type.DataTypes.KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.NESTED;
Expand Down Expand Up @@ -89,6 +90,8 @@ private static void walkMapping(DataTypeRegistry typeRegistry, String name, Obje
int length = intSetting(content.get("ignore_above"), Short.MAX_VALUE);
boolean normalized = Strings.hasText(textSetting(content.get("normalizer"), null));
field = new KeywordEsField(name, properties, docValues, length, normalized);
} else if (esDataType == CONSTANT_KEYWORD) {
field = new ConstantKeywordEsField(name);
} else if (esDataType == DATETIME) {
field = new DateEsField(name, properties, docValues);
} else if (esDataType == UNSUPPORTED) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.function.Supplier;

import static java.util.Collections.emptyList;
import static org.elasticsearch.xpack.ql.type.DataTypes.CONSTANT_KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.BOOLEAN;
import static org.elasticsearch.xpack.ql.type.DataTypes.BYTE;
import static org.elasticsearch.xpack.ql.type.DataTypes.DOUBLE;
Expand Down Expand Up @@ -53,7 +54,7 @@ static class ValueAndCompatibleTypes {
new ValueAndCompatibleTypes(ESTestCase::randomLong, LONG, FLOAT, DOUBLE, BOOLEAN),
new ValueAndCompatibleTypes(ESTestCase::randomFloat, FLOAT, LONG, DOUBLE, BOOLEAN),
new ValueAndCompatibleTypes(ESTestCase::randomDouble, DOUBLE, LONG, FLOAT, BOOLEAN),
new ValueAndCompatibleTypes(() -> randomAlphaOfLength(5), KEYWORD));
new ValueAndCompatibleTypes(() -> randomAlphaOfLength(5), KEYWORD, CONSTANT_KEYWORD));

public static Literal randomLiteral() {
ValueAndCompatibleTypes gen = randomFrom(GENERATORS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import static org.elasticsearch.xpack.ql.type.DataTypeConverter.converterFor;
import static org.elasticsearch.xpack.ql.type.DataTypes.BOOLEAN;
import static org.elasticsearch.xpack.ql.type.DataTypes.BYTE;
import static org.elasticsearch.xpack.ql.type.DataTypes.CONSTANT_KEYWORD;
import static org.elasticsearch.xpack.ql.type.DataTypes.DATETIME;
import static org.elasticsearch.xpack.ql.type.DataTypes.DOUBLE;
import static org.elasticsearch.xpack.ql.type.DataTypes.FLOAT;
Expand Down Expand Up @@ -361,6 +362,7 @@ public void testCommonType() {
assertEquals(BOOLEAN, commonType(BOOLEAN, BOOLEAN));
assertEquals(NULL, commonType(NULL, NULL));
assertEquals(INTEGER, commonType(INTEGER, KEYWORD));
assertEquals(DOUBLE, commonType(DOUBLE, CONSTANT_KEYWORD));
assertEquals(LONG, commonType(TEXT, LONG));
assertEquals(SHORT, commonType(SHORT, BYTE));
assertEquals(FLOAT, commonType(BYTE, FLOAT));
Expand All @@ -370,6 +372,11 @@ public void testCommonType() {
// strings
assertEquals(TEXT, commonType(TEXT, KEYWORD));
assertEquals(TEXT, commonType(KEYWORD, TEXT));
assertEquals(TEXT, commonType(TEXT, CONSTANT_KEYWORD));
assertEquals(TEXT, commonType(CONSTANT_KEYWORD, TEXT));
assertEquals(KEYWORD, commonType(KEYWORD, CONSTANT_KEYWORD));
assertEquals(KEYWORD, commonType(CONSTANT_KEYWORD, KEYWORD));
assertEquals(CONSTANT_KEYWORD, commonType(CONSTANT_KEYWORD, CONSTANT_KEYWORD));
}

public void testEsDataTypes() {
Expand Down
Loading