Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ public final void test() throws Throwable {
}

protected void shouldSkipTest(String testName) throws IOException {
if (testCase.requiredCapabilities.contains("semantic_text_type")) {
if (testCase.requiredCapabilities.contains("semantic_text_type")
|| testCase.requiredCapabilities.contains("semantic_text_aggregations")) {
assumeTrue("Inference test service needs to be supported for semantic_text", supportsInferenceTestService());
}
checkCapabilities(adminClient(), testFeatureService, testName, testCase);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@
"st_base64": {
"type": "semantic_text",
"inference_id": "test_sparse_inference"
},
"st_logs": {
"type": "semantic_text",
"inference_id": "test_sparse_inference"
}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
_id:keyword,semantic_text_field:semantic_text,st_bool:semantic_text,st_cartesian_point:semantic_text,st_cartesian_shape:semantic_text,st_datetime:semantic_text,st_double:semantic_text,st_geopoint:semantic_text,st_geoshape:semantic_text,st_integer:semantic_text,st_ip:semantic_text,st_long:semantic_text,st_unsigned_long:semantic_text,st_version:semantic_text,st_multi_value:semantic_text,st_unicode:semantic_text,host:keyword,description:text,value:long,st_base64:semantic_text
1,live long and prosper,false,"POINT(4297.11 -1475.53)",,1953-09-02T00:00:00.000Z,5.20128E11,"POINT(42.97109630194 14.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",23,1.1.1.1,2147483648,2147483648,1.2.3,["Hello there!", "This is a random value", "for testing purposes"],你吃饭了吗,"host1","some description1",1001,ZWxhc3RpYw==
2,all we have to decide is what to do with the time that is given to us,true,"POINT(7580.93 2272.77)",,2023-09-24T15:57:00.000Z,4541.11,"POINT(37.97109630194 21.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",122,1.1.2.1,123,2147483648.2,9.0.0,["nice to meet you", "bye bye!"],["谢谢", "对不起我的中文不好"],"host2","some description2",1002,aGVsbG8=
3,be excellent to each other,,,,,,,,,,,,,,,"host3","some description3",1003,
_id:keyword,semantic_text_field:semantic_text,st_bool:semantic_text,st_cartesian_point:semantic_text,st_cartesian_shape:semantic_text,st_datetime:semantic_text,st_double:semantic_text,st_geopoint:semantic_text,st_geoshape:semantic_text,st_integer:semantic_text,st_ip:semantic_text,st_long:semantic_text,st_unsigned_long:semantic_text,st_version:semantic_text,st_multi_value:semantic_text,st_unicode:semantic_text,host:keyword,description:text,value:long,st_base64:semantic_text,st_logs:semantic_text
1,live long and prosper,false,"POINT(4297.11 -1475.53)",,1953-09-02T00:00:00.000Z,5.20128E11,"POINT(42.97109630194 14.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",23,1.1.1.1,2147483648,2147483648,1.2.3,["Hello there!", "This is a random value", "for testing purposes"],你吃饭了吗,"host1","some description1",1001,ZWxhc3RpYw==,"2024-12-23T12:15:00.000Z 1.2.3.4 example@example.com 4553"
2,all we have to decide is what to do with the time that is given to us,true,"POINT(7580.93 2272.77)",,2023-09-24T15:57:00.000Z,4541.11,"POINT(37.97109630194 21.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",122,1.1.2.1,123,2147483648.2,9.0.0,["nice to meet you", "bye bye!"],["谢谢", "对不起我的中文不好"],"host2","some description2",1002,aGVsbG8=,"2024-01-23T12:15:00.000Z 1.2.3.4 foo@example.com 42"
3,be excellent to each other,,,,,,,,,,,,,,,"host3","some description3",1003,,"2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,75 @@ _id:keyword | my_field:semantic_text
3 | be excellent to each other
;

simpleStats
required_capability: semantic_text_type
statsWithCount
required_capability: semantic_text_aggregations

FROM semantic_text METADATA _id
| STATS result = COUNT(st_version)
;

result:long
2
;

statsWithCountDistinct
required_capability: semantic_text_aggregations

FROM semantic_text METADATA _id
| STATS result = COUNT_DISTINCT(st_version)
;

result:long
2
;

statsWithValues
required_capability: semantic_text_aggregations

FROM semantic_text METADATA _id
| STATS result = VALUES(st_version)
| EVAL result = MV_SORT(result)
;

result:keyword
["1.2.3", "9.0.0"]
;

statsWithMin
required_capability: semantic_text_aggregations

FROM semantic_text METADATA _id
| STATS result = min(st_version)
;

result:keyword
1.2.3
;

statsWithMax
required_capability: semantic_text_aggregations

FROM semantic_text METADATA _id
| STATS result = max(st_version)
;

result:keyword
9.0.0
;

statsWithTop
required_capability: semantic_text_aggregations

FROM semantic_text METADATA _id
| STATS result = top(st_version, 2, "asc")
;

result:keyword
["1.2.3", "9.0.0"]
;

statsWithGrouping
required_capability: semantic_text_type
required_capability: semantic_text_aggregations

FROM semantic_text METADATA _id
| STATS COUNT(*) BY st_version
Expand Down Expand Up @@ -132,6 +188,36 @@ COUNT(*):long | my_field:semantic_text
1 | bye bye!
;

grok
required_capability: semantic_text_type

FROM semantic_text METADATA _id
| GROK st_logs """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}"""
| KEEP st_logs, date, ip, email, num
| SORT st_logs
;

st_logs:semantic_text | date:keyword | ip:keyword | email:keyword | num:keyword
2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42 | 2023-01-23T12:15:00.000Z | 127.0.0.1 | some.email@foo.com | 42
2024-01-23T12:15:00.000Z 1.2.3.4 foo@example.com 42 | 2024-01-23T12:15:00.000Z | 1.2.3.4 | foo@example.com | 42
2024-12-23T12:15:00.000Z 1.2.3.4 example@example.com 4553 | 2024-12-23T12:15:00.000Z | 1.2.3.4 | example@example.com | 4553
;

dissect
required_capability: semantic_text_type

FROM semantic_text METADATA _id
| DISSECT st_logs """%{date} %{ip} %{email} %{num}"""
| KEEP st_logs, date, ip, email, num
| SORT st_logs
;

st_logs:semantic_text | date:keyword | ip:keyword | email:keyword | num:keyword
2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42 | 2023-01-23T12:15:00.000Z | 127.0.0.1 | some.email@foo.com | 42
2024-01-23T12:15:00.000Z 1.2.3.4 foo@example.com 42 | 2024-01-23T12:15:00.000Z | 1.2.3.4 | foo@example.com | 42
2024-12-23T12:15:00.000Z 1.2.3.4 example@example.com 4553 | 2024-12-23T12:15:00.000Z | 1.2.3.4 | example@example.com | 4553
;

simpleWithLongValue
required_capability: semantic_text_type

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,12 @@ public enum Cap {
/**
* Fix for https://github.com/elastic/elasticsearch/issues/117054
*/
FIX_NESTED_FIELDS_NAME_CLASH_IN_INDEXRESOLVER;
FIX_NESTED_FIELDS_NAME_CLASH_IN_INDEXRESOLVER,

/**
* support for aggregations on semantic_text
*/
SEMANTIC_TEXT_AGGREGATIONS(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG);

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ public class CountDistinct extends AggregateFunction implements OptionalArgument
Map.entry(DataType.KEYWORD, CountDistinctBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.IP, CountDistinctBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.VERSION, CountDistinctBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.TEXT, CountDistinctBytesRefAggregatorFunctionSupplier::new)
Map.entry(DataType.TEXT, CountDistinctBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.SEMANTIC_TEXT, CountDistinctBytesRefAggregatorFunctionSupplier::new)
);

private static final int DEFAULT_PRECISION = 3000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public class Max extends AggregateFunction implements ToAggregator, SurrogateExp
Map.entry(DataType.IP, MaxIpAggregatorFunctionSupplier::new),
Map.entry(DataType.KEYWORD, MaxBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.TEXT, MaxBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.SEMANTIC_TEXT, MaxBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.VERSION, MaxBytesRefAggregatorFunctionSupplier::new)
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ public class Min extends AggregateFunction implements ToAggregator, SurrogateExp
Map.entry(DataType.IP, MinIpAggregatorFunctionSupplier::new),
Map.entry(DataType.VERSION, MinBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.KEYWORD, MinBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.TEXT, MinBytesRefAggregatorFunctionSupplier::new)
Map.entry(DataType.TEXT, MinBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.SEMANTIC_TEXT, MinBytesRefAggregatorFunctionSupplier::new)
);

@FunctionInfo(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public class Values extends AggregateFunction implements ToAggregator {
Map.entry(DataType.DOUBLE, ValuesDoubleAggregatorFunctionSupplier::new),
Map.entry(DataType.KEYWORD, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.TEXT, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.SEMANTIC_TEXT, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.IP, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.VERSION, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.BOOLEAN, ValuesBooleanAggregatorFunctionSupplier::new)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,12 +302,13 @@ private static String dataTypeToString(DataType type, Class<?> aggClass) {
case DataType.INTEGER, DataType.COUNTER_INTEGER -> "Int";
case DataType.LONG, DataType.DATETIME, DataType.COUNTER_LONG, DataType.DATE_NANOS -> "Long";
case DataType.DOUBLE, DataType.COUNTER_DOUBLE -> "Double";
case DataType.KEYWORD, DataType.IP, DataType.VERSION, DataType.TEXT -> "BytesRef";
case DataType.KEYWORD, DataType.IP, DataType.VERSION, DataType.TEXT, DataType.SEMANTIC_TEXT -> "BytesRef";
case GEO_POINT -> "GeoPoint";
case CARTESIAN_POINT -> "CartesianPoint";
case SEMANTIC_TEXT, UNSUPPORTED, NULL, UNSIGNED_LONG, SHORT, BYTE, FLOAT, HALF_FLOAT, SCALED_FLOAT, OBJECT, SOURCE, DATE_PERIOD,
TIME_DURATION, CARTESIAN_SHAPE, GEO_SHAPE, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG ->
throw new EsqlIllegalArgumentException("illegal agg type: " + type.typeName());
case UNSUPPORTED, NULL, UNSIGNED_LONG, SHORT, BYTE, FLOAT, HALF_FLOAT, SCALED_FLOAT, OBJECT, SOURCE, DATE_PERIOD, TIME_DURATION,
CARTESIAN_SHAPE, GEO_SHAPE, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG -> throw new EsqlIllegalArgumentException(
"illegal agg type: " + type.typeName()
);
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.ipCases(1, 1000),
MultiRowTestCaseSupplier.versionCases(1, 1000),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.SEMANTIC_TEXT)
).flatMap(List::stream).forEach(fieldCaseSupplier -> {
// With precision
for (var precisionCaseSupplier : precisionSuppliers) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.geoPointCases(1, 1000, true),
MultiRowTestCaseSupplier.cartesianPointCases(1, 1000, true),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.SEMANTIC_TEXT)
).flatMap(List::stream).map(CountTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers));

// No rows
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.ipCases(1, 1000),
MultiRowTestCaseSupplier.versionCases(1, 1000),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.SEMANTIC_TEXT)
).flatMap(List::stream).map(MaxTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers));

suppliers.addAll(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.ipCases(1, 1000),
MultiRowTestCaseSupplier.versionCases(1, 1000),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.SEMANTIC_TEXT)
).flatMap(List::stream).map(MinTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers));

suppliers.addAll(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.booleanCases(1, 1000),
MultiRowTestCaseSupplier.ipCases(1, 1000),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT),
MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.SEMANTIC_TEXT)
)
.flatMap(List::stream)
.map(fieldCaseSupplier -> TopTests.makeSupplier(fieldCaseSupplier, limitCaseSupplier, order))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.versionCases(1, 1000),
// Lower values for strings, as they take more space and may trigger the circuit breaker
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.TEXT)
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.TEXT),
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.SEMANTIC_TEXT)
).flatMap(List::stream).map(ValuesTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers));

return parameterSuppliersFromTypedDataWithDefaultChecks(
Expand Down