Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/117246.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 117246
summary: LOOKUP JOIN using field-caps for field mapping
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V3;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V4;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST;
import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC;
Expand Down Expand Up @@ -124,7 +124,7 @@ protected void shouldSkipTest(String testName) throws IOException {
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName()));
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName()));
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName()));
assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V3.capabilityName()));
assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V4.capabilityName()));
}

private TestFeatureService remoteFeaturesService() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ public class CsvTestsDataLoader {
private static final TestsDataset APPS = new TestsDataset("apps");
private static final TestsDataset APPS_SHORT = APPS.withIndex("apps_short").withTypeMapping(Map.of("id", "short"));
private static final TestsDataset LANGUAGES = new TestsDataset("languages");
// private static final TestsDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup")
// .withSetting("languages_lookup-settings.json");
private static final TestsDataset ALERTS = new TestsDataset("alerts");
private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs");
private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data");
Expand All @@ -70,6 +72,11 @@ public class CsvTestsDataLoader {
.withTypeMapping(Map.of("@timestamp", "date_nanos"));
private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset("missing_ip_sample_data");
private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips");
// private static final TestsDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup")
// .withSetting("clientips_lookup-settings.json");
private static final TestsDataset MESSAGE_TYPES = new TestsDataset("message_types");
// private static final TestsDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup")
// .withSetting("message_types_lookup-settings.json");
private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr");
private static final TestsDataset AGES = new TestsDataset("ages");
private static final TestsDataset HEIGHTS = new TestsDataset("heights");
Expand All @@ -94,14 +101,13 @@ public class CsvTestsDataLoader {
private static final TestsDataset BOOKS = new TestsDataset("books");
private static final TestsDataset SEMANTIC_TEXT = new TestsDataset("semantic_text").withInferenceEndpoint(true);

private static final String LOOKUP_INDEX_SUFFIX = "_lookup";

public static final Map<String, TestsDataset> CSV_DATASET_MAP = Map.ofEntries(
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
Map.entry(HOSTS.indexName, HOSTS),
Map.entry(APPS.indexName, APPS),
Map.entry(APPS_SHORT.indexName, APPS_SHORT),
Map.entry(LANGUAGES.indexName, LANGUAGES),
// Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP),
Map.entry(UL_LOGS.indexName, UL_LOGS),
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
Map.entry(MV_SAMPLE_DATA.indexName, MV_SAMPLE_DATA),
Expand All @@ -111,6 +117,9 @@ public class CsvTestsDataLoader {
Map.entry(SAMPLE_DATA_TS_NANOS.indexName, SAMPLE_DATA_TS_NANOS),
Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA),
Map.entry(CLIENT_IPS.indexName, CLIENT_IPS),
// Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP),
Map.entry(MESSAGE_TYPES.indexName, MESSAGE_TYPES),
// Map.entry(MESSAGE_TYPES_LOOKUP.indexName, MESSAGE_TYPES_LOOKUP),
Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR),
Map.entry(AGES.indexName, AGES),
Map.entry(HEIGHTS.indexName, HEIGHTS),
Expand All @@ -132,9 +141,7 @@ public class CsvTestsDataLoader {
Map.entry(DISTANCES.indexName, DISTANCES),
Map.entry(ADDRESSES.indexName, ADDRESSES),
Map.entry(BOOKS.indexName, BOOKS),
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
// JOIN LOOKUP alias
Map.entry(LANGUAGES.indexName + LOOKUP_INDEX_SUFFIX, LANGUAGES.withIndex(LANGUAGES.indexName + LOOKUP_INDEX_SUFFIX))
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT)
);

private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
Expand Down Expand Up @@ -174,13 +181,14 @@ public class CsvTestsDataLoader {
* </p>
* <p>
* Accepts a URL as the first argument, e.g. http://localhost:9200 or http://user:pass@localhost:9200
*</p>
* </p>
* <p>
* If no arguments are specified, the default URL is http://localhost:9200 without authentication
* </p>
* <p>
* It also supports HTTPS
* </p>
*
* @param args the URL to connect to
* @throws IOException
*/
Expand Down Expand Up @@ -270,7 +278,9 @@ private static void loadDataSetIntoEs(RestClient client, IndexCreator indexCreat
}
}

/** The semantic_text mapping type require an inference endpoint that needs to be setup before creating the index. */
/**
* The semantic_text mapping type requires an inference endpoint that needs to be set up before creating the index.
*/
public static void createInferenceEndpoint(RestClient client) throws IOException {
Request request = new Request("PUT", "_inference/sparse_embedding/test_sparse_inference");
request.setJsonEntity("""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"index": {
"mode": "lookup"
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
language_code:keyword,language_name:keyword
language_code:integer,language_name:keyword
1,English
2,French
3,Spanish
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"index": {
"mode": "lookup"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
//

//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order)
basicOnTheDataNode-Ignore
required_capability: join_lookup_v3
basicOnTheDataNode
required_capability: join_lookup_v4

FROM employees
| EVAL language_code = languages
Expand All @@ -21,19 +21,19 @@ emp_no:integer | language_code:integer | language_name:keyword
10093 | 3 | Spanish
;

basicRow-Ignore
required_capability: join_lookup_v3
basicRow
required_capability: join_lookup_v4

ROW language_code = 1
| LOOKUP JOIN languages_lookup ON language_code
;

language_code:keyword | language_name:keyword
language_code:integer | language_name:keyword
1 | English
;

basicOnTheCoordinator
required_capability: join_lookup_v3
required_capability: join_lookup_v4

FROM employees
| SORT emp_no
Expand All @@ -49,9 +49,8 @@ emp_no:integer | language_code:integer | language_name:keyword
10003 | 4 | German
;

//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order)
subsequentEvalOnTheDataNode-Ignore
required_capability: join_lookup_v3
subsequentEvalOnTheDataNode
required_capability: join_lookup_v4

FROM employees
| EVAL language_code = languages
Expand All @@ -69,7 +68,7 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x
;

subsequentEvalOnTheCoordinator
required_capability: join_lookup_v3
required_capability: join_lookup_v4

FROM employees
| SORT emp_no
Expand All @@ -85,3 +84,208 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x
10002 | 5 | null | 10
10003 | 4 | german | 8
;

lookupIPFromRow
required_capability: join_lookup_v4

ROW left = "left", client_ip = "172.21.0.5", right = "right"
| LOOKUP JOIN clientips_lookup ON client_ip
;

left:keyword | client_ip:keyword | right:keyword | env:keyword
left | 172.21.0.5 | right | Development
;

lookupIPFromRowWithShadowing
required_capability: join_lookup_v4

ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right"
| LOOKUP JOIN clientips_lookup ON client_ip
;

left:keyword | client_ip:keyword | right:keyword | env:keyword
left | 172.21.0.5 | right | Development
;

lookupIPFromRowWithShadowingKeep
required_capability: join_lookup_v4

ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right"
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| KEEP left, client_ip, right, env
;

left:keyword | client_ip:keyword | right:keyword | env:keyword
left | 172.21.0.5 | right | Development
;

lookupIPFromIndex
required_capability: join_lookup_v4

FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
;

@timestamp:date | event_duration:long | message:keyword | client_ip:keyword | env:keyword
2023-10-23T13:55:01.543Z | 1756467 | Connected to 10.1.0.1 | 172.21.3.15 | Production
2023-10-23T13:53:55.832Z | 5033755 | Connection error | 172.21.3.15 | Production
2023-10-23T13:52:55.015Z | 8268153 | Connection error | 172.21.3.15 | Production
2023-10-23T13:51:54.732Z | 725448 | Connection error | 172.21.3.15 | Production
2023-10-23T13:33:34.937Z | 1232382 | Disconnected | 172.21.0.5 | Development
2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 | 172.21.2.113 | QA
2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 | 172.21.2.162 | QA
;

lookupIPFromIndexKeep
required_capability: join_lookup_v4

FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| KEEP @timestamp, client_ip, event_duration, message, env
;

@timestamp:date | client_ip:keyword | event_duration:long | message:keyword | env:keyword
2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Production
2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Production
2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Production
2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Production
2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Development
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | QA
2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | QA
;

lookupIPFromIndexStats
required_capability: join_lookup_v4

FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| STATS count = count(client_ip) BY env
| SORT count DESC, env ASC
;

count:long | env:keyword
4 | Production
2 | QA
1 | Development
;

lookupIPFromIndexStatsKeep
required_capability: join_lookup_v4

FROM sample_data
| EVAL client_ip = client_ip::keyword
| LOOKUP JOIN clientips_lookup ON client_ip
| KEEP client_ip, env
| STATS count = count(client_ip) BY env
| SORT count DESC, env ASC
;

count:long | env:keyword
4 | Production
2 | QA
1 | Development
;

lookupMessageFromRow
required_capability: join_lookup_v4

ROW left = "left", message = "Connected to 10.1.0.1", right = "right"
| LOOKUP JOIN message_types_lookup ON message
;

left:keyword | message:keyword | right:keyword | type:keyword
left | Connected to 10.1.0.1 | right | Success
;

lookupMessageFromRowWithShadowing
required_capability: join_lookup_v4

ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right"
| LOOKUP JOIN message_types_lookup ON message
;

left:keyword | message:keyword | right:keyword | type:keyword
left | Connected to 10.1.0.1 | right | Success
;

lookupMessageFromRowWithShadowingKeep
required_capability: join_lookup_v4

ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right"
| LOOKUP JOIN message_types_lookup ON message
| KEEP left, message, right, type
;

left:keyword | message:keyword | right:keyword | type:keyword
left | Connected to 10.1.0.1 | right | Success
;

lookupMessageFromIndex
required_capability: join_lookup_v4

FROM sample_data
| LOOKUP JOIN message_types_lookup ON message
;

@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword
2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success
2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error
2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error
2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error
2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success
2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success
;

lookupMessageFromIndexKeep
required_capability: join_lookup_v4

FROM sample_data
| LOOKUP JOIN message_types_lookup ON message
| KEEP @timestamp, client_ip, event_duration, message, type
;

@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword
2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success
2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error
2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error
2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error
2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success
2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success
;

lookupMessageFromIndexStats
required_capability: join_lookup_v4

FROM sample_data
| LOOKUP JOIN message_types_lookup ON message
| STATS count = count(message) BY type
| SORT count DESC, type ASC
;

count:long | type:keyword
3 | Error
3 | Success
1 | Disconnected
;

lookupMessageFromIndexStatsKeep
required_capability: join_lookup_v4

FROM sample_data
| LOOKUP JOIN message_types_lookup ON message
| KEEP message, type
| STATS count = count(message) BY type
| SORT count DESC, type ASC
;

count:long | type:keyword
3 | Error
3 | Success
1 | Disconnected
;
Loading