diff --git a/docs/changelog/117246.yaml b/docs/changelog/117246.yaml
new file mode 100644
index 0000000000000..29c4464855967
--- /dev/null
+++ b/docs/changelog/117246.yaml
@@ -0,0 +1,5 @@
+pr: 117246
+summary: LOOKUP JOIN using field-caps for field mapping
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java
index 6c7b700af5b1a..e658d169cbce8 100644
--- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java
+++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java
@@ -48,7 +48,7 @@
import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2;
-import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V3;
+import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V4;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST;
import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC;
@@ -124,7 +124,7 @@ protected void shouldSkipTest(String testName) throws IOException {
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName()));
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName()));
assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName()));
- assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V3.capabilityName()));
+ assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V4.capabilityName()));
}
private TestFeatureService remoteFeaturesService() throws IOException {
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
index 32e244f4b729d..2cb89a9c0aca5 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
@@ -56,6 +56,8 @@ public class CsvTestsDataLoader {
private static final TestsDataset APPS = new TestsDataset("apps");
private static final TestsDataset APPS_SHORT = APPS.withIndex("apps_short").withTypeMapping(Map.of("id", "short"));
private static final TestsDataset LANGUAGES = new TestsDataset("languages");
+ // private static final TestsDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup")
+ // .withSetting("languages_lookup-settings.json");
private static final TestsDataset ALERTS = new TestsDataset("alerts");
private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs");
private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data");
@@ -70,6 +72,11 @@ public class CsvTestsDataLoader {
.withTypeMapping(Map.of("@timestamp", "date_nanos"));
private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset("missing_ip_sample_data");
private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips");
+ // private static final TestsDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup")
+ // .withSetting("clientips_lookup-settings.json");
+ private static final TestsDataset MESSAGE_TYPES = new TestsDataset("message_types");
+ // private static final TestsDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup")
+ // .withSetting("message_types_lookup-settings.json");
private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr");
private static final TestsDataset AGES = new TestsDataset("ages");
private static final TestsDataset HEIGHTS = new TestsDataset("heights");
@@ -94,14 +101,13 @@ public class CsvTestsDataLoader {
private static final TestsDataset BOOKS = new TestsDataset("books");
private static final TestsDataset SEMANTIC_TEXT = new TestsDataset("semantic_text").withInferenceEndpoint(true);
- private static final String LOOKUP_INDEX_SUFFIX = "_lookup";
-
public static final Map
* Accepts an URL as first argument, eg. http://localhost:9200 or http://user:pass@localhost:9200 - *
+ * ** If no arguments are specified, the default URL is http://localhost:9200 without authentication *
** It also supports HTTPS *
+ * * @param args the URL to connect * @throws IOException */ @@ -270,7 +278,9 @@ private static void loadDataSetIntoEs(RestClient client, IndexCreator indexCreat } } - /** The semantic_text mapping type require an inference endpoint that needs to be setup before creating the index. */ + /** + * The semantic_text mapping type require an inference endpoint that needs to be setup before creating the index. + */ public static void createInferenceEndpoint(RestClient client) throws IOException { Request request = new Request("PUT", "_inference/sparse_embedding/test_sparse_inference"); request.setJsonEntity(""" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv index 3ee60b79970ba..1c1a9776df6cc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv @@ -1,4 +1,4 @@ -language_code:keyword,language_name:keyword +language_code:integer,language_name:keyword 1,English 2,French 3,Spanish diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 5de353978b307..f2800456ceb33 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -4,8 +4,8 @@ // //TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) -basicOnTheDataNode-Ignore -required_capability: join_lookup_v3 +basicOnTheDataNode +required_capability: join_lookup_v4 FROM employees | EVAL language_code = languages @@ -21,19 +21,19 @@ emp_no:integer | language_code:integer | language_name:keyword 10093 | 3 | Spanish ; -basicRow-Ignore -required_capability: join_lookup_v3 +basicRow +required_capability: join_lookup_v4 ROW language_code = 1 | LOOKUP JOIN languages_lookup ON language_code ; -language_code:keyword | language_name:keyword +language_code:integer | language_name:keyword 1 | English ; basicOnTheCoordinator -required_capability: join_lookup_v3 +required_capability: join_lookup_v4 FROM employees | SORT emp_no @@ -49,9 +49,8 @@ emp_no:integer | language_code:integer | language_name:keyword 10003 | 4 | German ; -//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) -subsequentEvalOnTheDataNode-Ignore -required_capability: join_lookup_v3 +subsequentEvalOnTheDataNode +required_capability: join_lookup_v4 FROM employees | EVAL language_code = languages @@ -69,7 +68,7 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x ; subsequentEvalOnTheCoordinator -required_capability: join_lookup_v3 +required_capability: join_lookup_v4 FROM employees | SORT emp_no @@ -85,3 +84,208 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x 10002 | 5 | null | 10 10003 | 4 | german | 8 ; + +lookupIPFromRow +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", right = "right" +| LOOKUP JOIN clientips_lookup ON client_ip +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromRowWithShadowing +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" +| LOOKUP JOIN clientips_lookup ON client_ip +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromRowWithShadowingKeep +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP left, client_ip, right, env +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromIndex +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +; + +@timestamp:date | event_duration:long | message:keyword | client_ip:keyword | env:keyword +2023-10-23T13:55:01.543Z | 1756467 | Connected to 10.1.0.1 | 172.21.3.15 | Production +2023-10-23T13:53:55.832Z | 5033755 | Connection error | 172.21.3.15 | Production +2023-10-23T13:52:55.015Z | 8268153 | Connection error | 172.21.3.15 | Production +2023-10-23T13:51:54.732Z | 725448 | Connection error | 172.21.3.15 | Production +2023-10-23T13:33:34.937Z | 1232382 | Disconnected | 172.21.0.5 | Development +2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 | 172.21.2.113 | QA +2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 | 172.21.2.162 | QA +; + +lookupIPFromIndexKeep +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP @timestamp, client_ip, event_duration, message, env +; + +@timestamp:date | client_ip:keyword | event_duration:long | message:keyword | env:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Production +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Production +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Production +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Production +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Development +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | QA +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | QA +; + +lookupIPFromIndexStats +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| STATS count = count(client_ip) BY env +| SORT count DESC, env ASC +; + +count:long | env:keyword +4 | Production +2 | QA +1 | Development +; + +lookupIPFromIndexStatsKeep +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP client_ip, env +| STATS count = count(client_ip) BY env +| SORT count DESC, env ASC +; + +count:long | env:keyword +4 | Production +2 | QA +1 | Development +; + +lookupMessageFromRow +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", right = "right" +| LOOKUP JOIN message_types_lookup ON message +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromRowWithShadowing +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right" +| LOOKUP JOIN message_types_lookup ON message +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromRowWithShadowingKeep +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right" +| LOOKUP JOIN message_types_lookup ON message +| KEEP left, message, right, type +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromIndex +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success +; + +lookupMessageFromIndexKeep +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| KEEP @timestamp, client_ip, event_duration, message, type +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success +; + +lookupMessageFromIndexStats +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| STATS count = count(message) BY type +| SORT count DESC, type ASC +; + +count:long | type:keyword +3 | Error +3 | Success +1 | Disconnected +; + +lookupMessageFromIndexStatsKeep +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| KEEP message, type +| STATS count = count(message) BY type +| SORT count DESC, type ASC +; + +count:long | type:keyword +3 | Error +3 | Success +1 | Disconnected +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json index 39bd37ce26c7f..d491810f9134e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json @@ -1,10 +1,10 @@ { - "properties": { - "client_ip": { - "type": "keyword" - }, - "env": { - "type": "keyword" - } + "properties": { + "client_ip": { + "type": "keyword" + }, + "env": { + "type": "keyword" } - } \ No newline at end of file + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json index 0cec0caf17304..327b692369242 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json @@ -1,7 +1,7 @@ { "properties" : { "language_code" : { - "type" : "keyword" + "type" : "integer" }, "language_name" : { "type" : "keyword" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json new file mode 100644 index 0000000000000..af545b48da3d2 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json @@ -0,0 +1,10 @@ +{ + "properties": { + "message": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv new file mode 100644 index 0000000000000..8e00485771445 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv @@ -0,0 +1,6 @@ +message:keyword,type:keyword +Connection error,Error +Disconnected,Disconnected +Connected to 10.1.0.1,Success +Connected to 10.1.0.2,Success +Connected to 10.1.0.3,Success diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 9fad9123944ff..a8f51bd4dc24d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -518,7 +518,7 @@ public enum Cap { /** * LOOKUP JOIN */ - JOIN_LOOKUP_V3(Build.current().isSnapshot()), + JOIN_LOOKUP_V4(false && Build.current().isSnapshot()), /** * Fix for https://github.com/elastic/elasticsearch/issues/117054 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index d127c26298a28..b847508d2b161 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -61,6 +61,7 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; import org.elasticsearch.xpack.esql.parser.ParsingException; import org.elasticsearch.xpack.esql.plan.TableIdentifier; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; @@ -105,7 +106,6 @@ import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; -import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.function.Function; @@ -198,11 +198,12 @@ private static class ResolveTable extends ParameterizedAnalyzerRule
@@ -102,15 +100,18 @@ public PhysicalPlan apply(PhysicalPlan plan) {
private static Set