From b2d5f521438ddf7b3f574b8dd9e3e1f01f150331 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 9 Dec 2024 17:02:21 +0100 Subject: [PATCH 1/3] Add tests with non-unique key in LU index --- .../xpack/esql/CsvTestsDataLoader.java | 3 ++ .../resources/languages_non_unique_key.csv | 8 +++ .../src/main/resources/lookup-join.csv-spec | 49 ++++++++++++++++++- 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 34af1edb9f99b..77cc1f2c618b5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -63,6 +63,8 @@ public class CsvTestsDataLoader { private static final TestsDataset LANGUAGES = new TestsDataset("languages"); private static final TestsDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup") .withSetting("languages_lookup-settings.json"); + private static final TestsDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key") + .withData("languages_non_unique_key.csv"); private static final TestsDataset ALERTS = new TestsDataset("alerts"); private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs"); private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data"); @@ -114,6 +116,7 @@ public class CsvTestsDataLoader { Map.entry(APPS_SHORT.indexName, APPS_SHORT), Map.entry(LANGUAGES.indexName, LANGUAGES), Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP), + Map.entry(LANGUAGES_LOOKUP_NON_UNIQUE_KEY.indexName, LANGUAGES_LOOKUP_NON_UNIQUE_KEY), Map.entry(UL_LOGS.indexName, UL_LOGS), Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA), Map.entry(MV_SAMPLE_DATA.indexName, MV_SAMPLE_DATA), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv new file mode 100644 index 0000000000000..b37b8bdb0b326 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv @@ -0,0 +1,8 @@ +language_code:integer,language_name:keyword +1,English (US) +1,English (UK) +2,French (F) +2,French (CA) +4,German (D) +4,German (A) +4,German (CH) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index b01e12fa4f470..f9441b0e4d776 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -3,7 +3,6 @@ // Reuses the sample dataset and commands from enrich.csv-spec // -//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) basicOnTheDataNode required_capability: join_lookup_v4 @@ -325,3 +324,51 @@ count:long | type:keyword 3 | Success 1 | Disconnected ; + +nonUniqueKeyOnTheDataNode +required_capability: join_lookup_v4 + +FROM employees +| EVAL language_code = languages +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| WHERE emp_no >= 10091 AND emp_no < 10094 +| SORT emp_no +| EVAL language_name = MV_SORT(language_name) +| KEEP emp_no, language_code, language_name +; + +emp_no:integer | language_code:integer | language_name:keyword +10091 | 3 | null +10092 | 1 | [English (UK), English (US)] +10093 | 3 | null +; + +nonUniqueKeyOnTheCoordinator +required_capability: join_lookup_v4 + +FROM employees +| SORT emp_no +| LIMIT 3 +| EVAL language_code = languages +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| EVAL language_name = MV_SORT(language_name) +| KEEP emp_no, language_code, language_name +; + +emp_no:integer | language_code:integer | language_name:keyword +10001 | 2 | [French (CA), French (F)] +10002 | 5 | null +10003 | 4 | [German (A), German (CH), German (D)] +; + +nonUniqueKeyFromRow +required_capability: join_lookup_v4 + +ROW language_code = 1 +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| EVAL language_name = MV_SORT(language_name) +; + +language_code:integer | language_name:keyword +1 | [English (UK), English (US)] +; From dfaac5de9fcc032d5663b10a7583e3db7ef672d2 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 11 Dec 2024 18:39:36 +0100 Subject: [PATCH 2/3] Denormalize data a little + add a field --- .../resources/languages_non_unique_key.csv | 18 +++-- .../src/main/resources/lookup-join.csv-spec | 73 +++++++++++++------ 2 files changed, 61 insertions(+), 30 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv index b37b8bdb0b326..1578762f8d1cb 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv @@ -1,8 +1,10 @@ -language_code:integer,language_name:keyword -1,English (US) -1,English (UK) -2,French (F) -2,French (CA) -4,German (D) -4,German (A) -4,German (CH) +language_code:integer,language_name:keyword,country:keyword +1,English,Canada +1,English, +1,,United Kingdom +1,English,United States of America +2,German,[Germany,Austria] +2,German,Switzerland +2,German, +4,Quenya, +5,,Atlantis diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index f9441b0e4d776..59fd0362b6cca 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -325,50 +325,79 @@ count:long | type:keyword 1 | Disconnected ; -nonUniqueKeyOnTheDataNode +nonUniqueLeftKeyOnTheDataNode required_capability: join_lookup_v4 FROM employees -| EVAL language_code = languages -| LOOKUP JOIN languages_lookup_non_unique_key ON language_code -| WHERE emp_no >= 10091 AND emp_no < 10094 +| WHERE emp_no <= 10030 +| EVAL language_code = emp_no % 10 +| WHERE language_code < 3 +| LOOKUP JOIN languages_lookup ON language_code | SORT emp_no -| EVAL language_name = MV_SORT(language_name) | KEEP emp_no, language_code, language_name ; emp_no:integer | language_code:integer | language_name:keyword -10091 | 3 | null -10092 | 1 | [English (UK), English (US)] -10093 | 3 | null +10001 |1 | English +10002 |2 | French +10010 |0 | null +10011 |1 | English +10012 |2 | French +10020 |0 | null +10021 |1 | English +10022 |2 | French +10030 |0 | null +; + +nonUniqueRightKeyOnTheDataNode +required_capability: join_lookup_v4 + +FROM employees +| EVAL language_code = emp_no % 10 +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| WHERE emp_no > 10090 AND emp_no < 10096 +| SORT emp_no +| EVAL country = MV_SORT(country) +| KEEP emp_no, language_code, language_name, country ; -nonUniqueKeyOnTheCoordinator +emp_no:integer | language_code:integer | language_name:keyword | country:keyword +10091 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America] +10092 | 2 | [German, German, German] | [Austria, Germany, Switzerland] +10093 | 3 | null | null +10094 | 4 | Quenya | null +10095 | 5 | null | Atlantis +; + +nonUniqueRightKeyOnTheCoordinator required_capability: join_lookup_v4 FROM employees | SORT emp_no -| LIMIT 3 -| EVAL language_code = languages +| LIMIT 5 +| EVAL language_code = emp_no % 10 | LOOKUP JOIN languages_lookup_non_unique_key ON language_code -| EVAL language_name = MV_SORT(language_name) -| KEEP emp_no, language_code, language_name +| EVAL country = MV_SORT(country) +| KEEP emp_no, language_code, language_name, country ; -emp_no:integer | language_code:integer | language_name:keyword -10001 | 2 | [French (CA), French (F)] -10002 | 5 | null -10003 | 4 | [German (A), German (CH), German (D)] +emp_no:integer | language_code:integer | language_name:keyword | country:keyword +10001 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America] +10002 | 2 | [German, German, German] | [Austria, Germany, Switzerland] +10003 | 3 | null | null +10004 | 4 | Quenya | null +10005 | 5 | null | Atlantis ; -nonUniqueKeyFromRow +nonUniqueRightKeyFromRow required_capability: join_lookup_v4 -ROW language_code = 1 +ROW language_code = 2 | LOOKUP JOIN languages_lookup_non_unique_key ON language_code -| EVAL language_name = MV_SORT(language_name) +| DROP country.keyword +| EVAL country = MV_SORT(country) ; -language_code:integer | language_name:keyword -1 | [English (UK), English (US)] +language_code:integer | language_name:keyword | country:keyword +2 | [German, German, German] | [Austria, Germany, Switzerland] ; From 4c4726d5acd5cda0e8fec5afb119c33ee581c7a7 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 12 Dec 2024 18:39:16 +0100 Subject: [PATCH 3/3] Move new tests up Just after the tests with the languages_lookup index, for better organization of the tests. --- .../src/main/resources/lookup-join.csv-spec | 154 +++++++++--------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 59fd0362b6cca..6327e2e7889c0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -84,6 +84,83 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x 10003 | 4 | german | 8 ; +nonUniqueLeftKeyOnTheDataNode +required_capability: join_lookup_v4 + +FROM employees +| WHERE emp_no <= 10030 +| EVAL language_code = emp_no % 10 +| WHERE language_code < 3 +| LOOKUP JOIN languages_lookup ON language_code +| SORT emp_no +| KEEP emp_no, language_code, language_name +; + +emp_no:integer | language_code:integer | language_name:keyword +10001 |1 | English +10002 |2 | French +10010 |0 | null +10011 |1 | English +10012 |2 | French +10020 |0 | null +10021 |1 | English +10022 |2 | French +10030 |0 | null +; + +nonUniqueRightKeyOnTheDataNode +required_capability: join_lookup_v4 + +FROM employees +| EVAL language_code = emp_no % 10 +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| WHERE emp_no > 10090 AND emp_no < 10096 +| SORT emp_no +| EVAL country = MV_SORT(country) +| KEEP emp_no, language_code, language_name, country +; + +emp_no:integer | language_code:integer | language_name:keyword | country:keyword +10091 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America] +10092 | 2 | [German, German, German] | [Austria, Germany, Switzerland] +10093 | 3 | null | null +10094 | 4 | Quenya | null +10095 | 5 | null | Atlantis +; + +nonUniqueRightKeyOnTheCoordinator +required_capability: join_lookup_v4 + +FROM employees +| SORT emp_no +| LIMIT 5 +| EVAL language_code = emp_no % 10 +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| EVAL country = MV_SORT(country) +| KEEP emp_no, language_code, language_name, country +; + +emp_no:integer | language_code:integer | language_name:keyword | country:keyword +10001 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America] +10002 | 2 | [German, German, German] | [Austria, Germany, Switzerland] +10003 | 3 | null | null +10004 | 4 | Quenya | null +10005 | 5 | null | Atlantis +; + +nonUniqueRightKeyFromRow +required_capability: join_lookup_v4 + +ROW language_code = 2 +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| DROP country.keyword +| EVAL country = MV_SORT(country) +; + +language_code:integer | language_name:keyword | country:keyword +2 | [German, German, German] | [Austria, Germany, Switzerland] +; + lookupIPFromRow required_capability: join_lookup_v4 @@ -324,80 +401,3 @@ count:long | type:keyword 3 | Success 1 | Disconnected ; - -nonUniqueLeftKeyOnTheDataNode -required_capability: join_lookup_v4 - -FROM employees -| WHERE emp_no <= 10030 -| EVAL language_code = emp_no % 10 -| WHERE language_code < 3 -| LOOKUP JOIN languages_lookup ON language_code -| SORT emp_no -| KEEP emp_no, language_code, language_name -; - -emp_no:integer | language_code:integer | language_name:keyword -10001 |1 | English -10002 |2 | French -10010 |0 | null -10011 |1 | English -10012 |2 | French -10020 |0 | null -10021 |1 | English -10022 |2 | French -10030 |0 | null -; - -nonUniqueRightKeyOnTheDataNode -required_capability: join_lookup_v4 - -FROM employees -| EVAL language_code = emp_no % 10 -| LOOKUP JOIN languages_lookup_non_unique_key ON language_code -| WHERE emp_no > 10090 AND emp_no < 10096 -| SORT emp_no -| EVAL country = MV_SORT(country) -| KEEP emp_no, language_code, language_name, country -; - -emp_no:integer | language_code:integer | language_name:keyword | country:keyword -10091 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America] -10092 | 2 | [German, German, German] | [Austria, Germany, Switzerland] -10093 | 3 | null | null -10094 | 4 | Quenya | null -10095 | 5 | null | Atlantis -; - -nonUniqueRightKeyOnTheCoordinator -required_capability: join_lookup_v4 - -FROM employees -| SORT emp_no -| LIMIT 5 -| EVAL language_code = emp_no % 10 -| LOOKUP JOIN languages_lookup_non_unique_key ON language_code -| EVAL country = MV_SORT(country) -| KEEP emp_no, language_code, language_name, country -; - -emp_no:integer | language_code:integer | language_name:keyword | country:keyword -10001 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America] -10002 | 2 | [German, German, German] | [Austria, Germany, Switzerland] -10003 | 3 | null | null -10004 | 4 | Quenya | null -10005 | 5 | null | Atlantis -; - -nonUniqueRightKeyFromRow -required_capability: join_lookup_v4 - -ROW language_code = 2 -| LOOKUP JOIN languages_lookup_non_unique_key ON language_code -| DROP country.keyword -| EVAL country = MV_SORT(country) -; - -language_code:integer | language_name:keyword | country:keyword -2 | [German, German, German] | [Austria, Germany, Switzerland] -;