Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ public class CsvTestsDataLoader {
// Dataset constructed with the name "languages".
private static final TestsDataset LANGUAGES = new TestsDataset("languages");
// Derived from LANGUAGES with the index name "languages_lookup" and the settings file
// "languages_lookup-settings.json".
// NOTE(review): presumably withIndex/withSetting return modified copies and leave LANGUAGES unchanged — confirm in TestsDataset.
private static final TestsDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup")
.withSetting("languages_lookup-settings.json");
// Derived from LANGUAGES_LOOKUP with the index name "languages_lookup_non_unique_key" and the data file
// "languages_non_unique_key.csv". Must be declared after LANGUAGES_LOOKUP because its initializer reads it.
// NOTE(review): presumably inherits the lookup settings from LANGUAGES_LOOKUP — confirm in TestsDataset.
private static final TestsDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key")
.withData("languages_non_unique_key.csv");
// Datasets constructed with the names "alerts", "ul_logs" and "sample_data" respectively.
private static final TestsDataset ALERTS = new TestsDataset("alerts");
private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs");
private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data");
Expand Down Expand Up @@ -114,6 +116,7 @@ public class CsvTestsDataLoader {
Map.entry(APPS_SHORT.indexName, APPS_SHORT),
Map.entry(LANGUAGES.indexName, LANGUAGES),
Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP),
Map.entry(LANGUAGES_LOOKUP_NON_UNIQUE_KEY.indexName, LANGUAGES_LOOKUP_NON_UNIQUE_KEY),
Map.entry(UL_LOGS.indexName, UL_LOGS),
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
Map.entry(MV_SAMPLE_DATA.indexName, MV_SAMPLE_DATA),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
language_code:integer,language_name:keyword,country:keyword
1,English,Canada
1,English,
1,,United Kingdom
Comment on lines +3 to +4
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding some intentionally denormalized ("dirty") data to show how we collect nulls and multi-values (MVs).

1,English,United States of America
2,German,[Germany,Austria]
2,German,Switzerland
2,German,
4,Quenya,
5,,Atlantis
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// Reuses the sample dataset and commands from enrich.csv-spec
//

//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this in my PR too!

basicOnTheDataNode
required_capability: join_lookup_v5

Expand Down Expand Up @@ -102,6 +101,83 @@ emp_no:integer | language_code:integer | language_name:keyword
10003 | 4 | German
;

// Non-unique key on the left side of the join: language_code is computed as emp_no % 10,
// so several employees share the same code. The expected rows show every employee with the
// same code receiving the same language_name, and code 0 yielding null.
// NOTE(review): the "OnTheDataNode" suffix presumably means the join is expected to run on
// the data node because no SORT/LIMIT precedes it — confirm.
nonUniqueLeftKeyOnTheDataNode
required_capability: join_lookup_v5

FROM employees
| WHERE emp_no <= 10030
| EVAL language_code = emp_no % 10
| WHERE language_code < 3
| LOOKUP JOIN languages_lookup ON language_code
| SORT emp_no
| KEEP emp_no, language_code, language_name
;

emp_no:integer | language_code:integer | language_name:keyword
10001 |1 | English
10002 |2 | French
10010 |0 | null
10011 |1 | English
10012 |2 | French
10020 |0 | null
10021 |1 | English
10022 |2 | French
10030 |0 | null
;

// Non-unique key on the right side of the join: the query joins against
// languages_lookup_non_unique_key, and the expected rows show several matches for one
// language_code surfacing as multi-values (e.g. three "English" entries for code 1), while
// codes 4 and 5 each have one column that is null.
// MV_SORT is applied to country before KEEP.
// NOTE(review): presumably MV_SORT is there to make the order of the collected country
// values deterministic — confirm.
nonUniqueRightKeyOnTheDataNode
required_capability: join_lookup_v5

FROM employees
| EVAL language_code = emp_no % 10
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| WHERE emp_no > 10090 AND emp_no < 10096
| SORT emp_no
| EVAL country = MV_SORT(country)
| KEEP emp_no, language_code, language_name, country
;

emp_no:integer | language_code:integer | language_name:keyword | country:keyword
10091 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America]
10092 | 2 | [German, German, German] | [Austria, Germany, Switzerland]
10093 | 3 | null | null
10094 | 4 | Quenya | null
10095 | 5 | null | Atlantis
;

// Same right-side non-unique-key scenario, but SORT emp_no and LIMIT 5 come before the
// LOOKUP JOIN, so the join only sees employees 10001-10005 (language_code 1-5).
// The expected rows again show multiple matches collected into multi-values.
// NOTE(review): the "OnTheCoordinator" suffix presumably means the preceding SORT/LIMIT
// forces the join to run on the coordinator — confirm.
nonUniqueRightKeyOnTheCoordinator
required_capability: join_lookup_v5

FROM employees
| SORT emp_no
| LIMIT 5
| EVAL language_code = emp_no % 10
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| EVAL country = MV_SORT(country)
| KEEP emp_no, language_code, language_name, country
;

emp_no:integer | language_code:integer | language_name:keyword | country:keyword
10001 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America]
10002 | 2 | [German, German, German] | [Austria, Germany, Switzerland]
10003 | 3 | null | null
10004 | 4 | Quenya | null
10005 | 5 | null | Atlantis
;

// Right-side non-unique-key scenario driven by a single literal row (language_code = 2)
// instead of an index. The expected row shows the three matches for code 2 as multi-values.
// country.keyword is dropped before the output and country is passed through MV_SORT.
// NOTE(review): country.keyword is presumably a subfield created by the index mapping of
// languages_lookup_non_unique_key — confirm against the mapping.
nonUniqueRightKeyFromRow
required_capability: join_lookup_v5

ROW language_code = 2
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| DROP country.keyword
| EVAL country = MV_SORT(country)
;

language_code:integer | language_name:keyword | country:keyword
2 | [German, German, German] | [Austria, Germany, Switzerland]
;

lookupIPFromRow
required_capability: join_lookup_v5

Expand Down