Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fix] No duplicates in list_intersect #9947

Merged
merged 3 commits into from Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/catalog/default/default_functions.cpp
Expand Up @@ -105,14 +105,14 @@ static DefaultMacro internal_macros[] = {
{DEFAULT_SCHEMA, "array_reverse", {"l", nullptr}, "list_reverse(l)"},

// FIXME implement as actual function if we encounter a lot of performance issues. Complexity now: n * m, with hashing possibly n + m
{DEFAULT_SCHEMA, "list_intersect", {"l1", "l2", nullptr}, "list_filter(l1, (x) -> list_contains(l2, x))"},
{DEFAULT_SCHEMA, "list_intersect", {"l1", "l2", nullptr}, "list_filter(list_distinct(l1), (x) -> list_contains(l2, x))"},
{DEFAULT_SCHEMA, "array_intersect", {"l1", "l2", nullptr}, "list_intersect(l1, l2)"},

{DEFAULT_SCHEMA, "list_has_any", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_intersect(l1, l2)) > 0 THEN true ELSE false END"},
{DEFAULT_SCHEMA, "list_has_any", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_filter(l1, (x) -> list_contains(l2, x))) > 0 THEN true ELSE false END"},
{DEFAULT_SCHEMA, "array_has_any", {"l1", "l2", nullptr}, "list_has_any(l1, l2)" },
{DEFAULT_SCHEMA, "&&", {"l1", "l2", nullptr}, "list_has_any(l1, l2)" }, // "&&" is the operator for "list_has_any

{DEFAULT_SCHEMA, "list_has_all", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_intersect(l2, l1)) = len(list_filter(l2, x -> x IS NOT NULL)) THEN true ELSE false END"},
{DEFAULT_SCHEMA, "list_has_all", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_filter(l2, (x) -> list_contains(l1, x))) = len(list_filter(l2, x -> x IS NOT NULL)) THEN true ELSE false END"},
{DEFAULT_SCHEMA, "array_has_all", {"l1", "l2", nullptr}, "list_has_all(l1, l2)" },
{DEFAULT_SCHEMA, "@>", {"l1", "l2", nullptr}, "list_has_all(l1, l2)" }, // "@>" is the operator for "list_has_all
{DEFAULT_SCHEMA, "<@", {"l1", "l2", nullptr}, "list_has_all(l2, l1)" }, // "<@" is the operator for "list_has_all
Expand Down
46 changes: 45 additions & 1 deletion test/sql/function/list/list_distinct.test
Expand Up @@ -31,19 +31,27 @@ SELECT list_distinct([]) WHERE 1 = 0

statement error
SELECT list_distinct()
----
No function matches

statement error
SELECT list_distinct(*)
----
No function matches

statement error
SELECT list_distinct([1, 2], 2)
----
No function matches

# test incorrect parameter type

foreach type boolean varchar tinyint smallint integer bigint hugeint utinyint usmallint uinteger ubigint float double decimal(4,1) decimal(9,4) decimal(18,6) decimal(38,10) date time timestamp timestamp_s timestamp_ms timestamp_ns timetz timestamptz interval blob

statement error
SELECT list_distinct(NULL::${type})
----
No function matches

endloop

Expand Down Expand Up @@ -257,4 +265,40 @@ two1
query I
SELECT list_sort(list_distinct(['a', 'b、c', 'a']))
----
[a, b、c]
[a, b、c]

statement ok
CREATE TABLE all_types AS SELECT * FROM test_all_types();

# unsupported histogram types
foreach colname bool tinyint smallint int bigint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 uuid interval varchar small_enum medium_enum large_enum

statement ok
select list_distinct(["${colname}"]) FROM all_types;

endloop

# we don't support histogram for the min/max values of these types (casting errors)
foreach colname hugeint dec_18_6 dec38_10

statement error
select list_distinct(["${colname}"]) FROM all_types;
----
Invalid Input Error

endloop

statement error
select list_distinct(["blob"]) FROM all_types;
----
No matching aggregate function

# we don't support histogram for nested types
foreach colname int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs map

statement error
select list_distinct(["${colname}"]) FROM all_types;
----
Not implemented Error

endloop
21 changes: 14 additions & 7 deletions test/sql/function/list/list_intersect.test
Expand Up @@ -5,7 +5,7 @@
foreach f list_intersect array_intersect

query I
select ${f}([1,2,3], [2,3,4]);
select list_sort(${f}([1,2,3], [2,3,4]));
----
[2, 3]

Expand All @@ -28,7 +28,7 @@ statement ok
INSERT INTO list_data VALUES ([1,2,NULL], [2,3,NULL]);

query I
select ${f}(l1, l2) from list_data;
select list_sort(${f}(l1, l2)) from list_data;
----
NULL
NULL
Expand All @@ -48,11 +48,10 @@ insert into list_of_list values (NULL, NULL);
statement ok
insert into list_of_list values ([[1 , 2, 3], NULL, [3, 2, 1]], [[ 2, 3, 4], NULL, [1, 2, 3]]);

query I
statement error
select ${f}(l1, l2) from list_of_list;
----
NULL
[[1, 2, 3]]
Not implemented

statement ok
drop table list_of_list;
Expand All @@ -70,7 +69,7 @@ statement ok
insert into list_of_strings values (['here is a very long long string that is def more than 12 bytes', 'and a shorty'], ['here is a very long long string that is def more than 12 bytes', 'here is a very long long string that is def more than 12 bytes', 'c', 'd']);

query I
select ${f}(l1, l2) from list_of_strings;
select list_sort(${f}(l1, l2)) from list_of_strings;
----
NULL
[b, c]
Expand Down Expand Up @@ -127,9 +126,17 @@ execute q1(['abc', 'def'], ['def', 'ghi']);
statement ok
CREATE TABLE all_types AS SELECT * FROM test_all_types();

foreach colname bool tinyint smallint int bigint hugeint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 dec_18_6 dec38_10 uuid interval varchar blob small_enum medium_enum large_enum int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs map
foreach colname bool tinyint smallint int bigint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 uuid interval varchar small_enum medium_enum large_enum

statement ok
select list_intersect(["${colname}"], ["${colname}"]) FROM all_types;

endloop

# issue 9942

query I
SELECT list_sort(list_intersect([1, 4, 3, 5, 5, 2, 2], [5, 5, 5, 1, 1, 2, 4]));
----
[1, 2, 4, 5]