diff --git a/src/catalog/default/default_functions.cpp b/src/catalog/default/default_functions.cpp index 1c8a528fb1f..dfd66cece42 100644 --- a/src/catalog/default/default_functions.cpp +++ b/src/catalog/default/default_functions.cpp @@ -105,14 +105,14 @@ static DefaultMacro internal_macros[] = { {DEFAULT_SCHEMA, "array_reverse", {"l", nullptr}, "list_reverse(l)"}, // FIXME implement as actual function if we encounter a lot of performance issues. Complexity now: n * m, with hashing possibly n + m - {DEFAULT_SCHEMA, "list_intersect", {"l1", "l2", nullptr}, "list_filter(l1, (x) -> list_contains(l2, x))"}, + {DEFAULT_SCHEMA, "list_intersect", {"l1", "l2", nullptr}, "list_filter(list_distinct(l1), (x) -> list_contains(l2, x))"}, {DEFAULT_SCHEMA, "array_intersect", {"l1", "l2", nullptr}, "list_intersect(l1, l2)"}, - {DEFAULT_SCHEMA, "list_has_any", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_intersect(l1, l2)) > 0 THEN true ELSE false END"}, + {DEFAULT_SCHEMA, "list_has_any", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_filter(l1, (x) -> list_contains(l2, x))) > 0 THEN true ELSE false END"}, {DEFAULT_SCHEMA, "array_has_any", {"l1", "l2", nullptr}, "list_has_any(l1, l2)" }, {DEFAULT_SCHEMA, "&&", {"l1", "l2", nullptr}, "list_has_any(l1, l2)" }, // "&&" is the operator for "list_has_any - {DEFAULT_SCHEMA, "list_has_all", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_intersect(l2, l1)) = len(list_filter(l2, x -> x IS NOT NULL)) THEN true ELSE false END"}, + {DEFAULT_SCHEMA, "list_has_all", {"l1", "l2", nullptr}, "CASE WHEN l1 IS NULL THEN NULL WHEN l2 IS NULL THEN NULL WHEN len(list_filter(l2, (x) -> list_contains(l1, x))) = len(list_filter(l2, x -> x IS NOT NULL)) THEN true ELSE false END"}, {DEFAULT_SCHEMA, "array_has_all", {"l1", "l2", nullptr}, "list_has_all(l1, l2)" }, {DEFAULT_SCHEMA, "@>", {"l1", "l2", nullptr}, "list_has_all(l1, l2)" }, // "@>" is the operator for "list_has_all {DEFAULT_SCHEMA, "<@", {"l1", "l2", nullptr}, "list_has_all(l2, l1)" }, // "<@" is the operator for "list_has_all diff --git a/test/sql/function/list/list_distinct.test b/test/sql/function/list/list_distinct.test index 36f6ab34052..973c2917837 100644 --- a/test/sql/function/list/list_distinct.test +++ b/test/sql/function/list/list_distinct.test @@ -31,12 +31,18 @@ SELECT list_distinct([]) WHERE 1 = 0 statement error SELECT list_distinct() +---- +No function matches statement error SELECT list_distinct(*) +---- +No function matches statement error SELECT list_distinct([1, 2], 2) +---- +No function matches # test incorrect parameter type @@ -44,6 +50,8 @@ foreach type boolean varchar tinyint smallint integer bigint hugeint utinyint us statement error SELECT list_distinct(NULL::${type}) +---- +No function matches endloop @@ -257,4 +265,40 @@ two1 query I SELECT list_sort(list_distinct(['a', 'b、c', 'a'])) ---- -[a, b、c] \ No newline at end of file +[a, b、c] + +statement ok +CREATE TABLE all_types AS SELECT * FROM test_all_types(); + +# unsupported histogram types +foreach colname bool tinyint smallint int bigint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 uuid interval varchar small_enum medium_enum large_enum + +statement ok +select list_distinct(["${colname}"]) FROM all_types; + +endloop + +# we don't support histogram for the min/max values of these types (casting errors) +foreach colname hugeint dec_18_6 dec38_10 + +statement error +select list_distinct(["${colname}"]) FROM all_types; +---- +Invalid Input Error + +endloop + +statement error +select list_distinct(["blob"]) FROM all_types; +---- +No matching aggregate function + +# we don't support histogram for nested types +foreach colname int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs map + +statement error +select list_distinct(["${colname}"]) FROM all_types; +---- +Not implemented Error + +endloop \ No newline at end of file diff --git a/test/sql/function/list/list_intersect.test b/test/sql/function/list/list_intersect.test index b63a6212da8..ff94de8916d 100644 --- a/test/sql/function/list/list_intersect.test +++ b/test/sql/function/list/list_intersect.test @@ -5,7 +5,7 @@ foreach f list_intersect array_intersect query I -select ${f}([1,2,3], [2,3,4]); +select list_sort(${f}([1,2,3], [2,3,4])); ---- [2, 3] @@ -28,7 +28,7 @@ statement ok INSERT INTO list_data VALUES ([1,2,NULL], [2,3,NULL]); query I -select ${f}(l1, l2) from list_data; +select list_sort(${f}(l1, l2)) from list_data; ---- NULL NULL @@ -48,11 +48,10 @@ insert into list_of_list values (NULL, NULL); statement ok insert into list_of_list values ([[1 , 2, 3], NULL, [3, 2, 1]], [[ 2, 3, 4], NULL, [1, 2, 3]]); -query I +statement error select ${f}(l1, l2) from list_of_list; ---- -NULL -[[1, 2, 3]] +Not implemented statement ok drop table list_of_list; @@ -70,7 +69,7 @@ statement ok insert into list_of_strings values (['here is a very long long string that is def more than 12 bytes', 'and a shorty'], ['here is a very long long string that is def more than 12 bytes', 'here is a very long long string that is def more than 12 bytes', 'c', 'd']); query I -select ${f}(l1, l2) from list_of_strings; +select list_sort(${f}(l1, l2)) from list_of_strings; ---- NULL [b, c] @@ -127,9 +126,17 @@ execute q1(['abc', 'def'], ['def', 'ghi']); statement ok CREATE TABLE all_types AS SELECT * FROM test_all_types(); -foreach colname bool tinyint smallint int bigint hugeint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 dec_18_6 dec38_10 uuid interval varchar blob small_enum medium_enum large_enum int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs map +foreach colname bool tinyint smallint int bigint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 uuid interval varchar small_enum medium_enum large_enum statement ok select list_intersect(["${colname}"], ["${colname}"]) FROM all_types; endloop + +# issue 9942 + +query I +SELECT list_sort(list_intersect([1, 4, 3, 5, 5, 2, 2], [5, 5, 5, 1, 1, 2, 4])); +---- +[1, 2, 4, 5] +