From e0c35dadab63ae83584dfa8e0781188758fcbd29 Mon Sep 17 00:00:00 2001
From: Jefffrey
Date: Sun, 21 Apr 2024 11:01:52 +1000
Subject: [PATCH 1/2] Add distinct aggregate tests to sqllogictest

Cover the DISTINCT form of var, median, approx_median and avg, each
queried as the only aggregate in its SELECT.
---
 .../sqllogictest/test_files/aggregate.slt | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index 457cd11211f1..16712780fedf 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -373,6 +373,12 @@ SELECT var(c2) FROM aggregate_test_100
 ----
 1.886363636364
 
+# csv_query_distinct_variance
+query R
+SELECT var(distinct c2) FROM aggregate_test_100
+----
+2.5
+
 # csv_query_variance_5
 query R
 SELECT var_samp(c2) FROM aggregate_test_100
@@ -457,6 +463,18 @@ SELECT median(col_i8) FROM median_table
 ----
 -14
 
+# distinct_median_i8
+query I
+SELECT median(distinct col_i8) FROM median_table
+----
+100
+
+# approx_distinct_median_i8
+query I
+SELECT approx_median(distinct col_i8) FROM median_table
+----
+100
+
 # median_i16
 query I
 SELECT median(col_i16) FROM median_table
@@ -2498,6 +2516,11 @@ select avg(x_dict) from value_dict;
 ----
 2.625
 
+query R
+select avg(distinct x_dict) from value_dict;
+----
+3
+
 query I
 select min(x_dict) from value_dict;
 ----

From cbad651791086d52a0fc02302dfeefee51c7c19a Mon Sep 17 00:00:00 2001
From: Jefffrey
Date: Sun, 21 Apr 2024 14:19:41 +1000
Subject: [PATCH 2/2] Update tests

For var, median, approx_median and avg, assert that selecting the
aggregate together with its DISTINCT form in the same query fails with
a "not implemented" error. Also label the avg(distinct) test as
distinct_average.
---
 datafusion/sqllogictest/test_files/aggregate.slt | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index 16712780fedf..c25f6d50b3a3 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -379,6 +379,9 @@ SELECT var(distinct c2) FROM aggregate_test_100
 ----
 2.5
 
+statement error DataFusion error: This feature is not implemented: VAR\(DISTINCT\) aggregations are not available
+SELECT var(c2), var(distinct c2) FROM aggregate_test_100
+
 # csv_query_variance_5
 query R
 SELECT var_samp(c2) FROM aggregate_test_100
@@ -469,12 +472,18 @@ SELECT median(distinct col_i8) FROM median_table
 ----
 100
 
+statement error DataFusion error: This feature is not implemented: MEDIAN\(DISTINCT\) aggregations are not available
+SELECT median(col_i8), median(distinct col_i8) FROM median_table
+
 # approx_distinct_median_i8
 query I
 SELECT approx_median(distinct col_i8) FROM median_table
 ----
 100
 
+statement error DataFusion error: This feature is not implemented: APPROX_MEDIAN\(DISTINCT\) aggregations are not available
+SELECT approx_median(col_i8), approx_median(distinct col_i8) FROM median_table
+
 # median_i16
 query I
 SELECT median(col_i16) FROM median_table
@@ -2516,11 +2525,15 @@ select avg(x_dict) from value_dict;
 ----
 2.625
 
+# distinct_average
 query R
 select avg(distinct x_dict) from value_dict;
 ----
 3
 
+statement error DataFusion error: This feature is not implemented: AVG\(DISTINCT\) aggregations are not available
+select avg(x_dict), avg(distinct x_dict) from value_dict;
+
 query I
 select min(x_dict) from value_dict;
 ----