From 292f4729ba61867dd0eeed3cb4d13bb7503b2323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 6 Apr 2022 12:44:43 +0200 Subject: [PATCH 01/26] Simplify table_builder --- test/Table_Tests/src/Common_Table_Spec.enso | 60 +++++++++---------- .../src/Database/Postgresql_Spec.enso | 2 +- .../src/Database/Redshift_Spec.enso | 2 +- .../Table_Tests/src/Database/Sqlite_Spec.enso | 2 +- test/Table_Tests/src/Table_Spec.enso | 5 +- 5 files changed, 34 insertions(+), 37 deletions(-) diff --git a/test/Table_Tests/src/Common_Table_Spec.enso b/test/Table_Tests/src/Common_Table_Spec.enso index f0865f3fa79d..017e4287b517 100644 --- a/test/Table_Tests/src/Common_Table_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Spec.enso @@ -26,13 +26,13 @@ from Standard.Table.Data.Position as Position_Module import all spec : Text -> (Vector -> Any) -> Boolean -> Text -> Nothing spec prefix table_builder supports_case_sensitive_columns pending=Nothing = table = - col1 = ["foo", Integer, [1,2,3]] - col2 = ["bar", Integer, [4,5,6]] - col3 = ["Baz", Integer, [7,8,9]] - col4 = ["foo_1", Integer, [10,11,12]] - col5 = ["foo_2", Integer, [13,14,15]] - col6 = ["ab.+123", Integer, [16,17,18]] - col7 = ["abcd123", Integer, [19,20,21]] + col1 = ["foo", [1,2,3]] + col2 = ["bar", [4,5,6]] + col3 = ["Baz", [7,8,9]] + col4 = ["foo_1", [10,11,12]] + col5 = ["foo_2", [13,14,15]] + col6 = ["ab.+123", [16,17,18]] + col7 = ["abcd123", [19,20,21]] table_builder [col1, col2, col3, col4, col5, col6, col7] expect_column_names names table = @@ -67,9 +67,9 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = if supports_case_sensitive_columns then Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <| table = - col1 = ["foo", Integer, [1,2,3]] - col2 = ["bar", Integer, [4,5,6]] - col3 = ["Bar", Integer, [7,8,9]] + col1 = ["foo", [1,2,3]] + col2 = ["bar", [4,5,6]] + col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] expect_column_names ["bar", "Bar"] <| table.select_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new)) @@ -122,7 +122,7 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: unmatched columns" <| - table_2 = table_builder [["foo", Integer, [0,0,0]], ["weird_column", Integer, [0,0,0]]] + table_2 = table_builder [["foo", [0,0,0]], ["weird_column", [0,0,0]]] foo = table_2.at "foo" weird_column = table_2.at "weird_column" bar = table.at "bar" @@ -177,9 +177,9 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = if supports_case_sensitive_columns then Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <| table = - col1 = ["foo", Integer, [1,2,3]] - col2 = ["bar", Integer, [4,5,6]] - col3 = ["Bar", Integer, [7,8,9]] + col1 = ["foo", [1,2,3]] + col2 = ["bar", [4,5,6]] + col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] expect_column_names ["foo"] <| table.remove_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new)) @@ -231,7 +231,7 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: unmatched columns" <| - table_2 = table_builder [["foo", Integer, [0,0,0]], ["weird_column", Integer, [0,0,0]]] + table_2 = table_builder [["foo", [0,0,0]], ["weird_column", [0,0,0]]] foo = table_2.at "foo" weird_column = table_2.at "weird_column" bar = table.at "bar" @@ -286,9 +286,9 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = if supports_case_sensitive_columns then Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <| table = - col1 = ["foo", Integer, [1,2,3]] - col2 = ["bar", Integer, [4,5,6]] - col3 = ["Bar", Integer, [7,8,9]] + col1 = ["foo", [1,2,3]] + col2 = ["bar", [4,5,6]] + col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new)) @@ -340,7 +340,7 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Problems.test_problem_handling action problems tester Test.specify "should correctly handle problems: unmatched columns" <| - table_2 = table_builder [["foo", Integer, [0,0,0]], ["weird_column", Integer, [0,0,0]]] + table_2 = table_builder [["foo", [0,0,0]], ["weird_column", [0,0,0]]] foo = table_2.at "foo" weird_column = table_2.at "weird_column" bar = table.at "bar" @@ -359,13 +359,13 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Test.group prefix+"Table.sort_columns" pending=pending <| table = - col1 = ["foo_21", Integer, [1,2,3]] - col2 = ["foo_100", Integer, [4,5,6]] - col3 = ["foo_1", Integer, [7,8,9]] - col4 = ["Foo_2", Integer, [10,11,12]] - col5 = ["foo_3", Integer, [13,14,15]] - col6 = ["foo_001", Integer, [16,17,18]] - col7 = ["bar", Integer, [19,20,21]] + col1 = ["foo_21", [1,2,3]] + col2 = ["foo_100", [4,5,6]] + col3 = ["foo_1", [7,8,9]] + col4 = ["Foo_2", [10,11,12]] + col5 = ["foo_3", [13,14,15]] + col6 = ["foo_001", [16,17,18]] + col7 = ["bar", [19,20,21]] table_builder [col1, col2, col3, col4, col5, col6, col7] Test.specify "should work as shown in the doc examples" <| @@ -387,10 +387,10 @@ spec prefix table_builder supports_case_sensitive_columns pending=Nothing = Test.group prefix+"Table.rename_columns" pending=pending <| table = - col1 = ["alpha", Integer, [1,2,3]] - col2 = ["beta", Integer, [4,5,6]] - col3 = ["gamma", Integer, [16,17,18]] - col4 = ["delta", Integer, [19,20,21]] + col1 = ["alpha", [1,2,3]] + col2 = ["beta", [4,5,6]] + col3 = ["gamma", [16,17,18]] + col4 = ["delta", [19,20,21]] table_builder [col1, col2, col3, col4] Test.specify "should work as shown in the doc examples" <| diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index 0b353e56a777..36e2d3d454e9 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -39,7 +39,7 @@ run_tests connection pending=Nothing = Ref.put name_counter ix+1 name = Name_Generator.random_name "table_"+ix.to_text - in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2] + in_mem_table = Materialized_Table.new columns table = connection.upload_table name in_mem_table tables.append name table diff --git a/test/Table_Tests/src/Database/Redshift_Spec.enso b/test/Table_Tests/src/Database/Redshift_Spec.enso index 0a74df6ff48b..e03e0ba47f67 100644 --- a/test/Table_Tests/src/Database/Redshift_Spec.enso +++ b/test/Table_Tests/src/Database/Redshift_Spec.enso @@ -39,7 +39,7 @@ run_tests connection pending=Nothing = Ref.put name_counter ix+1 name = Name_Generator.random_name "table_"+ix.to_text - in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2] + in_mem_table = Materialized_Table.new columns table = connection.upload_table name in_mem_table tables.append name table diff --git a/test/Table_Tests/src/Database/Sqlite_Spec.enso b/test/Table_Tests/src/Database/Sqlite_Spec.enso index 3231e4609b7a..33a896724b23 100644 --- a/test/Table_Tests/src/Database/Sqlite_Spec.enso +++ b/test/Table_Tests/src/Database/Sqlite_Spec.enso @@ -55,7 +55,7 @@ spec = Ref.put name_counter ix+1 name = Name_Generator.random_name "table_"+ix.to_text - in_mem_table = Materialized_Table.new <| columns.map description-> [description.at 0, description.at 2] + in_mem_table = Materialized_Table.new columns connection.upload_table name in_mem_table Common_Spec.spec prefix connection diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 449f86f5a17c..4ad55c4f8944 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -635,10 +635,7 @@ spec = t_3 = Table.new [c_3_1, c_3_2, c_3_3] t_3.default_visualization.should_equal Visualization.Id.table - table_builder columns = - Table.new <| columns.map description-> [description.at 0, description.at 2] - - Common_Table_Spec.spec "[In-Memory] " table_builder supports_case_sensitive_columns=True + Common_Table_Spec.spec "[In-Memory] " Table.new supports_case_sensitive_columns=True Test.group "Use First Row As Names" <| expect_column_names names table = From 1a6fa6c94d631447f472c43cc5ae0414c494acdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 6 Apr 2022 14:19:04 +0200 Subject: [PATCH 02/26] Add skeleton for tests --- test/Table_Tests/src/Aggregate_Spec.enso | 67 ++++++++++++++++++- .../src/Database/Postgresql_Spec.enso | 2 +- .../src/Database/Redshift_Spec.enso | 2 +- .../Table_Tests/src/Database/Sqlite_Spec.enso | 2 +- 4 files changed, 68 insertions(+), 5 deletions(-) diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 05cd7d625630..6af4abb5a5d1 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -18,7 +18,7 @@ spec = table = Table.from_csv file_contents empty_table = Table.new <| table.columns.map c->[c.name, []] materialize = x->x - here.aggregate_spec "[In-Memory] " table empty_table materialize + here.aggregate_spec "[In-Memory] " table empty_table Table.new materialize ## Runs the common aggregate tests. @@ -27,6 +27,8 @@ spec = - table: A table using the tested backend containing data from `data/data.csv`. - empty_table: An empty table using the tested backend. + - table_builder: A function used to build a table using the tested backend + from a vector of columns represented as pairs of name and vector of values. - materialize: A helper function which materializes a table from the tested backend as an in-memory table. Used to easily inspect results of a particular query/operation. @@ -34,7 +36,7 @@ spec = skip checks for backends which do not support particular features. - pending: An optional mark to disable all test groups. Can be used to indicate that some tests are disabled due to missing test setup. -aggregate_spec prefix table empty_table materialize test_selection=here.all_tests pending=Nothing = +aggregate_spec prefix table empty_table table_builder materialize test_selection=here.all_tests pending=Nothing = expect_column_names names table = table.columns . map .name . should_equal names frames_to_skip=2 @@ -765,6 +767,67 @@ aggregate_spec prefix table empty_table materialize test_selection=here.all_test materialized.columns.at 2 . name . should_equal "Concatenate Code" materialized.columns.at 2 . at idx . length . should_equal 381 + Test.group "Table.aggregate Concat" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should insert the separator, add prefix and suffix" <| + Nothing + + Test.specify "should correctly escape separator and quote characters but only if necessary" <| + Nothing + + Test.specify "should correctly handle missing values and empty values with quote character" <| + Nothing + + Test.specify "will not be able to distinguish missing values from empty values without quote character" <| + Nothing + + Test.group "Table.aggregate Count_Distinct" <| + Test.specify "should correctly count null values" <| + Nothing + + Test.specify "should correctly count all-null keys in multi-column mode" (pending=if test_selection.multi_distinct.not then "Not supported.") <| + Nothing + + Test.group "Table.aggregate should correctly select result types" <| + Test.specify " widening to decimals on Average" <| + Nothing + Test.specify " widening to decimals on Median" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Nothing + Test.specify " widening to decimals on Percentile" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Nothing + Test.specify " widening to decimals on Standard_Deviation" (pending=if test_selection.std_dev.not then "Not supported.") <| + Nothing + + Test.group "Table.aggregate should correctly handle infinities" <| + pos_inf = 1/0 + neg_inf = -1/0 + Test.specify " widening to decimals on Average" <| + Nothing + Test.specify " widening to decimals on Median" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Nothing + Test.specify " widening to decimals on Percentile" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Nothing + Test.specify " widening to decimals on Standard_Deviation" (pending=if test_selection.std_dev.not then "Not supported.") <| + Nothing + + Test.group "Table.aggregate should correctly handle NaN" <| + nan = 0.log 0 + Test.specify " widening to decimals on Average" <| + Nothing + Test.specify " widening to decimals on Median" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Nothing + Test.specify " widening to decimals on Percentile" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Nothing + Test.specify " widening to decimals on Standard_Deviation" (pending=if test_selection.std_dev.not then "Not supported.") <| + Nothing + + Test.group "Table.aggregate Mode" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should ignore missing values" <| + Nothing + + Test.group "Table.aggregate First and Last" <| + Test.specify "should not return the same value for groups with different values but equal ordering keys" (pending=if test_selection.first_last.not then "Not supported.") <| + Nothing + problem_pending = case pending.is_nothing of False -> pending True -> if test_selection.problem_handling.not then "Not supported." diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index 36e2d3d454e9..3c0f0ecd82dc 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -59,7 +59,7 @@ run_tests connection pending=Nothing = empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) tables.append empty_agg_table.name materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection pending=pending + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize selection pending=pending clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Redshift_Spec.enso b/test/Table_Tests/src/Database/Redshift_Spec.enso index e03e0ba47f67..b888d99ff43d 100644 --- a/test/Table_Tests/src/Database/Redshift_Spec.enso +++ b/test/Table_Tests/src/Database/Redshift_Spec.enso @@ -59,7 +59,7 @@ run_tests connection pending=Nothing = empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) tables.append empty_agg_table.name materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection pending=pending + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize selection pending=pending clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Sqlite_Spec.enso b/test/Table_Tests/src/Database/Sqlite_Spec.enso index 33a896724b23..b6f36635e0d5 100644 --- a/test/Table_Tests/src/Database/Sqlite_Spec.enso +++ b/test/Table_Tests/src/Database/Sqlite_Spec.enso @@ -74,7 +74,7 @@ spec = agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table materialize selection + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize selection connection.close file.delete From c5ef87f12a414e7df47ce65c5fc2f489d632cfd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 6 Apr 2022 14:59:55 +0200 Subject: [PATCH 03/26] Add Concat tests --- test/Table_Tests/src/Aggregate_Spec.enso | 52 ++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 6af4abb5a5d1..9801e9933957 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -769,16 +769,60 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection Test.group "Table.aggregate Concat" (pending=if test_selection.text_concat.not then "Not supported.") <| Test.specify "should insert the separator, add prefix and suffix" <| - Nothing + table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]] + result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")] + result.row_count . should_equal 2 + materialized = materialize result . sort "A" + materialized.columns.length . should_equal 2 + materialized.columns.at 0 . name . should_equal "A" + materialized.columns.at 0 . to_vector . should_equal ["bar", "foo"] + materialized.columns.at 1 . name . should_equal "Concatenate B" + materialized.columns.at 1 . to_vector . should_equal ["[[b]]", "[[a; c; d]]"] Test.specify "should correctly escape separator and quote characters but only if necessary" <| - Nothing + table = table_builder [["A", ["1,0", "b", "'c", "''", ","]]] + result = table.aggregate [(Concatenate "A" prefix="[[" suffix="]]" separator="," quote_char="'")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["[['1,0',b,'''c','''''',',']]"] Test.specify "should correctly handle missing values and empty values with quote character" <| - Nothing + table = table_builder [["A", ["1,0", "A", "", "", "B", Nothing, Nothing, "C"]]] + result = table.aggregate [(Concatenate "A" prefix="[[" suffix="]]" separator="," quote_char="'")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["[['1,0',A,'','',B,,,C]]"] Test.specify "will not be able to distinguish missing values from empty values without quote character" <| - Nothing + table = table_builder [["A", ["1,0", "A", "", "", "B", Nothing, Nothing, "C"]]] + result = table.aggregate [(Concatenate "A" prefix="[[" suffix="]]" separator=",")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["[[1,0,A,,,B,,,C]]"] + + Test.specify "should work with empty separator" <| + table = table_builder [["A", ["1,0", "A", "", "", "B", Nothing, Nothing, "C"]]] + result = table.aggregate [(Concatenate "A")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["1,0ABC"] + + Test.specify "should work with empty separator but non-empty quote" <| + table = table_builder [["A", ["1,0", "A", "", "", "B", Nothing, Nothing, "C"]]] + result = table.aggregate [(Concatenate "A" quote_char="'")] + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.at 0 . name . should_equal "Concatenate A" + materialized.columns.at 0 . to_vector . should_equal ["1,0A''''BC"] Test.group "Table.aggregate Count_Distinct" <| Test.specify "should correctly count null values" <| From 30e4bc4438a6c46ca7200ccc6426680a7305ef3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 6 Apr 2022 15:24:07 +0200 Subject: [PATCH 04/26] fixes for concat --- .../0.0.0-dev/src/Data/Dialect/Helpers.enso | 3 ++- .../enso/table/aggregations/Concatenate.java | 25 ++++++++++++------- test/Table_Tests/src/Aggregate_Spec.enso | 4 +-- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Helpers.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Helpers.enso index 13815a974a45..8f1477374735 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Helpers.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Helpers.enso @@ -38,7 +38,8 @@ make_concat make_raw_concat_expr make_contains_expr has_quote args = includes_separator = separator ++ Sql.code " != '' AND " ++ make_contains_expr expr separator ## We use the assumption that `has_quote` is True iff `quote` is not empty. includes_quote = make_contains_expr expr quote - needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren + is_empty = expr ++ Sql.code " = ''" + needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren ++ Sql.code " OR " ++ is_empty.paren escaped = Sql.code "replace(" ++ expr ++ Sql.code ", " ++ quote ++ Sql.code ", " ++ quote ++ append ++ quote ++ Sql.code ")" quoted = quote ++ append ++ escaped ++ append ++ quote Sql.code "CASE WHEN " ++ needs_quoting ++ Sql.code " THEN " ++ quoted ++ Sql.code " ELSE " ++ expr ++ Sql.code " END" diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java index 25ec7b378cc3..6ee6ac247504 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java @@ -1,5 +1,6 @@ package org.enso.table.aggregations; +import javax.swing.JSeparator; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; @@ -9,16 +10,16 @@ public class Concatenate extends Aggregator { private final Storage storage; - private final String join; + private final String separator; private final String prefix; private final String suffix; private final String quote; - public Concatenate(String name, Column column, String join, String prefix, String suffix, String quote) { + public Concatenate(String name, Column column, String separator, String prefix, String suffix, String quote) { super(name, Storage.Type.STRING); this.storage = column.getStorage(); - this.join = join == null ? "" : join; + this.separator = separator == null ? "" : separator; this.prefix = prefix; this.suffix = suffix; this.quote = quote == null ? "" : quote; @@ -30,9 +31,9 @@ public Object aggregate(List indexes) { for (int row: indexes) { Object value = storage.getItemBoxed(row); if (value == null || value instanceof String) { - String textValue = toQuotedString(value, quote, join); + String textValue = toQuotedString(value, quote, separator); - if (quote.equals("") && textValue.contains(join)) { + if (quote.equals("") && textValue.contains(separator)) { this.addProblem(new UnquotedDelimiter(this.getName(), row, "Unquoted delimiter.")); } @@ -40,7 +41,7 @@ public Object aggregate(List indexes) { current = new StringBuilder(); current.append(textValue); } else { - current.append(join); + current.append(separator); current.append(textValue); } } else { @@ -58,14 +59,20 @@ public Object aggregate(List indexes) { return current.toString(); } - private static String toQuotedString(Object value, final String quote, final String join) { + private static String toQuotedString(Object value, final String quote, final String separator) { if (value == null) { return ""; } String textValue = value.toString(); - if (!quote.equals("") && (textValue.equals("") || textValue.contains(join))) { - return quote + textValue.replace(quote, quote + quote) + quote; + boolean quote_is_defined = !quote.isEmpty(); + if (quote_is_defined) { + boolean includes_separator = !separator.isEmpty() && textValue.contains(separator); + boolean includes_quote = textValue.contains(quote); + boolean needs_quoting = textValue.isEmpty() || includes_separator || includes_quote; + if (needs_quoting) { + return quote + textValue.replace(quote, quote + quote) + quote; + } } return textValue; diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 9801e9933957..ccd8e8d2be09 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -816,13 +816,13 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 0 . to_vector . should_equal ["1,0ABC"] Test.specify "should work with empty separator but non-empty quote" <| - table = table_builder [["A", ["1,0", "A", "", "", "B", Nothing, Nothing, "C"]]] + table = table_builder [["A", ["1'0", "A", "", "", "B", Nothing, Nothing, "C"]]] result = table.aggregate [(Concatenate "A" quote_char="'")] result.row_count . should_equal 1 materialized = materialize result materialized.columns.length . should_equal 1 materialized.columns.at 0 . name . should_equal "Concatenate A" - materialized.columns.at 0 . to_vector . should_equal ["1,0A''''BC"] + materialized.columns.at 0 . to_vector . should_equal ["'1''0'A''''BC"] Test.group "Table.aggregate Count_Distinct" <| Test.specify "should correctly count null values" <| From 1aa6009bdfdb997cab07bad947d81e7445641296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 6 Apr 2022 15:52:18 +0200 Subject: [PATCH 05/26] Improve SQL error handling: add contextual query info. --- .../0.0.0-dev/src/Connection/Connection.enso | 42 +++++++++---------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso index 02d2df9cd8df..b02920fee6f7 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso @@ -65,7 +65,7 @@ type Connection meant only for internal use. execute_query : Text | Sql.Statement -> Vector Sql.Sql_Type -> Materialized_Table = execute_query query expected_types=Nothing = here.handle_sql_errors <| - Resource.bracket (this.prepare_statement query) .close stmt-> + this.with_prepared_statement query stmt-> rs = stmt.executeQuery metadata = rs.getMetaData ncols = metadata.getColumnCount @@ -97,31 +97,27 @@ type Connection representing the query to execute. execute_update : Text | Sql.Statement -> Integer execute_update query = here.handle_sql_errors <| - Resource.bracket (this.prepare_statement query) .close stmt-> - ## FIXME USE CATCH HERE! - result = Panic.recover Any stmt.executeLargeUpdate - result.catch err-> case err of - Polyglot_Error exc -> - case Java.is_instance exc UnsupportedOperationException of - True -> - stmt.executeUpdate - False -> Error.throw err - _ -> Error.throw err + this.with_prepared_statement query stmt-> + Panic.catch UnsupportedOperationException stmt.executeLargeUpdate _-> + stmt.executeUpdate ## PRIVATE - Prepares the statement by ensuring that it is sanitised. - - Arguments: - - query: The query to prepare the SQL statement in. - prepare_statement : Text | Sql.Statement -> PreparedStatement - prepare_statement query = - go template holes=[] = Managed_Resource.with this.connection_resource java_connection-> + Runs the provided action with a prepared statement, adding contextual + information to any thrown SQL errors. + with_prepared_statement : Text | Sql.Statement -> (PreparedStatement -> Any) -> Any + with_prepared_statement query action = + prepare template holes = Managed_Resource.with this.connection_resource java_connection-> stmt = java_connection.prepareStatement template Panic.catch Any (here.set_statement_values stmt holes) caught_panic-> stmt.close Panic.throw caught_panic stmt + + go template holes = + here.wrap_sql_errors related_query=template <| + Resource.bracket (prepare template holes) .close action + case query of Text -> go query [] Sql.Statement _ -> @@ -140,7 +136,7 @@ type Connection fetch_columns table_name = query = IR.Select_All (IR.make_ctx_from table_name) compiled = this.dialect.generate_sql query - Resource.bracket (this.prepare_statement compiled) .close stmt-> + this.with_prepared_statement compiled stmt-> rs = stmt.executeQuery metadata = rs.getMetaData ncols = metadata.getColumnCount @@ -363,7 +359,7 @@ type Sql_Error Convert the SQL error to a textual representation. to_text : Text to_text = - query = if this.related_query.is_nothing.not then " [Query was: " + query + "]" else "" + query = if this.related_query.is_nothing.not then " [Query was: " + this.related_query + "]" else "" "There was an SQL error: " + this.java_exception.getMessage.to_text + "." + query ## UNSTABLE @@ -406,10 +402,10 @@ type Sql_Timeout_Error Arguments: - action: The computation to execute. This computation may throw SQL errors. -handle_sql_errors : Any -> Any ! (Sql_Error | Sql_Timeout_Error) -handle_sql_errors ~action = +handle_sql_errors : Any -> (Text | Nothing) -> Any ! (Sql_Error | Sql_Timeout_Error) +handle_sql_errors ~action related_query=Nothing = Panic.recover [Sql_Error, Sql_Timeout_Error] <| - here.wrap_sql_errors action + here.wrap_sql_errors action related_query ## PRIVATE From f7aed9b6a136dcc360986f5e63de44caa8cbc215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 6 Apr 2022 16:11:05 +0200 Subject: [PATCH 06/26] Fix Postgres, make pending status clearer; fix Sqlite spec --- .../0.0.0-dev/src/Data/Dialect/Postgres.enso | 2 +- test/Table_Tests/src/Aggregate_Spec.enso | 119 +++++++++--------- .../Table_Tests/src/Database/Sqlite_Spec.enso | 2 +- 3 files changed, 61 insertions(+), 62 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso index 444b812a3a53..defac1db510c 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso @@ -174,6 +174,6 @@ concat_ops = make_raw_concat_expr expr separator = Sql.code "array_to_string(array_agg(" ++ expr ++ Sql.code "), " ++ separator ++ Sql.code ")" make_contains_expr expr substring = - Sql.code "position(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0" + Sql.code "position(" ++ substring ++ Sql.code " in " ++ expr ++ Sql.code ") > 0" concat = Helpers.make_concat make_raw_concat_expr make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index ccd8e8d2be09..1b9e73abf98c 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -45,6 +45,11 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection 0.up_to table.row_count . find i-> 0.up_to key.length . all j-> (table_columns.at j . at i)==(key.at j) + resolve_pending enabled_flag pending=Nothing = + case pending of + Nothing -> if enabled_flag.not then "Not supported." + _ -> pending + Test.group prefix+"Table.aggregate should summarize whole table" pending=pending <| Test.specify "should be able to count" <| grouped = table.aggregate [Count Nothing] @@ -81,7 +86,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . name . should_equal "Count Distinct Flag" materialized.columns.at 2 . at 0 . should_equal 2 - Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported.") <| + Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| ## TODO [RW] add Count_Distinct with overridden ignore_nothing! also need to modify data.csv to include some nulls on index and flag grouped = table.aggregate [Count_Distinct (By_Name ["Index", "Flag"])] materialized = materialize grouped @@ -104,7 +109,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . name . should_equal "Average ValueWithNothing" materialized.columns.at 3 . at 0 . should_equal 1.228650 epsilon=0.000001 - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = table.aggregate [Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -118,7 +123,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing_1" materialized.columns.at 3 . at 0 . should_equal 58.575554 epsilon=0.000001 - Test.specify "should be able to create median, mode and percentile values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median, mode and percentile values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -136,7 +141,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 5 . name . should_equal "40%-ile ValueWithNothing" materialized.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001 - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = table.aggregate [First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -146,7 +151,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Last ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal -89.78 epsilon=0.000001 - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = table.aggregate [First "Index", Last "Value"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -170,7 +175,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . name . should_equal "Maximum ValueWithNothing" materialized.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001 - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -180,7 +185,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Longest TextWithNothing" materialized.columns.at 1 . at 0 . should_equal "setp295gjvbanana" - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = table.aggregate [Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -229,7 +234,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Average ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = empty_table.aggregate [Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -239,7 +244,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Standard Deviation ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to create median, mode and percentile values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median, mode and percentile values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = empty_table.aggregate [Median "Index", Mode "Index", Percentile 0.25 "Value"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -251,7 +256,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . name . should_equal "25%-ile Value" materialized.columns.at 2 . at 0 . should_equal Nothing - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = empty_table.aggregate [First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -261,7 +266,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Last ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = empty_table.aggregate [First "Index", Last "Value"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -281,7 +286,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Maximum ValueWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -291,7 +296,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Longest TextWithNothing" materialized.columns.at 1 . at 0 . should_equal Nothing - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = empty_table.aggregate [Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -336,7 +341,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Sum Value" materialized.columns.at 2 . name . should_equal "Average ValueWithNothing" - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = empty_table.aggregate [Group_By 0, Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -345,7 +350,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Standard Deviation Value" materialized.columns.at 2 . name . should_equal "Standard Deviation ValueWithNothing" - Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = empty_table.aggregate [Group_By 0, Median "Index", Mode "Index", Percentile 0.25 "Value"] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -355,7 +360,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . name . should_equal "Mode Index" materialized.columns.at 3 . name . should_equal "25%-ile Value" - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = empty_table.aggregate [Group_By 0, First "Index" (order_by = By_Name ["Hexadecimal", "TextWithNothing"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -364,7 +369,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "First Index" materialized.columns.at 2 . name . should_equal "Last ValueWithNothing" - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = empty_table.aggregate [Group_By 0, First "Index", Last "Value"] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -382,7 +387,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Minimum Value" materialized.columns.at 2 . name . should_equal "Maximum ValueWithNothing" - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -391,7 +396,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 1 . name . should_equal "Shortest TextWithNothing" materialized.columns.at 2 . name . should_equal "Longest TextWithNothing" - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = empty_table.aggregate [Group_By 0, Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 0 @@ -443,7 +448,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . name . should_equal "Count Distinct Flag" materialized.columns.at 3 . at idx . should_equal 2 - Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported.") <| + Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| ## TODO probably should use different cols for multi-distinct and also should check ignore_nothing grouped = table.aggregate [Group_By "Index", Count_Distinct (By_Name ["Index", "Flag"])] materialized = materialize grouped @@ -472,7 +477,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 4 . name . should_equal "Average ValueWithNothing" materialized.columns.at 4 . at idx . should_equal 0.646213 epsilon=0.000001 - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = table.aggregate [Group_By "Index", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -489,7 +494,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing_1" materialized.columns.at 4 . at idx . should_equal 56.677714 epsilon=0.000001 - Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = table.aggregate [Group_By "Index", Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -510,7 +515,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 6 . name . should_equal "40%-ile ValueWithNothing" materialized.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001 - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = table.aggregate [Group_By "Index", First "TextWithNothing" (order_by = By_Name ["Hexadecimal", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"])] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -523,7 +528,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . name . should_equal "Last ValueWithNothing" materialized.columns.at 2 . at idx . should_equal 19.77 epsilon=0.000001 - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -553,7 +558,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 4 . name . should_equal "Maximum ValueWithNothing" materialized.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001 - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -566,7 +571,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . name . should_equal "Longest TextWithNothing" materialized.columns.at 2 . at idx . should_equal "byo6kn5l3sz" - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = table.aggregate [Group_By "Index", Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -624,7 +629,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . name . should_equal "Count Distinct Flag" materialized.columns.at 3 . at idx . should_equal 1 - Test.specify "should be able to count distinct values over multiple columns" (pending=if test_selection.multi_distinct.not then "Not supported.") <| + Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| ## TODO probably should use different cols for multi-distinct and also should check ignore_nothing grouped = table.aggregate [Group_By "Index", Count_Distinct (By_Name ["Index", "Flag"]), Group_By "Flag"] materialized = materialize grouped @@ -655,7 +660,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 4 . name . should_equal "Average ValueWithNothing" materialized.columns.at 4 . at idx . should_equal 4.721858 epsilon=0.000001 - Test.specify "should be able to compute standard deviation of values" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify "should be able to compute standard deviation of values" (pending = resolve_pending test_selection.std_dev) <| grouped = table.aggregate [Group_By "Index", Group_By "Flag", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -673,7 +678,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 5 . name . should_equal "Standard Deviation ValueWithNothing_1" materialized.columns.at 5 . at idx . should_equal 57.306492 epsilon=0.000001 - Test.specify "should be able to create median values" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify "should be able to create median values" (pending = resolve_pending test_selection.advanced_stats) <| grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Group_By "Index", Group_By "Flag", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -695,7 +700,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 7 . name . should_equal "40%-ile ValueWithNothing" materialized.columns.at 7 . at idx . should_equal -17.174000 epsilon=0.000001 - Test.specify "should be able to get first and last values" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.specify "should be able to get first and last values" (pending = resolve_pending test_selection.first_last) <| grouped = table.aggregate [Group_By "Flag", First "TextWithNothing" (order_by = By_Name ["Hexadecimal", "Flag"]), Last "ValueWithNothing" (order_by = By_Name ["Value"]), Group_By "Index"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -709,7 +714,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . name . should_equal "Last ValueWithNothing" materialized.columns.at 2 . at idx . should_equal 42.17 epsilon=0.000001 - Test.specify "should be able to get first and last values with default row order" (pending=if test_selection.first_last_row_order.not then "Not supported.") <| + Test.specify "should be able to get first and last values with default row order" (pending = resolve_pending test_selection.first_last_row_order) <| grouped = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -741,7 +746,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 5 . name . should_equal "Maximum ValueWithNothing" materialized.columns.at 5 . at idx . should_equal 97.17 epsilon=0.000001 - Test.specify "should be able to get shortest and longest text values" (pending=if test_selection.text_shortest_longest.not then "Not supported.") <| + Test.specify "should be able to get shortest and longest text values" (pending = resolve_pending test_selection.text_shortest_longest) <| grouped = table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -755,7 +760,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . name . should_equal "Longest TextWithNothing" materialized.columns.at 3 . at idx . should_equal "byo6kn5l3sz" - Test.specify "should be able to get concatenated text values" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.specify "should be able to get concatenated text values" (pending = resolve_pending test_selection.text_concat) <| grouped = table.aggregate [Group_By "Index", Group_By "Flag", Concatenate "Code"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -767,7 +772,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . name . should_equal "Concatenate Code" materialized.columns.at 2 . at idx . length . should_equal 381 - Test.group "Table.aggregate Concat" (pending=if test_selection.text_concat.not then "Not supported.") <| + Test.group prefix+"Table.aggregate Concat" (pending = resolve_pending test_selection.text_concat) <| Test.specify "should insert the separator, add prefix and suffix" <| table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]] result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")] @@ -824,58 +829,55 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 0 . name . should_equal "Concatenate A" materialized.columns.at 0 . to_vector . should_equal ["'1''0'A''''BC"] - Test.group "Table.aggregate Count_Distinct" <| + Test.group prefix+"Table.aggregate Count_Distinct" pending=pending <| Test.specify "should correctly count null values" <| Nothing - Test.specify "should correctly count all-null keys in multi-column mode" (pending=if test_selection.multi_distinct.not then "Not supported.") <| + Test.specify "should correctly count all-null keys in multi-column mode" (pending = resolve_pending test_selection.multi_distinct) <| Nothing - Test.group "Table.aggregate should correctly select result types" <| + Test.group prefix+"Table.aggregate should correctly select result types" pending=pending <| Test.specify " widening to decimals on Average" <| Nothing - Test.specify " widening to decimals on Median" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify " widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Percentile" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify " widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Standard_Deviation" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify " widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| Nothing - Test.group "Table.aggregate should correctly handle infinities" <| + Test.group prefix+"Table.aggregate should correctly handle infinities" pending=pending <| pos_inf = 1/0 neg_inf = -1/0 Test.specify " widening to decimals on Average" <| Nothing - Test.specify " widening to decimals on Median" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify " widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Percentile" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify " widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Standard_Deviation" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify " widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| Nothing - Test.group "Table.aggregate should correctly handle NaN" <| + Test.group prefix+"Table.aggregate should correctly handle NaN" pending=pending <| nan = 0.log 0 Test.specify " widening to decimals on Average" <| Nothing - Test.specify " widening to decimals on Median" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify " widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Percentile" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.specify " widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Standard_Deviation" (pending=if test_selection.std_dev.not then "Not supported.") <| + Test.specify " widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| Nothing - Test.group "Table.aggregate Mode" (pending=if test_selection.advanced_stats.not then "Not supported.") <| + Test.group prefix+"Table.aggregate Mode" (pending = resolve_pending test_selection.advanced_stats pending) <| Test.specify "should ignore missing values" <| Nothing - Test.group "Table.aggregate First and Last" <| - Test.specify "should not return the same value for groups with different values but equal ordering keys" (pending=if test_selection.first_last.not then "Not supported.") <| + Test.group prefix+"Table.aggregate First and Last" pending=pending <| + Test.specify "should not return the same value for groups with different values but equal ordering keys" (pending = resolve_pending test_selection.first_last) <| Nothing - problem_pending = case pending.is_nothing of - False -> pending - True -> if test_selection.problem_handling.not then "Not supported." - Test.group prefix+"Table.aggregate should raise warnings when there are issues" pending=problem_pending <| + Test.group prefix+"Table.aggregate should raise warnings when there are issues" pending=(resolve_pending test_selection.problem_handling pending) <| table = col1 = ["Index", [1, 2, 3]] col2 = ["Value", [1, 2, 3]] @@ -935,10 +937,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection tester = expect_column_names [] Problems.test_problem_handling action problems tester - aggregate_pending = case pending.is_nothing of - False -> pending - True -> if test_selection.aggregation_problems.not then "Not supported." - Test.group prefix+"Table.aggregate should raise warnings when there are issues computing aggregation" pending=aggregate_pending <| + Test.group prefix+"Table.aggregate should raise warnings when there are issues computing aggregation" pending=(resolve_pending test_selection.aggregation_problems pending) <| table = col1 = ["Index", [1, 2, 3]] col2 = ["Value", [1, 2, 3.1]] @@ -1007,7 +1006,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection tester = expect_column_names ["Maximum Mixed"] Problems.test_problem_handling action problems tester - Test.group prefix+"Table.aggregate should merge warnings when issues computing aggregation" pending=aggregate_pending <| + Test.group prefix+"Table.aggregate should merge warnings when issues computing aggregation" pending=(resolve_pending test_selection.aggregation_problems pending) <| table = col1 = ["Key", ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O"]] col2 = ["Value", [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]] diff --git a/test/Table_Tests/src/Database/Sqlite_Spec.enso b/test/Table_Tests/src/Database/Sqlite_Spec.enso index b6f36635e0d5..fbc975c6bcfb 100644 --- a/test/Table_Tests/src/Database/Sqlite_Spec.enso +++ b/test/Table_Tests/src/Database/Sqlite_Spec.enso @@ -17,7 +17,7 @@ sqlite_specific_spec connection = action = connection.execute_query "SELECT A FROM undefined_table" action . should_fail_with Sql_Error - action.catch.to_text . should_equal "There was an SQL error: '[SQLITE_ERROR] SQL error or missing database (no such table: undefined_table)'." + action.catch.to_text . should_equal "There was an SQL error: '[SQLITE_ERROR] SQL error or missing database (no such table: undefined_table)'. [Query was: SELECT A FROM undefined_table]" Test.group "[SQLite] Metadata" <| tinfo = Name_Generator.random_name "Tinfo" From 217847f15b8c0674372d99e24f329aa49aa9468f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 6 Apr 2022 16:40:46 +0200 Subject: [PATCH 07/26] Add no agg edges --- test/Table_Tests/src/Aggregate_Spec.enso | 28 +++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 1b9e73abf98c..2850c96950b7 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -4,6 +4,7 @@ import Standard.Table from Standard.Table.Data.Column_Selector import By_Name, By_Index from Standard.Table.Data.Aggregate_Column import all from Standard.Table.Error as Error_Module import Missing_Input_Columns, Column_Indexes_Out_Of_Range, No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings +from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error import Standard.Test import Standard.Test.Problems @@ -877,11 +878,36 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection Test.specify "should not return the same value for groups with different values but equal ordering keys" (pending = resolve_pending test_selection.first_last) <| Nothing + Test.group prefix+"Table.aggregate" pending=pending <| + Test.specify "should work even if no aggregations apart from groupings are specified" <| + table = table_builder [["A", [1, 1, 2, 1]], ["B", [3, 2, 2, 3]], ["C", [11, 12, 13, 14]]] + grouped = table.aggregate [Group_By "B", Group_By "A"] + grouped.row_count . should_equal 3 + materialized = materialize grouped . sort ["A", "B"] + materialized.columns.length . should_equal 2 + materialized.columns.at 1 . name . should_equal "A" + materialized.columns.at 1 . to_vector . should_equal [1, 1, 2] + materialized.columns.at 0 . name . should_equal "B" + materialized.columns.at 0 . to_vector . should_equal [2, 3, 2] + + if test_selection.first_last && test_selection.first_last_row_order.not then + Test.specify "should report a warning and ignore problematic columns if a feature is not supported" <| + table = table_builder [["A", [1,2,Nothing,3]]] + action = table.aggregate [Sum "A", First "A", Last "A"] on_problems=_ + tester result = + result.row_count . should_equal 1 + materialized = materialize result + materialized.columns.length . should_equal 1 + materialized.columns.first.name . should_equal "Sum A" + materialized.columns.first.to_vector . should_equal [6] + problems = [Unsupported_Database_Operation_Error "`First` aggregation requires at least one `order_by` column.", Unsupported_Database_Operation_Error "`Last` aggregation requires at least one `order_by` column."] + Problems.test_problem_handling action problems tester + Test.group prefix+"Table.aggregate should raise warnings when there are issues" pending=(resolve_pending test_selection.problem_handling pending) <| table = col1 = ["Index", [1, 2, 3]] col2 = ["Value", [1, 2, 3]] - Table.new [col1, col2] + table_builder [col1, col2] Test.specify "should raise a warning when there are no output columns" <| action = table.aggregate [] on_problems=_ From d187af6bee5129975f3e4a304f64a9b22b68ae5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 6 Apr 2022 18:12:52 +0200 Subject: [PATCH 08/26] Count_Distinct tests WIP: Postgres not returning correct results ? --- .../0.0.0-dev/src/Connection/Connection.enso | 4 +- test/Table_Tests/src/Aggregate_Spec.enso | 66 ++++++++++++++++--- .../Table_Tests/src/Database/Common_Spec.enso | 24 ------- 3 files changed, 60 insertions(+), 34 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso index b02920fee6f7..9b299b585f46 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso @@ -433,7 +433,9 @@ default_storage_type storage_type = case storage_type of Storage.Integer -> Sql_Type.integer Storage.Decimal -> Sql_Type.double Storage.Boolean -> Sql_Type.boolean - Storage.Any -> Sql_Type.blob + ## Support for mixed type columns in Table upload is currently very limited, + falling back to treating everything as text. + Storage.Any -> Sql_Type.text ## PRIVATE Sets values inside of a prepared statement. diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 2850c96950b7..016095f47021 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -55,7 +55,6 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection Test.specify "should be able to count" <| grouped = table.aggregate [Count Nothing] materialized = materialize grouped - ## TODO check row count of not materialized one grouped.row_count . should_equal 1 materialized.columns.length . should_equal 1 materialized.columns.at 0 . name . should_equal "Count" @@ -88,7 +87,6 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . at 0 . should_equal 2 Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| - ## TODO [RW] add Count_Distinct with overridden ignore_nothing! also need to modify data.csv to include some nulls on index and flag grouped = table.aggregate [Count_Distinct (By_Name ["Index", "Flag"])] materialized = materialize grouped grouped.row_count . should_equal 1 @@ -218,12 +216,14 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . at 0 . should_equal 0 Test.specify "should be able to count distinct values" <| - grouped = empty_table.aggregate [Count_Distinct "Code"] + grouped = empty_table.aggregate [Count_Distinct "Code" (ignore_nothing=False), Count_Distinct "Code" (ignore_nothing=True)] materialized = materialize grouped grouped.row_count . should_equal 1 - materialized.columns.length . should_equal 1 + materialized.columns.length . should_equal 2 materialized.columns.at 0 . name . should_equal "Count Distinct Code" materialized.columns.at 0 . at 0 . should_equal 0 + materialized.columns.at 1 . name . should_equal "Count Distinct Code_1" + materialized.columns.at 1 . at 0 . should_equal 0 Test.specify "should be able to compute sum and average of values" <| grouped = empty_table.aggregate [Sum "Value", Average "ValueWithNothing"] @@ -450,7 +450,6 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . at idx . should_equal 2 Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| - ## TODO probably should use different cols for multi-distinct and also should check ignore_nothing grouped = table.aggregate [Group_By "Index", Count_Distinct (By_Name ["Index", "Flag"])] materialized = materialize grouped grouped.row_count . should_equal 10 @@ -631,7 +630,6 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 3 . at idx . should_equal 1 Test.specify "should be able to count distinct values over multiple columns" (pending = resolve_pending test_selection.multi_distinct) <| - ## TODO probably should use different cols for multi-distinct and also should check ignore_nothing grouped = table.aggregate [Group_By "Index", Count_Distinct (By_Name ["Index", "Flag"]), Group_By "Flag"] materialized = materialize grouped grouped.row_count . should_equal 20 @@ -831,11 +829,61 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 0 . to_vector . should_equal ["'1''0'A''''BC"] Test.group prefix+"Table.aggregate Count_Distinct" pending=pending <| - Test.specify "should correctly count null values" <| - Nothing + Test.specify "should correctly count missing values" <| + get_value t = + columns = materialize t . columns + columns.length . should_equal 1 frames_to_skip=1 + columns.first.length . should_equal 1 frames_to_skip=1 + columns.first . at 0 + + t1 = table_builder [["A", []]] + get_value (t1.aggregate [Count_Distinct "A" (ignore_nothing=True)]) . should_equal 0 + get_value (t1.aggregate [Count_Distinct "A" (ignore_nothing=False)]) . should_equal 0 + + t2 = table_builder [["A", [Nothing, Nothing]]] + get_value (t2.aggregate [Count_Distinct "A" (ignore_nothing=True)]) . should_equal 0 + get_value (t2.aggregate [Count_Distinct "A" (ignore_nothing=False)]) . should_equal 1 + + t3 = table_builder [["A", [1, 2]]] + get_value (t3.aggregate [Count_Distinct "A" (ignore_nothing=True)]) . should_equal 2 + get_value (t3.aggregate [Count_Distinct "A" (ignore_nothing=False)]) . should_equal 2 + + t4 = table_builder [["A", [1, 2, Nothing, Nothing]]] + get_value (t4.aggregate [Count_Distinct "A" (ignore_nothing=True)]) . should_equal 2 + get_value (t4.aggregate [Count_Distinct "A" (ignore_nothing=False)]) . should_equal 3 + + t5 = table_builder [["G", ["foo", "foo", "bar", "foo"]], ["A", [Nothing, 0, Nothing, Nothing]]] + + r1 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=True)] + r1.row_count . should_equal 2 + m1 = materialize r1 . sort "G" + m1.columns.length . should_equal 2 + m1.columns.first.to_vector . should_equal ["bar", "foo"] + m1.columns.second.to_vector . should_equal [0, 1] + + r2 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=False)] + r2.row_count . should_equal 2 + m2 = materialize r2 . sort "G" + m2.columns.length . should_equal 2 + m2.columns.first.to_vector . should_equal ["bar", "foo"] + m2.columns.second.to_vector . should_equal [1, 2] Test.specify "should correctly count all-null keys in multi-column mode" (pending = resolve_pending test_selection.multi_distinct) <| - Nothing + table = table_builder [["A", ["foo", "foo", Nothing, Nothing, Nothing]], ["B", ["baz", Nothing, Nothing, Nothing, "baz"]], ["C", [1, 2, 3, 4, 5]]] + + r1 = table.aggregate [Count_Distinct (By_Name ["A", "B"]) (ignore_nothing=True)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length.should_equal 1 + m1.columns.first.name . should_equal "Count Distinct A B" + m1.columns.first.to_vector . should_equal [3] + + r2 = table.aggregate [Count_Distinct (By_Name ["A", "B"]) (ignore_nothing=False)] + r2.row_count.should_equal 1 + m2 = materialize r2 + m2.columns.length.should_equal 1 + m2.columns.first.name . should_equal "Count Distinct A B" + m2.columns.first.to_vector . should_equal [4] Test.group prefix+"Table.aggregate should correctly select result types" pending=pending <| Test.specify " widening to decimals on Average" <| diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index f52c54fb9d27..228c03471661 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -376,30 +376,6 @@ spec prefix connection pending=Nothing = t2.at "Count Not Nothing price" . to_vector . should_equal [11] t2.at "Count Nothing price" . to_vector . should_equal [5] - Test.specify "should allow to count distinct values" <| - aggregates = [Count_Distinct "quantity", Count_Distinct "price" (ignore_nothing=True), Count_Distinct "price" (ignore_nothing=False)] - - t1 = determinize_by "name" (t.aggregate [Group_By "name"]+aggregates . to_dataframe) - t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"] - # t1.at "Count Distinct quantity" . to_vector . should_equal [2, 1, 3, 0] - # TODO - - t2 = t.aggregate aggregates . to_dataframe - t2 . at "Count Distinct quantity" . to_vector . should_equal [10] - t2 . at "Count Distinct price" . to_vector . should_equal [7] - #t2 . at "Count Distinct price 2" . to_vector . should_equal [8] - - Test.specify "should allow to count distinct values over multiple fields" pending="TODO" <| - aggregates = [Count_Distinct ["price", "quantity"]] - - t1 = determinize_by "name" (t.aggregate [Group_By "name"]+aggregates . to_dataframe) - t1.at "name" . to_vector . should_equal ["bar", "baz", "foo", "quux", "zzzz"] - # t1.at "Count Distinct quantity" . to_vector . should_equal [2, 1, 3, 0] - # TODO - - t2 = t.aggregate aggregates . to_dataframe - t2 . at "Count Distinct price quantity" . to_vector . should_equal [13] - Test.specify "should allow simple arithmetic aggregations" <| aggregates = [Sum "price" Nothing, Sum "quantity" Nothing, Average "price" Nothing] ## TODO can check the datatypes From bf9d2a603df9714e9ebfbf22406ff4bd42a30762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 7 Apr 2022 11:27:56 +0200 Subject: [PATCH 09/26] Fix count distinct for Postgres --- .../0.0.0-dev/src/Data/Dialect/Postgres.enso | 23 ++++++++++++++++++- .../0.0.0-dev/src/Data/Dialect/Sqlite.enso | 14 ++++++++++- .../src/Data/Internal/Base_Generator.enso | 13 +---------- test/Table_Tests/src/Aggregate_Spec.enso | 18 +++++++-------- .../src/Database/Postgresql_Spec.enso | 7 +++--- .../src/Database/Redshift_Spec.enso | 7 +++--- 6 files changed, 53 insertions(+), 29 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso index defac1db510c..e2b5ec600155 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso @@ -73,7 +73,7 @@ make_internal_generator_dialect = False -> Error.throw ("Invalid amount of arguments for operation contains") text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with], here.agg_shortest, here.agg_longest]+here.concat_ops - counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty] + counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]] stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"] stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "stddev_samp"] @@ -177,3 +177,24 @@ concat_ops = Sql.code "position(" ++ substring ++ Sql.code " in " ++ expr ++ Sql.code ") > 0" concat = Helpers.make_concat make_raw_concat_expr make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] + + +## PRIVATE +agg_count_distinct args = if args.is_empty then (Error.throw (Illegal_Argument_Error "COUNT_DISTINCT requires at least one argument.")) else + case args.length == 1 of + True -> + ## A single null value will be skipped. + Sql.code "COUNT(DISTINCT " ++ args.first ++ Sql.code ")" + False -> + ## A tuple of nulls is not a null, so it will not be skipped - but + we want to ignore all-null columns. So we manually filter them + out. + count = Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code "))" + are_nulls = args.map arg-> arg.paren ++ Sql.code " IS NULL" + all_nulls_filter = Sql.code " FILTER (WHERE NOT (" ++ Sql.join " AND " are_nulls ++ Sql.code "))" + (count ++ all_nulls_filter).paren + +## PRIVATE +agg_count_distinct_include_null args = + ## If we always count as tuples, then even null fields are counted. + Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code ", 0))" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso index b94d821f7c8e..03df0fb5429c 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso @@ -72,7 +72,7 @@ make_internal_generator_dialect = False -> Error.throw ("Invalid amount of arguments for operation contains") text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]+here.concat_ops - counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty] + counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]] stats = [here.agg_stddev_pop, here.agg_stddev_samp] my_mappings = text + counts + stats Base_Generator.base_dialect . extend_with my_mappings @@ -162,3 +162,15 @@ concat_ops = Sql.code "instr(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0" concat = Helpers.make_concat make_raw_concat_expr make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] + + +## PRIVATE +agg_count_distinct args = + Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code "))" + +## PRIVATE +agg_count_distinct_include_null args = + count = here.count_distinct args + are_nulls = args.map arg-> arg.paren ++ Sql.code " IS NULL" + all_nulls_case = Sql.code "CASE WHEN COUNT(CASE WHEN " ++ Sql.join " AND " are_nulls ++ Sql.code " THEN 1 END) > 0 THEN 1 ELSE 0 END" + count ++ Sql.code " + " ++ all_nulls_case diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Base_Generator.enso index f7b31fccc90d..edfb4f7761fa 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Base_Generator.enso @@ -168,22 +168,11 @@ base_dialect = logic = [bin "AND", bin "OR", unary "NOT"] compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">="] agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"] - counts = [fun "COUNT", ["COUNT_ROWS", here.make_constant "COUNT(*)"], ["COUNT_DISTINCT", here.count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.count_distinct_include_null]] + counts = [fun "COUNT", ["COUNT_ROWS", here.make_constant "COUNT(*)"]] nulls = [["ISNULL", here.make_right_unary_op "IS NULL"], ["FILLNULL", here.make_function "COALESCE"]] base_map = Map.from_vector (arith + logic + compare + agg + nulls + counts) Internal_Dialect base_map here.wrap_in_quotes -## PRIVATE -count_distinct args = - Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code "))" - -## PRIVATE -count_distinct_include_null args = - count = here.count_distinct args - are_nulls = args.map arg-> arg.paren ++ Sql.code " IS NULL" - all_nulls_case = Sql.code "CASE WHEN COUNT(CASE WHEN " ++ Sql.join " AND " are_nulls ++ Sql.code " THEN 1 END) > 0 THEN 1 ELSE 0 END" - count ++ Sql.code " + " ++ all_nulls_case - ## PRIVATE Builds code for an expression. diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 016095f47021..38ffc75ac24a 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -771,7 +771,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection materialized.columns.at 2 . name . should_equal "Concatenate Code" materialized.columns.at 2 . at idx . length . should_equal 381 - Test.group prefix+"Table.aggregate Concat" (pending = resolve_pending test_selection.text_concat) <| + Test.group prefix+"Table.aggregate Concat" (pending = resolve_pending test_selection.text_concat pending) <| Test.specify "should insert the separator, add prefix and suffix" <| table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]] result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")] @@ -869,14 +869,7 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection m2.columns.second.to_vector . should_equal [1, 2] Test.specify "should correctly count all-null keys in multi-column mode" (pending = resolve_pending test_selection.multi_distinct) <| - table = table_builder [["A", ["foo", "foo", Nothing, Nothing, Nothing]], ["B", ["baz", Nothing, Nothing, Nothing, "baz"]], ["C", [1, 2, 3, 4, 5]]] - - r1 = table.aggregate [Count_Distinct (By_Name ["A", "B"]) (ignore_nothing=True)] - r1.row_count.should_equal 1 - m1 = materialize r1 - m1.columns.length.should_equal 1 - m1.columns.first.name . should_equal "Count Distinct A B" - m1.columns.first.to_vector . should_equal [3] + table = table_builder [["A", ["foo", "foo", Nothing, Nothing, Nothing]], ["B", ["baz", Nothing, Nothing, Nothing, "baz"]], ["C", [1, 2, 3, Nothing, 5]]] r2 = table.aggregate [Count_Distinct (By_Name ["A", "B"]) (ignore_nothing=False)] r2.row_count.should_equal 1 @@ -885,6 +878,13 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection m2.columns.first.name . should_equal "Count Distinct A B" m2.columns.first.to_vector . should_equal [4] + r1 = table.aggregate [Count_Distinct (By_Name ["A", "B"]) (ignore_nothing=True)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length.should_equal 1 + m1.columns.first.name . should_equal "Count Distinct A B" + m1.columns.first.to_vector . should_equal [3] + Test.group prefix+"Table.aggregate should correctly select result types" pending=pending <| Test.specify " widening to decimals on Average" <| Nothing diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index 3c0f0ecd82dc..9fc60fbd840a 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -40,9 +40,10 @@ run_tests connection pending=Nothing = name = Name_Generator.random_name "table_"+ix.to_text in_mem_table = Materialized_Table.new columns - table = connection.upload_table name in_mem_table - tables.append name - table + case connection.upload_table name in_mem_table of + table -> + tables.append name + table clean_tables table_names = table_names.each name-> sql = 'DROP TABLE "' + name + '"' diff --git a/test/Table_Tests/src/Database/Redshift_Spec.enso b/test/Table_Tests/src/Database/Redshift_Spec.enso index b888d99ff43d..6c73e55a2459 100644 --- a/test/Table_Tests/src/Database/Redshift_Spec.enso +++ b/test/Table_Tests/src/Database/Redshift_Spec.enso @@ -40,9 +40,10 @@ run_tests connection pending=Nothing = name = Name_Generator.random_name "table_"+ix.to_text in_mem_table = Materialized_Table.new columns - table = connection.upload_table name in_mem_table - tables.append name - table + case connection.upload_table name in_mem_table of + table -> + tables.append name + table clean_tables table_names = table_names.each name-> sql = 'DROP TABLE "' + name + '"' From fe5d988888f85176b678f9ad080aac8a05b6ea29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 7 Apr 2022 12:07:15 +0200 Subject: [PATCH 10/26] Sort out SQLite --- .../0.0.0-dev/src/Data/Dialect/Sqlite.enso | 17 ++++++++++------- test/Table_Tests/src/Aggregate_Spec.enso | 9 +++++++-- .../src/Database/Postgresql_Spec.enso | 2 +- .../Table_Tests/src/Database/Redshift_Spec.enso | 2 +- test/Table_Tests/src/Database/Sqlite_Spec.enso | 11 ++++++----- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso index 03df0fb5429c..4779c44dd56a 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso @@ -165,12 +165,15 @@ concat_ops = ## PRIVATE -agg_count_distinct args = - Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code "))" +agg_count_distinct args = case args.length == 1 of + True -> Sql.code "COUNT(DISTINCT (" ++ args.first ++ Sql.code "))" + False -> Error.throw (Illegal_Argument_Error "COUNT_DISTINCT supports only single arguments in SQLite.") ## PRIVATE -agg_count_distinct_include_null args = - count = here.count_distinct args - are_nulls = args.map arg-> arg.paren ++ Sql.code " IS NULL" - all_nulls_case = Sql.code "CASE WHEN COUNT(CASE WHEN " ++ Sql.join " AND " are_nulls ++ Sql.code " THEN 1 END) > 0 THEN 1 ELSE 0 END" - count ++ Sql.code " + " ++ all_nulls_case +agg_count_distinct_include_null args = case args.length == 1 of + True -> + arg = args.first + count = Sql.code "COUNT(DISTINCT " ++ arg ++ Sql.code ")" + all_nulls_case = Sql.code "CASE WHEN COUNT(CASE WHEN " ++ arg ++ Sql.code "IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END" + count ++ Sql.code " + " ++ all_nulls_case + False -> Error.throw (Illegal_Argument_Error "COUNT_DISTINCT supports only single arguments in SQLite.") diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 38ffc75ac24a..9b1bdb6e5c2a 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -19,7 +19,7 @@ spec = table = Table.from_csv file_contents empty_table = Table.new <| table.columns.map c->[c.name, []] materialize = x->x - here.aggregate_spec "[In-Memory] " table empty_table Table.new materialize + here.aggregate_spec "[In-Memory] " table empty_table Table.new materialize is_database=False ## Runs the common aggregate tests. @@ -37,7 +37,7 @@ spec = skip checks for backends which do not support particular features. - pending: An optional mark to disable all test groups. Can be used to indicate that some tests are disabled due to missing test setup. -aggregate_spec prefix table empty_table table_builder materialize test_selection=here.all_tests pending=Nothing = +aggregate_spec prefix table empty_table table_builder materialize is_database test_selection=here.all_tests pending=Nothing = expect_column_names names table = table.columns . map .name . should_equal names frames_to_skip=2 @@ -1101,4 +1101,9 @@ aggregate_spec prefix table empty_table table_builder materialize test_selection problems.at 0 . is_an Floating_Point_Grouping . should_be_true problems.at 0 . rows . length . should_equal 9 + if is_database then + Test.group prefix+"Table.aggregate should report unsupported operations but not block other aggregations in warning mode" pending=pending <| + ## TODO + Nothing + main = Test.Suite.run_main here.spec diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index 9fc60fbd840a..c84c7a676e7c 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -60,7 +60,7 @@ run_tests connection pending=Nothing = empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) tables.append empty_agg_table.name materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize selection pending=pending + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=False selection pending=pending clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Redshift_Spec.enso b/test/Table_Tests/src/Database/Redshift_Spec.enso index 6c73e55a2459..adab1919da92 100644 --- a/test/Table_Tests/src/Database/Redshift_Spec.enso +++ b/test/Table_Tests/src/Database/Redshift_Spec.enso @@ -60,7 +60,7 @@ run_tests connection pending=Nothing = empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) tables.append empty_agg_table.name materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize selection pending=pending + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=False selection pending=pending clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Sqlite_Spec.enso b/test/Table_Tests/src/Database/Sqlite_Spec.enso index fbc975c6bcfb..129e376090a5 100644 --- a/test/Table_Tests/src/Database/Sqlite_Spec.enso +++ b/test/Table_Tests/src/Database/Sqlite_Spec.enso @@ -62,10 +62,11 @@ spec = here.sqlite_specific_spec connection Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=False - ## For now `advanced_stats` remain disabled, because SQLite does not provide - aggregate functions for median, mode and percentile and emulating them is - highly problematic. We can rethink in the future how these could be - emulated. Two of the possible solutions are: + ## For now `advanced_stats`, `first_last`, `text_shortest_longest` and + `multi_distinct` remain disabled, because SQLite does not provide the + needed aggregate functions and emulating them is highly problematic. + We can rethink in the future how these could be emulated. Two of the + possible solutions are: - creating complex nested queries using NTILE to compute the stats, - compiling SQLite library on our own and adding native extensions for the missing statistics. @@ -74,7 +75,7 @@ spec = agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize selection + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=False selection connection.close file.delete From 1a43d0ba1215c664fc419eef2b33b49edc815ae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 7 Apr 2022 12:23:11 +0200 Subject: [PATCH 11/26] Refactor text ops to be in the same format as others --- .../0.0.0-dev/src/Data/Dialect/Postgres.enso | 50 +++++++------------ .../0.0.0-dev/src/Data/Dialect/Sqlite.enso | 50 +++++++------------ .../src/Database/Codegen_Spec.enso | 2 +- 3 files changed, 39 insertions(+), 63 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso index e2b5ec600155..56391387a5c1 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso @@ -45,34 +45,7 @@ type Postgresql_Dialect ## PRIVATE make_internal_generator_dialect = - starts_with arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation starts_with") - ends_with arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation ends_with") - contains arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ", '%')") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation contains") - text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with], here.agg_shortest, here.agg_longest]+here.concat_ops + text = [here.starts_with, here.contains, here.ends_with, here.agg_shortest, here.agg_longest]+here.concat_ops counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]] stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"] @@ -173,9 +146,7 @@ agg_longest = Base_Generator.lift_unary_op "LONGEST" arg-> concat_ops = make_raw_concat_expr expr separator = Sql.code "array_to_string(array_agg(" ++ expr ++ Sql.code "), " ++ separator ++ Sql.code ")" - make_contains_expr expr substring = - Sql.code "position(" ++ substring ++ Sql.code " in " ++ expr ++ Sql.code ") > 0" - concat = Helpers.make_concat make_raw_concat_expr make_contains_expr + concat = Helpers.make_concat make_raw_concat_expr here.make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] @@ -198,3 +169,20 @@ agg_count_distinct args = if args.is_empty then (Error.throw (Illegal_Argument_E agg_count_distinct_include_null args = ## If we always count as tuples, then even null fields are counted. Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code ", 0))" + +## PRIVATE +starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub-> + res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')") + res.paren + +## PRIVATE +ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub-> + res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")") + res.paren + +## PRIVATE +make_contains_expr expr substring = + Sql.code "position(" ++ substring ++ Sql.code " in " ++ expr ++ Sql.code ") > 0" + +## PRIVATE +contains = Base_Generator.lift_binary_op "contains" here.make_contains_expr diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso index 4779c44dd56a..67a829cacf01 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso @@ -44,34 +44,7 @@ type Sqlite_Dialect ## PRIVATE make_internal_generator_dialect = - starts_with arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE (") ++ sub ++ (Sql.code " || '%')") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation starts_with") - ends_with arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code ")") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation ends_with") - contains arguments = - case arguments.length == 2 of - True -> - str = arguments.at 0 - sub = arguments.at 1 - res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code " || '%')") - res.paren - False -> - Error.throw ("Invalid amount of arguments for operation contains") - text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with]]+here.concat_ops + text = [here.starts_with, here.contains, here.ends_with]+here.concat_ops counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]] stats = [here.agg_stddev_pop, here.agg_stddev_samp] my_mappings = text + counts + stats @@ -158,9 +131,7 @@ window_aggregate window_type ignore_null args = concat_ops = make_raw_concat_expr expr separator = Sql.code "group_concat(" ++ expr ++ Sql.code ", " ++ separator ++ Sql.code ")" - make_contains_expr expr substring = - Sql.code "instr(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0" - concat = Helpers.make_concat make_raw_concat_expr make_contains_expr + concat = Helpers.make_concat make_raw_concat_expr here.make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] @@ -177,3 +148,20 @@ agg_count_distinct_include_null args = case args.length == 1 of all_nulls_case = Sql.code "CASE WHEN COUNT(CASE WHEN " ++ arg ++ Sql.code "IS NULL THEN 1 END) > 0 THEN 1 ELSE 0 END" count ++ Sql.code " + " ++ all_nulls_case False -> Error.throw (Illegal_Argument_Error "COUNT_DISTINCT supports only single arguments in SQLite.") + +## PRIVATE +starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub-> + res = str ++ (Sql.code " LIKE (") ++ sub ++ (Sql.code " || '%')") + res.paren + +## PRIVATE +ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub-> + res = str ++ (Sql.code " LIKE ('%' || ") ++ sub ++ (Sql.code ")") + res.paren + +## PRIVATE +make_contains_expr expr substring = + Sql.code "instr(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0" + +## PRIVATE +contains = Base_Generator.lift_binary_op "contains" here.make_contains_expr diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index e15047481c5d..a4525aa1703b 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -83,7 +83,7 @@ spec = contains = b.contains "inf" ends.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (\'%\' || ?)) AS "B" FROM "T1" AS "T1"', [["suf", str]]] starts.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (? || \'%\')) AS "B" FROM "T1" AS "T1"', [["pref", str]]] - contains.to_sql.prepare . should_equal ['SELECT ("T1"."B" LIKE (\'%\' || ? || \'%\')) AS "B" FROM "T1" AS "T1"', [["inf", str]]] + contains.to_sql.prepare . should_equal ['SELECT instr("T1"."B", ?) > 0 AS "B" FROM "T1" AS "T1"', [["inf", str]]] Test.group "[Codegen] Masking Tables and Columns" <| Test.specify "should allow filtering table rows based on a boolean expression" <| From b841969997032a13c2451080fd5e8c22bb92c9a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 7 Apr 2022 13:29:08 +0200 Subject: [PATCH 12/26] Fix SQLite stddev --- .../0.0.0-dev/src/Data/Dialect/Sqlite.enso | 4 +- test/Table_Tests/src/Aggregate_Spec.enso | 65 +++++++++++++++---- 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso index 67a829cacf01..fd1b0015fc06 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso @@ -92,7 +92,7 @@ agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg-> agg_stddev_pop = Base_Generator.lift_unary_op "STDDEV_POP" arg-> sum_of_squares = Sql.code "SUM(" ++ arg.paren ++ Sql.code "*" ++ arg.paren ++ Sql.code ")" square_of_sums = Sql.code "SUM(" ++ arg ++ Sql.code ") * SUM(" ++ arg ++ Sql.code ")" - n = Sql.code "COUNT(" ++ arg ++ Sql.code ")" + n = Sql.code "CAST(COUNT(" ++ arg ++ Sql.code ") AS REAL)" var = Sql.code "(" ++ sum_of_squares ++ Sql.code " - (" ++ square_of_sums ++ Sql.code " / " ++ n ++ Sql.code ")) / " ++ n Sql.code "SQRT(" ++ var ++ Sql.code ")" @@ -100,7 +100,7 @@ agg_stddev_pop = Base_Generator.lift_unary_op "STDDEV_POP" arg-> agg_stddev_samp = Base_Generator.lift_unary_op "STDDEV_SAMP" arg-> sum_of_squares = Sql.code "SUM(" ++ arg.paren ++ Sql.code "*" ++ arg.paren ++ Sql.code ")" square_of_sums = Sql.code "SUM(" ++ arg ++ Sql.code ") * SUM(" ++ arg ++ Sql.code ")" - n = Sql.code "COUNT(" ++ arg ++ Sql.code ")" + n = Sql.code "CAST(COUNT(" ++ arg ++ Sql.code ") AS REAL)" var = Sql.code "(" ++ sum_of_squares ++ Sql.code " - (" ++ square_of_sums ++ Sql.code " / " ++ n ++ Sql.code ")) / (" ++ n ++ Sql.code " - 1)" Sql.code "SQRT(" ++ var ++ Sql.code ")" diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 9b1bdb6e5c2a..6dc04bab45c0 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -885,37 +885,78 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m1.columns.first.name . should_equal "Count Distinct A B" m1.columns.first.to_vector . should_equal [3] + Test.group prefix+"Table.aggregate Standard_Deviation" pending=(resolve_pending test_selection.std_dev pending) <| + Test.specify "should correctly handle single elements" <| + r1 = table_builder [["X", [1]]] . aggregate [Standard_Deviation "X" (population=False), Standard_Deviation "X" (population=True)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + m1.columns.first.at 0 . should_equal Nothing + m1.columns.first.at 0 . should_equal Nothing + Test.group prefix+"Table.aggregate should correctly select result types" pending=pending <| Test.specify " widening to decimals on Average" <| - Nothing + table = table_builder [["G", ["a", "a", "b", "b"]], ["X", [0, 1, 1, Nothing]]] + r1 = table.aggregate [Average "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal (2/3) epsilon=0.00001 + + r2 = table.aggregate [Group_By "G", Average "X"] + r2.row_count.should_equal 2 + m2 = materialize r2 . sort "G" + m2.columns.length . should_equal 2 + m2.columns.first.to_vector . should_equal ["a", "b"] + m2.columns.second.to_vector . should_equal [0.5, 1] + Test.specify " widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| - Nothing + table = table_builder [["X", [-1000, 0, 1, 100000, Nothing]]] + r1 = table.aggregate [Median "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.to_vector . should_equal [0.5] + Test.specify " widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| - Nothing + table = table_builder [["X", [1, 2, 3, 4, 5, 6, Nothing]]] + r1 = table.aggregate [Percentile 0.3 "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.to_vector . should_equal [2.5] + Test.specify " widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| - Nothing + table = table_builder [["X", [1, 2, 3, 4, Nothing]]] + r1 = table.aggregate [Standard_Deviation "X" (population=True), Standard_Deviation "X" (population=False)] + r1.row_count.should_equal 1 + r1.print + m1 = materialize r1 + m1.columns.length . should_equal 2 + m1.columns.first.at 0 . should_equal 1.1180339887499 epsilon=0.000001 + m1.columns.second.at 0 . should_equal 1.2909944487358 epsilon=0.000001 Test.group prefix+"Table.aggregate should correctly handle infinities" pending=pending <| pos_inf = 1/0 neg_inf = -1/0 - Test.specify " widening to decimals on Average" <| + Test.specify " on Average" <| Nothing - Test.specify " widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| + Test.specify " on Median" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| + Test.specify " on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| + Test.specify " on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| Nothing Test.group prefix+"Table.aggregate should correctly handle NaN" pending=pending <| nan = 0.log 0 - Test.specify " widening to decimals on Average" <| + Test.specify " on Average" <| Nothing - Test.specify " widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| + Test.specify " on Median" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| + Test.specify " on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| Nothing - Test.specify " widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| + Test.specify " on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| Nothing Test.group prefix+"Table.aggregate Mode" (pending = resolve_pending test_selection.advanced_stats pending) <| From 3ef2a11d76fbb7ff27ca7edba03faa8ba65a357c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 7 Apr 2022 17:21:21 +0200 Subject: [PATCH 13/26] FIgure out stddev --- .../lib/Standard/Test/0.0.0-dev/src/Main.enso | 16 +++++++++------ .../table/aggregations/StandardDeviation.java | 20 ++++++++++--------- test/Table_Tests/src/Aggregate_Spec.enso | 3 +-- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso index abb63e94953d..3b819cd8dbe2 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso @@ -272,12 +272,16 @@ Error.should_equal _ frames_to_skip=0 = here.fail_match_on_unexpected_error this example_should_equal = 1.00000001 . should_equal 1.00000002 epsilon=0.0001 Decimal.should_equal : Decimal -> Decimal -> Integer -> Assertion -Decimal.should_equal that (epsilon = 0) (frames_to_skip=0) = case this.equals that epsilon of - True -> Success - False -> - loc = Meta.get_source_location 2+frames_to_skip - msg = this.to_text + " did not equal " + that.to_text + " (at " + loc + ")." - Panic.throw (Failure msg) +Decimal.should_equal that (epsilon = 0) (frames_to_skip=0) = + matches = case that of + Number -> this.equals that epsilon + _ -> False + case matches of + True -> Success + False -> + loc = Meta.get_source_location 2+frames_to_skip + msg = this.to_text + " did not equal " + that.to_text + " (at " + loc + ")." + Panic.throw (Failure msg) ## Asserts that the given `Boolean` is `True` diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java index a738c392da6d..954dcdfb31ac 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java @@ -1,11 +1,10 @@ package org.enso.table.aggregations; +import java.util.List; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; -import java.util.List; - /*** * Aggregate Column computing the standard deviation of a group. */ @@ -25,7 +24,7 @@ public Calculation(double value) { private final Storage storage; private final boolean population; - public StandardDeviation(String name, Column column,boolean population) { + public StandardDeviation(String name, Column column, boolean population) { super(name, Storage.Type.DOUBLE); this.storage = column.getStorage(); this.population = population; @@ -34,12 +33,13 @@ public StandardDeviation(String name, Column column,boolean population) { @Override public Object aggregate(List indexes) { Calculation current = null; - for (int row: indexes) { + for (int row : indexes) { Object value = storage.getItemBoxed(row); if (value != null) { Double dValue = CastToDouble(value); if (dValue == null) { - this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); + this.addProblem( + new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); return null; } @@ -48,12 +48,14 @@ public Object aggregate(List indexes) { } else { current.count++; current.total += dValue; - current.total_sqr += dValue*dValue; + current.total_sqr += dValue * dValue; } } } - return current == null ? null : - (population ? 1 : Math.sqrt(current.count / (current.count - 1.0))) * - Math.sqrt(current.total_sqr / current.count - Math.pow(current.total / current.count, 2)); + + if (current == null) return null; + if (!population && current.count <= 1) return null; + return (population ? 1 : Math.sqrt(current.count / (current.count - 1.0))) + * Math.sqrt(current.total_sqr / current.count - Math.pow(current.total / current.count, 2)); } } diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 6dc04bab45c0..32ca77e5b980 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -892,7 +892,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m1 = materialize r1 m1.columns.length . should_equal 2 m1.columns.first.at 0 . should_equal Nothing - m1.columns.first.at 0 . should_equal Nothing + m1.columns.second.at 0 . should_equal 0 Test.group prefix+"Table.aggregate should correctly select result types" pending=pending <| Test.specify " widening to decimals on Average" <| @@ -930,7 +930,6 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te table = table_builder [["X", [1, 2, 3, 4, Nothing]]] r1 = table.aggregate [Standard_Deviation "X" (population=True), Standard_Deviation "X" (population=False)] r1.row_count.should_equal 1 - r1.print m1 = materialize r1 m1.columns.length . should_equal 2 m1.columns.first.at 0 . should_equal 1.1180339887499 epsilon=0.000001 From 38184011d85ce830fc6312aa2896e25d793ae333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 11:38:43 +0200 Subject: [PATCH 14/26] Add null special cases to not fail displaying recursive definition errors This is a temporary solution which may need to be reverted once a proper solution is developed. --- .../node/expression/builtin/text/AnyToTextNode.java | 4 +++- .../expression/builtin/text/util/TypeToDisplayTextNode.java | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java index 7d091d4a7399..6c79eb61a6fb 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java @@ -64,7 +64,9 @@ private Text doComplexAtom(Atom atom) { @CompilerDirectives.TruffleBoundary private String showObject(Object child) throws UnsupportedMessageException { - if (child instanceof Boolean) { + if (child == null) { + return "null"; + } else if (child instanceof Boolean) { return (boolean) child ? "True" : "False"; } else { return strings.asString(displays.toDisplayString(child)); diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java index 3fa7fb269dc0..dab2d1c7140a 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java @@ -29,7 +29,9 @@ String doDisplay( @CachedLibrary(limit = "5") InteropLibrary objects, @CachedLibrary(limit = "5") InteropLibrary displays, @CachedLibrary(limit = "5") InteropLibrary strings) { - if (TypesGen.isLong(value)) { + if (value == null) { + return "null"; + } else if (TypesGen.isLong(value)) { return value + " (Integer)"; } else if (TypesGen.isEnsoBigInteger(value)) { return "Integer"; @@ -60,7 +62,7 @@ String doDisplay( return strings.asString(displays.toDisplayString(objects.getMetaObject(value))); } catch (UnsupportedMessageException e) { throw new IllegalStateException( - "Receiver declares a meta object, but does not it return it."); + "Receiver declares a meta object, but does not return it."); } } else { return "a polyglot object"; From 2e46e2499a49a2171aa866c2591705b40c9cc9fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 12:59:46 +0200 Subject: [PATCH 15/26] Some fixes to handling infinities in Percentile (and Median) --- .../0.0.0-dev/src/Data/Number/Extensions.enso | 3 +- .../enso/table/aggregations/Percentile.java | 15 +++- test/Table_Tests/src/Aggregate_Spec.enso | 85 ++++++++++++++++++- test/Tests/src/Data/Numbers_Spec.enso | 18 ++++ 4 files changed, 115 insertions(+), 6 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso index 5faefeb9deb1..ab965f9978a9 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso @@ -209,7 +209,8 @@ Integer.up_to n = Range this n 1.equals 1.0000001 epsilon=0.001 Number.equals : Number -> Number -> Boolean -Number.equals that epsilon=0.0 = (this - that).abs <= epsilon +Number.equals that epsilon=0.0 = + if this == that then True else (this - that).abs <= epsilon ## Returns the smaller value of `this` and `that`. diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java index 82ebf90de8d7..050eb6a0203f 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java @@ -66,7 +66,7 @@ public Object aggregate(List indexes) { if (current <= mid && nextCurrent > mid) { double second = entry.getKey(); - return first + (second - first) * (mid_value - mid); + return interpolate(first, second, mid_value - mid); } current = nextCurrent; @@ -75,4 +75,17 @@ public Object aggregate(List indexes) { this.addProblem(new InvalidAggregation(this.getName(), -1, "Failed calculating the percentile.")); return null; } + + double interpolate(double first, double second, double alpha) { + if (Double.isInfinite(first) && Double.isInfinite(second)) { + if (first == second) return first; + else return Double.NaN; + } + + // If both are not infinite, then if one of them is infinite, the other must be finite. + if (Double.isInfinite(first)) return first; + if (Double.isInfinite(second)) return second; + + return first + (second - first) * alpha; + } } diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 32ca77e5b980..5d291ed95afb 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -10,6 +10,8 @@ import Standard.Test import Standard.Test.Problems import Standard.Base.Error.Problem_Behavior +polyglot java import java.lang.Double + type Test_Selection problem_handling=True advanced_stats=True text_concat=True text_shortest_longest=True first_last=True first_last_row_order=True std_dev=True multi_distinct=True aggregation_problems=True all_tests = Test_Selection True True True True True True True True True @@ -935,17 +937,92 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m1.columns.first.at 0 . should_equal 1.1180339887499 epsilon=0.000001 m1.columns.second.at 0 . should_equal 1.2909944487358 epsilon=0.000001 + expect_null_or_nan value = + matches = case value of + Nothing -> True + Decimal -> Double.isNaN value + _ -> False + if matches.not then + loc = Meta.get_source_location 2 + Test.fail "Expected a Nothing or NaN but got: "+value.to_text+" (at "+loc+")." + Test.group prefix+"Table.aggregate should correctly handle infinities" pending=pending <| pos_inf = 1/0 neg_inf = -1/0 Test.specify " on Average" <| - Nothing + t1 = table_builder [["X", [Nothing, pos_inf, pos_inf, 0]]] + r1 = t1.aggregate [Average "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal pos_inf + + t2 = table_builder [["X", [Nothing, pos_inf, neg_inf, 0]]] + r2 = t2.aggregate [Average "X"] + r2.row_count.should_equal 1 + m2 = materialize r2 + m2.columns.length . should_equal 1 + expect_null_or_nan <| m2.columns.first.at 0 + Test.specify " on Median" (pending = resolve_pending test_selection.advanced_stats) <| - Nothing + t1 = table_builder [["X", [Nothing, neg_inf, pos_inf, 0, pos_inf, pos_inf]]] + r1 = t1.aggregate [Median "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal pos_inf + + t2 = table_builder [["X", [pos_inf, pos_inf, neg_inf, neg_inf]]] + r2 = t2.aggregate [Median "X"] + r2.row_count.should_equal 1 + m2 = materialize r2 + m2.columns.length . should_equal 1 + expect_null_or_nan <| m2.columns.first.at 0 + + t3 = table_builder [["X", [pos_inf, pos_inf, Nothing, 0, 10, 20, neg_inf, neg_inf]]] + r3 = t3.aggregate [Median "X"] + r3.row_count.should_equal 1 + m3 = materialize r3 + m3.columns.length . should_equal 1 + m3.columns.first.at 0 . should_equal 10 + + t4 = table_builder [["X", [Nothing, pos_inf, pos_inf, 10, 12]]] + r4 = t4.aggregate [Median "X"] + r4.row_count.should_equal 1 + m4 = materialize r4 + m4.columns.length . should_equal 1 + m4.columns.first.at 0 . should_equal pos_inf + Test.specify " on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| - Nothing + t1 = table_builder [["X", [Nothing, neg_inf, 2, 3, 4, pos_inf]]] + r1 = t1.aggregate [Percentile 0.3 "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal 2.2 + + t2 = table_builder [["X", [Nothing, neg_inf, neg_inf, 3, 4, pos_inf]]] + r2 = t2.aggregate [Percentile 0.25 "X"] + r2.row_count.should_equal 1 + m2 = materialize r2 + m2.columns.length . should_equal 1 + m2.columns.first.at 0 . should_equal neg_inf + + t3 = table_builder [["X", [Nothing, neg_inf, neg_inf, pos_inf, pos_inf, pos_inf]]] + r3 = t3.aggregate [Percentile 0.3 "X"] + r3.row_count.should_equal 1 + m3 = materialize r3 + m3.columns.length . should_equal 1 + expect_null_or_nan <| m3.columns.first.at 0 + Test.specify " on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| - Nothing + t1 = table_builder [["X", [neg_inf, 1]]] + r1 = t1.aggregate [Standard_Deviation "X" (population=True), Standard_Deviation "X" (population=False)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + expect_null_or_nan <| m1.columns.first.at 0 + expect_null_or_nan <| m1.columns.second.at 0 Test.group prefix+"Table.aggregate should correctly handle NaN" pending=pending <| nan = 0.log 0 diff --git a/test/Tests/src/Data/Numbers_Spec.enso b/test/Tests/src/Data/Numbers_Spec.enso index 3d939b819bfb..1a2dd9935a9e 100644 --- a/test/Tests/src/Data/Numbers_Spec.enso +++ b/test/Tests/src/Data/Numbers_Spec.enso @@ -262,4 +262,22 @@ spec = almost_max_long_times_three_decimal.ceil.to_decimal . should_equal almost_max_long_times_three_plus_1.to_decimal almost_max_long_times_three_plus_1.ceil . should_equal almost_max_long_times_three_plus_1 + Test.specify "should support inexact equality comparisons" <| + 1.0001 . equals 1.0002 epsilon=0.01 . should_be_true + 1.0001 . equals 1.0002 epsilon=0.0000001 . should_be_false + + 1 . equals 2 . should_be_false + 1 . equals (0+1) . should_be_true + + inf = 1/0 + inf . equals inf . should_be_true + + neg_inf = -inf + neg_inf . equals neg_inf . should_be_true + neg_inf . equals inf . should_be_false + + nan = 0.log 0 + nan . equals nan . should_be_false + nan . equals 0 . should_be_false + main = Test.Suite.run_main here.spec From d2e550df1d8875709f0b9d5bea05fb08afe65ce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 13:43:16 +0200 Subject: [PATCH 16/26] NaN in In-Mem Median and Percentile --- .../enso/table/aggregations/Percentile.java | 26 ++++--- test/Table_Tests/src/Aggregate_Spec.enso | 70 ++++++++++++++----- .../Table_Tests/src/Database/Sqlite_Spec.enso | 2 +- 3 files changed, 67 insertions(+), 31 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java index 050eb6a0203f..d90311bd34c9 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java @@ -1,11 +1,13 @@ package org.enso.table.aggregations; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; -import java.util.*; - /*** * Aggregate Column computing a percentile value in a group. */ @@ -22,19 +24,20 @@ public Percentile(String name, Column column, double percentile) { @Override public Object aggregate(List indexes) { int count = 0; - SortedMap currentMap = null; - for (int row: indexes) { + SortedMap currentMap = new TreeMap<>(); + for (int row : indexes) { Object value = storage.getItemBoxed(row); if (value != null) { Double dValue = CastToDouble(value); if (dValue == null) { - this.addProblem(new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); + this.addProblem( + new InvalidAggregation(this.getName(), row, "Cannot convert to a number.")); return null; - } else if (count == 0) { - count = 1; - currentMap = new TreeMap<>(); - currentMap.put(dValue, 1); + } else if (dValue.isNaN()) { + // If any of the input values is a NaN, we do not know where in the ordering it should be + // and so we return NaN. + return Double.NaN; } else { count++; currentMap.put(dValue, currentMap.getOrDefault(dValue, 0) + 1); @@ -42,7 +45,7 @@ public Object aggregate(List indexes) { } } - if (count == 0) { + if (count == 0) { return null; } @@ -72,7 +75,8 @@ public Object aggregate(List indexes) { current = nextCurrent; } - this.addProblem(new InvalidAggregation(this.getName(), -1, "Failed calculating the percentile.")); + this.addProblem( + new InvalidAggregation(this.getName(), -1, "Failed calculating the percentile.")); return null; } diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 5d291ed95afb..3d0b5cf03dbd 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -12,9 +12,9 @@ import Standard.Base.Error.Problem_Behavior polyglot java import java.lang.Double -type Test_Selection problem_handling=True advanced_stats=True text_concat=True text_shortest_longest=True first_last=True first_last_row_order=True std_dev=True multi_distinct=True aggregation_problems=True +type Test_Selection problem_handling=True advanced_stats=True text_concat=True text_shortest_longest=True first_last=True first_last_row_order=True std_dev=True multi_distinct=True aggregation_problems=True nan=True -all_tests = Test_Selection True True True True True True True True True +all_tests = Test_Selection True True True True True True True True True True spec = file_contents = (Enso_Project.data / "data.csv") . read @@ -897,7 +897,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m1.columns.second.at 0 . should_equal 0 Test.group prefix+"Table.aggregate should correctly select result types" pending=pending <| - Test.specify " widening to decimals on Average" <| + Test.specify "widening to decimals on Average" <| table = table_builder [["G", ["a", "a", "b", "b"]], ["X", [0, 1, 1, Nothing]]] r1 = table.aggregate [Average "X"] r1.row_count.should_equal 1 @@ -912,7 +912,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m2.columns.first.to_vector . should_equal ["a", "b"] m2.columns.second.to_vector . should_equal [0.5, 1] - Test.specify " widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| + Test.specify "widening to decimals on Median" (pending = resolve_pending test_selection.advanced_stats) <| table = table_builder [["X", [-1000, 0, 1, 100000, Nothing]]] r1 = table.aggregate [Median "X"] r1.row_count.should_equal 1 @@ -920,7 +920,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m1.columns.length . should_equal 1 m1.columns.first.to_vector . should_equal [0.5] - Test.specify " widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| + Test.specify "widening to decimals on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| table = table_builder [["X", [1, 2, 3, 4, 5, 6, Nothing]]] r1 = table.aggregate [Percentile 0.3 "X"] r1.row_count.should_equal 1 @@ -928,7 +928,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m1.columns.length . should_equal 1 m1.columns.first.to_vector . should_equal [2.5] - Test.specify " widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| + Test.specify "widening to decimals on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| table = table_builder [["X", [1, 2, 3, 4, Nothing]]] r1 = table.aggregate [Standard_Deviation "X" (population=True), Standard_Deviation "X" (population=False)] r1.row_count.should_equal 1 @@ -949,7 +949,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te Test.group prefix+"Table.aggregate should correctly handle infinities" pending=pending <| pos_inf = 1/0 neg_inf = -1/0 - Test.specify " on Average" <| + Test.specify "on Average" <| t1 = table_builder [["X", [Nothing, pos_inf, pos_inf, 0]]] r1 = t1.aggregate [Average "X"] r1.row_count.should_equal 1 @@ -964,7 +964,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m2.columns.length . should_equal 1 expect_null_or_nan <| m2.columns.first.at 0 - Test.specify " on Median" (pending = resolve_pending test_selection.advanced_stats) <| + Test.specify "on Median" (pending = resolve_pending test_selection.advanced_stats) <| t1 = table_builder [["X", [Nothing, neg_inf, pos_inf, 0, pos_inf, pos_inf]]] r1 = t1.aggregate [Median "X"] r1.row_count.should_equal 1 @@ -993,7 +993,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m4.columns.length . should_equal 1 m4.columns.first.at 0 . should_equal pos_inf - Test.specify " on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| + Test.specify "on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| t1 = table_builder [["X", [Nothing, neg_inf, 2, 3, 4, pos_inf]]] r1 = t1.aggregate [Percentile 0.3 "X"] r1.row_count.should_equal 1 @@ -1015,7 +1015,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m3.columns.length . should_equal 1 expect_null_or_nan <| m3.columns.first.at 0 - Test.specify " on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| + Test.specify "on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| t1 = table_builder [["X", [neg_inf, 1]]] r1 = t1.aggregate [Standard_Deviation "X" (population=True), Standard_Deviation "X" (population=False)] r1.row_count.should_equal 1 @@ -1024,16 +1024,48 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te expect_null_or_nan <| m1.columns.first.at 0 expect_null_or_nan <| m1.columns.second.at 0 - Test.group prefix+"Table.aggregate should correctly handle NaN" pending=pending <| + Test.group prefix+"Table.aggregate should correctly handle NaN" pending=(resolve_pending test_selection.nan pending) <| nan = 0.log 0 - Test.specify " on Average" <| - Nothing - Test.specify " on Median" (pending = resolve_pending test_selection.advanced_stats) <| - Nothing - Test.specify " on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| - Nothing - Test.specify " on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| - Nothing + Test.specify "on Average" <| + t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] + r1 = t1.aggregate [Average "X"] + t1.print + r1.print + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + Double.isNaN (m1.columns.first.at 0) . should_be_true + + Test.specify "on Median" (pending = resolve_pending test_selection.advanced_stats) <| + t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] + r1 = t1.aggregate [Median "X"] + t1.print + r1.print + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + Double.isNaN (m1.columns.first.at 0) . should_be_true + + Test.specify "on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| + t1 = table_builder [["X", [Nothing, nan, 0, 1, 2, 4, 5]]] + r1 = t1.aggregate [Percentile 0.3 "X"] + t1.print + r1.print + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + Double.isNaN (m1.columns.first.at 0) . should_be_true + + Test.specify "on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| + t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] + r1 = t1.aggregate [Standard_Deviation "X" (population=False), Standard_Deviation "X" (population=True)] + t1.print + r1.print + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + Double.isNaN (m1.columns.first.at 0) . should_be_true + Double.isNaN (m1.columns.second.at 0) . should_be_true Test.group prefix+"Table.aggregate Mode" (pending = resolve_pending test_selection.advanced_stats pending) <| Test.specify "should ignore missing values" <| diff --git a/test/Table_Tests/src/Database/Sqlite_Spec.enso b/test/Table_Tests/src/Database/Sqlite_Spec.enso index 129e376090a5..3d3a6b38957d 100644 --- a/test/Table_Tests/src/Database/Sqlite_Spec.enso +++ b/test/Table_Tests/src/Database/Sqlite_Spec.enso @@ -70,7 +70,7 @@ spec = - creating complex nested queries using NTILE to compute the stats, - compiling SQLite library on our own and adding native extensions for the missing statistics. - selection = Aggregate_Spec.Test_Selection advanced_stats=False text_shortest_longest=False first_last=False first_last_row_order=False multi_distinct=False aggregation_problems=False + selection = Aggregate_Spec.Test_Selection advanced_stats=False text_shortest_longest=False first_last=False first_last_row_order=False multi_distinct=False aggregation_problems=False nan=False agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) From 4c2e1140e1ab7952f150ae5c63552d8b95b9e702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 14:01:39 +0200 Subject: [PATCH 17/26] Add NaN edge case to Postgres --- .../Database/0.0.0-dev/src/Data/Dialect/Postgres.enso | 8 ++++++-- test/Table_Tests/src/Aggregate_Spec.enso | 8 -------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso index 56391387a5c1..bf881f474a25 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso @@ -94,14 +94,18 @@ agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg-> ## PRIVATE agg_median = Base_Generator.lift_unary_op "MEDIAN" arg-> - Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")" + median = Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")" + has_nan = Sql.code "bool_or(" ++ arg ++ Sql.code " = double precision 'NaN')" + Sql.code "CASE WHEN " ++ has_nan ++ Sql.code " THEN 'NaN' ELSE " ++ median ++ Sql.code " END" ## PRIVATE agg_mode = Base_Generator.lift_unary_op "MODE" arg-> Sql.code "mode() WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")" agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" p-> expr-> - Sql.code "percentile_cont(" ++ p ++ Sql.code ") WITHIN GROUP (ORDER BY " ++ expr ++ Sql.code ")" + percentile = Sql.code "percentile_cont(" ++ p ++ Sql.code ") WITHIN GROUP (ORDER BY " ++ expr ++ Sql.code ")" + has_nan = Sql.code "bool_or(" ++ expr ++ Sql.code " = double precision 'NaN')" + Sql.code "CASE WHEN " ++ has_nan ++ Sql.code " THEN 'NaN' ELSE " ++ percentile ++ Sql.code " END" ## PRIVATE These are written in a not most-efficient way, but a way that makes them diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 3d0b5cf03dbd..042092c1e71e 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -1029,8 +1029,6 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te Test.specify "on Average" <| t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] r1 = t1.aggregate [Average "X"] - t1.print - r1.print r1.row_count.should_equal 1 m1 = materialize r1 m1.columns.length . should_equal 1 @@ -1039,8 +1037,6 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te Test.specify "on Median" (pending = resolve_pending test_selection.advanced_stats) <| t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] r1 = t1.aggregate [Median "X"] - t1.print - r1.print r1.row_count.should_equal 1 m1 = materialize r1 m1.columns.length . should_equal 1 @@ -1049,8 +1045,6 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te Test.specify "on Percentile" (pending = resolve_pending test_selection.advanced_stats) <| t1 = table_builder [["X", [Nothing, nan, 0, 1, 2, 4, 5]]] r1 = t1.aggregate [Percentile 0.3 "X"] - t1.print - r1.print r1.row_count.should_equal 1 m1 = materialize r1 m1.columns.length . should_equal 1 @@ -1059,8 +1053,6 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te Test.specify "on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] r1 = t1.aggregate [Standard_Deviation "X" (population=False), Standard_Deviation "X" (population=True)] - t1.print - r1.print r1.row_count.should_equal 1 m1 = materialize r1 m1.columns.length . should_equal 2 From 2559f715c5a9317c6736230ea8182160982bbcb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 14:04:52 +0200 Subject: [PATCH 18/26] TODO comment --- .../0.0.0-dev/src/Data/Dialect/Postgres.enso | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso index bf881f474a25..ce71f7ec62de 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso @@ -95,6 +95,12 @@ agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg-> ## PRIVATE agg_median = Base_Generator.lift_unary_op "MEDIAN" arg-> median = Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")" + ## TODO Technically, this check may not be necessary if the input column has + type INTEGER, because it is impossible to represent a NaN in that type. + However, currently the column type inference is not tested well-enough to + rely on this, so leaving an uniform approach regardless of type. This + could be revisited when further work on column types takes place. + See issue: https://www.pivotaltracker.com/story/show/180854759 has_nan = Sql.code "bool_or(" ++ arg ++ Sql.code " = double precision 'NaN')" Sql.code "CASE WHEN " ++ has_nan ++ Sql.code " THEN 'NaN' ELSE " ++ median ++ Sql.code " END" @@ -104,6 +110,12 @@ agg_mode = Base_Generator.lift_unary_op "MODE" arg-> agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" p-> expr-> percentile = Sql.code "percentile_cont(" ++ p ++ Sql.code ") WITHIN GROUP (ORDER BY " ++ expr ++ Sql.code ")" + ## TODO Technically, this check may not be necessary if the input column has + type INTEGER, because it is impossible to represent a NaN in that type. + However, currently the column type inference is not tested well-enough to + rely on this, so leaving an uniform approach regardless of type. This + could be revisited when further work on column types takes place. + See issue: https://www.pivotaltracker.com/story/show/180854759 has_nan = Sql.code "bool_or(" ++ expr ++ Sql.code " = double precision 'NaN')" Sql.code "CASE WHEN " ++ has_nan ++ Sql.code " THEN 'NaN' ELSE " ++ percentile ++ Sql.code " END" From 16eb3e2161c8c853bc798415801adef9ec9c3e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 14:17:32 +0200 Subject: [PATCH 19/26] Mode tests --- test/Table_Tests/src/Aggregate_Spec.enso | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 042092c1e71e..e3eeb640ac7d 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -1050,6 +1050,14 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te m1.columns.length . should_equal 1 Double.isNaN (m1.columns.first.at 0) . should_be_true + Test.specify "on Mode" (pending = resolve_pending test_selection.advanced_stats) <| + t1 = table_builder [["X", [Nothing, nan, nan, nan, nan, 4, 5]]] + r1 = t1.aggregate [Mode "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + Double.isNaN (m1.columns.first.at 0) . should_be_true + Test.specify "on Standard_Deviation" (pending = resolve_pending test_selection.std_dev) <| t1 = table_builder [["X", [Nothing, nan, 0, 1, 2]]] r1 = t1.aggregate [Standard_Deviation "X" (population=False), Standard_Deviation "X" (population=True)] @@ -1061,7 +1069,12 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te Test.group prefix+"Table.aggregate Mode" (pending = resolve_pending test_selection.advanced_stats pending) <| Test.specify "should ignore missing values" <| - Nothing + t1 = table_builder [["X", [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 2, 2, 1]]] + r1 = t1.aggregate [Mode "X"] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 1 + m1.columns.first.at 0 . should_equal 2 Test.group prefix+"Table.aggregate First and Last" pending=pending <| Test.specify "should not return the same value for groups with different values but equal ordering keys" (pending = resolve_pending test_selection.first_last) <| From aadfc9db847ec7ec868c3c53d719bb3a56210eef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 14:22:10 +0200 Subject: [PATCH 20/26] Add first-last test --- test/Table_Tests/src/Aggregate_Spec.enso | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index e3eeb640ac7d..001766d61673 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -1078,7 +1078,15 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te Test.group prefix+"Table.aggregate First and Last" pending=pending <| Test.specify "should not return the same value for groups with different values but equal ordering keys" (pending = resolve_pending test_selection.first_last) <| - Nothing + t1 = table_builder [["G", ["a", "a"]], ["X", [1, 2]]] + order = By_Name ["G"] + r1 = t1.aggregate [First "X" (order_by=order), Last "X" (order_by=order)] + r1.row_count.should_equal 1 + m1 = materialize r1 + m1.columns.length . should_equal 2 + first = m1.columns.first.at 0 + last = m1.columns.second.at 0 + (first != last).should_be_true Test.group prefix+"Table.aggregate" pending=pending <| Test.specify "should work even if no aggregations apart from groupings are specified" <| From 073d922ecebd347f08462b8626c36754bb59d480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 15:23:02 +0200 Subject: [PATCH 21/26] Test unsupported operations --- .../Database/0.0.0-dev/src/Data/Dialect.enso | 2 +- .../0.0.0-dev/src/Data/Dialect/Postgres.enso | 6 +-- .../0.0.0-dev/src/Data/Dialect/Redshift.enso | 2 +- .../0.0.0-dev/src/Data/Dialect/Sqlite.enso | 28 ++++++---- .../src/Data/Internal/Aggregate_Helper.enso | 9 ++++ .../src/Internal/Aggregate_Column_Helper.enso | 2 +- test/Table_Tests/src/Aggregate_Spec.enso | 53 ++++++++++++++++++- .../src/Database/Postgresql_Spec.enso | 4 +- .../src/Database/Redshift_Spec.enso | 2 +- .../Table_Tests/src/Database/Sqlite_Spec.enso | 2 +- 10 files changed, 87 insertions(+), 23 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso index 042056d74acb..48cf0d582033 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso @@ -35,7 +35,7 @@ type Dialect Deduces the result type for an aggregation operation. The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. resolve_target_sql_type : Aggregate_Column -> Sql_Type resolve_target_sql_type = Errors.unimplemented "This is an interface only." diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso index ce71f7ec62de..8db1f6228676 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso @@ -39,7 +39,7 @@ type Postgresql_Dialect Deduces the result type for an aggregation operation. The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. resolve_target_sql_type : Aggregate_Column -> Sql_Type resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate @@ -56,7 +56,7 @@ make_internal_generator_dialect = ## PRIVATE The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. resolve_target_sql_type aggregate = case aggregate of Group_By c _ -> c.sql_type Count _ -> Sql_Type.bigint @@ -75,7 +75,7 @@ resolve_target_sql_type aggregate = case aggregate of Longest c _ -> c.sql_type Standard_Deviation _ _ _ -> Sql_Type.double Concatenate _ _ _ _ _ _ -> Sql_Type.text - ## TODO [RW] revise these + ## FIXME [RW] revise these Sum _ _ -> Sql_Type.numeric # TODO can also be bigint, real, double Average _ _ -> Sql_Type.numeric # TODO can be double sometimes Median _ _ -> Sql_Type.numeric # TODO can be double sometimes diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Redshift.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Redshift.enso index a693413e6fdc..624fc74174c1 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Redshift.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Redshift.enso @@ -38,7 +38,7 @@ type Redshift_Dialect Deduces the result type for an aggregation operation. The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. resolve_target_sql_type : Aggregate_Column -> Sql_Type resolve_target_sql_type aggregate = Postgres.resolve_target_sql_type aggregate diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso index fd1b0015fc06..a684a6b65e48 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Sqlite.enso @@ -5,6 +5,7 @@ from Standard.Database.Data.Sql import Sql_Type import Standard.Database.Data.Dialect import Standard.Database.Data.Dialect.Helpers import Standard.Database.Data.Internal.Base_Generator +from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error ## PRIVATE @@ -38,7 +39,7 @@ type Sqlite_Dialect Deduces the result type for an aggregation operation. The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. resolve_target_sql_type : Aggregate_Column -> Sql_Type resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate @@ -52,29 +53,34 @@ make_internal_generator_dialect = ## PRIVATE The provided aggregate is assumed to contain only already resolved columns. - You may need to transform it with `resolve_columns` first. + You may need to transform it with `resolve_aggregate` first. resolve_target_sql_type aggregate = case aggregate of Group_By c _ -> c.sql_type Count _ -> Sql_Type.integer - Count_Distinct _ _ _ -> Sql_Type.integer + Count_Distinct columns _ _ -> + if columns.length == 1 then Sql_Type.integer else + here.unsupported "Count_Distinct on multiple columns" Count_Not_Nothing _ _ -> Sql_Type.integer Count_Nothing _ _ -> Sql_Type.integer Count_Not_Empty _ _ -> Sql_Type.integer Count_Empty _ _ -> Sql_Type.integer - Percentile _ _ _ -> Sql_Type.real - Mode c _ -> c.sql_type - First c _ _ _ -> c.sql_type - Last c _ _ _ -> c.sql_type + Percentile _ _ _ -> here.unsupported "Percentile" + Mode _ _ -> here.unsupported "Mode" + First _ _ _ _ -> here.unsupported "First" + Last _ _ _ _ -> here.unsupported "Last" Maximum c _ -> c.sql_type Minimum c _ -> c.sql_type - Shortest c _ -> c.sql_type - Longest c _ -> c.sql_type + Shortest _ _ -> here.unsupported "Shortest" + Longest _ _ -> here.unsupported "Longest" Standard_Deviation _ _ _ -> Sql_Type.real Concatenate _ _ _ _ _ _ -> Sql_Type.text - ## TODO revise these Sum c _ -> c.sql_type Average _ _ -> Sql_Type.real - Median _ _ -> Sql_Type.real + Median _ _ -> here.unsupported "Median" + +## PRIVATE +unsupported name = + Error.throw (Unsupported_Database_Operation_Error name+" is not supported by SQLite backend. You may need to materialize the table and perform the operation in-memory.") ## PRIVATE agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg-> diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Aggregate_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Aggregate_Helper.enso index 048322413ef6..df63005f2e71 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Aggregate_Helper.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Internal/Aggregate_Helper.enso @@ -5,12 +5,21 @@ import Standard.Database.Data.Internal.IR from Standard.Database.Data.Sql import Sql_Type from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error +## PRIVATE + Creates an `Internal_Column` that computes the specified statistic. + It returns a dataflow error if the given operation is not supported. + + The provided `aggregate` is assumed to contain only already resolved columns. + You may need to transform it with `resolve_aggregate` first. make_aggregate_column : Table -> Aggregate_Column -> Text -> IR.Internal_Column make_aggregate_column table aggregate new_name = sql_type = table.connection.dialect.resolve_target_sql_type aggregate expression = here.make_expression aggregate IR.Internal_Column new_name sql_type expression +## PRIVATE + Creates an Internal Representation of the expression that computes a + requested statistic. make_expression : Aggregate_Column -> IR.Expression make_expression aggregate = is_non_empty_vector v = if v.is_nothing then False else v.not_empty diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index 74438d2b5f03..2f8f7fd58347 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -86,7 +86,7 @@ prepare_aggregate_columns aggregates table = To be used when `new_name` is `Nothing`. Assumes that the `Aggregate_Column` is resolved. You may need to transform it - with `resolve_columns` first. + with `resolve_aggregate` first. default_aggregate_column_name aggregate_column = case aggregate_column of Group_By c _ -> c.name diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 001766d61673..19c64a13bfa1 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -1265,7 +1265,56 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te if is_database then Test.group prefix+"Table.aggregate should report unsupported operations but not block other aggregations in warning mode" pending=pending <| - ## TODO - Nothing + expect_sum_and_unsupported_errors error_count result = + result.columns.length . should_equal 1 + result.row_count . should_equal 1 + result.columns.first.to_vector . should_equal [6] + warnings = Warning.get_all result . map .value + warnings.length . should_equal error_count + warnings.each warning-> + warning.should_be_an Unsupported_Database_Operation_Error + + if test_selection.first_last_row_order.not then + Test.specify "with First and Last in row order" <| + table = table_builder [["X", [1,2,3]]] + expect_sum_and_unsupported_errors 2 <| + table.aggregate [Sum "X", First "X", Last "X"] + + if test_selection.first_last.not then + Test.specify "with First and Last with ordering" <| + table = table_builder [["A", [3,2,1]], ["X", [1,2,3]]] + order = By_Name ["A"] + expect_sum_and_unsupported_errors 2 <| + table.aggregate [Sum "X", First "X" (order_by=order), Last "X" (order_by=order)] + + if test_selection.advanced_stats.not then + Test.specify "with Median, Mode and Percentile" <| + table = table_builder [["X", [1,2,3]]] + expect_sum_and_unsupported_errors 3 <| + table.aggregate [Sum "X", Median "X", Mode "X", Percentile 0.3 "X"] + + if test_selection.std_dev.not then + Test.specify "with Standard_Deviation" <| + table = table_builder [["X", [1,2,3]]] + expect_sum_and_unsupported_errors 1 <| + table.aggregate [Sum "X", Standard_Deviation "X"] + + if test_selection.text_shortest_longest.not then + Test.specify "with Shortest and Longest" <| + table = table_builder [["X", [1,2,3]], ["Y", ["a", "bb", "ccc"]]] + expect_sum_and_unsupported_errors 2 <| + table.aggregate [Sum "X", Shortest "Y", Longest "Y"] + + if test_selection.text_concat.not then + Test.specify "with Concatenate" <| + table = table_builder [["X", [1,2,3]], ["Y", ["a", "bb", "ccc"]]] + expect_sum_and_unsupported_errors 1 <| + table.aggregate [Sum "X", Concatenate "Y"] + + if test_selection.multi_distinct.not then + Test.specify "with Count_Distinct on multiple fields" <| + table = table_builder [["X", [1,2,3]], ["Y", ["a", "bb", "ccc"]]] + expect_sum_and_unsupported_errors 1 <| + table.aggregate [Sum "X", Count_Distinct (By_Name ["X", "Y"])] main = Test.Suite.run_main here.spec diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index c84c7a676e7c..bbe29f63bb5a 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -53,14 +53,14 @@ run_tests connection pending=Nothing = here.postgres_specific_spec connection pending=pending Common_Table_Spec.spec prefix table_builder supports_case_sensitive_columns=True pending=pending - selection = Aggregate_Spec.Test_Selection text_shortest_longest=True first_last_row_order=False aggregation_problems=False + selection = Aggregate_Spec.Test_Selection first_last_row_order=False aggregation_problems=False agg_in_memory_table = (Enso_Project.data / "data.csv") . read_csv agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table tables.append agg_table.name empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) tables.append empty_agg_table.name materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=False selection pending=pending + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=True selection pending=pending clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Redshift_Spec.enso b/test/Table_Tests/src/Database/Redshift_Spec.enso index adab1919da92..39bb0ab41ab9 100644 --- a/test/Table_Tests/src/Database/Redshift_Spec.enso +++ b/test/Table_Tests/src/Database/Redshift_Spec.enso @@ -60,7 +60,7 @@ run_tests connection pending=Nothing = empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) tables.append empty_agg_table.name materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=False selection pending=pending + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=True selection pending=pending clean_tables tables.to_vector diff --git a/test/Table_Tests/src/Database/Sqlite_Spec.enso b/test/Table_Tests/src/Database/Sqlite_Spec.enso index 3d3a6b38957d..94408132a7fe 100644 --- a/test/Table_Tests/src/Database/Sqlite_Spec.enso +++ b/test/Table_Tests/src/Database/Sqlite_Spec.enso @@ -75,7 +75,7 @@ spec = agg_table = connection.upload_table (Name_Generator.random_name "Agg1") agg_in_memory_table empty_agg_table = connection.upload_table (Name_Generator.random_name "Agg_Empty") (agg_in_memory_table.take_start 0) materialize = .to_dataframe - Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=False selection + Aggregate_Spec.aggregate_spec prefix agg_table empty_agg_table table_builder materialize is_database=True selection connection.close file.delete From 52e2ae7099c3a77b5f45337f1b2271d05d64a0e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 17:31:11 +0200 Subject: [PATCH 22/26] Add Postgres type tests and fix info --- .../0.0.0-dev/src/Data/Dialect/Postgres.enso | 15 ++++-- .../Database/0.0.0-dev/src/Data/Sql.enso | 9 ++++ .../Database/0.0.0-dev/src/Data/Table.enso | 16 +++++-- .../src/Database/Postgresql_Spec.enso | 46 +++++++++++++++++++ 4 files changed, 78 insertions(+), 8 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso index 8db1f6228676..58ed3795a1ab 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect/Postgres.enso @@ -75,10 +75,15 @@ resolve_target_sql_type aggregate = case aggregate of Longest c _ -> c.sql_type Standard_Deviation _ _ _ -> Sql_Type.double Concatenate _ _ _ _ _ _ -> Sql_Type.text - ## FIXME [RW] revise these - Sum _ _ -> Sql_Type.numeric # TODO can also be bigint, real, double - Average _ _ -> Sql_Type.numeric # TODO can be double sometimes - Median _ _ -> Sql_Type.numeric # TODO can be double sometimes + Sum c _ -> + if (c.sql_type == Sql_Type.integer) || (c.sql_type == Sql_Type.smallint) then Sql_Type.bigint else + if c.sql_type == Sql_Type.bigint then Sql_Type.numeric else + c.sql_type + Average c _ -> + if c.sql_type.is_definitely_integer then Sql_Type.numeric else + if c.sql_type.is_definitely_double then Sql_Type.double else + c.sql_type + Median _ _ -> Sql_Type.double ## PRIVATE agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg-> @@ -161,7 +166,7 @@ agg_longest = Base_Generator.lift_unary_op "LONGEST" arg-> ## PRIVATE concat_ops = make_raw_concat_expr expr separator = - Sql.code "array_to_string(array_agg(" ++ expr ++ Sql.code "), " ++ separator ++ Sql.code ")" + Sql.code "string_agg(" ++ expr ++ Sql.code ", " ++ separator ++ Sql.code ")" concat = Helpers.make_concat make_raw_concat_expr here.make_contains_expr [["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]] diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Sql.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Sql.enso index 36c5ca344869..3d56a1e5d090 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Sql.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Sql.enso @@ -59,6 +59,11 @@ type Sql_Type - name: a database-specific type name, used for pretty printing. type Sql_Type typeid name + == that = case that of + Sql_Type that_id _ -> + this.typeid == that_id + _ -> False + ## The SQL representation of `Boolean` type. boolean : Sql_Type boolean = Sql_Type Types.BOOLEAN "BOOLEAN" @@ -71,6 +76,10 @@ type Sql_Type bigint : Sql_Type bigint = Sql_Type Types.BIGINT "BIGINT" + ## The SQL representation of the `SMALLINT` type. + smallint : Sql_Type + smallint = Sql_Type Types.SMALLINT "SMALLINT" + ## The SQL type representing decimal numbers. decimal : Sql_Type decimal = Sql_Type Types.DECIMAL "DECIMAL" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 51847ea1ca5e..e791a86cc68e 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -761,9 +761,19 @@ type Table info : Table info = cols = this.internal_columns - count_columns = cols.map c-> IR.Internal_Column c.name Sql.Sql_Type.integer (IR.Operation "COUNT" [c.expression]) - count_table = this.updated_columns count_columns . to_dataframe - counts = count_table.columns.map c-> c.at 0 + count_query = + ## Performing a subquery is the most robust way to handle both + regular columns and aggregates. + Naively wrapping each column in a `COUNT(...)` will not + always work as aggregates cannot be nested. + setup = this.context.as_subquery this.name [this.internal_columns] + new_ctx = IR.subquery_as_ctx setup.first + new_columns = setup.second.first.map column-> + [column.name, IR.Operation "COUNT" [column.expression]] + query = IR.Select new_columns new_ctx + this.connection.dialect.generate_sql query + count_table = this.connection.execute_query count_query + counts = if cols.is_empty then [] else count_table.columns.map c-> c.at 0 types = cols.map c-> c.sql_type.name Materialized_Table.new [["Column", cols.map .name], ["Items Count", counts], ["SQL Type", types]] . set_index "Column" diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index bbe29f63bb5a..8a4d6130d6b7 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -9,6 +9,8 @@ import project.Database.Common_Spec import project.Database.Helpers.Name_Generator import project.Common_Table_Spec import project.Aggregate_Spec +from Standard.Table.Data.Aggregate_Column import all +from Standard.Database.Data.Sql import Sql_Type postgres_specific_spec connection pending = Test.group "[PostgreSQL] Info" pending=pending <| @@ -18,11 +20,19 @@ postgres_specific_spec connection pending = t.insert ["a", Nothing, False, 1.2, 0.000000000001] t.insert ["abc", Nothing, Nothing, 1.3, Nothing] t.insert ["def", 42, True, 1.4, 10] + Test.specify "should return Table information" <| i = t.info i.index . to_vector . should_equal ["strs", "ints", "bools", "reals", "doubles"] i.at "Items Count" . to_vector . should_equal [3, 1, 2, 3, 2] i.at "SQL Type" . to_vector . should_equal ["varchar", "int4", "bool", "float4", "float8"] + + Test.specify "should return Table information, also for aggregated results" <| + i = t.aggregate [Concatenate "strs", Sum "ints", Count_Distinct "bools"] . info + i.index . to_vector . should_equal ["Concatenate strs", "Sum ints", "Count_Distinct bools"] + i.at "Items Count" . to_vector . should_equal [1, 1, 1] + i.at "SQL Type" . to_vector . should_equal ["varchar", "int8", "bool"] + Test.specify "should infer standard types correctly" <| t.at "strs" . sql_type . is_definitely_text . should_be_true t.at "ints" . sql_type . is_definitely_integer . should_be_true @@ -30,6 +40,42 @@ postgres_specific_spec connection pending = t.at "reals" . sql_type . is_definitely_double . should_be_true connection.execute_update 'DROP TABLE "'+tinfo+'"' + Test.group "[PostgreSQL] Table.aggregate should correctly infer result types" pending=pending <| + name = Name_Generator.random_name "Ttypes" + connection.execute_update 'CREATE TEMPORARY TABLE "'+name+'" ("txt" VARCHAR, "i1" SMALLINT, "i2" INT, "i3" BIGINT, "i4" NUMERIC, "r1" REAL, "r2" DOUBLE PRECISION, "bools" BOOLEAN)' + t = connection.access_table name + Test.specify "Concatenate, Shortest and Longest" <| + r = t.aggregate [Concatenate "txt", Shortest "txt", Longest "txt"] + r.columns.at 0 . sql_type . should_equal Sql_Type.text + r.columns.at 1 . sql_type . should_equal Sql_Type.text + r.columns.at 2 . sql_type . should_equal Sql_Type.text + + Test.specify "Counts" <| + r = t.aggregate [Count, Count_Empty "txt", Count_Not_Empty "txt", Count_Distinct "i1", Count_Not_Nothing "i2", Count_Nothing "i3"] + r.columns.length . should_equal 6 + r.columns.each column-> + column.sql_type . should_equal Sql_Type.bigint + + Test.specify "Sum" <| + r = t.aggregate [Sum "i1", Sum "i2", Sum "i3", Sum "i4", Sum "r1", Sum "r2"] + r.columns.at 0 . sql_type . should_equal Sql_Type.bigint + r.columns.at 1 . sql_type . should_equal Sql_Type.bigint + r.columns.at 2 . sql_type . should_equal Sql_Type.numeric + r.columns.at 3 . sql_type . should_equal Sql_Type.numeric + r.columns.at 4 . sql_type . should_equal Sql_Type.real + r.columns.at 5 . sql_type . should_equal Sql_Type.double + + Test.specify "Average" <| + r = t.aggregate [Average "i1", Average "i2", Average "i3", Average "i4", Average "r1", Average "r2"] + r.columns.at 0 . sql_type . should_equal Sql_Type.numeric + r.columns.at 1 . sql_type . should_equal Sql_Type.numeric + r.columns.at 2 . sql_type . should_equal Sql_Type.numeric + r.columns.at 3 . sql_type . should_equal Sql_Type.numeric + r.columns.at 4 . sql_type . should_equal Sql_Type.double + r.columns.at 5 . sql_type . should_equal Sql_Type.double + + connection.execute_update 'DROP TABLE "'+name+'"' + run_tests connection pending=Nothing = prefix = "[PostgreSQL] " name_counter = Ref.new 0 From fb7cb80a92cce18887cb1340e7b2fb676a94fc21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 8 Apr 2022 17:33:54 +0200 Subject: [PATCH 23/26] Changelog, note, fix test --- CHANGELOG.md | 7 +++++-- .../lib/Standard/Database/0.0.0-dev/src/Data/Table.enso | 4 ++++ test/Table_Tests/src/Database/Postgresql_Spec.enso | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70f077a5f5d3..27dea42380d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -94,9 +94,11 @@ - [Implemented `Panic.catch` and helper functions for handling errors. Added a type parameter to `Panic.recover` to recover specific types of errors.][3344] - [Added warning handling to `Table.aggregate`][3349] -- [Improved performance of `Table.aggregate` and full warnings implementation] - [3364] +- [Improved performance of `Table.aggregate` and full warnings + implementation][3364] - [Implemented `Text.reverse`][3377] +- [Implemented support for most Table aggregations in the Database + backend.][3383] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -149,6 +151,7 @@ [3366]: https://github.com/enso-org/enso/pull/3366 [3379]: https://github.com/enso-org/enso/pull/3379 [3381]: https://github.com/enso-org/enso/pull/3381 +[3383]: https://github.com/enso-org/enso/pull/3383 #### Enso Compiler diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index e791a86cc68e..89ae00d443cc 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -657,6 +657,10 @@ type Table new_name = p.first Aggregate_Helper.make_aggregate_column this agg new_name . catch partitioned = results.partition (_.is_an Internal_Column) + ## When working on join we may encounter further issues with having + aggregate columns exposed directly, it may be useful to re-use + the `lift_aggregate` method to push the aggregates into a + subquery. new_columns = partitioned.first problems = partitioned.second on_problems.attach_problems_before problems <| diff --git a/test/Table_Tests/src/Database/Postgresql_Spec.enso b/test/Table_Tests/src/Database/Postgresql_Spec.enso index 8a4d6130d6b7..133222a9c038 100644 --- a/test/Table_Tests/src/Database/Postgresql_Spec.enso +++ b/test/Table_Tests/src/Database/Postgresql_Spec.enso @@ -29,9 +29,9 @@ postgres_specific_spec connection pending = Test.specify "should return Table information, also for aggregated results" <| i = t.aggregate [Concatenate "strs", Sum "ints", Count_Distinct "bools"] . info - i.index . to_vector . should_equal ["Concatenate strs", "Sum ints", "Count_Distinct bools"] + i.index . to_vector . should_equal ["Concatenate strs", "Sum ints", "Count Distinct bools"] i.at "Items Count" . to_vector . should_equal [1, 1, 1] - i.at "SQL Type" . to_vector . should_equal ["varchar", "int8", "bool"] + i.at "SQL Type" . to_vector . should_equal ["VARCHAR", "BIGINT", "BIGINT"] Test.specify "should infer standard types correctly" <| t.at "strs" . sql_type . is_definitely_text . should_be_true From ff6e2bd617630ffe600d144ea16b644864bbafd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 11 Apr 2022 12:07:41 +0200 Subject: [PATCH 24/26] Reformat Concatenate --- .../org/enso/table/aggregations/Concatenate.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java index 6ee6ac247504..8d84d4f7a799 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java @@ -1,13 +1,11 @@ package org.enso.table.aggregations; -import javax.swing.JSeparator; +import java.util.List; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; import org.enso.table.data.table.problems.UnquotedDelimiter; -import java.util.List; - public class Concatenate extends Aggregator { private final Storage storage; private final String separator; @@ -15,7 +13,8 @@ public class Concatenate extends Aggregator { private final String suffix; private final String quote; - public Concatenate(String name, Column column, String separator, String prefix, String suffix, String quote) { + public Concatenate( + String name, Column column, String separator, String prefix, String suffix, String quote) { super(name, Storage.Type.STRING); this.storage = column.getStorage(); @@ -28,7 +27,7 @@ public Concatenate(String name, Column column, String separator, String prefix, @Override public Object aggregate(List indexes) { StringBuilder current = null; - for (int row: indexes) { + for (int row : indexes) { Object value = storage.getItemBoxed(row); if (value == null || value instanceof String) { String textValue = toQuotedString(value, quote, separator); @@ -54,7 +53,9 @@ public Object aggregate(List indexes) { return null; } - if (prefix != null) { current.insert(0, prefix); } + if (prefix != null) { + current.insert(0, prefix); + } current.append(suffix); return current.toString(); } From 95df8157700b5d11042596a5cd2fcdf4437e3abd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 11 Apr 2022 12:48:14 +0200 Subject: [PATCH 25/26] Add numbers tests --- .../0.0.0-dev/src/Data/Number/Extensions.enso | 20 ++++++++++++++ test/Tests/src/Data/Numbers_Spec.enso | 26 +++++++++++++------ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso index ab965f9978a9..e0804712151d 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso @@ -302,3 +302,23 @@ Parse_Error.to_display_text : Text Parse_Error.to_display_text = "Could not parse " + this.text.to_text + " as a double." +## A constant holding the floating-point positive infinity. +Number.positive_infinity : Decimal +Number.positive_infinity = Double.POSITIVE_INFINITY + +## A constant holding the floating-point negative infinity. +Number.negative_infinity : Decimal +Number.negative_infinity = Double.NEGATIVE_INFINITY + +## A constant holding the floating-point Not-a-Number value. +Number.nan : Decimal +Number.nan = Double.NaN + +## Checks if the given number is the floating-point Not-a-Number value. + + This is needed, because the NaN value will return `False` even when being + compared with itself, so `x == Number.nan` would not work. +Number.is_nan : Boolean +Number.is_nan = case this of + Decimal -> Double.isNaN this + _ -> False diff --git a/test/Tests/src/Data/Numbers_Spec.enso b/test/Tests/src/Data/Numbers_Spec.enso index 1a2dd9935a9e..51323d20b6ea 100644 --- a/test/Tests/src/Data/Numbers_Spec.enso +++ b/test/Tests/src/Data/Numbers_Spec.enso @@ -262,6 +262,16 @@ spec = almost_max_long_times_three_decimal.ceil.to_decimal . should_equal almost_max_long_times_three_plus_1.to_decimal almost_max_long_times_three_plus_1.ceil . should_equal almost_max_long_times_three_plus_1 + Test.specify "should expose a NaN value" <| + Number.nan.is_nan . should_be_true + 0.is_nan . should_be_false + Number.positive_infinity.is_nan . should_be_false + Number.negative_infinity.is_nan . should_be_false + + Number.nan==Number.nan . should_be_false + Number.nan==0 . should_be_false + Number.nan!=Number.nan . should_be_true + Test.specify "should support inexact equality comparisons" <| 1.0001 . equals 1.0002 epsilon=0.01 . should_be_true 1.0001 . equals 1.0002 epsilon=0.0000001 . should_be_false @@ -269,15 +279,15 @@ spec = 1 . equals 2 . should_be_false 1 . equals (0+1) . should_be_true - inf = 1/0 - inf . equals inf . should_be_true + Number.positive_infinity . equals Number.positive_infinity . should_be_true + + Number.negative_infinity . equals Number.negative_infinity . should_be_true + Number.negative_infinity . equals Number.positive_infinity . should_be_false - neg_inf = -inf - neg_inf . equals neg_inf . should_be_true - neg_inf . equals inf . should_be_false + Number.negative_infinity . should_equal (-Number.positive_infinity) + Number.negative_infinity . equals (-Number.positive_infinity) . should_be_true - nan = 0.log 0 - nan . equals nan . should_be_false - nan . equals 0 . should_be_false + Number.nan . equals Number.nan . should_be_false + Number.nan . equals 0 . should_be_false main = Test.Suite.run_main here.spec From 1e3c91d46afdf7e6e24dc24834575245466aa7a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 11 Apr 2022 17:55:44 +0200 Subject: [PATCH 26/26] cr --- .../Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso | 2 +- distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso | 2 +- .../node/expression/builtin/text/AnyToTextNode.java | 3 +++ .../expression/builtin/text/util/TypeToDisplayTextNode.java | 6 ++++-- .../main/java/org/enso/table/aggregations/Concatenate.java | 3 +-- .../java/org/enso/table/aggregations/StandardDeviation.java | 3 +-- 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso index e0804712151d..dc3280f2d26a 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Number/Extensions.enso @@ -210,7 +210,7 @@ Integer.up_to n = Range this n 1.equals 1.0000001 epsilon=0.001 Number.equals : Number -> Number -> Boolean Number.equals that epsilon=0.0 = - if this == that then True else (this - that).abs <= epsilon + (this == that) || ((this - that).abs <= epsilon) ## Returns the smaller value of `this` and `that`. diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso index 3b819cd8dbe2..ae7d5b8137ae 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Main.enso @@ -272,7 +272,7 @@ Error.should_equal _ frames_to_skip=0 = here.fail_match_on_unexpected_error this example_should_equal = 1.00000001 . should_equal 1.00000002 epsilon=0.0001 Decimal.should_equal : Decimal -> Decimal -> Integer -> Assertion -Decimal.should_equal that (epsilon = 0) (frames_to_skip=0) = +Decimal.should_equal that epsilon=0 frames_to_skip=0 = matches = case that of Number -> this.equals that epsilon _ -> False diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java index 6c79eb61a6fb..de37bfe7edec 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/AnyToTextNode.java @@ -65,6 +65,9 @@ private Text doComplexAtom(Atom atom) { @CompilerDirectives.TruffleBoundary private String showObject(Object child) throws UnsupportedMessageException { if (child == null) { + // TODO [RW] This is a temporary workaround to make it possible to display errors related to + // https://www.pivotaltracker.com/story/show/181652974 + // Most likely it should be removed once that is implemented. return "null"; } else if (child instanceof Boolean) { return (boolean) child ? "True" : "False"; diff --git a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java index dab2d1c7140a..34eff81d4b06 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/text/util/TypeToDisplayTextNode.java @@ -30,6 +30,9 @@ String doDisplay( @CachedLibrary(limit = "5") InteropLibrary displays, @CachedLibrary(limit = "5") InteropLibrary strings) { if (value == null) { + // TODO [RW] This is a temporary workaround to make it possible to display errors related to + // https://www.pivotaltracker.com/story/show/181652974 + // Most likely it should be removed once that is implemented. return "null"; } else if (TypesGen.isLong(value)) { return value + " (Integer)"; @@ -61,8 +64,7 @@ String doDisplay( try { return strings.asString(displays.toDisplayString(objects.getMetaObject(value))); } catch (UnsupportedMessageException e) { - throw new IllegalStateException( - "Receiver declares a meta object, but does not return it."); + throw new IllegalStateException("Receiver declares a meta object, but does not return it."); } } else { return "a polyglot object"; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java index 8d84d4f7a799..8f0578503a92 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java @@ -66,8 +66,7 @@ private static String toQuotedString(Object value, final String quote, final Str } String textValue = value.toString(); - boolean quote_is_defined = !quote.isEmpty(); - if (quote_is_defined) { + if (!quote.isEmpty()) { boolean includes_separator = !separator.isEmpty() && textValue.contains(separator); boolean includes_quote = textValue.contains(quote); boolean needs_quoting = textValue.isEmpty() || includes_separator || includes_quote; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java index 954dcdfb31ac..ac0b99dc35de 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java @@ -53,8 +53,7 @@ public Object aggregate(List indexes) { } } - if (current == null) return null; - if (!population && current.count <= 1) return null; + if (current == null || (!population && current.count <= 1)) return null; return (population ? 1 : Math.sqrt(current.count / (current.count - 1.0))) * Math.sqrt(current.total_sqr / current.count - Math.pow(current.total / current.count, 2)); }