Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into wip/farmaazon/text-sdf-bold-181641027
Browse files Browse the repository at this point in the history
  • Loading branch information
farmaazon committed Apr 12, 2022
2 parents c951849 + 891f064 commit 2ee2a65
Show file tree
Hide file tree
Showing 28 changed files with 874 additions and 307 deletions.
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,11 @@
- [Implemented `Panic.catch` and helper functions for handling errors. Added a
type parameter to `Panic.recover` to recover specific types of errors.][3344]
- [Added warning handling to `Table.aggregate`][3349]
- [Improved performance of `Table.aggregate` and full warnings implementation]
[3364]
- [Improved performance of `Table.aggregate` and full warnings
implementation][3364]
- [Implemented `Text.reverse`][3377]
- [Implemented support for most Table aggregations in the Database
backend.][3383]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -155,6 +157,7 @@
[3366]: https://github.com/enso-org/enso/pull/3366
[3379]: https://github.com/enso-org/enso/pull/3379
[3381]: https://github.com/enso-org/enso/pull/3381
[3383]: https://github.com/enso-org/enso/pull/3383
[3385]: https://github.com/enso-org/enso/pull/3385

#### Enso Compiler
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ Integer.up_to n = Range this n

1.equals 1.0000001 epsilon=0.001
Number.equals : Number -> Number -> Boolean
Number.equals that epsilon=0.0 = (this - that).abs <= epsilon
Number.equals that epsilon=0.0 =
(this == that) || ((this - that).abs <= epsilon)

## Returns the smaller value of `this` and `that`.

Expand Down Expand Up @@ -301,3 +302,23 @@ Parse_Error.to_display_text : Text
Parse_Error.to_display_text =
"Could not parse " + this.text.to_text + " as a double."

## A constant holding the floating-point positive infinity.

   Backed by the IEEE 754 value `Double.POSITIVE_INFINITY`.
Number.positive_infinity : Decimal
Number.positive_infinity = Double.POSITIVE_INFINITY

## A constant holding the floating-point negative infinity.

   Backed by the IEEE 754 value `Double.NEGATIVE_INFINITY`.
Number.negative_infinity : Decimal
Number.negative_infinity = Double.NEGATIVE_INFINITY

## A constant holding the floating-point Not-a-Number value.

   Note that NaN compares as unequal even to itself, so use `Number.is_nan`
   rather than `==` to test for it.
Number.nan : Decimal
Number.nan = Double.NaN

## Checks if the given number is the floating-point Not-a-Number value.

   This is needed, because the NaN value will return `False` even when being
   compared with itself, so `x == Number.nan` would not work.
Number.is_nan : Boolean
Number.is_nan = case this of
    Decimal -> Double.isNaN this
    # Non-decimal numbers (e.g. integers) cannot hold the NaN bit pattern.
    _ -> False
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type Connection
meant only for internal use.
execute_query : Text | Sql.Statement -> Vector Sql.Sql_Type -> Materialized_Table =
execute_query query expected_types=Nothing = here.handle_sql_errors <|
Resource.bracket (this.prepare_statement query) .close stmt->
this.with_prepared_statement query stmt->
rs = stmt.executeQuery
metadata = rs.getMetaData
ncols = metadata.getColumnCount
Expand Down Expand Up @@ -97,31 +97,27 @@ type Connection
representing the query to execute.
execute_update : Text | Sql.Statement -> Integer
execute_update query = here.handle_sql_errors <|
Resource.bracket (this.prepare_statement query) .close stmt->
## FIXME USE CATCH HERE!
result = Panic.recover Any stmt.executeLargeUpdate
result.catch err-> case err of
Polyglot_Error exc ->
case Java.is_instance exc UnsupportedOperationException of
True ->
stmt.executeUpdate
False -> Error.throw err
_ -> Error.throw err
this.with_prepared_statement query stmt->
Panic.catch UnsupportedOperationException stmt.executeLargeUpdate _->
stmt.executeUpdate

## PRIVATE

Prepares the statement by ensuring that it is sanitised.

Arguments:
- query: The query to prepare the SQL statement in.
prepare_statement : Text | Sql.Statement -> PreparedStatement
prepare_statement query =
go template holes=[] = Managed_Resource.with this.connection_resource java_connection->
Runs the provided action with a prepared statement, adding contextual
information to any thrown SQL errors.
with_prepared_statement : Text | Sql.Statement -> (PreparedStatement -> Any) -> Any
with_prepared_statement query action =
prepare template holes = Managed_Resource.with this.connection_resource java_connection->
stmt = java_connection.prepareStatement template
Panic.catch Any (here.set_statement_values stmt holes) caught_panic->
stmt.close
Panic.throw caught_panic
stmt

go template holes =
here.wrap_sql_errors related_query=template <|
Resource.bracket (prepare template holes) .close action

case query of
Text -> go query []
Sql.Statement _ ->
Expand All @@ -140,7 +136,7 @@ type Connection
fetch_columns table_name =
query = IR.Select_All (IR.make_ctx_from table_name)
compiled = this.dialect.generate_sql query
Resource.bracket (this.prepare_statement compiled) .close stmt->
this.with_prepared_statement compiled stmt->
rs = stmt.executeQuery
metadata = rs.getMetaData
ncols = metadata.getColumnCount
Expand Down Expand Up @@ -363,7 +359,7 @@ type Sql_Error
Convert the SQL error to a textual representation.
to_text : Text
to_text =
query = if this.related_query.is_nothing.not then " [Query was: " + query + "]" else ""
query = if this.related_query.is_nothing.not then " [Query was: " + this.related_query + "]" else ""
"There was an SQL error: " + this.java_exception.getMessage.to_text + "." + query

## UNSTABLE
Expand Down Expand Up @@ -406,10 +402,10 @@ type Sql_Timeout_Error

Arguments:
- action: The computation to execute. This computation may throw SQL errors.
handle_sql_errors : Any -> Any ! (Sql_Error | Sql_Timeout_Error)
handle_sql_errors ~action =
handle_sql_errors : Any -> (Text | Nothing) -> Any ! (Sql_Error | Sql_Timeout_Error)
handle_sql_errors ~action related_query=Nothing =
Panic.recover [Sql_Error, Sql_Timeout_Error] <|
here.wrap_sql_errors action
here.wrap_sql_errors action related_query

## PRIVATE

Expand Down Expand Up @@ -437,7 +433,9 @@ default_storage_type storage_type = case storage_type of
Storage.Integer -> Sql_Type.integer
Storage.Decimal -> Sql_Type.double
Storage.Boolean -> Sql_Type.boolean
Storage.Any -> Sql_Type.blob
## Support for mixed type columns in Table upload is currently very limited,
falling back to treating everything as text.
Storage.Any -> Sql_Type.text

## PRIVATE
Sets values inside of a prepared statement.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type Dialect
Deduces the result type for an aggregation operation.

The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
You may need to transform it with `resolve_aggregate` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type = Errors.unimplemented "This is an interface only."

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ make_concat make_raw_concat_expr make_contains_expr has_quote args =
includes_separator = separator ++ Sql.code " != '' AND " ++ make_contains_expr expr separator
## We use the assumption that `has_quote` is True iff `quote` is not empty.
includes_quote = make_contains_expr expr quote
needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren
is_empty = expr ++ Sql.code " = ''"
needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren ++ Sql.code " OR " ++ is_empty.paren
escaped = Sql.code "replace(" ++ expr ++ Sql.code ", " ++ quote ++ Sql.code ", " ++ quote ++ append ++ quote ++ Sql.code ")"
quoted = quote ++ append ++ escaped ++ append ++ quote
Sql.code "CASE WHEN " ++ needs_quoting ++ Sql.code " THEN " ++ quoted ++ Sql.code " ELSE " ++ expr ++ Sql.code " END"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,41 +39,14 @@ type Postgresql_Dialect
Deduces the result type for an aggregation operation.

The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
You may need to transform it with `resolve_aggregate` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate

## PRIVATE
make_internal_generator_dialect =
starts_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation starts_with")
ends_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation ends_with")
contains arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ", '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation contains")
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with], here.agg_shortest, here.agg_longest]+here.concat_ops
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty]
text = [here.starts_with, here.contains, here.ends_with, here.agg_shortest, here.agg_longest]+here.concat_ops
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]]

stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"]
stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "stddev_samp"]
Expand All @@ -83,7 +56,7 @@ make_internal_generator_dialect =

## PRIVATE
The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
You may need to transform it with `resolve_aggregate` first.
resolve_target_sql_type aggregate = case aggregate of
Group_By c _ -> c.sql_type
Count _ -> Sql_Type.bigint
Expand All @@ -102,10 +75,15 @@ resolve_target_sql_type aggregate = case aggregate of
Longest c _ -> c.sql_type
Standard_Deviation _ _ _ -> Sql_Type.double
Concatenate _ _ _ _ _ _ -> Sql_Type.text
## TODO [RW] revise these
Sum _ _ -> Sql_Type.numeric # TODO can also be bigint, real, double
Average _ _ -> Sql_Type.numeric # TODO can be double sometimes
Median _ _ -> Sql_Type.numeric # TODO can be double sometimes
Sum c _ ->
if (c.sql_type == Sql_Type.integer) || (c.sql_type == Sql_Type.smallint) then Sql_Type.bigint else
if c.sql_type == Sql_Type.bigint then Sql_Type.numeric else
c.sql_type
Average c _ ->
if c.sql_type.is_definitely_integer then Sql_Type.numeric else
if c.sql_type.is_definitely_double then Sql_Type.double else
c.sql_type
Median _ _ -> Sql_Type.double

## PRIVATE
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
Expand All @@ -121,14 +99,30 @@ agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->

## PRIVATE
   Generates the SQL for the MEDIAN aggregation, with an explicit guard that
   propagates NaN when any input value is NaN.
agg_median = Base_Generator.lift_unary_op "MEDIAN" arg->
    ## TODO Technically, this check may not be necessary if the input column has
       type INTEGER, because it is impossible to represent a NaN in that type.
       However, currently the column type inference is not tested well-enough to
       rely on this, so leaving a uniform approach regardless of type. This
       could be revisited when further work on column types takes place.
       See issue: https://www.pivotaltracker.com/story/show/180854759
    contains_nan = Sql.code "bool_or(" ++ arg ++ Sql.code " = double precision 'NaN')"
    median_expr = Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")"
    Sql.code "CASE WHEN " ++ contains_nan ++ Sql.code " THEN 'NaN' ELSE " ++ median_expr ++ Sql.code " END"

## PRIVATE
agg_mode = Base_Generator.lift_unary_op "MODE" arg->
Sql.code "mode() WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")"

agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" p-> expr->
    ## TODO Technically, this check may not be necessary if the input column has
       type INTEGER, because it is impossible to represent a NaN in that type.
       However, currently the column type inference is not tested well-enough to
       rely on this, so leaving a uniform approach regardless of type. This
       could be revisited when further work on column types takes place.
       See issue: https://www.pivotaltracker.com/story/show/180854759
    contains_nan = Sql.code "bool_or(" ++ expr ++ Sql.code " = double precision 'NaN')"
    percentile_expr = Sql.code "percentile_cont(" ++ p ++ Sql.code ") WITHIN GROUP (ORDER BY " ++ expr ++ Sql.code ")"
    Sql.code "CASE WHEN " ++ contains_nan ++ Sql.code " THEN 'NaN' ELSE " ++ percentile_expr ++ Sql.code " END"

## PRIVATE
These are written in a not most-efficient way, but a way that makes them
Expand Down Expand Up @@ -172,8 +166,44 @@ agg_longest = Base_Generator.lift_unary_op "LONGEST" arg->
## PRIVATE
concat_ops =
make_raw_concat_expr expr separator =
Sql.code "array_to_string(array_agg(" ++ expr ++ Sql.code "), " ++ separator ++ Sql.code ")"
make_contains_expr expr substring =
Sql.code "position(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0"
concat = Helpers.make_concat make_raw_concat_expr make_contains_expr
Sql.code "string_agg(" ++ expr ++ Sql.code ", " ++ separator ++ Sql.code ")"
concat = Helpers.make_concat make_raw_concat_expr here.make_contains_expr
[["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]]


## PRIVATE
   Generates a COUNT(DISTINCT ...) expression over one or more columns,
   skipping rows where every listed column is NULL.
agg_count_distinct args = if args.is_empty then (Error.throw (Illegal_Argument_Error "COUNT_DISTINCT requires at least one argument.")) else
    case args.length == 1 of
        True ->
            ## A single null value will be skipped.
            Sql.code "COUNT(DISTINCT " ++ args.first ++ Sql.code ")"
        False ->
            ## A tuple of nulls is not a null, so it will not be skipped - but
               we want to ignore all-null columns. So we manually filter them
               out.
            distinct_count = Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code "))"
            null_checks = args.map arg-> arg.paren ++ Sql.code " IS NULL"
            ignore_all_null = Sql.code " FILTER (WHERE NOT (" ++ Sql.join " AND " null_checks ++ Sql.code "))"
            (distinct_count ++ ignore_all_null).paren

## PRIVATE
   Generates a COUNT(DISTINCT ...) that also counts all-null rows: appending
   the constant `0` turns every row into a non-null tuple, so none are skipped.
agg_count_distinct_include_null args =
    ## If we always count as tuples, then even null fields are counted.
    Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code ", 0))"

## PRIVATE
   Builds an SQL prefix check of the form `str LIKE CONCAT(sub, '%')`.
starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub->
    (str ++ Sql.code " LIKE CONCAT(" ++ sub ++ Sql.code ", '%')").paren

## PRIVATE
   Builds an SQL suffix check of the form `str LIKE CONCAT('%', sub)`.
ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
    (str ++ Sql.code " LIKE CONCAT('%', " ++ sub ++ Sql.code ")").paren

## PRIVATE
   Builds an SQL expression checking whether `expr` contains `substring`,
   relying on `position` returning 0 when the substring is absent.
make_contains_expr expr substring =
    Sql.code "position(" ++ substring ++ Sql.code " in " ++ expr ++ Sql.code ") > 0"

## PRIVATE
   An operation builder for the `contains` substring check, delegating to
   `make_contains_expr`.
contains = Base_Generator.lift_binary_op "contains" here.make_contains_expr
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type Redshift_Dialect
Deduces the result type for an aggregation operation.

The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
You may need to transform it with `resolve_aggregate` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type aggregate =
    # Delegates to the Postgres dialect's type mapping.
    Postgres.resolve_target_sql_type aggregate
Loading

0 comments on commit 2ee2a65

Please sign in to comment.