Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into wip/farmaazon/text-sdf-bold-181641027
Browse files Browse the repository at this point in the history
  • Loading branch information
farmaazon committed Apr 12, 2022
2 parents c951849 + 891f064 commit 2ee2a65
Show file tree
Hide file tree
Showing 28 changed files with 874 additions and 307 deletions.
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,11 @@
- [Implemented `Panic.catch` and helper functions for handling errors. Added a
type parameter to `Panic.recover` to recover specific types of errors.][3344]
- [Added warning handling to `Table.aggregate`][3349]
- [Improved performance of `Table.aggregate` and full warnings implementation]
[3364]
- [Improved performance of `Table.aggregate` and full warnings
implementation][3364]
- [Implemented `Text.reverse`][3377]
- [Implemented support for most Table aggregations in the Database
backend.][3383]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -155,6 +157,7 @@
[3366]: https://github.com/enso-org/enso/pull/3366
[3379]: https://github.com/enso-org/enso/pull/3379
[3381]: https://github.com/enso-org/enso/pull/3381
[3383]: https://github.com/enso-org/enso/pull/3383
[3385]: https://github.com/enso-org/enso/pull/3385

#### Enso Compiler
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ Integer.up_to n = Range this n

1.equals 1.0000001 epsilon=0.001
Number.equals : Number -> Number -> Boolean
Number.equals that epsilon=0.0 = (this - that).abs <= epsilon
Number.equals that epsilon=0.0 =
(this == that) || ((this - that).abs <= epsilon)

## Returns the smaller value of `this` and `that`.

Expand Down Expand Up @@ -301,3 +302,23 @@ Parse_Error.to_display_text : Text
Parse_Error.to_display_text =
"Could not parse " + this.text.to_text + " as a double."

## A constant holding the floating-point positive infinity.

   Backed by the IEEE 754 value `Double.POSITIVE_INFINITY`.
Number.positive_infinity : Decimal
Number.positive_infinity = Double.POSITIVE_INFINITY

## A constant holding the floating-point negative infinity.

   Backed by the IEEE 754 value `Double.NEGATIVE_INFINITY`.
Number.negative_infinity : Decimal
Number.negative_infinity = Double.NEGATIVE_INFINITY

## A constant holding the floating-point Not-a-Number value.

   Note that NaN compares as unequal even to itself, so use `Number.is_nan`
   rather than `==` to test for it.
Number.nan : Decimal
Number.nan = Double.NaN

## Checks if the given number is the floating-point Not-a-Number value.

   This is needed, because the NaN value will return `False` even when being
   compared with itself, so `x == Number.nan` would not work.
Number.is_nan : Boolean
Number.is_nan = case this of
    Decimal -> Double.isNaN this
    # Non-decimal numbers (e.g. integers) cannot hold the NaN bit pattern.
    _ -> False
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type Connection
meant only for internal use.
execute_query : Text | Sql.Statement -> Vector Sql.Sql_Type -> Materialized_Table =
execute_query query expected_types=Nothing = here.handle_sql_errors <|
Resource.bracket (this.prepare_statement query) .close stmt->
this.with_prepared_statement query stmt->
rs = stmt.executeQuery
metadata = rs.getMetaData
ncols = metadata.getColumnCount
Expand Down Expand Up @@ -97,31 +97,27 @@ type Connection
representing the query to execute.
execute_update : Text | Sql.Statement -> Integer
execute_update query = here.handle_sql_errors <|
Resource.bracket (this.prepare_statement query) .close stmt->
## FIXME USE CATCH HERE!
result = Panic.recover Any stmt.executeLargeUpdate
result.catch err-> case err of
Polyglot_Error exc ->
case Java.is_instance exc UnsupportedOperationException of
True ->
stmt.executeUpdate
False -> Error.throw err
_ -> Error.throw err
this.with_prepared_statement query stmt->
Panic.catch UnsupportedOperationException stmt.executeLargeUpdate _->
stmt.executeUpdate

## PRIVATE

Prepares the statement by ensuring that it is sanitised.

Arguments:
- query: The query to prepare the SQL statement in.
prepare_statement : Text | Sql.Statement -> PreparedStatement
prepare_statement query =
go template holes=[] = Managed_Resource.with this.connection_resource java_connection->
Runs the provided action with a prepared statement, adding contextual
information to any thrown SQL errors.
with_prepared_statement : Text | Sql.Statement -> (PreparedStatement -> Any) -> Any
with_prepared_statement query action =
prepare template holes = Managed_Resource.with this.connection_resource java_connection->
stmt = java_connection.prepareStatement template
Panic.catch Any (here.set_statement_values stmt holes) caught_panic->
stmt.close
Panic.throw caught_panic
stmt

go template holes =
here.wrap_sql_errors related_query=template <|
Resource.bracket (prepare template holes) .close action

case query of
Text -> go query []
Sql.Statement _ ->
Expand All @@ -140,7 +136,7 @@ type Connection
fetch_columns table_name =
query = IR.Select_All (IR.make_ctx_from table_name)
compiled = this.dialect.generate_sql query
Resource.bracket (this.prepare_statement compiled) .close stmt->
this.with_prepared_statement compiled stmt->
rs = stmt.executeQuery
metadata = rs.getMetaData
ncols = metadata.getColumnCount
Expand Down Expand Up @@ -363,7 +359,7 @@ type Sql_Error
Convert the SQL error to a textual representation.
to_text : Text
to_text =
query = if this.related_query.is_nothing.not then " [Query was: " + query + "]" else ""
query = if this.related_query.is_nothing.not then " [Query was: " + this.related_query + "]" else ""
"There was an SQL error: " + this.java_exception.getMessage.to_text + "." + query

## UNSTABLE
Expand Down Expand Up @@ -406,10 +402,10 @@ type Sql_Timeout_Error

Arguments:
- action: The computation to execute. This computation may throw SQL errors.
handle_sql_errors : Any -> Any ! (Sql_Error | Sql_Timeout_Error)
handle_sql_errors ~action =
handle_sql_errors : Any -> (Text | Nothing) -> Any ! (Sql_Error | Sql_Timeout_Error)
handle_sql_errors ~action related_query=Nothing =
Panic.recover [Sql_Error, Sql_Timeout_Error] <|
here.wrap_sql_errors action
here.wrap_sql_errors action related_query

## PRIVATE

Expand Down Expand Up @@ -437,7 +433,9 @@ default_storage_type storage_type = case storage_type of
Storage.Integer -> Sql_Type.integer
Storage.Decimal -> Sql_Type.double
Storage.Boolean -> Sql_Type.boolean
Storage.Any -> Sql_Type.blob
## Support for mixed type columns in Table upload is currently very limited,
falling back to treating everything as text.
Storage.Any -> Sql_Type.text

## PRIVATE
Sets values inside of a prepared statement.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type Dialect
Deduces the result type for an aggregation operation.

The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
You may need to transform it with `resolve_aggregate` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type = Errors.unimplemented "This is an interface only."

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ make_concat make_raw_concat_expr make_contains_expr has_quote args =
includes_separator = separator ++ Sql.code " != '' AND " ++ make_contains_expr expr separator
## We use the assumption that `has_quote` is True iff `quote` is not empty.
includes_quote = make_contains_expr expr quote
needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren
is_empty = expr ++ Sql.code " = ''"
needs_quoting = includes_separator.paren ++ Sql.code " OR " ++ includes_quote.paren ++ Sql.code " OR " ++ is_empty.paren
escaped = Sql.code "replace(" ++ expr ++ Sql.code ", " ++ quote ++ Sql.code ", " ++ quote ++ append ++ quote ++ Sql.code ")"
quoted = quote ++ append ++ escaped ++ append ++ quote
Sql.code "CASE WHEN " ++ needs_quoting ++ Sql.code " THEN " ++ quoted ++ Sql.code " ELSE " ++ expr ++ Sql.code " END"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,41 +39,14 @@ type Postgresql_Dialect
Deduces the result type for an aggregation operation.

The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
You may need to transform it with `resolve_aggregate` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type aggregate = here.resolve_target_sql_type aggregate

## PRIVATE
make_internal_generator_dialect =
starts_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT(") ++ sub ++ (Sql.code ", '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation starts_with")
ends_with arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ")")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation ends_with")
contains arguments =
case arguments.length == 2 of
True ->
str = arguments.at 0
sub = arguments.at 1
res = str ++ (Sql.code " LIKE CONCAT('%', ") ++ sub ++ (Sql.code ", '%')")
res.paren
False ->
Error.throw ("Invalid amount of arguments for operation contains")
text = [["starts_with", starts_with], ["contains", contains], ["ends_with", ends_with], here.agg_shortest, here.agg_longest]+here.concat_ops
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty]
text = [here.starts_with, here.contains, here.ends_with, here.agg_shortest, here.agg_longest]+here.concat_ops
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]]

stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"]
stddev_samp = ["STDDEV_SAMP", Base_Generator.make_function "stddev_samp"]
Expand All @@ -83,7 +56,7 @@ make_internal_generator_dialect =

## PRIVATE
The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
You may need to transform it with `resolve_aggregate` first.
resolve_target_sql_type aggregate = case aggregate of
Group_By c _ -> c.sql_type
Count _ -> Sql_Type.bigint
Expand All @@ -102,10 +75,15 @@ resolve_target_sql_type aggregate = case aggregate of
Longest c _ -> c.sql_type
Standard_Deviation _ _ _ -> Sql_Type.double
Concatenate _ _ _ _ _ _ -> Sql_Type.text
## TODO [RW] revise these
Sum _ _ -> Sql_Type.numeric # TODO can also be bigint, real, double
Average _ _ -> Sql_Type.numeric # TODO can be double sometimes
Median _ _ -> Sql_Type.numeric # TODO can be double sometimes
Sum c _ ->
if (c.sql_type == Sql_Type.integer) || (c.sql_type == Sql_Type.smallint) then Sql_Type.bigint else
if c.sql_type == Sql_Type.bigint then Sql_Type.numeric else
c.sql_type
Average c _ ->
if c.sql_type.is_definitely_integer then Sql_Type.numeric else
if c.sql_type.is_definitely_double then Sql_Type.double else
c.sql_type
Median _ _ -> Sql_Type.double

## PRIVATE
agg_count_is_null = Base_Generator.lift_unary_op "COUNT_IS_NULL" arg->
Expand All @@ -121,14 +99,30 @@ agg_count_not_empty = Base_Generator.lift_unary_op "COUNT_NOT_EMPTY" arg->

## PRIVATE
   Generates the SQL for the MEDIAN aggregation, with an explicit guard that
   propagates NaN when any input value is NaN.
agg_median = Base_Generator.lift_unary_op "MEDIAN" arg->
    ## TODO Technically, this check may not be necessary if the input column has
       type INTEGER, because it is impossible to represent a NaN in that type.
       However, currently the column type inference is not tested well-enough to
       rely on this, so leaving a uniform approach regardless of type. This
       could be revisited when further work on column types takes place.
       See issue: https://www.pivotaltracker.com/story/show/180854759
    contains_nan = Sql.code "bool_or(" ++ arg ++ Sql.code " = double precision 'NaN')"
    median_expr = Sql.code "percentile_cont(0.5) WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")"
    Sql.code "CASE WHEN " ++ contains_nan ++ Sql.code " THEN 'NaN' ELSE " ++ median_expr ++ Sql.code " END"

## PRIVATE
agg_mode = Base_Generator.lift_unary_op "MODE" arg->
Sql.code "mode() WITHIN GROUP (ORDER BY " ++ arg ++ Sql.code ")"

agg_percentile = Base_Generator.lift_binary_op "PERCENTILE" p-> expr->
    ## TODO Technically, this check may not be necessary if the input column has
       type INTEGER, because it is impossible to represent a NaN in that type.
       However, currently the column type inference is not tested well-enough to
       rely on this, so leaving a uniform approach regardless of type. This
       could be revisited when further work on column types takes place.
       See issue: https://www.pivotaltracker.com/story/show/180854759
    contains_nan = Sql.code "bool_or(" ++ expr ++ Sql.code " = double precision 'NaN')"
    percentile_expr = Sql.code "percentile_cont(" ++ p ++ Sql.code ") WITHIN GROUP (ORDER BY " ++ expr ++ Sql.code ")"
    Sql.code "CASE WHEN " ++ contains_nan ++ Sql.code " THEN 'NaN' ELSE " ++ percentile_expr ++ Sql.code " END"

## PRIVATE
These are written in a not most-efficient way, but a way that makes them
Expand Down Expand Up @@ -172,8 +166,44 @@ agg_longest = Base_Generator.lift_unary_op "LONGEST" arg->
## PRIVATE
concat_ops =
make_raw_concat_expr expr separator =
Sql.code "array_to_string(array_agg(" ++ expr ++ Sql.code "), " ++ separator ++ Sql.code ")"
make_contains_expr expr substring =
Sql.code "position(" ++ expr ++ Sql.code ", " ++ substring ++ Sql.code ") > 0"
concat = Helpers.make_concat make_raw_concat_expr make_contains_expr
Sql.code "string_agg(" ++ expr ++ Sql.code ", " ++ separator ++ Sql.code ")"
concat = Helpers.make_concat make_raw_concat_expr here.make_contains_expr
[["CONCAT", concat (has_quote=False)], ["CONCAT_QUOTE_IF_NEEDED", concat (has_quote=True)]]


## PRIVATE
   Generates a COUNT(DISTINCT ...) expression over one or more columns,
   skipping rows where every listed column is NULL.
agg_count_distinct args = if args.is_empty then (Error.throw (Illegal_Argument_Error "COUNT_DISTINCT requires at least one argument.")) else
    case args.length == 1 of
        True ->
            ## A single null value will be skipped.
            Sql.code "COUNT(DISTINCT " ++ args.first ++ Sql.code ")"
        False ->
            ## A tuple of nulls is not a null, so it will not be skipped - but
               we want to ignore all-null columns. So we manually filter them
               out.
            distinct_count = Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code "))"
            null_checks = args.map arg-> arg.paren ++ Sql.code " IS NULL"
            ignore_all_null = Sql.code " FILTER (WHERE NOT (" ++ Sql.join " AND " null_checks ++ Sql.code "))"
            (distinct_count ++ ignore_all_null).paren

## PRIVATE
   Generates a COUNT(DISTINCT ...) that also counts all-null rows: appending
   the constant `0` turns every row into a non-null tuple, so none are skipped.
agg_count_distinct_include_null args =
    ## If we always count as tuples, then even null fields are counted.
    Sql.code "COUNT(DISTINCT (" ++ Sql.join ", " args ++ Sql.code ", 0))"

## PRIVATE
   Builds an SQL prefix check of the form `str LIKE CONCAT(sub, '%')`.
starts_with = Base_Generator.lift_binary_op "starts_with" str-> sub->
    (str ++ Sql.code " LIKE CONCAT(" ++ sub ++ Sql.code ", '%')").paren

## PRIVATE
   Builds an SQL suffix check of the form `str LIKE CONCAT('%', sub)`.
ends_with = Base_Generator.lift_binary_op "ends_with" str-> sub->
    (str ++ Sql.code " LIKE CONCAT('%', " ++ sub ++ Sql.code ")").paren

## PRIVATE
   Builds an SQL expression checking whether `expr` contains `substring`,
   relying on `position` returning 0 when the substring is absent.
make_contains_expr expr substring =
    Sql.code "position(" ++ substring ++ Sql.code " in " ++ expr ++ Sql.code ") > 0"

## PRIVATE
   An operation builder for the `contains` substring check, delegating to
   `make_contains_expr`.
contains = Base_Generator.lift_binary_op "contains" here.make_contains_expr
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type Redshift_Dialect
Deduces the result type for an aggregation operation.

The provided aggregate is assumed to contain only already resolved columns.
You may need to transform it with `resolve_columns` first.
You may need to transform it with `resolve_aggregate` first.
resolve_target_sql_type : Aggregate_Column -> Sql_Type
resolve_target_sql_type aggregate =
    # Delegates to the Postgres dialect's type mapping.
    Postgres.resolve_target_sql_type aggregate
Loading

0 comments on commit 2ee2a65

Please sign in to comment.