Skip to content

Commit

Permalink
Implement replace on the Database Column (#7275)
Browse files Browse the repository at this point in the history
Implements `replace` for database text columns, for text, regex, and column patterns.
  • Loading branch information
GregoryTravis committed Jul 25, 2023
1 parent 2dc565b commit 1f6fcf1
Show file tree
Hide file tree
Showing 13 changed files with 356 additions and 13 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@
- [Improving date/time support in Table - added `date_diff`, `date_add`,
`date_part` and some shorthands. Extended `Time_Period` with milli-, micro-
and nanosecond periods.][7221]
- [Implemented `replace` on database columns.][7275]
- [Retire `Column_Selector` and allow regex based selection of columns.][7295]
- [`Text.parse_to_table` can take a `Regex`.][7297]

Expand Down Expand Up @@ -765,6 +766,7 @@
[7223]: https://github.com/enso-org/enso/pull/7223
[7234]: https://github.com/enso-org/enso/pull/7234
[7221]: https://github.com/enso-org/enso/pull/7221
[7275]: https://github.com/enso-org/enso/pull/7275
[7295]: https://github.com/enso-org/enso/pull/7295
[7297]: https://github.com/enso-org/enso/pull/7297

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import project.Any.Any
import project.Data.Locale.Locale
import project.Data.Text.Regex.Regex
import project.Data.Text.Text
Expand Down Expand Up @@ -60,3 +61,12 @@ type Case_Sensitivity
to_explicit_sensitivity_in_memory self = case self of
Case_Sensitivity.Default -> Case_Sensitivity.Sensitive
_ -> self

## PRIVATE
   Runs `action` unless `self` is `Insensitive` with a locale other than
   `Locale.default`; in that case an `Illegal_Argument` error is thrown and
   `action` is not evaluated.
disallow_non_default_locale : Any -> Any
disallow_non_default_locale self ~action = case self of
    Case_Sensitivity.Insensitive locale ->
        uses_default_locale = locale == Locale.default
        if uses_default_locale then action else
            Error.throw (Illegal_Argument.Error "Custom locales are not supported for this operation.")
    _ -> action
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,12 @@ type Regex
if should_recompile.not then self else
Regex.compile self.internal_regex_object.pattern case_insensitive

## PRIVATE

   Returns the pattern held by the underlying regex object, i.e. the original
   pattern string, as a `Text`.
pattern_string : Text
pattern_string self =
    regex_object = self.internal_regex_object
    regex_object.pattern

## PRIVATE
Convert the polyglot map to a Map.
polyglot_map_to_map : Any -> Map Any Any
Expand Down
51 changes: 46 additions & 5 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import project.Internal.IR.Context.Context
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.Query.Query
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.Replace_Params.Replace_Params
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
from project.Data.Table import freshen_columns, Table
from project.Errors import Integrity_Error, Unsupported_Database_Operation, Unsupported_Name
Expand Down Expand Up @@ -1125,6 +1126,40 @@ type Column
sensitively.
- only_first: If True, only replace the first match.

! Backend Support

Each database backend supports different combinations of options:

Text:
+----------------+------------+----------+--------+
| case_sensitive | only_first | postgres | sqlite |
+----------------+------------+----------+--------+
| t | f | ✓ | ✓ |
| t | t | ✓ | ✓ |
| f | f | ✓ | ✗ |
| f | t | ✓ | ✓ |
+----------------+------------+----------+--------+

Regex:
+----------------+------------+----------+--------+
| case_sensitive | only_first | postgres | sqlite |
+----------------+------------+----------+--------+
| t | f | ✓ | ✗ |
| t | t | ✓ | ✗ |
| f | f | ✓ | ✗ |
| f | t | ✓ | ✗ |
+----------------+------------+----------+--------+

Text Column:
+----------------+------------+----------+--------+
| case_sensitive | only_first | postgres | sqlite |
+----------------+------------+----------+--------+
| t | f | ✓ | ✓ |
| t | t | ✗ | ✓ |
| f | f | ✗ | ✗ |
| f | t | ✗ | ✓ |
+----------------+------------+----------+--------+

> Example
Replace dashes with underscores.

Expand All @@ -1140,11 +1175,17 @@ type Column

column.replace '"(.*?)"'.to_regex '($1)'
@term make_regex_text_widget
replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column
replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False =
_ = [term, new_text, case_sensitivity, only_first]
msg = "`Column.replace` is not yet implemented."
Error.throw (Unsupported_Database_Operation.Error msg)
replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column ! Unsupported_Database_Operation
replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Default only_first=False =
    # Guards run outermost-first: the column must be text, then custom locales
    # are rejected, then the dialect decides whether the option combination is
    # supported at all.
    Value_Type.expect_text self <|
        case_sensitivity.disallow_non_default_locale <|
            term_type = Meta.type_of term
            replace_params = Replace_Params.Value term_type case_sensitivity only_first
            self.connection.dialect.if_replace_params_supports replace_params <|
                # A `Regex` is passed to the operation as its pattern string;
                # any other term is passed through unchanged.
                term_for_sql = case term of
                    _ : Regex -> term.pattern_string
                    _ -> term
                result_name = self.naming_helpers.function_name "replace" [self, term_for_sql, new_text]
                # The original `term` and the parameters travel alongside the
                # operands as the final argument of `make_op`.
                self.make_op "REPLACE" [term_for_sql, new_text] result_name [term, replace_params]

## Gets the year as a number from the date stored in the column.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import project.Internal.IR.Order_Descriptor.Order_Descriptor
import project.Internal.IR.Query.Query
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.Postgres.Postgres_Dialect
import project.Internal.Replace_Params.Replace_Params
import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
import project.Internal.SQLite.SQLite_Dialect
Expand Down Expand Up @@ -231,6 +232,13 @@ type Dialect
_ = [period, operation_input_type]
Unimplemented.throw "This is an interface only."

## PRIVATE
   Interface declaration for the check of whether a backend supports a given
   combination of `replace` parameters. This version ignores both arguments
   and always throws `Unimplemented`.

   Arguments:
   - replace_params: the `Replace_Params` describing the requested options.
   - action: the suspended computation associated with the check.
if_replace_params_supports : Replace_Params -> Any -> Any
if_replace_params_supports self replace_params ~action =
_ = [replace_params, action]
Unimplemented.throw "This is an interface only."

## PRIVATE

The dialect of SQLite databases.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ import Standard.Base.Errors.Unimplemented.Unimplemented
import Standard.Table.Data.Aggregate_Column.Aggregate_Column
import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Vector_Builder.Vector_Builder
from Standard.Table import Value_Type
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
from Standard.Table.Errors import Inexact_Type_Coercion

import project.Connection.Connection.Connection
import project.Data.Column.Column
import project.Data.Dialect
import project.Data.SQL.Builder
import project.Data.SQL.SQL_Fragment
Expand All @@ -33,6 +35,7 @@ import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.Postgres.Postgres_Error_Mapper.Postgres_Error_Mapper
import project.Internal.Postgres.Postgres_Type_Mapping.Postgres_Type_Mapping
import project.Internal.Replace_Params.Replace_Params
import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
import project.Internal.Statement_Setter.Statement_Setter
Expand Down Expand Up @@ -239,10 +242,16 @@ type Postgres_Dialect
_ ->
Date_Period_Metadata.Value period operation_input_type

## PRIVATE
   Evaluates `action` if `replace_params` is contained in
   `supported_replace_params`; otherwise returns the result of
   `replace_params.throw_unsupported` and `action` is not evaluated.
if_replace_params_supports : Replace_Params -> Any -> Any
if_replace_params_supports self replace_params ~action =
    is_supported = supported_replace_params.contains replace_params
    if is_supported then action else replace_params.throw_unsupported

## PRIVATE
make_internal_generator_dialect =
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive]+concat_ops+cases+trim_ops
text = [starts_with, contains, ends_with, agg_shortest, agg_longest, make_case_sensitive, ["REPLACE", replace]]+concat_ops+cases+trim_ops
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
arith_extensions = [is_nan, is_inf, floating_point_div, mod_op, decimal_div, decimal_mod, ["ROW_MIN", Base_Generator.make_function "LEAST"], ["ROW_MAX", Base_Generator.make_function "GREATEST"]]
bool = [bool_or]
Expand Down Expand Up @@ -492,6 +501,67 @@ decimal_div = Base_Generator.lift_binary_op "DECIMAL_DIV" x-> y->
decimal_mod = Base_Generator.lift_binary_op "DECIMAL_MOD" x-> y->
x ++ " - FLOOR(CAST(" ++ x ++ " AS decimal) / CAST(" ++ y ++ " AS decimal)) * " ++ y

## PRIVATE
   The combinations of `replace` options accepted by
   `if_replace_params_supports`, grouped by the type of the search term.
supported_replace_params : Set Replace_Params
supported_replace_params =
    text_params = [Replace_Params.Value Text Case_Sensitivity.Default False, Replace_Params.Value Text Case_Sensitivity.Default True, Replace_Params.Value Text Case_Sensitivity.Sensitive False, Replace_Params.Value Text Case_Sensitivity.Sensitive True, Replace_Params.Value Text Case_Sensitivity.Insensitive False, Replace_Params.Value Text Case_Sensitivity.Insensitive True]
    regex_params = [Replace_Params.Value Regex Case_Sensitivity.Default False, Replace_Params.Value Regex Case_Sensitivity.Default True, Replace_Params.Value Regex Case_Sensitivity.Sensitive False, Replace_Params.Value Regex Case_Sensitivity.Sensitive True, Replace_Params.Value Regex Case_Sensitivity.Insensitive False, Replace_Params.Value Regex Case_Sensitivity.Insensitive True]
    # Column patterns are only listed for case-sensitive, replace-all.
    column_params = [Replace_Params.Value Column Case_Sensitivity.Default False, Replace_Params.Value Column Case_Sensitivity.Sensitive False]
    Set.from_vector (text_params + regex_params + column_params)

## PRIVATE
   Generates the PostgreSQL expression for the `REPLACE` operation.

   Arguments:
   - args: the SQL fragments for the input text, the pattern and the
     replacement, in that order.
   - metadata: holds the raw pattern at index 0 and the `Replace_Params` at
     index 1.
replace : Vector Builder -> Any -> Builder
replace args metadata =
    input = args.at 0
    pattern = args.at 1
    replacement = args.at 2

    # `raw_pattern` is a `Text` or `Regex` value that is not embedded in IR.
    raw_pattern = metadata.at 0
    replace_params = metadata.at 1

    ignore_case = case replace_params.case_sensitivity of
        Case_Sensitivity.Insensitive _ -> True
        _ -> False
    replace_all = replace_params.only_first.not

    # Flags passed to REGEXP_REPLACE: 'i' when ignoring case, 'g' when every
    # match (not just the first) is to be replaced.
    case_flag = if ignore_case then "i" else ""
    global_flag = if replace_all then "g" else ""
    flags = case_flag + global_flag

    plain_replace =
        Builder.code "REPLACE(" ++ input ++ ", " ++ pattern ++ ", " ++ replacement ++ ")"

    # The flags argument is omitted entirely when no flag is needed.
    regexp_replace regex_pattern =
        closing = if flags == "" then ")" else ", '" + flags + "')"
        Builder.code "REGEXP_REPLACE(" ++ input ++ ", " ++ regex_pattern ++ ", " ++ replacement ++ closing

    # Plain REPLACE is only emitted for case-sensitive, replace-all requests.
    use_plain_replace = ignore_case.not && replace_all

    # A `Text` pattern has to be escaped before it is handed to
    # REGEXP_REPLACE; a `Regex` contributes its pattern string as-is; a
    # `Column` pattern is only supported through plain REPLACE.
    case replace_params.input_type of
        Text -> if use_plain_replace then plain_replace else
            regexp_replace (Builder.interpolation (Regex.escape raw_pattern))
        Regex -> regexp_replace (Builder.interpolation raw_pattern.pattern_string)
        Column -> if use_plain_replace then plain_replace else replace_params.throw_unsupported

## PRIVATE
make_extract_as_int enso_name sql_name=enso_name =
Base_Generator.lift_unary_op enso_name arg->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import Standard.Base.Data.Boolean.Boolean
import Standard.Base.Error.Error
import Standard.Base.Nothing.Nothing
import Standard.Base.Data.Text.Case_Sensitivity.Case_Sensitivity

from project.Errors import Unsupported_Database_Operation

## PRIVATE
   Specifies a set of parameters to `Column.replace`.
type Replace_Params
    ## PRIVATE
       Arguments:
       - input_type: the type of the search term passed to `replace`.
       - case_sensitivity: the requested case sensitivity.
       - only_first: whether only the first match should be replaced.
    Value input_type (case_sensitivity : Case_Sensitivity) (only_first : Boolean)

    ## Raise an exception for an unsupported combination of parameters.

       The message deliberately does not name a specific backend, because
       this method is called by more than one database dialect.
    throw_unsupported : Nothing ! Unsupported_Database_Operation
    throw_unsupported self =
        options = "argument type " + self.input_type.to_text + ", case_sensitivity " + self.case_sensitivity.to_display_text + ", only_first " + self.only_first.to_text
        msg = "The REPLACE operation is not supported on this database backend for the following options: " + options
        Error.throw (Unsupported_Database_Operation.Error msg)
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ from Standard.Table import Value_Type
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all

import project.Connection.Connection.Connection
import project.Data.Column.Column
import project.Data.Dialect
import project.Data.SQL.Builder
import project.Data.SQL_Statement.SQL_Statement
Expand All @@ -28,6 +29,7 @@ import project.Internal.IR.Order_Descriptor.Order_Descriptor
import project.Internal.IR.Query.Query
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.Replace_Params.Replace_Params
import project.Internal.SQL_Type_Mapping.SQL_Type_Mapping
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
import project.Internal.SQLite.SQLite_Error_Mapper.SQLite_Error_Mapper
Expand Down Expand Up @@ -265,9 +267,15 @@ type SQLite_Dialect
_ = [period, operation_input_type]
Error.throw (Unsupported_Database_Operation.Error "SQLite backend does not support date/time operations.")

## PRIVATE
   Evaluates `action` if `replace_params` is contained in
   `supported_replace_params`; otherwise returns the result of
   `replace_params.throw_unsupported` and `action` is not evaluated.
if_replace_params_supports : Replace_Params -> Any -> Any
if_replace_params_supports self replace_params ~action =
    is_supported = supported_replace_params.contains replace_params
    if is_supported then action else replace_params.throw_unsupported

## PRIVATE
make_internal_generator_dialect =
text = [starts_with, contains, ends_with, make_case_sensitive]+concat_ops+trim_ops
text = [starts_with, contains, ends_with, make_case_sensitive, ["REPLACE", replace]]+concat_ops+trim_ops
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
stats = [agg_stddev_pop, agg_stddev_samp]
arith_extensions = [is_inf, floating_point_div, mod_op]
Expand Down Expand Up @@ -405,6 +413,47 @@ floating_point_div = Base_Generator.lift_binary_op "/" x-> y->
mod_op = Base_Generator.lift_binary_op "mod" x-> y->
x ++ " - FLOOR(CAST(" ++ x ++ " AS REAL) / CAST(" ++ y ++ " AS REAL)) * " ++ y

## PRIVATE
   The combinations of `replace` options accepted by
   `if_replace_params_supports`.

   NOTE(review): `replace` in this file also has a code path for `Column`
   patterns, but no `Column` combination is listed here - confirm whether
   that is intentional.
supported_replace_params : Set Replace_Params
supported_replace_params =
    case_sensitive_params = [Replace_Params.Value Text Case_Sensitivity.Default False, Replace_Params.Value Text Case_Sensitivity.Sensitive False, Replace_Params.Value Text Case_Sensitivity.Default True, Replace_Params.Value Text Case_Sensitivity.Sensitive True]
    # Case-insensitive replacement is only listed for the first-match form.
    case_insensitive_params = [Replace_Params.Value Text Case_Sensitivity.Insensitive True]
    Set.from_vector (case_sensitive_params + case_insensitive_params)

## PRIVATE
   Generates the SQLite expression for the `REPLACE` operation.

   Arguments:
   - args: the SQL fragments for the input text, the pattern and the
     replacement, in that order.
   - metadata: holds the `Replace_Params` at index 1.
replace : Vector Builder -> Any -> Builder
replace args metadata =
    input = args.at 0
    pattern = args.at 1
    replacement = args.at 2

    replace_params = metadata.at 1

    # Only `Text` and `Column` patterns have an expression here; any other
    # pattern type is reported as unsupported.
    is_plain_pattern = replace_params.input_type == Text || replace_params.input_type == Column
    ignore_case = case replace_params.case_sensitivity of
        Case_Sensitivity.Insensitive _ -> True
        _ -> False
    case_sensitive = ignore_case.not

    case is_plain_pattern of
        False -> replace_params.throw_unsupported
        True -> case replace_params.only_first of
            True -> replace_only_first case_sensitive input pattern replacement
            False -> case ignore_case of
                # Case-insensitive replace-all has no expression here.
                True -> replace_params.throw_unsupported
                False -> Builder.code "REPLACE(" ++ input ++ ", " ++ pattern ++ ", " ++ replacement ++ ")"

## PRIVATE
   Builds an SQLite expression that replaces only the first occurrence of the
   pattern `p` in the text `t` with `r`, leaving `t` unchanged when the
   pattern is not found.

   Arguments:
   - case_sensitive: if `False`, both the text and the pattern are wrapped in
     `LOWER` before searching, so that patterns containing upper-case
     characters can still match.
   - t: SQL fragment for the text being searched.
   - p: SQL fragment for the pattern.
   - r: SQL fragment for the replacement.

   NOTE(review): SQLite's built-in `LOWER` presumably folds ASCII characters
   only (unless an ICU extension is loaded), so case-insensitive matching is
   likely ASCII-only - confirm.
replace_only_first case_sensitive t p r =
    search_string = if case_sensitive then t else
        Builder.code "LOWER(" ++ t ++ ")"
    # The pattern must be folded the same way as the text; otherwise a
    # pattern with upper-case characters never matches the lowered text.
    search_pattern = if case_sensitive then p else
        Builder.code "LOWER(" ++ p ++ ")"
    instr = Builder.code "INSTR(" ++ search_string ++ ", " ++ search_pattern ++ ")"
    # The prefix and suffix are cut from the original (unfolded) text, and the
    # match length is taken from the original pattern.
    prefix = Builder.code "SUBSTR(" ++ t ++ ", 1," ++ instr ++ "-1)"
    suffix = Builder.code "SUBSTR(" ++ t ++ "," ++ instr ++ "+LENGTH(" ++ p ++ "))"
    concatenation = prefix ++ " || " ++ r ++ " || " ++ suffix
    Builder.code "CASE WHEN " ++ instr ++ " = 0 THEN " ++ t ++ " ELSE " ++ concatenation ++ " END"

## PRIVATE
It will return `Nothing` if the type does not require custom logic.
make_custom_cast column target_value_type type_mapping =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ operations_map =

always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR", "IS_INF"]
always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"]
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM"]
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE"]
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER"]
same_as_first = ["TRUNCATE", "CEIL", "FLOOR"]
arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
Expand Down
Loading

0 comments on commit 1f6fcf1

Please sign in to comment.