diff --git a/CHANGELOG.md b/CHANGELOG.md index 25638fecdbf2..a41ded36f959 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -207,6 +207,8 @@ - [Added `Date_Period.Week` to `start_of` and `end_of` methods.][3733] - [Replaced `Table.where` with a new API relying on `Table.filter`.][3750] - [Added `Filter_Condition` to `Vector`, `Range` and `List`.][3770] +- [Extended `Filter_Condition` with `Is_Empty`, `Not_Empty`, `Like` and + `Not_Like`.][3775] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -332,6 +334,7 @@ [3749]: https://github.com/enso-org/enso/pull/3749 [3750]: https://github.com/enso-org/enso/pull/3750 [3770]: https://github.com/enso-org/enso/pull/3770 +[3775]: https://github.com/enso-org/enso/pull/3775 #### Enso Compiler diff --git a/build.sbt b/build.sbt index 742d952233b0..3f6f55386f41 100644 --- a/build.sbt +++ b/build.sbt @@ -2039,7 +2039,7 @@ buildEngineDistribution := { log.info(s"Engine package created at $root") } -val stdBitsProjects = List("Base", "Database", "Google_Api", "Image", "Table") +val stdBitsProjects = List("Base", "Database", "Google_Api", "Image", "Table", "All") val allStdBits: Parser[String] = stdBitsProjects.map(v => v: Parser[String]).reduce(_ | _) @@ -2057,7 +2057,7 @@ buildStdLib := Def.inputTaskDyn { }.evaluated lazy val pkgStdLibInternal = inputKey[Unit]("Use `buildStdLib`") -pkgStdLibInternal := Def.inputTaskDyn { +pkgStdLibInternal := Def.inputTask { val cmd = allStdBits.parsed val root = engineDistributionRoot.value val log: sbt.Logger = streams.value.log @@ -2073,15 +2073,27 @@ pkgStdLibInternal := Def.inputTaskDyn { (`std-image` / Compile / packageBin).value case "Table" => (`std-table` / Compile / packageBin).value + case "All" => + (`std-base` / Compile / packageBin).value + (`std-table` / Compile / packageBin).value + (`std-database` / Compile / packageBin).value + (`std-image` / Compile / packageBin).value + (`std-google-api` / Compile / packageBin).value case _ => } - StdBits.buildStdLibPackage( - cmd, - root, - cacheFactory, - log, - defaultDevEnsoVersion - ) + val libs = if (cmd != "All") Seq(cmd) else { + val prefix = "Standard." + Editions.standardLibraries.filter(_.startsWith(prefix)).map(_.stripPrefix(prefix)) + } + libs.foreach { lib => + StdBits.buildStdLibPackage( + lib, + root, + cacheFactory, + log, + defaultDevEnsoVersion + ) + } }.evaluated lazy val buildLauncherDistribution = diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso index 2254cc1d10ce..f22c29621dfc 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso @@ -2,6 +2,8 @@ from Standard.Base import all from Standard.Base.Data.Filter_Condition.Filter_Condition import all +polyglot java import org.enso.base.Regex_Utils + type Filter_Condition ## Is less than a value (or another column, in case of Table operations)? Less than:Any @@ -57,6 +59,52 @@ type Filter_Condition ## Is the value equal to False (Boolean only)? Is_False + ## Is equal to "" or Nothing (Text only)? + Is_Empty + + ## Is not equal to "" and Nothing (Text only)? + Not_Empty + + ## Does the value match the SQL pattern (Text only)? + + It accepts a Text value representing the matching pattern. 
In case of + Table operations, it can accept another column - then the corresponding + values from the source column and the provided column are checked. + + The pattern is interpreted according to the standard SQL convention: + - the `%` character matches any sequence of characters, + - the `_` character matches any single character, + - any other character is matched literally. + + ! Known Bugs + There is a known bug in Java Regex where escape characters are not + handled properly in Unicode-normalized matching mode. Due to this + limitation, Unicode normalization has been disabled for this function, + so beware that some equivalent graphemes like 'ś' and 's\u0301' will + not be matched. + See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926 + Like pattern:Text + + ## Does the value not match the SQL pattern (Text only)? + + It accepts a Text value representing the matching pattern. In case of + Table operations, it can accept another column - then the corresponding + values from the source column and the provided column are checked. + + The pattern is interpreted according to the standard SQL convention: + - the `%` character matches any sequence of characters, + - the `_` character matches any single character, + - any other character is matched literally. + + ! Known Bugs + There is a known bug in Java Regex where escape characters are not + handled properly in Unicode-normalized matching mode. Due to this + limitation, Unicode normalization has been disabled for this function, + so beware that some equivalent graphemes like 'ś' and 's\u0301' will + not be matched. + See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926 + Not_Like pattern:Text + ## Converts a `Filter_Condition` condition into a predicate taking an element and returning a value indicating whether the element should be accepted by the filter. @@ -80,3 +128,25 @@ type Filter_Condition _ -> True Is_True -> ==True Is_False -> ==False + Is_Empty -> elem -> case elem of + Nothing -> True + "" -> True + _ -> False + Not_Empty -> elem -> case elem of + Nothing -> False + "" -> False + _ -> True + Like sql_pattern -> + regex = sql_like_to_regex sql_pattern + regex.matches + Not_Like sql_pattern -> + regex = sql_like_to_regex sql_pattern + elem -> regex.matches elem . not + +## PRIVATE +sql_like_to_regex sql_pattern = + regex_pattern = Regex_Utils.sql_like_pattern_to_regex sql_pattern + ## There is a bug with Java Regex in Unicode normalized mode (CANON_EQ) with quoting. + https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926 + Once that bug is fixed, `match_ascii` may be set back to `False`. + Regex.compile regex_pattern dot_matches_newline=True match_ascii=True diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 35fd7200406f..6d7a467a3084 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -113,6 +113,34 @@ type Column to_sql : SQL_Statement to_sql self = self.to_table.to_sql + ## PRIVATE + Sets up an operation of arbitrary arity. + + Arguments: + - op_kind: The kind of the operation + - operands: A vector of additional operation arguments (the column itself + is always passed as the first argument). + - new_type: The type of the SQL column that results from applying the + operator. If not specified, the type of this column is used. + - operand_types: The SQL types of the additional arguments. 
They are used + if additional arguments are constants (and if not provided, the type of + this column is used). If the other argument is a column, its type is + used. + make_op self op_kind operands new_type=Nothing operand_types=Nothing = + prepare_operand operand operand_type = case operand of + other_column : Column -> + if Helpers.check_integrity self other_column then other_column.expression else + Error.throw <| Unsupported_Database_Operation_Error "Cannot use columns coming from different contexts in one expression without a join." + constant -> + actual_operand_type = operand_type.if_nothing self.sql_type + Expression.Constant actual_operand_type constant + actual_operand_types = operand_types.if_nothing (Vector.fill operands.length Nothing) + expressions = operands.zip actual_operand_types prepare_operand + + actual_new_type = new_type.if_nothing self.sql_type + new_expr = Expression.Operation op_kind ([self.expression] + expressions) + Column.Value self.name self.connection actual_new_type new_expr self.context + ## PRIVATE Creates a binary operation with given kind and operand. @@ -129,20 +157,7 @@ type Column defaults to the current type if not provided. make_binary_op : Text -> Text -> (Column | Any) -> (SQL_Type | Nothing) -> (SQL_Type | Nothing) -> Column make_binary_op self op_kind operand new_type=Nothing operand_type=Nothing = - actual_new_type = new_type.if_nothing self.sql_type - case operand of - Column.Value _ _ _ other_expr _ -> - case Helpers.check_integrity self operand of - False -> - Error.throw <| Unsupported_Database_Operation_Error "Cannot compare columns coming from different contexts. Only columns of a single table can be compared." - True -> - new_expr = Expression.Operation op_kind [self.expression, other_expr] - Column.Value self.name self.connection actual_new_type new_expr self.context - _ -> - actual_operand_type = operand_type.if_nothing self.sql_type - other = Expression.Constant actual_operand_type operand - new_expr = Expression.Operation op_kind [self.expression, other] - Column.Value self.name self.connection actual_new_type new_expr self.context + self.make_op op_kind [operand] new_type [operand_type] ## PRIVATE @@ -153,10 +168,7 @@ type Column - new_type: The type of the SQL column that results from applying the operator. make_unary_op : Text -> Text -> (SQL_Type | Nothing) -> Column - make_unary_op self op_kind new_type=Nothing = - actual_new_type = new_type.if_nothing self.sql_type - new_expr = Expression.Operation op_kind [self.expression] - Column.Value self.name self.connection actual_new_type new_expr self.context + make_unary_op self op_kind new_type=Nothing = self.make_op op_kind [] new_type ## UNSTABLE @@ -314,6 +326,22 @@ type Column < : Column | Any -> Column < self other = self.make_binary_op "<" other new_type=SQL_Type.boolean + ## Element-wise inclusive bounds check. + + Arguments: + - lower: The lower bound to compare elements of `self` against. If + `lower` is a column, the comparison is performed pairwise between + corresponding elements of `self` and `lower`. + - upper: The upper bound to compare elements of `self` against. If + `upper` is a column, the comparison is performed pairwise between + corresponding elements of `self` and `upper`. + + Returns a column with boolean values indicating whether values of this + column fit between the lower and upper bounds (both ends inclusive). 
+ between : (Column | Any) -> (Column | Any) -> Column + between self lower upper = + self.make_op "BETWEEN" [lower, upper] new_type=SQL_Type.boolean + ## UNSTABLE Element-wise addition. @@ -407,6 +435,12 @@ type Column is_missing : Column is_missing self = self.make_unary_op "ISNULL" new_type=SQL_Type.boolean + ## PRIVATE + Returns a column of booleans, with `True` items at the positions where + this column contains an empty string or `Nothing`. + is_empty : Column + is_empty self = self.make_unary_op "ISEMPTY" new_type=SQL_Type.boolean + ## UNSTABLE Returns a new column where missing values have been replaced with the @@ -517,6 +551,11 @@ type Column contains : Column | Text -> Column contains self other = self.make_binary_op "contains" other new_type=SQL_Type.boolean + ## PRIVATE + Checks for each element of the column if it matches an SQL-like pattern. + like : Column | Text -> Column + like self other = self.make_binary_op "LIKE" other new_type=SQL_Type.boolean + ## PRIVATE as_internal : Internal_Column as_internal self = Internal_Column.Value self.name self.sql_type self.expression diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index 2dd95b3ac5ce..8405041367ef 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -168,15 +168,39 @@ base_dialect = bin = name -> [name, make_binary_op name] unary = name -> [name, make_unary_op name] fun = name -> [name, make_function name] + arith = [bin "+", bin "-", bin "*", bin "/"] logic = [bin "AND", bin "OR", unary "NOT"] - compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">="] + compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]] agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"] counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]] + text = [["ISEMPTY", make_is_empty], bin "LIKE"] nulls = [["ISNULL", make_right_unary_op "IS NULL"], ["FILLNULL", make_function "COALESCE"]] - base_map = Map.from_vector (arith + logic + compare + agg + nulls + counts) + base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls) Internal_Dialect.Value base_map wrap_in_quotes +## PRIVATE +make_is_empty : Vector Builder -> Builder +make_is_empty arguments = case arguments.length of + 1 -> + arg = arguments.at 0 + is_null = (arg ++ " IS NULL").paren + is_empty = (arg ++ " = ''").paren + (is_null ++ " OR " ++ is_empty).paren + _ -> + Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation ISEMPTY") + +## PRIVATE +make_between : Vector Builder -> Builder +make_between arguments = case arguments.length of + 3 -> + expr = arguments.at 0 + lower = arguments.at 1 + upper = arguments.at 2 + (expr ++ " BETWEEN " ++ lower ++ " AND " ++ upper).paren + _ -> + Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation BETWEEN") + ## PRIVATE Builds code for an expression. 
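Reviewer note (illustrative sketch, not part of the patch): the new conditions are meant to be used through `Vector.filter` and `Table.filter`, exactly as the tests further down exercise them. A minimal in-memory example, assuming the usual `Standard.Base`/`Standard.Table` imports and a hypothetical column name:

    example_filtering =
        t = Table.new [["name", ["banana", "nana", "", Nothing]]]
        # `Like` uses SQL pattern semantics: `%` matches any sequence of characters, `_` exactly one.
        liked = t.filter "name" (Filter_Condition.Like "%an%")
        # `Between` is inclusive on both ends; the bounds may also be other columns.
        bounded = t.filter "name" (Filter_Condition.Between "a" "c")
        # `Is_Empty` keeps rows whose value is "" or Nothing; `Not_Empty` is its complement.
        empties = t.filter "name" Filter_Condition.Is_Empty
        [liked, bounded, empties]

On the Database backend the same conditions compile to SQL through the generators above, e.g. `make_between` renders `"col" BETWEEN ? AND ?` (covered by the new Codegen_Spec test below) and `make_is_empty` renders `("col" IS NULL) OR ("col" = '')`.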
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 12aadbdfc376..22a30d1a359d 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -247,6 +247,21 @@ type Column < : Column | Any -> Column < self other = run_vectorized_binary_op self "<" (<) other + ## Element-wise inclusive bounds check. + + Arguments: + - lower: The lower bound to compare elements of `self` against. If + `lower` is a column, the comparison is performed pairwise between + corresponding elements of `self` and `lower`. + - upper: The upper bound to compare elements of `self` against. If + `upper` is a column, the comparison is performed pairwise between + corresponding elements of `self` and `upper`. + + Returns a column with boolean values indicating whether values of this + column fit between the lower and upper bounds (both ends inclusive). + between : (Column | Any) -> (Column | Any) -> Column + between self lower upper = (self >= lower) && (self <= upper) + ## ALIAS Add Columns Element-wise addition. @@ -444,6 +459,12 @@ type Column is_missing : Column is_missing self = run_vectorized_unary_op self "is_missing" (== Nothing) + ## PRIVATE + Returns a column of booleans, with `True` items at the positions where + this column contains an empty string or `Nothing`. + is_empty : Column + is_empty self = run_vectorized_unary_op self "is_empty" Filter_Condition.Is_Empty.to_predicate + ## Returns a column of booleans, with `True` items at the positions where this column does not contain a `Nothing`. @@ -564,6 +585,12 @@ type Column contains self other = run_vectorized_binary_op self "contains" (a -> b -> a.contains b) other + ## PRIVATE + Checks for each element of the column if it matches an SQL-like pattern. + like : Column | Text -> Column + like self other = + run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State_Error "The `Like` operation should only be used on Text columns.")) other + ## ALIAS Transform Column Applies `function` to each item in this column and returns the column diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso index d28302c770a6..24deeea8c79f 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso @@ -12,13 +12,24 @@ from Standard.Base.Data.Filter_Condition.Filter_Condition import all It also performs validation and will throw errors if unexpected column types are encountered. 
make_filter_column source_column filter_condition = case filter_condition of + # Equality + Equal value -> (source_column == value) + Not_Equal value -> (source_column != value) + # Nothing + Is_Nothing -> source_column.is_missing + Not_Nothing -> source_column.is_missing.not + # Boolean + Is_True -> + Value_Type.expect_boolean source_column.value_type <| source_column + Is_False -> + Value_Type.expect_boolean source_column.value_type <| source_column.not + # Comparisons Less value -> (source_column < value) Equal_Or_Less value -> (source_column <= value) - Equal value -> (source_column == value) Equal_Or_Greater value -> (source_column >= value) Greater value -> (source_column > value) - Not_Equal value -> (source_column != value) - Between lower upper -> ((source_column >= lower) && (source_column <= upper)) + Between lower upper -> source_column.between lower upper + # Text Starts_With prefix -> Value_Type.expect_text source_column.value_type <| expect_column_or_value_as_text "prefix" prefix <| @@ -31,12 +42,20 @@ make_filter_column source_column filter_condition = case filter_condition of Value_Type.expect_text source_column.value_type <| expect_column_or_value_as_text "substring" substring <| source_column.contains substring - Is_Nothing -> source_column.is_missing - Not_Nothing -> source_column.is_missing.not - Is_True -> - Value_Type.expect_boolean source_column.value_type <| source_column - Is_False -> - Value_Type.expect_boolean source_column.value_type <| source_column.not + Is_Empty -> + Value_Type.expect_text source_column.value_type <| + source_column.is_empty + Not_Empty -> + Value_Type.expect_text source_column.value_type <| + source_column.is_empty.not + Like pattern -> + Value_Type.expect_text source_column.value_type <| + expect_column_or_value_as_text "pattern" pattern <| + source_column.like pattern + Not_Like pattern -> + Value_Type.expect_text source_column.value_type <| + expect_column_or_value_as_text "pattern" pattern <| + source_column.like pattern . not ## PRIVATE expect_column_or_value_as_text field_name column_or_value ~action = case column_or_value of diff --git a/project/StdBits.scala b/project/StdBits.scala index 98bd16714225..292a94c9fc6b 100644 --- a/project/StdBits.scala +++ b/project/StdBits.scala @@ -132,7 +132,7 @@ object StdBits { cacheFactory: sbt.util.CacheStoreFactory, log: sbt.Logger, defaultDevEnsoVersion: String - ) = Def.task { + ) = { log.info(s"Building standard library package for '$name'") val prefix = "Standard" val targetPkgRoot = root / "lib" / prefix / name / defaultDevEnsoVersion diff --git a/std-bits/base/src/main/java/org/enso/base/Regex_Utils.java b/std-bits/base/src/main/java/org/enso/base/Regex_Utils.java index ae24a5252640..8edde276714c 100644 --- a/std-bits/base/src/main/java/org/enso/base/Regex_Utils.java +++ b/std-bits/base/src/main/java/org/enso/base/Regex_Utils.java @@ -70,4 +70,42 @@ public static String[] find_all_matches(String regex, String text) { } return allMatches.toArray(new String[0]); } + + /** + * Converts a SQL-like pattern into a Regex with the same semantics. + * + *
Special regex characters present in the input pattern are quoted to match them literally + * according to the SQL-like format. + */ + public static String sql_like_pattern_to_regex(String sql_pattern) { + StringBuilder result = new StringBuilder(); + // Accumulates the intermittent characters between wildcards. These will be quoted in bulk. + StringBuilder acc = new StringBuilder(); + for (int i = 0; i < sql_pattern.length(); ++i) { + char c = sql_pattern.charAt(i); + if (c == '%' || c == '_') { + // Before inserting the converted wildcard, we append the accumulated characters, quoting + // them first. + if (acc.length() > 0) { + result.append(Pattern.quote(acc.toString())); + acc.setLength(0); + } + + if (c == '%') { + result.append(".*"); + } else { + result.append("."); + } + } else { + acc.append(c); + } + } + + // If any trailing characters were left, we append them too. + if (acc.length() > 0) { + result.append(Pattern.quote(acc.toString())); + } + + return result.toString(); + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java new file mode 100644 index 000000000000..0963d6ab9997 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java @@ -0,0 +1,60 @@ +package org.enso.table.data.column.operation.map.text; + +import java.util.BitSet; +import java.util.regex.Pattern; + +import com.ibm.icu.impl.UnicodeRegex; +import org.enso.base.Regex_Utils; +import org.enso.table.data.column.storage.BoolStorage; +import org.enso.table.data.column.storage.SpecializedStorage; +import org.enso.table.data.column.storage.Storage; +import org.enso.table.error.UnexpectedTypeException; + +public class LikeOp extends StringBooleanOp { + public LikeOp() { + super(Storage.Maps.LIKE); + } + + + /** + * There is a bug with Java Regex in Unicode normalized mode (CANON_EQ) with quoting. + * Once that bug is fixed, we should add all relevant Unicode flags here too, + * consistently with the Default Enso regex engine. 
+ */ + private final static int REGEX_FLAGS = Pattern.DOTALL; + + private Pattern createRegexPatternFromSql(String sqlPattern) { + String regex = Regex_Utils.sql_like_pattern_to_regex(sqlPattern); + String unicodeTransformed = UnicodeRegex.fix(regex); + return Pattern.compile(unicodeTransformed, REGEX_FLAGS); + } + + @Override + protected boolean doString(String a, String b) { + return createRegexPatternFromSql(b).matcher(a).matches(); + } + + @Override + public Storage runMap(SpecializedStorage storage, Object arg) { + if (arg == null) { + BitSet newVals = new BitSet(); + BitSet newMissing = new BitSet(); + newMissing.set(0, storage.size()); + return new BoolStorage(newVals, newMissing, storage.size(), false); + } else if (arg instanceof String argString) { + Pattern pattern = createRegexPatternFromSql(argString); + BitSet newVals = new BitSet(); + BitSet newMissing = new BitSet(); + for (int i = 0; i < storage.size(); i++) { + if (storage.isNa(i)) { + newMissing.set(i); + } else if (pattern.matcher(storage.getItem(i)).matches()) { + newVals.set(i); + } + } + return new BoolStorage(newVals, newMissing, storage.size(), false); + } else { + throw new UnexpectedTypeException("a Text"); + } + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index 871c7bfd41fd..b1560d3d740e 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -78,9 +78,11 @@ public static final class Maps { public static final String AND = "&&"; public static final String OR = "||"; public static final String IS_MISSING = "is_missing"; + public static final String IS_EMPTY = "is_empty"; public static final String STARTS_WITH = "starts_with"; public static final String ENDS_WITH = "ends_with"; public static final String CONTAINS = "contains"; + public static final String LIKE = "like"; } public static final class Aggregators { diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java index 807789382da9..f77b54b0bbc7 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java @@ -5,6 +5,8 @@ import org.enso.table.data.column.builder.object.StringBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperation; +import org.enso.table.data.column.operation.map.UnaryMapOperation; +import org.enso.table.data.column.operation.map.text.LikeOp; import org.enso.table.data.column.operation.map.text.StringBooleanOp; import org.graalvm.polyglot.Value; @@ -93,6 +95,20 @@ public Storage runZip(SpecializedStorage storage, Storage arg) { return new BoolStorage(r, missing, storage.size(), false); } }); + t.add( + new UnaryMapOperation<>(Maps.IS_EMPTY) { + @Override + protected Storage run(SpecializedStorage storage) { + BitSet r = new BitSet(); + for (int i = 0; i < storage.size; i++) { + String s = storage.data[i]; + if (s == null || s.isEmpty()) { + r.set(i); + } + } + return new BoolStorage(r, new BitSet(), storage.size, false); + } + }); t.add( new StringBooleanOp(Maps.STARTS_WITH) { @Override @@ -114,6 +130,7 @@ protected boolean doString(String a, String b) { return 
Text_Utils.contains(a, b); } }); + t.add(new LikeOp()); return t; } } diff --git a/test/Table_Tests/src/Common_Table_Spec.enso b/test/Table_Tests/src/Common_Table_Spec.enso index 2523abe6cacd..1603e5cabc80 100644 --- a/test/Table_Tests/src/Common_Table_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Spec.enso @@ -16,7 +16,7 @@ import Standard.Test.Problems from project.Util import all type Test_Selection - Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True + Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False ## A common test suite for shared operations on the Table API. @@ -1118,8 +1118,8 @@ spec prefix table_builder test_selection pending=Nothing = t.filter "X" (Filter_Condition.Equal to=(t.at "Y")) . at "X" . to_vector . should_equal ["b", "c"] t.filter "X" (Filter_Condition.Between (t.at "Y") "bzzzz") . at "X" . to_vector . should_equal ["abb", "baca", "b"] - Test.specify "by text search (contains, starts_with, ends_with)" <| - t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "baca", "banana", Nothing, "nana"]], ["Y", ["a", "b", "b", "c", "a"]]] + Test.specify "by text search (contains, starts_with, ends_with, like)" <| + t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "baca", "banana", Nothing, "nana"]], ["Y", ["a", "b", "b", "c", "a"]], ["Z", ["aaaaa", "bbbbb", "[ab]", "[ab]aaaa", "[ab]ccc"]]] t.filter "X" (Filter_Condition.Starts_With "ba") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana"] t.filter "X" (Filter_Condition.Ends_With "na") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"] @@ -1129,8 +1129,51 @@ spec prefix table_builder test_selection pending=Nothing = t.filter "X" (Filter_Condition.Ends_With (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"] t.filter "X" (Filter_Condition.Contains (t.at "Y")) on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", "baca", "banana", "nana"] + t.filter "X" (Filter_Condition.Like "%an%") on_problems=Report_Error . at "X" . to_vector . should_equal ["banana", "nana"] + t.filter "X" (Filter_Condition.Like "_a%") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana", "nana"] + t.filter "X" (Filter_Condition.Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["abb"] + t.filter "X" (Filter_Condition.Like "nana") on_problems=Report_Error . at "X" . to_vector . should_equal ["nana"] + t.filter "Z" (Filter_Condition.Like "[ab]_%") on_problems=Report_Error . at "Z" . to_vector . should_equal ["[ab]aaaa", "[ab]ccc"] + + t.filter "X" (Filter_Condition.Not_Like "%b") on_problems=Report_Error . at "X" . to_vector . should_equal ["baca", "banana", "nana"] + t.filter "Z" (Filter_Condition.Not_Like "[ab]%") on_problems=Report_Error . at "Z" . to_vector . should_equal ["aaaaa", "bbbbb"] + + Test.specify "text operations should also match newlines" <| + t = table_builder [["X", ['a\n\n\n', 'a\n', 'a\n\n\nb', 'a\nb', 'caa\nbb']]] + t.filter "X" (Filter_Condition.Like 'a_') on_problems=Report_Error . at "X" . to_vector . 
should_equal ['a\n'] + t.filter "X" (Filter_Condition.Like 'a%') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\n', 'a\n', 'a\n\n\nb', 'a\nb'] + t.filter "X" (Filter_Condition.Like 'a_b') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\nb'] + t.filter "X" (Filter_Condition.Like '%\nb') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\nb', 'a\nb'] + + t.filter "X" (Filter_Condition.Contains '\nb') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\nb', 'a\nb', 'caa\nbb'] + t.filter "X" (Filter_Condition.Ends_With '\nb') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\nb', 'a\nb'] + t.filter "X" (Filter_Condition.Ends_With '\n') on_problems=Report_Error . at "X" . to_vector . should_equal ['a\n\n\n', 'a\n'] + t.filter "X" (Filter_Condition.Starts_With 'c') on_problems=Report_Error . at "X" . to_vector . should_equal ['caa\nbb'] + + if test_selection.supports_unicode_normalization then + t = table_builder [["X", ['śnieg', 's\u0301nieg', 'X', Nothing, 'połać', 'połac\u0301']]] + Test.specify "text operations should support Unicode normalization" <| + t.filter "X" (Filter_Condition.Starts_With 'ś') on_problems=Report_Error . at "X" . to_vector . should_equal ['śnieg', 's\u0301nieg'] + t.filter "X" (Filter_Condition.Contains 'ś') on_problems=Report_Error . at "X" . to_vector . should_equal ['śnieg', 's\u0301nieg'] + t.filter "X" (Filter_Condition.Ends_With 'ś') on_problems=Report_Error . at "X" . to_vector . should_equal [] + t.filter "X" (Filter_Condition.Ends_With 'ć') on_problems=Report_Error . at "X" . to_vector . should_equal ['połać', 'połac\u0301'] + + # This should be replaced with the disabled test below, once the related bug is fixed. + t.filter "X" (Filter_Condition.Like 'ś%') on_problems=Report_Error . at "X" . to_vector . should_equal ['śnieg'] + + # This test is split off just to mark is as pending, once resolved it can be merged with the one above. + Test.specify "text operations should support Unicode normalization (like)" pending='There is a bug with Java Regex in Unicode normalized mode (CANON_EQ) with quoting.\nhttps://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926' <| + t.filter "X" (Filter_Condition.Like 'ś%') on_problems=Report_Error . at "X" . to_vector . should_equal ['śnieg', 's\u0301nieg'] + + Test.specify "by empty text" <| + t = table_builder [["ix", [1, 2, 3, 4, 5]], ["X", ["abb", "", " ", Nothing, "nana"]]] + t.filter "X" Filter_Condition.Is_Empty on_problems=Report_Error . at "X" . to_vector . should_equal ["", Nothing] + t.filter "X" Filter_Condition.Not_Empty on_problems=Report_Error . at "X" . to_vector . should_equal ["abb", " ", "nana"] + + Test.specify "should check types for text operations" <| + t = table_builder [["ix", [1, 2, 3, 4]], ["X", [Nothing, "A", "", " "]]] check_column_type_error_handling action = - tester = check_empty ["ix", "X", "Y"] + tester = check_empty ["ix", "X"] check_problem problem = problem.should_be_a Invalid_Value_Type.Invalid_Value_Type_Data problem.expected . 
should_equal Value_Type.Char @@ -1142,17 +1185,27 @@ spec prefix table_builder test_selection pending=Nothing = check_column_type_error_handling (t.filter "X" (Filter_Condition.Starts_With (t.at "ix")) on_problems=_) check_column_type_error_handling (t.filter "X" (Filter_Condition.Ends_With (t.at "ix")) on_problems=_) check_column_type_error_handling (t.filter "X" (Filter_Condition.Contains (t.at "ix")) on_problems=_) + check_column_type_error_handling (t.filter "X" (Filter_Condition.Like (t.at "ix")) on_problems=_) + check_column_type_error_handling (t.filter "X" (Filter_Condition.Not_Like (t.at "ix")) on_problems=_) + check_column_type_error_handling (t.filter "ix" (Filter_Condition.Starts_With "A") on_problems=_) check_column_type_error_handling (t.filter "ix" (Filter_Condition.Ends_With "A") on_problems=_) check_column_type_error_handling (t.filter "ix" (Filter_Condition.Contains "A") on_problems=_) + check_column_type_error_handling (t.filter "ix" (Filter_Condition.Like "A") on_problems=_) + check_column_type_error_handling (t.filter "ix" (Filter_Condition.Not_Like "A") on_problems=_) + + check_column_type_error_handling (t.filter "ix" Filter_Condition.Is_Empty on_problems=_) + check_column_type_error_handling (t.filter "ix" Filter_Condition.Not_Empty on_problems=_) check_scalar_type_error_handling name action = - tester = check_empty ["ix", "X", "Y"] + tester = check_empty ["ix", "X"] problems = [Type_Error_Data Text Integer name] Problems.test_problem_handling action problems tester check_scalar_type_error_handling "prefix" (t.filter "X" (Filter_Condition.Starts_With 42) on_problems=_) check_scalar_type_error_handling "suffix" (t.filter "X" (Filter_Condition.Ends_With 42) on_problems=_) check_scalar_type_error_handling "substring" (t.filter "X" (Filter_Condition.Contains 42) on_problems=_) + check_scalar_type_error_handling "pattern" (t.filter "X" (Filter_Condition.Like 42) on_problems=_) + check_scalar_type_error_handling "pattern" (t.filter "X" (Filter_Condition.Not_Like 42) on_problems=_) Test.specify "by nulls" <| t = table_builder [["ix", [1, 2, 3, 4]], ["X", [Nothing, 1, Nothing, 4]]] @@ -1169,6 +1222,8 @@ spec prefix table_builder test_selection pending=Nothing = t.filter "b" on_problems=Report_Error . at "ix" . to_vector . should_equal [1, 4, 5] t.filter "b" Filter_Condition.Is_False on_problems=Report_Error . at "ix" . to_vector . should_equal [2] + Test.specify "should check types of boolean operations" <| + t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]] tester = check_empty ["ix", "b"] check_problem problem = problem.should_be_a Invalid_Value_Type.Invalid_Value_Type_Data diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 9d2bda76e2ad..f80810de9f4b 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -98,6 +98,16 @@ spec = c2 = t1.filter (t1.at "A" == t1.at "C") . at "B" c2.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" WHERE ("T1"."A" = "T1"."C")', []] + Test.specify "should generate a single BETWEEN expression" <| + t2 = t1.filter "A" (Filter_Condition.Between 10 20) + t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN ? AND ?)', [[10, int], [20, int]]] + + t3 = t1.filter "A" (Filter_Condition.Between (t1.at "B") (t1.at "C")) + t3.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND "T1"."C")', []] + + t4 = t1.filter "A" (Filter_Condition.Between (t1.at "B") 33) + t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND ?)', [[33, int]]] + Test.group "[Codegen] Joining Tables" <| t2 = test_connection.query (SQL_Query.Table_Name "T2") t3 = test_connection.query (SQL_Query.Table_Name "T3") diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 69d833c7ee0f..597ad17b3290 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -670,7 +670,7 @@ spec = t_3 = Table.new [c_3_1, c_3_2, c_3_3] t_3.default_visualization.should_equal Visualization.Id.table - selection = Common_Table_Spec.Test_Selection.Config supports_case_sensitive_columns=True order_by=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True + selection = Common_Table_Spec.Test_Selection.Config supports_case_sensitive_columns=True order_by=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True supports_unicode_normalization=True Common_Table_Spec.spec "[In-Memory] " table_builder=Table.new test_selection=selection Test.group "Use First Row As Names" <| diff --git a/test/Tests/src/Data/List_Spec.enso b/test/Tests/src/Data/List_Spec.enso index f8c0f34d636b..987b117ebfee 100644 --- a/test/Tests/src/Data/List_Spec.enso +++ b/test/Tests/src/Data/List_Spec.enso @@ -49,6 +49,7 @@ spec = Test.group "List" <| list.filter (Filter_Condition.Greater than=3) . should_equal [4, 5].to_list list.filter (Filter_Condition.Less than=3.5) . should_equal [1, 2, 3].to_list list.filter (Filter_Condition.Equal to=3) . should_equal (Cons 3 Nil) + list.filter (Filter_Condition.Not_Equal to=3) . should_equal [1, 2, 4, 5].to_list list.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5].to_list list.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal Nil list.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4].to_list @@ -69,6 +70,13 @@ spec = Test.group "List" <| txt.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"].to_list Test.expect_panic_with (txt.filter (Filter_Condition.Starts_With 42)) Unsupported_Argument_Types_Data + ["", Nothing, " ", "a"].to_list.filter (Filter_Condition.Is_Empty) . should_equal ["", Nothing].to_list + ["", Nothing, " ", "a"].to_list.filter (Filter_Condition.Not_Empty) . should_equal [" ", "a"].to_list + ["abab", "aaabaaaa", "ba"].to_list.filter (Filter_Condition.Like "ba") . should_equal (Cons "ba" Nil) + ["abab", "aaabaaaa"].to_list.filter (Filter_Condition.Like "_ba_") . should_equal ["abab"].to_list + ["abab", "aaabaaaa"].to_list.filter (Filter_Condition.Like "%ba__%") . should_equal ["aaabaaaa"].to_list + ["abab", "aaabaaaa"].to_list.filter (Filter_Condition.Not_Like "%ba%") . should_equal Nil + mixed = [1, Nothing, "b"].to_list mixed.filter Filter_Condition.Is_Nothing . should_equal (Cons Nothing Nil) mixed.filter Filter_Condition.Not_Nothing . 
should_equal (Cons 1 (Cons "b" Nil)) diff --git a/test/Tests/src/Data/Range_Spec.enso b/test/Tests/src/Data/Range_Spec.enso index 9fdcf90b7598..df48b7b9500b 100644 --- a/test/Tests/src/Data/Range_Spec.enso +++ b/test/Tests/src/Data/Range_Spec.enso @@ -72,16 +72,21 @@ spec = Test.group "Range" <| range.filter (Filter_Condition.Greater than=3) . should_equal [4, 5] range.filter (Filter_Condition.Less than=3.5) . should_equal [1, 2, 3] range.filter (Filter_Condition.Equal to=3) . should_equal [3] + range.filter (Filter_Condition.Not_Equal to=3) . should_equal [1, 2, 4, 5] range.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5] range.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal [] range.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4] range.filter (Filter_Condition.Between 2.1 4.5) . should_equal [3, 4] Test.expect_panic_with (range.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data + Test.expect_panic_with (range.filter (Filter_Condition.Like "a%")) Unsupported_Argument_Types_Data + Test.expect_panic_with (range.filter (Filter_Condition.Not_Like "a_")) Unsupported_Argument_Types_Data range.filter Filter_Condition.Is_True . should_equal [] range.filter Filter_Condition.Is_False . should_equal [] range.filter Filter_Condition.Is_Nothing . should_equal [] range.filter Filter_Condition.Not_Nothing . should_equal [1, 2, 3, 4, 5] + range.filter Filter_Condition.Is_Empty . should_equal [] + range.filter Filter_Condition.Not_Empty . should_equal [1, 2, 3, 4, 5] Test.specify "should allow iteration" <| vec_mut = Vector.new_builder diff --git a/test/Tests/src/Data/Vector_Spec.enso b/test/Tests/src/Data/Vector_Spec.enso index 82d898d3ee5a..8c770d72c499 100644 --- a/test/Tests/src/Data/Vector_Spec.enso +++ b/test/Tests/src/Data/Vector_Spec.enso @@ -144,6 +144,9 @@ spec = Test.group "Vectors" <| vec.filter (Filter_Condition.Greater than=3) . should_equal [4, 5] vec.filter (Filter_Condition.Less than=3.5) . should_equal [1, 2, 3] vec.filter (Filter_Condition.Equal to=3) . should_equal [3] + vec.filter (Filter_Condition.Equal to=3.0) . should_equal [3] + vec.filter (Filter_Condition.Equal to=3.1) . should_equal [] + vec.filter (Filter_Condition.Not_Equal to=3) . should_equal [1, 2, 4, 5] vec.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5] vec.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal [] vec.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4] @@ -165,9 +168,38 @@ spec = Test.group "Vectors" <| txtvec.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"] Test.expect_panic_with (txtvec.filter (Filter_Condition.Starts_With 42)) Unsupported_Argument_Types_Data + ["", Nothing, " ", "a"].filter (Filter_Condition.Is_Empty) . should_equal ["", Nothing] + ["", Nothing, " ", "a"].filter (Filter_Condition.Not_Empty) . should_equal [" ", "a"] + ["abab", "aaabaaaa", "ba"].filter (Filter_Condition.Like "ba") . should_equal ["ba"] + ["abab", "aaabaaaa"].filter (Filter_Condition.Like "_ba_") . should_equal ["abab"] + ["abab", "aaabaaaa"].filter (Filter_Condition.Like "%ba__%") . should_equal ["aaabaaaa"] + ["aaaa", "bbbbb", "[ab]aaaa"].filter (Filter_Condition.Like "[ab]%") . should_equal ["[ab]aaaa"] + ["a\Qa\Eabb", "aaabb"].filter (Filter_Condition.Like "_\Qa\Ea%") . should_equal ["a\Qa\Eabb"] + ["f.txt", "abc.*"].filter (Filter_Condition.Like "%.*") . should_equal ["abc.*"] + ["f.txt", "abc.*"].filter (Filter_Condition.Not_Like "%.*") . 
should_equal ["f.txt"] + + txt2 = ['a\n\n\n', 'a\n', 'a\n\n\nb', 'a\nb', 'caa\nbb'] + txt2.filter (Filter_Condition.Like 'a_') . should_equal ['a\n'] + txt2.filter (Filter_Condition.Like 'a%') . should_equal ['a\n\n\n', 'a\n', 'a\n\n\nb', 'a\nb'] + txt2.filter (Filter_Condition.Like 'a_b') . should_equal ['a\nb'] + txt2.filter (Filter_Condition.Like '%\nb') . should_equal ['a\n\n\nb', 'a\nb'] + + txt3 = ['śnieg', 's\u0301nieg', 'X', 'połać', 'połac\u0301'] + txt3.filter (Filter_Condition.Starts_With 'ś') . should_equal ['śnieg', 's\u0301nieg'] + txt3.filter (Filter_Condition.Contains 'ś') . should_equal ['śnieg', 's\u0301nieg'] + txt3.filter (Filter_Condition.Ends_With 'ś') . should_equal [] + txt3.filter (Filter_Condition.Ends_With 'ć') . should_equal ['połać', 'połac\u0301'] + ## There is a bug with Java Regex in Unicode normalized mode (CANON_EQ) with quoting. + https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926 + # txt3.filter (Filter_Condition.Like 'ś%') . should_equal ['śnieg', 's\u0301nieg'] + # This should be replaced with the disabled test above, once the related bug is fixed. + txt3.filter (Filter_Condition.Like 'ś%') . should_equal ['śnieg'] + mixed = [1, Nothing, "b"] mixed.filter Filter_Condition.Is_Nothing . should_equal [Nothing] mixed.filter Filter_Condition.Not_Nothing . should_equal [1, "b"] + mixed.filter Filter_Condition.Is_Empty . should_equal [Nothing] + mixed.filter Filter_Condition.Not_Empty . should_equal [1, "b"] boolvec = [True, False, Nothing, True] boolvec.filter Filter_Condition.Is_True . should_equal [True, True]