enso-org · mergify · Jul 18, 2023 · Jul 14, 2023 · Jul 14, 2023 · Jul 14, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -520,6 +520,7 @@
 - [Improving date/time support in Table - added `date_diff`, `date_add`,
   `date_part` and some shorthands. Extended `Time_Period` with milli-, micro-
   and nanosecond periods.][7221]
+- [`Text.parse_to_table` can take a `Regex`.][7297]
 
 [debug-shortcuts]:
   https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@@ -749,6 +750,7 @@
 [7223]: https://github.com/enso-org/enso/pull/7223
 [7234]: https://github.com/enso-org/enso/pull/7234
 [7221]: https://github.com/enso-org/enso/pull/7221
+[7297]: https://github.com/enso-org/enso/pull/7297
 
 #### Enso Compiler
 

@@ -103,7 +103,7 @@ Table.from_objects value fields=Nothing =
    regex).
 
    Arguments:
-   - pattern: The pattern used to search within the text.
+   - pattern: The regex string or `Regex` used to search within the text.
    - case_sensitivity: Specifies if the text values should be compared case
      sensitively.
    - parse_values: Parse any values using the default value parser.
@@ -114,8 +114,8 @@ Table.from_objects value fields=Nothing =
    If the marked groups are named, the names will be used otherwise the column
    will be named `Column <N>` where `N` is the number of the marked group.
    (Group 0 is not included.)
-Text.parse_to_table : Text -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Type_Error | Regex_Syntax_Error | Illegal_Argument
-Text.parse_to_table self pattern case_sensitivity=Case_Sensitivity.Sensitive parse_values=True on_problems=Report_Warning =
+Text.parse_to_table : Text | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Type_Error | Regex_Syntax_Error | Illegal_Argument
+Text.parse_to_table self (pattern : Text | Regex) case_sensitivity=Case_Sensitivity.Sensitive parse_values=True on_problems=Report_Warning =
     Parse_To_Table.parse_text_to_table self pattern case_sensitivity parse_values on_problems
 
 ## PRIVATE

@@ -15,10 +15,12 @@ from project.Errors import Duplicate_Output_Column_Names
    Converts a Text into a Table using a regular expression pattern.
 
    See Table.parse_text_to_table.
-parse_text_to_table : Text -> Text -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Type_Error | Regex_Syntax_Error | Illegal_Argument
-parse_text_to_table text pattern_string="." case_sensitivity=Case_Sensitivity.Sensitive parse_values=True on_problems=Report_Warning =
+parse_text_to_table : Text -> Text | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Type_Error | Regex_Syntax_Error | Illegal_Argument
+parse_text_to_table text regex_or_pattern_string="." case_sensitivity=Case_Sensitivity.Sensitive parse_values=True on_problems=Report_Warning =
     case_insensitive = case_sensitivity.is_case_insensitive_in_memory
-    pattern = Regex.compile pattern_string case_insensitive=case_insensitive
+    pattern = case regex_or_pattern_string of
+        _ : Regex -> regex_or_pattern_string.recompile case_sensitivity
+        _ : Text -> Regex.compile regex_or_pattern_string case_insensitive=case_insensitive
     matches = pattern.match_all text
 
     columns = case pattern.group_count == 1 of

@@ -2,6 +2,7 @@
 
 import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
 import com.oracle.truffle.api.dsl.Cached;
+import com.oracle.truffle.api.dsl.Fallback;
 import com.oracle.truffle.api.dsl.Specialization;
 import com.oracle.truffle.api.exception.AbstractTruffleException;
 import com.oracle.truffle.api.nodes.Node;
@@ -45,6 +46,21 @@ Object alwaysCompile(Text pattern, Text options) {
     return compile(pattern.toString(), options.toString());
   }
 
+  /**
+   * Fallback for argument combinations that no specialization accepts. Always throws a
+   * panic carrying a type error that names the argument which is not a {@code Text}.
+   */
+  @Fallback
+  Object doOther(Object pattern, Object options) {
+    Builtins builtins = EnsoContext.get(this).getBuiltins();
+    // NOTE(review): presumably a Text pattern only reaches here when options is not a Text.
+    boolean patternIsText = pattern instanceof Text;
+    Object culprit = patternIsText ? options : pattern;
+    String argName = patternIsText ? "options" : "pattern";
+    Atom err = builtins.error().makeTypeError(builtins.text(), culprit, argName);
+    throw new PanicException(err, this);
+  }
+
   @TruffleBoundary
   Object compile(String pattern, String options) {
     var ctx = EnsoContext.get(this);

@@ -1,6 +1,7 @@
 from Standard.Base import all
 
 import Standard.Base.Data.Text.Regex.Regex_Syntax_Error
+import Standard.Base.Errors.Common.Type_Error
 import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 import Standard.Table.Data.Table_Conversions
 import Standard.Test.Extensions
@@ -19,6 +20,12 @@ spec =
             actual = "a 7 ab12 bt100 c12d20q 12".parse_to_table "[a-z]+\d*"
             actual.should_equal expected
 
+        Test.specify "text_to_table with a regex" <|
+            expected = Table.from_rows ["Column"]
+                                       [["a"], ["ab12"], ["bt100"], ["c12"], ["d20"], ["q"]]
+            actual = "a 7 ab12 bt100 c12d20q 12".parse_to_table "[a-z]+\d*".to_regex
+            actual.should_equal expected
+
     Test.group "Text.parse_to_table with groups" <|
         Test.specify "with groups" <|
             expected = Table.from_rows ["Column 1", "Column 2"]
@@ -68,4 +75,7 @@ spec =
         Test.specify "enpty pattern" <|
             "abc".parse_to_table "" . should_fail_with Illegal_Argument
 
+        Test.specify "bad arg" <|
+            Test.expect_panic_with ("a 7 ab12 bt100 c12d20q 12".parse_to_table 12) Type_Error
+
 main = Test_Suite.run_main spec
@@ -33,6 +33,11 @@ spec =
         Test.specify "should disallow empty patterns in `compile`" <|
             Regex.compile "" . should_fail_with Illegal_Argument
 
+        Test.specify "passing a non-string should fail with a type error" <|
+            Test.expect_panic_with (Regex.compile 12) Type_Error
+            p = Regex.compile "[a-z]"
+            Test.expect_panic_with (Regex.compile p) Type_Error
+
     Test.group "Escape" <|
         Test.specify "should escape an expression for use as a literal" <|
             Regex.escape "[a-z\d]+" . should_equal '\\[a-z\\d\\]\\+'