enso-org · jdunkerley · Feb 14, 2022 · Feb 11, 2022 · Feb 11, 2022 · Feb 11, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -34,6 +34,7 @@
 - [Fixed `Vector.sort` to handle tail-recursive comparators][3256]
 - [Implemented `Range.find`, `Table.rename_columns` and
   `Table.use_first_row_as_names` operations][3249]
+- [Implemented `Text.at` and `Text.is_digit` methods][3269]
 
 [3153]: https://github.com/enso-org/enso/pull/3153
 [3166]: https://github.com/enso-org/enso/pull/3166
@@ -50,6 +51,7 @@
 [3250]: https://github.com/enso-org/enso/pull/3250
 [3256]: https://github.com/enso-org/enso/pull/3256
 [3249]: https://github.com/enso-org/enso/pull/3249
+[3269]: https://github.com/enso-org/enso/pull/3269
 
 #### Enso Compiler
 

@@ -19,6 +19,15 @@ polyglot java import com.ibm.icu.lang.UCharacter
 polyglot java import com.ibm.icu.text.BreakIterator
 polyglot java import org.enso.base.Text_Utils
 
+## UNSTABLE
+
+   An error reported when a requested index lies outside the bounds of a text.
+
+   Arguments:
+   - index: The index that was requested in the text.
+   - length: The length of the text that was indexed.
+type Index_Out_Of_Bounds_Error index length
+
 ## ALIAS Length
 
    Computes the number of characters in the text.
@@ -73,6 +82,48 @@ Text.each function =
     iterate fst nxt
     Nothing
 
+## ALIAS Get Character
+
+   Returns a character from the text at the specified index (0-based).
+
+   Arguments:
+     - index: The location in the text to get the character from. The
+       index is also allowed to be negative, in which case the characters
+       are counted from the end of the text, i.e. -1 will correspond to the
+       last character.
+
+   If the index is outside the text, an `Index_Out_Of_Bounds_Error` carrying
+   the requested index and the length of the text is returned as an error.
+
+   ! What is a Character?
+     A character is defined as an Extended Grapheme Cluster, see Unicode
+     Standard Annex 29. This is the smallest unit that still has semantic
+     meaning in most text-processing applications.
+
+   > Example
+     Get the second character of the text "건반(Korean)".
+
+         "건반(Korean)".at 1 == "반"
+Text.at : Integer -> Text ! Index_Out_Of_Bounds_Error
+Text.at index =
+    case index < 0 of
+        True ->
+            # Translate a negative index into the equivalent non-negative one.
+            length = this.length
+            new_index = index + length
+            if new_index < 0 then Error.throw (Index_Out_Of_Bounds_Error index length) else
+                this.at new_index
+        False ->
+            iterator = BreakIterator.getCharacterInstance
+            iterator.setText this
+
+            # Walks the grapheme boundaries; `prev` and `next` delimit the
+            # cluster number `count`. Yields the cluster as a Text when
+            # `count` reaches `index`, or the total number of clusters (an
+            # Integer) when the iterator is exhausted first.
+            loop prev next count = if count == index then (Text_Utils.substring this prev next) else
+                next_next = iterator.next
+                # `count` is the 0-based index of the last cluster seen, so
+                # the length of the text is `count + 1`.
+                if next_next == -1 then (count + 1) else
+                    @Tail_Call loop next next_next (count + 1)
+
+            # A first boundary of -1 means there are no clusters at all.
+            first = iterator.next
+            result = if (first == -1) then 0 else (loop 0 first 0)
+            case result of
+                Integer -> Error.throw (Index_Out_Of_Bounds_Error index result)
+                _ -> result
+
 ## ALIAS Get Characters
 
    Returns a vector containing all characters in the given text.
@@ -547,6 +598,34 @@ Text.is_empty = this == ""
 Text.not_empty : Boolean
 Text.not_empty = this.is_empty.not
 
+## Checks whether the character at the specified index (0-based) of the text
+   is a digit (0-9).
+
+   Arguments:
+     - index: The location in the text to get the character from. The
+       index is also allowed to be negative, in which case the characters
+       are counted from the end of the text, i.e. -1 will correspond to the
+       last character. Defaults to 0.
+
+   If the index is outside the text, the `Index_Out_Of_Bounds_Error` produced
+   by `Text.at` is returned instead of a Boolean.
+
+   ! What is a Character?
+     A character is defined as an Extended Grapheme Cluster, see Unicode
+     Standard Annex 29. This is the smallest unit that still has semantic
+     meaning in most text-processing applications.
+
+   > Example
+     Check if an individual character is a digit:
+
+         "0".is_digit == True
+         "A0".is_digit == False
+         "A0".is_digit 1 == True
+         "건반(Korean)".is_digit 1 == False
+Text.is_digit : Integer -> Boolean ! Index_Out_Of_Bounds_Error
+Text.is_digit (index=0) =
+    grapheme = this.at index
+    # Propagate the out-of-bounds error unchanged.
+    if grapheme.is_error then grapheme else
+        # Only the first element returned by `get_chars` is inspected
+        # (presumably the first UTF-16 code unit of the cluster - confirm
+        # against `Text_Utils`).
+        char = (Text_Utils.get_chars grapheme).at 0
+        # 48 and 57 are the character codes of '0' and '9'.
+        char>=48 && char<=57
+
 ## Returns a vector containing bytes representing the UTF-8 encoding of the
    input text.
 

@@ -1,5 +1,6 @@
 from Standard.Base import all
 
+from Standard.Base.Data.Text.Extensions import Index_Out_Of_Bounds_Error
 import Standard.Base.Data.Text.Regex.Engine.Default as Default_Engine
 import Standard.Base.Data.Locale
 import Standard.Base.Data.Text.Split_Kind
@@ -41,6 +42,25 @@ spec =
             str = kshi + facepalm + accent_1 + accent_2
             str.characters . should_equal [kshi, facepalm, accent_1, accent_2]
 
+        Test.specify "should allow access by index to a grapheme cluster" <|
+            # `str` is built from four grapheme clusters, so 0..3 are the valid indices.
+            str = kshi + facepalm + accent_1 + accent_2
+            str.at 0 . should_equal kshi
+            str.at 1 . should_equal facepalm
+            str.at 2 . should_equal accent_1
+            str.at 3 . should_equal accent_2
+
+        Test.specify "should allow access by negative index to a grapheme cluster" <|
+            # Negative indices count from the end: -1 is the last cluster, -4 the first.
+            str = kshi + facepalm + accent_1 + accent_2
+            str.at -4 . should_equal kshi
+            str.at -3 . should_equal facepalm
+            str.at -2 . should_equal accent_1
+            str.at -1 . should_equal accent_2
+
+        Test.specify "should return a dataflow error when accessing characters out of bounds" <|
+            # -5 and 4 are the first indices on either side of the valid range -4..3.
+            str = kshi + facepalm + accent_1 + accent_2
+            str.at -5 . should_fail_with Index_Out_Of_Bounds_Error
+            str.at 4 . should_fail_with Index_Out_Of_Bounds_Error
+
         Test.specify "should be able to split the text into words" <|
             sentences.words . should_equal sentence_words
 
@@ -100,6 +120,27 @@ spec =
             kshi_chars = [2325, 2381, 2359, 2367]
             Text.from_utf_16 kshi_chars . should_equal kshi
 
+        Test.specify "should be able to check by index if is a digit" <|
+            # `str` is kshi, "A", "1", "2", accent_2; `is_digit` without an
+            # argument checks index 0.
+            str = kshi + "A12" + accent_2
+            str.is_digit . should_be_false
+            str.is_digit 1 . should_be_false
+            str.is_digit 2 . should_be_true
+            str.is_digit 3 . should_be_true
+            str.is_digit 4 . should_be_false
+
+        Test.specify "should be able to check by negative index if is a digit" <|
+            # Same text as above, addressed from the end: -1 is accent_2, -5 is kshi.
+            str = kshi + "A12" + accent_2
+            str.is_digit -1 . should_be_false
+            str.is_digit -2 . should_be_true
+            str.is_digit -3 . should_be_true
+            str.is_digit -4 . should_be_false
+            str.is_digit -5 . should_be_false
+
+        Test.specify "should return a dataflow error when checking is digit for out of bounds" <|
+            # Exercise `is_digit` itself (not `at`) so that the propagation of
+            # the out-of-bounds error through `is_digit` is actually covered.
+            str = kshi + "A12" + accent_2
+            str.is_digit -6 . should_fail_with Index_Out_Of_Bounds_Error
+            str.is_digit 5 . should_fail_with Index_Out_Of_Bounds_Error
+
     Test.group "Regex matching" <|
         Test.specify "should be possible on text" <|
             match = "My Text: Goes Here".match "^My Text: (.+)$" mode=Regex_Mode.First
@@ -288,3 +329,4 @@ spec =
             result = "ababd".replace "b\w # Replacing a `b` followed by any word character" "a" comments=True
             result . should_equal "aaa"
 
+# Entry point: passes this module's `spec` to `Test.Suite.run_main`.
+main = Test.Suite.run_main here.spec