diff --git a/rt/src/runtime/string.rs b/rt/src/runtime/string.rs index 0ec2ab58c..38e3e7a7f 100644 --- a/rt/src/runtime/string.rs +++ b/rt/src/runtime/string.rs @@ -146,21 +146,20 @@ pub unsafe extern "system" fn inko_string_chars_drop(iter: *mut u8) { } #[no_mangle] -pub unsafe extern "system" fn inko_string_slice_bytes( - state: *const State, +pub unsafe extern "system" fn inko_string_slice_bytes_into( string: *const InkoString, + into: *mut ByteArray, start: i64, length: i64, -) -> *const ByteArray { +) { let string = InkoString::read(string); let end = min((start + length) as usize, string.len()); - let bytes = if start < 0 || length <= 0 || start as usize >= end { - Vec::new() - } else { - string.as_bytes()[start as usize..end].to_vec() - }; - ByteArray::alloc((*state).byte_array_class, bytes) + if start < 0 || length <= 0 || start as usize >= end { + return; + } + + (*into).value.extend_from_slice(&string.as_bytes()[start as usize..end]); } #[no_mangle] diff --git a/std/src/std/string.inko b/std/src/std/string.inko index 74c428e05..31e4989cf 100644 --- a/std/src/std/string.inko +++ b/std/src/std/string.inko @@ -24,12 +24,12 @@ class extern StringResult { fn extern inko_string_to_lower(state: Pointer[UInt8], string: String) -> String fn extern inko_string_to_upper(state: Pointer[UInt8], string: String) -> String -fn extern inko_string_slice_bytes( - state: Pointer[UInt8], +fn extern inko_string_slice_bytes_into( string: String, + into: mut ByteArray, start: Int, size: Int, -) -> ByteArray +) fn extern inko_string_chars(string: String) -> Pointer[UInt8] fn extern inko_string_chars_next( @@ -257,6 +257,25 @@ class builtin String { buff.into_string } + # Slices `self` into a sequence of bytes using a _byte_ range, appending the + # bytes to the `bytes` argument. + # + # This method is useful if you want to slice a `String` into a `ByteArray`, + # but wish to reuse the same `ByteArray` rather than allocating a new one for + # each slice. 
Unless you've determined you indeed need to reuse the same + # `ByteArray`, you're probably better off using `String.slice` instead. + # + # # Examples + # + # let bytes = ByteArray.new + # + # '😊'.slice_into(bytes, start: 0, size: 4) + # + # bytes # => '😊'.to_byte_array + fn pub slice_into(bytes: mut ByteArray, start: Int, size: Int) { + inko_string_slice_bytes_into(self, bytes, start, size) + } + # Returns the _byte_ index of the first occurrence of the given `String`, # starting at the given byte index. # @@ -542,6 +561,48 @@ class builtin String { buff.into_string } + # Replaces all occurrences of `string` with the value in `with`, returning the + # result as a new `String`. + # + # If the `string` argument is an empty `String`, this method doesn't perform + # any replacements and instead returns a copy of `self`. + # + # # Examples + # + # 'foo foo'.replace('foo', with: 'bar') # => 'bar bar' + fn pub replace(string: String, with: String) -> String { + # Different languages handle the pattern being empty differently. For + # example, JavaScript and Node only match the start of the string if the + # pattern is empty. Other languages such as Ruby and Python appear to inject + # the replacement in between every character, such that + # `'AB'.replace('', ',')` results in `,A,B,`. + # + # We make the decision to just _not_ do any replacements in this case, as + # replacing an empty string is nonsensical to begin with. 
+ if string.size == 0 { return self } + + let buf = ByteArray.new + let mut start = 0 + let mut last = 0 + + loop { + match byte_index(string, start) { + case Some(i) -> { + if i > last { slice_into(buf, start: last, size: i - last) } + with.slice_into(buf, start: 0, size: with.size) + start = i + string.size + last = start + } + case _ -> { + if start < size { slice_into(buf, start, size) } + break + } + } + } + + buf.into_string + } + fn byte_unchecked(index: Int) -> Int { (@bytes as Int + index as Pointer[UInt8]).0 as Int } @@ -586,7 +647,7 @@ impl Bytes for String { @size as Int } - # Slices `self` into a substring using a _byte_ range. + # Slices `self` into a sequence of bytes using a _byte_ range. # # # Examples # @@ -595,7 +656,10 @@ impl Bytes for String { # '😊'.slice_bytes(start: 0, size: 4) # => '😊'.to_byte_array # '😊'.slice_bytes(start: 0, size: 3) # => "\u{FFFD}".to_byte_array fn pub slice(start: Int, size: Int) -> ByteArray { - inko_string_slice_bytes(_INKO.state, self, start, size) + let bytes = ByteArray.new + + slice_into(bytes, start, size) + bytes } # Returns a raw pointer to the bytes of `self`. diff --git a/std/test/std/test_string.inko b/std/test/std/test_string.inko index fd939c000..d85e51daa 100644 --- a/std/test/std/test_string.inko +++ b/std/test/std/test_string.inko @@ -58,6 +58,18 @@ fn pub tests(t: mut Tests) { t.equal('foo'.slice(start: -5, size: 50), ''.to_byte_array) } + t.test('String.slice_into') fn (t) { + let bytes = ByteArray.new + let string = 'hello_world' + + # String.slice_into and String.slice use the same underlying function, so + # there's no need to duplicate many test cases here. 
+ string.slice_into(bytes, start: 0, size: 3) + string.slice_into(bytes, start: 3, size: 2) + + t.equal(bytes, 'hello'.to_byte_array) + } + t.test('String.byte_index') fn (t) { t.equal('hello'.byte_index(of: 'h', starting_at: 0), Option.Some(0)) t.equal('hello'.byte_index(of: 'e', starting_at: 0), Option.Some(1)) @@ -321,4 +333,23 @@ fn pub tests(t: mut Tests) { t.equal(a, b) } + + t.test('String.replace') fn (t) { + t.equal(''.replace('', ''), '') + t.equal(''.replace('', 'foo'), '') + t.equal('foo'.replace('', 'bar'), 'foo') + t.equal(''.replace('foo', 'bar'), '') + t.equal(''.replace('foo', ''), '') + t.equal('foo'.replace('foo', 'bar'), 'bar') + t.equal('foo baz'.replace('foo', 'bar'), 'bar baz') + t.equal('foo foo'.replace('foo', 'bar'), 'bar bar') + t.equal('foo'.replace('foo', ''), '') + t.equal('foo bar'.replace('foo', ''), ' bar') + t.equal('foo'.replace('fo', 'FO'), 'FOo') + t.equal('foo'.replace('FOO', 'BAR'), 'foo') + t.equal('a😃a'.replace('😃', 'X'), 'aXa') + t.equal('foo'.replace('foo', '😃'), '😃') + t.equal('foo'.replace('foo', 'foobar'), 'foobar') + t.equal('a,b,c,d'.replace(',', ' '), 'a b c d') + } }