Skip to content

Commit

Permalink
Add String.replace and String.slice_into
Browse files Browse the repository at this point in the history
String.slice_into is the same as String.slice(), except it writes the
bytes into an existing ByteArray instead of allocating a new one.

String.replace(string, with) finds all occurrences of "string" and
replaces them with "with", using the new String.slice_into() method to
avoid allocating intermediate ByteArray values.

The behaviour of String.replace() with an empty "string" argument
differs compared to other languages: whereas most seem to effectively
split the receiver into separate characters then join those together
using "with", we just return the receiver unchanged instead. This is done
because I believe this makes the most sense: you can't replace nothing
with something, because there's nothing to replace.

This fixes #637.

Changelog: added
  • Loading branch information
yorickpeterse committed Dec 15, 2023
1 parent f4d3c5a commit e287956
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 14 deletions.
17 changes: 8 additions & 9 deletions rt/src/runtime/string.rs
Expand Up @@ -146,21 +146,20 @@ pub unsafe extern "system" fn inko_string_chars_drop(iter: *mut u8) {
}

// NOTE(review): this hunk is rendered without +/- markers, so the lines of the
// removed `inko_string_slice_bytes` and of the added
// `inko_string_slice_bytes_into` appear interleaved below.
//
// `inko_string_slice_bytes_into` appends a byte range of `string` to `into`.
// The range starts at byte offset `start` and spans at most `length` bytes; it
// is truncated at the end of the string. Nothing is appended when `start` is
// negative, `length` is zero or negative, or `start` lies at or past the
// (clamped) end of the range.
#[no_mangle]
pub unsafe extern "system" fn inko_string_slice_bytes(
state: *const State,
pub unsafe extern "system" fn inko_string_slice_bytes_into(
string: *const InkoString,
into: *mut ByteArray,
start: i64,
length: i64,
) -> *const ByteArray {
) {
let string = InkoString::read(string);
// Exclusive end offset of the range, clamped to the string's byte length.
// NOTE(review): `start + length` is a plain i64 addition, so extreme argument
// values could overflow before the clamp applies — consider `saturating_add`.
let end = min((start + length) as usize, string.len());
let bytes = if start < 0 || length <= 0 || start as usize >= end {
Vec::new()
} else {
string.as_bytes()[start as usize..end].to_vec()
};

ByteArray::alloc((*state).byte_array_class, bytes)
// An invalid or empty range leaves `into` untouched.
if start < 0 || length <= 0 || start as usize >= end {
return;
}

// Copy the selected bytes onto the end of the caller-supplied buffer.
(*into).value.extend_from_slice(&string.as_bytes()[start as usize..end]);
}

#[no_mangle]
Expand Down
74 changes: 69 additions & 5 deletions std/src/std/string.inko
Expand Up @@ -24,12 +24,12 @@ class extern StringResult {

fn extern inko_string_to_lower(state: Pointer[UInt8], string: String) -> String
fn extern inko_string_to_upper(state: Pointer[UInt8], string: String) -> String
# Declaration of the runtime function that appends a byte range of `string` to
# `into`. The `size` argument corresponds to the `length` parameter of the
# runtime function.
fn extern inko_string_slice_bytes(
state: Pointer[UInt8],
fn extern inko_string_slice_bytes_into(
string: String,
into: mut ByteArray,
start: Int,
size: Int,
) -> ByteArray
)

fn extern inko_string_chars(string: String) -> Pointer[UInt8]
fn extern inko_string_chars_next(
Expand Down Expand Up @@ -257,6 +257,25 @@ class builtin String {
buff.into_string
}

# Slices `self` into a sequence of bytes using a _byte_ range, appending the
# bytes to the `bytes` argument.
#
# Any bytes already stored in `bytes` are kept; the slice is added after them.
# If the range extends past the end of `self`, it's truncated to the end of
# `self`. Nothing is appended if `start` is negative, `size` is zero or
# negative, or `start` is at or beyond the end of `self`.
#
# This method is useful if you want to slice a `String` into a `ByteArray`,
# but wish to reuse the same `ByteArray` rather than allocating a new one for
# each slice. Unless you've determined you indeed need to reuse the same
# `ByteArray`, you're probably better off using `String.slice` instead.
#
# # Examples
#
# let bytes = ByteArray.new
#
# '😊'.slice_into(bytes, start: 0, size: 4)
#
# bytes # => '😊'.to_byte_array
fn pub slice_into(bytes: mut ByteArray, start: Int, size: Int) {
inko_string_slice_bytes_into(self, bytes, start, size)
}

# Returns the _byte_ index of the first occurrence of the given `String`,
# starting at the given byte index.
#
Expand Down Expand Up @@ -542,6 +561,48 @@ class builtin String {
buff.into_string
}

# Replaces all occurrences of `string` with the value in `with`, returning the
# result as a new `String`.
#
# Matches are searched for from left to right and never overlap: after a match
# the search resumes directly after the matched bytes. The replacement itself
# is not searched again.
#
# If the `string` argument is an empty `String`, this method doesn't perform
# any replacements and instead returns `self` as-is.
#
# # Examples
#
# 'foo foo'.replace('foo', with: 'bar') # => 'bar bar'
fn pub replace(string: String, with: String) -> String {
# Different languages handle the pattern being empty differently. For
# example, JavaScript (and thus Node.js) only matches the start of the string
# if the pattern is empty. Other languages such as Ruby and Python appear to
# inject the replacement in between every character, such that
# `'AB'.replace('', ',')` results in `,A,B,`.
#
# We make the decision to just _not_ do any replacements in this case, as
# replacing an empty string is nonsensical to begin with.
if string.size == 0 { return self }

# `buf` collects the output. `start` is the byte offset at which the next
# search begins, and `last` is the byte offset just past the previous match.
let buf = ByteArray.new
let mut start = 0
let mut last = 0

loop {
match byte_index(string, start) {
case Some(i) -> {
# Copy the unmatched bytes between the previous match and this one, then
# append the replacement in place of the match.
if i > last { slice_into(buf, start: last, size: i - last) }
with.slice_into(buf, start: 0, size: with.size)
# Continue searching directly after the bytes we just matched.
start = i + string.size
last = start
}
case _ -> {
# No more matches: copy the remaining tail, if any. The full `size` of
# `self` is passed as the slice size; the range is truncated to the end
# of `self` when slicing.
if start < size { slice_into(buf, start, size) }
break
}
}
}

buf.into_string
}

# Returns the byte at the given byte index as an `Int`, read directly through
# the `@bytes` pointer.
#
# This method itself performs no bounds check; presumably callers must ensure
# `index` is within the bounds of `self` (TODO confirm against the callers).
fn byte_unchecked(index: Int) -> Int {
(@bytes as Int + index as Pointer[UInt8]).0 as Int
}
Expand Down Expand Up @@ -586,7 +647,7 @@ impl Bytes for String {
@size as Int
}

# Slices `self` into a substring using a _byte_ range.
# Slices `self` into a sequence of bytes using a _byte_ range.
#
# # Examples
#
Expand All @@ -595,7 +656,10 @@ impl Bytes for String {
# '😊'.slice(start: 0, size: 4) # => '😊'.to_byte_array
# '😊'.slice(start: 0, size: 3) # => "\u{FFFD}".to_byte_array
#
# NOTE(review): slicing copies the raw bytes of the range, so the second
# example presumably yields the first three bytes of the emoji rather than the
# bytes of U+FFFD. Confirm and update the example.
fn pub slice(start: Int, size: Int) -> ByteArray {
inko_string_slice_bytes(_INKO.state, self, start, size)
# Allocate a fresh `ByteArray` and let `slice_into` append the range to it.
let bytes = ByteArray.new

slice_into(bytes, start, size)
bytes
}

# Returns a raw pointer to the bytes of `self`.
Expand Down
31 changes: 31 additions & 0 deletions std/test/std/test_string.inko
Expand Up @@ -58,6 +58,18 @@ fn pub tests(t: mut Tests) {
t.equal('foo'.slice(start: -5, size: 50), ''.to_byte_array)
}

# Verifies that consecutive `String.slice_into` calls append to the same
# `ByteArray` instead of overwriting it.
t.test('String.slice_into') fn (t) {
let bytes = ByteArray.new
let string = 'hello_world'

# String.slice_into and String.slice use the same underlying function, so
# there's no need to duplicate many test cases here.
string.slice_into(bytes, start: 0, size: 3)
string.slice_into(bytes, start: 3, size: 2)

# 'hel' followed by 'lo' must accumulate into 'hello'.
t.equal(bytes, 'hello'.to_byte_array)
}

t.test('String.byte_index') fn (t) {
t.equal('hello'.byte_index(of: 'h', starting_at: 0), Option.Some(0))
t.equal('hello'.byte_index(of: 'e', starting_at: 0), Option.Some(1))
Expand Down Expand Up @@ -321,4 +333,23 @@ fn pub tests(t: mut Tests) {

t.equal(a, b)
}

t.test('String.replace') fn (t) {
# An empty pattern never replaces anything: the receiver is returned as-is.
t.equal(''.replace('', ''), '')
t.equal(''.replace('', 'foo'), '')
t.equal('foo'.replace('', 'bar'), 'foo')
# An empty receiver has nothing to match against.
t.equal(''.replace('foo', 'bar'), '')
t.equal(''.replace('foo', ''), '')
# Single and repeated matches, including replacing with an empty String.
t.equal('foo'.replace('foo', 'bar'), 'bar')
t.equal('foo baz'.replace('foo', 'bar'), 'bar baz')
t.equal('foo foo'.replace('foo', 'bar'), 'bar bar')
t.equal('foo'.replace('foo', ''), '')
t.equal('foo bar'.replace('foo', ''), ' bar')
# A match covering only part of the receiver keeps the remaining bytes.
t.equal('foo'.replace('fo', 'FO'), 'FOo')
# Matching is case-sensitive.
t.equal('foo'.replace('FOO', 'BAR'), 'foo')
# Multi-byte characters in the pattern and in the replacement.
t.equal('a😃a'.replace('😃', 'X'), 'aXa')
t.equal('foo'.replace('foo', '😃'), '😃')
# A replacement containing the pattern isn't replaced again.
t.equal('foo'.replace('foo', 'foobar'), 'foobar')
# Several separate single-character matches.
t.equal('a,b,c,d'.replace(',', ' '), 'a b c d')
}
}

0 comments on commit e287956

Please sign in to comment.