Skip to content

Commit

Permalink
Merge pull request #2082 from artichoke/lopopolo/string-try-push-int
Browse files Browse the repository at this point in the history
Add `String::try_push_int` in `spinoso-string`
  • Loading branch information
lopopolo committed Aug 13, 2022
2 parents 4846fdb + ddb7698 commit 4f03839
Show file tree
Hide file tree
Showing 15 changed files with 161 additions and 59 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion artichoke-backend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ spinoso-math = { version = "0.3.0", path = "../spinoso-math", optional = true, d
spinoso-random = { version = "0.3.0", path = "../spinoso-random", optional = true }
spinoso-regexp = { version = "0.3.0", path = "../spinoso-regexp", optional = true, default-features = false }
spinoso-securerandom = { version = "0.2.0", path = "../spinoso-securerandom", optional = true }
spinoso-string = { version = "0.19.0", path = "../spinoso-string", features = ["always-nul-terminated-c-string-compat"] }
spinoso-string = { version = "0.20.0", path = "../spinoso-string", features = ["always-nul-terminated-c-string-compat"] }
spinoso-symbol = { version = "0.3.0", path = "../spinoso-symbol" }
spinoso-time = { version = "0.5.0", path = "../spinoso-time", features = ["chrono"], default-features = false, optional = true }

Expand Down
6 changes: 3 additions & 3 deletions artichoke-backend/src/extn/core/string/mruby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub fn init(interp: &mut Artichoke) -> InitializeResult<()> {
class::Builder::for_spec(interp, &spec)
.add_method("*", string_mul, sys::mrb_args_req(1))?
.add_method("+", string_add, sys::mrb_args_req(1))?
.add_method("<<", string_push, sys::mrb_args_req(1))?
.add_method("<<", string_append, sys::mrb_args_req(1))?
.add_method("<=>", string_cmp_rocket, sys::mrb_args_req(1))?
.add_method("==", string_equals_equals, sys::mrb_args_req(1))?
.add_method("[]", string_aref, sys::mrb_args_req(1))?
Expand Down Expand Up @@ -101,12 +101,12 @@ unsafe extern "C" fn string_add(mrb: *mut sys::mrb_state, slf: sys::mrb_value) -
}
}

unsafe extern "C" fn string_push(mrb: *mut sys::mrb_state, slf: sys::mrb_value) -> sys::mrb_value {
unsafe extern "C" fn string_append(mrb: *mut sys::mrb_state, slf: sys::mrb_value) -> sys::mrb_value {
let other = mrb_get_args!(mrb, required = 1);
unwrap_interpreter!(mrb, to => guard);
let value = Value::from(slf);
let other = Value::from(other);
let result = trampoline::push(&mut guard, value, other);
let result = trampoline::append(&mut guard, value, other);
match result {
Ok(value) => value.inner(),
Err(exception) => error::raise(guard, exception),
Expand Down
59 changes: 13 additions & 46 deletions artichoke-backend/src/extn/core/string/trampoline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub fn add(interp: &mut Artichoke, mut value: Value, mut other: Value) -> Result
super::String::alloc_value(concatenated, interp)
}

pub fn push(interp: &mut Artichoke, mut value: Value, mut other: Value) -> Result<Value, Error> {
pub fn append(interp: &mut Artichoke, mut value: Value, mut other: Value) -> Result<Value, Error> {
if value.is_frozen(interp) {
let s = unsafe { super::String::unbox_from_value(&mut value, interp)? };
let message = "can't modify frozen String: "
Expand All @@ -59,51 +59,18 @@ pub fn push(interp: &mut Artichoke, mut value: Value, mut other: Value) -> Resul

let mut s = unsafe { super::String::unbox_from_value(&mut value, interp)? };
if let Ok(int) = other.try_convert_into::<i64>(interp) {
return match s.encoding() {
Encoding::Utf8 => {
// SAFETY: The string is repacked before any intervening uses of
// `interp` which means no mruby heap allocations can occur.
unsafe {
let string_mut = s.as_inner_mut();
// XXX: This call doesn't do a check to see if we'll exceed the max allocation
// size and may panic or abort.
string_mut
.try_push_codepoint(int)
.map_err(|err| RangeError::from(err.message()))?;
let s = s.take();
super::String::box_into_value(s, value, interp)
}
}
Encoding::Ascii => {
let byte = u8::try_from(int).map_err(|_| RangeError::from(format!("{int} out of char range")))?;
// SAFETY: The string is repacked before any intervening uses of
// `interp` which means no mruby heap allocations can occur.
unsafe {
let string_mut = s.as_inner_mut();
// XXX: This call doesn't do a check to see if we'll exceed the max allocation
// size and may panic or abort.
string_mut.push_byte(byte);
if !byte.is_ascii() {
string_mut.set_encoding(Encoding::Binary);
}
let s = s.take();
super::String::box_into_value(s, value, interp)
}
}
Encoding::Binary => {
let byte = u8::try_from(int).map_err(|_| RangeError::from(format!("{int} out of char range")))?;
// SAFETY: The string is repacked before any intervening uses of
// `interp` which means no mruby heap allocations can occur.
unsafe {
let string_mut = s.as_inner_mut();
// XXX: This call doesn't do a check to see if we'll exceed the max allocation
// size and may panic or abort.
string_mut.push_byte(byte);
let s = s.take();
super::String::box_into_value(s, value, interp)
}
}
};
// SAFETY: The string is repacked before any intervening uses of
// `interp` which means no mruby heap allocations can occur.
unsafe {
let string_mut = s.as_inner_mut();
// XXX: This call doesn't do a check to see if we'll exceed the max allocation
// size and may panic or abort.
string_mut
.try_push_int(int)
.map_err(|err| RangeError::from(err.message()))?;
let s = s.take();
return super::String::box_into_value(s, value, interp);
}
}
// SAFETY: The byte slice is immediately used and discarded after extraction.
// There are no intervening interpreter accesses.
Expand Down
2 changes: 1 addition & 1 deletion fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion spec-runner/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions spec-runner/enforced-specs.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ skip = [

[specs.core.kernel]
include = "set"
specs = ["Integer"]
specs = [
"Integer",
]

[specs.core.matchdata]
include = "all"
Expand All @@ -98,7 +100,7 @@ include = "all"
include = "set"
specs = [
# Missing support for `"UTF-16LE"` encoding, but the underlying behavior being
# tested is implemented.
# tested is implemented. 2 specs fail.
# "append",
"bytesize",
# `Range` of int, e.g. `1..9`, does not call `#to_int` on start and end.
Expand Down
2 changes: 1 addition & 1 deletion spinoso-string/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spinoso-string"
version = "0.19.0"
version = "0.20.0"
authors = ["Ryan Lopopolo <rjl@hyperbo.la>"]
edition = "2021"
rust-version = "1.63.0"
Expand Down
2 changes: 1 addition & 1 deletion spinoso-string/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Add this to your `Cargo.toml`:

```toml
[dependencies]
spinoso-string = "0.19.0"
spinoso-string = "0.20.0"
```

## `no_std`
Expand Down
14 changes: 14 additions & 0 deletions spinoso-string/src/enc/ascii/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use bstr::ByteSlice;

use crate::buf::Buf;
use crate::codepoints::InvalidCodepointError;
use crate::encoding::Encoding;
use crate::iter::{Bytes, IntoIter, Iter, IterMut};
use crate::ord::OrdError;

Expand Down Expand Up @@ -245,6 +246,19 @@ impl AsciiString {
}
}

#[inline]
pub fn try_push_int(&mut self, int: i64, enc: &mut Option<Encoding>) -> Result<(), InvalidCodepointError> {
match u8::try_from(int) {
Ok(byte) if byte.is_ascii() => self.push_byte(byte),
Ok(byte) => {
self.push_byte(byte);
*enc = Some(Encoding::Binary);
}
Err(_) => return Err(InvalidCodepointError::codepoint_out_of_range(int)),
}
Ok(())
}

#[inline]
pub fn push_char(&mut self, ch: char) {
self.inner.push_char(ch);
Expand Down
5 changes: 5 additions & 0 deletions spinoso-string/src/enc/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,11 @@ impl BinaryString {
}
}

#[inline]
pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> {
self.try_push_codepoint(int)
}

#[inline]
pub fn push_char(&mut self, ch: char) {
self.inner.push_char(ch);
Expand Down
16 changes: 16 additions & 0 deletions spinoso-string/src/enc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,22 @@ impl EncodedString {
}
}

#[inline]
pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> {
match self {
EncodedString::Ascii(inner) => {
let mut enc = None;
inner.try_push_int(int, &mut enc)?;
if let Some(enc) = enc {
self.set_encoding(enc);
}
}
EncodedString::Binary(inner) => inner.try_push_int(int)?,
EncodedString::Utf8(inner) => inner.try_push_int(int)?,
}
Ok(())
}

#[inline]
pub fn push_char(&mut self, ch: char) {
match self {
Expand Down
5 changes: 5 additions & 0 deletions spinoso-string/src/enc/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,11 @@ impl Utf8String {
}
}

#[inline]
pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> {
self.try_push_codepoint(int)
}

#[inline]
pub fn push_char(&mut self, ch: char) {
self.inner.push_char(ch);
Expand Down
24 changes: 24 additions & 0 deletions spinoso-string/src/eq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,30 @@ impl PartialEq<String> for Vec<u8> {
}
}

impl<const N: usize> PartialEq<[u8; N]> for String {
fn eq(&self, other: &[u8; N]) -> bool {
**self == *other
}
}

impl<const N: usize> PartialEq<String> for [u8; N] {
fn eq(&self, other: &String) -> bool {
*self == **other
}
}

impl<const N: usize> PartialEq<&[u8; N]> for String {
fn eq(&self, other: &&[u8; N]) -> bool {
**self == **other
}
}

impl<const N: usize> PartialEq<String> for &[u8; N] {
fn eq(&self, other: &String) -> bool {
**self == **other
}
}

impl PartialEq<[u8]> for String {
fn eq(&self, other: &[u8]) -> bool {
**self == *other
Expand Down
73 changes: 71 additions & 2 deletions spinoso-string/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,7 @@ impl String {
///
/// let mut s = String::utf8(b"UTF-8?".to_vec());
/// s.push_byte(0xFF);
/// assert_eq!(s, &b"UTF-8?\xFF"[..]);
/// assert_eq!(s, b"UTF-8?\xFF");
/// ```
///
/// [encoding]: crate::Encoding
Expand Down Expand Up @@ -1097,6 +1097,75 @@ impl String {
self.inner.try_push_codepoint(codepoint)
}

/// A more permissive version of [`try_push_codepoint`] which can alter the
/// receiver's encoding to accomodate the given byte.
///
/// # Errors
///
/// If this `String` is [conventionally UTF-8] and the given codepoint is
/// not a valid [`char`], an error is returned.
///
/// If this `String` has [ASCII] or [binary] encoding and the given
/// codepoint is not a valid byte, an error is returned.
///
/// # Examples
///
/// For [UTF-8] and [binary] strings, this function behaves identically to
/// [`try_push_codepoint`].
///
/// ```
/// use spinoso_string::String;
///
/// # fn example() -> Result<(), spinoso_string::InvalidCodepointError> {
/// let mut s = String::utf8(b"".to_vec());
/// s.try_push_int(b'a' as i64)?;
/// assert_eq!(s, "a");
/// assert!(s.try_push_int(0xD83F).is_err());
/// assert!(s.try_push_int(-1).is_err());
///
/// let mut s = String::binary(b"".to_vec());
/// s.try_push_int(b'a' as i64)?;
/// assert_eq!(s, "a");
/// assert!(s.try_push_int(1024).is_err());
/// assert!(s.try_push_int(-1).is_err());
/// # Ok(())
/// # }
/// # example().unwrap();
/// ```
///
/// For [ASCII] strings, the given integer must be a valid byte. If the
/// given integer is outside of the ASCII range, the string's encoding is
/// changed to [`Encoding::Binary`].
///
/// ```
/// use spinoso_string::{Encoding, String};
///
/// # fn example() -> Result<(), spinoso_string::InvalidCodepointError> {
/// let mut s = String::ascii(b"".to_vec());
/// s.try_push_int(b'a' as i64)?;
/// assert_eq!(s, "a");
/// assert_eq!(s.encoding(), Encoding::Ascii);
/// assert!(s.try_push_int(1024).is_err());
/// assert!(s.try_push_int(-1).is_err());
///
/// s.try_push_int(b'\xFF' as i64)?;
/// assert_eq!(s, b"a\xFF");
/// assert_eq!(s.encoding(), Encoding::Binary);
/// # Ok(())
/// # }
/// # example().unwrap();
/// ```
///
/// [`try_push_codepoint`]: Self::try_push_codepoint
/// [UTF-8]: crate::Encoding::Utf8
/// [ASCII]: crate::Encoding::Ascii
/// [binary]: crate::Encoding::Binary
/// [conventionally UTF-8]: crate::Encoding::Utf8
#[inline]
pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> {
self.inner.try_push_int(int)
}

/// Appends a given [`char`] onto the end of this `String`.
///
/// The given char is UTF-8 encoded and the UTF-8 bytes are appended to the
Expand All @@ -1109,7 +1178,7 @@ impl String {
///
/// let mut s = String::from("<3");
/// s.push_char('πŸ’Ž');
/// assert_eq!(s, &b"<3\xF0\x9F\x92\x8E"[..]); // "<3πŸ’Ž"
/// assert_eq!(s, b"<3\xF0\x9F\x92\x8E"); // "<3πŸ’Ž"
/// ```
#[inline]
pub fn push_char(&mut self, ch: char) {
Expand Down

0 comments on commit 4f03839

Please sign in to comment.