diff --git a/Cargo.lock b/Cargo.lock index c7e95b7c98b6..1c4b1bdf6408 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -716,7 +716,7 @@ dependencies = [ [[package]] name = "spinoso-string" -version = "0.19.0" +version = "0.20.0" dependencies = [ "bstr", "bytecount", diff --git a/artichoke-backend/Cargo.toml b/artichoke-backend/Cargo.toml index d28d3976f50d..3256eadf42c5 100644 --- a/artichoke-backend/Cargo.toml +++ b/artichoke-backend/Cargo.toml @@ -34,7 +34,7 @@ spinoso-math = { version = "0.3.0", path = "../spinoso-math", optional = true, d spinoso-random = { version = "0.3.0", path = "../spinoso-random", optional = true } spinoso-regexp = { version = "0.3.0", path = "../spinoso-regexp", optional = true, default-features = false } spinoso-securerandom = { version = "0.2.0", path = "../spinoso-securerandom", optional = true } -spinoso-string = { version = "0.19.0", path = "../spinoso-string", features = ["always-nul-terminated-c-string-compat"] } +spinoso-string = { version = "0.20.0", path = "../spinoso-string", features = ["always-nul-terminated-c-string-compat"] } spinoso-symbol = { version = "0.3.0", path = "../spinoso-symbol" } spinoso-time = { version = "0.5.0", path = "../spinoso-time", features = ["chrono"], default-features = false, optional = true } diff --git a/artichoke-backend/src/extn/core/string/mruby.rs b/artichoke-backend/src/extn/core/string/mruby.rs index d845ff13d2d7..51d9a1593a03 100644 --- a/artichoke-backend/src/extn/core/string/mruby.rs +++ b/artichoke-backend/src/extn/core/string/mruby.rs @@ -15,7 +15,7 @@ pub fn init(interp: &mut Artichoke) -> InitializeResult<()> { class::Builder::for_spec(interp, &spec) .add_method("*", string_mul, sys::mrb_args_req(1))? .add_method("+", string_add, sys::mrb_args_req(1))? - .add_method("<<", string_push, sys::mrb_args_req(1))? + .add_method("<<", string_append, sys::mrb_args_req(1))? .add_method("<=>", string_cmp_rocket, sys::mrb_args_req(1))? .add_method("==", string_equals_equals, sys::mrb_args_req(1))? .add_method("[]", string_aref, sys::mrb_args_req(1))? @@ -101,12 +101,12 @@ unsafe extern "C" fn string_add(mrb: *mut sys::mrb_state, slf: sys::mrb_value) - } } -unsafe extern "C" fn string_push(mrb: *mut sys::mrb_state, slf: sys::mrb_value) -> sys::mrb_value { +unsafe extern "C" fn string_append(mrb: *mut sys::mrb_state, slf: sys::mrb_value) -> sys::mrb_value { let other = mrb_get_args!(mrb, required = 1); unwrap_interpreter!(mrb, to => guard); let value = Value::from(slf); let other = Value::from(other); - let result = trampoline::push(&mut guard, value, other); + let result = trampoline::append(&mut guard, value, other); match result { Ok(value) => value.inner(), Err(exception) => error::raise(guard, exception), diff --git a/artichoke-backend/src/extn/core/string/trampoline.rs b/artichoke-backend/src/extn/core/string/trampoline.rs index 881c543341a5..68316e2991f7 100644 --- a/artichoke-backend/src/extn/core/string/trampoline.rs +++ b/artichoke-backend/src/extn/core/string/trampoline.rs @@ -47,7 +47,7 @@ pub fn add(interp: &mut Artichoke, mut value: Value, mut other: Value) -> Result super::String::alloc_value(concatenated, interp) } -pub fn push(interp: &mut Artichoke, mut value: Value, mut other: Value) -> Result { +pub fn append(interp: &mut Artichoke, mut value: Value, mut other: Value) -> Result { if value.is_frozen(interp) { let s = unsafe { super::String::unbox_from_value(&mut value, interp)? }; let message = "can't modify frozen String: " @@ -59,51 +59,18 @@ pub fn push(interp: &mut Artichoke, mut value: Value, mut other: Value) -> Resul let mut s = unsafe { super::String::unbox_from_value(&mut value, interp)? }; if let Ok(int) = other.try_convert_into::(interp) { - return match s.encoding() { - Encoding::Utf8 => { - // SAFETY: The string is repacked before any intervening uses of - // `interp` which means no mruby heap allocations can occur. - unsafe { - let string_mut = s.as_inner_mut(); - // XXX: This call doesn't do a check to see if we'll exceed the max allocation - // size and may panic or abort. - string_mut - .try_push_codepoint(int) - .map_err(|err| RangeError::from(err.message()))?; - let s = s.take(); - super::String::box_into_value(s, value, interp) - } - } - Encoding::Ascii => { - let byte = u8::try_from(int).map_err(|_| RangeError::from(format!("{int} out of char range")))?; - // SAFETY: The string is repacked before any intervening uses of - // `interp` which means no mruby heap allocations can occur. - unsafe { - let string_mut = s.as_inner_mut(); - // XXX: This call doesn't do a check to see if we'll exceed the max allocation - // size and may panic or abort. - string_mut.push_byte(byte); - if !byte.is_ascii() { - string_mut.set_encoding(Encoding::Binary); - } - let s = s.take(); - super::String::box_into_value(s, value, interp) - } - } - Encoding::Binary => { - let byte = u8::try_from(int).map_err(|_| RangeError::from(format!("{int} out of char range")))?; - // SAFETY: The string is repacked before any intervening uses of - // `interp` which means no mruby heap allocations can occur. - unsafe { - let string_mut = s.as_inner_mut(); - // XXX: This call doesn't do a check to see if we'll exceed the max allocation - // size and may panic or abort. - string_mut.push_byte(byte); - let s = s.take(); - super::String::box_into_value(s, value, interp) - } - } - }; + // SAFETY: The string is repacked before any intervening uses of + // `interp` which means no mruby heap allocations can occur. + unsafe { + let string_mut = s.as_inner_mut(); + // XXX: This call doesn't do a check to see if we'll exceed the max allocation + // size and may panic or abort. + string_mut + .try_push_int(int) + .map_err(|err| RangeError::from(err.message()))?; + let s = s.take(); + return super::String::box_into_value(s, value, interp); + } } // SAFETY: The byte slice is immediately used and discarded after extraction. // There are no intervening interpreter accesses. diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 6eb4178a3fbd..33e0d9d4d4e3 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -530,7 +530,7 @@ dependencies = [ [[package]] name = "spinoso-string" -version = "0.19.0" +version = "0.20.0" dependencies = [ "bstr", "bytecount", diff --git a/spec-runner/Cargo.lock b/spec-runner/Cargo.lock index e01d356a9f49..a333b20f562f 100644 --- a/spec-runner/Cargo.lock +++ b/spec-runner/Cargo.lock @@ -848,7 +848,7 @@ dependencies = [ [[package]] name = "spinoso-string" -version = "0.19.0" +version = "0.20.0" dependencies = [ "bstr", "bytecount", diff --git a/spec-runner/enforced-specs.toml b/spec-runner/enforced-specs.toml index dec62856fa9a..2397d8f683ce 100644 --- a/spec-runner/enforced-specs.toml +++ b/spec-runner/enforced-specs.toml @@ -77,7 +77,9 @@ skip = [ [specs.core.kernel] include = "set" -specs = ["Integer"] +specs = [ + "Integer", +] [specs.core.matchdata] include = "all" @@ -98,7 +100,7 @@ include = "all" include = "set" specs = [ # Missing support for `"UTF-16LE"` encoding, but the underlying behavior being - # tested is implemented. + # tested is implemented. 2 specs fail. # "append", "bytesize", # `Range` of int, e.g. `1..9`, does not call `#to_int` on start and end. diff --git a/spinoso-string/Cargo.toml b/spinoso-string/Cargo.toml index 0fb06ac84251..3cd83e0dbc7c 100644 --- a/spinoso-string/Cargo.toml +++ b/spinoso-string/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spinoso-string" -version = "0.19.0" +version = "0.20.0" authors = ["Ryan Lopopolo "] edition = "2021" rust-version = "1.63.0" diff --git a/spinoso-string/README.md b/spinoso-string/README.md index 97e7635b0662..bddacd9d461b 100644 --- a/spinoso-string/README.md +++ b/spinoso-string/README.md @@ -27,7 +27,7 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -spinoso-string = "0.19.0" +spinoso-string = "0.20.0" ``` ## `no_std` diff --git a/spinoso-string/src/enc/ascii/mod.rs b/spinoso-string/src/enc/ascii/mod.rs index 518913e93de9..101dd8f03878 100644 --- a/spinoso-string/src/enc/ascii/mod.rs +++ b/spinoso-string/src/enc/ascii/mod.rs @@ -7,6 +7,7 @@ use bstr::ByteSlice; use crate::buf::Buf; use crate::codepoints::InvalidCodepointError; +use crate::encoding::Encoding; use crate::iter::{Bytes, IntoIter, Iter, IterMut}; use crate::ord::OrdError; @@ -245,6 +246,19 @@ impl AsciiString { } } + #[inline] + pub fn try_push_int(&mut self, int: i64, enc: &mut Option) -> Result<(), InvalidCodepointError> { + match u8::try_from(int) { + Ok(byte) if byte.is_ascii() => self.push_byte(byte), + Ok(byte) => { + self.push_byte(byte); + *enc = Some(Encoding::Binary); + } + Err(_) => return Err(InvalidCodepointError::codepoint_out_of_range(int)), + } + Ok(()) + } + #[inline] pub fn push_char(&mut self, ch: char) { self.inner.push_char(ch); diff --git a/spinoso-string/src/enc/binary/mod.rs b/spinoso-string/src/enc/binary/mod.rs index f9afd3ac64a5..94a3c8591f7c 100644 --- a/spinoso-string/src/enc/binary/mod.rs +++ b/spinoso-string/src/enc/binary/mod.rs @@ -244,6 +244,11 @@ impl BinaryString { } } + #[inline] + pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> { + self.try_push_codepoint(int) + } + #[inline] pub fn push_char(&mut self, ch: char) { self.inner.push_char(ch); diff --git a/spinoso-string/src/enc/mod.rs b/spinoso-string/src/enc/mod.rs index e578ec1f7cd2..f152a7856d65 100644 --- a/spinoso-string/src/enc/mod.rs +++ b/spinoso-string/src/enc/mod.rs @@ -472,6 +472,22 @@ impl EncodedString { } } + #[inline] + pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> { + match self { + EncodedString::Ascii(inner) => { + let mut enc = None; + inner.try_push_int(int, &mut enc)?; + if let Some(enc) = enc { + self.set_encoding(enc); + } + } + EncodedString::Binary(inner) => inner.try_push_int(int)?, + EncodedString::Utf8(inner) => inner.try_push_int(int)?, + } + Ok(()) + } + #[inline] pub fn push_char(&mut self, ch: char) { match self { diff --git a/spinoso-string/src/enc/utf8/mod.rs b/spinoso-string/src/enc/utf8/mod.rs index cf26fe023d44..0f26c225cbf9 100644 --- a/spinoso-string/src/enc/utf8/mod.rs +++ b/spinoso-string/src/enc/utf8/mod.rs @@ -502,6 +502,11 @@ impl Utf8String { } } + #[inline] + pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> { + self.try_push_codepoint(int) + } + #[inline] pub fn push_char(&mut self, ch: char) { self.inner.push_char(ch); diff --git a/spinoso-string/src/eq.rs b/spinoso-string/src/eq.rs index 6bee9ea42f24..39683add2716 100644 --- a/spinoso-string/src/eq.rs +++ b/spinoso-string/src/eq.rs @@ -14,6 +14,30 @@ impl PartialEq for Vec { } } +impl PartialEq<[u8; N]> for String { + fn eq(&self, other: &[u8; N]) -> bool { + **self == *other + } +} + +impl PartialEq for [u8; N] { + fn eq(&self, other: &String) -> bool { + *self == **other + } +} + +impl PartialEq<&[u8; N]> for String { + fn eq(&self, other: &&[u8; N]) -> bool { + **self == **other + } +} + +impl PartialEq for &[u8; N] { + fn eq(&self, other: &String) -> bool { + **self == **other + } +} + impl PartialEq<[u8]> for String { fn eq(&self, other: &[u8]) -> bool { **self == *other diff --git a/spinoso-string/src/lib.rs b/spinoso-string/src/lib.rs index 2f7e192039b8..b6c0518ce615 100644 --- a/spinoso-string/src/lib.rs +++ b/spinoso-string/src/lib.rs @@ -1020,7 +1020,7 @@ impl String { /// /// let mut s = String::utf8(b"UTF-8?".to_vec()); /// s.push_byte(0xFF); - /// assert_eq!(s, &b"UTF-8?\xFF"[..]); + /// assert_eq!(s, b"UTF-8?\xFF"); /// ``` /// /// [encoding]: crate::Encoding @@ -1097,6 +1097,75 @@ impl String { self.inner.try_push_codepoint(codepoint) } + /// A more permissive version of [`try_push_codepoint`] which can alter the + /// receiver's encoding to accomodate the given byte. + /// + /// # Errors + /// + /// If this `String` is [conventionally UTF-8] and the given codepoint is + /// not a valid [`char`], an error is returned. + /// + /// If this `String` has [ASCII] or [binary] encoding and the given + /// codepoint is not a valid byte, an error is returned. + /// + /// # Examples + /// + /// For [UTF-8] and [binary] strings, this function behaves identically to + /// [`try_push_codepoint`]. + /// + /// ``` + /// use spinoso_string::String; + /// + /// # fn example() -> Result<(), spinoso_string::InvalidCodepointError> { + /// let mut s = String::utf8(b"".to_vec()); + /// s.try_push_int(b'a' as i64)?; + /// assert_eq!(s, "a"); + /// assert!(s.try_push_int(0xD83F).is_err()); + /// assert!(s.try_push_int(-1).is_err()); + /// + /// let mut s = String::binary(b"".to_vec()); + /// s.try_push_int(b'a' as i64)?; + /// assert_eq!(s, "a"); + /// assert!(s.try_push_int(1024).is_err()); + /// assert!(s.try_push_int(-1).is_err()); + /// # Ok(()) + /// # } + /// # example().unwrap(); + /// ``` + /// + /// For [ASCII] strings, the given integer must be a valid byte. If the + /// given integer is outside of the ASCII range, the string's encoding is + /// changed to [`Encoding::Binary`]. + /// + /// ``` + /// use spinoso_string::{Encoding, String}; + /// + /// # fn example() -> Result<(), spinoso_string::InvalidCodepointError> { + /// let mut s = String::ascii(b"".to_vec()); + /// s.try_push_int(b'a' as i64)?; + /// assert_eq!(s, "a"); + /// assert_eq!(s.encoding(), Encoding::Ascii); + /// assert!(s.try_push_int(1024).is_err()); + /// assert!(s.try_push_int(-1).is_err()); + /// + /// s.try_push_int(b'\xFF' as i64)?; + /// assert_eq!(s, b"a\xFF"); + /// assert_eq!(s.encoding(), Encoding::Binary); + /// # Ok(()) + /// # } + /// # example().unwrap(); + /// ``` + /// + /// [`try_push_codepoint`]: Self::try_push_codepoint + /// [UTF-8]: crate::Encoding::Utf8 + /// [ASCII]: crate::Encoding::Ascii + /// [binary]: crate::Encoding::Binary + /// [conventionally UTF-8]: crate::Encoding::Utf8 + #[inline] + pub fn try_push_int(&mut self, int: i64) -> Result<(), InvalidCodepointError> { + self.inner.try_push_int(int) + } + /// Appends a given [`char`] onto the end of this `String`. /// /// The given char is UTF-8 encoded and the UTF-8 bytes are appended to the @@ -1109,7 +1178,7 @@ impl String { /// /// let mut s = String::from("<3"); /// s.push_char('💎'); - /// assert_eq!(s, &b"<3\xF0\x9F\x92\x8E"[..]); // "<3💎" + /// assert_eq!(s, b"<3\xF0\x9F\x92\x8E"); // "<3💎" /// ``` #[inline] pub fn push_char(&mut self, ch: char) {