From f08169d5e4048706e891f695486b59a159ee525f Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Fri, 14 Jul 2023 22:41:21 +0200 Subject: [PATCH] Define more of String in Inko This commit changes the implementation of String, both in the standard and runtime library, such that the standard library has a bit more knowledge/control over it. For example, the memory layout no longer depends on a Box<[T]>, of which the memory layout isn't specified, and instead the runtime library defines a specific layout mirrored by the standard library. This allows removing of several runtime functions, in favour of implementing the logic in Inko's standard library. Changelog: changed --- compiler/src/llvm/layouts.rs | 11 +- compiler/src/type_check/expressions.rs | 10 ++ rt/src/immutable_string.rs | 141 ------------------------- rt/src/lib.rs | 1 - rt/src/mem.rs | 100 ++++++++++++++++-- rt/src/runtime/byte_array.rs | 11 +- rt/src/runtime/process.rs | 4 +- rt/src/runtime/string.rs | 48 +-------- std/src/std/json.inko | 5 +- std/src/std/string.inko | 44 +++++--- 10 files changed, 137 insertions(+), 238 deletions(-) delete mode 100644 rt/src/immutable_string.rs diff --git a/compiler/src/llvm/layouts.rs b/compiler/src/llvm/layouts.rs index 9e030f31e..935917041 100644 --- a/compiler/src/llvm/layouts.rs +++ b/compiler/src/llvm/layouts.rs @@ -13,7 +13,7 @@ use std::cmp::max; use std::collections::HashMap; use types::{ ClassId, MethodId, MethodSource, BOOLEAN_ID, BYTE_ARRAY_ID, CALL_METHOD, - CHANNEL_ID, DROPPER_METHOD, FLOAT_ID, INT_ID, NIL_ID, STRING_ID, + CHANNEL_ID, DROPPER_METHOD, FLOAT_ID, INT_ID, NIL_ID, }; /// The size of an object header. @@ -248,11 +248,6 @@ impl<'ctx> Layouts<'ctx> { header, context.f64_type().into(), ), - STRING_ID => context.builtin_type( - &name, - header, - context.pointer_type().into(), - ), BOOLEAN_ID | NIL_ID => { let typ = context.opaque_struct(&name); @@ -305,7 +300,9 @@ impl<'ctx> Layouts<'ctx> { }; for id in mir.classes.keys() { - if id.is_builtin() { + // String _is_ builtin, but we still process it such that the + // standard library can define fields for it. + if id.is_builtin() && *id != ClassId::string() { continue; } diff --git a/compiler/src/type_check/expressions.rs b/compiler/src/type_check/expressions.rs index 2ceec2003..8810e0d8a 100644 --- a/compiler/src/type_check/expressions.rs +++ b/compiler/src/type_check/expressions.rs @@ -1493,6 +1493,16 @@ impl<'a> CheckMethodBody<'a> { return TypeRef::Error; }; + if class.is_builtin() { + self.state.diagnostics.error( + DiagnosticId::InvalidType, + "Instances of builtin classes can't be created using the \ + class literal syntax", + self.file(), + node.location.clone(), + ); + } + let require_send = class.kind(self.db()).is_async(); let ins = ClassInstance::empty(self.db_mut(), class); let mut assigned = HashSet::new(); diff --git a/rt/src/immutable_string.rs b/rt/src/immutable_string.rs deleted file mode 100644 index 2e35c6416..000000000 --- a/rt/src/immutable_string.rs +++ /dev/null @@ -1,141 +0,0 @@ -//! Immutable strings that can be shared with C code. -use std::fmt; -use std::ops::Deref; -use std::str; - -const NULL_BYTE: u8 = 0; - -/// An immutable string that can be shared with C code. -/// -/// An ImmutableString is similar to Rust's `String` type, except it is -/// immutable and adds a NULL byte to the end. Despite the use of a trailing -/// NULL byte, an `ImmutableString` stores its length separately, allowing you -/// to still store NULL bytes any where in the `ImmutableString`. -#[derive(Eq, PartialEq, Hash, Clone)] -pub(crate) struct ImmutableString { - bytes: Box<[u8]>, -} - -#[cfg_attr(feature = "cargo-clippy", allow(clippy::len_without_is_empty))] -impl ImmutableString { - /// Creates an `ImmutableString` from a `Vec`, replacing any invalid - /// UTF-8 sequences with `U+FFFD REPLACEMENT CHARACTER`. - pub(crate) fn from_utf8(bytes: Vec) -> Self { - let string = match String::from_utf8(bytes) { - Ok(string) => string, - Err(err) => String::from_utf8_lossy(&err.into_bytes()).into_owned(), - }; - - Self::from(string) - } - - /// Returns a string slice pointing to the underlying bytes. - /// - /// The returned slice _does not_ include the NULL byte. - pub(crate) fn as_slice(&self) -> &str { - unsafe { str::from_utf8_unchecked(self.as_bytes()) } - } - - /// Returns a reference to the underlying bytes. - pub(crate) fn as_bytes(&self) -> &[u8] { - &self.bytes[0..self.len()] - } - - /// Returns the number of bytes in this String. - pub(crate) fn len(&self) -> usize { - self.bytes.len() - 1 - } - - /// Returns a pointer to the bytes, including the NULL byte. - pub(crate) fn as_ptr(&self) -> *const u8 { - self.bytes.as_ptr() as *const _ - } -} - -impl Deref for ImmutableString { - type Target = str; - - fn deref(&self) -> &str { - self.as_slice() - } -} - -impl From> for ImmutableString { - /// Creates an `ImmutableString` from a `Vec`, without checking if the - /// input is valid UTF-8. - fn from(mut bytes: Vec) -> Self { - bytes.reserve_exact(1); - bytes.push(NULL_BYTE); - - ImmutableString { bytes: bytes.into_boxed_slice() } - } -} - -impl From for ImmutableString { - fn from(string: String) -> Self { - Self::from(string.into_bytes()) - } -} - -impl fmt::Display for ImmutableString { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(self.as_slice(), f) - } -} - -impl fmt::Debug for ImmutableString { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(self.as_slice(), f) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_from_utf8() { - let valid = ImmutableString::from_utf8(vec![105, 110, 107, 111]); - let invalid = ImmutableString::from_utf8(vec![ - 72, 101, 108, 108, 111, 32, 240, 144, 128, 87, 111, 114, 108, 100, - ]); - - assert_eq!(valid.as_slice(), "inko"); - assert_eq!(invalid.as_slice(), "Hello �World"); - } - - #[test] - fn test_as_slice() { - let string = ImmutableString::from("hello".to_string()); - - assert_eq!(string.as_slice(), "hello"); - } - - #[test] - fn test_as_bytes() { - let string = ImmutableString::from("inko".to_string()); - - assert_eq!(string.as_bytes(), &[105, 110, 107, 111]); - } - - #[test] - fn test_len() { - let string = ImmutableString::from("inko".to_string()); - - assert_eq!(string.len(), 4); - } - - #[test] - fn test_from_bytes() { - let string = ImmutableString::from(vec![10]); - - assert_eq!(string.bytes.as_ref(), &[10, 0]); - } - - #[test] - fn test_deref_string_slice() { - let string = ImmutableString::from("inko".to_string()); - - assert_eq!(&*string, "inko"); - } -} diff --git a/rt/src/lib.rs b/rt/src/lib.rs index 7fe1e6ff3..cdc264fb0 100644 --- a/rt/src/lib.rs +++ b/rt/src/lib.rs @@ -8,7 +8,6 @@ pub mod macros; pub mod arc_without_weak; pub mod config; pub mod context; -pub mod immutable_string; pub mod mem; pub mod memory_map; pub mod network_poller; diff --git a/rt/src/mem.rs b/rt/src/mem.rs index 8acc31adb..ea91f0ab0 100644 --- a/rt/src/mem.rs +++ b/rt/src/mem.rs @@ -1,8 +1,9 @@ -use crate::immutable_string::ImmutableString; use std::alloc::{alloc, alloc_zeroed, dealloc, handle_alloc_error, Layout}; -use std::mem::{align_of, size_of, swap}; +use std::mem::{align_of, forget, size_of, swap}; use std::ops::Deref; use std::ptr::drop_in_place; +use std::slice; +use std::str; use std::string::String as RustString; /// The alignment to use for Inko objects. @@ -426,8 +427,9 @@ impl Float { /// atomic operations). #[repr(C)] pub struct String { - pub(crate) header: Header, - pub(crate) value: ImmutableString, + pub header: Header, + pub size: u64, + pub bytes: *mut u8, } impl String { @@ -441,38 +443,85 @@ impl String { } pub(crate) unsafe fn read<'a>(ptr: *const String) -> &'a str { - (*ptr).value.as_slice() + (*ptr).as_slice() } pub(crate) fn alloc( class: ClassPointer, value: RustString, ) -> *const String { - Self::from_immutable_string(class, ImmutableString::from(value)) + Self::new(class, value.into_bytes()) } pub(crate) fn alloc_permanent( class: ClassPointer, value: RustString, ) -> *const String { - let ptr = - Self::from_immutable_string(class, ImmutableString::from(value)); + let ptr = Self::new(class, value.into_bytes()); unsafe { header_of(ptr) }.set_permanent(); ptr } - pub(crate) fn from_immutable_string( + pub(crate) fn from_bytes( class: ClassPointer, - value: ImmutableString, + bytes: Vec, ) -> *const String { + let string = match RustString::from_utf8(bytes) { + Ok(string) => string, + Err(err) => { + RustString::from_utf8_lossy(&err.into_bytes()).into_owned() + } + }; + + String::new(class, string.into_bytes()) + } + + fn new(class: ClassPointer, mut bytes: Vec) -> *const String { + let len = bytes.len(); + + bytes.reserve_exact(1); + bytes.push(0); + + // Vec and Box<[u8]> don't have a public/stable memory layout. To work + // around that we have to break the Vec apart into a buffer and length, + // and store the two separately. + let mut boxed = bytes.into_boxed_slice(); + let buffer = boxed.as_mut_ptr(); + + forget(boxed); + let ptr = allocate(Layout::new::()) as *mut Self; let obj = unsafe { &mut *ptr }; obj.header.init_atomic(class); - init!(obj.value => value); + init!(obj.size => len as u64); + init!(obj.bytes => buffer); ptr as _ } + + /// Returns a string slice pointing to the underlying bytes. + /// + /// The returned slice _does not_ include the NULL byte. + pub(crate) fn as_slice(&self) -> &str { + unsafe { str::from_utf8_unchecked(self.as_bytes()) } + } + + /// Returns a slice to the underlying bytes, without the NULL byte. + fn as_bytes(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self.bytes, self.size as usize) } + } +} + +impl Drop for String { + fn drop(&mut self) { + unsafe { + drop(Box::from_raw(slice::from_raw_parts_mut( + self.bytes, + (self.size + 1) as usize, + ))); + } + } } #[cfg(test)] @@ -581,4 +630,33 @@ mod tests { unsafe { Class::drop(class) }; } + + #[test] + fn test_string_new() { + let class = Class::object("A".to_string(), 24, 0); + let string = String::new(class, vec![105, 110, 107, 111]); + + unsafe { + assert_eq!((*string).as_bytes(), &[105, 110, 107, 111]); + assert_eq!(String::read(string), "inko"); + Class::drop(class); + } + } + + #[test] + fn test_string_from_bytes() { + let class = Class::object("A".to_string(), 24, 0); + let string = String::from_bytes( + class, + vec![ + 72, 101, 108, 108, 111, 32, 240, 144, 128, 87, 111, 114, 108, + 100, + ], + ); + + unsafe { + assert_eq!(String::read(string), "Hello �World"); + Class::drop(class); + } + } } diff --git a/rt/src/runtime/byte_array.rs b/rt/src/runtime/byte_array.rs index b4c9bd682..c0912cc9e 100644 --- a/rt/src/runtime/byte_array.rs +++ b/rt/src/runtime/byte_array.rs @@ -1,4 +1,3 @@ -use crate::immutable_string::ImmutableString; use crate::mem::{tagged_int, Bool, ByteArray, Int, String as InkoString}; use crate::state::State; use std::cmp::min; @@ -105,10 +104,7 @@ pub unsafe extern "system" fn inko_byte_array_to_string( state: *const State, bytes: *const ByteArray, ) -> *const InkoString { - let bytes = &(*bytes).value; - let string = ImmutableString::from_utf8(bytes.clone()); - - InkoString::from_immutable_string((*state).string_class, string) + InkoString::from_bytes((*state).string_class, (*bytes).value.clone()) } #[no_mangle] @@ -116,10 +112,7 @@ pub unsafe extern "system" fn inko_byte_array_drain_to_string( state: *const State, bytes: *mut ByteArray, ) -> *const InkoString { - let bytes = &mut (*bytes); - let string = ImmutableString::from_utf8(bytes.take_bytes()); - - InkoString::from_immutable_string((*state).string_class, string) + InkoString::from_bytes((*state).string_class, (*bytes).take_bytes()) } #[no_mangle] diff --git a/rt/src/runtime/process.rs b/rt/src/runtime/process.rs index b46c5adde..1d84726d5 100644 --- a/rt/src/runtime/process.rs +++ b/rt/src/runtime/process.rs @@ -60,9 +60,7 @@ pub unsafe extern "system" fn inko_process_panic( process: ProcessPointer, message: *const InkoString, ) { - let msg = &(*message).value; - - panic(process, msg); + panic(process, (*message).as_slice()); } #[no_mangle] diff --git a/rt/src/runtime/string.rs b/rt/src/runtime/string.rs index 97b59f59c..d8e8c1178 100644 --- a/rt/src/runtime/string.rs +++ b/rt/src/runtime/string.rs @@ -1,6 +1,4 @@ -use crate::mem::{ - tagged_int, Bool, ByteArray, Float, Int, String as InkoString, -}; +use crate::mem::{ByteArray, Float, Int, String as InkoString}; use crate::process::ProcessPointer; use crate::result::Result as InkoResult; use crate::runtime::process::panic; @@ -23,31 +21,6 @@ pub unsafe extern "system" fn inko_string_new_permanent( InkoString::alloc_permanent((*state).string_class, string) } -#[no_mangle] -pub unsafe extern "system" fn inko_string_equals( - state: *const State, - left: *const InkoString, - right: *const InkoString, -) -> *const Bool { - let state = &*state; - - if InkoString::read(left) == InkoString::read(right) { - state.true_singleton - } else { - state.false_singleton - } -} - -#[no_mangle] -pub unsafe extern "system" fn inko_string_size( - state: *const State, - string: *const InkoString, -) -> *const Int { - let state = &*state; - - Int::new(state.int_class, InkoString::read(string).len() as i64) -} - #[no_mangle] pub unsafe extern "system" fn inko_string_concat( state: *const State, @@ -64,18 +37,6 @@ pub unsafe extern "system" fn inko_string_concat( InkoString::alloc((*state).string_class, buffer) } -#[no_mangle] -pub unsafe extern "system" fn inko_string_byte( - string: *const InkoString, - index: i64, -) -> *const Int { - let byte = i64::from( - *InkoString::read(string).as_bytes().get_unchecked(index as usize), - ); - - tagged_int(byte) -} - #[no_mangle] pub unsafe extern "system" fn inko_string_drop(pointer: *const InkoString) { InkoString::drop(pointer); @@ -233,13 +194,6 @@ pub unsafe extern "system" fn inko_string_slice_bytes( InkoString::alloc((*state).string_class, new_string) } -#[no_mangle] -pub unsafe extern "system" fn inko_string_to_pointer( - string: *const InkoString, -) -> *const u8 { - (*string).value.as_ptr() -} - #[no_mangle] pub unsafe extern "system" fn inko_string_from_pointer( state: *const State, diff --git a/std/src/std/json.inko b/std/src/std/json.inko index 8529693a6..6565ba094 100644 --- a/std/src/std/json.inko +++ b/std/src/std/json.inko @@ -73,7 +73,6 @@ class extern AnyResult { let @value: Any } -fn extern inko_string_byte(string: String, index: Int) -> Int fn extern inko_string_to_int( state: Pointer[Int8], process: Pointer[Int8], @@ -676,7 +675,7 @@ class pub Parser { fn current -> Int { if @index < @size { - inko_string_byte(@string, @index) + @string.byte(@index) } else { EOF } @@ -685,7 +684,7 @@ class pub Parser { fn peek -> Int { let index = @index + 1 - if index < @size { inko_string_byte(@string, index) } else { EOF } + if index < @size { @string.byte(index) } else { EOF } } fn mut identifier(name: String) -> Result[Nil, Error] { diff --git a/std/src/std/string.inko b/std/src/std/string.inko index 09c8a2a55..b326443eb 100644 --- a/std/src/std/string.inko +++ b/std/src/std/string.inko @@ -22,16 +22,8 @@ class extern StringResult { let @value: String } -fn extern inko_string_equals( - state: Pointer[Int8], - left: String, - right: String, -) -> Bool - fn extern inko_string_to_lower(state: Pointer[Int8], string: String) -> String fn extern inko_string_to_upper(state: Pointer[Int8], string: String) -> String -fn extern inko_string_size(state: Pointer[Int8], string: String) -> Int -fn extern inko_string_byte(string: String, index: Int) -> Int fn extern inko_string_slice_bytes( state: Pointer[Int8], string: String, @@ -58,7 +50,6 @@ fn extern inko_string_concat( length: Int, ) -> String -fn extern inko_string_to_pointer(string: String) -> Pointer[Int8] fn extern inko_string_from_pointer( state: Pointer[Int8], pointer: Pointer[Int8], @@ -134,6 +125,12 @@ trait pub ToString { # An UTF-8 encoded and immutable string type. class builtin String { + # The size of the string in bytes, _excluding_ the trailing NULL byte. + let @size: Int64 + + # A pointer to the bytes of this string, including the trailing NULL byte. + let @bytes: Pointer[Int8] + # Returns a `String` created from the given NULL terminated pointer. # # The purpose of this method is to allow creating a `String` from a pointer @@ -216,7 +213,7 @@ class builtin String { # 'foo'.size # => 3 # '😀'.size # => 4 fn pub size -> Int { - inko_string_size(_INKO.state, self) + @size as Int } # Returns the byte at the given byte index. @@ -228,7 +225,7 @@ class builtin String { # 'inko'.byte(0) # => 105 fn pub byte(index: Int) -> Int { bounds_check(index, size) - inko_string_byte(self, index) + byte_unchecked(index) } # Slices `self` into a substring. @@ -557,7 +554,11 @@ class builtin String { # This method is meant to be used when passing strings to foreign functions # (i.e. `*char` arguments). You should avoid using it for anything else. fn pub to_pointer -> Pointer[Int8] { - inko_string_to_pointer(self) + @bytes + } + + fn byte_unchecked(index: Int) -> Int { + (@bytes as Int + index as Pointer[Int8]).0 as Int } } @@ -616,7 +617,19 @@ impl Equal[String] for String { # # 'foo' == 'bar' # => false fn pub ==(other: ref String) -> Bool { - inko_string_equals(_INKO.state, self, other) + let size = self.size + + if size != other.size { return false } + + let mut idx = 0 + + while idx < size { + if byte_unchecked(idx) != other.byte_unchecked(idx) { return false } + + idx += 1 + } + + true } } @@ -625,8 +638,7 @@ impl Hash for String { let mut index = 0 while index < size { - hasher.write(inko_string_byte(self, index)) - + hasher.write(byte_unchecked(index)) index += 1 } } @@ -713,7 +725,7 @@ impl Iter[Int] for Bytes { impl BytesTrait for Bytes { fn pub mut next_byte -> Int { if @index < @string.size { - inko_string_byte(@string, (@index := @index + 1)) + @string.byte_unchecked(@index := @index + 1) } else { EOF }