Skip to content

Commit

Permalink
Define more of String in Inko
Browse files Browse the repository at this point in the history
This commit changes the implementation of String, both in the standard
and runtime libraries, such that the standard library has a bit more
knowledge of and control over it. For example, the memory layout no longer
depends on a Box<[T]>, whose memory layout isn't specified, and
instead the runtime library defines a specific layout mirrored by the
standard library. This allows the removal of several runtime functions, in
favour of implementing the logic in Inko's standard library.

Changelog: changed
  • Loading branch information
yorickpeterse committed Jul 14, 2023
1 parent 524626e commit f08169d
Show file tree
Hide file tree
Showing 10 changed files with 137 additions and 238 deletions.
11 changes: 4 additions & 7 deletions compiler/src/llvm/layouts.rs
Expand Up @@ -13,7 +13,7 @@ use std::cmp::max;
use std::collections::HashMap;
use types::{
ClassId, MethodId, MethodSource, BOOLEAN_ID, BYTE_ARRAY_ID, CALL_METHOD,
CHANNEL_ID, DROPPER_METHOD, FLOAT_ID, INT_ID, NIL_ID, STRING_ID,
CHANNEL_ID, DROPPER_METHOD, FLOAT_ID, INT_ID, NIL_ID,
};

/// The size of an object header.
Expand Down Expand Up @@ -248,11 +248,6 @@ impl<'ctx> Layouts<'ctx> {
header,
context.f64_type().into(),
),
STRING_ID => context.builtin_type(
&name,
header,
context.pointer_type().into(),
),
BOOLEAN_ID | NIL_ID => {
let typ = context.opaque_struct(&name);

Expand Down Expand Up @@ -305,7 +300,9 @@ impl<'ctx> Layouts<'ctx> {
};

for id in mir.classes.keys() {
if id.is_builtin() {
// String _is_ builtin, but we still process it such that the
// standard library can define fields for it.
if id.is_builtin() && *id != ClassId::string() {
continue;
}

Expand Down
10 changes: 10 additions & 0 deletions compiler/src/type_check/expressions.rs
Expand Up @@ -1493,6 +1493,16 @@ impl<'a> CheckMethodBody<'a> {
return TypeRef::Error;
};

if class.is_builtin() {
self.state.diagnostics.error(
DiagnosticId::InvalidType,
"Instances of builtin classes can't be created using the \
class literal syntax",
self.file(),
node.location.clone(),
);
}

let require_send = class.kind(self.db()).is_async();
let ins = ClassInstance::empty(self.db_mut(), class);
let mut assigned = HashSet::new();
Expand Down
141 changes: 0 additions & 141 deletions rt/src/immutable_string.rs

This file was deleted.

1 change: 0 additions & 1 deletion rt/src/lib.rs
Expand Up @@ -8,7 +8,6 @@ pub mod macros;
pub mod arc_without_weak;
pub mod config;
pub mod context;
pub mod immutable_string;
pub mod mem;
pub mod memory_map;
pub mod network_poller;
Expand Down
100 changes: 89 additions & 11 deletions rt/src/mem.rs
@@ -1,8 +1,9 @@
use crate::immutable_string::ImmutableString;
use std::alloc::{alloc, alloc_zeroed, dealloc, handle_alloc_error, Layout};
use std::mem::{align_of, size_of, swap};
use std::mem::{align_of, forget, size_of, swap};
use std::ops::Deref;
use std::ptr::drop_in_place;
use std::slice;
use std::str;
use std::string::String as RustString;

/// The alignment to use for Inko objects.
Expand Down Expand Up @@ -426,8 +427,9 @@ impl Float {
/// atomic operations).
#[repr(C)]
pub struct String {
// NOTE(review): this view lists `header` twice and shows both `value` and
// `size`/`bytes`; it looks like the old and new field sets of a diff shown
// together. Confirm which set is current.
pub(crate) header: Header,
pub(crate) value: ImmutableString,
pub header: Header,
/// The number of bytes in the string. `String::new` stores the length it
/// measured before appending the trailing NUL byte.
pub size: u64,
/// Pointer to the start of the byte buffer set up by `String::new` (the
/// string's bytes followed by a single NUL byte).
pub bytes: *mut u8,
}

impl String {
Expand All @@ -441,38 +443,85 @@ impl String {
}

/// Returns the contents of the `String` behind `ptr` as a string slice.
///
/// # Safety
///
/// `ptr` is dereferenced without any checks, so it must point to a valid
/// `String`. The lifetime `'a` of the returned slice is picked by the caller
/// and isn't tied to the pointee.
pub(crate) unsafe fn read<'a>(ptr: *const String) -> &'a str {
(*ptr).value.as_slice()
(*ptr).as_slice()
}

/// Allocates a `String` of the given class from an owned Rust string,
/// returning a raw pointer to it.
pub(crate) fn alloc(
class: ClassPointer,
value: RustString,
) -> *const String {
Self::from_immutable_string(class, ImmutableString::from(value))
Self::new(class, value.into_bytes())
}

/// Allocates a `String` of the given class from an owned Rust string and
/// flags its header as permanent before returning the pointer.
pub(crate) fn alloc_permanent(
class: ClassPointer,
value: RustString,
) -> *const String {
let ptr =
Self::from_immutable_string(class, ImmutableString::from(value));
let ptr = Self::new(class, value.into_bytes());

// Mark the freshly allocated object's header as permanent.
unsafe { header_of(ptr) }.set_permanent();
ptr
}

/// Allocates a `String` of the given class from raw bytes.
///
/// The bytes don't have to be valid UTF-8: if strict decoding fails, they are
/// converted lossily instead of producing an error.
// NOTE(review): two header lines and two parameter lines appear below; this
// looks like the old and new signatures of a diff shown together. Confirm.
pub(crate) fn from_immutable_string(
pub(crate) fn from_bytes(
class: ClassPointer,
value: ImmutableString,
bytes: Vec<u8>,
) -> *const String {
// Try the strict conversion first, as it reuses the Vec as-is. Only when that
// fails do we take the bytes back out of the error and decode them lossily.
let string = match RustString::from_utf8(bytes) {
Ok(string) => string,
Err(err) => {
RustString::from_utf8_lossy(&err.into_bytes()).into_owned()
}
};

String::new(class, string.into_bytes())
}

/// Allocates a new `String` of the given class that takes over `bytes`.
///
/// A NUL byte is appended to the buffer, but the stored `size` doesn't count
/// it. The bytes are stored as given; this function doesn't check that they
/// are valid UTF-8.
fn new(class: ClassPointer, mut bytes: Vec<u8>) -> *const String {
// The length of the contents only, measured before the NUL byte is added.
let len = bytes.len();

bytes.reserve_exact(1);
bytes.push(0);

// Vec and Box<[u8]> don't have a public/stable memory layout. To work
// around that we have to break the Vec apart into a buffer and length,
// and store the two separately.
let mut boxed = bytes.into_boxed_slice();
let buffer = boxed.as_mut_ptr();

// Skip the Box destructor so the buffer stays allocated. The `Drop`
// implementation of `String` rebuilds a Box from `bytes` and `size + 1`
// to release it.
// NOTE(review): `Box::into_raw` would express this hand-off in one step and
// mirror the `Box::from_raw` used when dropping; consider it.
forget(boxed);

let ptr = allocate(Layout::new::<Self>()) as *mut Self;
let obj = unsafe { &mut *ptr };

obj.header.init_atomic(class);
// NOTE(review): `value` isn't a parameter or local of this function; this
// line looks like the removed side of the diff. Confirm it is gone.
init!(obj.value => value);
init!(obj.size => len as u64);
init!(obj.bytes => buffer);
ptr as _
}

/// Borrows the contents of this string as a `&str`.
///
/// The trailing NULL byte is _not_ part of the returned slice.
pub(crate) fn as_slice(&self) -> &str {
    let contents = self.as_bytes();

    // SAFETY: the bytes are assumed to be valid UTF-8, this isn't verified
    // here. The constructors visible alongside this method (`alloc`,
    // `alloc_permanent` and `from_bytes`) only hand valid UTF-8 to `new`.
    unsafe { str::from_utf8_unchecked(contents) }
}

/// Borrows the underlying bytes, leaving out the trailing NULL byte.
fn as_bytes(&self) -> &[u8] {
    let len = self.size as usize;

    // SAFETY: `new` sets `bytes` and `size` together, with `size` being the
    // number of content bytes in the buffer `bytes` points to. This assumes
    // neither field has been changed since.
    unsafe { slice::from_raw_parts(self.bytes, len) }
}
}

impl Drop for String {
    /// Releases the byte buffer this string points to.
    fn drop(&mut self) {
        // The buffer is one byte longer than `size`, as `String::new` appends
        // a NUL byte after measuring the length.
        let len = (self.size + 1) as usize;

        // SAFETY: this assumes `bytes` still points to the boxed slice of
        // `size + 1` bytes that `String::new` leaked, and that it hasn't been
        // released already.
        unsafe {
            let buffer: *mut [u8] = slice::from_raw_parts_mut(self.bytes, len);

            drop(Box::from_raw(buffer));
        }
    }
}

#[cfg(test)]
Expand Down Expand Up @@ -581,4 +630,33 @@ mod tests {

unsafe { Class::drop(class) };
}

#[test]
fn test_string_new() {
    // The ASCII bytes of "inko": 105, 110, 107, 111.
    let input = b"inko".to_vec();
    let class = Class::object("A".to_string(), 24, 0);
    let string = String::new(class, input);

    // The NUL byte added by `String::new` must not show up in either view.
    unsafe {
        let raw: &[u8] = (*string).as_bytes();
        let text: &str = String::read(string);

        assert_eq!(raw, b"inko");
        assert_eq!(text, "inko");
        Class::drop(class);
    }
}

#[test]
fn test_string_from_bytes() {
    // "Hello " and "World" with the bytes 240, 144, 128 in between, which
    // aren't valid UTF-8 on their own.
    let input = b"Hello \xF0\x90\x80World".to_vec();
    let class = Class::object("A".to_string(), 24, 0);
    let string = String::from_bytes(class, input);

    // The invalid bytes are expected to turn into one replacement character.
    unsafe {
        let text = String::read(string);

        assert_eq!(text, "Hello �World");
        Class::drop(class);
    }
}
}
11 changes: 2 additions & 9 deletions rt/src/runtime/byte_array.rs
@@ -1,4 +1,3 @@
use crate::immutable_string::ImmutableString;
use crate::mem::{tagged_int, Bool, ByteArray, Int, String as InkoString};
use crate::state::State;
use std::cmp::min;
Expand Down Expand Up @@ -105,21 +104,15 @@ pub unsafe extern "system" fn inko_byte_array_to_string(
state: *const State,
bytes: *const ByteArray,
) -> *const InkoString {
let bytes = &(*bytes).value;
let string = ImmutableString::from_utf8(bytes.clone());

InkoString::from_immutable_string((*state).string_class, string)
InkoString::from_bytes((*state).string_class, (*bytes).value.clone())
}

/// Turns the contents of a `ByteArray` into an Inko `String`, taking the bytes
/// out of the array (`take_bytes()`) rather than cloning them.
///
/// # Safety
///
/// `state` and `bytes` are dereferenced without any checks, so both must be
/// valid pointers.
#[no_mangle]
pub unsafe extern "system" fn inko_byte_array_drain_to_string(
state: *const State,
bytes: *mut ByteArray,
) -> *const InkoString {
let bytes = &mut (*bytes);
let string = ImmutableString::from_utf8(bytes.take_bytes());

InkoString::from_immutable_string((*state).string_class, string)
InkoString::from_bytes((*state).string_class, (*bytes).take_bytes())
}

#[no_mangle]
Expand Down
4 changes: 1 addition & 3 deletions rt/src/runtime/process.rs
Expand Up @@ -60,9 +60,7 @@ pub unsafe extern "system" fn inko_process_panic(
process: ProcessPointer,
message: *const InkoString,
) {
let msg = &(*message).value;

panic(process, msg);
panic(process, (*message).as_slice());
}

#[no_mangle]
Expand Down

0 comments on commit f08169d

Please sign in to comment.