diff --git a/Cargo.lock b/Cargo.lock
index b3cef3857b..2e4912e6c5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -402,9 +402,9 @@ dependencies = [

 [[package]]
 name = "blake3"
-version = "1.5.0"
+version = "1.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87"
+checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52"
 dependencies = [
  "arrayref",
  "arrayvec",
diff --git a/Cargo.toml b/Cargo.toml
index 22e7e6d122..7470873564 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -109,7 +109,7 @@ axum-extra = { version = "0.9", features = ["typed-header"] }
 backtrace = "0.3.66"
 base64 = "0.21.2"
 bitflags = "2.3.3"
-blake3 = "1.5"
+blake3 = "1.5.1"
 brotli = "3.5"
 byte-unit = "4.0.18"
 bytes = "1.2.1"
diff --git a/crates/table/benches/page.rs b/crates/table/benches/page.rs
index a32fe83035..3b88a218ae 100644
--- a/crates/table/benches/page.rs
+++ b/crates/table/benches/page.rs
@@ -1,5 +1,5 @@
 use core::hash::BuildHasher;
-use core::mem::{self, MaybeUninit};
+use core::mem;
 use core::time::Duration;
 use criterion::measurement::{Measurement, WallTime};
 use criterion::{black_box, criterion_group, criterion_main, Bencher, BenchmarkId, Criterion, Throughput};
@@ -12,12 +12,11 @@ use spacetimedb_table::bflatn_from::serialize_row_from_page;
 use spacetimedb_table::bflatn_to::write_row_to_page;
 use spacetimedb_table::blob_store::NullBlobStore;
 use spacetimedb_table::eq::eq_row_in_page;
-use spacetimedb_table::indexes::{PageOffset, RowHash};
+use spacetimedb_table::indexes::{Byte, Bytes, PageOffset, RowHash};
 use spacetimedb_table::layout::{row_size_for_type, RowTypeLayout};
 use spacetimedb_table::page::Page;
 use spacetimedb_table::row_hash::hash_row_in_page;
 use spacetimedb_table::row_type_visitor::{row_type_visitor, VarLenVisitorProgram};
-use spacetimedb_table::util;
 use spacetimedb_table::var_len::{AlignedVarLenOffsets, NullVarLenVisitor, VarLenGranule, VarLenMembers, VarLenRef};

 fn time<R>(acc: &mut Duration, body: impl FnOnce() -> R) -> R {
@@ -51,8 +50,11 @@ fn clear_zero(page: &mut Page) {
     unsafe { page.zero_data() };
 }

-fn as_bytes<T>(t: &T) -> &[MaybeUninit<u8>] {
-    let ptr = (t as *const T).cast::<MaybeUninit<u8>>();
+// Strictly this would be unsafe,
+// since it causes UB when applied to types that contain padding/`poison`,
+// but it's a benchmark so who cares.
+fn as_bytes<T>(t: &T) -> &Bytes {
+    let ptr = (t as *const T).cast::<Byte>();
     unsafe { std::slice::from_raw_parts(ptr, mem::size_of::<T>()) }
 }

@@ -66,12 +68,15 @@ unsafe trait Row {
 }

 #[allow(clippy::missing_safety_doc)] // It's a benchmark, clippy. Who cares.
+/// Apply only to types which:
+/// - Contain no padding bytes.
+/// - Contain no members which are stored BFLATN as var-len.
 unsafe trait FixedLenRow: Row + Sized {
-    fn as_bytes(&self) -> &[MaybeUninit<u8>] {
+    fn as_bytes(&self) -> &Bytes {
         as_bytes(self)
     }

-    unsafe fn from_bytes(bytes: &[MaybeUninit<u8>]) -> &Self {
+    unsafe fn from_bytes(bytes: &Bytes) -> &Self {
         let ptr = bytes.as_ptr();
         debug_assert_eq!(ptr as usize % mem::align_of::<Self>(), 0);
         debug_assert_eq!(bytes.len(), mem::size_of::<Self>());
@@ -241,6 +246,7 @@ fn insert_var_len_clean_page(c: &mut Criterion, visitor: &impl VarLenMembers, vi
         |b, &len_in_bytes| {
             let mut page = Page::new(row_size_for_type::<VarLenRef>());
             unsafe { page.zero_data() };
+            // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
             let data = [0xa5u8].repeat(len_in_bytes);
             iter_time_with_page(b, &mut page, clear_zero, |_, _, page| {
                 fill_with_var_len(page, &data, visitor)
@@ -263,6 +269,7 @@ &len_in_bytes,
             |b, &len_in_bytes| {
                 let mut page = Page::new(row_size_for_type::<VarLenRef>());
+                // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
                 let data = [0xa5u8].repeat(len_in_bytes);
                 fill_with_var_len(&mut page, &data, visitor);

@@ -294,11 +301,13 @@ fn insert_opt_str(c: &mut Criterion) {
     assert!(fixed_row_size.len() == 6);

     let mut clean_page_group = c.benchmark_group("insert_optional_str/clean_page");
-    let mut variant_none = util::uninit_array::<u8, 6>();
-    variant_none[4].write(1);
+    // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
+    let mut variant_none = [0xa5u8; 6];
+    variant_none[0] = 1;

-    let mut variant_some = util::uninit_array::<u8, 6>();
-    variant_some[4].write(0);
+    // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
+    let mut variant_some = [0xa5u8; 6];
+    variant_some[0] = 0;

     for some_ratio in [0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] {
         for &data_length_in_bytes in if some_ratio == 0.0 { &[0][..] } else { &VL_SIZES } {
@@ -311,6 +320,7 @@ fn insert_opt_str(c: &mut Criterion) {

             clean_page_group.throughput(Throughput::Bytes((rows_per_page * avg_row_useful_size) as u64));

+            // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
             let var_len_data = [0xa5].repeat(data_length_in_bytes);

             clean_page_group.bench_with_input(
                 BenchmarkId::new(
@@ -391,6 +401,7 @@ fn iter_read_fixed_len<Row: FixedLenRow>(c: &mut Criterion) {
             // Construct a page which is approximately `fullness_ratio` full,
             // i.e. contains approximately `fullness_ratio * U64S_PER_PAGE` rows.
             let mut partial_page = Page::new(row_size_for_type::<Row>());
+            // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
             fill_with_fixed_len::<Row>(&mut partial_page, Row::from_u64(0xa5a5a5a5_a5a5a5a5), &visitor);
             // `delete_u64s_to_approx_fullness_ratio` uses a seeded `StdRng`,
             // so this should be consistent-ish.
@@ -414,6 +425,7 @@ fn iter_read_fixed_len<Row: FixedLenRow>(c: &mut Criterion) {
     }

     let mut full_page = Page::new(row_size_for_type::<Row>());
+    // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
     fill_with_fixed_len(&mut full_page, Row::from_u64(0xa5a5a5a5_a5a5a5a5), &visitor);
     group.throughput(Throughput::Bytes((full_page.num_rows() * mem::size_of::<Row>()) as u64));
     group.bench_with_input(
@@ -436,6 +448,7 @@ fn copy_filter_into_fixed_len_keep_ratio<Row: FixedLenRow>(b: &mut Bencher, keep
     let mut target_page = Page::new(row_size_for_type::<Row>());
     let mut src_page = Page::new(row_size_for_type::<Row>());
+    // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
     fill_with_fixed_len::<Row>(&mut src_page, Row::from_u64(0xa5a5a5a5_a5a5a5a5), &visitor);

     let mut rng = StdRng::seed_from_u64(0xa5a5a5a5_a5a5a5a5);
@@ -525,6 +538,7 @@ fn product_value_test_cases() -> impl Iterator<
         (
             "U32",
             [AlgebraicType::U32].into(),
+            // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
             product![0xa5a5_a5a5u32],
             Some(NullVarLenVisitor),
             Some(AlignedVarLenOffsets::from_offsets(&[])),
@@ -539,6 +553,7 @@ fn product_value_test_cases() -> impl Iterator<
         (
             "Option/Some",
             [AlgebraicType::option(AlgebraicType::U32)].into(),
+            // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
             product![AlgebraicValue::OptionSome(AlgebraicValue::U32(0xa5a5_a5a5))],
             Some(NullVarLenVisitor),
             Some(AlignedVarLenOffsets::from_offsets(&[])),
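
An aside on the "no padding bytes" caveat the patch adds above `as_bytes`: the following standalone sketch is not part of the patch, and the type names are invented for illustration. It shows why the benches may only implement `FixedLenRow` for padding-free types once rows are viewed as plain `u8`.

// Sketch only: padding bytes are uninit, so only padding-free types may be viewed as bytes.
use core::mem;

#[allow(dead_code)]
#[repr(C)]
struct Padded {
    a: u8,
    // 3 invisible padding bytes live here so that `b` is 4-byte aligned.
    b: u32,
}

#[repr(C)]
struct Unpadded {
    a: u32,
    b: u32, // size 8 == 4 + 4: no padding anywhere.
}

fn as_bytes<T>(t: &T) -> &[u8] {
    let ptr = (t as *const T).cast::<u8>();
    // Sound only if every byte of `T` is initialized.
    // That holds for `Unpadded`; for `Padded` the padding bytes are uninit,
    // so handing them out as `u8` would be undefined behavior.
    unsafe { core::slice::from_raw_parts(ptr, mem::size_of::<T>()) }
}

fn main() {
    let ok = Unpadded { a: 1, b: 2 };
    assert_eq!(as_bytes(&ok).len(), 8);
}
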
diff --git a/crates/table/benches/page_manager.rs b/crates/table/benches/page_manager.rs
index 41b08c7758..c6e83da94e 100644
--- a/crates/table/benches/page_manager.rs
+++ b/crates/table/benches/page_manager.rs
@@ -1,5 +1,5 @@
 use core::iter;
-use core::mem::{self, MaybeUninit};
+use core::mem;
 use core::time::Duration;
 use criterion::measurement::{Measurement, WallTime};
 use criterion::{
@@ -12,7 +12,8 @@ use spacetimedb_sats::db::def::{TableDef, TableSchema};
 use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductType, ProductValue};
 use spacetimedb_table::blob_store::NullBlobStore;
 use spacetimedb_table::btree_index::BTreeIndex;
-use spacetimedb_table::indexes::{PageOffset, RowPointer, Size, SquashedOffset, PAGE_DATA_SIZE};
+use spacetimedb_table::indexes::Byte;
+use spacetimedb_table::indexes::{Bytes, PageOffset, RowPointer, Size, SquashedOffset, PAGE_DATA_SIZE};
 use spacetimedb_table::layout::{row_size_for_bytes, row_size_for_type};
 use spacetimedb_table::pages::Pages;
 use spacetimedb_table::row_type_visitor::{row_type_visitor, VarLenVisitorProgram};
@@ -45,8 +46,11 @@ fn iter_time_with(
     })
 }

-fn as_bytes<T>(t: &T) -> &[MaybeUninit<u8>] {
-    let ptr = (t as *const T).cast::<MaybeUninit<u8>>();
+// Strictly this would be unsafe,
+// since it causes UB when applied to types that contain padding/`poison`,
+// but it's a benchmark so who cares.
+fn as_bytes<T>(t: &T) -> &Bytes {
+    let ptr = (t as *const T).cast::<Byte>();
     unsafe { std::slice::from_raw_parts(ptr, mem::size_of::<T>()) }
 }

@@ -64,12 +68,15 @@ unsafe trait Row {
 }

 #[allow(clippy::missing_safety_doc)] // It's a benchmark, clippy. Who cares.
+/// Apply only to types which:
+/// - Contain no padding bytes.
+/// - Contain no members which are stored BFLATN as var-len.
 unsafe trait FixedLenRow: Row + Sized {
-    fn as_bytes(&self) -> &[MaybeUninit<u8>] {
+    fn as_bytes(&self) -> &Bytes {
         as_bytes(self)
     }

-    unsafe fn from_bytes(bytes: &[MaybeUninit<u8>]) -> &Self {
+    unsafe fn from_bytes(bytes: &Bytes) -> &Self {
         let ptr = bytes.as_ptr();
         debug_assert_eq!(ptr as usize % mem::align_of::<Self>(), 0);
         debug_assert_eq!(bytes.len(), mem::size_of::<Self>());
@@ -234,6 +241,7 @@ fn insert_one_page_fixed_len<R: FixedLenRow>(c: &mut Criterion) {
         ));
         group.bench_function(name, |b| {
             let mut pages = Pages::default();
+            // `0xa5` is the alternating bit pattern, which makes incorrect accesses obvious.
             insert_one_page_worth_fixed_len(&mut pages, visitor, &R::from_u64(0xa5a5a5a5_a5a5a5a5));
             let pre = |_, pages: &mut Pages| pages.clear();
             iter_time_with(b, &mut pages, pre, |_, _, pages| {
diff --git a/crates/table/benches/var_len_visitor.rs b/crates/table/benches/var_len_visitor.rs
index ee57f228a7..adc0d493c6 100644
--- a/crates/table/benches/var_len_visitor.rs
+++ b/crates/table/benches/var_len_visitor.rs
@@ -1,12 +1,12 @@
 use core::slice;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use spacetimedb_sats::{AlgebraicType, ProductType};
+use spacetimedb_table::indexes::{Byte, Bytes};
 use spacetimedb_table::row_type_visitor::{dump_visitor_program, row_type_visitor, VarLenVisitorProgram};
-use spacetimedb_table::util::uninit_array;
 use spacetimedb_table::var_len::{AlignedVarLenOffsets, NullVarLenVisitor, VarLenMembers, VarLenRef};
-use std::mem::{self, MaybeUninit};
+use std::mem;

-fn visit_count(row: &[MaybeUninit<u8>], visitor: &impl VarLenMembers) {
+fn visit_count(row: &Bytes, visitor: &impl VarLenMembers) {
     black_box(unsafe { visitor.visit_var_len(row) }.count());
 }

@@ -23,8 +23,8 @@ fn visitor_program(row_ty: impl Into<ProductType>) -> VarLenVisitorProgram {
 }

 fn visit_fixed_len(c: &mut C) {
-    let row = &uninit_array::<u64, 1>();
-    let row = row.as_ptr().cast::<MaybeUninit<u8>>();
+    let row = &[0xa5a5_a5a5u32; 2];
+    let row = row.as_ptr().cast::<Byte>();
     let row = unsafe { slice::from_raw_parts(row, mem::size_of::<u64>()) };

     let mut group = c.benchmark_group("visit_fixed_len/u64");
@@ -49,8 +49,8 @@ fn visit_fixed_len(c: &mut C) {
 }

 fn visit_var_len_product(c: &mut C) {
-    let row = &uninit_array::<VarLenRef, 1>();
-    let row = row.as_ptr().cast::<MaybeUninit<u8>>();
+    let row = &[VarLenRef::NULL; 1];
+    let row = row.as_ptr().cast::<Byte>();
     let row = unsafe { slice::from_raw_parts(row, mem::size_of::<VarLenRef>()) };

     let mut group = c.benchmark_group("visit_var_len_product/VarLenRef");
@@ -73,20 +73,20 @@ fn visit_var_len_sum(c: &mut C) {

     let visitor = &visitor_program([AlgebraicType::sum([AlgebraicType::String, AlgebraicType::unit()])]);

-    let row = &mut uninit_array::<u16, 3>();
-    let row = row.as_mut_ptr().cast::<MaybeUninit<u8>>();
+    let row = &mut [0xa5a5u16; 3];
+    let row = row.as_mut_ptr().cast::<Byte>();
     let row = unsafe { slice::from_raw_parts_mut(row, 6) };

     group.bench_function("none/VarLenVisitorProgram", |b| {
         // None
-        row[4].write(1);
+        row[0] = 1;

         b.iter(|| visit_count(row, visitor));
     });

     group.bench_function("some/VarLenVisitorProgram", |b| {
         // Some
-        row[4].write(0);
+        row[0] = 0;

         b.iter(|| visit_count(row, visitor));
     });
diff --git a/crates/table/src/bflatn_from.rs b/crates/table/src/bflatn_from.rs
index 4451c90ab0..0452fb8805 100644
--- a/crates/table/src/bflatn_from.rs
+++ b/crates/table/src/bflatn_from.rs
@@ -110,8 +110,7 @@ unsafe fn serialize_sum<S: Serializer>(
     ty: &SumTypeLayout,
 ) -> Result<S::Ok, S::Error> {
     // Read the tag of the sum value.
-    // SAFETY: `bytes[curr_offset..]` hold a sum value at `ty`.
-    let (tag, data_ty) = unsafe { read_tag(bytes, ty, curr_offset.get()) };
+    let (tag, data_ty) = read_tag(bytes, ty, curr_offset.get());

     // Serialize the variant data value.
     let data_offset = &Cell::new(curr_offset.get() + ty.offset_of_variant_data(tag));
@@ -133,20 +132,9 @@ unsafe fn serialize_sum<S: Serializer>(
 }

 /// Reads the tag of the sum value and selects the data variant type.
-///
-/// # Safety
-///
-/// `bytes[curr_offset..]` has a sum value typed at `ty`.
-pub unsafe fn read_tag<'ty>(
-    bytes: &Bytes,
-    ty: &'ty SumTypeLayout,
-    curr_offset: usize,
-) -> (u8, &'ty AlgebraicTypeLayout) {
+pub fn read_tag<'ty>(bytes: &Bytes, ty: &'ty SumTypeLayout, curr_offset: usize) -> (u8, &'ty AlgebraicTypeLayout) {
     let tag_offset = ty.offset_of_tag();
     let tag = bytes[curr_offset + tag_offset];
-    // SAFETY: Caller promised that `bytes[curr_offset..]` has a sum value typed at `ty`.
-    // We can therefore assume that `curr_offset + tag_offset` refers to a valid `u8`.
-    let tag = unsafe { tag.assume_init() };

     // Extract the variant data type depending on the tag.
     let data_ty = &ty.variants[tag as usize].ty;
diff --git a/crates/table/src/bflatn_to.rs b/crates/table/src/bflatn_to.rs
index 66cfabdbf1..b6d549a0fb 100644
--- a/crates/table/src/bflatn_to.rs
+++ b/crates/table/src/bflatn_to.rs
@@ -11,8 +11,8 @@ use super::{
     },
     page::{GranuleOffsetIter, Page, VarView},
     pages::Pages,
-    util::{maybe_uninit_write_slice, range_move},
-    var_len::{visit_var_len_assume_init, VarLenGranule, VarLenMembers, VarLenRef},
+    util::range_move,
+    var_len::{VarLenGranule, VarLenMembers, VarLenRef},
 };
 use spacetimedb_sats::{bsatn::to_writer, buffer::BufWriter, AlgebraicType, AlgebraicValue, ProductValue, SumValue};
 use thiserror::Error;
@@ -151,12 +151,12 @@ impl BflatnSerializedRowBuffer<'_> {
         // and `fixed_buf.len()` matches exactly the size of the row type.
         // - `fixed_buf`'s `VarLenRef`s are initialized up to `last_allocated_var_len_index`.
         // - `visitor` is proper for the row type.
-        let visitor_iter = unsafe { visit_var_len_assume_init(visitor, self.fixed_buf) };
+        let visitor_iter = unsafe { visitor.visit_var_len(self.fixed_buf) };
         for vlr in visitor_iter.take(self.last_allocated_var_len_index) {
             // SAFETY: The `vlr` came from the allocation in `write_var_len_obj`
             // which wrote it to the fixed part using `write_var_len_ref`.
             // Thus, it points to a valid `VarLenGranule`.
            unsafe { self.var_view.free_object_ignore_blob(*vlr) };
         }
     }

@@ -165,7 +165,8 @@ impl BflatnSerializedRowBuffer<'_> {
         for (vlr, value) in self.large_blob_insertions {
             // SAFETY: `vlr` was given to us by `alloc_for_slice`
             // so it is properly aligned for a `VarLenGranule` and in bounds of the page.
-            // However, as it was added to `self.large_blob_insertion`, it is also uninit.
+            // However, as it was added to `self.large_blob_insertions`,
+            // we have not yet written the hash to that granule.
             unsafe {
                 self.var_view.write_large_blob_hash_to_granule(blob_store, &value, vlr);
             }
@@ -250,7 +251,8 @@ impl BflatnSerializedRowBuffer<'_> {
         // so we need to check that our `ProductValue` has the same number of elements
         // as our `ProductTypeLayout` to be sure it's typed correctly.
         // Otherwise, if the value is too long, we'll discard its fields (whatever),
-        // or if it's too long, we'll leave some fields in the page uninit (very bad).
+        // or if it's too short, we'll leave some fields in the page "uninit"
+        // (actually valid-unconstrained) (very bad).
         if ty.elements.len() != val.elements.len() {
             return Err(Error::WrongType(
                 ty.algebraic_type(),
@@ -302,7 +304,9 @@ impl BflatnSerializedRowBuffer<'_> {
         } else {
             // Write directly to the page.
             // SAFETY: `vlr.first_granule` points to a granule
-            // even though the granule's data is uninit as of yet.
+            // even though the granule's data is not initialized as of yet.
+            // Note that the granule stores valid-unconstrained bytes (i.e. they are not uninit),
+            // but they may be leftovers from a previous allocation.
             let iter = unsafe { self.var_view.granule_offset_iter(vlr.first_granule) };
             let mut writer = GranuleBufWriter { buf: None, iter };
             to_writer(&mut writer, val).unwrap();
@@ -347,7 +351,7 @@ impl BflatnSerializedRowBuffer<'_> {

         // Write to the granule.
         for (to, byte) in write_to.iter_mut().zip(extend_with) {
-            to.write(*byte);
+            *to = *byte;
         }

         slice = rest;
@@ -377,7 +381,7 @@ impl BflatnSerializedRowBuffer<'_> {
     /// Write `bytes: &[u8; N]` starting at the current offset
     /// and advance the offset by `N`.
     fn write_bytes<const N: usize>(&mut self, bytes: &[u8; N]) {
-        maybe_uninit_write_slice(&mut self.fixed_buf[range_move(0..N, self.curr_offset)], bytes);
+        self.fixed_buf[range_move(0..N, self.curr_offset)].copy_from_slice(bytes);
         self.curr_offset += N;
     }

@@ -458,7 +462,7 @@ pub mod test {
     use spacetimedb_sats::proptest::generate_typed_row;

     proptest! {
-        #![proptest_config(ProptestConfig::with_cases(2048))]
+        #![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]
         #[test]
         fn av_serde_round_trip_through_page((ty, val) in generate_typed_row()) {
             let ty: RowTypeLayout = ty.into();
diff --git a/crates/table/src/bflatn_to_bsatn_fast_path.rs b/crates/table/src/bflatn_to_bsatn_fast_path.rs
index a5a98f451b..d3512fbde7 100644
--- a/crates/table/src/bflatn_to_bsatn_fast_path.rs
+++ b/crates/table/src/bflatn_to_bsatn_fast_path.rs
@@ -28,7 +28,6 @@ use crate::{
     },
     util::range_move,
 };
-use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;

 /// A precomputed BSATN layout for a type whose encoded length is a known constant,
 /// enabling fast BFLATN -> BSATN conversion.
@@ -109,14 +108,11 @@ impl MemcpyField {
     ///
     /// - `buf` must be at least `self.bsatn_offset + self.length` long.
     /// - `row` must be at least `self.bflatn_offset + self.length` long.
-    /// - `row[self.bflatn_offset .. self.bflatn_offset + length]` must all be initialized.
     unsafe fn copy(&self, buf: &mut [u8], row: &Bytes) {
         // SAFETY: forward caller requirement #1.
         let to = unsafe { buf.get_unchecked_mut(range_move(0..self.length as usize, self.bsatn_offset as usize)) };
         // SAFETY: forward caller requirement #2.
         let from = unsafe { row.get_unchecked(range_move(0..self.length as usize, self.bflatn_offset as usize)) };
-        // SAFETY: forward caller requirement #3.
-        let from = unsafe { slice_assume_init_ref(from) };
         to.copy_from_slice(from);
     }
diff --git a/crates/table/src/eq.rs b/crates/table/src/eq.rs
index 40d5e186ac..7b33c2c0a4 100644
--- a/crates/table/src/eq.rs
+++ b/crates/table/src/eq.rs
@@ -11,7 +11,6 @@ use super::{
     util::range_move,
     var_len::VarLenRef,
 };
-use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;

 /// Equates row `a` in `page_a` with its fixed part starting at `fixed_offset_a`
 /// to row `b` in `page_b` with its fixed part starting at `fixed_offset_b`.
@@ -120,10 +119,8 @@ unsafe fn eq_value(ctx: &mut EqCtx<'_, '_>, ty: &AlgebraicTypeLayout) -> bool {
     match ty {
         AlgebraicTypeLayout::Sum(ty) => {
             // Read the tags of the sum values.
-            // SAFETY: `ctx.a.bytes[curr_offset..]` hold a sum value at `ty`.
-            let (tag_a, data_ty) = unsafe { read_tag(ctx.a.bytes, ty, ctx.curr_offset) };
-            // SAFETY: `ctx.b.bytes[curr_offset..]` hold a sum value at `ty`.
-            let (tag_b, _) = unsafe { read_tag(ctx.b.bytes, ty, ctx.curr_offset) };
+            let (tag_a, data_ty) = read_tag(ctx.a.bytes, ty, ctx.curr_offset);
+            let (tag_b, _) = read_tag(ctx.b.bytes, ty, ctx.curr_offset);

             // The tags must match!
             if tag_a != tag_b {
@@ -158,11 +155,7 @@
         | &AlgebraicTypeLayout::I128
         | &AlgebraicTypeLayout::U128
         | &AlgebraicTypeLayout::F32
-        | &AlgebraicTypeLayout::F64 => {
-            // SAFETY: `value_a/b` are valid,
-            // so `&ctx.a/b.bytes[range_move(0..ty.size(), *ctx.curr_offset)]` contains init bytes.
-            unsafe { eq_byte_array(ctx, ty.size()) }
-        }
+        | &AlgebraicTypeLayout::F64 => eq_byte_array(ctx, ty.size()),

         // The var-len cases.
         &AlgebraicTypeLayout::String | AlgebraicTypeLayout::VarLen(_) => {
@@ -209,15 +202,9 @@ unsafe fn eq_vlo(ctx: &mut EqCtx<'_, '_>) -> bool {
 }

 /// Equates the byte arrays `data_a/data_b = ctx.a/b.bytes[range_move(0..len, ctx.curr_offset)]`
 /// and advances the offset.
-///
-/// SAFETY: `data_a/b` must both be initialized as valid `&[u8]`s.
-unsafe fn eq_byte_array(ctx: &mut EqCtx<'_, '_>, len: usize) -> bool {
+fn eq_byte_array(ctx: &mut EqCtx<'_, '_>, len: usize) -> bool {
     let data_a = &ctx.a.bytes[range_move(0..len, ctx.curr_offset)];
     let data_b = &ctx.b.bytes[range_move(0..len, ctx.curr_offset)];
     ctx.curr_offset += len;
-    // SAFETY: Caller promised that `data_a` was initialized.
-    let data_a = unsafe { slice_assume_init_ref(data_a) };
-    // SAFETY: Caller promised that `data_b` was initialized.
-    let data_b = unsafe { slice_assume_init_ref(data_b) };
     data_a == data_b
 }
diff --git a/crates/table/src/eq_to_pv.rs b/crates/table/src/eq_to_pv.rs
index 0777dd1b54..402076b4dd 100644
--- a/crates/table/src/eq_to_pv.rs
+++ b/crates/table/src/eq_to_pv.rs
@@ -109,8 +109,7 @@ unsafe fn eq_value(ctx: &mut EqCtx<'_>, ty: &AlgebraicTypeLayout, rhs: &Algebrai
     match (ty, rhs) {
         (AlgebraicTypeLayout::Sum(ty), AlgebraicValue::Sum(rhs)) => {
             // Read the tag of the sum value of `lhs`.
-            // SAFETY: `ctx.lhs.bytes[curr_offset..]` hold a sum value at `ty`.
-            let (tag_lhs, data_ty) = unsafe { read_tag(ctx.lhs.bytes, ty, ctx.curr_offset) };
+            let (tag_lhs, data_ty) = read_tag(ctx.lhs.bytes, ty, ctx.curr_offset);

             // The tags must match!
             if tag_lhs != rhs.tag {
@@ -230,7 +229,7 @@ mod tests {
     use spacetimedb_sats::proptest::generate_typed_row;

     proptest! {
-        #![proptest_config(ProptestConfig::with_cases(2048))]
+        #![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]
         #[test]
         fn pv_row_ref_eq((ty, val) in generate_typed_row()) {
             // Turn `val` into a `RowRef`.
diff --git a/crates/table/src/fixed_bit_set.rs b/crates/table/src/fixed_bit_set.rs
index 303552c935..0b392fd655 100644
--- a/crates/table/src/fixed_bit_set.rs
+++ b/crates/table/src/fixed_bit_set.rs
@@ -274,7 +274,7 @@ pub(crate) mod test {
     const MAX_NBITS: usize = 1000;

     proptest! {
-        #![proptest_config(ProptestConfig::with_cases(2048))]
+        #![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]

         #[test]
         fn after_new_there_are_no_bits_set(nbits in 0..MAX_NBITS) {
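
The `with_cases(if cfg!(miri) { 8 } else { 2048 })` change above recurs across several test modules in this patch. As a rough standalone illustration (assuming a `proptest` dev-dependency; the property tested here is made up), the pattern looks like this in isolation:

use proptest::prelude::*;

proptest! {
    // Miri runs each case under an interpreter that checks for UB,
    // at a slowdown of several orders of magnitude, so run far fewer cases there.
    #![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]

    #[test]
    fn le_bytes_round_trip(x in any::<u64>()) {
        prop_assert_eq!(u64::from_le_bytes(x.to_le_bytes()), x);
    }
}
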
diff --git a/crates/table/src/indexes.rs b/crates/table/src/indexes.rs
index 7d6d009f0f..b4676f6dbd 100644
--- a/crates/table/src/indexes.rs
+++ b/crates/table/src/indexes.rs
@@ -5,13 +5,16 @@ use super::util::range_move;
 use crate::static_assert_size;
 use ahash::RandomState;
 use core::fmt;
-use core::mem::MaybeUninit;
 use core::ops::{AddAssign, Div, Mul, Range, SubAssign};
 use derive_more::{Add, Sub};
 use spacetimedb_data_structures::map::ValidAsIdentityHash;

-/// A byte is a possibly uninit `u8`.
-pub type Byte = MaybeUninit<u8>;
+/// A byte is a `u8`.
+///
+/// Previous implementations used `MaybeUninit<u8>` here,
+/// but it became necessary to serialize pages to enable snapshotting,
+/// so we require that all bytes in a page be valid `u8`s, never uninit.
+pub type Byte = u8;

 /// A slice of [`Byte`]s.
 pub type Bytes = [Byte];
diff --git a/crates/table/src/page.rs b/crates/table/src/page.rs
index 4964d1c5f8..db46adf868 100644
--- a/crates/table/src/page.rs
+++ b/crates/table/src/page.rs
@@ -13,7 +13,21 @@
 //!
 //! - `valid` refers to, when referring to a type, granule, or row,
 //!   depending on the context, a memory location that holds a *safe* object.
-//!   When "valid for writes" is used, it refers to the `MaybeUninit` case.
+//!   When "valid for writes" is used, the location must be properly aligned
+//!   and none of its bytes may be uninit,
+//!   but the value need not be valid at the type in question.
+//!   "Valid for writes" is equivalent to valid-unconstrained.
+//!
+//! - `valid-unconstrained` means, when referring to a memory location with a given type,
+//!   that the location stores a byte pattern which Rust/LLVM's memory model recognizes as valid,
+//!   and therefore must not contain any uninit,
+//!   but the value is not required to be logically meaningful,
+//!   and no code may depend on the data within it to uphold any invariants.
+//!   E.g. an unallocated [`VarLenGranule`] within a page stores valid-unconstrained bytes,
+//!   because the bytes are either 0 from the initial [`alloc_zeroed`] of the page,
+//!   or contain stale data from a previously freed [`VarLenGranule`].
+//!
+//! - `unused` means that it is safe to overwrite a block of memory without cleaning up its previous value.
 //!
 //! See the post [Two Kinds of Invariants: Safety and Validity][ralf_safe_valid]
 //! for a discussion on safety and validity invariants.
@@ -23,18 +37,10 @@ use super::{
     fixed_bit_set::FixedBitSet,
     indexes::{Byte, Bytes, PageOffset, Size, PAGE_HEADER_SIZE, PAGE_SIZE},
     layout::MIN_ROW_SIZE,
-    util::maybe_uninit_write_slice,
-    var_len::{
-        is_granule_offset_aligned, visit_var_len_assume_init, VarLenGranule, VarLenGranuleHeader, VarLenMembers,
-        VarLenRef,
-    },
+    var_len::{is_granule_offset_aligned, VarLenGranule, VarLenGranuleHeader, VarLenMembers, VarLenRef},
 };
 use crate::{fixed_bit_set::IterSet, static_assert_size};
-use core::{
-    mem::{self, MaybeUninit},
-    ops::ControlFlow,
-    ptr,
-};
+use core::{mem, ops::ControlFlow, ptr};
 use thiserror::Error;

 #[derive(Error, Debug)]
@@ -111,9 +117,9 @@ impl FreeCellRef {
         let next = self.replace(new_head);
         let new_head = adjust_free(new_head);
         // SAFETY: Per caller contract, `new_head` is in bounds of `row_data`.
-        // SAFETY: Moreover, `new_head` points to an uninit `FreeCellRef` so we can write to it.
-        let next_slot: &mut MaybeUninit<FreeCellRef> = unsafe { get_mut(row_data, new_head) };
-        next_slot.write(next);
+        // Moreover, `new_head` points to an unused `FreeCellRef`, so we can write to it.
+        let next_slot: &mut FreeCellRef = unsafe { get_mut(row_data, new_head) };
+        *next_slot = next;
     }
 }

@@ -140,7 +146,9 @@ struct FixedHeader {
     // TODO(stable-module-abi): should this be inlined into the page?
     /// For each fixed-length row slot, true if a row is stored there,
-    /// false if the slot is uninit.
+    /// false if the slot is unallocated.
+    ///
+    /// Unallocated row slots store valid-unconstrained bytes, i.e. are never uninit.
     present_rows: FixedBitSet,

     #[cfg(debug_assertions)]
@@ -606,7 +614,7 @@ impl<'page> VarView<'page> {
             //
             // 2. `next` is either NULL or was initialized in the previous loop iteration.
             //
-            // 3. `granule` points to uninit data as the space was just allocated.
+            // 3. `granule` points to an unused slot as the space was just allocated.
             unsafe { self.write_chunk_to_granule(chunk, len, granule, next) };
             next = granule;
         }
@@ -623,7 +631,7 @@ impl<'page> VarView<'page> {
     /// Allocates a granule for a large blob object
     /// and returns a [`VarLenRef`] pointing to that granule.
     ///
-    /// The granule is left completely uninitialized.
+    /// The granule is not initialized by this method, and contains valid-unconstrained bytes.
     /// It is the caller's responsibility to initialize it with a [`BlobHash`](super::blob_hash::BlobHash).
     #[cold]
     fn alloc_blob_hash(&mut self) -> Result<PageOffset, Error> {
@@ -638,7 +646,8 @@ impl<'page> VarView<'page> {
     ///
     /// # Safety
     ///
-    /// `vlr.first_granule` must point to an uninit `VarLenGranule` in bounds of this page.
+    /// `vlr.first_granule` must point to an unused `VarLenGranule` in bounds of this page,
+    /// which must be valid for writes.
     pub unsafe fn write_large_blob_hash_to_granule(
         &mut self,
         blob_store: &mut dyn BlobStore,
@@ -651,11 +660,11 @@ impl<'page> VarView<'page> {
         // SAFETY:
         // 1. `granule` is properly aligned for `VarLenGranule` and is in bounds of the page.
         // 2. The null granule is trivially initialized.
-        // 3. The caller promised that `granule` is uninit.
+        // 3. The caller promised that `granule` is safe to overwrite.
         unsafe { self.write_chunk_to_granule(&hash.data, hash.data.len(), granule, PageOffset::VAR_LEN_NULL) };
     }

-    /// Write the `chunk` (data) to the uninit [`VarLenGranule`] pointed to by `granule`,
+    /// Write the `chunk` (data) to the [`VarLenGranule`] pointed to by `granule`,
     /// set the granule's length to be `len`,
     /// and set the next granule in the list to `next`.
     ///
@@ -668,8 +677,8 @@ impl<'page> VarView<'page> {
     ///    before the granule-list is read from (e.g., iterated on).
     ///    The null granule is considered trivially initialized.
     ///
-    /// 3. The space pointed to by `granule`
-    ///    must be unused/freed/uninit as it will be overwritten here.
+    /// 3. The space pointed to by `granule` must be unused and valid for writes,
+    ///    and will be overwritten here.
     unsafe fn write_chunk_to_granule(&mut self, chunk: &[u8], len: usize, granule: PageOffset, next: PageOffset) {
         let granule = self.adjuster()(granule);
         // SAFETY: A `PageOffset` is always in bounds of the page.
@@ -694,32 +703,37 @@ impl<'page> VarView<'page> {
             header.write(VarLenGranuleHeader::new(len as u8, next));
         }

-        // Copy the data into the granule.
         // SAFETY: We can treat any part of `row_data` as `.data`. Also (1) and (2).
-        maybe_uninit_write_slice(unsafe { &mut (*ptr).data }, chunk);
+        let data = unsafe { &mut (*ptr).data };
+
+        // Copy the data into the granule.
+        data[0..chunk.len()].copy_from_slice(chunk);
     }

-    /// Allocate a [`MaybeUninit<VarLenGranule>`](VarLenGranule) at the returned [`PageOffset`].
+    /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`].
+    ///
+    /// The allocated storage is not initialized by this method,
+    /// and will be valid-unconstrained at [`VarLenGranule`].
     ///
     /// This offset will be properly aligned for `VarLenGranule` when converted to a pointer.
     ///
     /// Returns an error when there are neither free granules nor space in the gap left.
     fn alloc_granule(&mut self) -> Result<PageOffset, Error> {
-        let uninit_granule = self
+        let granule = self
             .alloc_from_freelist()
             .or_else(|| self.alloc_from_gap())
             .ok_or(Error::InsufficientVarLenSpace { need: 1, have: 0 })?;
         debug_assert!(
-            is_granule_offset_aligned(uninit_granule),
+            is_granule_offset_aligned(granule),
             "Allocated an unaligned var-len granule: {:x}",
-            uninit_granule,
+            granule,
         );
-        Ok(uninit_granule)
+        Ok(granule)
     }

-    /// Allocate a [`MaybeUninit<VarLenGranule>`](VarLenGranule) at the returned [`PageOffset`]
+    /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`]
     /// taken from the freelist, if any.
     #[inline]
     fn alloc_from_freelist(&mut self) -> Option<PageOffset> {
@@ -733,7 +747,7 @@ impl<'page> VarView<'page> {
         Some(free)
     }

-    /// Allocate a [`MaybeUninit<VarLenGranule>`](VarLenGranule) at the returned [`PageOffset`]
+    /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`]
     /// taken from the gap, if there is space left, or `None` if there is insufficient space.
     #[inline]
     fn alloc_from_gap(&mut self) -> Option<PageOffset> {
@@ -1008,11 +1022,17 @@ impl Page {
     pub fn new(fixed_row_size: Size) -> Box<Self> {
         // TODO(perf): mmap? allocator may do so already.
         // mmap may be more efficient as we save allocator metadata.
-        use std::alloc::{alloc, handle_alloc_error, Layout};
+        use std::alloc::{alloc_zeroed, handle_alloc_error, Layout};

         let layout = Layout::new::<Page>();
+
+        // Allocate with `alloc_zeroed` so that the bytes are initially 0, rather than uninit.
+        // We will never write an uninit byte into the page except in the `PageHeader`,
+        // so it is safe for `row_data` to have type `[u8; _]` rather than `[MaybeUninit<u8>; _]`.
+        // `alloc_zeroed` may be more efficient than `alloc` + `memset`;
+        // in particular, it may `mmap` pages directly from the OS, which are always zeroed for security reasons.
         // SAFETY: The layout's size is non-zero.
-        let raw: *mut Page = unsafe { alloc(layout) }.cast();
+        let raw: *mut Page = unsafe { alloc_zeroed(layout) }.cast();

         if raw.is_null() {
             handle_alloc_error(layout);
@@ -1028,7 +1048,8 @@ impl Page {
         unsafe { header.write(PageHeader::new(fixed_row_size)) };

         // SAFETY: We used the global allocator with a layout for `Page`.
-        // We have initialized the `header`
+        // We have initialized the `header`,
+        // and the `row_data` is initially 0 by `alloc_zeroed`,
         // making the pointee a `Page` valid for reads and writes.
         unsafe { Box::from_raw(raw) }
     }
@@ -1179,13 +1200,13 @@ impl Page {
                 // The blob store insertion will never fail.
                 // SAFETY: `alloc_for_slice` always returns a pointer
                 // to a `VarLenGranule` in bounds of this page.
-                // As `in_blob` holds, it is also uninit, as required.
+                // As `in_blob` holds, it is also unused, as required.
                 // We'll now make that granule valid.
                 unsafe {
                     var.write_large_blob_hash_to_granule(blob_store, var_len_obj, var_len_ref);
                 }
             }
-            var_len_ref_slot.write(var_len_ref);
+            *var_len_ref_slot = var_len_ref;
         }

         Ok(fixed_len_offset)
@@ -1320,17 +1341,15 @@
         // Visit the var-len members of the fixed row and free them.
         let row = fixed.get_row(fixed_row, fixed_row_size);
-        // SAFETY: Allocation initializes the `VarLenRef`s in the row,
-        // so a row that has been allocated and is live
-        // will have initialized `VarLenRef` members.
-        let var_len_refs = unsafe { visit_var_len_assume_init(var_len_visitor, row) };
+        // SAFETY: `row` is derived from `fixed_row`, which is known by caller requirements to be valid.
+        let var_len_refs = unsafe { var_len_visitor.visit_var_len(row) };
         for var_len_ref in var_len_refs {
-            // SAFETY: A sound call to `visit_var_len_assume_init`,
+            // SAFETY: A sound call to `visit_var_len` on a fully initialized valid row,
             // which we've justified that the above is,
             // returns an iterator, that will only yield `var_len_ref`s,
-            // where `var_len_ref.first_granule` points to a valid `VarLenGranule` or be NULL.
+            // where `var_len_ref.first_granule` points to a valid `VarLenGranule` or is NULL.
             unsafe {
-                var.free_object(var_len_ref, blob_store);
+                var.free_object(*var_len_ref, blob_store);
             }
         }

@@ -1367,8 +1386,8 @@ impl Page {
         // SAFETY:
         // - Caller promised that `fixed_row_offset` is a valid row.
         // - Caller promised consistency of `var_len_visitor` wrt. `fixed_row_size` and this page.
-        let vlr_iter = unsafe { visit_var_len_assume_init(var_len_visitor, fixed_row) };
-        vlr_iter.map(|slot| slot.granules_used()).sum()
+        let vlr_iter = unsafe { var_len_visitor.visit_var_len(fixed_row) };
+        vlr_iter.copied().map(|slot| slot.granules_used()).sum()
     }

     /// Copy as many rows from `self` for which `filter` returns `true` into `dst` as will fit,
@@ -1477,11 +1496,7 @@
             // SAFETY: `src_row` is valid because it came from `self.iter_fixed_len_from`.
             //
             // Forward our safety requirements re: `var_len_visitor` to `visit_var_len`.
-            //
-            // SAFETY: Every `VarLenRef` in `src_vlr_iter` is initialized
-            // because to reach this point without violating any above safety invariants,
-            // it must have been allocated and its ref stored in the `src_row`.
-            let src_vlr_iter = unsafe { visit_var_len_assume_init(var_len_visitor, src_row) };
+            let src_vlr_iter = unsafe { var_len_visitor.visit_var_len(src_row) };
             // SAFETY: forward our requirement on `var_len_visitor` to `visit_var_len_mut`.
             let target_vlr_iter = unsafe { var_len_visitor.visit_var_len_mut(dst_row) };
             for (src_vlr, target_vlr_slot) in src_vlr_iter.zip(target_vlr_iter) {
@@ -1492,10 +1507,10 @@
                 //
                 // - the call to `dst.has_space_for_row` above ensures
                 //   that the allocation will not fail part-way through.
-                let target_vlr_fixup = unsafe { self.copy_var_len_into(src_vlr, &mut dst_var, blob_store) }
+                let target_vlr_fixup = unsafe { self.copy_var_len_into(*src_vlr, &mut dst_var, blob_store) }
                     .expect("Failed to allocate var-len object in dst page after checking for available space");

-                target_vlr_slot.write(target_vlr_fixup);
+                *target_vlr_slot = target_vlr_fixup;
             }

             true
@@ -1549,7 +1564,7 @@
             // 2. `next_dst_chunk` will be initialized
             //    either in the next iteration or after the loop ends.
             //
-            // 3. `dst_chunk` points to uninit data as the space was allocated before the loop
+            // 3. `dst_chunk` points to unused data as the space was allocated before the loop
             //    or was `next_dst_chunk` in the previous iteration and hasn't been written to yet.
             unsafe { dst_var.write_chunk_to_granule(data, data.len(), dst_chunk, next_dst_chunk) };
             dst_chunk = next_dst_chunk;
@@ -1567,7 +1582,7 @@
         //
         // 2. `next` is NULL which is trivially init.
        //
-        // 3. `dst_chunk` points to uninit data as the space was allocated before the loop
+        // 3. `dst_chunk` points to unused data as the space was allocated before the loop
         //    or was `next_dst_chunk` in the previous iteration and hasn't been written to yet.
         unsafe { dst_var.write_chunk_to_granule(data, data.len(), dst_chunk, PageOffset::VAR_LEN_NULL) };

@@ -1593,10 +1608,15 @@
     /// Zeroes every byte of row data in this page.
     ///
     /// This is only used for benchmarks right now.
+    ///
+    /// # Safety
+    ///
+    /// Causes the page header to no longer match the contents, invalidating many assumptions.
+    /// Should be called in conjunction with [`Self::clear`].
     #[doc(hidden)]
     pub unsafe fn zero_data(&mut self) {
         for byte in &mut self.row_data {
-            unsafe { ptr::write(byte.as_mut_ptr(), 0) };
+            *byte = 0;
         }
     }
 }
@@ -1660,18 +1680,8 @@ impl<'page> Iterator for VarLenGranulesIter<'page> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::{
-        blob_store::NullBlobStore, layout::row_size_for_type, util::uninit_array, var_len::AlignedVarLenOffsets,
-    };
+    use crate::{blob_store::NullBlobStore, layout::row_size_for_type, var_len::AlignedVarLenOffsets};
     use proptest::{collection::vec, prelude::*};
-    use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;
-    use std::slice::from_raw_parts;
-
-    fn as_uninit(slice: &[u8]) -> &Bytes {
-        let ptr = slice.as_ptr();
-        let len = slice.len();
-        unsafe { from_raw_parts(ptr.cast::<Byte>(), len) }
-    }

     fn u64_row_size() -> Size {
         let fixed_row_size = row_size_for_type::<u64>();
@@ -1686,8 +1696,7 @@ mod tests {

     fn insert_u64(page: &mut Page, val: u64) -> PageOffset {
         let val_slice = val.to_le_bytes();
-        let val_slice = as_uninit(&val_slice);
-        unsafe { page.insert_row(val_slice, &[] as &[&[u8]], u64_var_len_visitor(), &mut NullBlobStore) }
+        unsafe { page.insert_row(&val_slice, &[] as &[&[u8]], u64_var_len_visitor(), &mut NullBlobStore) }
             .expect("Failed to insert first row")
     }

@@ -1697,7 +1706,7 @@ mod tests {
     fn read_u64(page: &Page, offset: PageOffset) -> u64 {
         let row = page.get_row_data(offset, u64_row_size());
-        u64::from_le_bytes(unsafe { slice_assume_init_ref(row) }.try_into().unwrap())
+        u64::from_le_bytes(row.try_into().unwrap())
     }

     fn data_sub_n_vlg(n: usize) -> usize {
@@ -1808,7 +1817,7 @@ mod tests {
     }

     fn insert_str(page: &mut Page, data: &[u8]) -> PageOffset {
-        let fixed_len_data = uninit_array::<u8, { STR_ROW_SIZE.len() }>();
+        let fixed_len_data = [0u8; STR_ROW_SIZE.len()];
         unsafe { page.insert_row(&fixed_len_data, &[data], str_var_len_visitor(), &mut NullBlobStore) }
             .expect("Failed to insert row")
     }
diff --git a/crates/table/src/read_column.rs b/crates/table/src/read_column.rs
index 74ca685b52..6e820b9053 100644
--- a/crates/table/src/read_column.rs
+++ b/crates/table/src/read_column.rs
@@ -10,10 +10,7 @@ use crate::{
     table::RowRef,
 };
 use spacetimedb_sats::{
-    algebraic_value::{
-        ser::{slice_assume_init_ref, ValueSerializer},
-        Packed,
-    },
+    algebraic_value::{ser::ValueSerializer, Packed},
     AlgebraicType, AlgebraicValue, ArrayValue, MapValue, ProductType, ProductValue, SumValue,
 };
 use std::{cell::Cell, mem};
@@ -188,7 +185,8 @@ unsafe impl ReadColumn for bool {
         let data: *const bool = data.as_ptr().cast();
         // SAFETY: We trust that the `row_ref` refers to a valid, initialized row,
         // and that the `offset_in_bytes` refers to a column of type `Bool` within that row.
-        // A valid row can never have an uninitialized column or a column of an invalid value,
+        // A valid row can never have a column of an invalid value,
+        // and no byte in `Page.row_data` is ever uninit,
         // so `data` must be initialized as either 0 or 1.
         unsafe { *data }
     }
@@ -211,11 +209,6 @@ macro_rules! impl_read_column_number {
                 let col_offset = offset + PageOffset(layout.offset);
                 let data = page.get_row_data(col_offset, Size(mem::size_of::<Self>() as u16));
-                // SAFETY: We trust that the `row_ref` refers to a valid, initialized row,
-                // and that the `offset_in_bytes` refers to a column of type `Self` within that row.
-                // A valid row can never have an uninitialized column,
-                // so `data` must be initialized.
-                let data = unsafe { slice_assume_init_ref(data) };
                 let data: Result<[u8; mem::size_of::<Self>()], _> = data.try_into();
                 // SAFETY: `<[u8; N] as TryFrom<&[u8]>>` succeeds if and only if the slice's length is `N`.
                 // We used `mem::size_of::<Self>()` as both the length of the slice and the array,
@@ -360,7 +353,7 @@ mod test {
     }

     proptest! {
-        #![proptest_config(ProptestConfig::with_cases(2048))]
+        #![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]

         #[test]
         /// Test that `AlgebraicValue::read_column` returns expected values.
diff --git a/crates/table/src/row_hash.rs b/crates/table/src/row_hash.rs
index f1b010ffb8..020267edac 100644
--- a/crates/table/src/row_hash.rs
+++ b/crates/table/src/row_hash.rs
@@ -98,8 +98,7 @@ unsafe fn hash_value(
     match ty {
         AlgebraicTypeLayout::Sum(ty) => {
             // Read and hash the tag of the sum value.
-            // SAFETY: `bytes[curr_offset..]` hold a sum value at `ty`.
-            let (tag, data_ty) = unsafe { read_tag(bytes, ty, *curr_offset) };
+            let (tag, data_ty) = read_tag(bytes, ty, *curr_offset);
             tag.hash(hasher);

             // Hash the variant data value.
@@ -245,7 +244,7 @@ mod tests {
     use spacetimedb_sats::proptest::generate_typed_row;

     proptest! {
-        #![proptest_config(ProptestConfig::with_cases(2048))]
+        #![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]
         #[test]
         fn pv_row_ref_hash_same_std_random_state((ty, val) in generate_typed_row()) {
             // Turn `val` into a `RowRef`.
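
For context on why `read_tag` and its callers could shed `unsafe` above: once `Bytes` is `[u8]`, reading a sum tag is an ordinary bounds-checked index, and a bad offset yields a garbage tag or a panic rather than UB. A minimal standalone sketch follows; the types are simplified stand-ins, not the crate's real `SumTypeLayout`:

// Sketch only: a stand-in for the crate's sum layout.
struct SumLayout {
    tag_offset: usize,
    variant_names: Vec<&'static str>,
}

// With `MaybeUninit<u8>` bytes this needed an `unsafe { tag.assume_init() }` step;
// with plain `u8` bytes the worst case is a panic, never undefined behavior.
fn read_tag<'ty>(bytes: &[u8], ty: &'ty SumLayout, curr_offset: usize) -> (u8, &'ty str) {
    let tag = bytes[curr_offset + ty.tag_offset];
    (tag, ty.variant_names[tag as usize])
}

fn main() {
    let ty = SumLayout { tag_offset: 0, variant_names: vec!["some", "none"] };
    let row = [1u8, 0, 0, 0, 0, 0];
    assert_eq!(read_tag(&row, &ty, 0), (1, "none"));
}
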
diff --git a/crates/table/src/row_type_visitor.rs b/crates/table/src/row_type_visitor.rs
index 8d4d3cf591..55b9da5bf9 100644
--- a/crates/table/src/row_type_visitor.rs
+++ b/crates/table/src/row_type_visitor.rs
@@ -35,7 +35,6 @@ use super::{
 };
 use core::fmt;
 use core::marker::PhantomData;
-use core::mem::MaybeUninit;
 use itertools::Itertools;
 use std::sync::Arc;

@@ -408,7 +407,7 @@ unsafe impl VarLenMembers for VarLenVisitorProgram {
         //
         // - Caller promised that `row` is properly aligned for the row type
         //   so based on this assumption, our program will yield references that are properly
-        //   aligned for `MaybeUninit<VarLenRef>`s.
+        //   aligned for `VarLenRef`s.
         //
         // - Caller promised that `row.len() == row_type.size()`.
         //   This ensures that our program will yield references that are in bounds of `row`.
@@ -441,7 +440,7 @@ pub struct VarLenVisitorProgramIter<'visitor, 'row> {
 }

 impl<'row> Iterator for VarLenVisitorProgramIter<'_, 'row> {
-    type Item = &'row MaybeUninit<VarLenRef>;
+    type Item = &'row VarLenRef;

     fn next(&mut self) -> Option<Self::Item> {
         // Reads the `tag: u8` at `offset`.
@@ -456,7 +455,7 @@ impl<'row> Iterator for VarLenVisitorProgramIter<'_, 'row> {
         // Moreover, `self.row` is non-null, so adding the offset to it results in a non-null pointer.
         // By having `self.row: &'row Bytes` we also know that the pointer is valid for reads
         // and that it will be for `'row` which is tied to the lifetime of `Self::Item`.
-        Some(unsafe { get_ref::<MaybeUninit<VarLenRef>>(self.row, offset) })
+        Some(unsafe { get_ref::<VarLenRef>(self.row, offset) })
     }
 }

@@ -472,7 +471,7 @@ pub struct VarLenVisitorProgramIterMut<'visitor, 'row> {
 }

 impl<'row> Iterator for VarLenVisitorProgramIterMut<'_, 'row> {
-    type Item = &'row mut MaybeUninit<VarLenRef>;
+    type Item = &'row mut VarLenRef;

     fn next(&mut self) -> Option<Self::Item> {
         // Reads the `tag: u8` at `offset`.
@@ -482,7 +481,7 @@ impl<'row> Iterator for VarLenVisitorProgramIterMut<'_, 'row> {
         let offset = next_vlr_offset(self.program, &mut self.instr_ptr, read_tag)?;
         // SAFETY: Constructing the iterator is a promise that
         // `offset`s produced by the program will be in bounds of `self.row`.
-        let vlr_ptr: *mut MaybeUninit<VarLenRef> = unsafe { self.row.add(offset.idx()).cast() };
+        let vlr_ptr: *mut VarLenRef = unsafe { self.row.add(offset.idx()).cast() };
         // SAFETY: Constructing the iterator is a promise that
         // The derived pointer must be properly aligned for a `VarLenRef`.
         //
@@ -496,7 +495,7 @@ impl<'row> Iterator for VarLenVisitorProgramIterMut<'_, 'row> {
 #[cfg(test)]
 mod test {
     use super::*;
-    use crate::{indexes::Size, util::uninit_array};
+    use crate::indexes::Size;
     use spacetimedb_sats::{AlgebraicType, ProductType};

     fn row_type<T: Into<ProductType>>(row_ty: T) -> RowTypeLayout {
@@ -521,9 +520,9 @@ mod test {
         let ty = row_type([AlgebraicType::U32, AlgebraicType::String, AlgebraicType::U8]);
         assert_eq!(ty.size(), Size(12));

-        // alloc an uninit_array of u32 to ensure 4-byte alignment.
-        let row = &uninit_array::<u32, 3>();
-        let row = row.as_ptr().cast::<[MaybeUninit<u8>; 12]>();
+        // alloc an array of u32 to ensure 4-byte alignment.
+        let row = &[0xa5a5_a5a5u32; 3];
+        let row = row.as_ptr().cast::<[Byte; 12]>();
         let row = unsafe { &*row };

         check_addrs(&row_type_visitor(&ty), row, [4]);
@@ -540,9 +539,9 @@ mod test {
         ]);
         assert_eq!(ty.size(), Size(24));

-        // Alloc an uninit_array of u32 to ensure 4-byte alignment.
-        let row = &uninit_array::<u32, 6>();
-        let row = row.as_ptr().cast::<[MaybeUninit<u8>; 24]>();
+        // Alloc an array of u32 to ensure 4-byte alignment.
+        let row = &[0xa5a5_a5a5u32; 6];
+        let row = row.as_ptr().cast::<[Byte; 24]>();
         let row = unsafe { &*row };

         check_addrs(&row_type_visitor(&ty), row, [4, 16]);
@@ -558,17 +557,17 @@ mod test {
         let outer_sum = &ty[0].as_sum().unwrap();
         let outer_tag = outer_sum.offset_of_tag();

-        let row = &mut uninit_array::<u16, 3>();
-        let row_ptr = row.as_mut_ptr().cast::<[MaybeUninit<u8>; 6]>();
+        let row = &mut [0xa5a5u16; 3];
+        let row_ptr = row.as_mut_ptr().cast::<[Byte; 6]>();
         let row = unsafe { &mut *row_ptr };

         let program = row_type_visitor(&ty);

         // Variant 1 (String) is live
-        row[outer_tag].write(0);
+        row[outer_tag] = 0;
         check_addrs(&program, row, [2]);

         // Variant 1 (none) is live
-        row[outer_tag].write(1);
+        row[outer_tag] = 1;
         check_addrs(&program, row, []);
     }

@@ -596,33 +595,33 @@ mod test {
         let inner_tag = outer_sum.offset_of_variant_data(3) + inner_sum.offset_of_tag();
         assert_eq!(inner_tag, 4);

-        let row = &mut uninit_array::<u32, 3>();
-        let row_ptr = row.as_mut_ptr().cast::<[MaybeUninit<u8>; 12]>();
+        let row = &mut [0xa5a5_a5a5u32; 3];
+        let row_ptr = row.as_mut_ptr().cast::<[Byte; 12]>();
         let row = unsafe { &mut *row_ptr };

         let program = row_type_visitor(&ty);

         // Variant 0 (U32) is live
-        row[outer_tag].write(0);
+        row[outer_tag] = 0;
         check_addrs(&program, row, []);

         // Variant 1 (String) is live
-        row[outer_tag].write(1);
+        row[outer_tag] = 1;
         check_addrs(&program, row, [4]);

         // Variant 2 (Product) is live
-        row[outer_tag].write(2);
+        row[outer_tag] = 2;
         check_addrs(&program, row, [8]);

-        // Variant 3 (Sum) is live but its tag is not init yet.
-        row[outer_tag].write(3);
+        // Variant 3 (Sum) is live but its tag is not valid yet.
+        row[outer_tag] = 3;

         // Variant 3, 0 (Sum, U32) is live.
-        row[inner_tag].write(0);
+        row[inner_tag] = 0;
         check_addrs(&program, row, []);

         // Variant 3, 1 (Sum, String) is live.
-        row[inner_tag].write(1);
+        row[inner_tag] = 1;
         check_addrs(&program, row, [8]);
     }
 }
diff --git a/crates/table/src/util.rs b/crates/table/src/util.rs
index af3be15f3f..ca74c4167e 100644
--- a/crates/table/src/util.rs
+++ b/crates/table/src/util.rs
@@ -1,4 +1,3 @@
-use core::mem::{self, MaybeUninit};
 use core::ops::Range;

 /// Translates the range `r` by adding `by` to both its `start` and its `end`.
@@ -8,22 +7,6 @@ pub const fn range_move(r: Range<usize>, by: usize) -> Range<usize> {
     (r.start + by)..(r.end + by)
 }

-/// Copy elements from `src` into `this`, initializing those elements of `this`.
-///
-/// If `this` is longer than `src`, write only the first `src.len()` elements of `this`.
-///
-/// If `src` is longer than `this`, panic.
-///
-/// Copy of the source of `MaybeUninit::write_slice`, but that's not stabilized.
-/// https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.write_slice
-/// Unlike that function, this does not return a reference to the initialized bytes.
-pub fn maybe_uninit_write_slice<T: Copy>(this: &mut [MaybeUninit<T>], src: &[T]) {
-    // SAFETY: &[T] and &[MaybeUninit<T>] have the same layout
-    let uninit_src: &[MaybeUninit<T>] = unsafe { mem::transmute(src) };
-
-    this[0..uninit_src.len()].copy_from_slice(uninit_src);
-}
-
 /// Asserts that `$ty` is `$size` bytes in `static_assert_size($ty, $size)`.
 ///
 /// Example:
@@ -51,15 +34,3 @@ macro_rules! static_assert_align {
         const _: [(); $align] = [(); ::core::mem::align_of::<$ty>()];
     };
 }
-
-/// Construct an uninitialized array of `N` elements.
-///
-/// The array will be appropriately sized and aligned to hold `N` elements of type `T`,
-/// but those elements will be uninitialized.
-///
-/// Identitcal copy of the source of `MaybeUninit::uninit_array`, but that's not stabilized.
-/// https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array
-pub const fn uninit_array<T, const N: usize>() -> [MaybeUninit<T>; N] {
-    // SAFETY: An uninitialized `[MaybeUninit<_>; N]` is valid.
-    unsafe { MaybeUninit::<[MaybeUninit<T>; N]>::uninit().assume_init() }
-}
diff --git a/crates/table/src/var_len.rs b/crates/table/src/var_len.rs
index 2fe0fe50b8..faed050ae8 100644
--- a/crates/table/src/var_len.rs
+++ b/crates/table/src/var_len.rs
@@ -37,13 +37,13 @@ use super::{
 use crate::{static_assert_align, static_assert_size};
 use core::iter;
 use core::marker::PhantomData;
-use core::mem::{self, MaybeUninit};
-use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;
+use core::mem::{self};

 /// Reference to var-len object within a page.
 // TODO: make this larger and do short-string optimization?
 // - Or store a few elts inline and then a `VarLenRef`?
-// - Or first store `VarLenRef` that records num inline elements (remaining inline are uninit)
+// - Or first store `VarLenRef` that records num inline elements
+//   (remaining inline "uninit," actually valid-unconstrained)
 //   (bitfield; only need 10 bits for `len_in_bytes`)?
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
 #[repr(C)]
@@ -284,11 +284,7 @@ impl VarLenGranule {
     /// Returns the data from the var-len object in this granule.
     pub fn data(&self) -> &[u8] {
         let len = self.header.len() as usize;
-        let slice = &self.data[0..len];
-
-        // SAFETY: Because we never store `uninit` padding bytes in a var-len object,
-        // the paths that construct a `VarLenGranule` always initialize the bytes up to the length.
-        unsafe { slice_assume_init_ref(slice) }
+        &self.data[0..len]
     }

     /// Assumes that the granule stores a [`BlobHash`] and returns it.
@@ -320,17 +316,22 @@ const _VLG_CAN_STORE_BLOB_HASH: () = assert!(VarLenGranule::DATA_SIZE >= BlobHas
 /// Various consumers in `Page` and friends depend on this and the previous requirement.
 pub unsafe trait VarLenMembers {
     /// The iterator type returned by [`VarLenMembers::visit_var_len`].
-    type Iter<'this, 'row>: Iterator<Item = &'row MaybeUninit<VarLenRef>>
+    type Iter<'this, 'row>: Iterator<Item = &'row VarLenRef>
     where
         Self: 'this;

     /// The iterator type returned by [`VarLenMembers::visit_var_len_mut`].
-    type IterMut<'this, 'row>: Iterator<Item = &'row mut MaybeUninit<VarLenRef>>
+    type IterMut<'this, 'row>: Iterator<Item = &'row mut VarLenRef>
     where
         Self: 'this;

     /// Treats `row` as storage for a row of the particular type handled by `self`,
-    /// and iterates over the (possibly uninitialized) `VarLenRef`s within it.
+    /// and iterates over the (possibly stale) `VarLenRef`s within it.
+    ///
+    /// Visited `VarLenRef`s are valid-unconstrained
+    /// and will always be valid from Rust/LLVM's perspective,
+    /// i.e. will never be uninit,
+    /// but will not necessarily point to properly-allocated `VarLenGranule`s.
     ///
     /// Callers are responsible for maintaining whether var-len members have been initialized.
     ///
@@ -357,7 +358,12 @@ pub unsafe trait VarLenMembers {
     unsafe fn visit_var_len_mut<'this, 'row>(&'this self, row: &'row mut Bytes) -> Self::IterMut<'this, 'row>;

     /// Treats `row` as storage for a row of the particular type handled by `self`,
-    /// and iterates over the (possibly uninitialized) `VarLenRef`s within it.
+    /// and iterates over the (possibly stale) `VarLenRef`s within it.
+    ///
+    /// Visited `VarLenRef`s are valid-unconstrained
+    /// and will always be valid from Rust/LLVM's perspective,
+    /// i.e. will never be uninit,
+    /// but will not necessarily point to properly-allocated `VarLenGranule`s.
     ///
     /// Callers are responsible for maintaining whether var-len members have been initialized.
     ///
@@ -384,25 +390,6 @@ pub unsafe trait VarLenMembers {
     unsafe fn visit_var_len<'this, 'row>(&'this self, row: &'row Bytes) -> Self::Iter<'this, 'row>;
 }

-/// Treat `init_row` as storage for a row of the particular type handled by `visitor`,
-/// and iterate over the assumed-to-be initialized `VarLenRef`s within it.
-///
-/// # Safety
-///
-/// - Callers must satisfy the contract of [`VarLenMembers::visit_var_len`]
-///   with respect to `visitor` and `init_row`.
-///
-/// - `init_row` must be initialized and each `VarLenRef`
-///   in `visitor.visit_var_len(init_row)` must also be initialized.
-pub unsafe fn visit_var_len_assume_init<'row>(
-    visitor: &'row impl VarLenMembers,
-    init_row: &'row Bytes,
-) -> impl 'row + Iterator<Item = VarLenRef> {
-    // SAFETY: `init_row` is valid per safety requirements.
-    // SAFETY: `vlr` is initialized in `init_row` per safety requirements.
-    unsafe { visitor.visit_var_len(init_row) }.map(move |vlr| unsafe { vlr.assume_init_read() })
-}
-
 /// Slice of offsets to var-len members, in units of 2-byte words.
 ///
 /// This type is intended as a demonstration of the `VarLenMembers` interface,
@@ -497,7 +484,7 @@ pub struct AlignedVarLenOffsetsIter<'offsets, 'row> {
 }

 impl<'offsets, 'row> Iterator for AlignedVarLenOffsetsIter<'offsets, 'row> {
-    type Item = &'row MaybeUninit<VarLenRef>;
+    type Item = &'row VarLenRef;

     fn next(&mut self) -> Option<Self::Item> {
         if self.next_offset_idx >= self.offsets.0.len() {
@@ -512,12 +499,14 @@ impl<'offsets, 'row> Iterator for AlignedVarLenOffsetsIter<'offsets, 'row> {
             //   mean that `row` is always 2-byte aligned, so this will be too,
             //   and that `row` is large enough for all the `offsets`,
             //   so this `add` is always in-bounds.
-            let elt_ptr: *const MaybeUninit<VarLenRef> =
+            let elt_ptr: *const VarLenRef =
                 unsafe { self.row.add(curr_offset_idx * mem::align_of::<VarLenRef>()).cast() };

             // SAFETY: `elt_ptr` is aligned and inbounds.
-            // `MaybeUninit<VarLenRef>` has no value restrictions,
-            // so it's safe to create an `&mut` to `uninit` or garbage.
+            // Any pattern of init bytes is valid at `VarLenRef`,
+            // and the `row_data` in a `Page` is never uninit,
+            // so it's safe to create an `&mut` to any value in the page,
+            // though the resulting `VarLenRef` may be garbage.
             Some(unsafe { &*elt_ptr })
         }
     }
@@ -531,7 +520,7 @@ pub struct AlignedVarLenOffsetsIterMut<'offsets, 'row> {
 }

 impl<'offsets, 'row> Iterator for AlignedVarLenOffsetsIterMut<'offsets, 'row> {
-    type Item = &'row mut MaybeUninit<VarLenRef>;
+    type Item = &'row mut VarLenRef;

     fn next(&mut self) -> Option<Self::Item> {
         if self.next_offset_idx >= self.offsets.0.len() {
@@ -546,12 +535,14 @@ impl<'offsets, 'row> Iterator for AlignedVarLenOffsetsIterMut<'offsets, 'row> {
             //   mean that `row` is always 2-byte aligned, so this will be too,
             //   and that `row` is large enough for all the `offsets`,
             //   so this `add` is always in-bounds.
-            let elt_ptr: *mut MaybeUninit<VarLenRef> =
+            let elt_ptr: *mut VarLenRef =
                 unsafe { self.row.add(curr_offset_idx * mem::align_of::<VarLenRef>()).cast() };

             // SAFETY: `elt_ptr` is aligned and inbounds.
-            // `MaybeUninit<VarLenRef>` has no value restrictions,
-            // so it's safe to create an `&mut` to `uninit` or garbage.
+            // Any pattern of init bytes is valid at `VarLenRef`,
+            // and the `row_data` in a `Page` is never uninit,
+            // so it's safe to create an `&mut` to any value in the page,
+            // though the resulting `VarLenRef` may be garbage.
             Some(unsafe { &mut *elt_ptr })
         }
     }
@@ -564,8 +555,8 @@ pub struct NullVarLenVisitor;

 // SAFETY: Both `visit_var_len` and `visit_var_len_mut` visit the empty set.
 unsafe impl VarLenMembers for NullVarLenVisitor {
-    type Iter<'this, 'row> = iter::Empty<&'row MaybeUninit<VarLenRef>>;
-    type IterMut<'this, 'row> = iter::Empty<&'row mut MaybeUninit<VarLenRef>>;
+    type Iter<'this, 'row> = iter::Empty<&'row VarLenRef>;
+    type IterMut<'this, 'row> = iter::Empty<&'row mut VarLenRef>;

     unsafe fn visit_var_len<'this, 'row>(&'this self, _row: &'row Bytes) -> Self::Iter<'this, 'row> {
         iter::empty()
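
Finally, the allocation side of the change in isolation. This is a hedged standalone sketch, not the patch's code; the helper name is invented, and `Page::new` above is the real analogue. Allocating zeroed means every byte is initialized from the start, which is exactly what lets `row_data` be typed `[u8; _]` instead of `[MaybeUninit<u8>; _]`.

use std::alloc::{alloc_zeroed, handle_alloc_error, Layout};
use std::ptr;

// Allocate `len` heap bytes, all initialized to 0, analogous to how the patch
// allocates a whole `Page` with `alloc_zeroed` instead of `alloc`.
fn alloc_zeroed_bytes(len: usize) -> Box<[u8]> {
    if len == 0 {
        return Vec::new().into_boxed_slice();
    }
    let layout = Layout::array::<u8>(len).expect("len overflows Layout");
    // SAFETY: `layout` has non-zero size, checked above.
    let raw = unsafe { alloc_zeroed(layout) };
    if raw.is_null() {
        handle_alloc_error(layout);
    }
    // SAFETY: `raw` points to `len` bytes, all initialized to 0 by `alloc_zeroed`,
    // and was allocated by the global allocator with this exact layout.
    unsafe { Box::from_raw(ptr::slice_from_raw_parts_mut(raw, len)) }
}

fn main() {
    let page = alloc_zeroed_bytes(4096);
    // Every byte is a valid `u8` (zero); none are uninit.
    assert!(page.iter().all(|&b| b == 0));
}
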