Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Page always fully init #1193

Merged
merged 6 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions crates/table/benches/page.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use core::hash::BuildHasher;
use core::mem::{self, MaybeUninit};
use core::mem;
use core::time::Duration;
use criterion::measurement::{Measurement, WallTime};
use criterion::{black_box, criterion_group, criterion_main, Bencher, BenchmarkId, Criterion, Throughput};
Expand All @@ -12,12 +12,11 @@ use spacetimedb_table::bflatn_from::serialize_row_from_page;
use spacetimedb_table::bflatn_to::write_row_to_page;
use spacetimedb_table::blob_store::NullBlobStore;
use spacetimedb_table::eq::eq_row_in_page;
use spacetimedb_table::indexes::{PageOffset, RowHash};
use spacetimedb_table::indexes::{Byte, Bytes, PageOffset, RowHash};
use spacetimedb_table::layout::{row_size_for_type, RowTypeLayout};
use spacetimedb_table::page::Page;
use spacetimedb_table::row_hash::hash_row_in_page;
use spacetimedb_table::row_type_visitor::{row_type_visitor, VarLenVisitorProgram};
use spacetimedb_table::util;
use spacetimedb_table::var_len::{AlignedVarLenOffsets, NullVarLenVisitor, VarLenGranule, VarLenMembers, VarLenRef};

fn time<R>(acc: &mut Duration, body: impl FnOnce() -> R) -> R {
Expand Down Expand Up @@ -51,8 +50,8 @@ fn clear_zero(page: &mut Page) {
unsafe { page.zero_data() };
}

fn as_bytes<T>(t: &T) -> &[MaybeUninit<u8>] {
let ptr = (t as *const T).cast::<MaybeUninit<u8>>();
fn as_bytes<T>(t: &T) -> &Bytes {
gefjon marked this conversation as resolved.
Show resolved Hide resolved
let ptr = (t as *const T).cast::<Byte>();
unsafe { std::slice::from_raw_parts(ptr, mem::size_of::<T>()) }
}

Expand All @@ -67,11 +66,11 @@ unsafe trait Row {

#[allow(clippy::missing_safety_doc)] // It's a benchmark, clippy. Who cares.
unsafe trait FixedLenRow: Row + Sized {
gefjon marked this conversation as resolved.
Show resolved Hide resolved
fn as_bytes(&self) -> &[MaybeUninit<u8>] {
fn as_bytes(&self) -> &Bytes {
as_bytes(self)
}

unsafe fn from_bytes(bytes: &[MaybeUninit<u8>]) -> &Self {
unsafe fn from_bytes(bytes: &Bytes) -> &Self {
let ptr = bytes.as_ptr();
debug_assert_eq!(ptr as usize % mem::align_of::<Self>(), 0);
debug_assert_eq!(bytes.len(), mem::size_of::<Self>());
Expand Down Expand Up @@ -294,11 +293,11 @@ fn insert_opt_str(c: &mut Criterion) {
assert!(fixed_row_size.len() == 6);
let mut clean_page_group = c.benchmark_group("insert_optional_str/clean_page");

let mut variant_none = util::uninit_array::<u8, 6>();
variant_none[4].write(1);
let mut variant_none = [0xa5u8; 6];
gefjon marked this conversation as resolved.
Show resolved Hide resolved
variant_none[0] = 1;

let mut variant_some = util::uninit_array::<u8, 6>();
variant_some[4].write(0);
let mut variant_some = [0xa5u8; 6];
variant_some[0] = 0;

for some_ratio in [0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0] {
for &data_length_in_bytes in if some_ratio == 0.0 { &[0][..] } else { &VL_SIZES } {
Expand Down
13 changes: 7 additions & 6 deletions crates/table/benches/page_manager.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use core::iter;
use core::mem::{self, MaybeUninit};
use core::mem;
use core::time::Duration;
use criterion::measurement::{Measurement, WallTime};
use criterion::{
Expand All @@ -12,7 +12,8 @@ use spacetimedb_sats::db::def::{TableDef, TableSchema};
use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductType, ProductValue};
use spacetimedb_table::blob_store::NullBlobStore;
use spacetimedb_table::btree_index::BTreeIndex;
use spacetimedb_table::indexes::{PageOffset, RowPointer, Size, SquashedOffset, PAGE_DATA_SIZE};
use spacetimedb_table::indexes::Byte;
use spacetimedb_table::indexes::{Bytes, PageOffset, RowPointer, Size, SquashedOffset, PAGE_DATA_SIZE};
use spacetimedb_table::layout::{row_size_for_bytes, row_size_for_type};
use spacetimedb_table::pages::Pages;
use spacetimedb_table::row_type_visitor::{row_type_visitor, VarLenVisitorProgram};
Expand Down Expand Up @@ -45,8 +46,8 @@ fn iter_time_with<P, B, X>(
})
}

fn as_bytes<T>(t: &T) -> &[MaybeUninit<u8>] {
let ptr = (t as *const T).cast::<MaybeUninit<u8>>();
fn as_bytes<T>(t: &T) -> &Bytes {
gefjon marked this conversation as resolved.
Show resolved Hide resolved
let ptr = (t as *const T).cast::<Byte>();
unsafe { std::slice::from_raw_parts(ptr, mem::size_of::<T>()) }
}

Expand All @@ -65,11 +66,11 @@ unsafe trait Row {

#[allow(clippy::missing_safety_doc)] // It's a benchmark, clippy. Who cares.
unsafe trait FixedLenRow: Row + Sized {
gefjon marked this conversation as resolved.
Show resolved Hide resolved
fn as_bytes(&self) -> &[MaybeUninit<u8>] {
fn as_bytes(&self) -> &Bytes {
as_bytes(self)
}

unsafe fn from_bytes(bytes: &[MaybeUninit<u8>]) -> &Self {
unsafe fn from_bytes(bytes: &Bytes) -> &Self {
let ptr = bytes.as_ptr();
debug_assert_eq!(ptr as usize % mem::align_of::<Self>(), 0);
debug_assert_eq!(bytes.len(), mem::size_of::<Self>());
Expand Down
22 changes: 11 additions & 11 deletions crates/table/benches/var_len_visitor.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use core::slice;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use spacetimedb_sats::{AlgebraicType, ProductType};
use spacetimedb_table::indexes::{Byte, Bytes};
use spacetimedb_table::row_type_visitor::{dump_visitor_program, row_type_visitor, VarLenVisitorProgram};
use spacetimedb_table::util::uninit_array;
use spacetimedb_table::var_len::{AlignedVarLenOffsets, NullVarLenVisitor, VarLenMembers, VarLenRef};
use std::mem::{self, MaybeUninit};
use std::mem;

fn visit_count(row: &[MaybeUninit<u8>], visitor: &impl VarLenMembers) {
fn visit_count(row: &Bytes, visitor: &impl VarLenMembers) {
black_box(unsafe { visitor.visit_var_len(row) }.count());
}

Expand All @@ -23,8 +23,8 @@ fn visitor_program(row_ty: impl Into<ProductType>) -> VarLenVisitorProgram {
}

fn visit_fixed_len(c: &mut C) {
let row = &uninit_array::<u32, 1>();
let row = row.as_ptr().cast::<MaybeUninit<u8>>();
let row = &[0xa5a5_a5a5u32; 1];
gefjon marked this conversation as resolved.
Show resolved Hide resolved
let row = row.as_ptr().cast::<Byte>();
let row = unsafe { slice::from_raw_parts(row, mem::size_of::<u32>()) };

let mut group = c.benchmark_group("visit_fixed_len/u64");
Expand All @@ -49,8 +49,8 @@ fn visit_fixed_len(c: &mut C) {
}

fn visit_var_len_product(c: &mut C) {
let row = &uninit_array::<VarLenRef, 1>();
let row = row.as_ptr().cast::<MaybeUninit<u8>>();
let row = &[VarLenRef::NULL; 1];
let row = row.as_ptr().cast::<Byte>();
let row = unsafe { slice::from_raw_parts(row, mem::size_of::<VarLenRef>()) };

let mut group = c.benchmark_group("visit_var_len_product/VarLenRef");
Expand All @@ -73,20 +73,20 @@ fn visit_var_len_sum(c: &mut C) {

let visitor = &visitor_program([AlgebraicType::sum([AlgebraicType::String, AlgebraicType::unit()])]);

let row = &mut uninit_array::<u16, 3>();
let row = row.as_mut_ptr().cast::<MaybeUninit<u8>>();
let row = &mut [0xa5a5u16; 3];
let row = row.as_mut_ptr().cast::<Byte>();
let row = unsafe { slice::from_raw_parts_mut(row, 6) };

group.bench_function("none/VarLenVisitorProgram", |b| {
// None
row[4].write(1);
row[0] = 1;

b.iter(|| visit_count(row, visitor));
});

group.bench_function("some/VarLenVisitorProgram", |b| {
// Some
row[4].write(0);
row[0] = 0;

b.iter(|| visit_count(row, visitor));
});
Expand Down
3 changes: 0 additions & 3 deletions crates/table/src/bflatn_from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,6 @@ pub unsafe fn read_tag<'ty>(
) -> (u8, &'ty AlgebraicTypeLayout) {
gefjon marked this conversation as resolved.
Show resolved Hide resolved
let tag_offset = ty.offset_of_tag();
let tag = bytes[curr_offset + tag_offset];
// SAFETY: Caller promised that `bytes[curr_offset..]` has a sum value typed at `ty`.
// We can therefore assume that `curr_offset + tag_offset` refers to a valid `u8`.
let tag = unsafe { tag.assume_init() };

// Extract the variant data type depending on the tag.
let data_ty = &ty.variants[tag as usize].ty;
Expand Down
24 changes: 14 additions & 10 deletions crates/table/src/bflatn_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use super::{
},
page::{GranuleOffsetIter, Page, VarView},
pages::Pages,
util::{maybe_uninit_write_slice, range_move},
var_len::{visit_var_len_assume_init, VarLenGranule, VarLenMembers, VarLenRef},
util::range_move,
var_len::{VarLenGranule, VarLenMembers, VarLenRef},
};
use spacetimedb_sats::{bsatn::to_writer, buffer::BufWriter, AlgebraicType, AlgebraicValue, ProductValue, SumValue};
use thiserror::Error;
Expand Down Expand Up @@ -151,12 +151,12 @@ impl BflatnSerializedRowBuffer<'_> {
// and `fixed_buf.len()` matches exactly the size of the row type.
// - `fixed_buf`'s `VarLenRef`s are initialized up to `last_allocated_var_len_index`.
// - `visitor` is proper for the row type.
let visitor_iter = unsafe { visit_var_len_assume_init(visitor, self.fixed_buf) };
let visitor_iter = unsafe { visitor.visit_var_len(self.fixed_buf) };
for vlr in visitor_iter.take(self.last_allocated_var_len_index) {
// SAFETY: The `vlr` came from the allocation in `write_var_len_obj`
// which wrote it to the fixed part using `write_var_len_ref`.
// Thus, it points to a valid `VarLenGranule`.
unsafe { self.var_view.free_object_ignore_blob(vlr) };
unsafe { self.var_view.free_object_ignore_blob(*vlr) };
}
}

Expand All @@ -165,7 +165,8 @@ impl BflatnSerializedRowBuffer<'_> {
for (vlr, value) in self.large_blob_insertions {
// SAFETY: `vlr` was given to us by `alloc_for_slice`
// so it is properly aligned for a `VarLenGranule` and in bounds of the page.
// However, as it was added to `self.large_blob_insertion`, it is also uninit.
// However, as it was added to `self.large_blob_insertions`,
// we have not yet written the hash to that granule.
unsafe {
self.var_view.write_large_blob_hash_to_granule(blob_store, &value, vlr);
}
Expand Down Expand Up @@ -250,7 +251,8 @@ impl BflatnSerializedRowBuffer<'_> {
// so we need to check that our `ProductValue` has the same number of elements
// as our `ProductTypeLayout` to be sure it's typed correctly.
// Otherwise, if the value is too long, we'll discard its fields (whatever),
// or if it's too long, we'll leave some fields in the page uninit (very bad).
// or if it's too long, we'll leave some fields in the page "uninit"
// (actually valid-unconstrained) (very bad).
if ty.elements.len() != val.elements.len() {
return Err(Error::WrongType(
ty.algebraic_type(),
Expand Down Expand Up @@ -302,7 +304,9 @@ impl BflatnSerializedRowBuffer<'_> {
} else {
// Write directly to the page.
// SAFETY: `vlr.first_granule` points to a granule
// even though the granule's data is uninit as of yet.
// even though the granule's data is not initialized as of yet.
// Note that the granule stores valid non-`poison` bytes,
// but they may be leftovers from a previous allocation.
let iter = unsafe { self.var_view.granule_offset_iter(vlr.first_granule) };
let mut writer = GranuleBufWriter { buf: None, iter };
to_writer(&mut writer, val).unwrap();
Expand Down Expand Up @@ -347,7 +351,7 @@ impl BflatnSerializedRowBuffer<'_> {

// Write to the granule.
for (to, byte) in write_to.iter_mut().zip(extend_with) {
to.write(*byte);
*to = *byte;
}

slice = rest;
Expand Down Expand Up @@ -377,7 +381,7 @@ impl BflatnSerializedRowBuffer<'_> {
/// Write `bytes: &[u8; N]` starting at the current offset
/// and advance the offset by `N`.
fn write_bytes<const N: usize>(&mut self, bytes: &[u8; N]) {
maybe_uninit_write_slice(&mut self.fixed_buf[range_move(0..N, self.curr_offset)], bytes);
self.fixed_buf[range_move(0..N, self.curr_offset)].copy_from_slice(bytes);
self.curr_offset += N;
}

Expand Down Expand Up @@ -458,7 +462,7 @@ pub mod test {
use spacetimedb_sats::proptest::generate_typed_row;

proptest! {
#![proptest_config(ProptestConfig::with_cases(2048))]
#![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]
#[test]
fn av_serde_round_trip_through_page((ty, val) in generate_typed_row()) {
let ty: RowTypeLayout = ty.into();
Expand Down
4 changes: 0 additions & 4 deletions crates/table/src/bflatn_to_bsatn_fast_path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ use crate::{
},
util::range_move,
};
use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;

/// A precomputed BSATN layout for a type whose encoded length is a known constant,
/// enabling fast BFLATN -> BSATN conversion.
Expand Down Expand Up @@ -109,14 +108,11 @@ impl MemcpyField {
///
/// - `buf` must be at least `self.bsatn_offset + self.length` long.
/// - `row` must be at least `self.bflatn_offset + self.length` long.
/// - `row[self.bflatn_offset .. self.bflatn_offset + length]` must all be initialized.
unsafe fn copy(&self, buf: &mut [u8], row: &Bytes) {
// SAFETY: forward caller requirement #1.
let to = unsafe { buf.get_unchecked_mut(range_move(0..self.length as usize, self.bsatn_offset as usize)) };
// SAFETY: forward caller requirement #2.
let from = unsafe { row.get_unchecked(range_move(0..self.length as usize, self.bflatn_offset as usize)) };
// SAFETY: forward caller requirement #3.
let from = unsafe { slice_assume_init_ref(from) };
to.copy_from_slice(from);
}

Expand Down
15 changes: 2 additions & 13 deletions crates/table/src/eq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use super::{
util::range_move,
var_len::VarLenRef,
};
use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;

/// Equates row `a` in `page_a` with its fixed part starting at `fixed_offset_a`
/// to row `b` in `page_b` with its fixed part starting at `fixed_offset_b`.
Expand Down Expand Up @@ -158,11 +157,7 @@ unsafe fn eq_value(ctx: &mut EqCtx<'_, '_>, ty: &AlgebraicTypeLayout) -> bool {
| &AlgebraicTypeLayout::I128
| &AlgebraicTypeLayout::U128
| &AlgebraicTypeLayout::F32
| &AlgebraicTypeLayout::F64 => {
// SAFETY: `value_a/b` are valid,
// so `&ctx.a/b.bytes[range_move(0..ty.size(), *ctx.curr_offset)]` contains init bytes.
unsafe { eq_byte_array(ctx, ty.size()) }
}
| &AlgebraicTypeLayout::F64 => eq_byte_array(ctx, ty.size()),

// The var-len cases.
&AlgebraicTypeLayout::String | AlgebraicTypeLayout::VarLen(_) => {
Expand Down Expand Up @@ -209,15 +204,9 @@ unsafe fn eq_vlo(ctx: &mut EqCtx<'_, '_>) -> bool {

/// Equates the byte arrays `data_a/data_b = ctx.a/b.bytes[range_move(0..len, ctx.curr_offset)]`
/// and advances the offset.
///
/// SAFETY: `data_a/b` must both be initialized as valid `&[u8]`s.
unsafe fn eq_byte_array(ctx: &mut EqCtx<'_, '_>, len: usize) -> bool {
fn eq_byte_array(ctx: &mut EqCtx<'_, '_>, len: usize) -> bool {
let data_a = &ctx.a.bytes[range_move(0..len, ctx.curr_offset)];
let data_b = &ctx.b.bytes[range_move(0..len, ctx.curr_offset)];
ctx.curr_offset += len;
// SAFETY: Caller promised that `data_a` was initialized.
let data_a = unsafe { slice_assume_init_ref(data_a) };
// SAFETY: Caller promised that `data_b` was initialized.
let data_b = unsafe { slice_assume_init_ref(data_b) };
data_a == data_b
}
2 changes: 1 addition & 1 deletion crates/table/src/eq_to_pv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ mod tests {
use spacetimedb_sats::proptest::generate_typed_row;

proptest! {
#![proptest_config(ProptestConfig::with_cases(2048))]
#![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]
#[test]
fn pv_row_ref_eq((ty, val) in generate_typed_row()) {
// Turn `val` into a `RowRef`.
Expand Down
2 changes: 1 addition & 1 deletion crates/table/src/fixed_bit_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ pub(crate) mod test {
const MAX_NBITS: usize = 1000;

proptest! {
#![proptest_config(ProptestConfig::with_cases(2048))]
#![proptest_config(ProptestConfig::with_cases(if cfg!(miri) { 8 } else { 2048 }))]

#[test]
fn after_new_there_are_no_bits_set(nbits in 0..MAX_NBITS) {
Expand Down
9 changes: 6 additions & 3 deletions crates/table/src/indexes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ use super::util::range_move;
use crate::static_assert_size;
use ahash::RandomState;
use core::fmt;
use core::mem::MaybeUninit;
use core::ops::{AddAssign, Div, Mul, Range, SubAssign};
use derive_more::{Add, Sub};
use spacetimedb_data_structures::map::ValidAsIdentityHash;

/// A byte is a possibly uninit `u8`.
pub type Byte = MaybeUninit<u8>;
/// A byte is a `u8`.
///
/// Previous implementations used `MaybeUninit<u8>` here,
/// but it became necessary to serialize pages to enable snapshotting,
/// so we require that all bytes in a page be valid `u8`s, never `poison`/`uninit`.
pub type Byte = u8;

/// A slice of [`Byte`]s.
pub type Bytes = [Byte];
Expand Down
Loading
Loading