Skip to content

Commit

Permalink
Store vtables sorted in Rust builder (#6765)
Browse files Browse the repository at this point in the history
* benchmark many vtables

* Rust: Store written_table rev-positions sorted.

The previous implementation was slow if there were too many tables.

Asymototically when inserting the n^th vtable: The old implementation
took O(n) lookup steps and O(1) insertion. The new implementation is
O(log n) lookup and O(n) insertion. This might be improved further by
using a balanced btree.

Benchmarking, create_many_tables is 7.5x faster (on my laptop):

// Simple vector cache
test create_many_tables ... bench: 728,875 ns/iter (+/- 12,279) = 44 MB/s

// Sorted vector cache
test create_many_tables ... bench: 97,843 ns/iter (+/- 4,430) = 334 MB/s

* Fix lints

Co-authored-by: Casper Neo <cneo@google.com>
  • Loading branch information
CasperN and Casper Neo committed Aug 3, 2021
1 parent c39fc9d commit 35e2cac
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 60 deletions.
66 changes: 24 additions & 42 deletions rust/flatbuffers/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -554,63 +554,45 @@ impl<'fbb> FlatBufferBuilder<'fbb> {
// serialize every FieldLoc to the vtable:
for &fl in self.field_locs.iter() {
let pos: VOffsetT = (object_revloc_to_vtable.value() - fl.off) as VOffsetT;
debug_assert_eq!(
vtfw.get_field_offset(fl.id),
0,
"tried to write a vtable field multiple times"
);
vtfw.write_field_offset(fl.id, pos);
}
}
let dup_vt_use = {
let this_vt = VTable::init(&self.owned_buf[..], self.head);
self.find_duplicate_stored_vtable_revloc(this_vt)
};

let vt_use = match dup_vt_use {
Some(n) => {
let new_vt_bytes = &self.owned_buf[vt_start_pos..vt_end_pos];
let found = self.written_vtable_revpos.binary_search_by(|old_vtable_revpos: &UOffsetT| {
let old_vtable_pos = self.owned_buf.len() - *old_vtable_revpos as usize;
let old_vtable = VTable::init(&self.owned_buf, old_vtable_pos);
new_vt_bytes.cmp(old_vtable.as_bytes())
});
let final_vtable_revpos = match found {
Ok(i) => {
// The new vtable is a duplicate so clear it.
VTableWriter::init(&mut self.owned_buf[vt_start_pos..vt_end_pos]).clear();
self.head += vtable_byte_len;
n
self.written_vtable_revpos[i]
}
None => {
let new_vt_use = self.used_space() as UOffsetT;
self.written_vtable_revpos.push(new_vt_use);
new_vt_use
Err(i) => {
// This is a new vtable. Add it to the cache.
let new_vt_revpos = self.used_space() as UOffsetT;
self.written_vtable_revpos.insert(i, new_vt_revpos);
new_vt_revpos
}
};

{
let n = self.head + self.used_space() - object_revloc_to_vtable.value() as usize;
let saw = unsafe { read_scalar_at::<UOffsetT>(&self.owned_buf, n) };
debug_assert_eq!(saw, 0xF0F0_F0F0);
unsafe {
emplace_scalar::<SOffsetT>(
&mut self.owned_buf[n..n + SIZE_SOFFSET],
vt_use as SOffsetT - object_revloc_to_vtable.value() as SOffsetT,
);
}
// Write signed offset from table to its vtable.
let table_pos = self.owned_buf.len() - object_revloc_to_vtable.value() as usize;
let tmp_soffset_to_vt = unsafe { read_scalar_at::<UOffsetT>(&self.owned_buf, table_pos) };
debug_assert_eq!(tmp_soffset_to_vt, 0xF0F0_F0F0);
unsafe {
emplace_scalar::<SOffsetT>(
&mut self.owned_buf[table_pos..table_pos + SIZE_SOFFSET],
final_vtable_revpos as SOffsetT - object_revloc_to_vtable.value() as SOffsetT
);
}

self.field_locs.clear();

object_revloc_to_vtable
}

#[inline]
fn find_duplicate_stored_vtable_revloc(&self, needle: VTable) -> Option<UOffsetT> {
for &revloc in self.written_vtable_revpos.iter().rev() {
let o = VTable::init(
&self.owned_buf[..],
self.head + self.used_space() - revloc as usize,
);
if needle == o {
return Some(revloc);
}
}
None
}

// Only call this when you know it is safe to double the size of the buffer.
#[inline]
fn grow_owned_buf(&mut self) {
Expand Down
12 changes: 1 addition & 11 deletions rust/flatbuffers/src/vtable_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

use std::ptr::write_bytes;

use crate::endian_scalar::{emplace_scalar, read_scalar_at};
use crate::endian_scalar::emplace_scalar;
use crate::primitives::*;

/// VTableWriter compartmentalizes actions needed to create a vtable.
Expand Down Expand Up @@ -54,16 +54,6 @@ impl<'a> VTableWriter<'a> {
}
}

/// Gets an object field offset from the vtable. Only used for debugging.
///
/// Note that this expects field offsets (which are like pointers), not
/// field ids (which are like array indices).
#[inline(always)]
pub fn get_field_offset(&self, vtable_offset: VOffsetT) -> VOffsetT {
let idx = vtable_offset as usize;
unsafe { read_scalar_at::<VOffsetT>(&self.buf, idx) }
}

/// Writes an object field offset into the vtable.
///
/// Note that this expects field offsets (which are like pointers), not
Expand Down
31 changes: 25 additions & 6 deletions tests/rust_usage_test/benches/flatbuffers_benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ fn blackbox<T>(t: T) -> T {

#[inline(always)]
fn traverse_serialized_example_with_generated_code(bytes: &[u8]) {
let m = my_game::example::get_root_as_monster(bytes);
let m = unsafe { my_game::example::root_as_monster_unchecked(bytes) };
blackbox(m.hp());
blackbox(m.mana());
blackbox(m.name());
Expand Down Expand Up @@ -172,7 +172,7 @@ fn traverse_serialized_example_with_generated_code(bytes: &[u8]) {
}

fn create_string_10(bench: &mut Bencher) {
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
let mut i = 0;
bench.iter(|| {
builder.create_string("foobarbaz"); // zero-terminated -> 10 bytes
Expand All @@ -187,7 +187,7 @@ fn create_string_10(bench: &mut Bencher) {
}

fn create_string_100(bench: &mut Bencher) {
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
let s_owned = (0..99).map(|_| "x").collect::<String>();
let s: &str = &s_owned;

Expand All @@ -205,7 +205,7 @@ fn create_string_100(bench: &mut Bencher) {
}

fn create_byte_vector_100_naive(bench: &mut Bencher) {
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
let v_owned = (0u8..100).map(|i| i).collect::<Vec<u8>>();
let v: &[u8] = &v_owned;

Expand All @@ -223,7 +223,7 @@ fn create_byte_vector_100_naive(bench: &mut Bencher) {
}

fn create_byte_vector_100_optimal(bench: &mut Bencher) {
let builder = &mut flatbuffers::FlatBufferBuilder::new_with_capacity(1 << 20);
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
let v_owned = (0u8..100).map(|i| i).collect::<Vec<u8>>();
let v: &[u8] = &v_owned;

Expand All @@ -240,12 +240,31 @@ fn create_byte_vector_100_optimal(bench: &mut Bencher) {
bench.bytes = v.len() as u64;
}

fn create_many_tables(bench: &mut Bencher) {
let builder = &mut flatbuffers::FlatBufferBuilder::with_capacity(1 << 20);
// We test vtable overhead by making many unique tables of up to 16 fields of u8s.
bench.iter(|| {
for i in 0..(1u16 << 10) {
let t = builder.start_table();
for j in 0..15 {
if i & (1 << j) == 1 {
builder.push_slot_always(i * 2, 42u8);
}
}
builder.end_table(t);
}
builder.reset();
});
bench.bytes = 1 << 15;
}

benchmark_group!(
benches,
create_byte_vector_100_naive,
create_byte_vector_100_optimal,
traverse_canonical_buffer,
create_canonical_buffer_then_reset,
create_string_10,
create_string_100
create_string_100,
create_many_tables,
);
2 changes: 1 addition & 1 deletion tests/rust_usage_test/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ fn test_object_api_reads_correctly() -> Result<(), &'static str>{
// Disabled due to Windows CI limitations.
// #[test]
// fn builder_initializes_with_maximum_buffer_size() {
// flatbuffers::FlatBufferBuilder::new_with_capacity(flatbuffers::FLATBUFFERS_MAX_BUFFER_SIZE);
// flatbuffers::FlatBufferBuilder::with_capacity(flatbuffers::FLATBUFFERS_MAX_BUFFER_SIZE);
// }

#[should_panic]
Expand Down

0 comments on commit 35e2cac

Please sign in to comment.