Skip to content

Commit

Permalink
Adds Vec::extend_from_slices_copy that accepts multiple slices (#240)
Browse files Browse the repository at this point in the history
* Adds `Vec::extend_from_slices_copy` that accepts multiple slices

* Adds quickchecks, tweaks comments and variable names

* Adds cfg(feature="collections") to all new quickchecks

* Renames variable `buf` to `slice`

* Updated comment

* Moves `set_len` call to `extend_from_slice_copy_unchecked`

---------

Co-authored-by: Zack Slayton <zslayton@amazon.com>
  • Loading branch information
zslayton and Zack Slayton committed Mar 7, 2024
1 parent 2ed8718 commit 6a91333
Show file tree
Hide file tree
Showing 4 changed files with 277 additions and 18 deletions.
73 changes: 73 additions & 0 deletions benches/benches.rs
Expand Up @@ -176,6 +176,78 @@ fn bench_extend_from_slice_copy(c: &mut Criterion) {
}
}

fn bench_extend_from_slices_copy(c: &mut Criterion) {
// The number of slices that will be copied into the Vec
let slice_counts = &[1, 2, 4, 8, 16, 32];

// Whether the Bump and its Vec have will already enough space to store the data without
// requiring reallocation
let is_preallocated_settings = &[false, true];

// Slices that can be used to extend the Vec; each may be used more than once.
let data: [&[u8]; 4] = [
black_box(b"wwwwwwwwwwwwwwww"),
black_box(b"xxxxxxxxxxxxxxxx"),
black_box(b"yyyyyyyyyyyyyyyy"),
black_box(b"zzzzzzzzzzzzzzzz"),
];

// For each (`is_preallocated`, `num_slices`) pair...
for is_preallocated in is_preallocated_settings {
for num_slices in slice_counts.iter().copied() {
// Create an appropriately named benchmark group
let mut group = c.benchmark_group(
format!("extend_from_slices num_slices={num_slices}, is_preallocated={is_preallocated}")
);

// Cycle over `data` to construct a slice of slices to append
let slices = data
.iter()
.copied()
.cycle()
.take(num_slices)
.collect::<Vec<_>>();
let total_size = slices.iter().map(|s| s.len()).sum();

// If `is_preallocated` is true, both the Bump and the benchmark Vecs will have enough
// capacity to store the concatenated data. If it's false, the Bump and the Vec start
// out with no capacity allocated and grow on demand.
let size_to_allocate = match is_preallocated {
true => total_size,
false => 0,
};
let mut bump = bumpalo::Bump::with_capacity(size_to_allocate);

// This benchmark demonstrates the performance of looping over the slice-of-slices,
// calling `extend_from_slice_copy` (and transitively, `reserve`) for each slice.
group.bench_function("loop over extend_from_slice_copy", |b| {
b.iter(|| {
bump.reset();
let mut vec = bumpalo::collections::Vec::<u8>::with_capacity_in(size_to_allocate, &bump);
for slice in black_box(&slices) {
vec.extend_from_slice_copy(slice);
}
black_box(vec.as_slice());
});
});

// This benchmark demonstrates the performance of using a single call to
// `extend_from_slices_copy`, which performs a single `reserve` before appending
// all of the slices.
group.bench_function("extend_from_slices_copy", |b| {
b.iter(|| {
bump.reset();
let mut vec = bumpalo::collections::Vec::<u8>::with_capacity_in(size_to_allocate, &bump);
vec.extend_from_slices_copy(black_box(slices.as_slice()));
black_box(vec.as_slice());
});
});

group.finish();
}
}
}

fn bench_alloc(c: &mut Criterion) {
let mut group = c.benchmark_group("alloc");
group.throughput(Throughput::Elements(ALLOCATIONS as u64));
Expand Down Expand Up @@ -320,6 +392,7 @@ fn bench_string_push_str(c: &mut Criterion) {
criterion_group!(
benches,
bench_extend_from_slice_copy,
bench_extend_from_slices_copy,
bench_alloc,
bench_alloc_with,
bench_alloc_try_with,
Expand Down
101 changes: 83 additions & 18 deletions src/collections/vec.rs
Expand Up @@ -1778,12 +1778,43 @@ impl<'bump, T: 'bump + Clone> Vec<'bump, T> {
}

impl<'bump, T: 'bump + Copy> Vec<'bump, T> {
/// Helper method to copy all of the items in `other` and append them to the end of `self`.
///
/// SAFETY:
/// * The caller is responsible for:
/// * calling [`reserve`](Self::reserve) beforehand to guarantee that there is enough
/// capacity to store `other.len()` more items.
/// * guaranteeing that `self` and `other` do not overlap.
unsafe fn extend_from_slice_copy_unchecked(&mut self, other: &[T]) {
let old_len = self.len();
debug_assert!(old_len + other.len() <= self.capacity());

// SAFETY:
// * `src` is valid for reads of `other.len()` values by virtue of being a `&[T]`.
// * `dst` is valid for writes of `other.len()` bytes because the caller of this
// method is required to `reserve` capacity to store at least `other.len()` items
// beforehand.
// * Because `src` is a `&[T]` and dst is a `&[T]` within the `Vec<T>`,
// `copy_nonoverlapping`'s alignment requirements are met.
// * Caller is required to guarantee that the source and destination ranges cannot overlap
unsafe {
let src = other.as_ptr();
let dst = self.as_mut_ptr().add(old_len);
ptr::copy_nonoverlapping(src, dst, other.len());
self.set_len(old_len + other.len());
}
}


/// Copies all elements in the slice `other` and appends them to the `Vec`.
///
/// Note that this function is same as [`extend_from_slice`] except that it is optimized for
/// slices of types that implement the `Copy` trait. If and when Rust gets specialization
/// this function will likely be deprecated (but still available).
///
/// To copy and append the data from multiple source slices at once, see
/// [`extend_from_slices_copy`].
///
/// # Examples
///
/// ```
Expand All @@ -1806,35 +1837,69 @@ impl<'bump, T: 'bump + Copy> Vec<'bump, T> {
/// assert_eq!(vec, "Hello, world!".as_bytes());
/// ```
///
/// [`extend`]: #method.extend_from_slice
/// [`extend_from_slice`]: #method.extend_from_slice
/// [`extend_from_slices`]: #method.extend_from_slices
pub fn extend_from_slice_copy(&mut self, other: &[T]) {

// Reserve space in the Vec for the values to be added
let old_len = self.len();
self.reserve(other.len());

let new_len = old_len + other.len();
debug_assert!(new_len <= self.capacity());

// Copy values into the space that was just reserved
// SAFETY:
// * `src` is valid for reads of `other.len()` values by virtue of being a `&[T]`.
// * `dst` is valid for writes of `other.len()` bytes as `self.reserve(other.len())`
// * `self` has enough capacity to store `other.len()` more items as `self.reserve(other.len())`
// above guarantees that.
// * Because `src` is a `&[T]` and dst is a `&[T]` within the `Vec<T>`,
// `copy_nonoverlapping`'s alignment requirements are met.
// * Source and destination ranges cannot overlap as we just reserved the destination
// * Source and destination data ranges cannot overlap as we just reserved the destination
// range from the bump.
unsafe {
let src = other.as_ptr();
let dst = self.as_mut_ptr().add(old_len);
ptr::copy_nonoverlapping(src, dst, other.len());
self.extend_from_slice_copy_unchecked(other);
}
}

/// For each slice in `slices`, copies all elements in the slice and appends them to the `Vec`.
///
/// This method is equivalent to calling [`extend_from_slice_copy`] in a loop, but is able
/// to precompute the total amount of space to reserve in advance. This reduces the potential
/// maximum number of reallocations needed from one-per-slice to just one.
///
/// # Examples
///
/// ```
/// use bumpalo::{Bump, collections::Vec};
///
/// let b = Bump::new();
///
/// let mut vec = bumpalo::vec![in &b; 1];
/// vec.extend_from_slices_copy(&[&[2, 3], &[], &[4]]);
/// assert_eq!(vec, [1, 2, 3, 4]);
/// ```
///
/// ```
/// use bumpalo::{Bump, collections::Vec};
///
/// let b = Bump::new();
///
/// let mut vec = bumpalo::vec![in &b; 'H' as u8];
/// vec.extend_from_slices_copy(&["ello,".as_bytes(), &[], " world!".as_bytes()]);
/// assert_eq!(vec, "Hello, world!".as_bytes());
/// ```
///
/// [`extend_from_slice_copy`]: #method.extend_from_slice_copy
pub fn extend_from_slices_copy(&mut self, slices: &[&[T]]) {
// Reserve the total amount of capacity we'll need to safely append the aggregated contents
// of each slice in `slices`.
let capacity_to_reserve: usize = slices.iter().map(|slice| slice.len()).sum();
self.reserve(capacity_to_reserve);

// Update length of Vec to include values just pushed
// SAFETY: We reserved sufficient capacity for the values above.
// The elements at `old_len..new_len` were initialized by `copy_nonoverlapping` above.
unsafe { self.set_len(new_len) };
// SAFETY:
// * `dst` is valid for writes of `capacity_to_reserve` items as
// `self.reserve(capacity_to_reserve)` above guarantees that.
// * Source and destination ranges cannot overlap as we just reserved the destination
// range from the bump.
unsafe {
// Copy the contents of each slice onto the end of `self`
slices.iter().for_each(|slice| {
self.extend_from_slice_copy_unchecked(slice);
});
}
}
}

Expand Down
86 changes: 86 additions & 0 deletions tests/all/quickchecks.rs
Expand Up @@ -306,4 +306,90 @@ quickcheck! {
}
}
}

#[cfg(feature = "collections")]
fn extending_from_slice(data1: Vec<usize>, data2: Vec<usize>) -> () {
let bump = Bump::new();

// Create a bumpalo Vec with the contents of `data1`
let mut vec = bumpalo::collections::Vec::new_in(&bump);
vec.extend_from_slice_copy(&data1);
assert_eq!(vec.as_slice(), data1);

// Extend the Vec using the contents of `data2`
vec.extend_from_slice_copy(&data2);
// Confirm that the Vec now has the expected number of items
assert_eq!(vec.len(), data1.len() + data2.len());
// Confirm that the beginning of the Vec matches `data1`'s elements
assert_eq!(&vec[0..data1.len()], data1);
// Confirm that the end of the Vec matches `data2`'s elements
assert_eq!(&vec[data1.len()..], data2);
}

#[cfg(feature = "collections")]
fn extending_from_slices(data: Vec<Vec<usize>>) -> () {
let bump = Bump::new();

// Convert the Vec<Vec<usize>> into a &[&[usize]]
let slices_vec: Vec<&[usize]> = data.iter().map(Vec::as_slice).collect();
let slices = slices_vec.as_slice();

// Isolate the first slice from the remaining slices. If `slices` is empty,
// fall back to empty slices for both.
let (first_slice, remaining_slices) = match slices {
[head, tail @ ..] => (*head, tail),
[] => (&[][..], &[][..])
};

// Create a bumpalo `Vec` and populate it with the contents of the first slice.
let mut vec = bumpalo::collections::Vec::new_in(&bump);
vec.extend_from_slice_copy(first_slice);
assert_eq!(vec.as_slice(), first_slice);

// Append all of the other slices onto the end of the Vec
vec.extend_from_slices_copy(remaining_slices);

let total_length: usize = slices.iter().map(|s| s.len()).sum();
assert_eq!(vec.len(), total_length);

let total_data: Vec<usize> = slices.iter().flat_map(|s| s.iter().copied()).collect();
assert_eq!(vec.as_slice(), total_data.as_slice());
}

#[cfg(feature = "collections")]
fn compare_extending_from_slice_and_from_slices(data: Vec<Vec<usize>>) -> () {
let bump = Bump::new();

// Convert the Vec<Vec<usize>> into a &[&[usize]]
let slices_vec: Vec<&[usize]> = data.iter().map(Vec::as_slice).collect();
let slices = slices_vec.as_slice();

// Isolate the first slice from the remaining slices. If `slices` is empty,
// fall back to empty slices for both.
let (first_slice, remaining_slices) = match slices {
[head, tail @ ..] => (*head, tail),
[] => (&[][..], &[][..])
};

// Create a bumpalo `Vec` and populate it with the contents of the first slice.
let mut vec1 = bumpalo::collections::Vec::new_in(&bump);
vec1.extend_from_slice_copy(first_slice);
assert_eq!(vec1.as_slice(), first_slice);

// Append each remaining slice individually
for slice in remaining_slices {
vec1.extend_from_slice_copy(slice);
}

// Create a second Vec populated with the contents of the first slice.
let mut vec2 = bumpalo::collections::Vec::new_in(&bump);
vec2.extend_from_slice_copy(first_slice);
assert_eq!(vec2.as_slice(), first_slice);

// Append the remaining slices en masse
vec2.extend_from_slices_copy(remaining_slices);

// Confirm that the two approaches to extending a Vec resulted in the same data
assert_eq!(vec1, vec2);
}
}
35 changes: 35 additions & 0 deletions tests/all/vec.rs
Expand Up @@ -106,6 +106,41 @@ fn test_vec_items_get_dropped() {
assert_eq!("Dropped!Dropped!", buffer.borrow().deref());
}

#[test]
fn test_extend_from_slice_copy() {
let bump = Bump::new();
let mut vec = vec![in &bump; 1, 2, 3];
assert_eq!(&[1, 2, 3][..], vec.as_slice());

vec.extend_from_slice_copy(&[4, 5, 6]);
assert_eq!(&[1, 2, 3, 4, 5, 6][..], vec.as_slice());

// Confirm that passing an empty slice is a no-op
vec.extend_from_slice_copy(&[]);
assert_eq!(&[1, 2, 3, 4, 5, 6][..], vec.as_slice());

vec.extend_from_slice_copy(&[7]);
assert_eq!(&[1, 2, 3, 4, 5, 6, 7][..], vec.as_slice());
}

#[test]
fn test_extend_from_slices_copy() {
let bump = Bump::new();
let mut vec = vec![in &bump; 1, 2, 3];
assert_eq!(&[1, 2, 3][..], vec.as_slice());

// Confirm that passing an empty slice of slices is a no-op
vec.extend_from_slices_copy(&[]);
assert_eq!(&[1, 2, 3][..], vec.as_slice());

// Confirm that an empty slice in the slice-of-slices is a no-op
vec.extend_from_slices_copy(&[&[4, 5, 6], &[], &[7]]);
assert_eq!(&[1, 2, 3, 4, 5, 6, 7][..], vec.as_slice());

vec.extend_from_slices_copy(&[&[8], &[9, 10, 11], &[12]]);
assert_eq!(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], vec.as_slice());
}

#[cfg(feature = "std")]
#[test]
fn test_vec_write() {
Expand Down

0 comments on commit 6a91333

Please sign in to comment.