Commit
Merge pull request #142 from cuviper/drain-ranges
Expand drain to accept any RangeBounds<usize>
cuviper committed Aug 7, 2020
2 parents 884a104 + c42867c commit 8ddc4f1
Showing 6 changed files with 165 additions and 13 deletions.
22 changes: 18 additions & 4 deletions src/map.rs
@@ -13,7 +13,7 @@ use ::core::cmp::Ordering;
use ::core::fmt;
use ::core::hash::{BuildHasher, Hash, Hasher};
use ::core::iter::FromIterator;
use ::core::ops::{Index, IndexMut, RangeFull};
use ::core::ops::{Index, IndexMut, RangeBounds};
use ::core::slice::{Iter as SliceIter, IterMut as SliceIterMut};

#[cfg(has_std)]
@@ -252,9 +252,23 @@ impl<K, V, S> IndexMap<K, V, S> {
self.core.clear();
}

/// Clears the `IndexMap`, returning all key-value pairs as a drain iterator.
/// Keeps the allocated memory for reuse.
pub fn drain(&mut self, range: RangeFull) -> Drain<'_, K, V> {
/// Clears the `IndexMap` in the given index range, returning those
/// key-value pairs as a drain iterator.
///
/// The range may be any type that implements `RangeBounds<usize>`,
/// including all of the `std::ops::Range*` types, or even a tuple pair of
/// `Bound` start and end values. To drain the map entirely, use `RangeFull`
/// like `map.drain(..)`.
///
/// This shifts down all entries following the drained range to fill the
/// gap, and keeps the allocated memory for reuse.
///
/// ***Panics*** if the starting point is greater than the end point or if
/// the end point is greater than the length of the map.
pub fn drain<R>(&mut self, range: R) -> Drain<'_, K, V>
where
R: RangeBounds<usize>,
{
Drain {
iter: self.core.drain(range),
}
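
With the signature relaxed to any `RangeBounds<usize>`, callers can drain a sub-range by index as well as the whole map. A minimal usage sketch of the updated API (the keys and values are made up for illustration):

```rust
use indexmap::IndexMap;

fn main() {
    // Illustrative data; any key/value types with `Hash + Eq` keys behave the same way.
    let mut map: IndexMap<&str, i32> =
        vec![("a", 1), ("b", 2), ("c", 3), ("d", 4)].into_iter().collect();

    // Drain the middle of the map by index; drained pairs come back in insertion order.
    let drained: Vec<(&str, i32)> = map.drain(1..3).collect();
    assert_eq!(drained, vec![("b", 2), ("c", 3)]);

    // Entries after the drained range shift down to fill the gap.
    assert_eq!(map.get_index(0), Some((&"a", &1)));
    assert_eq!(map.get_index(1), Some((&"d", &4)));

    // `..` still clears the whole map, matching the old `RangeFull`-only behaviour.
    map.drain(..);
    assert!(map.is_empty());
}
```
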
12 changes: 8 additions & 4 deletions src/map/core.rs
@@ -15,10 +15,10 @@ use crate::vec::{Drain, Vec};
use core::cmp;
use core::fmt;
use core::mem::replace;
use core::ops::RangeFull;
use core::ops::RangeBounds;

use crate::equivalent::Equivalent;
use crate::util::enumerate;
use crate::util::{enumerate, simplify_range};
use crate::{Bucket, Entries, HashValue};

/// Core of the map that does not depend on S
@@ -129,8 +129,12 @@ impl<K, V> IndexMapCore<K, V> {
self.entries.clear();
}

pub(crate) fn drain(&mut self, range: RangeFull) -> Drain<'_, Bucket<K, V>> {
self.indices.clear();
pub(crate) fn drain<R>(&mut self, range: R) -> Drain<'_, Bucket<K, V>>
where
R: RangeBounds<usize>,
{
let range = simplify_range(range, self.entries.len());
self.erase_indices(range.start, range.end);
self.entries.drain(range)
}

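
The rewritten `drain` above does its work in a fixed order: normalize the range, fix up the index table while the entries are still in place, then let `Vec::drain` remove the entries. A rough model of those three steps, using a plain `HashMap` of key-to-index in place of the raw table (`ToyCore` is a toy sketch, not the crate's internals):

```rust
use std::collections::HashMap;
use std::ops::Range;

// Toy stand-in for IndexMapCore: entries in insertion order plus a key -> index lookup.
struct ToyCore {
    entries: Vec<(String, u32)>,
    indices: HashMap<String, usize>,
}

impl ToyCore {
    fn drain(&mut self, range: Range<usize>) -> std::vec::Drain<'_, (String, u32)> {
        let Range { start, end } = range;
        // 1. Validate the range up front (simplify_range's job in the real code).
        assert!(start <= end && end <= self.entries.len());

        // 2. Fix the lookup side while `entries` is still intact: drop the drained
        //    keys and shift every index after the range down by the drained count.
        for (key, _) in &self.entries[start..end] {
            self.indices.remove(key);
        }
        for (key, _) in &self.entries[end..] {
            *self.indices.get_mut(key).unwrap() -= end - start;
        }

        // 3. Only then let Vec::drain remove and shift the entries themselves.
        self.entries.drain(start..end)
    }
}

fn main() {
    let entries: Vec<(String, u32)> = vec![
        ("a".to_string(), 1),
        ("b".to_string(), 2),
        ("c".to_string(), 3),
    ];
    let indices: HashMap<String, usize> = entries
        .iter()
        .enumerate()
        .map(|(i, (k, _))| (k.clone(), i))
        .collect();
    let mut core = ToyCore { entries, indices };

    let drained: Vec<(String, u32)> = core.drain(0..2).collect();
    assert_eq!(drained.len(), 2);
    assert_eq!(core.indices["c"], 0); // "c" shifted down from index 2 to 0
}
```
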
65 changes: 65 additions & 0 deletions src/map/core/raw.rs
@@ -3,6 +3,7 @@
//! mostly in dealing with its bucket "pointers".

use super::{Entry, Equivalent, HashValue, IndexMapCore, VacantEntry};
use crate::util::enumerate;
use core::fmt;
use core::mem::replace;
use hashbrown::raw::RawTable;
@@ -44,11 +45,75 @@ impl<K, V> IndexMapCore<K, V> {
}
}

/// Erase the given index from `indices`.
///
/// The index doesn't need to be valid in `entries` while calling this. No other index
/// adjustments are made -- this is only used by `pop` for the greatest index.
pub(super) fn erase_index(&mut self, hash: HashValue, index: usize) {
debug_assert_eq!(index, self.indices.len() - 1);
let raw_bucket = self.find_index(hash, index).unwrap();
unsafe { self.indices.erase(raw_bucket) };
}

/// Erase `start..end` from `indices`, and shift `end..` indices down to `start..`
///
/// All of these items should still be at their original location in `entries`.
/// This is used by `drain`, which will let `Vec::drain` do the work on `entries`.
pub(super) fn erase_indices(&mut self, start: usize, end: usize) {
let (init, shifted_entries) = self.entries.split_at(end);
let (start_entries, erased_entries) = init.split_at(start);

let erased = erased_entries.len();
let shifted = shifted_entries.len();
let half_capacity = self.indices.buckets() / 2;

// Use a heuristic between different strategies
if erased == 0 {
// Degenerate case, nothing to do
} else if start + shifted < half_capacity && start < erased {
// Reinsert everything, as there are few kept indices
self.indices.clear();

// Reinsert stable indices
for (i, entry) in enumerate(start_entries) {
self.indices.insert_no_grow(entry.hash.get(), i);
}

// Reinsert shifted indices
for (i, entry) in (start..).zip(shifted_entries) {
self.indices.insert_no_grow(entry.hash.get(), i);
}
} else if erased + shifted < half_capacity {
// Find each affected index, as there are few to adjust

// Find erased indices
for (i, entry) in (start..).zip(erased_entries) {
let bucket = self.find_index(entry.hash, i).unwrap();
unsafe { self.indices.erase(bucket) };
}

// Find shifted indices
for ((new, old), entry) in (start..).zip(end..).zip(shifted_entries) {
let bucket = self.find_index(entry.hash, old).unwrap();
unsafe { bucket.write(new) };
}
} else {
// Sweep the whole table for adjustments
unsafe {
for bucket in self.indices.iter() {
let i = bucket.read();
if i >= end {
bucket.write(i - erased);
} else if i >= start {
self.indices.erase(bucket);
}
}
}
}

debug_assert_eq!(self.indices.len(), start + shifted);
}

pub(crate) fn entry(&mut self, hash: HashValue, key: K) -> Entry<'_, K, V>
where
K: Eq,
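
The heuristic in `erase_indices` depends only on a few counts, so the choice of strategy can be illustrated in isolation. A hedged sketch of the same decision logic with made-up table sizes (`erase_strategy` is not a real indexmap function):

```rust
/// Mirror of the `erase_indices` heuristic: given a drained range `start..end`,
/// the map length, and the raw-table bucket count, pick a fix-up strategy.
fn erase_strategy(start: usize, end: usize, len: usize, buckets: usize) -> &'static str {
    let erased = end - start;
    let shifted = len - end;
    let half_capacity = buckets / 2;

    if erased == 0 {
        // Degenerate case, nothing to do.
        "nothing to do"
    } else if start + shifted < half_capacity && start < erased {
        // Few indices survive: cheaper to clear the table and reinsert the keepers.
        "clear and reinsert"
    } else if erased + shifted < half_capacity {
        // Few indices are affected: erase or rewrite just those buckets.
        "targeted erase and shift"
    } else {
        // Most buckets change either way: sweep the whole table once.
        "full sweep"
    }
}

fn main() {
    // Drain almost everything from a small map: rebuilding the table wins.
    assert_eq!(erase_strategy(0, 7, 8, 16), "clear and reinsert");
    // Drain a small slice near the end of a large map: targeted updates win.
    assert_eq!(erase_strategy(700, 703, 800, 1024), "targeted erase and shift");
    // Drain a broad middle slice of a large map: most buckets move, so sweep them all.
    assert_eq!(erase_strategy(200, 480, 800, 1024), "full sweep");
}
```
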
22 changes: 18 additions & 4 deletions src/set.rs
@@ -11,7 +11,7 @@ use core::cmp::Ordering;
use core::fmt;
use core::hash::{BuildHasher, Hash};
use core::iter::{Chain, FromIterator};
use core::ops::{BitAnd, BitOr, BitXor, RangeFull, Sub};
use core::ops::{BitAnd, BitOr, BitXor, RangeBounds, Sub};
use core::slice;

use super::{Entries, Equivalent, IndexMap};
@@ -200,9 +200,23 @@ impl<T, S> IndexSet<T, S> {
self.map.clear();
}

/// Clears the `IndexSet`, returning all values as a drain iterator.
/// Keeps the allocated memory for reuse.
pub fn drain(&mut self, range: RangeFull) -> Drain<'_, T> {
/// Clears the `IndexSet` in the given index range, returning those values
/// as a drain iterator.
///
/// The range may be any type that implements `RangeBounds<usize>`,
/// including all of the `std::ops::Range*` types, or even a tuple pair of
/// `Bound` start and end values. To drain the set entirely, use `RangeFull`
/// like `set.drain(..)`.
///
/// This shifts down all entries following the drained range to fill the
/// gap, and keeps the allocated memory for reuse.
///
/// ***Panics*** if the starting point is greater than the end point or if
/// the end point is greater than the length of the set.
pub fn drain<R>(&mut self, range: R) -> Drain<'_, T>
where
R: RangeBounds<usize>,
{
Drain {
iter: self.map.drain(range).iter,
}
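
`IndexSet::drain` forwards to the map's implementation (note the `self.map.drain(range).iter` above), so it gains the same flexibility. A small usage sketch with illustrative values:

```rust
use indexmap::IndexSet;

fn main() {
    let mut set: IndexSet<char> = vec!['a', 'b', 'c', 'd', 'e'].into_iter().collect();

    // Drain by position, not by value: indices 1..4 hold 'b', 'c', 'd'.
    let drained: Vec<char> = set.drain(1..4).collect();
    assert_eq!(drained, vec!['b', 'c', 'd']);

    // The remaining values keep their relative order and shift down.
    assert_eq!(set.get_index(0), Some(&'a'));
    assert_eq!(set.get_index(1), Some(&'e'));
}
```
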
27 changes: 27 additions & 0 deletions src/util.rs
@@ -1,4 +1,5 @@
use core::iter::Enumerate;
use core::ops::{Bound, Range, RangeBounds};

pub(crate) fn third<A, B, C>(t: (A, B, C)) -> C {
t.2
@@ -10,3 +11,29 @@ where
{
iterable.into_iter().enumerate()
}

pub(crate) fn simplify_range<R>(range: R, len: usize) -> Range<usize>
where
R: RangeBounds<usize>,
{
let start = match range.start_bound() {
Bound::Unbounded => 0,
Bound::Included(&i) if i <= len => i,
Bound::Excluded(&i) if i < len => i + 1,
bound => panic!("range start {:?} should be <= length {}", bound, len),
};
let end = match range.end_bound() {
Bound::Unbounded => len,
Bound::Excluded(&i) if i <= len => i,
Bound::Included(&i) if i < len => i + 1,
bound => panic!("range end {:?} should be <= length {}", bound, len),
};
if start > end {
panic!(
"range start {:?} should be <= range end {:?}",
range.start_bound(),
range.end_bound()
);
}
start..end
}
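
The helper's behaviour is easiest to pin down with concrete bounds. A hypothetical test module, not part of this commit, showing what `simplify_range` returns for a length-5 collection:

```rust
// Hypothetical unit tests inside src/util.rs; written only to illustrate the semantics.
#[cfg(test)]
mod simplify_range_sketch {
    use super::simplify_range;
    use core::ops::Bound;

    #[test]
    fn normalizes_common_bounds() {
        let len = 5;
        assert_eq!(simplify_range(.., len), 0..5);
        assert_eq!(simplify_range(1..3, len), 1..3);
        assert_eq!(simplify_range(2.., len), 2..5);
        assert_eq!(simplify_range(..=4, len), 0..5);
        // Excluded start bounds are only reachable through the tuple form.
        assert_eq!(simplify_range((Bound::Excluded(0), Bound::Unbounded), len), 1..5);
    }

    #[test]
    #[should_panic]
    fn rejects_out_of_bounds_end() {
        // End point past the length should panic, matching the documented behaviour.
        simplify_range(3..9, 5);
    }
}
```
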
30 changes: 29 additions & 1 deletion tests/quick.rs
@@ -4,6 +4,7 @@ use itertools::Itertools;
use quickcheck::quickcheck;
use quickcheck::Arbitrary;
use quickcheck::Gen;
use quickcheck::TestResult;

use rand::Rng;

@@ -18,6 +19,7 @@ use std::collections::HashSet;
use std::fmt::Debug;
use std::hash::Hash;
use std::iter::FromIterator;
use std::ops::Bound;
use std::ops::Deref;

use indexmap::map::Entry as OEntry;
@@ -100,7 +102,7 @@ quickcheck! {
map.capacity() >= cap
}

fn drain(insert: Vec<u8>) -> bool {
fn drain_full(insert: Vec<u8>) -> bool {
let mut map = IndexMap::new();
for &key in &insert {
map.insert(key, ());
@@ -113,6 +115,32 @@
map.is_empty()
}

fn drain_bounds(insert: Vec<u8>, range: (Bound<usize>, Bound<usize>)) -> TestResult {
let mut map = IndexMap::new();
for &key in &insert {
map.insert(key, ());
}

// First see if `Vec::drain` is happy with this range.
let result = std::panic::catch_unwind(|| {
let mut keys: Vec<u8> = map.keys().cloned().collect();
keys.drain(range);
keys
});

if let Ok(keys) = result {
map.drain(range);
// Check that our `drain` matches the same key order.
assert!(map.keys().eq(&keys));
// Check that hash lookups all work too.
assert!(keys.iter().all(|key| map.contains_key(key)));
TestResult::passed()
} else {
// If `Vec::drain` panicked, so should we.
TestResult::must_fail(move || { map.drain(range); })
}
}

fn shift_remove(insert: Vec<u8>, remove: Vec<u8>) -> bool {
let mut map = IndexMap::new();
for &key in &insert {