Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add map and set extract_if #308

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 41 additions & 2 deletions src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ mod tests;
pub use self::core::raw_entry_v1::{self, RawEntryApiV1};
pub use self::core::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
pub use self::iter::{
Drain, IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Splice, Values, ValuesMut,
Drain, ExtractIf, IntoIter, IntoKeys, IntoValues, Iter, IterMut, Keys, Splice, Values,
ValuesMut,
};
pub use self::slice::Slice;
pub use crate::mutable_keys::MutableKeys;
Expand All @@ -33,7 +34,7 @@ use alloc::vec::Vec;
#[cfg(feature = "std")]
use std::collections::hash_map::RandomState;

use self::core::IndexMapCore;
pub(crate) use self::core::{ExtractCore, IndexMapCore};
use crate::util::{third, try_simplify_range};
use crate::{Bucket, Entries, Equivalent, HashValue, TryReserveError};

Expand Down Expand Up @@ -301,6 +302,44 @@ impl<K, V, S> IndexMap<K, V, S> {
Drain::new(self.core.drain(range))
}

/// Creates an iterator which uses a closure to determine if an element should be removed.
///
/// If the closure returns true, the element is removed from the map and yielded.
/// If the closure returns false, or panics, the element remains in the map and will not be
/// yielded.
///
/// Note that `extract_if` lets you mutate every value in the filter closure, regardless of
/// whether you choose to keep or remove it.
///
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
/// or the iteration short-circuits, then the remaining elements will be retained.
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
///
/// [`retain`]: IndexMap::retain
///
/// # Examples
///
/// Splitting a map into even and odd keys, reusing the original map:
///
/// ```
/// use indexmap::IndexMap;
///
/// let mut map: IndexMap<i32, i32> = (0..8).map(|x| (x, x)).collect();
/// let extracted: IndexMap<i32, i32> = map.extract_if(|k, _v| k % 2 == 0).collect();
///
/// let evens = extracted.keys().copied().collect::<Vec<_>>();
/// let odds = map.keys().copied().collect::<Vec<_>>();
///
/// assert_eq!(evens, vec![0, 2, 4, 6]);
/// assert_eq!(odds, vec![1, 3, 5, 7]);
/// ```
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
ExtractIf::new(&mut self.core, pred)
}

/// Splits the collection into two at the given index.
///
/// Returns a newly allocated map containing the elements in the range
Expand Down
2 changes: 2 additions & 0 deletions src/map/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::util::simplify_range;
use crate::{Bucket, Entries, Equivalent, HashValue};

pub use entry::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
pub(crate) use raw::ExtractCore;

/// Core of the map that does not depend on S
pub(crate) struct IndexMapCore<K, V> {
Expand Down Expand Up @@ -145,6 +146,7 @@ impl<K, V> IndexMapCore<K, V> {

#[inline]
pub(crate) fn len(&self) -> usize {
debug_assert_eq!(self.entries.len(), self.indices.len());
self.indices.len()
}

Expand Down
91 changes: 91 additions & 0 deletions src/map/core/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,20 @@ impl<K, V> IndexMapCore<K, V> {
// only the item references that are appropriately bound to `&mut self`.
unsafe { self.indices.iter().map(|bucket| bucket.as_mut()) }
}

pub(crate) fn extract(&mut self) -> ExtractCore<'_, K, V> {
// SAFETY: We must have consistent lengths to start, so that's a hard assertion.
// Then the worst `set_len(0)` can do is leak items if `ExtractCore` doesn't drop.
assert_eq!(self.entries.len(), self.indices.len());
unsafe {
self.entries.set_len(0);
}
ExtractCore {
map: self,
current: 0,
new_len: 0,
}
}
}

/// A view into an occupied raw entry in an `IndexMap`.
Expand Down Expand Up @@ -143,3 +157,80 @@ impl<'a, K, V> RawTableEntry<'a, K, V> {
(self.map, index)
}
}

pub(crate) struct ExtractCore<'a, K, V> {
map: &'a mut IndexMapCore<K, V>,
current: usize,
new_len: usize,
}

impl<K, V> Drop for ExtractCore<'_, K, V> {
fn drop(&mut self) {
let old_len = self.map.indices.len();
let mut new_len = self.new_len;
debug_assert!(new_len <= self.current);
debug_assert!(self.current <= old_len);
debug_assert!(old_len <= self.map.entries.capacity());

// SAFETY: We assume `new_len` and `current` were correctly maintained by the iterator.
// So `entries[new_len..current]` were extracted, but the rest before and after are valid.
unsafe {
if new_len == self.current {
// Nothing was extracted, so any remaining items can be left in place.
new_len = old_len;
} else if self.current < old_len {
// Need to shift the remaining items down.
let tail_len = old_len - self.current;
let base = self.map.entries.as_mut_ptr();
let src = base.add(self.current);
let dest = base.add(new_len);
src.copy_to(dest, tail_len);
new_len += tail_len;
}
self.map.entries.set_len(new_len);
}

if new_len != old_len {
// We don't keep track of *which* items were extracted, so reindex everything.
self.map.rebuild_hash_table();
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While it would be nice to reuse vec::ExtractIf, I haven't found a way to do this fixup for the indices afterward, because that vec::ExtractIf would be holding &mut on the entries, excluding even a raw pointer elsewhere.

}
}
}

impl<K, V> ExtractCore<'_, K, V> {
pub(crate) fn extract_if<F>(&mut self, mut pred: F) -> Option<Bucket<K, V>>
where
F: FnMut(&mut Bucket<K, V>) -> bool,
{
let old_len = self.map.indices.len();
debug_assert!(old_len <= self.map.entries.capacity());

let base = self.map.entries.as_mut_ptr();
while self.current < old_len {
// SAFETY: We're maintaining both indices within bounds of the original entries, so
// 0..new_len and current..old_len are always valid items for our Drop to keep.
unsafe {
let item = base.add(self.current);
if pred(&mut *item) {
// Extract it!
self.current += 1;
return Some(item.read());
} else {
// Keep it, shifting it down if needed.
if self.new_len != self.current {
debug_assert!(self.new_len < self.current);
let dest = base.add(self.new_len);
item.copy_to_nonoverlapping(dest, 1);
}
self.current += 1;
self.new_len += 1;
}
}
}
None
}

pub(crate) fn remaining(&self) -> usize {
self.map.indices.len() - self.current
}
}
56 changes: 54 additions & 2 deletions src/map/iter.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::core::IndexMapCore;
use super::{Bucket, Entries, IndexMap, Slice};
use super::{Bucket, Entries, ExtractCore, IndexMap, IndexMapCore, Slice};

use alloc::vec::{self, Vec};
use core::fmt;
Expand Down Expand Up @@ -711,3 +710,56 @@ where
.finish()
}
}

/// An extracting iterator for `IndexMap`.
///
/// This `struct` is created by [`IndexMap::extract_if()`].
/// See its documentation for more.
pub struct ExtractIf<'a, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
inner: ExtractCore<'a, K, V>,
pred: F,
}

impl<K, V, F> ExtractIf<'_, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
pub(super) fn new(core: &mut IndexMapCore<K, V>, pred: F) -> ExtractIf<'_, K, V, F> {
ExtractIf {
inner: core.extract(),
pred,
}
}
}

impl<K, V, F> Iterator for ExtractIf<'_, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
type Item = (K, V);

fn next(&mut self) -> Option<Self::Item> {
self.inner
.extract_if(|bucket| {
let (key, value) = bucket.ref_mut();
(self.pred)(key, value)
})
.map(Bucket::key_value)
}

fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.inner.remaining()))
}
}

impl<'a, K, V, F> fmt::Debug for ExtractIf<'a, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("ExtractIf").finish_non_exhaustive()
}
}
37 changes: 36 additions & 1 deletion src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ mod slice;
mod tests;

pub use self::iter::{
Difference, Drain, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
Difference, Drain, ExtractIf, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
};
pub use self::slice::Slice;

Expand Down Expand Up @@ -253,6 +253,41 @@ impl<T, S> IndexSet<T, S> {
Drain::new(self.map.core.drain(range))
}

/// Creates an iterator which uses a closure to determine if a value should be removed.
///
/// If the closure returns true, then the value is removed and yielded.
/// If the closure returns false, the value will remain in the list and will not be yielded
/// by the iterator.
///
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
/// or the iteration short-circuits, then the remaining elements will be retained.
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
///
/// [`retain`]: IndexSet::retain
///
/// # Examples
///
/// Splitting a set into even and odd values, reusing the original set:
///
/// ```
/// use indexmap::IndexSet;
///
/// let mut set: IndexSet<i32> = (0..8).collect();
/// let extracted: IndexSet<i32> = set.extract_if(|v| v % 2 == 0).collect();
///
/// let evens = extracted.into_iter().collect::<Vec<_>>();
/// let odds = set.into_iter().collect::<Vec<_>>();
///
/// assert_eq!(evens, vec![0, 2, 4, 6]);
/// assert_eq!(odds, vec![1, 3, 5, 7]);
/// ```
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, T, F>
where
F: FnMut(&T) -> bool,
{
ExtractIf::new(&mut self.map.core, pred)
}

/// Splits the collection into two at the given index.
///
/// Returns a newly allocated set containing the elements in the range
Expand Down
52 changes: 52 additions & 0 deletions src/set/iter.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::map::{ExtractCore, IndexMapCore};

use super::{Bucket, Entries, IndexSet, Slice};

use alloc::vec::{self, Vec};
Expand Down Expand Up @@ -624,3 +626,53 @@ impl<I: fmt::Debug> fmt::Debug for UnitValue<I> {
fmt::Debug::fmt(&self.0, f)
}
}

/// An extracting iterator for `IndexSet`.
///
/// This `struct` is created by [`IndexSet::extract_if()`].
/// See its documentation for more.
pub struct ExtractIf<'a, T, F>
where
F: FnMut(&T) -> bool,
{
inner: ExtractCore<'a, T, ()>,
pred: F,
}

impl<T, F> ExtractIf<'_, T, F>
where
F: FnMut(&T) -> bool,
{
pub(super) fn new(core: &mut IndexMapCore<T, ()>, pred: F) -> ExtractIf<'_, T, F> {
ExtractIf {
inner: core.extract(),
pred,
}
}
}

impl<T, F> Iterator for ExtractIf<'_, T, F>
where
F: FnMut(&T) -> bool,
{
type Item = T;

fn next(&mut self) -> Option<Self::Item> {
self.inner
.extract_if(|bucket| (self.pred)(bucket.key_ref()))
.map(Bucket::key)
}

fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.inner.remaining()))
}
}

impl<'a, T, F> fmt::Debug for ExtractIf<'a, T, F>
where
F: FnMut(&T) -> bool,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("ExtractIf").finish_non_exhaustive()
}
}