Copy path View file
@@ -0,0 +1,222 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::hash::{Hash, BuildHasher};
use std::mem::replace;

use adaptive_hashing::AdaptiveState;
use table::{
RawTable,
FullBucketMut,
FullBucket,
};
use internal_entry::InternalEntry;
use HashMap;

// Beyond this displacement, we switch to safe hashing or grow the table.
const DISPLACEMENT_THRESHOLD: usize = 128;
const FORWARD_SHIFT_THRESHOLD: usize = 512;
// When the map's load factor is below this threshold, we switch to safe hashing.
// Otherwise, we grow the table.
// const LOAD_FACTOR_THRESHOLD: f32 = 0.625;
const LOAD_FACTOR_THRESHOLD: f32 = 0.2;

// The displacement threshold should be high enough so that even with the maximal load factor,
// it's very rarely exceeded.
// As the load approaches 90%, displacements larger than ~ 20 are much more probable.
// On the other hand, the threshold should be low enough so that the same number of hashes
// easily fits in the cache and takes a reasonable time to iterate through.

// The load factor threshold should be relatively low, but high enough so that its half is not very
// low (~ 20%). We choose 62.5%, because it's a simple fraction (5/8), and its half is 31.25%.
// (When a map is grown, the load factor is halved.)

// At a load factor of α, the odds of finding the target bucket after exactly n
// unsuccesful probes[1] are
//
// Pr_α{displacement = n} =
// (1 - α) / α * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! * (1 - kα / (k + n + 1))
//
// We use this formula to find the probability of loading half of a cache line, as well as
// the probability of triggering the DoS safeguard with an insertion:
//
// Pr_0.625{displacement > 3} = 0.036
// Pr_0.625{displacement > 128} = 2.284 * 10^-49

// Pr_0.909{displacement > 3} = 0.487
// Pr_0.909{displacement > 128} = 1.601 * 10^-11
//
// 1. Alfredo Viola (2005). Distributional analysis of Robin Hood linear probing
// hashing with buckets.

// TODO: add one-shot hashing for String, str, arrays and other types.
// TODO: consider adding a limit for the number of fully equal hashes in a probe sequence.
// Fully equal hashes cause key comparison, which might be a problem for large string keys.

// Avoid problems with private types in public interfaces.
pub type InternalEntryMut<'a, K: 'a, V: 'a> = InternalEntry<K, V, &'a mut RawTable<K, V>>;

pub trait OneshotHash: Hash {}

// We have this trait, because specialization doesn't work for inherent impls yet.
pub trait SafeguardedSearch<K, V> {
// Method names are changed, because inherent methods shadow trait impl
// methods.
fn reduce_displacement(&mut self);

fn is_safeguarded() -> bool;
}

impl OneshotHash for i8 {}
impl OneshotHash for u8 {}
impl OneshotHash for u16 {}
impl OneshotHash for i16 {}
impl OneshotHash for u32 {}
impl OneshotHash for i32 {}
impl OneshotHash for u64 {}
impl OneshotHash for i64 {}
impl OneshotHash for usize {}
impl OneshotHash for isize {}
impl OneshotHash for char {}
impl<T> OneshotHash for *const T {}
impl<T> OneshotHash for *mut T {}
impl<'a, T> OneshotHash for &'a T where T: OneshotHash {}
impl<'a, T> OneshotHash for &'a mut T where T: OneshotHash {}

#[inline]
pub fn safeguard_insertion<K, V>(
bucket: &FullBucketMut<K, V>,
reduce_displacement_flag: Option<&mut bool>) {
if let Some(flag) = reduce_displacement_flag {
if bucket.displacement() > DISPLACEMENT_THRESHOLD {
*flag = true;
}
}
}

#[inline]
pub fn safeguard_forward_shifted<'a, K, V>(
bucket: FullBucket<K, V, FullBucket<K, V, &'a mut RawTable<K, V>>>,
mut reduce_displacement_flag: Option<&'a mut bool>)
-> FullBucket<K, V, &'a mut RawTable<K, V>> {
let end_index = bucket.index();
let bucket = bucket.into_table();
let start_index = bucket.index();
if let Some(flag) = reduce_displacement_flag.as_mut() {
if end_index - start_index > FORWARD_SHIFT_THRESHOLD {
**flag = true;
}
}
safeguard_insertion(&bucket, reduce_displacement_flag);
bucket
}

impl<K, V, S> SafeguardedSearch<K, V> for HashMap<K, V, S>
where K: Eq + Hash,
S: BuildHasher
{
default fn reduce_displacement(&mut self) {
// nothing to do.
}

default fn is_safeguarded() -> bool {
false
}
}

impl<K, V> SafeguardedSearch<K, V> for HashMap<K, V, AdaptiveState>
where K: Eq + OneshotHash
{
#[cold]
fn reduce_displacement(&mut self) {
let load_factor = self.table.size() as f32 / self.table.capacity() as f32;
if load_factor >= LOAD_FACTOR_THRESHOLD {
// Probe sequence is too long. We must reduce its length.
let new_capacity = self.table.capacity() * 2;
self.resize(new_capacity);
} else {
// Taking this branch is extremely rare, assuming no intentional DoS attack.
self.hash_builder.switch_to_safe_hashing();
rebuild_table(self);
}
}

fn is_safeguarded() -> bool {
true
}
}

fn rebuild_table<K, V>(map: &mut HashMap<K, V, AdaptiveState>)
where K: Eq + Hash
{
let table_capacity = map.table.capacity();
let old_table = replace(&mut map.table, RawTable::new(table_capacity));
for (_, k, v) in old_table.into_iter() {
let hash = map.make_hash(&k);
map.insert_hashed_nocheck(hash, k, v);
}
}

#[cfg(test)]
mod test_adaptive_map {
use HashMap;
use super::DISPLACEMENT_THRESHOLD;

// These values all hash to N * 2^24 + 1523546 +/- 2.
static VALUES: &'static [u32] = &[
513314, 2977019, 3921903, 5005242, 6124431, 7696812, 16129307, 16296222, 17425488,
17898424, 19926075, 24768203, 25614709, 29006382, 30234341, 32377109, 34394074,
40324616, 40892565, 43025295, 43208269, 43761687, 43883113, 45274367, 47850630,
48320162, 48458322, 48960668, 49470322, 50545229, 51305930, 51391781, 54465806,
54541272, 55497339, 55788640, 57113511, 58250085, 58326435, 59316149, 62059483,
64136437, 64978683, 65076823, 66571125, 66632487, 68067917, 69921206, 70107088,
71829636, 76189936, 78639014, 80841986, 81844602, 83028134, 85818283, 86768196,
90374529, 91119955, 91540016, 93761675, 94583431, 95027700, 95247246, 95564585,
95663108, 95742804, 96147866, 97538112, 101129622, 101782620, 102170444,
104790535, 104815436, 105802703, 106364729, 106520836, 106563112, 107893429,
112185856, 113337504, 116895916, 122566166, 123359972, 123897385, 124028529,
125100458, 127234401, 128292718, 129767575, 132088268, 133737047, 133796663,
135903283, 136513103, 138868673, 139106372, 141282728, 141628856, 143250884,
143784740, 149114217, 150882858, 151116713, 152221499, 154271016, 155574791,
156179900, 157228942, 157518087, 159572211, 161327800, 161750984, 162237441,
164793050, 165064176, 166764350, 166847618, 167111553, 168117915, 169230761,
170322861, 170937855, 172389295, 173619266, 177610645, 178415544, 179549865,
185538500, 185906457, 195946437, 196591640, 196952032, 197505405, 200021193,
201058930, 201496104, 204691301, 206144773, 207320627, 211221882, 215434456,
];

#[test]
fn test_dos_safeguard() {
let mut map = HashMap::new();
let mut values = VALUES.iter();
for &value in (&mut values).take(DISPLACEMENT_THRESHOLD - 1) {
map.insert(value, ());
}
assert!(!map.hash_builder.uses_safe_hashing());
map.reserve(1000);
for &value in values.take(8) {
map.insert(value, ());
}
assert!(map.hash_builder.uses_safe_hashing());
}

// Regression test
#[test]
fn test_safeguarded_insertion() {
let mut map = HashMap::new();
let values = VALUES.iter().enumerate();
for (i, &value) in values.clone() {
map.insert(value, i);
}
for (i, &value) in values {
assert_eq!(map[&value], i);
}
}
}
Copy path View file
@@ -0,0 +1,209 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::mem;

use table::{EmptyBucket, FullBucket, SafeHash, RawTable};
use internal_entry::InternalEntry;
use pop_internal;
use robin_hood;
use adaptive_map;

pub use self::Entry::*;
pub use self::VacantEntryState::*;

/// A view into a single location in a map, which may be vacant or occupied.
pub enum Entry<'a, K: 'a, V: 'a> {
/// An occupied Entry.
Occupied(OccupiedEntry<'a, K, V>),

/// A vacant Entry.
Vacant(VacantEntry<'a, K, V>),
}

/// A view into a single occupied location in a HashMap.
pub struct OccupiedEntry<'a, K: 'a, V: 'a> {
key: Option<K>,
elem: FullBucket<K, V, &'a mut RawTable<K, V>>,
}

/// A view into a single empty location in a HashMap.
pub struct VacantEntry<'a, K: 'a, V: 'a> {
hash: SafeHash,
key: K,
reduce_displacement_flag: Option<&'a mut bool>,
elem: VacantEntryState<K, V, &'a mut RawTable<K, V>>,
}

/// Possible states of a VacantEntry.
pub enum VacantEntryState<K, V, M> {
/// The index is occupied, but the key to insert has precedence,
/// and will kick the current one out on insertion.
NeqElem(FullBucket<K, V, M>, usize),
/// The index is genuinely vacant.
NoElem(EmptyBucket<K, V, M>),
}

impl<'a, K, V> Entry<'a, K, V> {
/// Returns the entry key
///
/// # Examples
///
/// ```
/// use hashmap2::HashMap;
///
/// let mut map = HashMap::<String, u32>::new();
///
/// assert_eq!("hello", map.entry("hello".to_string()).key());
/// ```
pub fn key(&self) -> &K {
match *self {
Occupied(ref entry) => entry.key(),
Vacant(ref entry) => entry.key(),
}
}

/// Ensures a value is in the entry by inserting the default if empty, and returns
/// a mutable reference to the value in the entry.
pub fn or_insert(self, default: V) -> &'a mut V {
match self {
Occupied(entry) => entry.into_mut(),
Vacant(entry) => entry.insert(default),
}
}

/// Ensures a value is in the entry by inserting the result of the default function if empty,
/// and returns a mutable reference to the value in the entry.
pub fn or_insert_with<F: FnOnce() -> V>(self, default: F) -> &'a mut V {
match self {
Occupied(entry) => entry.into_mut(),
Vacant(entry) => entry.insert(default()),
}
}
}

impl<'a, K, V> OccupiedEntry<'a, K, V> {
/// Gets a reference to the value in the entry.
pub fn get(&self) -> &V {
self.elem.read().1
}

/// Gets a mutable reference to the value in the entry.
pub fn get_mut(&mut self) -> &mut V {
self.elem.read_mut().1
}

/// Converts the OccupiedEntry into a mutable reference to the value in the entry
/// with a lifetime bound to the map itself
pub fn into_mut(self) -> &'a mut V {
self.elem.into_mut_refs().1
}

/// Sets the value of the entry, and returns the entry's old value
pub fn insert(&mut self, mut value: V) -> V {
let old_value = self.get_mut();
mem::swap(&mut value, old_value);
value
}

/// Takes the value out of the entry, and returns it
pub fn remove(self) -> V {
pop_internal(self.elem).1
}

/// Gets a reference to the entry key
///
/// # Examples
///
/// ```
/// use hashmap2::HashMap;
///
/// let mut map = HashMap::new();
///
/// map.insert("foo".to_string(), 1);
/// assert_eq!("foo", map.entry("foo".to_string()).key());
/// ```
pub fn key(&self) -> &K {
self.elem.read().0
}

/// Returns a key that was used for search.
///
/// The key was retained for further use.
pub fn take_key(&mut self) -> Option<K> {
self.key.take()
}
}

impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> {
/// Sets the value of the entry with the VacantEntry's key,
/// and returns a mutable reference to it
pub fn insert(self, value: V) -> &'a mut V {
match self.elem {
NeqElem(bucket, ib) => {
robin_hood(bucket, ib, self.hash, self.key, value, self.reduce_displacement_flag)
}
NoElem(bucket) => {
let bucket = bucket.put(self.hash, self.key, value);
adaptive_map::safeguard_insertion(&bucket, self.reduce_displacement_flag);
bucket.into_mut_refs().1
}
}
}

/// Gets a reference to the entry key
///
/// # Examples
///
/// ```
/// use hashmap2::HashMap;
///
/// let mut map = HashMap::<String, u32>::new();
///
/// assert_eq!("foo", map.entry("foo".to_string()).key());
/// ```
pub fn key(&self) -> &K {
&self.key
}

pub fn set_flag_for_reduce_displacement(&mut self, flag: &'a mut bool) {
self.reduce_displacement_flag = Some(flag);
}
}

// These fns are public, but the entire module is not.

#[inline]
pub fn from_internal<K, V>(internal: InternalEntry<K, V, &mut RawTable<K, V>>, key: Option<K>)
-> Option<Entry<K, V>> {
match internal {
InternalEntry::Occupied { elem } => {
Some(Entry::Occupied(OccupiedEntry {
key: key,
elem: elem
}))
}
InternalEntry::Vacant { hash, elem } => {
Some(Entry::Vacant(VacantEntry {
hash: hash,
key: key.unwrap(),
reduce_displacement_flag: None,
elem: elem,
}))
}
InternalEntry::TableIsEmpty => None
}
}

#[inline]
pub fn occupied_elem<'a, 'r, K, V>(occupied: &'r mut OccupiedEntry<'a, K, V>)
-> &'r mut FullBucket<K, V, &'a mut RawTable<K, V>> {
&mut occupied.elem
}
Copy path View file
@@ -0,0 +1,41 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use table::{FullBucket, SafeHash, RawTable};
use entry::{self, VacantEntryState};
use Entry;

pub enum InternalEntry<K, V, M> {
Occupied {
elem: FullBucket<K, V, M>,
},
Vacant {
hash: SafeHash,
elem: VacantEntryState<K, V, M>,
},
TableIsEmpty,
}

impl<K, V, M> InternalEntry<K, V, M> {
#[inline]
pub fn into_occupied_bucket(self) -> Option<FullBucket<K, V, M>> {
match self {
InternalEntry::Occupied { elem } => Some(elem),
_ => None,
}
}
}

impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
#[inline]
pub fn into_entry(self, key: K) -> Option<Entry<'a, K, V>> {
entry::from_internal(self, Some(key))
}
}
Copy path View file

Large diffs are not rendered by default.

Oops, something went wrong.
Copy path View file
@@ -0,0 +1,48 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::hash::{BuildHasher, SipHasher13};
use rand::{self, Rng};

/// `SipHashState` is the default state for `HashMap` types.
///
/// A particular instance `SipHashState` will create the same instances of
/// `Hasher`, but the hashers created by two different `SipHashState`
/// instances are unlikely to produce the same result for the same values.
#[derive(Clone)]
pub struct SipHashState {
k0: u64,
k1: u64,
}

impl SipHashState {
/// Constructs a new `SipHashState` that is initialized with random keys.
#[inline]
#[allow(deprecated)] // rand
pub fn new() -> SipHashState {
thread_local!(static KEYS: (u64, u64) = {
let r = rand::OsRng::new();
let mut r = r.expect("failed to create an OS RNG");
(r.gen(), r.gen())
});

KEYS.with(|&(k0, k1)| {
SipHashState { k0: k0, k1: k1 }
})
}
}

impl BuildHasher for SipHashState {
type Hasher = SipHasher13;
#[inline]
fn build_hasher(&self) -> SipHasher13 {
SipHasher13::new_with_keys(self.k0, self.k1)
}
}
Copy path View file
@@ -59,7 +59,6 @@ const EMPTY_BUCKET: u64 = 0;
/// around just the "table" part of the hashtable. It enforces some
/// invariants at the type level and employs some performance trickery,
/// but in general is just a tricked out `Vec<Option<u64, K, V>>`.
#[unsafe_no_drop_flag]
pub struct RawTable<K, V> {
capacity: usize,
size: usize,
@@ -1025,7 +1024,7 @@ impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
impl<K, V> Drop for RawTable<K, V> {
#[unsafe_destructor_blind_to_params]
fn drop(&mut self) {
if self.capacity == 0 || self.capacity == mem::POST_DROP_USIZE {
if self.capacity == 0 {
return;
}