diff --git a/folly/AtomicUnorderedMap.h b/folly/AtomicUnorderedMap.h
new file mode 100644
index 00000000000..a2c4ac73e38
--- /dev/null
+++ b/folly/AtomicUnorderedMap.h
@@ -0,0 +1,490 @@
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef FOLLY_ATOMICUNORDEREDMAP_H
+#define FOLLY_ATOMICUNORDEREDMAP_H
+
+#include <algorithm>
+#include <atomic>
+#include <cassert>
+#include <cstring>
+#include <functional>
+#include <stdexcept>
+#include <stdint.h>
+#include <system_error>
+#include <type_traits>
+
+#include <boost/type_traits/has_trivial_destructor.hpp>
+
+#include <folly/Bits.h>
+#include <folly/Likely.h>
+#include <folly/Random.h>
+#include <folly/detail/AtomicUnorderedMapUtils.h>
+
+namespace folly {
+
+/// You're probably reading this because you are looking for an
+/// AtomicUnorderedMap<K,V> that is fully general, highly concurrent (for
+/// reads, writes, and iteration), and makes no performance compromises.
+/// We haven't figured that one out yet.  What you will find here is a
+/// hash table implementation that sacrifices generality so that it can
+/// give you all of the other things.
+///
+/// LIMITATIONS:
+///
+/// * Insert only (*) - the only write operation supported directly by
+///   AtomicUnorderedInsertMap is findOrConstruct.  There is a (*) because
+///   values aren't moved, so you can roll your own concurrency control for
+///   in-place updates of values (see MutableData and MutableAtom below),
+///   but the hash table itself doesn't help you.
+///
+/// * No resizing - you must specify the capacity up front, and once
+///   the hash map gets full you won't be able to insert.  Insert
+///   performance will degrade once the load factor is high.  Insert is
+///   O(1/(1-actual_load_factor)).  Note that this is a pretty strong
+///   limitation, because you can't remove existing keys.
+///
+/// * 2^30 maximum capacity - you'll need to use something else if you
+///   have more than a billion entries.  If this limit bothers you, let
+///   us know: it wouldn't be too hard to parameterize the internal
+///   indexes between uint32_t and uint64_t.
+///
+/// WHAT YOU GET IN EXCHANGE:
+///
+/// * Arbitrary key and value types - any K and V that can be used in a
+///   std::unordered_map can be used here.  In fact, the key and value
+///   types don't even have to be copyable or moveable!
+///
+/// * Keys and values in the map won't be moved - it is safe to keep
+///   pointers or references to the keys and values in the map, because
+///   they are never moved or destroyed (until the map itself is destroyed).
+///
+/// * Iterators are never invalidated - writes don't invalidate iterators,
+///   so you can scan and insert in parallel.
+///
+/// * Fast wait-free reads - reads are usually only a single cache miss,
+///   even when the hash table is very large.  Wait-freedom means that
+///   you won't see latency outliers even in the face of concurrent writes.
+///
+/// * Lock-free insert - writes proceed in parallel.  If a thread in the
+///   middle of a write is unlucky and gets suspended, it doesn't block
+///   anybody else.
+///
+/// COMMENTS ON INSERT-ONLY
+///
+/// This map provides wait-free linearizable reads and lock-free
+/// linearizable inserts.
+/// Inserted values won't be moved, but no concurrency control is provided
+/// for safely updating them.  To remind you of that fact they are only
+/// provided in const form.  This is the only simple safe thing to do while
+/// preserving something like the normal std::map iteration form, which
+/// requires that iteration be exposed via std::pair (and prevents
+/// encapsulation of access to the value).
+///
+/// There are a couple of reasonable policies for doing in-place
+/// concurrency control on the values.  I am hoping that the policy can
+/// be injected via the value type or an extra template param, to keep
+/// the core AtomicUnorderedInsertMap insert-only:
+///
+///   CONST: this is the currently implemented strategy, which is simple,
+///   performant, and not that expressive.  You can always put in a value
+///   with a mutable field (see MutableAtom below), but that doesn't look
+///   as pretty as it should.
+///
+///   ATOMIC: for integers and integer-size trivially copyable structs
+///   (via an adapter like tao/queues/AtomicStruct) the value can be a
+///   std::atomic and read and written atomically.
+///
+///   SEQ-LOCK: attach a counter incremented before and after write.
+///   Writers serialize by using CAS to make an even->odd transition,
+///   then odd->even after the write.  Readers grab the value with memcpy,
+///   checking the sequence value before and after.  Readers retry until
+///   they see an even sequence number that doesn't change.  This works for
+///   larger structs, but still requires memcpy to be equivalent to copy
+///   assignment, and it is no longer lock-free.  It scales very well,
+///   because the readers are still invisible (no cache line writes).
+///
+///   LOCK: folly's SharedMutex would be a good choice here.
+///
+/// MEMORY ALLOCATION
+///
+/// Underlying memory is allocated as a big anonymous mmap chunk, which
+/// might be cheaper than calloc() and is certainly not more expensive
+/// for large maps.  If the SkipKeyValueDeletion template param is true
+/// then deletion of the map consists of unmapping the backing memory,
+/// which is much faster than destructing all of the keys and values.
+/// Feel free to override if the boost::has_trivial_destructor check isn't
+/// recognizing the triviality of your destructors.
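+///
+/// EXAMPLE (a sketch only, using the API described above; the local names
+/// are illustrative):
+///
+///   // set-once map; values are const once inserted
+///   AtomicUnorderedInsertMap<std::string,int> seen(1 << 20);
+///
+///   bool added = seen.emplace("abc", 1).second;  // lock-free insert
+///   auto it = seen.find("abc");                  // wait-free read
+///   if (it != seen.cend()) {
+///     int v = it->second;                        // read-only access
+///   }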
+template <typename Key,
+          typename Value,
+          typename Hash = std::hash<Key>,
+          typename KeyEqual = std::equal_to<Key>,
+          bool SkipKeyValueDeletion =
+              (boost::has_trivial_destructor<Key>::value &&
+               boost::has_trivial_destructor<Value>::value),
+          template <typename> class Atom = std::atomic,
+          typename Allocator = folly::detail::MMapAlloc>
+
+struct AtomicUnorderedInsertMap {
+
+  typedef Key key_type;
+  typedef Value mapped_type;
+  typedef std::pair<Key,Value> value_type;
+  typedef std::size_t size_type;
+  typedef std::ptrdiff_t difference_type;
+  typedef Hash hasher;
+  typedef KeyEqual key_equal;
+  typedef const value_type& const_reference;
+
+  typedef struct ConstIterator {
+    ConstIterator(const AtomicUnorderedInsertMap& owner, uint32_t slot)
+      : owner_(owner)
+      , slot_(slot)
+    {}
+
+    ConstIterator(const ConstIterator&) = default;
+    ConstIterator& operator= (const ConstIterator&) = default;
+
+    const value_type& operator* () const {
+      return owner_.slots_[slot_].keyValue();
+    }
+
+    const value_type* operator-> () const {
+      return &owner_.slots_[slot_].keyValue();
+    }
+
+    // pre-increment
+    const ConstIterator& operator++ () {
+      while (slot_ > 0) {
+        --slot_;
+        if (owner_.slots_[slot_].state() == LINKED) {
+          break;
+        }
+      }
+      return *this;
+    }
+
+    // post-increment
+    ConstIterator operator++ (int dummy) {
+      auto prev = *this;
+      ++*this;
+      return prev;
+    }
+
+    bool operator== (const ConstIterator& rhs) const {
+      return slot_ == rhs.slot_;
+    }
+    bool operator!= (const ConstIterator& rhs) const {
+      return !(*this == rhs);
+    }
+
+   private:
+    const AtomicUnorderedInsertMap& owner_;
+    uint32_t slot_;
+  } const_iterator;
+
+  friend ConstIterator;
+
+  /// Constructs a map that will support the insertion of maxSize
+  /// key-value pairs without exceeding the max load factor.  Load
+  /// factors greater than 1 are not supported, and once the actual load
+  /// factor of the map approaches 1 the insert performance will suffer.
+  /// The capacity is limited to 2^30 (about a billion), beyond which
+  /// we will throw invalid_argument.
+  explicit AtomicUnorderedInsertMap(
+      size_t maxSize,
+      float maxLoadFactor = 0.8f,
+      const Allocator& alloc = Allocator())
+    : allocator_(alloc)
+  {
+    size_t capacity = maxSize / std::min(1.0f, maxLoadFactor) + 128;
+    if (capacity > (1 << 30) && maxSize < (1 << 30)) {
+      // we'll do our best
+      capacity = (1 << 30);
+    }
+    if (capacity < maxSize || capacity > (1 << 30)) {
+      throw std::invalid_argument(
+          "AtomicUnorderedInsertMap capacity must fit in 30 bits");
+    }
+
+    numSlots_ = capacity;
+    slotMask_ = folly::nextPowTwo(capacity * 4) - 1;
+    mmapRequested_ = sizeof(Slot) * capacity;
+    slots_ = reinterpret_cast<Slot*>(allocator_.allocate(mmapRequested_));
+    zeroFillSlots();
+    // mark the zero-th slot as in-use but not valid, since that happens
+    // to be our nil value
+    slots_[0].stateUpdate(EMPTY, CONSTRUCTING);
+  }
+
+  ~AtomicUnorderedInsertMap() {
+    if (!SkipKeyValueDeletion) {
+      for (size_t i = 1; i < numSlots_; ++i) {
+        slots_[i].~Slot();
+      }
+    }
+    allocator_.deallocate(reinterpret_cast<char*>(slots_), mmapRequested_);
+  }
+
+  /// Searches for the key, returning (iter,false) if it is found.
+  /// If it is not found calls the functor Func with a void* argument
+  /// that is raw storage suitable for placement construction of a Value,
+  /// then returns (iter,true).  May call Func and then return (iter,false)
+  /// if there are other concurrent writes, in which case the newly
+  /// constructed value will be immediately destroyed.
+  ///
+  /// This function does not block other readers or writers.
+  /// If there are other concurrent writes, many parallel calls to func
+  /// may happen and only the first one to complete will win.  The values
+  /// constructed by the other calls to func will be destroyed.
+  ///
+  /// Usage:
+  ///
+  ///   AtomicUnorderedInsertMap<std::string,std::string> memo;
+  ///
+  ///   auto value = memo.findOrConstruct(key, [=](void* raw) {
+  ///     new (raw) std::string(computation(key));
+  ///   }).first->second;
+  template <typename Func>
+  std::pair<const_iterator,bool> findOrConstruct(const Key& key, Func&& func) {
+    auto const slot = keyToSlotIdx(key);
+    auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
+
+    auto existing = find(key, slot);
+    if (existing != 0) {
+      return std::make_pair(ConstIterator(*this, existing), false);
+    }
+
+    auto idx = allocateNear(slot);
+    new (&slots_[idx].keyValue().first) Key(key);
+    func(static_cast<void*>(&slots_[idx].keyValue().second));
+
+    while (true) {
+      slots_[idx].next_ = prev >> 2;
+
+      // we can merge the head update and the CONSTRUCTING -> LINKED update
+      // into a single CAS if slot == idx (which should happen often)
+      auto after = idx << 2;
+      if (slot == idx) {
+        after += LINKED;
+      } else {
+        after += (prev & 3);
+      }
+
+      if (slots_[slot].headAndState_.compare_exchange_strong(prev, after)) {
+        // success
+        if (idx != slot) {
+          slots_[idx].stateUpdate(CONSTRUCTING, LINKED);
+        }
+        return std::make_pair(ConstIterator(*this, idx), true);
+      }
+      // compare_exchange_strong updates its first arg on failure, so
+      // there is no need to reread prev
+
+      existing = find(key, slot);
+      if (existing != 0) {
+        // our allocated key and value are no longer needed
+        slots_[idx].keyValue().first.~Key();
+        slots_[idx].keyValue().second.~Value();
+        slots_[idx].stateUpdate(CONSTRUCTING, EMPTY);
+
+        return std::make_pair(ConstIterator(*this, existing), false);
+      }
+    }
+  }
+
+  /// This isn't really emplace, but it is what we need to test.
+  /// Eventually we can duplicate all of the std::pair constructor
+  /// forms, including a recursive tuple forwarding template (see
+  /// http://functionalcpp.wordpress.com/2013/08/28/tuple-forwarding/).
+  template <typename K, typename V>
+  std::pair<const_iterator,bool> emplace(const K& key, V&& value) {
+    return findOrConstruct(key, [&](void* raw) {
+      new (raw) Value(std::forward<V>(value));
+    });
+  }
+
+  const_iterator find(const Key& key) const {
+    return ConstIterator(*this, find(key, keyToSlotIdx(key)));
+  }
+
+  const_iterator cbegin() const {
+    uint32_t slot = numSlots_ - 1;
+    while (slot > 0 && slots_[slot].state() != LINKED) {
+      --slot;
+    }
+    return ConstIterator(*this, slot);
+  }
+
+  const_iterator cend() const {
+    return ConstIterator(*this, 0);
+  }
+
+ private:
+
+  enum {
+    kMaxAllocationTries = 1000, // after this we throw
+  };
+
+  enum BucketState : uint32_t {
+    EMPTY = 0,
+    CONSTRUCTING = 1,
+    LINKED = 2,
+  };
+
+  /// Lock-free insertion is easiest by prepending to collision chains.
+  /// A large chaining hash table takes two cache misses instead of
+  /// one, however.  Our solution is to colocate the bucket storage and
+  /// the head storage, so that even though we are traversing chains we
+  /// are likely to stay within the same cache line.  Just make sure to
+  /// traverse head before looking at any keys.  This strategy gives us
+  /// 32 bit pointers and fast iteration.
+  struct Slot {
+    /// The bottom two bits are the BucketState, the rest is the index
+    /// of the first bucket for the chain whose keys map to this slot.
+    /// When things are going well the head usually links to this slot,
+    /// but that doesn't always have to happen.
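+    ///
+    /// Concretely, the 32-bit word is laid out as
+    ///
+    ///   bits 31..2  index of the first slot in this bucket's collision chain
+    ///   bits 1..0   BucketState of this slot
+    ///
+    /// so the chain head is (headAndState_ >> 2), the state is
+    /// (headAndState_ & 3), and a slot that heads its own chain can be
+    /// published with a single CAS of ((idx << 2) | LINKED).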
+    Atom<uint32_t> headAndState_;
+
+    /// The next bucket in the chain
+    uint32_t next_;
+
+    /// Key and Value
+    typename std::aligned_storage<sizeof(value_type),
+                                  alignof(value_type)>::type raw_;
+
+
+    ~Slot() {
+      auto s = state();
+      assert(s == EMPTY || s == LINKED);
+      if (s == LINKED) {
+        keyValue().first.~Key();
+        keyValue().second.~Value();
+      }
+    }
+
+    BucketState state() const {
+      return BucketState(headAndState_.load(std::memory_order_acquire) & 3);
+    }
+
+    void stateUpdate(BucketState before, BucketState after) {
+      assert(state() == before);
+      headAndState_ += (after - before);
+    }
+
+    value_type& keyValue() {
+      assert(state() != EMPTY);
+      return *static_cast<value_type*>(static_cast<void*>(&raw_));
+    }
+
+    const value_type& keyValue() const {
+      assert(state() != EMPTY);
+      return *static_cast<const value_type*>(static_cast<const void*>(&raw_));
+    }
+
+  };
+
+  // We manually manage the slot memory so we can bypass initialization
+  // (by getting a zero-filled mmap chunk) and optionally destruction of
+  // the slots
+
+  size_t mmapRequested_;
+  size_t numSlots_;
+
+  /// tricky, see keyToSlotIdx
+  size_t slotMask_;
+
+  Allocator allocator_;
+  Slot* slots_;
+
+  uint32_t keyToSlotIdx(const Key& key) const {
+    size_t h = hasher()(key);
+    h &= slotMask_;
+    while (h >= numSlots_) {
+      h -= numSlots_;
+    }
+    return h;
+  }
+
+  uint32_t find(const Key& key, uint32_t slot) const {
+    KeyEqual ke = {};
+    auto hs = slots_[slot].headAndState_.load(std::memory_order_acquire);
+    for (slot = hs >> 2; slot != 0; slot = slots_[slot].next_) {
+      if (ke(key, slots_[slot].keyValue().first)) {
+        return slot;
+      }
+    }
+    return 0;
+  }
+
+  /// Allocates a slot and returns its index.  Tries to put it near
+  /// slots_[start].
+  uint32_t allocateNear(uint32_t start) {
+    for (auto tries = 0; tries < kMaxAllocationTries; ++tries) {
+      auto slot = allocationAttempt(start, tries);
+      auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
+      if ((prev & 3) == EMPTY &&
+          slots_[slot].headAndState_.compare_exchange_strong(
+              prev, prev + CONSTRUCTING - EMPTY)) {
+        return slot;
+      }
+    }
+    throw std::bad_alloc();
+  }
+
+  /// Returns the slot we should attempt to allocate after tries failed
+  /// tries, starting from the specified slot.  This is pulled out so we
+  /// can specialize it differently during deterministic testing
+  uint32_t allocationAttempt(uint32_t start, uint32_t tries) const {
+    if (LIKELY(tries < 8 && start + tries < numSlots_)) {
+      return start + tries;
+    } else {
+      uint32_t rv = folly::Random::rand32(numSlots_);
+      assert(rv < numSlots_);
+      return rv;
+    }
+  }
+
+  void zeroFillSlots() {
+    using folly::detail::GivesZeroFilledMemory;
+    if (!GivesZeroFilledMemory<Allocator>::value) {
+      memset(slots_, 0, mmapRequested_);
+    }
+  }
+};
+
+
+/// MutableAtom is a tiny wrapper that gives you the option of atomically
+/// updating values inserted into an AtomicUnorderedInsertMap<K,
+/// MutableAtom<V>>.  This relies on AtomicUnorderedInsertMap's guarantee
+/// that it doesn't move values.
+template <typename T,
+          template <typename> class Atom = std::atomic>
+struct MutableAtom {
+  mutable Atom<T> data;
+
+  explicit MutableAtom(const T& init) : data(init) {}
+};
+
+/// MutableData is a tiny wrapper that gives you the option of using an
+/// external concurrency control mechanism for updating values inserted
+/// into an AtomicUnorderedInsertMap<K, MutableData<V>>.
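+///
+/// Example (sketch only; the mutex is external to the map and the names
+/// here are illustrative):
+///
+///   AtomicUnorderedInsertMap<std::string, MutableData<std::vector<int>>>
+///       m(1000);
+///   std::mutex valuesLock;  // guards every value; shard it if contended
+///
+///   auto it = m.emplace("hits", std::vector<int>()).first;
+///   {
+///     std::lock_guard<std::mutex> g(valuesLock);
+///     it->second.data.push_back(1);
+///   }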
+template <typename T>
+struct MutableData {
+  mutable T data;
+  explicit MutableData(const T& init) : data(init) {}
+};
+
+
+}
+#endif
diff --git a/folly/Makefile.am b/folly/Makefile.am
index 5b1775e475e..a682e0120d5 100644
--- a/folly/Makefile.am
+++ b/folly/Makefile.am
@@ -28,6 +28,7 @@ nobase_follyinclude_HEADERS = \
 	AtomicHashMap-inl.h \
 	AtomicLinkedList.h \
 	AtomicStruct.h \
+	AtomicUnorderedMap.h \
 	Baton.h \
 	Benchmark.h \
 	Bits.h \
@@ -39,6 +40,7 @@ nobase_follyinclude_HEADERS = \
 	CpuId.h \
 	CPortability.h \
 	detail/AtomicHashUtils.h \
+	detail/AtomicUnorderedMapUtils.h \
 	detail/BitIteratorDetail.h \
 	detail/BitsDetail.h \
 	detail/CacheLocality.h \
diff --git a/folly/detail/AtomicUnorderedMapUtils.h b/folly/detail/AtomicUnorderedMapUtils.h
new file mode 100644
index 00000000000..f5737972481
--- /dev/null
+++ b/folly/detail/AtomicUnorderedMapUtils.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <cassert>
+#include <cerrno>
+#include <system_error>
+#include <type_traits>
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+namespace folly { namespace detail {
+
+class MMapAlloc {
+ private:
+  size_t computeSize(size_t size) {
+    long pagesize = sysconf(_SC_PAGESIZE);
+    size_t mmapLength = ((size - 1) & ~(pagesize - 1)) + pagesize;
+    assert(size <= mmapLength && mmapLength < size + pagesize);
+    assert((mmapLength % pagesize) == 0);
+    return mmapLength;
+  }
+
+ public:
+  void* allocate(size_t size) {
+    auto len = computeSize(size);
+
+    // MAP_HUGETLB is a perf win, but requires cooperation from the
+    // deployment environment (and a change to computeSize()).
+    void* mem = static_cast<void*>(mmap(
+        nullptr,
+        len,
+        PROT_READ | PROT_WRITE,
+        MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
+        -1,
+        0));
+    if (mem == reinterpret_cast<void*>(-1)) {
+      throw std::system_error(errno, std::system_category());
+    }
+
+    return mem;
+  }
+
+  void deallocate(void* p, size_t size) {
+    auto len = computeSize(size);
+    munmap(p, len);
+  }
+};
+
+template <typename Allocator>
+struct GivesZeroFilledMemory : public std::false_type {};
+
+template <>
+struct GivesZeroFilledMemory<MMapAlloc> : public std::true_type {};
+
+}}
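The rounding done by MMapAlloc::computeSize() can be restated at compile time. The sketch below is illustrative only: roundToPage is a made-up name and a fixed 4 KiB page is assumed, whereas the real code asks sysconf(_SC_PAGESIZE).

  #include <cstddef>

  // mirrors computeSize(): round a nonzero size up to a whole number of pages
  constexpr std::size_t roundToPage(std::size_t size,
                                    std::size_t pagesize = 4096) {
    return ((size - 1) & ~(pagesize - 1)) + pagesize;
  }

  static_assert(roundToPage(1) == 4096, "one byte still maps a full page");
  static_assert(roundToPage(4096) == 4096, "exact multiples are unchanged");
  static_assert(roundToPage(4097) == 8192, "otherwise round up to the next page");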
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace folly; +using namespace folly::test; + +template +struct non_atomic { + T value; + + non_atomic() = default; + non_atomic(const non_atomic&) = delete; + constexpr /* implicit */ non_atomic(T desired): value(desired) {} + + T operator+=(T arg) { value += arg; return load();} + + T load(std::memory_order order= std::memory_order_seq_cst) const { + return value; + } + + /* implicit */ + operator T() const {return load();} + + void store(T desired, std::memory_order order = std::memory_order_seq_cst) { + value = desired; + } + + T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) { + T old = load(); + store(desired); + return old; + } + + bool compare_exchange_weak( + T& expected, T desired, + std::memory_order success = std::memory_order_seq_cst, + std::memory_order failure = std::memory_order_seq_cst) { + if (value == expected) { + value = desired; + return true; + } + + expected = value; + return false; + } + + bool compare_exchange_strong( + T& expected, T desired, + std::memory_order success = std::memory_order_seq_cst, + std::memory_order failure = std::memory_order_seq_cst) { + if (value == expected) { + value = desired; + return true; + } + + expected = value; + return false; + } + + bool is_lock_free() const {return true;} +}; + +template< + typename Key, typename Value, template class Atom = non_atomic> +using UnorderedInsertMap = AtomicUnorderedInsertMap< + Key, + Value, + std::hash, + std::equal_to, + (boost::has_trivial_destructor::value && + boost::has_trivial_destructor::value), + Atom, + std::allocator>; + +TEST(AtomicUnorderedInsertMap, basic) { + AtomicUnorderedInsertMap m(100); + + m.emplace("abc", "ABC"); + EXPECT_TRUE(m.find("abc") != m.cend()); + EXPECT_EQ(m.find("abc")->first, "abc"); + EXPECT_EQ(m.find("abc")->second, "ABC"); + EXPECT_TRUE(m.find("def") == m.cend()); + auto iter = m.cbegin(); + EXPECT_TRUE(iter != m.cend()); + EXPECT_TRUE(iter == m.find("abc")); + auto a = iter; + EXPECT_TRUE(a == iter); + auto b = iter; + ++iter; + EXPECT_TRUE(iter == m.cend()); + EXPECT_TRUE(a == b); + EXPECT_TRUE(a != iter); + a++; + EXPECT_TRUE(a == iter); + EXPECT_TRUE(a != b); +} + +TEST(AtomicUnorderedInsertMap, value_mutation) { + AtomicUnorderedInsertMap> m(100); + + for (int i = 0; i < 50; ++i) { + m.emplace(i, i); + } + + m.find(1)->second.data++; +} + +TEST(UnorderedInsertMap, value_mutation) { + UnorderedInsertMap> m(100); + + for (int i = 0; i < 50; ++i) { + m.emplace(i, i); + } + + m.find(1)->second.data++; + EXPECT_EQ(m.find(1)->second.data, 2); +} + +BENCHMARK(lookup_int_int_hit, iters) { + std::unique_ptr> ptr = {}; + + size_t capacity = 100000; + + BENCHMARK_SUSPEND { + ptr.reset(new AtomicUnorderedInsertMap(capacity)); + for (size_t i = 0; i < capacity; ++i) { + auto k = 3 * ((5641 * i) % capacity); + ptr->emplace(k, k + 1); + EXPECT_EQ(ptr->find(k)->second, k + 1); + } + } + + for (size_t i = 0; i < iters; ++i) { + size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity); + auto iter = ptr->find(k); + if (iter == ptr->cend() || + iter->second != k + 1) { + auto jter = ptr->find(k); + EXPECT_TRUE(iter == jter); + } + EXPECT_EQ(iter->second, k + 1); + } + + BENCHMARK_SUSPEND { + ptr.reset(nullptr); + } +} + +struct PairHash { + size_t operator()(const std::pair& pr) const { + return pr.first ^ pr.second; + } +}; + +void contendedRW(size_t itersPerThread, + size_t capacity, + size_t numThreads, + size_t readsPerWrite) { + typedef std::pair Key; + 
+  typedef AtomicUnorderedInsertMap<Key,MutableAtom<uint64_t>,PairHash> Map;
+
+  std::unique_ptr<Map> ptr = {};
+  std::atomic<bool> go(false);
+  std::vector<std::thread> threads;
+
+  BENCHMARK_SUSPEND {
+    ptr.reset(new Map(capacity));
+    while (threads.size() < numThreads) {
+      threads.emplace_back([&](){
+        while (!go) {
+          std::this_thread::yield();
+        }
+
+        size_t reads = 0;
+        size_t writes = 0;
+        while (reads + writes < itersPerThread) {
+          auto r = Random::rand32();
+          Key key(reads + writes, r);
+          if (reads < writes * readsPerWrite ||
+              writes >= capacity / numThreads) {
+            // read needed
+            ++reads;
+            auto iter = ptr->find(key);
+            EXPECT_TRUE(
+                iter == ptr->cend() ||
+                iter->second.data.load(std::memory_order_acquire) >= key.first);
+          } else {
+            ++writes;
+            try {
+              auto pr = ptr->emplace(key, key.first);
+              if (!pr.second) {
+                pr.first->second.data++;
+              }
+            } catch (std::bad_alloc&) {
+              LOG(INFO) << "bad alloc";
+            }
+          }
+        }
+      });
+    }
+  }
+
+  go = true;
+
+  for (auto& thr : threads) {
+    thr.join();
+  }
+
+  BENCHMARK_SUSPEND {
+    ptr.reset(nullptr);
+  }
+}
+
+// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
+//
+// without MAP_HUGETLB (default)
+//
+// ============================================================================
+// common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative time/iter
+//   iters/s
+// ============================================================================
+// lookup_int_int_hit                                          20.05ns   49.89M
+// contendedRW(small_32thr_99pct)                              70.36ns   14.21M
+// contendedRW(large_32thr_99pct)                             164.23ns    6.09M
+// contendedRW(large_32thr_99_9pct)                           158.81ns    6.30M
+// ============================================================================
+//
+// with MAP_HUGETLB hacked in
+// ============================================================================
+// lookup_int_int_hit                                          19.67ns   50.84M
+// contendedRW(small_32thr_99pct)                              62.46ns   16.01M
+// contendedRW(large_32thr_99pct)                             119.41ns    8.37M
+// contendedRW(large_32thr_99_9pct)                           111.23ns    8.99M
+// ============================================================================
+BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
+BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
+BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)
+
+BENCHMARK_DRAW_LINE();
+
+// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
+//
+// Single threaded benchmarks to test how much better we are than
+// std::unordered_map and what is the cost of using atomic operations
+// in the uncontended use case
+// ============================================================================
+// std_map                                                      1.20ms   832.58
+// atomic_fast_map                                            511.35us    1.96K
+// fast_map                                                   196.28us    5.09K
+// ============================================================================
+
+BENCHMARK(std_map) {
+  std::unordered_map<int,int> m;
+  m.reserve(10000);
+  for (int i=0; i<10000; ++i) {
+    m.emplace(i,i);
+  }
+
+  for (int i=0; i<10000; ++i) {
+    auto a = m.find(i);
+    folly::doNotOptimizeAway(&*a);
+  }
+}
+
+BENCHMARK(atomic_fast_map) {
+  UnorderedInsertMap<int,int,std::atomic> m(10000);
+  for (int i=0; i<10000; ++i) {
+    m.emplace(i,i);
+  }
+
+  for (int i=0; i<10000; ++i) {
+    auto a = m.find(i);
+    folly::doNotOptimizeAway(&*a);
+  }
+}
+
+BENCHMARK(fast_map) {
+  UnorderedInsertMap<int,int> m(10000);
+  for (int i=0; i<10000; ++i) {
+    m.emplace(i,i);
+  }
+
+  for (int i=0; i<10000; ++i) {
+    auto a = m.find(i);
+    folly::doNotOptimizeAway(&*a);
+  }
+}
+
+
+int main(int argc, char ** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  int rv = RUN_ALL_TESTS();
+  folly::runBenchmarksOnFlag();
+  return rv;
+}
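Of the four value-update policies sketched in the AtomicUnorderedMap.h comment block, SEQ-LOCK is the only one with no code anywhere in this diff. Below is a rough, illustrative sketch of that idea; SeqLockedValue is a hypothetical name, the wrapper is not part of folly, and a production version would copy the payload with memcpy or relaxed atomics rather than the plain assignments used here, which are formally a data race.

  #include <atomic>
  #include <cstdint>

  // Sketch of the SEQ-LOCK policy: an even sequence number means "stable",
  // an odd one means "write in progress".  T must be trivially copyable.
  template <typename T>
  struct SeqLockedValue {
    explicit SeqLockedValue(const T& init) : data_(init) {}

    void store(const T& desired) {
      // writers serialize with an even->odd CAS on the sequence counter
      uint32_t seq = seq_.load(std::memory_order_relaxed);
      while (true) {
        if ((seq & 1) == 0 &&
            seq_.compare_exchange_weak(
                seq, seq + 1, std::memory_order_acquire)) {
          break;  // we own the write slot; seq still holds the old even value
        }
        seq = seq_.load(std::memory_order_relaxed);
      }
      data_ = desired;  // see the race caveat above
      seq_.store(seq + 2, std::memory_order_release);  // odd->even: publish
    }

    T load() const {
      while (true) {
        uint32_t before = seq_.load(std::memory_order_acquire);
        T copy = data_;  // see the race caveat above
        std::atomic_thread_fence(std::memory_order_acquire);
        if ((before & 1) == 0 &&
            seq_.load(std::memory_order_relaxed) == before) {
          return copy;  // sequence was even and unchanged: the copy is clean
        }
        // a writer was active; retry
      }
    }

   private:
    std::atomic<uint32_t> seq_{0};
    T data_;
  };

Readers only load the sequence word and never write shared cache lines, which is why the header comment expects this policy to scale well even though it is no longer lock-free.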