diff --git a/folly/Arena.h b/folly/Arena.h
index c119d8c16ee..9798dc81a91 100644
--- a/folly/Arena.h
+++ b/folly/Arena.h
@@ -18,12 +18,13 @@
 #define FOLLY_ARENA_H_
 
 #include
-#include
 #include
+#include
 #include
 
 #include "folly/Likely.h"
 #include "folly/Malloc.h"
+#include "folly/Memory.h"
 
 namespace folly {
@@ -52,8 +53,7 @@ namespace folly {
  * guaranteed to be rounded up to a multiple of the maximum alignment
  * required on your system; the returned value must be also.
  *
- * An implementation that uses malloc() / free() is defined below, see
- * SysAlloc / SysArena.
+ * An implementation that uses malloc() / free() is defined below, see SysArena.
  */
 template <class Alloc> struct ArenaAllocatorTraits;
 template <class Alloc>
@@ -197,6 +197,9 @@ class Arena {
   size_t sizeLimit_;
 };
 
+template <class Alloc>
+struct IsArenaAllocator<Arena<Alloc>> : std::true_type { };
+
 /**
  * By default, don't pad the given size.
  */
@@ -207,21 +210,6 @@ struct ArenaAllocatorTraits {
   }
 };
 
-/**
- * Arena-compatible allocator that calls malloc() and free(); see
- * goodMallocSize() in Malloc.h for goodSize().
- */
-class SysAlloc {
- public:
-  void* allocate(size_t size) {
-    return checkedMalloc(size);
-  }
-
-  void deallocate(void* p) {
-    free(p);
-  }
-};
-
 template <>
 struct ArenaAllocatorTraits<SysAlloc> {
   static size_t goodSize(const SysAlloc& alloc, size_t size) {
@@ -234,13 +222,15 @@ struct ArenaAllocatorTraits<SysAlloc> {
  */
 class SysArena : public Arena<SysAlloc> {
  public:
-  explicit SysArena(
-      size_t minBlockSize = kDefaultMinBlockSize,
-      size_t sizeLimit = 0)
-    : Arena<SysAlloc>(SysAlloc(), minBlockSize, sizeLimit) {
+  explicit SysArena(size_t minBlockSize = kDefaultMinBlockSize,
+                    size_t sizeLimit = 0)
+    : Arena<SysAlloc>(SysAlloc(), minBlockSize, sizeLimit) {
   }
 };
 
+template <>
+struct IsArenaAllocator<SysArena> : std::true_type { };
+
 }  // namespace folly
 
 #include "folly/Arena-inl.h"
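The new trait can be exercised directly. The following sketch is not part of the patch; it only illustrates, with compile-time checks, what the specializations added above assert:

#include "folly/Arena.h"

// Arena<Alloc> and SysArena advertise a no-op deallocate(); SysAlloc keeps
// the primary template's false value because its deallocate() really frees.
static_assert(folly::IsArenaAllocator<folly::SysArena>::value, "");
static_assert(folly::IsArenaAllocator<folly::Arena<folly::SysAlloc>>::value, "");
static_assert(!folly::IsArenaAllocator<folly::SysAlloc>::value, "");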
diff --git a/folly/ConcurrentSkipList-inl.h b/folly/ConcurrentSkipList-inl.h
index deb34737de9..c303ac03849 100644
--- a/folly/ConcurrentSkipList-inl.h
+++ b/folly/ConcurrentSkipList-inl.h
@@ -20,11 +20,19 @@
 #define FOLLY_CONCURRENTSKIPLIST_INL_H_
 
 #include
+#include
 #include
 #include
+#include
+#include
+#include
+#include
+#include
 #include
-
+#include
 #include
+
+#include "folly/Memory.h"
 #include "folly/SmallLocks.h"
 #include "folly/ThreadLocal.h"
 
@@ -33,7 +41,7 @@ namespace folly { namespace detail {
 template<typename ValT, typename NodeT> class csl_iterator;
 
 template<typename T>
-class SkipListNode : boost::noncopyable {
+class SkipListNode : private boost::noncopyable {
   enum {
     IS_HEAD_NODE = 1,
     MARKED_FOR_REMOVAL = (1 << 1),
@@ -42,22 +50,30 @@ class SkipListNode : boost::noncopyable {
  public:
   typedef T value_type;
 
-  template<typename U,
-           typename=typename std::enable_if<std::is_convertible<U, T>::value>::type>
-  static SkipListNode* create(int height, U&& data, bool isHead = false) {
+  template<typename NodeAlloc, typename U,
+           typename=typename std::enable_if<std::is_convertible<U, T>::value>::type>
+  static SkipListNode* create(
+      NodeAlloc& alloc, int height, U&& data, bool isHead = false) {
     DCHECK(height >= 1 && height < 64) << height;
 
     size_t size = sizeof(SkipListNode) +
       height * sizeof(std::atomic<SkipListNode*>);
-    auto* node = static_cast<SkipListNode*>(malloc(size));
+    auto* node = static_cast<SkipListNode*>(alloc.allocate(size));
     // do placement new
     new (node) SkipListNode(height, std::forward<U>(data), isHead);
     return node;
   }
 
-  static void destroy(SkipListNode* node) {
+  template<typename NodeAlloc>
+  static void destroy(NodeAlloc& alloc, SkipListNode* node) {
     node->~SkipListNode();
-    free(node);
+    alloc.deallocate(node);
+  }
+
+  template<typename NodeAlloc>
+  static constexpr bool destroyIsNoOp() {
+    return IsArenaAllocator<NodeAlloc>::value &&
+           boost::has_trivial_destructor<std::atomic<SkipListNode*>>::value;
   }
 
   // copy the head node to a new head node assuming lock acquired
@@ -178,7 +194,6 @@ class SkipListRandomHeight {
   }
 
  private:
-
   SkipListRandomHeight() {
     initLookupTable();
   }
 
   void initLookupTable() {
@@ -211,6 +226,110 @@ class SkipListRandomHeight {
   size_t sizeLimitTable_[kMaxHeight];
 };
 
-}}
+template<typename NodeType, typename NodeAlloc, typename = void>
+class NodeRecycler;
+
+template<typename NodeType, typename NodeAlloc>
+class NodeRecycler<NodeType, NodeAlloc, typename std::enable_if<
+  !NodeType::template destroyIsNoOp<NodeAlloc>()>::type> {
+ public:
+  explicit NodeRecycler(const NodeAlloc& alloc)
+    : alloc_(alloc), refs_(0), dirty_(false) { lock_.init(); }
+
+  ~NodeRecycler() {
+    CHECK_EQ(refs(), 0);
+    if (nodes_) {
+      for (auto& node : *nodes_) {
+        NodeType::destroy(alloc_, node);
+      }
+    }
+  }
+
+  void add(NodeType* node) {
+    std::lock_guard<MicroSpinLock> g(lock_);
+    if (nodes_.get() == nullptr) {
+      nodes_.reset(new std::vector<NodeType*>(1, node));
+    } else {
+      nodes_->push_back(node);
+    }
+    DCHECK_GT(refs(), 0);
+    dirty_.store(true, std::memory_order_relaxed);
+  }
+
+  int addRef() {
+    return refs_.fetch_add(1, std::memory_order_relaxed);
+  }
+
+  int releaseRef() {
+    // We don't expect to clean the recycler immediately everytime it is OK
+    // to do so. Here, it is possible that multiple accessors all release at
+    // the same time but nobody would clean the recycler here. If this
+    // happens, the recycler will usually still get cleaned when
+    // such a race doesn't happen. The worst case is the recycler will
+    // eventually get deleted along with the skiplist.
+    if (LIKELY(!dirty_.load(std::memory_order_relaxed) || refs() > 1)) {
+      return refs_.fetch_add(-1, std::memory_order_relaxed);
+    }
+
+    std::unique_ptr<std::vector<NodeType*>> newNodes;
+    {
+      std::lock_guard<MicroSpinLock> g(lock_);
+      if (nodes_.get() == nullptr || refs() > 1) {
+        return refs_.fetch_add(-1, std::memory_order_relaxed);
+      }
+      // once refs_ reaches 1 and there is no other accessor, it is safe to
+      // remove all the current nodes in the recycler, as we already acquired
+      // the lock here so no more new nodes can be added, even though new
+      // accessors may be added after that.
+      newNodes.swap(nodes_);
+      dirty_.store(false, std::memory_order_relaxed);
+    }
+
+    // TODO(xliu) should we spawn a thread to do this when there are large
+    // number of nodes in the recycler?
+    for (auto& node : *newNodes) {
+      NodeType::destroy(alloc_, node);
+    }
+
+    // decrease the ref count at the very end, to minimize the
+    // chance of other threads acquiring lock_ to clear the deleted
+    // nodes again.
+    return refs_.fetch_add(-1, std::memory_order_relaxed);
+  }
+
+  NodeAlloc& alloc() { return alloc_; }
+
+ private:
+  int refs() const {
+    return refs_.load(std::memory_order_relaxed);
+  }
+
+  NodeAlloc alloc_;
+  std::unique_ptr<std::vector<NodeType*>> nodes_;
+  std::atomic<int32_t> refs_; // current number of visitors to the list
+  std::atomic<bool> dirty_; // whether *nodes_ is non-empty
+  MicroSpinLock lock_; // protects access to *nodes_
+};
+
+// In case of arena allocator, no recycling is necessary, and it's possible
+// to save on ConcurrentSkipList size.
+template<typename NodeType, typename NodeAlloc>
+class NodeRecycler<NodeType, NodeAlloc, typename std::enable_if<
+  NodeType::template destroyIsNoOp<NodeAlloc>()>::type> {
+ public:
+  explicit NodeRecycler(const NodeAlloc& alloc) : alloc_(alloc) { }
+
+  void addRef() { }
+  void releaseRef() { }
+
+  void add(NodeType* node) { }
+
+  NodeAlloc& alloc() { return alloc_; }
+
+ private:
+  NodeAlloc alloc_;
+};
+
+}}  // namespaces
 
 #endif  // FOLLY_CONCURRENTSKIPLIST_INL_H_
diff --git a/folly/ConcurrentSkipList.h b/folly/ConcurrentSkipList.h
index 42642b4b6f4..265f5fc5e9c 100644
--- a/folly/ConcurrentSkipList.h
+++ b/folly/ConcurrentSkipList.h
@@ -122,21 +122,25 @@ Sample usage:
 
 #include
 #include
-#include
+#include
 #include
 #include
-#include
-#include
 #include
 #include
 
 #include "folly/ConcurrentSkipList-inl.h"
 #include "folly/Likely.h"
+#include "folly/Memory.h"
 #include "folly/SmallLocks.h"
 
 namespace folly {
 
-template<typename T, typename Comp=std::less<T>, int MAX_HEIGHT=24>
+template<typename T,
+         typename Comp = std::less<T>,
+         // All nodes are allocated using provided SimpleAllocator,
+         // it should be thread-safe.
+         typename NodeAlloc = SysAlloc,
+         int MAX_HEIGHT = 24>
 class ConcurrentSkipList {
   // MAX_HEIGHT needs to be at least 2 to suppress compiler
   // warnings/errors (Werror=uninitialized tiggered due to preds_[1]
@@ -144,7 +148,7 @@ class ConcurrentSkipList {
   static_assert(MAX_HEIGHT >= 2 && MAX_HEIGHT < 64,
       "MAX_HEIGHT can only be in the range of [2, 64)");
   typedef std::unique_lock<MicroSpinLock> ScopedLocker;
-  typedef ConcurrentSkipList<T, Comp, MAX_HEIGHT> SkipListType;
+  typedef ConcurrentSkipList<T, Comp, NodeAlloc, MAX_HEIGHT> SkipListType;
 
  public:
   typedef detail::SkipListNode<T> NodeType;
@@ -157,17 +161,20 @@ class ConcurrentSkipList {
 
   class Accessor;
   class Skipper;
 
-  explicit ConcurrentSkipList(int height)
-    : head_(NodeType::create(height, value_type(), true)), size_(0) { }
+  explicit ConcurrentSkipList(int height, const NodeAlloc& alloc = NodeAlloc())
+    : recycler_(alloc),
+      head_(NodeType::create(recycler_.alloc(), height, value_type(), true)),
+      size_(0) { }
 
   // Convenient function to get an Accessor to a new instance.
-  static Accessor create(int height = 1) {
-    return Accessor(createInstance(height));
+  static Accessor create(int height = 1, const NodeAlloc& alloc = NodeAlloc()) {
+    return Accessor(createInstance(height, alloc));
   }
 
   // Create a shared_ptr skiplist object with initial head height.
-  static std::shared_ptr<SkipListType> createInstance(int height = 1) {
-    return std::make_shared<ConcurrentSkipList>(height);
+  static std::shared_ptr<SkipListType> createInstance(
+      int height = 1, const NodeAlloc& alloc = NodeAlloc()) {
+    return std::make_shared<ConcurrentSkipList>(height, alloc);
   }
 
   //===================================================================
@@ -176,10 +183,13 @@ class ConcurrentSkipList {
   //===================================================================
 
   ~ConcurrentSkipList() {
-    CHECK_EQ(recycler_.refs(), 0);
+    /* static */ if (NodeType::template destroyIsNoOp<NodeAlloc>()) {
+      // Avoid traversing the list if using arena allocator.
+      return;
+    }
     for (NodeType* current = head_.load(std::memory_order_relaxed); current; ) {
       NodeType* tmp = current->skip(0);
-      NodeType::destroy(current);
+      NodeType::destroy(recycler_.alloc(), current);
       current = tmp;
     }
   }
@@ -219,84 +229,12 @@ class ConcurrentSkipList {
     return foundLayer;
   }
 
-  struct Recycler : private boost::noncopyable {
-    Recycler() : refs_(0), dirty_(false) { lock_.init(); }
-
-    ~Recycler() {
-      if (nodes_) {
-        for (auto& node : *nodes_) {
-          NodeType::destroy(node);
-        }
-      }
-    }
-
-    void add(NodeType* node) {
-      std::lock_guard<MicroSpinLock> g(lock_);
-      if (nodes_.get() == nullptr) {
-        nodes_.reset(new std::vector<NodeType*>(1, node));
-      } else {
-        nodes_->push_back(node);
-      }
-      DCHECK_GT(refs(), 0);
-      dirty_.store(true, std::memory_order_relaxed);
-    }
-
-    int refs() const {
-      return refs_.load(std::memory_order_relaxed);
-    }
-
-    int addRef() {
-      return refs_.fetch_add(1, std::memory_order_relaxed);
-    }
-
-    int release() {
-      // We don't expect to clean the recycler immediately everytime it is OK
-      // to do so. Here, it is possible that multiple accessors all release at
-      // the same time but nobody would clean the recycler here. If this
-      // happens, the recycler will usually still get cleaned when
-      // such a race doesn't happen. The worst case is the recycler will
-      // eventually get deleted along with the skiplist.
-      if (LIKELY(!dirty_.load(std::memory_order_relaxed) || refs() > 1)) {
-        return refs_.fetch_add(-1, std::memory_order_relaxed);
-      }
-
-      std::unique_ptr<std::vector<NodeType*>> newNodes;
-      {
-        std::lock_guard<MicroSpinLock> g(lock_);
-        if (nodes_.get() == nullptr || refs() > 1) {
-          return refs_.fetch_add(-1, std::memory_order_relaxed);
-        }
-        // once refs_ reaches 1 and there is no other accessor, it is safe to
-        // remove all the current nodes in the recycler, as we already acquired
-        // the lock here so no more new nodes can be added, even though new
-        // accessors may be added after that.
-        newNodes.swap(nodes_);
-        dirty_.store(false, std::memory_order_relaxed);
-      }
-
-      // TODO(xliu) should we spawn a thread to do this when there are large
-      // number of nodes in the recycler?
-      for (auto& node : *newNodes) {
-        NodeType::destroy(node);
-      }
-
-      // decrease the ref count at the very end, to minimize the
-      // chance of other threads acquiring lock_ to clear the deleted
-      // nodes again.
-      return refs_.fetch_add(-1, std::memory_order_relaxed);
-    }
-
-   private:
-    std::unique_ptr<std::vector<NodeType*>> nodes_;
-    std::atomic<int32_t> refs_; // current number of visitors to the list
-    std::atomic<bool> dirty_; // whether *nodes_ is non-empty
-    MicroSpinLock lock_; // protects access to *nodes_
-  };  // class ConcurrentSkipList::Recycler
-
   size_t size() const { return size_.load(std::memory_order_relaxed); }
+  int height() const { return head_.load(std::memory_order_consume)->height(); }
+  int maxLayer() const { return height() - 1; }
 
   size_t incrementSize(int delta) {
@@ -376,7 +314,8 @@ class ConcurrentSkipList {
     }
 
     // locks acquired and all valid, need to modify the links under the locks.
-    newNode = NodeType::create(nodeHeight, std::forward<U>(data));
+    newNode =
+      NodeType::create(recycler_.alloc(), nodeHeight, std::forward<U>(data));
     for (int layer = 0; layer < nodeHeight; ++layer) {
       newNode->setSkip(layer, succs[layer]);
       preds[layer]->setSkip(layer, newNode);
@@ -537,7 +476,8 @@ class ConcurrentSkipList {
       return;
     }
 
-    NodeType* newHead = NodeType::create(height, value_type(), true);
+    NodeType* newHead =
+      NodeType::create(recycler_.alloc(), height, value_type(), true);
 
     { // need to guard the head node in case others are adding/removing
       // nodes linked to the head.
@@ -547,7 +487,7 @@ class ConcurrentSkipList {
       if (!head_.compare_exchange_strong(expected, newHead,
           std::memory_order_release)) {
         // if someone has already done the swap, just return.
-        NodeType::destroy(newHead);
+        NodeType::destroy(recycler_.alloc(), newHead);
         return;
       }
       oldHead->setMarkedForRemoval();
@@ -559,15 +499,15 @@ class ConcurrentSkipList {
     recycler_.add(node);
   }
 
+  detail::NodeRecycler<NodeType, NodeAlloc> recycler_;
   std::atomic<NodeType*> head_;
-  Recycler recycler_;
   std::atomic<size_t> size_;
 };
 
-template<typename T, typename Comp, int MAX_HEIGHT>
-class ConcurrentSkipList<T, Comp, MAX_HEIGHT>::Accessor {
+template<typename T, typename Comp, typename NodeAlloc, int MAX_HEIGHT>
+class ConcurrentSkipList<T, Comp, NodeAlloc, MAX_HEIGHT>::Accessor {
   typedef detail::SkipListNode<T> NodeType;
-  typedef ConcurrentSkipList<T, Comp, MAX_HEIGHT> SkipListType;
+  typedef ConcurrentSkipList<T, Comp, NodeAlloc, MAX_HEIGHT> SkipListType;
  public:
   typedef T value_type;
   typedef T key_type;
@@ -607,7 +547,7 @@ class ConcurrentSkipList<T, Comp, MAX_HEIGHT>::Accessor {
   Accessor& operator=(const Accessor &accessor) {
     if (this != &accessor) {
       slHolder_ = accessor.slHolder_;
-      sl_->recycler_.release();
+      sl_->recycler_.releaseRef();
      sl_ = accessor.sl_;
       sl_->recycler_.addRef();
     }
@@ -615,7 +555,7 @@
   }
 
   ~Accessor() {
-    sl_->recycler_.release();
+    sl_->recycler_.releaseRef();
   }
 
   bool empty() const { return sl_->size() == 0; }
@@ -734,10 +674,10 @@ class detail::csl_iterator :
 };
 
 // Skipper interface
-template<typename T, typename Comp, int MAX_HEIGHT>
-class ConcurrentSkipList<T, Comp, MAX_HEIGHT>::Skipper {
+template<typename T, typename Comp, typename NodeAlloc, int MAX_HEIGHT>
+class ConcurrentSkipList<T, Comp, NodeAlloc, MAX_HEIGHT>::Skipper {
   typedef detail::SkipListNode<T> NodeType;
-  typedef ConcurrentSkipList<T, Comp, MAX_HEIGHT> SkipListType;
+  typedef ConcurrentSkipList<T, Comp, NodeAlloc, MAX_HEIGHT> SkipListType;
   typedef typename SkipListType::Accessor Accessor;
  public:
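To make the new NodeAlloc parameter concrete, here is a hypothetical usage sketch, not part of the patch. ArenaRefAlloc is an invented, copyable SimpleAllocator that borrows a long-lived SysArena; its IsArenaAllocator specialization lets the skip list pick the no-op NodeRecycler above (provided the node's internal atomics are detected as trivially destructible). The arena must outlive the skip list.

#include <cstddef>
#include <functional>
#include "folly/Arena.h"
#include "folly/ConcurrentSkipList.h"

// Hypothetical adapter, invented for this example: forwards allocations to a
// SysArena and never frees individual blocks; the arena reclaims everything
// at once when it is destroyed.
struct ArenaRefAlloc {
  explicit ArenaRefAlloc(folly::SysArena& arena) : arena_(&arena) { }
  void* allocate(size_t size) { return arena_->allocate(size); }
  void deallocate(void*) { }  // no-op by design
  folly::SysArena* arena_;
};

namespace folly {
// Declare the no-op deallocate() so the recycling-free paths can be used.
template <> struct IsArenaAllocator<ArenaRefAlloc> : std::true_type { };
}

int main() {
  folly::SysArena arena;
  typedef folly::ConcurrentSkipList<int, std::less<int>, ArenaRefAlloc>
      SkipListT;
  auto accessor = SkipListT::create(/* height = */ 10, ArenaRefAlloc(arena));
  accessor.insert(23);
  return accessor.contains(23) ? 0 : 1;
}

With the default SysAlloc, the same code would instead go through the locking NodeRecycler and free each node individually.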
diff --git a/folly/Memory.h b/folly/Memory.h
index c9948569ced..8394f6ad24c 100644
--- a/folly/Memory.h
+++ b/folly/Memory.h
@@ -19,13 +19,13 @@
 
 #include "folly/Traits.h"
 
-#include
-#include
-#include
+#include
+#include
 #include
+#include
+#include
 #include
-
-#include
 
 namespace folly {
 
@@ -42,12 +42,7 @@ std::unique_ptr<T> make_unique(Args&&... args) {
   return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
 }
 
-/*
- * StlAllocator wraps a SimpleAllocator into a STL-compliant
- * allocator, maintaining an instance pointer to the simple allocator
- * object. The underlying SimpleAllocator object must outlive all
- * instances of StlAllocator using it.
- *
+/**
  * A SimpleAllocator must provide two methods:
  *
  *    void* allocate(size_t size);
 *    void deallocate(void* p);
 *
@@ -58,26 +53,31 @@ std::unique_ptr<T> make_unique(Args&&... args) {
  * if the allocation can't be satisfied, and free a previously
  * allocated block.
  *
- * Note that the following allocator resembles the standard allocator
- * quite well:
- *
- *   class MallocAllocator {
- *    public:
- *     void* allocate(size_t size) {
- *       void* p = malloc(size);
- *       if (!p) throw std::bad_alloc();
- *       return p;
- *     }
- *     void deallocate(void* p) {
- *       free(p);
- *     }
- *   };
+ * SysAlloc resembles the standard allocator.
+ */
+class SysAlloc {
+ public:
+  void* allocate(size_t size) {
+    void* p = ::malloc(size);
+    if (!p) throw std::bad_alloc();
+    return p;
+  }
+  void deallocate(void* p) {
+    ::free(p);
+  }
+};
+
+/**
+ * StlAllocator wraps a SimpleAllocator into a STL-compliant
+ * allocator, maintaining an instance pointer to the simple allocator
+ * object. The underlying SimpleAllocator object must outlive all
+ * instances of StlAllocator using it.
  *
  * But note that if you pass StlAllocator to a
  * standard container it will be larger due to the contained state
  * pointer.
 *
- * author: Tudor Bosman
+ * @author: Tudor Bosman
  */
 
 // This would be so much simpler with std::allocator_traits, but gcc 4.6.2
@@ -341,6 +341,12 @@ std::shared_ptr<T> allocate_shared(Allocator&& allocator, Args&&... args) {
   );
 }
 
+/**
+ * IsArenaAllocator<T>::value describes whether SimpleAllocator has
+ * no-op deallocate().
+ */
+template <class T> struct IsArenaAllocator : std::false_type { };
+
 }  // namespace folly
 
 #endif /* FOLLY_MEMORY_H_ */
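As a small, hypothetical usage sketch (not part of the patch): a SimpleAllocator such as the relocated SysAlloc, or a SysArena, can be handed to standard containers through the StlAllocator wrapper described in the comment above. The <SimpleAllocator, T> parameter order and the pointer-taking constructor are assumptions about the existing StlAllocator, and the arena must outlive the container.

#include <vector>
#include "folly/Arena.h"
#include "folly/Memory.h"

int main() {
  folly::SysArena arena;
  // StlAllocator stores a pointer to the SimpleAllocator, so copies of the
  // allocator made by the container all share the same arena.
  typedef folly::StlAllocator<folly::SysArena, int> IntAlloc;
  IntAlloc alloc(&arena);
  std::vector<int, IntAlloc> vec(alloc);
  vec.push_back(42);
  // Nothing is freed per element; the arena releases all memory at once.
  return vec.size() == 1 ? 0 : 1;
}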
diff --git a/folly/test/ArenaTest.cpp b/folly/test/ArenaTest.cpp
index b2c2aec525f..f54c0892539 100644
--- a/folly/test/ArenaTest.cpp
+++ b/folly/test/ArenaTest.cpp
@@ -25,6 +25,8 @@
 
 using namespace folly;
 
+static_assert(IsArenaAllocator<SysArena>::value, "");
+
 TEST(Arena, SizeSanity) {
   std::set<size_t*> allocatedItems;