diff --git a/async/bounded_queue.h b/async/bounded_queue.h index 6aadf2c..341e5f3 100644 --- a/async/bounded_queue.h +++ b/async/bounded_queue.h @@ -16,8 +16,9 @@ namespace async { struct bounded_traits { static constexpr bool NOEXCEPT_CHECK = false; // exception handling flag - static constexpr size_t CachelineSize = 64; - using sequence_type = uint64_t; + static constexpr std::size_t CachelineSize = 64; + static constexpr std::size_t CachelineAlignment = 16; // must not be larger than alignof(std::max_align_t), see issue #1 + using sequence_type = std::uint64_t; }; template class bounded_queue { @@ -26,9 +27,10 @@ template class bounded_queue { "T must be nothrow destructible"); public: - static constexpr size_t cacheline_size = TRAITS::CachelineSize; + static constexpr std::size_t cacheline_size = TRAITS::CachelineSize; + static constexpr std::size_t cacheline_alignment = TRAITS::CachelineAlignment; using seq_t = typename TRAITS::sequence_type; - explicit bounded_queue(size_t size) + explicit bounded_queue(std::size_t size) : fastmodulo((size > 0 && ((size & (size - 1)) == 0))), bitshift(fastmodulo ? getShiftBitsCount(size) : 0), elements(new element[size]), mask(fastmodulo ? size - 1 : 0), @@ -40,7 +42,7 @@ template class bounded_queue { bounded_queue &operator=(bounded_queue const &) = delete; bounded_queue &operator=(bounded_queue &&) = delete; ~bounded_queue() { delete[] elements; } - size_t size() { return qsize; } + std::size_t size() { return qsize; } template class bounded_queue { bool const fastmodulo; // true if qsize is power of 2 int const bitshift; // used if fastmodulo is true element *const elements; // pointer to buffer - size_t const mask; // used if fastmodulo is true - size_t const qsize; // queue size - alignas(cacheline_size) char cacheline_padding1[cacheline_size]; - alignas(cacheline_size) std::atomic enqueueIx; - alignas(cacheline_size) char cacheline_padding2[cacheline_size]; - alignas(cacheline_size) std::atomic dequeueIx; - alignas(cacheline_size) char cacheline_padding3[cacheline_size]; + std::size_t const mask; // used if fastmodulo is true + std::size_t const qsize; // queue size + alignas(cacheline_alignment) char cacheline_padding1[cacheline_size]; + alignas(cacheline_alignment) std::atomic enqueueIx; + alignas(cacheline_alignment) char cacheline_padding2[cacheline_size]; + alignas(cacheline_alignment) std::atomic dequeueIx; + alignas(cacheline_alignment) char cacheline_padding3[cacheline_size]; }; } // namespace async diff --git a/async/queue.h b/async/queue.h index ce95323..6b00d1d 100644 --- a/async/queue.h +++ b/async/queue.h @@ -14,34 +14,36 @@ namespace async { struct traits // 3-level (L3, L2, L1) depth of nested group design, total // indexing space is pow(2, 64-Tagbits) { // user can change the bits settings by providing your own TRAITS - static constexpr uint64_t Tagbits = 24; - static constexpr uint64_t L3bits = 10; - static constexpr uint64_t L2bits = 10; - static constexpr uint64_t L1bits = 12; - static constexpr uint64_t Basebits = 8; + static constexpr std::uint64_t Tagbits = 24; + static constexpr std::uint64_t L3bits = 10; + static constexpr std::uint64_t L2bits = 10; + static constexpr std::uint64_t L1bits = 12; + static constexpr std::uint64_t Basebits = 8; static constexpr bool NOEXCEPT_CHECK = false; // exception handling flag - static constexpr size_t CachelineSize = 64; + static constexpr std::size_t CachelineSize = 64; + static constexpr std::size_t CachelineAlignment = 16; // must not be larger than alignof(std::max_align_t), see issue #1 }; template class queue final { public: static bool is_lock_free_v() { - return std::atomic{}.is_lock_free(); + return std::atomic{}.is_lock_free(); } - static constexpr size_t cacheline_size = TRAITS::CachelineSize; - static constexpr uint64_t BaseMask = getBitmask(TRAITS::Basebits); - static constexpr uint64_t L1Mask = getBitmask(TRAITS::L1bits) + static constexpr std::size_t cacheline_size = TRAITS::CachelineSize; + static constexpr std::size_t cacheline_alignment = TRAITS::CachelineAlignment; + static constexpr std::uint64_t BaseMask = getBitmask(TRAITS::Basebits); + static constexpr std::uint64_t L1Mask = getBitmask(TRAITS::L1bits) << TRAITS::Basebits; - static constexpr uint64_t L2Mask = getBitmask(TRAITS::L2bits) + static constexpr std::uint64_t L2Mask = getBitmask(TRAITS::L2bits) << (TRAITS::Basebits + TRAITS::L1bits); - static constexpr uint64_t L3Mask = - getBitmask(TRAITS::L3bits) + static constexpr std::uint64_t L3Mask = + getBitmask(TRAITS::L3bits) << (TRAITS::Basebits + TRAITS::L1bits + TRAITS::L2bits); - static constexpr uint64_t TagMask = - getBitmask(TRAITS::Tagbits) + static constexpr std::uint64_t TagMask = + getBitmask(TRAITS::Tagbits) << (TRAITS::Basebits + TRAITS::L1bits + TRAITS::L2bits + TRAITS::L3bits); - static constexpr uint64_t TagShift = 64 - TRAITS::Tagbits; - static constexpr uint64_t TagPlus1 = static_cast(1) << TagShift; + static constexpr std::uint64_t TagShift = 64 - TRAITS::Tagbits; + static constexpr std::uint64_t TagPlus1 = static_cast(1) << TagShift; public: // assert bits settings meet requirements static_assert(TRAITS::Tagbits + TRAITS::L3bits + TRAITS::L2bits + @@ -59,13 +61,13 @@ template class queue final { queue() : nodeCount(3), dequeueIx(2), enqueueIx(2), spawnIx(1), recycleIx(1) { container.get(index(0)); // allocate initial space } - queue(size_t size) // pre-allocate size + queue(std::size_t size) // pre-allocate size : nodeCount(3), dequeueIx(2), enqueueIx(2), spawnIx(1), recycleIx(1) { container.get(index(0)); - if (size > (static_cast(1) << TRAITS::Basebits)) { + if (size > (static_cast(1) << TRAITS::Basebits)) { index ix; - for (size_t i = (static_cast(1) << TRAITS::Basebits); i < size; + for (std::size_t i = (static_cast(1) << TRAITS::Basebits); i < size; ++i) { auto &node = getNode(ix); recycle(ix); @@ -107,9 +109,9 @@ template class queue final { return true; } - template void bulk_enqueue(IT it, size_t count) { + template void bulk_enqueue(IT it, std::size_t count) { index firstidx(0), preidx(0), lastidx(0); - for (size_t i = 0; i < count; ++i) { + for (std::size_t i = 0; i < count; ++i) { lastidx = encapsulate(*it++); if (firstidx == 0) firstidx = lastidx; @@ -126,9 +128,9 @@ template class queue final { } template - size_t bulk_dequeue(IT &&it, size_t maxcount) // or IT& it to return the + std::size_t bulk_dequeue(IT &&it, std::size_t maxcount) // or IT& it to return the { - size_t count(0); + std::size_t count(0); while (maxcount-- && dequeue(*it++)) { ++count; } @@ -181,15 +183,15 @@ template class queue final { } } } - uint64_t getNodeCount() { return nodeCount; } // get in-use-nodes count + std::uint64_t getNodeCount() { return nodeCount; } // get in-use-nodes count private: // internal data structures struct index // simulate tagged pointer { - index(uint64_t newval) noexcept + index(std::uint64_t newval) noexcept : value(newval) {} // is_trivially_copyable must be true index() noexcept : value(0) {} - inline operator uint64_t() const { return value; } + inline operator std::uint64_t() const { return value; } std::uint64_t getVersion() { return (value & TagMask) >> TagShift; } inline void increTag() { value = (value & ~TagMask) | ((value + TagPlus1) & TagMask); @@ -269,14 +271,14 @@ template class queue final { inline node &get(index const &ix) { return operator[](ix); } inline node &at(index const &ix) { return operator[](ix); } inline node &operator[](index const &ix) { return nodes[ix & BaseMask]; } - std::array(1) << TRAITS::Basebits> nodes; + std::array(1) << TRAITS::Basebits> nodes; }; - template struct nestedcontainer { - static constexpr uint64_t mask = BitMask; - static constexpr uint64_t bits = getSetBitsCount(mask); - static constexpr uint64_t shift = getShiftBitsCount(mask); - std::array, static_cast(1) << bits> + template struct nestedcontainer { + static constexpr std::uint64_t mask = BitMask; + static constexpr std::uint64_t bits = getSetBitsCount(mask); + static constexpr std::uint64_t shift = getShiftBitsCount(mask); + std::array, static_cast(1) << bits> subgroups; nestedcontainer() { for (auto &gptr : subgroups) { @@ -411,17 +413,17 @@ template class queue final { using L1container = nestedcontainer; using L2container = nestedcontainer; nestedcontainer container; - alignas(cacheline_size) char cacheline_padding1[cacheline_size]; - alignas(cacheline_size) std::atomic nodeCount; // # of allocated nodes, not the # - // of elements stored in the queue - alignas(cacheline_size) char cacheline_padding2[cacheline_size]; - alignas(cacheline_size) std::atomic dequeueIx; // dequeue pointer - alignas(cacheline_size) char cacheline_padding3[cacheline_size]; - alignas(cacheline_size) std::atomic enqueueIx; // enqueue pointer - alignas(cacheline_size) char cacheline_padding4[cacheline_size]; - alignas(cacheline_size) std::atomic spawnIx; // spawn pointer - alignas(cacheline_size) char cacheline_padding5[cacheline_size]; - alignas(cacheline_size) std::atomic recycleIx; // recycle pointer - alignas(cacheline_size) char cacheline_padding6[cacheline_size]; + alignas(cacheline_alignment) char cacheline_padding1[cacheline_size]; + alignas(cacheline_alignment) std::atomic nodeCount; // # of allocated nodes, not the # + // of elements stored in the queue + alignas(cacheline_alignment) char cacheline_padding2[cacheline_size]; + alignas(cacheline_alignment) std::atomic dequeueIx; // dequeue pointer + alignas(cacheline_alignment) char cacheline_padding3[cacheline_size]; + alignas(cacheline_alignment) std::atomic enqueueIx; // enqueue pointer + alignas(cacheline_alignment) char cacheline_padding4[cacheline_size]; + alignas(cacheline_alignment) std::atomic spawnIx; // spawn pointer + alignas(cacheline_alignment) char cacheline_padding5[cacheline_size]; + alignas(cacheline_alignment) std::atomic recycleIx; // recycle pointer + alignas(cacheline_alignment) char cacheline_padding6[cacheline_size]; }; -} // namespace async \ No newline at end of file +} // namespace async diff --git a/async/threadpool.h b/async/threadpool.h index d812eef..395a9d8 100644 --- a/async/threadpool.h +++ b/async/threadpool.h @@ -21,7 +21,7 @@ class threadpool final { public: static int defaultpoolsize() { return std::thread::hardware_concurrency(); } - threadpool(int poolsize = static_cast(defaultpoolsize())) + threadpool(int poolsize = defaultpoolsize()) : idlecount(0), conflag(false) { configurepool(poolsize); } @@ -33,7 +33,7 @@ class threadpool final { ~threadpool() { cleanup(); } - inline size_t size() { + inline std::size_t size() { std::lock_guard lg(poolmux); return threads.size(); } @@ -43,11 +43,11 @@ class threadpool final { // can be called to resize the pool at any time after construction and before // destruction, recommand to be called from main thread or manager thread even // though it is thread-safe - void configurepool(size_t poolsize) { + void configurepool(std::size_t poolsize) { std::unique_lock veclk(poolmux); auto currentsize = threads.size(); if (currentsize < poolsize) { // expand the pool - for (auto const &v : std::vector(poolsize - currentsize)) { + for (std::size_t i = currentsize; i < poolsize; i++) { tpstops.emplace_back(addthread()); } } else if (currentsize > poolsize) { // shrink the pool @@ -189,4 +189,4 @@ class threadpool final { std::condition_variable qcv; bool conflag; // continue flag for cv }; -} // namespace async \ No newline at end of file +} // namespace async diff --git a/async/utility.h b/async/utility.h index b248438..480946f 100644 --- a/async/utility.h +++ b/async/utility.h @@ -30,7 +30,7 @@ static constexpr unsigned int getSetBitsCount(std::uint64_t n) { return count; } -static constexpr unsigned int getShiftBitsCount(uint64_t n) { +static constexpr unsigned int getShiftBitsCount(std::uint64_t n) { // requires c++14 unsigned int count{0}; if (n == 0) @@ -48,7 +48,7 @@ static constexpr unsigned int getSetBitsCount(std::uint64_t n) { return n == 0 ? 0 : 1 + getSetBitsCount(n & (n - 1)); } -static constexpr unsigned int getShiftBitsCount(uint64_t n) { +static constexpr unsigned int getShiftBitsCount(std::uint64_t n) { return n == 0 ? 0 : ((n & 0x1) == 0 ? 1 + getShiftBitsCount(n >> 1) : 0); } @@ -81,8 +81,8 @@ inline std::string getErrorMsg(std::string const &message, char const *file, #include #include -static size_t cache_line_size() { - size_t line_size = 0; +static std::size_t cache_line_size() { + std::size_t line_size = 0; DWORD buffer_size = 0; DWORD i = 0; SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buffer = 0; @@ -104,15 +104,15 @@ static size_t cache_line_size() { #elif defined(__linux__) #include -static size_t cache_line_size() { - size_t line_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); +static std::size_t cache_line_size() { + std::size_t line_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); return line_size; } #elif defined(__APPLE__) #include -static size_t cache_line_size() { - size_t line_size = 0; - size_t size_of_linesize = sizeof(line_size); +static std::size_t cache_line_size() { + std::size_t line_size = 0; + std::size_t size_of_linesize = sizeof(line_size); sysctlbyname("hw.cachelinesize", &line_size, &size_of_linesize, 0, 0); return line_size; }