Skip to content

Commit

Permalink
Create extra arenas for use in the JIT workers
Browse files Browse the repository at this point in the history
Summary:
Try to reuse ManagedArena class as a C++ wrapper for jemalloc arena.

When deserializing profile data, we create a configurable number of extra arenas, balanced between NUMA nodes.
The deserialization workers and JIT workers can then be assigned to such extra arenas, to reduce contention as they run.

The arenas cannot be destroyed.

Reviewed By: interwq

Differential Revision: D10215895

fbshipit-source-id: bfb19f70cb0e94b29fb50c4b14e45c04a9c96789
  • Loading branch information
binliu19 authored and hhvm-bot committed Oct 22, 2018
1 parent f06682e commit 2c33142
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 23 deletions.
9 changes: 8 additions & 1 deletion hphp/runtime/base/program-functions.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -85,15 +85,16 @@
#include "hphp/util/arch.h" #include "hphp/util/arch.h"
#include "hphp/util/boot-stats.h" #include "hphp/util/boot-stats.h"
#include "hphp/util/build-info.h" #include "hphp/util/build-info.h"
#include "hphp/util/compatibility.h"
#include "hphp/util/capability.h" #include "hphp/util/capability.h"
#include "hphp/util/compatibility.h"
#include "hphp/util/embedded-data.h" #include "hphp/util/embedded-data.h"
#include "hphp/util/hardware-counter.h" #include "hphp/util/hardware-counter.h"
#include "hphp/util/hphp-config.h" #include "hphp/util/hphp-config.h"
#include "hphp/util/kernel-version.h" #include "hphp/util/kernel-version.h"
#ifndef _MSC_VER #ifndef _MSC_VER
#include "hphp/util/light-process.h" #include "hphp/util/light-process.h"
#endif #endif
#include "hphp/util/managed-arena.h"
#include "hphp/util/maphuge.h" #include "hphp/util/maphuge.h"
#include "hphp/util/perf-event.h" #include "hphp/util/perf-event.h"
#include "hphp/util/process-exec.h" #include "hphp/util/process-exec.h"
Expand Down Expand Up @@ -2440,6 +2441,12 @@ void hphp_process_init() {
} }
auto const numWorkers = RuntimeOption::EvalJitWorkerThreadsForSerdes ? auto const numWorkers = RuntimeOption::EvalJitWorkerThreadsForSerdes ?
RuntimeOption::EvalJitWorkerThreadsForSerdes : Process::GetCPUCount(); RuntimeOption::EvalJitWorkerThreadsForSerdes : Process::GetCPUCount();
#if USE_JEMALLOC_EXTENT_HOOKS
auto const numArenas =
std::min(RuntimeOption::EvalJitWorkerArenas,
std::max(RuntimeOption::EvalJitWorkerThreads, numWorkers));
setup_extra_arenas(numArenas);
#endif
auto const errMsg = auto const errMsg =
jit::deserializeProfData(RuntimeOption::EvalJitSerdesFile, numWorkers); jit::deserializeProfData(RuntimeOption::EvalJitSerdesFile, numWorkers);


Expand Down
1 change: 1 addition & 0 deletions hphp/runtime/base/runtime-option.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@ struct RuntimeOption {
F(int, JitThreads, 4) \ F(int, JitThreads, 4) \
F(int, JitWorkerThreads, Process::GetCPUCount() / 2) \ F(int, JitWorkerThreads, Process::GetCPUCount() / 2) \
F(int, JitWorkerThreadsForSerdes, 0) \ F(int, JitWorkerThreadsForSerdes, 0) \
F(int, JitWorkerArenas, Process::GetCPUCount() / 4) \
F(bool, JitDesProfDataAfterRetranslateAll, true) \ F(bool, JitDesProfDataAfterRetranslateAll, true) \
F(int, JitLdimmqSpan, 8) \ F(int, JitLdimmqSpan, 8) \
F(int, JitPrintOptimizedIR, 0) \ F(int, JitPrintOptimizedIR, 0) \
Expand Down
11 changes: 10 additions & 1 deletion hphp/runtime/vm/jit/mcgen-translate.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
#include "hphp/util/hfsort.h" #include "hphp/util/hfsort.h"
#include "hphp/util/job-queue.h" #include "hphp/util/job-queue.h"
#include "hphp/util/logger.h" #include "hphp/util/logger.h"
#include "hphp/util/match.h" #include "hphp/util/managed-arena.h"
#include "hphp/util/numa.h"
#include "hphp/util/trace.h" #include "hphp/util/trace.h"


TRACE_SET_MOD(mcg); TRACE_SET_MOD(mcg);
Expand Down Expand Up @@ -134,6 +135,14 @@ struct TranslateWorker : JobQueueWorker<OptimizeData*, void*, true, true> {
VMProtect _; VMProtect _;
optimize(d->info); optimize(d->info);
} }

#if USE_JEMALLOC_EXTENT_HOOKS
// Called when the worker thread starts: bind this thread to the next extra
// arena for the NUMA node recorded in s_numaNode, if one is available.
// When next_extra_arena() yields nullptr the thread's arena is left as is.
void onThreadEnter() override {
  auto const extraArena = next_extra_arena(s_numaNode);
  if (extraArena == nullptr) return;
  extraArena->bindCurrentThread();
}
#endif
}; };


using WorkerDispatcher = JobQueueDispatcher<TranslateWorker>; using WorkerDispatcher = JobQueueDispatcher<TranslateWorker>;
Expand Down
16 changes: 16 additions & 0 deletions hphp/runtime/vm/jit/prof-data-serialize.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@


#include "hphp/util/boot-stats.h" #include "hphp/util/boot-stats.h"
#include "hphp/util/build-info.h" #include "hphp/util/build-info.h"
#include "hphp/util/managed-arena.h"
#include "hphp/util/numa.h"
#include "hphp/util/process.h" #include "hphp/util/process.h"


#include <folly/portability/Unistd.h> #include <folly/portability/Unistd.h>
Expand Down Expand Up @@ -790,9 +792,23 @@ void merge_loaded_units(int numWorkers) {
// the first worker to finish. // the first worker to finish.
auto const batchSize{std::max(units.size() / numWorkers / 16, size_t(1))}; auto const batchSize{std::max(units.size() / numWorkers / 16, size_t(1))};
std::atomic<size_t> index{0}; std::atomic<size_t> index{0};
UNUSED std::atomic_int curr_node{0};
for (auto worker = 0; worker < numWorkers; ++worker) { for (auto worker = 0; worker < numWorkers; ++worker) {
workers.push_back(std::thread([&] { workers.push_back(std::thread([&] {
ProfileNonVMThread nonVM; ProfileNonVMThread nonVM;
#if USE_JEMALLOC_EXTENT_HOOKS
auto const numaNode = next_numa_node(curr_node);
#ifdef HAVE_NUMA
if (use_numa) {
s_numaNode = numaNode;
numa_sched_setaffinity(0, node_to_cpu_mask[numaNode]);
}
#endif
if (auto arena = next_extra_arena(numaNode)) {
arena->bindCurrentThread();
}
#endif

hphp_thread_init(); hphp_thread_init();
hphp_session_init(Treadmill::SessionKind::PreloadRepo); hphp_session_init(Treadmill::SessionKind::PreloadRepo);


Expand Down
49 changes: 49 additions & 0 deletions hphp/util/alloc.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -407,6 +407,55 @@ void setup_high_arena(unsigned n1GPages) {
high_arena_tcache_create(); // set up high_arena_flags high_arena_tcache_create(); // set up high_arena_flags
} }


// Set up extra arenas for use in non-VM threads, when we have short bursts of
// worker threads running, e.g., during deserialization of profile data.
//
// s_extra_arenas holds one entry per NUMA node. In each entry, `first` is the
// list of arenas created for that node, and `second` points to the counter
// that next_extra_arena() increments to rotate through that list.
static std::vector<std::pair<std::vector<DefaultArena*>,
std::atomic_uint*>> s_extra_arenas;
// Number of extra arenas on each node. It stays zero until
// setup_extra_arenas() assigns it; next_extra_arena() returns nullptr while
// it is zero.
static unsigned s_extra_arena_per_node;
bool setup_extra_arenas(unsigned count) {
if (count == 0) return false;
// This may be called when we have many other threads running. So hold the
// lock while making changes.
static std::mutex lock;
std::lock_guard<std::mutex> g(lock);
// only the first call allocate the arenas.
if (!s_extra_arenas.empty()) {
return count <= s_extra_arenas.size() * s_extra_arenas[0].first.size();
}
// `count` needs to be a multiple of `num_numa_nodes()`, if it isn't, we round
// it up to make it easy to balance across nodes.
const unsigned nNodes = std::max(1, num_numa_nodes());
s_extra_arena_per_node = (count + nNodes - 1) / nNodes;
assert(s_extra_arena_per_node >= 1);
s_extra_arenas.resize(nNodes);
for (unsigned n = 0; n < nNodes; ++n) {
s_extra_arenas[n].first.resize(s_extra_arena_per_node);
auto constexpr kArenaSize =
(sizeof(DefaultArena) + alignof(DefaultArena) - 1)
/ alignof(DefaultArena) * alignof(DefaultArena);
auto const allocSize = kArenaSize * s_extra_arena_per_node
+ sizeof(std::atomic_uint);
void* addr = mallocx_on_node(allocSize, n, alignof(DefaultArena));
memset(addr, 0, allocSize);
for (unsigned i = 0; i < s_extra_arena_per_node; ++i) {
s_extra_arenas[n].first[i] = DefaultArena::CreateAt(addr);
addr = (char*)addr + kArenaSize;
}
s_extra_arenas[n].second = static_cast<std::atomic_uint*>(addr);
}
return true;
}

/*
 * Return the next extra arena on NUMA node `node`, rotating round-robin
 * through the arenas created for that node.
 *
 * Returns nullptr when no extra arenas have been set up, or when `node` is
 * past the last node that has arenas. A negative `node` is treated as node 0.
 */
DefaultArena* next_extra_arena(int node) {
  if (s_extra_arena_per_node == 0) return nullptr;
  // Clamp before the range check. Comparing a negative int against size()
  // converts it to a huge unsigned value, so checking the range first would
  // reject negative nodes and the clamp would never take effect.
  auto const n = static_cast<size_t>(node < 0 ? 0 : node);
  if (n >= s_extra_arenas.size()) return nullptr;
  auto const counter = s_extra_arenas[n].second;
  // Only the counter value matters here, so relaxed ordering is used.
  auto const next = counter->fetch_add(1, std::memory_order_relaxed);
  return s_extra_arenas[n].first[next % s_extra_arena_per_node];
}

void* huge_page_extent_alloc(extent_hooks_t* extent_hooks, void* addr, void* huge_page_extent_alloc(extent_hooks_t* extent_hooks, void* addr,
size_t size, size_t alignment, bool* zero, size_t size, size_t alignment, bool* zero,
bool* commit, unsigned arena_ind) { bool* commit, unsigned arena_ind) {
Expand Down
3 changes: 2 additions & 1 deletion hphp/util/alloc.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ static_assert(MAX_MANAGED_ARENA_COUNT >= 1, "");
// All ManagedArena's represented as an array of pair<id, pointer>. Each // All ManagedArena's represented as an array of pair<id, pointer>. Each
// pointer can be casted to the underlying ExtentAllocator/Arena. We use this // pointer can be casted to the underlying ExtentAllocator/Arena. We use this
// to access the state of ExtentAllocators in extent hooks. An id of zero // to access the state of ExtentAllocators in extent hooks. An id of zero
// indicates an empty entry. // indicates an empty entry. If the arena doesn't have a custom extent hook,
// the arena won't be registered here.
using ArenaArray = std::array<std::pair<unsigned, void*>, using ArenaArray = std::array<std::pair<unsigned, void*>,
MAX_MANAGED_ARENA_COUNT>; MAX_MANAGED_ARENA_COUNT>;
extern ArenaArray g_arenas; extern ArenaArray g_arenas;
Expand Down
38 changes: 38 additions & 0 deletions hphp/util/extent-hooks.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -39,6 +39,39 @@


namespace HPHP { namespace alloc { namespace HPHP { namespace alloc {


/*
 * Compile-time lookup of optional settings an extent allocator type `T` may
 * provide through static members:
 *
 *  - get_hooks():    address of `T::s_hooks`, or nullptr if `T` has no such
 *                    member.
 *  - get_decay_ms(): value of `T::s_decay_ms`, or 60000 (one minute) if `T`
 *                    has no such member.
 */
template<typename T> struct extent_allocator_traits {
 private:
  // Each setting has two overloads. The one taking a pointer to the member
  // only exists when the member does, and is preferred for a `nullptr`
  // argument; the variadic one is the fallback.
  template<typename U>
  static constexpr extent_hooks_t* hooks_or_null(...) {
    return nullptr;
  }
  template<typename U>
  static constexpr extent_hooks_t* hooks_or_null(decltype(&(U::s_hooks))) {
    return &(U::s_hooks);
  }

  template<typename U>
  static constexpr ssize_t decay_ms_or_default(...) {
    return 60 * 1000;                   // purge every minute by default
  }
  template<typename U>
  static constexpr ssize_t decay_ms_or_default(decltype(&(U::s_decay_ms))) {
    return U::s_decay_ms;
  }

 public:
  constexpr static extent_hooks_t* get_hooks() {
    return hooks_or_null<T>(nullptr);
  }
  constexpr static ssize_t get_decay_ms() {
    return decay_ms_or_default<T>(nullptr);
  }
};

/**
 * Default extent hooks used by jemalloc.
 *
 * This type is intentionally empty. It declares neither `s_hooks` nor
 * `s_decay_ms`, so extent_allocator_traits<DefaultExtentAllocator> reports
 * no custom hooks (get_hooks() is nullptr) and the default decay time.
 */
struct DefaultExtentAllocator {};

/** /**
* Extent hooks that do bump mapping for ManagedArena. * Extent hooks that do bump mapping for ManagedArena.
*/ */
Expand All @@ -61,6 +94,11 @@ struct BumpExtentAllocator : private BumpAllocState {
BumpMapper* const m_mapper; BumpMapper* const m_mapper;
}; };


// Compile-time checks of extent_allocator_traits: BumpExtentAllocator exposes
// custom hooks, DefaultExtentAllocator does not, and BumpExtentAllocator's
// decay time is one minute (60000 ms).
static_assert(extent_allocator_traits<BumpExtentAllocator>::get_hooks(), "");
static_assert(!extent_allocator_traits<DefaultExtentAllocator>::get_hooks(), "");
static_assert(extent_allocator_traits<BumpExtentAllocator>::
get_decay_ms() == 60000, "");

}} }}


#endif // USE_JEMALLOC_EXTENT_HOOKS #endif // USE_JEMALLOC_EXTENT_HOOKS
Expand Down
41 changes: 24 additions & 17 deletions hphp/util/managed-arena.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ template<typename ExtentAllocator>
std::string ManagedArena<ExtentAllocator>::reportStats() { std::string ManagedArena<ExtentAllocator>::reportStats() {
mallctl_epoch(); mallctl_epoch();
char buffer[128]; char buffer[128];
using Traits = extent_allocator_traits<ExtentAllocator>;
std::snprintf(buffer, sizeof(buffer), std::snprintf(buffer, sizeof(buffer),
"Arena %d: capacity %zd, max_capacity %zd, used %zd\n", "Arena %d: capacity %zd, max_capacity %zd, used %zd\n",
id(), id(),
Expand All @@ -88,6 +89,8 @@ size_t ManagedArena<ExtentAllocator>::unusedSize() {


template<typename ExtentAllocator> template<typename ExtentAllocator>
void ManagedArena<ExtentAllocator>::init() { void ManagedArena<ExtentAllocator>::init() {
using Traits = extent_allocator_traits<ExtentAllocator>;

if (!g_mib_initialized) { if (!g_mib_initialized) {
initializeMibs(); initializeMibs();
} }
Expand All @@ -101,37 +104,41 @@ void ManagedArena<ExtentAllocator>::init() {
throw std::runtime_error{"arenas.create"}; throw std::runtime_error{"arenas.create"};
} }
char command[32]; char command[32];
std::snprintf(command, sizeof(command), "arena.%d.extent_hooks", m_arenaId); if (extent_hooks_t* hooks_ptr = Traits::get_hooks()) {
extent_hooks_t* hooks_ptr = &ExtentAllocator::s_hooks; std::snprintf(command, sizeof(command), "arena.%d.extent_hooks", m_arenaId);
if (mallctl(command, nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr))) { if (mallctl(command, nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr))) {
throw std::runtime_error{command}; throw std::runtime_error{command};
}
} }
// Purge infrequently. Most of the purging will fail if hugetlb pages are ssize_t decay_ms = Traits::get_decay_ms();
// used, yet it is possible to use normal pages when hugetlb pages are
// unavailable.
ssize_t decay_ms = 60 * 1000;
std::snprintf(command, sizeof(command), std::snprintf(command, sizeof(command),
"arena.%d.dirty_decay_ms", m_arenaId); "arena.%d.dirty_decay_ms", m_arenaId);
if (mallctl(command, nullptr, nullptr, &decay_ms, sizeof(decay_ms))) { if (mallctl(command, nullptr, nullptr, &decay_ms, sizeof(decay_ms))) {
throw std::runtime_error{command}; throw std::runtime_error{command};
} }
assert(GetByArenaId<ManagedArena>(m_arenaId) == nullptr);
for (auto& i : g_arenas) { if (Traits::get_hooks() != nullptr) {
if (!i.first) { // The only place where we need `GetByArenaId` is in custom extent hooks.
i.first = m_arenaId; assert(GetByArenaId<ManagedArena>(m_arenaId) == nullptr);
i.second = this; for (auto& i : g_arenas) {
return; if (!i.first) {
i.first = m_arenaId;
i.second = this;
return;
}
} }
// Should never reached here, as there should be spare entries in g_arenas.
throw std::out_of_range{
"too many ManagedArena's, check MAX_MANAGED_ARENA_COUNT"};
} }
// Should never reached here, as there should be spare entries in g_arenas.
throw std::out_of_range{
"too many ManagedArena's, check MAX_HUGE_ARENA_COUNT"};
} }


template void ManagedArena<BumpExtentAllocator>::init(); template void ManagedArena<BumpExtentAllocator>::init();
template size_t ManagedArena<BumpExtentAllocator>::unusedSize(); template size_t ManagedArena<BumpExtentAllocator>::unusedSize();
template std::string ManagedArena<BumpExtentAllocator>::reportStats(); template std::string ManagedArena<BumpExtentAllocator>::reportStats();


template void ManagedArena<DefaultExtentAllocator>::init();

}} }}


#endif #endif
27 changes: 24 additions & 3 deletions hphp/util/managed-arena.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ template <typename ExtentAllocator>
struct ManagedArena : public ExtentAllocator { struct ManagedArena : public ExtentAllocator {
private: private:
// Constructor forwards all arguments. The only correct way to create a // Constructor forwards all arguments. The only correct way to create a
// ManagedArena is `CreateAt()` on statically allocated memory. // ManagedArena is `CreateAt()` on preallocated memory.
template<typename... Args> template<typename... Args>
explicit ManagedArena(Args&&... args) explicit ManagedArena(Args&&... args)
: ExtentAllocator(std::forward<Args>(args)...) { : ExtentAllocator(std::forward<Args>(args)...) {
Expand All @@ -58,10 +58,14 @@ struct ManagedArena : public ExtentAllocator {
~ManagedArena() = delete; ~ManagedArena() = delete;


public: public:
inline unsigned id() const { unsigned id() const {
return m_arenaId; return m_arenaId;
} }


// Write this arena's id to the "thread.arena" control for the calling thread.
// NOTE(review): presumably mallctlWrite wraps jemalloc's mallctl(), in which
// case later allocations on this thread come from this arena — confirm
// against the helper's definition, including how it reports failure.
void bindCurrentThread() {
mallctlWrite("thread.arena", id());
}

// For stats reporting // For stats reporting
size_t unusedSize(); size_t unusedSize();
std::string reportStats(); std::string reportStats();
Expand Down Expand Up @@ -94,7 +98,24 @@ inline HighArena* highArena() {
return nullptr; return nullptr;
} }


}} }

using DefaultArena = alloc::ManagedArena<alloc::DefaultExtentAllocator>;

/*
* Make sure we have at least `count` extra arenas, with the same number of
* extra arenas for each NUMA node. Returns whether we have enough arenas to
* meet the required count. This function tries to create the extra arenas the
* first time it is called with a nonzero count. Subsequent calls won't
* change the number of extra arenas.
*/
bool setup_extra_arenas(unsigned count);
/*
* Get the next extra arena on the specified NUMA node.
*/
DefaultArena* next_extra_arena(int node);

}


#endif // USE_JEMALLOC_EXTENT_HOOKS #endif // USE_JEMALLOC_EXTENT_HOOKS
#endif #endif

0 comments on commit 2c33142

Please sign in to comment.