Permalink
Browse files

provide a way to abort ongoing requests due to memory pressure

Summary: Sometimes it is better to abort ongoing requests than to have the entire process killed when it OOMs.  This diff introduces two bits in request injected data, to indicate host-level OOM and the decision to abort the request, respectively.  A request is only aborted when the host-level OOM bit is set and the request is using more memory than a configurable threshold.  When a request is aborted, shutdown handlers and PSP code will not run.

Reviewed By: markw65

Differential Revision: D7230817

fbshipit-source-id: 1f23f96ff5aeb667a447b1fcad6f74c70c3ef4bc
  • Loading branch information...
binliu19 authored and hhvm-bot committed Apr 7, 2018
1 parent 25c2f61 commit 3b613d1986fb1e73c221af2e4742e36474413263
@@ -669,13 +669,19 @@ void ExecutionContext::onShutdownPreSend() {
try { obFlushAll(); } catch (...) {}
};
// When host is OOMing, abort abruptly.
if (RID().shouldOOMAbort()) return;
tl_heap->resetCouldOOM(isStandardRequest());
executeFunctions(ShutDown);
}
extern void ext_session_request_shutdown();
void ExecutionContext::onShutdownPostSend() {
// When host is OOMing, abort abruptly.
if (RID().shouldOOMAbort()) return;
ServerStats::SetThreadMode(ServerStats::ThreadMode::PostProcessing);
tl_heap->resetCouldOOM(isStandardRequest());
try {
@@ -687,6 +693,8 @@ void ExecutionContext::onShutdownPostSend() {
bump_counter_and_rethrow(true /* isPsp */);
} catch (const ExitException &e) {
// do nothing
} catch (const HostOutOfMemoryException &e) {
onOOMKill(e);
} catch (const Exception &e) {
onFatalError(e);
} catch (const Object &e) {
@@ -1014,6 +1022,19 @@ bool ExecutionContext::onUnhandledException(Object e) {
return false;
}
/*
 * Report a request that is being killed because the host is out of memory.
 *
 * Emits a single error log line containing the request URL and the number of
 * bytes carried by the exception `e`.
 */
void ExecutionContext::onOOMKill(const HostOutOfMemoryException& e) {
  // With the host out of memory we cannot afford to keep running PSP, custom
  // error handlers, and the like. Counters are already bumped in
  // `bump_counter_and_rethrow()`; this only logs extra detail about the
  // killed request to make debugging easier.
  auto const bytesUsed = e.m_bytes;
  const auto& url = g_context->getRequestUrl();
  Logger::Error("Request killed due to memory pressure. "
                "URL %s, bytes used %zu",
                url.c_str(),
                bytesUsed);
  // TODO(T25950158): consider writing some StructuredLog when we have a
  // better way to describe the request beyond URL.
}
///////////////////////////////////////////////////////////////////////////////
void ExecutionContext::debuggerInfo(
@@ -263,6 +263,7 @@ struct ExecutionContext {
void clearLastError();
bool onFatalError(const Exception &e); // returns handled
bool onUnhandledException(Object e);
// Logs details (request URL, bytes used) about a request that is being killed
// because the host is out of memory.
void onOOMKill(const HostOutOfMemoryException& e);
ErrorState getErrorState() const;
void setErrorState(ErrorState);
String getLastError() const;
@@ -447,6 +447,19 @@ void bump_counter_and_rethrow(bool isPsp) {
jemalloc_pprof_dump("", false);
#endif
throw;
} catch (const HostOutOfMemoryException& e) {
if (isPsp) {
static auto requestHostOOMPSPCounter = ServiceData::createTimeSeries(
"requests_oom_killed_psp", {ServiceData::StatsType::COUNT});
requestHostOOMPSPCounter->addValue(1);
ServerStats::Log("request.oom_killed.psp", 1);
} else {
static auto requestHostOOMCounter = ServiceData::createTimeSeries(
"requests_oom_killed_non_psp", {ServiceData::StatsType::COUNT});
requestHostOOMCounter->addValue(1);
ServerStats::Log("request.oom_killed.non_psp", 1);
}
throw;
}
}
@@ -491,6 +504,11 @@ static void handle_exception_helper(bool& ret,
if (richErrorMsg) {
handle_exception_append_bt(errorMsg, e);
}
} catch (const HostOutOfMemoryException &e) {
ret = false;
error = true;
errorMsg = "OOM";
context->onOOMKill(e);
} catch (const Exception &e) {
bool oldRet = ret;
bool origError = error;
@@ -627,6 +627,8 @@ void RequestInjectionData::resetCPUTimer(int seconds /* = 0 */) {
void RequestInjectionData::reset() {
m_sflagsAndStkPtr->fetch_and(kSurpriseFlagStackMask);
m_hostOutOfMemory.store(false, std::memory_order_relaxed);
m_OOMAbort = false;
m_coverage = RuntimeOption::RecordCodeCoverage;
m_jittingDisabled = false;
m_debuggerAttached = false;
@@ -151,6 +151,30 @@ struct RequestInjectionData {
void clearFlag(SurpriseFlag);
void setFlag(SurpriseFlag);
/*
* Flags for the request-level OOM killer. The `m_hostOutOfMemory` flag is set
* on all requests when the host is low on memory, which triggers a memory check
* upon checking surprise flags. The `m_OOMAbort` flag is set when we decide to
* kill the request.
*/
/*
 * Mark this request as running on a host that is low on memory.
 *
 * Sets `m_hostOutOfMemory` and then raises MemExceededFlag.
 */
void setHostOOMFlag() {
// Publish the host-OOM marker (release store) before raising the surprise
// flag; hostOOMFlag() reads it back with an acquire load.
m_hostOutOfMemory.store(true, std::memory_order_release);
setFlag(MemExceededFlag);
}
/*
 * Undo setHostOOMFlag(): clear MemExceededFlag, then reset
 * `m_hostOutOfMemory` to false.
 *
 * NOTE(review): clearFlag(MemExceededFlag) is unconditional, so a
 * MemExceededFlag raised for any other reason would be dropped as well --
 * confirm that callers only use this where that is acceptable.
 */
void clearHostOOMFlag() {
clearFlag(MemExceededFlag);
m_hostOutOfMemory.store(false, std::memory_order_relaxed);
}
/*
 * Returns the current value of `m_hostOutOfMemory`. Uses an acquire load,
 * matching the release store in setHostOOMFlag().
 */
bool hostOOMFlag() const {
return m_hostOutOfMemory.load(std::memory_order_acquire);
}
void setRequestOOMAbort() {
m_OOMAbort = true;
}
bool shouldOOMAbort() const {
return m_OOMAbort;
}
/*
* Whether the JIT is enabled.
*/
@@ -327,6 +351,17 @@ struct RequestInjectionData {
bool m_safeFileAccess{false};
bool m_logFunctionCalls{false};
/*
* `m_hostOutOfMemory` is a flag used together with MemExceededFlag, to
* indicate whether the host is running low on memory. Note that the presence
* of this flag doesn't necessarily lead to the request being aborted. A
* request is only affected when it satisfies some other criteria, e.g., when
* it uses more memory than RequestMemoryOOMKillBytes. If we do decide to
* abort the request, `m_OOMAbort` is set.
*/
std::atomic<bool> m_hostOutOfMemory{false};
bool m_OOMAbort{false};
/* Pointer to surprise flags stored in RDS. */
std::atomic<size_t>* m_sflagsAndStkPtr{nullptr};
@@ -195,6 +195,8 @@ int RuntimeOption::PspCpuTimeoutSeconds = 0;
int64_t RuntimeOption::MaxRequestAgeFactor = 0;
int64_t RuntimeOption::RequestMemoryMaxBytes =
std::numeric_limits<int64_t>::max();
// Memory-usage threshold for aborting a request when the host is low on
// memory. This static initializer effectively disables the threshold;
// RuntimeOption::Load() binds the value from
// "Server.RequestMemoryOOMKillBytes" (default 128LL << 20).
int64_t RuntimeOption::RequestMemoryOOMKillBytes =
std::numeric_limits<int64_t>::max();
int64_t RuntimeOption::RequestHugeMaxBytes = 0;
int64_t RuntimeOption::ImageMemoryMaxBytes = 0;
int RuntimeOption::ServerGracefulShutdownWait = 0;
@@ -1516,6 +1518,8 @@ void RuntimeOption::Load(
"Server.PspCpuTimeoutSeconds", 0);
Config::Bind(RequestMemoryMaxBytes, ini, config,
"Server.RequestMemoryMaxBytes", (16LL << 30)); // 16GiB
Config::Bind(RequestMemoryOOMKillBytes, ini, config,
"Server.RequestMemoryOOMKillBytes", 128LL << 20);
Config::Bind(RequestHugeMaxBytes, ini, config,
"Server.RequestHugeMaxBytes", (24LL << 20));
Config::Bind(ServerGracefulShutdownWait, ini,
@@ -171,6 +171,8 @@ struct RuntimeOption {
static int PspCpuTimeoutSeconds;
static int64_t MaxRequestAgeFactor;
static int64_t RequestMemoryMaxBytes;
// Threshold for aborting when host is low on memory.
static int64_t RequestMemoryOOMKillBytes;
// Approximate upper bound for thread heap that is backed by huge pages. This
// doesn't include the first slab colocated with thread stack, if any.
static int64_t RequestHugeMaxBytes;
@@ -24,6 +24,7 @@
#include "hphp/util/alloc.h"
#include "hphp/util/lock.h"
#include "hphp/util/perf-event.h"
#include "hphp/util/service-data.h"
#include "hphp/runtime/base/backtrace.h"
#include "hphp/runtime/base/builtin-functions.h"
@@ -116,6 +117,19 @@ int ThreadInfo::SetPendingGCForAllOnRequestThread() {
return cnt;
}
void ThreadInfo::InvokeOOMKiller() {
ExecutePerThread(
[] (ThreadInfo* t) {
t->m_reqInjectionData.setHostOOMFlag();
}
);
Logger::Error("Invoking request-level OOM killer");
static auto OOMKillerInvokeCounter = ServiceData::createTimeSeries(
"hhvm_oom_killer_invoke", {ServiceData::StatsType::COUNT}
);
OOMKillerInvokeCounter->addValue(1);
}
void ThreadInfo::onSessionInit() {
m_reqInjectionData.onSessionInit();
}
@@ -268,7 +282,30 @@ size_t handle_request_surprise(c_WaitableWaitHandle* wh, size_t mask) {
if (flags & MemExceededFlag) {
if (pendingException) {
setSurpriseFlag(MemExceededFlag);
} else if (p.hostOOMFlag()) {
// When the host is running out of memory, don't abort all requests.
// Instead, only kill a request if it uses a nontrivial amount of memory.
auto const currUsage = tl_heap->currentUsage();
// Once a request has the OOM abort flag set, it is never unset through
// the lifetime of the request.
// TODO(#T25950158): add flags to indicate whether a request is safe to
// retry, etc. to help the OOM killer to make better decisions.
if (currUsage > RuntimeOption::RequestMemoryOOMKillBytes) {
p.setRequestOOMAbort();
}
if (p.shouldOOMAbort()) {
pendingException =
new HostOutOfMemoryException(static_cast<size_t>(currUsage));
// In case this exception doesn't stop other pieces of code from
// running, keep aborting them until all are dead.
p.setHostOOMFlag();
} else {
// Let this request survive. If the OOM killer comes back again, we will
// check again then.
p.clearHostOOMFlag();
}
} else {
// Request exceeded memory limit, but the host is fine.
pendingException = generate_memory_exceeded_exception(wh);
}
}
@@ -63,6 +63,12 @@ struct ThreadInfo {
static int SetPendingGCForAllOnRequestThread();
static THREAD_LOCAL_NO_CHECK(ThreadInfo, s_threadInfo);
/*
* Actively kill inflight requests when memory is tight. Some or all ongoing
* requests will terminate with a fatal error.
*/
static void InvokeOOMKiller();
/*
* This is the amount of "slack" in stack usage checks - if the stack pointer
* gets within this distance from the end (minus overhead), throw an infinite
@@ -91,6 +91,19 @@ struct FileOpenException : Exception {
};
///////////////////////////////////////////////////////////////////////////////
/*
 * Exception used to abort a request when the host is running out of memory.
 * It carries the number of bytes the request was using when it was created.
 */
struct HostOutOfMemoryException : Exception {
  explicit HostOutOfMemoryException(size_t usedBytes) : m_bytes{usedBytes} {}

  EXCEPTION_COMMON_IMPL(HostOutOfMemoryException);

  // Memory usage of the request, in bytes, as supplied to the constructor.
  size_t m_bytes;
};
///////////////////////////////////////////////////////////////////////////////
}
#endif // incl_HPHP_EXCEPTION_H_

0 comments on commit 3b613d1

Please sign in to comment.