Skip to content

Commit

Permalink
MB-38109: Simplify logic for generating phosphor trace
Browse files Browse the repository at this point in the history
This patch simplifies generation of a trace dump by
adding the logic to format the trace dump to the
"begin()" call which would "block" until the executor
had formatted the trace dump (in another thread).

Also remove the "chunked read" method to fetch the trace
dump to a single get call as the old chunk retrieval
required the server to keep state between each call to
the get (and that the trace dump could only be read
out _once_ by a single client. So if that client crashed
on the client side the entire trace would be lost).

By removal of all the complexity on the chunked reads
with multiple locks between various objects we got
rid of the code with caused the crash in the
referenced MB.

Change-Id: Id2096147033c668bc105fd28a5cce34006f198ee
Reviewed-on: http://review.couchbase.org/c/kv_engine/+/144622
Tested-by: Build Bot <build@couchbase.com>
Reviewed-by: Dave Rigby <daver@couchbase.com>
  • Loading branch information
trondn committed Feb 4, 2021
1 parent 01ec2d9 commit f3f2541
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 156 deletions.
2 changes: 1 addition & 1 deletion daemon/ioctl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ static const std::unordered_map<std::string, GetCallbackFunc> ioctl_get_map{
{"trace.status", ioctlGetTracingStatus},
{"trace.dump.list", ioctlGetTracingList},
{"trace.dump.begin", ioctlGetTracingBeginDump},
{"trace.dump.chunk", ioctlGetTracingDumpChunk},
{"trace.dump.get", ioctlGetTraceDump},
{"sla", ioctlGetMcbpSla},
{"rbac.db.dump", ioctlRbacDbDump}};

Expand Down
181 changes: 75 additions & 106 deletions daemon/tracing.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,7 @@ Task::Status StaleTraceDumpRemover::periodicExecute() {

using value_type = decltype(TraceDumps::dumps)::value_type;
erase_if(traceDumps.dumps, [now, this](const value_type& dump) {
// If the mutex is locked then a
// chunk is being generated from this dump
auto isLocked = dump.second->mutex.try_lock();
if (!isLocked) {
return false;
}

std::lock_guard<std::mutex> guard(dump.second->mutex, std::adopt_lock);
return (dump.second->last_touch + std::chrono::seconds(max_age)) <= now;
return (dump.second.last_touch + std::chrono::seconds(max_age)) <= now;
});
return Status::Continue; // always repeat
}
Expand Down Expand Up @@ -109,6 +101,43 @@ void deinitializeTracing() {
traceDumps.dumps.clear();
}

/// Task to run on the executor pool to convert the TraceContext to JSON
class TraceFormatterTask : public Task {
public:
TraceFormatterTask(Cookie& cookie, phosphor::TraceContext&& context)
: Task(), cookie(cookie), context(std::move(context)) {
}

Status execute() override {
phosphor::tools::JSONExport exporter(context);
static const std::size_t chunksize = 1024 * 1024;
size_t last_wrote;
do {
formatted.resize(formatted.size() + chunksize);
last_wrote = exporter.read(&formatted[formatted.size() - chunksize],
chunksize);
} while (!exporter.done());
formatted.resize(formatted.size() - (chunksize - last_wrote));
return Status::Finished;
}

void notifyExecutionComplete() override {
notifyIoComplete(cookie, ENGINE_SUCCESS);
}

Cookie& cookie;
std::string formatted;
phosphor::TraceContext context;
};

struct TraceFormatterContext : public CommandContext {
explicit TraceFormatterContext(std::shared_ptr<TraceFormatterTask>& task)
: task(task) {
}

std::shared_ptr<TraceFormatterTask> task;
};

phosphor::TraceContext getTraceContext() {
// Lock the instance until we've grabbed the trace context
std::lock_guard<phosphor::TraceLog> lh(PHOSPHOR_INSTANCE);
Expand All @@ -123,83 +152,49 @@ ENGINE_ERROR_CODE ioctlGetTracingBeginDump(Cookie& cookie,
const StrToStrMap&,
std::string& value,
cb::mcbp::Datatype& datatype) {
auto context = getTraceContext();
if (!context.getBuffer()) {
cookie.setErrorContext(
"Cannot begin a dump when there is no existing trace");
return ENGINE_EINVAL;
auto* cctx = cookie.getCommandContext();
if (!cctx) {
auto context = getTraceContext();
if (!context.getBuffer()) {
cookie.setErrorContext(
"Cannot begin a dump when there is no existing trace");
return ENGINE_EINVAL;
}

// Create a task to format the dump
auto task = std::make_shared<TraceFormatterTask>(cookie,
std::move(context));
cookie.setCommandContext(new TraceFormatterContext{task});

cookie.setEwouldblock(true);
std::lock_guard<std::mutex> guard(task->getMutex());
std::shared_ptr<Task> basicTask = task;
executorPool->schedule(basicTask, true);

return ENGINE_EWOULDBLOCK;
}

// Create the new dump associated with a random uuid
cb::uuid::uuid_t uuid = cb::uuid::random();
auto* ctx = dynamic_cast<TraceFormatterContext*>(cctx);
if (!ctx) {
throw std::runtime_error(
"ioctlGetTracingBeginDump: Unknown value for command context");
}
const auto uuid = cb::uuid::random();
{
std::lock_guard<std::mutex> guard(traceDumps.mutex);
traceDumps.dumps.emplace(
uuid, std::make_unique<DumpContext>(std::move(context)));
traceDumps.dumps.emplace(uuid,
DumpContext{std::move(ctx->task->formatted)});
}

// Return the textual form of the uuid back to the user with success
value = to_string(uuid);
cookie.setCommandContext();
return ENGINE_SUCCESS;
}

/**
* A task for generating tasks in the background on an
* executor thread instead of a front-end thread
*/
class ChunkBuilderTask : public Task {
public:
/// This constructor assumes that the dump's
/// mutex has already been locked
ChunkBuilderTask(Cookie& cookie_,
DumpContext& dump,
std::unique_lock<std::mutex> lck,
size_t chunk_size)
: Task(), cookie(cookie_), dump(dump), lck(std::move(lck)) {
chunk.resize(chunk_size);
}

Status execute() override {
size_t count = dump.json_export.read(&chunk[0], chunk.size());
chunk.resize(count);
return Status::Finished;
}

void notifyExecutionComplete() override {
notifyIoComplete(cookie, ENGINE_SUCCESS);
}

std::string& getChunk() {
return chunk;
}

private:
std::string chunk;
Cookie& cookie;
DumpContext& dump;
std::unique_lock<std::mutex> lck;
};

struct ChunkBuilderContext : public CommandContext {
explicit ChunkBuilderContext(std::shared_ptr<ChunkBuilderTask>& task)
: task(task) {
}

std::shared_ptr<ChunkBuilderTask> task;
};

ENGINE_ERROR_CODE ioctlGetTracingDumpChunk(Cookie& cookie,
const StrToStrMap& arguments,
std::string& value,
cb::mcbp::Datatype& datatype) {
// If we have a context then we already generated the chunk
auto* ctx = dynamic_cast<ChunkBuilderContext*>(cookie.getCommandContext());
if (ctx != nullptr) {
value = std::move(ctx->task->getChunk());
cookie.setCommandContext();
return ENGINE_SUCCESS;
}

ENGINE_ERROR_CODE ioctlGetTraceDump(Cookie& cookie,
const StrToStrMap& arguments,
std::string& value,
cb::mcbp::Datatype& datatype) {
auto id = arguments.find("id");
if (id == arguments.end()) {
cookie.setErrorContext("Dump ID must be specified as a key argument");
Expand All @@ -220,39 +215,13 @@ ENGINE_ERROR_CODE ioctlGetTracingDumpChunk(Cookie& cookie,
if (iter == traceDumps.dumps.end()) {
cookie.setErrorContext(
"Dump ID must correspond to an existing dump");
return ENGINE_EINVAL;
}
auto& dump = *(iter->second);

// @todo make configurable
const size_t chunk_size = 1024 * 1024;

if (dump.json_export.done()) {
value = "";
return ENGINE_SUCCESS;
}

std::unique_lock<std::mutex> lck(dump.mutex, std::try_to_lock);
// A chunk is already being generated for this dump
if (!lck) {
value = "";
cookie.setErrorContext(
"A chunk is already being fetched for this dump");
return ENGINE_TMPFAIL;
return ENGINE_KEY_ENOENT;
}

// ChunkBuilderTask assumes the lock above is already held
auto task = std::make_shared<ChunkBuilderTask>(
cookie, dump, std::move(lck), chunk_size);
cookie.setCommandContext(new ChunkBuilderContext{task});

cookie.setEwouldblock(true);
std::lock_guard<std::mutex> guard(task->getMutex());
std::shared_ptr<Task> basicTask = task;
executorPool->schedule(basicTask, true);

return ENGINE_EWOULDBLOCK;
value.assign(iter->second.content);
}

return ENGINE_SUCCESS;
}

ENGINE_ERROR_CODE ioctlSetTracingClearDump(Cookie& cookie,
Expand Down
14 changes: 6 additions & 8 deletions daemon/tracing.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,13 @@ ENGINE_ERROR_CODE ioctlGetTracingBeginDump(Cookie& cookie,
cb::mcbp::Datatype& datatype);

/**
* IOCTL Get callback to generate and return chunks from the specified dump
* @param arguments 'id' argument should be given to specify uuid of dump to
continue
* @param[out] value The contents of the next chunk (or empty if dump is done)
* IOCTL Get callback to fetch an entire trace dump
* @param[out] value The uuid of the newly created dump
*/
ENGINE_ERROR_CODE ioctlGetTracingDumpChunk(Cookie& cookie,
const StrToStrMap& arguments,
std::string& value,
cb::mcbp::Datatype& datatype);
ENGINE_ERROR_CODE ioctlGetTraceDump(Cookie& cookie,
const StrToStrMap&,
std::string& value,
cb::mcbp::Datatype& datatype);

/**
* IOCTL Set callback to clear a tracing dump
Expand Down
19 changes: 7 additions & 12 deletions daemon/tracing_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,27 @@
#include <map>
#include <mutex>

class Connection;

/**
* DumpContext holds all the information required for an ongoing
* trace dump
*/
struct DumpContext {
explicit DumpContext(phosphor::TraceContext&& _context)
: context(std::move(_context)),
json_export(context),
explicit DumpContext(std::string content)
: content(std::move(content)),
last_touch(std::chrono::steady_clock::now()) {
}

// Moving is dangerous as json_export contains a reference to
// context.
DumpContext(DumpContext&& other) = delete;

phosphor::TraceContext context;
phosphor::tools::JSONExport json_export;
std::chrono::steady_clock::time_point last_touch;
std::mutex mutex;
const std::string content;
const std::chrono::steady_clock::time_point last_touch;
};

/**
* Aggregate object to hold a map of dumps and a mutex protecting them
*/
struct TraceDumps {
std::map<cb::uuid::uuid_t, std::unique_ptr<DumpContext>> dumps;
std::map<cb::uuid::uuid_t, DumpContext> dumps;
std::mutex mutex;
};

Expand Down
13 changes: 5 additions & 8 deletions engines/ep/management/kv_trace_dump
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,12 @@ uuid = subprocess.check_output(mcctl_args + ['get', 'trace.dump.begin'],
universal_newlines=True)
uuid = uuid.strip()

# Repeatedly get chunks, until we get one with zero length.
while True:
chunk = subprocess.check_output(
mcctl_args + ['get', 'trace.dump.chunk?id=' + uuid],
universal_newlines=True).rstrip()
if len(chunk) == 0:
break
args.outfile.write(chunk)
chunk = subprocess.check_output(
mcctl_args + ['get', 'trace.dump.get?id=' + uuid],
universal_newlines=True).rstrip()
args.outfile.write(chunk)

# Delete the trace dump on the server
subprocess.check_call(mcctl_args + ['set', 'trace.dump.clear', uuid])

if args.restart:
Expand Down
15 changes: 3 additions & 12 deletions tests/executor/executor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -334,23 +334,14 @@ TEST_F(StaleTraceDumpRemoverTest, DoesRemove) {
std::unique_lock<std::mutex> lock(task->getMutex());
executorpool->schedule(task, false);

// Fastest way to create a TraceContext is to start tracing, stop it, and
// pull out the context
PHOSPHOR_INSTANCE.start(
phosphor::TraceConfig(phosphor::BufferMode::ring, 1024 * 1024));
PHOSPHOR_INSTANCE.stop();
auto context = PHOSPHOR_INSTANCE.getTraceContext();

cb::uuid::uuid_t uuid = cb::uuid::random();
{
std::lock_guard<std::mutex> lh(traceDumps.mutex);
traceDumps.dumps.emplace(
uuid,
std::make_unique<DumpContext>(
phosphor::TraceContext(std::move(context))));
traceDumps.dumps.emplace(uuid,
DumpContext("{}")); // Dummy trace content
}

auto now = traceDumps.dumps.begin()->second->last_touch;
auto now = traceDumps.dumps.begin()->second.last_touch;
auto removeTime = now + std::chrono::seconds(1);
task->makeRunnable(now);

Expand Down
11 changes: 2 additions & 9 deletions tests/testapp/testapp_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -464,15 +464,8 @@ TEST_P(McdTestappTest, IOCTL_Tracing) {
// get the data
auto uuid = conn.ioctl_get("trace.dump.begin");

const std::string chunk_key = "trace.dump.chunk?id=" + uuid;
std::string dump;
std::string chunk;

do {
chunk = conn.ioctl_get(chunk_key);
dump += chunk;
} while (!chunk.empty());

const std::string chunk_key = "trace.dump.get?id=" + uuid;
std::string dump = conn.ioctl_get(chunk_key);
conn.ioctl_set("trace.dump.clear", uuid);

// Difficult to tell what's been written to the buffer so just check
Expand Down

0 comments on commit f3f2541

Please sign in to comment.