Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

crimson/osd/osd_operation: fix dump_historic_slow_ops command works #56994

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
105 changes: 40 additions & 65 deletions src/crimson/osd/osd_operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,11 @@ void OSDOperationRegistry::do_stop()
/* add_ref= */ false
};
});
last_of_recents = std::end(historic_registry);
// to_ref_down is going off
}

OSDOperationRegistry::OSDOperationRegistry()
: OperationRegistryT(seastar::this_shard_id())
{
constexpr auto historic_reg_index =
static_cast<size_t>(OperationTypeCode::historic_client_request);
auto& historic_registry = get_registry<historic_reg_index>();
last_of_recents = std::begin(historic_registry);
}
: OperationRegistryT(seastar::this_shard_id()) {}

static auto get_duration(const ClientRequest& client_request)
{
Expand All @@ -55,51 +48,46 @@ static auto get_duration(const ClientRequest& client_request)

void OSDOperationRegistry::put_historic(const ClientRequest& op)
{
using crimson::common::local_conf;
// unlink the op from the client request registry. this is a part of
// the re-link procedure. finally it will be in historic registry.
constexpr auto client_reg_index =
static_cast<size_t>(OperationTypeCode::client_request);
constexpr auto historic_reg_index =
static_cast<size_t>(OperationTypeCode::historic_client_request);
constexpr auto slow_historic_reg_index =
static_cast<size_t>(OperationTypeCode::historic_slow_client_request);

auto& client_registry = get_registry<client_reg_index>();
auto& historic_registry = get_registry<historic_reg_index>();
historic_registry.splice(std::end(historic_registry),
client_registry,
client_registry.iterator_to(op));
// slow ops should put into historic_slow_client_request list
// we only save the newest op
if (get_duration(op) > local_conf()->osd_op_complaint_time) {
auto& slow_historic_registry = get_registry<slow_historic_reg_index>();
slow_historic_registry.splice(std::end(slow_historic_registry),
client_registry,
client_registry.iterator_to(op));

if (num_slow_ops >= local_conf()->osd_op_history_slow_op_size) {
slow_historic_registry.pop_front();
} else {
++num_slow_ops;
}
} else {
auto& historic_registry = get_registry<historic_reg_index>();
historic_registry.splice(std::end(historic_registry),
client_registry,
client_registry.iterator_to(op));

if (num_recent_ops >= local_conf()->osd_op_history_size) {
historic_registry.pop_front();
} else {
++num_recent_ops;
}
}

ClientRequest::ICRef(
&op, /* add_ref= */true
).detach(); // yes, "leak" it for now!

// check whether the history size limit is not exceeded; if so, then
// purge the oldest op.
// NOTE: Operation uses the auto-unlink feature of boost::intrusive.
// NOTE: the cleaning happens in OSDOperationRegistry::do_stop()
using crimson::common::local_conf;
if (num_recent_ops >= local_conf()->osd_op_history_size) {
++last_of_recents;
++num_slow_ops;
} else {
++num_recent_ops;
}
if (num_slow_ops > local_conf()->osd_op_history_slow_op_size) {
// we're interested in keeping slowest ops. if the slow op history
// is disabled, the list will have only one element, so the full-blown
// search will boil down into `.front()`.
const auto fastest_historic_iter = std::min_element(
std::cbegin(historic_registry), last_of_recents,
[] (const auto& lop, const auto& rop) {
const auto& lclient_request = static_cast<const ClientRequest&>(lop);
const auto& rclient_request = static_cast<const ClientRequest&>(rop);
return get_duration(lclient_request) < get_duration(rclient_request);
});
assert(fastest_historic_iter != std::end(historic_registry));
const auto& fastest_historic_op =
static_cast<const ClientRequest&>(*fastest_historic_iter);
historic_registry.erase(fastest_historic_iter);
// clear a previously "leaked" op
ClientRequest::ICRef(&fastest_historic_op, /* add_ref= */false);
--num_slow_ops;
}
}

size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) const
Expand All @@ -125,33 +113,20 @@ size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) c

size_t OSDOperationRegistry::dump_slowest_historic_client_requests(ceph::Formatter* f) const
{
const auto& historic_client_registry =
get_registry<static_cast<size_t>(OperationTypeCode::historic_client_request)>(); //ClientRequest::type)>();
const auto& slow_historic_client_registry =
get_registry<static_cast<size_t>(OperationTypeCode::historic_slow_client_request)>(); //ClientRequest::type)>();
f->open_object_section("op_history");
f->dump_int("size", historic_client_registry.size());
f->dump_int("size", slow_historic_client_registry.size());
// TODO: f->dump_int("duration", history_duration.load());
// the intrusive list is configured to not store the size
std::multimap<utime_t,
const ClientRequest*,
std::greater<utime_t>> sorted_slowest_ops;
// iterating over the entire registry as a slow op could be also
// in the "recently added" part.
std::transform(std::begin(historic_client_registry),
std::end(historic_client_registry),
std::inserter(sorted_slowest_ops, std::end(sorted_slowest_ops)),
[] (const Operation& op) {
const auto& cop = static_cast<const ClientRequest&>(op);
return std::make_pair(get_duration(cop), &cop);
});
f->open_array_section("ops");
using crimson::common::local_conf;
size_t ops_count = 0;
for (auto it = std::begin(sorted_slowest_ops);
ops_count < local_conf()->osd_op_history_slow_op_size
&& it != std::end(sorted_slowest_ops);
++it, ++ops_count)
{
it->second->dump(f);
f->open_array_section("ops");
for (const auto& op : slow_historic_client_registry) {
op.dump(f);
++ops_count;
}
f->close_section();
}
f->close_section();
return ops_count;
Expand Down
3 changes: 2 additions & 1 deletion src/crimson/osd/osd_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ enum class OperationTypeCode {
background_recovery_sub,
internal_client_request,
historic_client_request,
historic_slow_client_request,
logmissing_request,
logmissing_request_reply,
snaptrim_event,
Expand All @@ -72,6 +73,7 @@ static constexpr const char* const OP_NAMES[] = {
"background_recovery_sub",
"internal_client_request",
"historic_client_request",
"historic_slow_client_request",
"logmissing_request",
"logmissing_request_reply",
"snaptrim_event",
Expand Down Expand Up @@ -230,7 +232,6 @@ struct OSDOperationRegistry : OperationRegistryT<
size_t dump_slowest_historic_client_requests(ceph::Formatter* f) const;

private:
op_list::const_iterator last_of_recents;
size_t num_recent_ops = 0;
size_t num_slow_ops = 0;
};
Expand Down