Skip to content

Commit

Permalink
osd/scrub: expose scrubbing schedule to operator
Browse files Browse the repository at this point in the history
Add a 'scrub scheduling info' column to pgs dump.
Modify the name and behavior of 'last-scrub-duration'.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
Co-Authored-By: Aishwarya Mathuria <amathuri@redhat.com>
  • Loading branch information
ronen-fr and amathuria committed Nov 5, 2021
1 parent 4f56c91 commit 4bf07e0
Show file tree
Hide file tree
Showing 13 changed files with 319 additions and 70 deletions.
32 changes: 32 additions & 0 deletions src/include/utime_fmt.h
@@ -0,0 +1,32 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once
/**
* \file fmtlib formatter for utime_t
*/
#include <fmt/format.h>
#include <fmt/chrono.h>

#include <string_view>

#include "include/utime.h"

/**
 * fmtlib formatter for utime_t.
 *
 * No format-spec options are supported: "{}" is the only accepted form.
 *
 * Output:
 *  - values smaller than ~10 years' worth of seconds are assumed to be
 *    relative times (durations) and are printed as "<sec>.<usec, 6 digits>";
 *  - anything else is assumed to be an absolute time and is printed in an
 *    ISO 8601 style: "<date>T<time>.<usec, 6 digits><%z of the GMT time>".
 */
template <>
struct fmt::formatter<utime_t> {
  /// no format specifiers to consume: the spec is expected to be empty
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  /**
   * Write the textual representation of \a utime into the context's output.
   *
   * Declared 'const': fmt calls format() on a const formatter object
   * (mandatory in recent fmt releases), and formatting does not modify
   * any formatter state.
   *
   * \returns the advanced output iterator
   */
  template <typename FormatContext>
  auto format(const utime_t& utime, FormatContext& ctx) const
  {
    // timestamps below this many seconds (roughly 10 years) are treated
    // as relative times
    constexpr auto relative_time_limit =
      static_cast<time_t>(60 * 60 * 24 * 365 * 10);

    if (utime.sec() < relative_time_limit) {
      // raw seconds. this looks like a relative time.
      return fmt::format_to(ctx.out(), "{}.{:06}",
			    static_cast<long>(utime.sec()), utime.usec());
    }

    // this looks like an absolute time.
    // conform to http://en.wikipedia.org/wiki/ISO_8601
    const auto asgmt = fmt::gmtime(utime.sec());
    return fmt::format_to(ctx.out(), "{:%FT%T}.{:06}{:%z}", asgmt,
			  utime.usec(), asgmt);
  }
};
11 changes: 7 additions & 4 deletions src/mon/PGMap.cc
Expand Up @@ -1657,7 +1657,8 @@ void PGMap::dump_pg_stats_plain(
tab.define_column("LAST_DEEP_SCRUB", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SNAPTRIMQ_LEN", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("LAST_SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SCRUB_SCHEDULING", TextTable::LEFT, TextTable::LEFT);
}

for (auto i = pg_stats.begin();
Expand Down Expand Up @@ -2230,7 +2231,8 @@ void PGMap::dump_filtered_pg_stats(ostream& ss, set<pg_t>& pgs) const
tab.define_column("ACTING", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("DEEP_SCRUB_STAMP", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("LAST_SCRUB_DURATION", TextTable::LEFT, TextTable::RIGHT);
tab.define_column("SCRUB_SCHEDULING", TextTable::LEFT, TextTable::LEFT);

for (auto i = pgs.begin(); i != pgs.end(); ++i) {
const pg_stat_t& st = pg_stat.at(*i);
Expand Down Expand Up @@ -2258,8 +2260,9 @@ void PGMap::dump_filtered_pg_stats(ostream& ss, set<pg_t>& pgs) const
<< actingstr.str()
<< st.last_scrub_stamp
<< st.last_deep_scrub_stamp
<< st.scrub_duration
<< TextTable::endrow;
<< st.last_scrub_duration
<< st.dump_scrub_schedule()
<< TextTable::endrow;
}

ss << tab;
Expand Down
19 changes: 10 additions & 9 deletions src/osd/PG.cc
Expand Up @@ -818,10 +818,17 @@ void PG::publish_stats_to_osd()
if (!is_primary())
return;

if (m_scrubber) {
recovery_state.update_stats_wo_resched(
[scrubber = m_scrubber.get()](pg_history_t& hist,
pg_stat_t& info) mutable -> void {
info.scrub_sched_status = scrubber->get_schedule();
});
}

std::lock_guard l{pg_stats_publish_lock};
auto stats = recovery_state.prepare_stats_for_publish(
pg_stats_publish,
unstable_stats);
auto stats =
recovery_state.prepare_stats_for_publish(pg_stats_publish, unstable_stats);
if (stats) {
pg_stats_publish = std::move(stats);
}
Expand Down Expand Up @@ -2533,12 +2540,6 @@ void PG::handle_query_state(Formatter *f)
dout(10) << "handle_query_state" << dendl;
PeeringState::QueryState q(f);
recovery_state.handle_event(q, 0);

// This code has moved to after the close of recovery_state array.
// I don't think that scrub is a recovery state
if (is_primary() && is_active() && m_scrubber && m_scrubber->is_scrub_active()) {
m_scrubber->handle_query_state(f);
}
}

void PG::init_collection_pool_opts()
Expand Down
7 changes: 7 additions & 0 deletions src/osd/PeeringState.cc
Expand Up @@ -3984,6 +3984,13 @@ void PeeringState::update_stats(
}
}

/**
 * Hand the PG's history and stats (info.history / info.stats) to the
 * caller-supplied updater 'f', which may modify them in place.
 *
 * This function performs no other action: it only invokes 'f'.
 */
void PeeringState::update_stats_wo_resched(
  std::function<void(pg_history_t &, pg_stat_t &)> f)
{
  auto &history = info.history;
  auto &stats = info.stats;
  f(history, stats);
}


bool PeeringState::append_log_entries_update_missing(
const mempool::osd_pglog::list<pg_log_entry_t> &entries,
ObjectStore::Transaction &t, std::optional<eversion_t> trim_to,
Expand Down
3 changes: 3 additions & 0 deletions src/osd/PeeringState.h
Expand Up @@ -1809,6 +1809,9 @@ class PeeringState : public MissingLoc::MappingInfo {
std::function<bool(pg_history_t &, pg_stat_t &)> f,
ObjectStore::Transaction *t = nullptr);

/**
 * update_stats_wo_resched
 *
 * Calls f with references to this PG's history and stats
 * (info.history, info.stats), allowing f to update them in place.
 * Unlike the update_stats() overload above, f returns nothing and no
 * transaction is passed.
 * NOTE(review): the name suggests "without rescheduling" - confirm what
 * follow-up work of update_stats() is intentionally skipped here.
 */
void update_stats_wo_resched(
std::function<void(pg_history_t &, pg_stat_t &)> f);

/**
* adjust_purged_snaps
*
Expand Down
2 changes: 1 addition & 1 deletion src/osd/PrimaryLogPG.cc
Expand Up @@ -1032,7 +1032,7 @@ void PrimaryLogPG::do_command(
f->close_section();

if (is_primary() && is_active() && m_scrubber) {
m_scrubber->dump(f.get());
m_scrubber->dump_scrubber(f.get(), m_planned_scrub);
}

f->open_object_section("agent_state");
Expand Down
72 changes: 67 additions & 5 deletions src/osd/osd_types.cc
Expand Up @@ -36,8 +36,10 @@ extern "C" {

#include "common/Formatter.h"
#include "common/StackStringStream.h"
#include "include/utime_fmt.h"
#include "OSDMap.h"
#include "osd_types.h"
#include "osd_types_fmt.h"
#include "os/Transaction.h"

using std::list;
Expand Down Expand Up @@ -2856,7 +2858,8 @@ void pg_stat_t::dump(Formatter *f) const
f->dump_bool("pin_stats_invalid", pin_stats_invalid);
f->dump_bool("manifest_stats_invalid", manifest_stats_invalid);
f->dump_unsigned("snaptrimq_len", snaptrimq_len);
f->dump_float("scrub_duration", scrub_duration);
f->dump_int("last_scrub_duration", last_scrub_duration);
f->dump_string("scrub_schedule", dump_scrub_schedule());
stats.dump(f);
f->open_array_section("up");
for (auto p = up.cbegin(); p != up.cend(); ++p)
Expand Down Expand Up @@ -2909,6 +2912,47 @@ void pg_stat_t::dump_brief(Formatter *f) const
f->dump_int("acting_primary", acting_primary);
}

/**
 * Render the PG's scrub scheduling status (scrub_sched_status) as a short
 * human-readable string.
 *
 * An active scrub takes precedence over whatever m_sched_status says;
 * otherwise the text is selected by m_sched_status.
 */
std::string pg_stat_t::dump_scrub_schedule() const
{
  const auto& sched = scrub_sched_status;

  // "deep " for deep scrubs, nothing for shallow ones
  const char* const level_prefix =
    (sched.m_is_deep == scrub_level_t::deep) ? "deep " : "";

  // currently scrubbing: report for how long
  if (sched.m_is_active) {
    return fmt::format("{}scrubbing for {}s", level_prefix,
		       sched.m_duration_seconds);
  }

  const auto status = sched.m_sched_status;

  if (status == pg_scrub_sched_status_t::unknown) {
    // no reported scrub schedule yet
    return std::string{"--"};
  }

  if (status == pg_scrub_sched_status_t::not_queued) {
    return std::string{"no scrub is scheduled"};
  }

  if (status == pg_scrub_sched_status_t::scheduled) {
    const char* const origin =
      sched.m_is_periodic ? "periodic" : "user requested";
    return fmt::format("{} {}scrub scheduled @ {}", origin, level_prefix,
		       sched.m_scheduled_at);
  }

  if (status == pg_scrub_sched_status_t::queued) {
    return fmt::format("queued for {}scrub", level_prefix);
  }

  // any other value (including 'active' while m_is_active is not set)
  // is a bug!
  return std::string{"SCRUB STATE MISMATCH!"};
}

/// member-wise equality of two scrub scheduling status objects
bool operator==(const pg_scrubbing_status_t& l, const pg_scrubbing_status_t& r)
{
  // bail out on the first differing member (same order as the fields
  // were originally compared)
  if (!(l.m_sched_status == r.m_sched_status)) {
    return false;
  }
  if (!(l.m_scheduled_at == r.m_scheduled_at)) {
    return false;
  }
  if (!(l.m_duration_seconds == r.m_duration_seconds)) {
    return false;
  }
  if (!(l.m_is_active == r.m_is_active)) {
    return false;
  }
  if (!(l.m_is_deep == r.m_is_deep)) {
    return false;
  }
  return l.m_is_periodic == r.m_is_periodic;
}

void pg_stat_t::encode(ceph::buffer::list &bl) const
{
ENCODE_START(27, 22, bl);
Expand Down Expand Up @@ -2959,7 +3003,13 @@ void pg_stat_t::encode(ceph::buffer::list &bl) const
encode(manifest_stats_invalid, bl);
encode(avail_no_missing, bl);
encode(object_location_counts, bl);
encode(scrub_duration, bl);
encode(last_scrub_duration, bl);
encode(scrub_sched_status.m_scheduled_at, bl);
encode(scrub_sched_status.m_duration_seconds, bl);
encode((__u16)scrub_sched_status.m_sched_status, bl);
encode(scrub_sched_status.m_is_active, bl);
encode((scrub_sched_status.m_is_deep==scrub_level_t::deep), bl);
encode(scrub_sched_status.m_is_periodic, bl);
ENCODE_FINISH(bl);
}

Expand Down Expand Up @@ -3035,7 +3085,18 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl)
decode(object_location_counts, bl);
}
if (struct_v >= 27) {
decode(scrub_duration, bl);
decode(last_scrub_duration, bl);
decode(scrub_sched_status.m_scheduled_at, bl);
decode(scrub_sched_status.m_duration_seconds, bl);
__u16 scrub_sched_as_u16;
decode(scrub_sched_as_u16, bl);
scrub_sched_status.m_sched_status = (pg_scrub_sched_status_t)(scrub_sched_as_u16);
decode(tmp, bl);
scrub_sched_status.m_is_active = tmp;
decode(tmp, bl);
scrub_sched_status.m_is_deep = tmp ? scrub_level_t::deep : scrub_level_t::shallow;
decode(tmp, bl);
scrub_sched_status.m_is_periodic = tmp;
}
}
DECODE_FINISH(bl);
Expand Down Expand Up @@ -3069,7 +3130,7 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
a.last_deep_scrub = eversion_t(13, 14);
a.last_deep_scrub_stamp = utime_t(15, 16);
a.last_clean_scrub_stamp = utime_t(17, 18);
a.scrub_duration = 0.003;
a.last_scrub_duration = 3617;
a.snaptrimq_len = 1048576;
list<object_stat_collection_t*> l;
object_stat_collection_t::generate_test_instances(l);
Expand Down Expand Up @@ -3144,7 +3205,8 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r)
l.manifest_stats_invalid == r.manifest_stats_invalid &&
l.purged_snaps == r.purged_snaps &&
l.snaptrimq_len == r.snaptrimq_len &&
l.scrub_duration == r.scrub_duration;
l.last_scrub_duration == r.last_scrub_duration &&
l.scrub_sched_status == r.scrub_sched_status;
}

// -- store_statfs_t --
Expand Down
34 changes: 27 additions & 7 deletions src/osd/osd_types.h
Expand Up @@ -2179,6 +2179,28 @@ inline bool operator==(const object_stat_collection_t& l,
return l.sum == r.sum;
}

/// the depth of a scrub: shallow or deep.
/// (pg_stat_t::encode() puts this on the wire as a bool: 'true' for deep)
enum class scrub_level_t : bool { shallow = false, deep = true };
/// NOTE(review): presumably selects whether a scrub should also repair
/// what it finds - not used by the code in this part of the file; confirm
/// against the scrubber code.
enum class scrub_type_t : bool { not_repair = false, do_repair = true };

/// is there a scrub in our future?
///
/// The numeric value is what pg_stat_t::encode()/decode() put on the wire
/// (cast to/from a 16-bit integer), so existing enumerators should keep
/// their values: add new ones at the end only.
///
/// Note that pg_stat_t::dump_scrub_schedule() reports an on-going scrub
/// based on pg_scrubbing_status_t::m_is_active, not on 'active' below;
/// 'active' has no case of its own in that function's switch.
enum class pg_scrub_sched_status_t : uint16_t {
unknown, ///< status not reported yet
not_queued, ///< not in the OSD's scrub queue. Probably not active.
active, ///< scrubbing
scheduled, ///< scheduled for a scrub at an already determined time
queued ///< queued to be scrubbed
};

/**
 * The scrub scheduling/progress information of a single PG, as carried in
 * pg_stat_t (pg_stat_t::scrub_sched_status) and rendered for the operator
 * by pg_stat_t::dump_scrub_schedule().
 */
struct pg_scrubbing_status_t {
/// the scheduled scrub time; displayed when m_sched_status is 'scheduled'
utime_t m_scheduled_at{};
/// displayed as the duration of the on-going scrub ("scrubbing for Ns");
/// only shown when m_is_active is set
int32_t m_duration_seconds{0}; // relevant when scrubbing
/// the scheduling state; consulted only when no scrub is active
pg_scrub_sched_status_t m_sched_status{pg_scrub_sched_status_t::unknown};
/// is the PG being scrubbed? takes precedence over m_sched_status
bool m_is_active{false};
/// the level (shallow/deep) of the active, queued or scheduled scrub
scrub_level_t m_is_deep{scrub_level_t::shallow};
/// 'true' is displayed as "periodic", 'false' as "user requested"
bool m_is_periodic{true};
};

/// member-wise comparison of all pg_scrubbing_status_t fields
bool operator==(const pg_scrubbing_status_t& l, const pg_scrubbing_status_t& r);

/** pg_stat
* aggregate stats for a single PG.
Expand Down Expand Up @@ -2213,6 +2235,7 @@ struct pg_stat_t {
utime_t last_scrub_stamp;
utime_t last_deep_scrub_stamp;
utime_t last_clean_scrub_stamp;
int32_t last_scrub_duration{0};

object_stat_collection_t stats;

Expand All @@ -2239,6 +2262,8 @@ struct pg_stat_t {
// absurd already, so cap it to 2^32 and save 4 bytes at the same time
uint32_t snaptrimq_len;

pg_scrubbing_status_t scrub_sched_status;

bool stats_invalid:1;
/// true if num_objects_dirty is not accurate (because it was not
/// maintained starting from pool creation)
Expand All @@ -2249,8 +2274,6 @@ struct pg_stat_t {
bool pin_stats_invalid:1;
bool manifest_stats_invalid:1;

double scrub_duration;

pg_stat_t()
: reported_seq(0),
reported_epoch(0),
Expand All @@ -2268,8 +2291,7 @@ struct pg_stat_t {
hitset_stats_invalid(false),
hitset_bytes_stats_invalid(false),
pin_stats_invalid(false),
manifest_stats_invalid(false),
scrub_duration(0)
manifest_stats_invalid(false)
{ }

epoch_t get_effective_last_epoch_clean() const {
Expand Down Expand Up @@ -2329,6 +2351,7 @@ struct pg_stat_t {
bool is_acting_osd(int32_t osd, bool primary) const;
void dump(ceph::Formatter *f) const;
void dump_brief(ceph::Formatter *f) const;
std::string dump_scrub_schedule() const;
void encode(ceph::buffer::list &bl) const;
void decode(ceph::buffer::list::const_iterator &bl);
static void generate_test_instances(std::list<pg_stat_t*>& o);
Expand Down Expand Up @@ -6082,9 +6105,6 @@ struct PushOp {
WRITE_CLASS_ENCODER_FEATURES(PushOp)
std::ostream& operator<<(std::ostream& out, const PushOp &op);

enum class scrub_level_t : bool { shallow = false, deep = true };
enum class scrub_type_t : bool { not_repair = false, do_repair = true };

/*
* summarize pg contents for purposes of a scrub
*/
Expand Down
23 changes: 22 additions & 1 deletion src/osd/scrubber/osd_scrub_sched.cc
Expand Up @@ -2,8 +2,9 @@
// vim: ts=8 sw=2 smarttab
#include "./osd_scrub_sched.h"

#include "include/utime.h"
#include "include/utime_fmt.h"
#include "osd/OSD.h"
#include "osd/osd_types_fmt.h"

#include "pg_scrubber.h"

Expand Down Expand Up @@ -48,6 +49,26 @@ void ScrubQueue::ScrubJob::update_schedule(
<< registration_state() << dendl;
}

/**
 * Describe, as text, where this scrub job stands with regard to scheduling:
 * not scheduled at all, already due (i.e. queued), or scheduled for a
 * specific future time.
 */
std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is,
						   bool is_deep_expected) const
{
  // a job that is not registered with the OSD scheduling queues is not
  // a candidate for scrubbing
  if (state != qu_state_t::registered) {
    return "no scrub is scheduled";
  }

  // we are never sure that the next scrub will indeed be shallow, hence
  // only the 'deep' case is spelled out
  const char* const level_prefix = is_deep_expected ? "deep " : "";

  // once the scheduled time has passed, we are surely in the queue
  // (note that for now we do not tell client if 'penalized')
  const bool already_due = now_is > schedule.scheduled_at;
  if (already_due) {
    return fmt::format("queued for {}scrub", level_prefix);
  }

  return fmt::format("{}scrub scheduled @ {}", level_prefix,
		     schedule.scheduled_at);
}


// ////////////////////////////////////////////////////////////////////////// //
// ScrubQueue

Expand Down
6 changes: 6 additions & 0 deletions src/osd/scrubber/osd_scrub_sched.h
Expand Up @@ -139,6 +139,12 @@ class ScrubQueue {
: " not-queued";
}

/**
* a text description of the "scheduling intentions" of this PG:
* are we already scheduled for a scrub/deep scrub? when?
*
* \param now_is the time to compare the job's scheduled time against
* \param is_deep_expected if set, the scrub is described as a "deep" one
* \returns "no scrub is scheduled" if the job is not registered; a
*          "queued for ..." text if the scheduled time has already passed;
*          otherwise a "... scheduled @ <time>" text
*/
std::string scheduling_state(utime_t now_is, bool is_deep_expected) const;

friend std::ostream& operator<<(std::ostream& out, const ScrubJob& pg);
};

Expand Down

0 comments on commit 4bf07e0

Please sign in to comment.