Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reef: mon/OSDMonitor: fix get_min_last_epoch_clean() #55867

Merged
merged 4 commits into from Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 14 additions & 11 deletions src/mon/OSDMonitor.cc
Expand Up @@ -395,7 +395,7 @@ void LastEpochClean::report(unsigned pg_num, const pg_t& pg,
return lec.report(pg_num, pg.ps(), last_epoch_clean);
}

epoch_t LastEpochClean::get_lower_bound(const OSDMap& latest) const
epoch_t LastEpochClean::get_lower_bound_by_pool(const OSDMap& latest) const
{
auto floor = latest.get_epoch();
for (auto& pool : latest.get_pools()) {
Expand Down Expand Up @@ -901,12 +901,7 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
if (state & CEPH_OSD_UP) {
// could be marked up *or* down, but we're too lazy to check which
last_osd_report.erase(osd);
}
}
for (auto [osd, weight] : inc.new_weight) {
if (weight == CEPH_OSD_OUT) {
// manually marked out, so drop it
osd_epochs.erase(osd);
osd_epochs.erase(osd);
}
}
}
Expand Down Expand Up @@ -2329,13 +2324,21 @@ version_t OSDMonitor::get_trim_to() const
return 0;
}

/* There are two constraints on trimming:
* 1. we must not trim past the last_epoch_clean for any pg
* 2. we must not trim past the last reported epoch for any up
* osds.
*
* LastEpochClean::get_lower_bound_by_pool gives a value <= constraint 1.
* For constraint 2, we take the min over osd_epochs, which is populated with
* MOSDBeacon::version, see OSDMonitor::prepare_beacon
*/
epoch_t OSDMonitor::get_min_last_epoch_clean() const
{
auto floor = last_epoch_clean.get_lower_bound(osdmap);
// also scan osd epochs
// don't trim past the oldest reported osd epoch
auto floor = last_epoch_clean.get_lower_bound_by_pool(osdmap);
for (auto [osd, epoch] : osd_epochs) {
if (epoch < floor) {
ceph_assert(osdmap.is_up(osd));
floor = epoch;
}
}
Expand Down Expand Up @@ -4443,8 +4446,8 @@ bool OSDMonitor::prepare_beacon(MonOpRequestRef op)

last_osd_report[from].first = ceph_clock_now();
last_osd_report[from].second = beacon->osd_beacon_report_interval;
ceph_assert(osdmap.is_up(from));
osd_epochs[from] = beacon->version;

for (const auto& pg : beacon->pgs) {
if (auto* pool = osdmap.get_pg_pool(pg.pool()); pool != nullptr) {
unsigned pg_num = pool->get_pg_num();
Expand Down
22 changes: 19 additions & 3 deletions src/mon/OSDMonitor.h
Expand Up @@ -114,7 +114,13 @@ class LastEpochClean {
public:
void report(unsigned pg_num, const pg_t& pg, epoch_t last_epoch_clean);
void remove_pool(uint64_t pool);
epoch_t get_lower_bound(const OSDMap& latest) const;
/**
* get_lower_bound_by_pool
*
* Returns epoch e such that e <= pg.last_epoch_clean for all pgs in cluster.
* May return 0 if any pool does not have comprehensive values for all pgs.
*/
epoch_t get_lower_bound_by_pool(const OSDMap& latest) const;

void dump(Formatter *f) const;
};
Expand Down Expand Up @@ -643,8 +649,18 @@ class OSDMonitor : public PaxosService,

// when we last received PG stats from each osd and the osd's osd_beacon_report_interval
std::map<int, std::pair<utime_t, int>> last_osd_report;
// TODO: use last_osd_report to store the osd report epochs, once we don't
// need to upgrade from pre-luminous releases.
/**
* osd_epochs
*
* Records the MOSDBeacon::version (the osd epoch at which the OSD sent the
* beacon) of the most recent beacon recevied from each currently up OSD.
* Used in OSDMonitor::get_min_last_epoch_clean().
* Down osds are trimmed upon commit of each map
* (OSDMonitor::update_from_paxos).
*
* TODO: use last_osd_report to store the osd report epochs, once we don't
* need to upgrade from pre-luminous releases.
*/
std::map<int,epoch_t> osd_epochs;
LastEpochClean last_epoch_clean;
bool preprocess_beacon(MonOpRequestRef op);
Expand Down