Skip to content

Commit

Permalink
mon/OSDMonitor: prune purged snaps
Browse files Browse the repository at this point in the history
Be a bit careful here because the mon has to do some bookkeeping to avoid
pruning things twice.  If the PGMapDigest set appears obviously stale,
skip some work (looking at this particular interval) until it is not
obviously stale--move onto the next interval instead.

Signed-off-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed Oct 30, 2017
1 parent 1adef0c commit 2e7f411
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,10 @@ std::vector<Option> get_global_options() {
.set_default(true)
.set_description("Enable POOL_APP_NOT_ENABLED health check"),

Option("mon_max_snap_prune_per_epoch", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(100)
.set_description("Max number of pruned snaps we will process in a single OSDMap epoch"),

Option("mon_min_osdmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(500)
.set_description(""),
Expand Down
4 changes: 4 additions & 0 deletions src/mon/MgrStatMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ class MgrStatMonitor : public PaxosService {
return nullptr;
}

const PGMapDigest& get_digest() {
return digest;
}

ceph_statfs get_statfs(OSDMap& osdmap,
boost::optional<int64_t> data_pool) const {
return digest.get_statfs(osdmap, data_pool);
Expand Down
127 changes: 127 additions & 0 deletions src/mon/OSDMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1290,6 +1290,17 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
t->put(OSD_SNAP_PREFIX, k, v);
}
}
for (auto& i : pending_inc.new_purged_snaps) {
for (auto q = i.second.begin();
q != i.second.end();
++q) {
bufferlist v;
string k = make_snap_purged_key_value(i.first, q.get_start(),
q.get_len(), pending_inc.epoch,
&v);
t->put(OSD_SNAP_PREFIX, k, v);
}
}
}

// health
Expand Down Expand Up @@ -3497,6 +3508,10 @@ void OSDMonitor::tick()
}
}

if (try_prune_purged_snaps()) {
do_propose = true;
}

if (update_pools_status())
do_propose = true;

Expand Down Expand Up @@ -4983,6 +4998,7 @@ string OSDMonitor::make_snap_key(int64_t pool, snapid_t snap)
return k;
}


string OSDMonitor::make_snap_key_value(
int64_t pool, snapid_t snap, snapid_t num,
epoch_t epoch, bufferlist *v)
Expand All @@ -4995,6 +5011,117 @@ string OSDMonitor::make_snap_key_value(
return make_snap_key(pool, snap + num - 1);
}

string OSDMonitor::make_snap_purged_key(int64_t pool, snapid_t snap)
{
char k[40];
snprintf(k, sizeof(k), "purged_snap_%lld_%016llx",
(unsigned long long)pool, (unsigned long long)snap);
return k;
}
string OSDMonitor::make_snap_purged_key_value(
int64_t pool, snapid_t snap, snapid_t num,
epoch_t epoch, bufferlist *v)
{
// encode the *last* epoch in the key so that we can use forward
// iteration only to search for an epoch in an interval.
::encode(snap, *v);
::encode(snap + num, *v);
::encode(epoch, *v);
return make_snap_purged_key(pool, snap + num - 1);
}

int OSDMonitor::lookup_pruned_snap(int64_t pool, snapid_t snap,
snapid_t *begin, snapid_t *end)
{
string k = make_snap_key(pool, snap);
auto it = mon->store->get_iterator(OSD_SNAP_PREFIX);
it->lower_bound(k);
if (!it->valid()) {
return -ENOENT;
}
if (it->key().compare(0, sizeof(OSD_SNAP_PREFIX), OSD_SNAP_PREFIX)) {
return -ENOENT;
}
bufferlist v = it->value();
auto p = v.begin();
::decode(*begin, p);
::decode(*end, p);
if (snap < *begin || snap >= *end) {
return -ENOENT;
}
return 0;
}

bool OSDMonitor::try_prune_purged_snaps()
{
if (!mon->mgrstatmon()->is_readable()) {
return false;
}
if (osdmap.require_osd_release < CEPH_RELEASE_MIMIC) {
return false;
}
if (!pending_inc.new_purged_snaps.empty()) {
return false; // we already pruned for this epoch
}

unsigned max_prune = cct->_conf->get_val<uint64_t>(
"mon_max_snap_prune_per_epoch");
if (!max_prune) {
max_prune = 100000;
}
dout(10) << __func__ << " max_prune " << max_prune << dendl;

unsigned num_pruned = 0;
auto& purged_snaps = mon->mgrstatmon()->get_digest().purged_snaps;
for (auto& p : osdmap.get_pools()) {
auto q = purged_snaps.find(p.first);
if (q == purged_snaps.end()) {
continue;
}
auto& purged = q->second;
if (purged.empty()) {
dout(20) << __func__ << " " << p.first << " nothing purged" << dendl;
continue;
}
OSDMap::snap_interval_set_t to_prune;
for (auto i = purged.begin(); i != purged.end(); ++i) {
snapid_t begin = i.get_start();
auto end = i.get_start() + i.get_len();
snapid_t pbegin = 0, pend = 0;
int r = lookup_pruned_snap(p.first, begin, &pbegin, &pend);
if (r == 0) {
// already purged.
// be a bit aggressive about backing off here, because the mon may
// do a lot of work going through this set, and if we know the
// purged set from the OSDs is at least *partly* stale we may as
// well wait for it to be fresh.
dout(20) << __func__ << " we've already pruned " << pbegin
<< "~" << (pend - pbegin) << dendl;
break; // next extent
}
if (pbegin && pbegin < end) {
// the tail of [begin,end) is purged, move end up
end = pbegin;
}
to_prune.insert(begin, end - begin);
num_pruned += end - begin;
if (num_pruned >= max_prune) {
break;
}
}
if (!to_prune.empty()) {
dout(10) << __func__ << " pool " << p.first << " pruned " << to_prune
<< dendl;
pending_inc.new_purged_snaps[p.first].swap(to_prune);
}
if (num_pruned >= max_prune) {
break;
}
}
dout(10) << __func__ << " pruned " << num_pruned << dendl;
return !!num_pruned;
}

bool OSDMonitor::update_pools_status()
{
if (!mon->mgrstatmon()->is_readable())
Expand Down
6 changes: 6 additions & 0 deletions src/mon/OSDMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,12 @@ class OSDMonitor : public PaxosService {
string make_snap_key(int64_t pool, snapid_t snap);
string make_snap_key_value(int64_t pool, snapid_t snap, snapid_t num,
epoch_t epoch, bufferlist *v);
string make_snap_purged_key(int64_t pool, snapid_t snap);
string make_snap_purged_key_value(int64_t pool, snapid_t snap, snapid_t num,
epoch_t epoch, bufferlist *v);
bool try_prune_purged_snaps();
int lookup_pruned_snap(int64_t pool, snapid_t snap,
snapid_t *begin, snapid_t *end);

bool prepare_set_flag(MonOpRequestRef op, int flag);
bool prepare_unset_flag(MonOpRequestRef op, int flag);
Expand Down

0 comments on commit 2e7f411

Please sign in to comment.