Skip to content

Commit

Permalink
Merge pull request #18276 from liewegas/wip-removed-snaps
Browse files (browse the repository at this point in the history)
mon,osd,osdc: refactor snap trimming (phase 1)

Reviewed-by: Greg Farnum <gfarnum@redhat.com>
  • Loading branch information
liewegas committed Dec 7, 2017
2 parents d10aad6 + 8c44dab commit f3b2eb9
Show file tree
Hide file tree
Showing 27 changed files with 1,022 additions and 169 deletions.
1 change: 1 addition & 0 deletions qa/suites/rados/singleton/all/thrash-eio.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ tasks:
- \(PG_
- \(OBJECT_MISPLACED\)
- \(OSD_
- \(OBJECT_
- thrashosds:
op_delay: 30
clean_interval: 120
Expand Down
8 changes: 8 additions & 0 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,10 @@ std::vector<Option> get_global_options() {
.set_default(true)
.set_description("Enable POOL_APP_NOT_ENABLED health check"),

Option("mon_max_snap_prune_per_epoch", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(100)
.set_description("Max number of pruned snaps we will process in a single OSDMap epoch"),

Option("mon_min_osdmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(500)
.set_description(""),
Expand Down Expand Up @@ -2457,6 +2461,10 @@ std::vector<Option> get_global_options() {
.set_default(.9)
.set_description(""),

Option("osd_max_snap_prune_intervals_per_epoch", Option::TYPE_UINT, Option::LEVEL_DEV)
.set_default(512)
.set_description("Max number of snap intervals to report to mgr in pg_stat_t"),

Option("osd_default_data_pool_replay_window", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(45)
.set_description(""),
Expand Down
6 changes: 6 additions & 0 deletions src/include/interval_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class interval_set {
T get_len() const {
return _iter->second;
}
// Return the sum of the current entry's key (_iter->first) and its
// length (_iter->second, the same value get_len() returns).
T get_end() const {
  const auto& entry = *_iter;
  return entry.first + entry.second;
}

// Set the interval length.
void set_len(T len) {
Expand Down Expand Up @@ -135,6 +138,9 @@ class interval_set {
T get_start() const {
return _iter->first;
}
// Return the interval's start (_iter->first, the same value get_start()
// returns) plus the value stored alongside it (_iter->second).
T get_end() const {
  const auto& entry = *_iter;
  return entry.first + entry.second;
}

// Return the interval length.
T get_len() const {
Expand Down
9 changes: 9 additions & 0 deletions src/include/mempool.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include <mutex>
#include <atomic>
#include <typeinfo>
#include <boost/container/flat_set.hpp>
#include <boost/container/flat_map.hpp>

#include <common/Formatter.h>
#include "include/assert.h"
Expand Down Expand Up @@ -405,6 +407,13 @@ class pool_allocator {
template<typename k, typename cmp = std::less<k> > \
using set = std::set<k,cmp,pool_allocator<k>>; \
\
template<typename k, typename cmp = std::less<k> > \
using flat_set = boost::container::flat_set<k,cmp,pool_allocator<k>>; \
\
template<typename k, typename v, typename cmp = std::less<k> > \
using flat_map = boost::container::flat_map<k,v,cmp, \
pool_allocator<std::pair<k,v>>>; \
\
template<typename v> \
using list = std::list<v,pool_allocator<v>>; \
\
Expand Down
1 change: 1 addition & 0 deletions src/include/rados.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ extern const char *ceph_osd_state_name(int s);
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */
#define CEPH_OSDMAP_PURGED_SNAPDIRS (1<<20) /* osds have converted snapsets */
#define CEPH_OSDMAP_NOSNAPTRIM (1<<21) /* disable snap trimming */

/* these are hidden in 'ceph status' view */
#define CEPH_OSDMAP_SEMIHIDDEN_FLAGS (CEPH_OSDMAP_REQUIRE_JEWEL| \
Expand Down
28 changes: 28 additions & 0 deletions src/include/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ extern "C" {
#include <string>
#include <list>
#include <set>
#include <boost/container/flat_set.hpp>
#include <boost/container/flat_map.hpp>
#include <map>
#include <vector>
#include <iostream>
Expand Down Expand Up @@ -106,6 +108,10 @@ inline ostream& operator<<(ostream& out, const list<A,Alloc>& ilist);
template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const set<A, Comp, Alloc>& iset);
template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const boost::container::flat_set<A, Comp, Alloc>& iset);
template<class A, class B, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const boost::container::flat_map<A, B, Comp, Alloc>& iset);
template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const multiset<A,Comp,Alloc>& iset);
template<class A, class B, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const map<A,B,Comp,Alloc>& m);
Expand Down Expand Up @@ -166,6 +172,28 @@ inline ostream& operator<<(ostream& out, const set<A, Comp, Alloc>& iset) {
return out;
}

// Stream a boost flat_set as its elements in iteration order, separated
// by "," with no leading or trailing separator.  An empty set writes
// nothing.
template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const boost::container::flat_set<A, Comp, Alloc>& iset) {
  bool first = true;
  for (const auto& elem : iset) {
    if (!first) {
      out << ",";
    }
    first = false;
    out << elem;
  }
  return out;
}

// Stream a boost flat_map as "key=value" pairs in iteration order,
// separated by "," with no leading or trailing separator.  An empty map
// writes nothing.
template<class A, class B, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const boost::container::flat_map<A, B, Comp, Alloc>& m) {
  bool first = true;
  for (const auto& kv : m) {
    if (!first) {
      out << ",";
    }
    first = false;
    out << kv.first << "=" << kv.second;
  }
  return out;
}

template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const multiset<A,Comp,Alloc>& iset) {
for (auto it = iset.begin();
Expand Down
6 changes: 3 additions & 3 deletions src/mds/SnapServer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ void SnapServer::reset_state()
// needing removal, skip.
continue;
}
if (!pi->removed_snaps.empty() &&
pi->removed_snaps.range_end() > first_free)
first_free = pi->removed_snaps.range_end();
if (pi->snap_seq > first_free) {
first_free = pi->snap_seq;
}
}
});
if (first_free > last_snap)
Expand Down
22 changes: 19 additions & 3 deletions src/messages/MOSDMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,20 @@

class MOSDMap : public Message {

static const int HEAD_VERSION = 3;
static const int HEAD_VERSION = 4;
static const int COMPAT_VERSION = 3;

public:
uuid_d fsid;
map<epoch_t, bufferlist> maps;
map<epoch_t, bufferlist> incremental_maps;
epoch_t oldest_map =0, newest_map = 0;

// if we are fetching maps from the mon and have to jump a gap
// (client's next needed map is older than mon's oldest) we can
// share removed snaps from the gap here.
mempool::osdmap::map<int64_t,OSDMap::snap_interval_set_t> gap_removed_snaps;

epoch_t get_first() const {
epoch_t e = 0;
map<epoch_t, bufferlist>::const_iterator i = maps.begin();
Expand All @@ -56,9 +62,9 @@ class MOSDMap : public Message {
}


MOSDMap() : Message(CEPH_MSG_OSD_MAP, HEAD_VERSION) { }
MOSDMap() : Message(CEPH_MSG_OSD_MAP, HEAD_VERSION, COMPAT_VERSION) { }
MOSDMap(const uuid_d &f)
: Message(CEPH_MSG_OSD_MAP, HEAD_VERSION),
: Message(CEPH_MSG_OSD_MAP, HEAD_VERSION, COMPAT_VERSION),
fsid(f),
oldest_map(0), newest_map(0) { }
private:
Expand All @@ -78,9 +84,13 @@ class MOSDMap : public Message {
oldest_map = 0;
newest_map = 0;
}
if (header.version >= 4) {
::decode(gap_removed_snaps, p);
}
}
void encode_payload(uint64_t features) override {
header.version = HEAD_VERSION;
header.compat_version = COMPAT_VERSION;
::encode(fsid, payload);
if ((features & CEPH_FEATURE_PGID64) == 0 ||
(features & CEPH_FEATURE_PGPOOL3) == 0 ||
Expand All @@ -93,6 +103,7 @@ class MOSDMap : public Message {
header.version = 1; // old old_client version
else if ((features & CEPH_FEATURE_OSDENC) == 0)
header.version = 2; // old pg_pool_t
header.compat_version = 0;

// reencode maps using old format
//
Expand Down Expand Up @@ -138,13 +149,18 @@ class MOSDMap : public Message {
::encode(oldest_map, payload);
::encode(newest_map, payload);
}
if (header.version >= 4) {
::encode(gap_removed_snaps, payload);
}
}

const char *get_type_name() const override { return "osdmap"; }
void print(ostream& out) const override {
out << "osd_map(" << get_first() << ".." << get_last();
if (oldest_map || newest_map)
out << " src has " << oldest_map << ".." << newest_map;
if (!gap_removed_snaps.empty())
out << " +gap_removed_snaps";
out << ")";
}
};
Expand Down
8 changes: 8 additions & 0 deletions src/mgr/ClusterState.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ class ClusterState
return std::forward<Callback>(cb)(pg_map, std::forward<Args>(args)...);
}

// Invoke cb with pg_map followed by the perfectly-forwarded args, and
// return whatever cb returns.  Unlike a const member function, this one
// hands cb the pg_map member as a non-const lvalue, so cb may modify it.
template<typename Callback, typename...Args>
auto with_mutable_pgmap(Callback&& cb, Args&&...args) ->
decltype(cb(pg_map, std::forward<Args>(args)...))
{
// The Locker is constructed on `lock` before cb runs and lives until this
// function returns (presumably holding the mutex for the whole call).
Mutex::Locker l(lock);
return std::forward<Callback>(cb)(pg_map, std::forward<Args>(args)...);
}

template<typename... Args>
void with_monmap(Args &&... args) const
{
Expand Down
2 changes: 1 addition & 1 deletion src/mgr/DaemonServer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1422,7 +1422,7 @@ void DaemonServer::send_report()
auto m = new MMonMgrReport();
py_modules.get_health_checks(&m->health_checks);

cluster_state.with_pgmap([&](const PGMap& pg_map) {
cluster_state.with_mutable_pgmap([&](PGMap& pg_map) {
cluster_state.update_delta_stats();

if (pending_service_map.epoch) {
Expand Down
5 changes: 5 additions & 0 deletions src/mon/MgrStatMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,11 @@ bool MgrStatMonitor::prepare_report(MonOpRequestRef op)
}
dout(10) << __func__ << " " << pending_digest << ", "
<< pending_health_checks.checks.size() << " health checks" << dendl;
dout(20) << "pending_digest:\n";
JSONFormatter jf(true);
pending_digest.dump(&jf);
jf.flush(*_dout);
*_dout << dendl;
return true;
}

Expand Down
4 changes: 4 additions & 0 deletions src/mon/MgrStatMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ class MgrStatMonitor : public PaxosService {
return nullptr;
}

// Return a read-only reference to the `digest` member (no copy is made).
const PGMapDigest& get_digest() {
return digest;
}

ceph_statfs get_statfs(OSDMap& osdmap,
boost::optional<int64_t> data_pool) const {
return digest.get_statfs(osdmap, data_pool);
Expand Down
4 changes: 2 additions & 2 deletions src/mon/MonCommands.h
Original file line number Diff line number Diff line change
Expand Up @@ -728,11 +728,11 @@ COMMAND("osd erasure-code-profile ls", \
"list all erasure code profiles", \
"osd", "r", "cli,rest")
COMMAND("osd set " \
"name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds " \
"name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|nosnaptrim|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds " \
"name=sure,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
"set <key>", "osd", "rw", "cli,rest")
COMMAND("osd unset " \
"name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent", \
"name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|nosnaptrim", \
"unset <key>", "osd", "rw", "cli,rest")
COMMAND("osd require-osd-release "\
"name=release,type=CephChoices,strings=luminous|mimic " \
Expand Down
Loading

0 comments on commit f3b2eb9

Please sign in to comment.