Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mon,osd,osdc: refactor snap trimming (phase 1) #18276

Merged
merged 32 commits into from
Dec 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
c536d4c
osd/osd_types: note about removed_snaps hack
liewegas Oct 13, 2017
c8bfe3f
osd/PG: share_pg_info shares past_intervals, not PastIntervals()
liewegas Dec 1, 2017
81d63f2
osd/OSDMap: improve osdmap flag dumping in json
liewegas Dec 1, 2017
df7523b
qa/suites/rados/singleton/all/thrash-eio: more whitelist
liewegas Dec 2, 2017
ea308ad
include/interval_set: add get_end() to iterator
liewegas Oct 30, 2017
3119cf5
include/mempool: add flat_set alias
liewegas Oct 16, 2017
1b1eec2
include/types: flat_set operator<<
liewegas Oct 16, 2017
b9c5a24
osd/osd_types: SnapSet: remove get_first_snap_after()
liewegas Oct 28, 2017
e89649d
mds/SnapServer: fix reset()
liewegas Oct 17, 2017
1f133a2
mon/OSDMonitor: reset OSDMap state before decode
liewegas Oct 13, 2017
37c4aff
mon/OSDMonitor: clear pending_metadata* in create_pending
liewegas Oct 12, 2017
553048f
osd/OSDMap: track newly removed and purged snaps in each epoch
liewegas Oct 11, 2017
9d606c5
mon/OSDMonitor: record removed_snaps by epoch outside of the osdmap
liewegas Oct 13, 2017
49833c3
mon/OSDMonitor: share snaps removed during a map gap
liewegas Oct 12, 2017
38e96ec
mon/MgrStatMonitor: dump PGMapDigest at debug level 20
liewegas Nov 29, 2017
32d7538
osdc/Objecter: prune new_removed_snaps from active op snapc's
liewegas Oct 12, 2017
b1b8fc6
osdc/Objecter: rename _scan_requests force_resend -> skipped_map
liewegas Oct 12, 2017
192a8dc
osdc/Objecter: apply removed_snaps from gap to in-flight requests
liewegas Oct 12, 2017
a53ba73
osd,mon: add 'nosnaptrim' osd flag
liewegas Dec 1, 2017
345d3b6
osd/osd_types: add purged_snaps to pg_stat_t
liewegas Oct 12, 2017
6df912b
osd/PG: share purged_snaps with mgr at mimic
liewegas Oct 12, 2017
86f0b81
mon/PGMap: add purged_snaps map to PGMapDigest
liewegas Oct 13, 2017
e5f62fb
osd/PG: move debug_verify_cached_snaps check into PGPool::update
liewegas Oct 13, 2017
33c9907
osd/PG: some whitespace
liewegas Nov 3, 2017
f04729c
osd/PG: break out of Active AdvMap handler if interval change
liewegas Dec 1, 2017
231ec67
osd/PG: simplify replica purged_snaps update
liewegas Dec 1, 2017
6e1b7c4
osd/PG: use new mimic osdmap structures for removed, pruned snaps
liewegas Nov 3, 2017
16c5bcc
osd/osd_types: pg_pool_t: add FLAG_{SELFMANAGED,POOL}_SNAPS flags
liewegas Oct 13, 2017
fd6a59e
mon/OSDMonitor: convert removed_snaps on first mimic map
liewegas Oct 16, 2017
9607a2d
mon/OSDMonitor: prune purged snaps
liewegas Oct 28, 2017
f2d602a
mon/OSDMonitor: propagate new_removed_snaps to other tiers
liewegas Nov 5, 2017
8c44dab
osd/PG: ignore purged_snaps inconsistencies for now
liewegas Dec 2, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions qa/suites/rados/singleton/all/thrash-eio.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ tasks:
- \(SLOW_OPS\)
- \(PG_
- \(OSD_
- \(OBJECT_
- thrashosds:
op_delay: 30
clean_interval: 120
Expand Down
8 changes: 8 additions & 0 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,10 @@ std::vector<Option> get_global_options() {
.set_default(true)
.set_description("Enable POOL_APP_NOT_ENABLED health check"),

Option("mon_max_snap_prune_per_epoch", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(100)
.set_description("Max number of pruned snaps we will process in a single OSDMap epoch"),

Option("mon_min_osdmap_epochs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(500)
.set_description(""),
Expand Down Expand Up @@ -2453,6 +2457,10 @@ std::vector<Option> get_global_options() {
.set_default(.9)
.set_description(""),

Option("osd_max_snap_prune_intervals_per_epoch", Option::TYPE_UINT, Option::LEVEL_DEV)
.set_default(512)
.set_description("Max number of snap intervals to report to mgr in pg_stat_t"),

Option("osd_default_data_pool_replay_window", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(45)
.set_description(""),
Expand Down
6 changes: 6 additions & 0 deletions src/include/interval_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class interval_set {
T get_len() const {
return _iter->second;
}
// Return the end of the interval: its start (first) plus its length (second).
T get_end() const {
  const auto& ival = *_iter;
  return ival.first + ival.second;
}

// Set the interval length.
void set_len(T len) {
Expand Down Expand Up @@ -135,6 +138,9 @@ class interval_set {
T get_start() const {
return _iter->first;
}
// Return the end of the interval: its start (first) plus its length (second).
T get_end() const {
  const auto& ival = *_iter;
  return ival.first + ival.second;
}

// Return the interval length.
T get_len() const {
Expand Down
9 changes: 9 additions & 0 deletions src/include/mempool.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include <mutex>
#include <atomic>
#include <typeinfo>
#include <boost/container/flat_set.hpp>
#include <boost/container/flat_map.hpp>

#include <common/Formatter.h>
#include "include/assert.h"
Expand Down Expand Up @@ -405,6 +407,13 @@ class pool_allocator {
template<typename k, typename cmp = std::less<k> > \
using set = std::set<k,cmp,pool_allocator<k>>; \
\
template<typename k, typename cmp = std::less<k> > \
using flat_set = boost::container::flat_set<k,cmp,pool_allocator<k>>; \
\
template<typename k, typename v, typename cmp = std::less<k> > \
using flat_map = boost::container::flat_map<k,v,cmp, \
pool_allocator<std::pair<k,v>>>; \
\
template<typename v> \
using list = std::list<v,pool_allocator<v>>; \
\
Expand Down
1 change: 1 addition & 0 deletions src/include/rados.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ extern const char *ceph_osd_state_name(int s);
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */
#define CEPH_OSDMAP_PURGED_SNAPDIRS (1<<20) /* osds have converted snapsets */
#define CEPH_OSDMAP_NOSNAPTRIM (1<<21) /* disable snap trimming */

/* these are hidden in 'ceph status' view */
#define CEPH_OSDMAP_SEMIHIDDEN_FLAGS (CEPH_OSDMAP_REQUIRE_JEWEL| \
Expand Down
28 changes: 28 additions & 0 deletions src/include/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ extern "C" {
#include <string>
#include <list>
#include <set>
#include <boost/container/flat_set.hpp>
#include <boost/container/flat_map.hpp>
#include <map>
#include <vector>
#include <iostream>
Expand Down Expand Up @@ -106,6 +108,10 @@ inline ostream& operator<<(ostream& out, const list<A,Alloc>& ilist);
template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const set<A, Comp, Alloc>& iset);
template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const boost::container::flat_set<A, Comp, Alloc>& iset);
template<class A, class B, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const boost::container::flat_map<A, B, Comp, Alloc>& iset);
template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const multiset<A,Comp,Alloc>& iset);
template<class A, class B, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const map<A,B,Comp,Alloc>& m);
Expand Down Expand Up @@ -166,6 +172,28 @@ inline ostream& operator<<(ostream& out, const set<A, Comp, Alloc>& iset) {
return out;
}

template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const boost::container::flat_set<A, Comp, Alloc>& iset) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this boost-specific instead of using the generic ceph-namespaced set/map?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flat_set and flat_map != set and map

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant ceph::flat_map instead of boost::flat_map, guess I left off too many words!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! Because they're not aliased in the ceph namespace. I'm not a real fan of doing that unless there is a reason we'd swap implementations (like we had to with shared_ptr forever ago); it just obscures things for someone reading the code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh hmm, I misread the mempool setup and thought you were adding an alias.

I always kind of liked them thanks to the shared_ptr experience. I thought @wjwithagen had taken advantage of that pattern for some porting work as well, but maybe it doesn't matter for boost bits.

for (auto it = iset.begin();
it != iset.end();
++it) {
if (it != iset.begin()) out << ",";
out << *it;
}
return out;
}

// Print a flat_map as comma-separated "key=value" entries, in the map's
// iteration order, with no leading or trailing separator.
template<class A, class B, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const boost::container::flat_map<A, B, Comp, Alloc>& m) {
  bool need_sep = false;
  for (const auto& entry : m) {
    if (need_sep) {
      out << ",";
    }
    need_sep = true;
    out << entry.first << "=" << entry.second;
  }
  return out;
}

template<class A, class Comp, class Alloc>
inline ostream& operator<<(ostream& out, const multiset<A,Comp,Alloc>& iset) {
for (auto it = iset.begin();
Expand Down
6 changes: 3 additions & 3 deletions src/mds/SnapServer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ void SnapServer::reset_state()
// needing removal, skip.
continue;
}
if (!pi->removed_snaps.empty() &&
pi->removed_snaps.range_end() > first_free)
first_free = pi->removed_snaps.range_end();
if (pi->snap_seq > first_free) {
first_free = pi->snap_seq;
}
}
});
if (first_free > last_snap)
Expand Down
22 changes: 19 additions & 3 deletions src/messages/MOSDMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,20 @@

class MOSDMap : public Message {

static const int HEAD_VERSION = 3;
static const int HEAD_VERSION = 4;
static const int COMPAT_VERSION = 3;

public:
uuid_d fsid;
map<epoch_t, bufferlist> maps;
map<epoch_t, bufferlist> incremental_maps;
epoch_t oldest_map =0, newest_map = 0;

// if we are fetching maps from the mon and have to jump a gap
// (client's next needed map is older than mon's oldest) we can
// share removed snaps from the gap here.
mempool::osdmap::map<int64_t,OSDMap::snap_interval_set_t> gap_removed_snaps;

epoch_t get_first() const {
epoch_t e = 0;
map<epoch_t, bufferlist>::const_iterator i = maps.begin();
Expand All @@ -56,9 +62,9 @@ class MOSDMap : public Message {
}


MOSDMap() : Message(CEPH_MSG_OSD_MAP, HEAD_VERSION) { }
MOSDMap() : Message(CEPH_MSG_OSD_MAP, HEAD_VERSION, COMPAT_VERSION) { }
MOSDMap(const uuid_d &f)
: Message(CEPH_MSG_OSD_MAP, HEAD_VERSION),
: Message(CEPH_MSG_OSD_MAP, HEAD_VERSION, COMPAT_VERSION),
fsid(f),
oldest_map(0), newest_map(0) { }
private:
Expand All @@ -78,9 +84,13 @@ class MOSDMap : public Message {
oldest_map = 0;
newest_map = 0;
}
if (header.version >= 4) {
::decode(gap_removed_snaps, p);
}
}
void encode_payload(uint64_t features) override {
header.version = HEAD_VERSION;
header.compat_version = COMPAT_VERSION;
::encode(fsid, payload);
if ((features & CEPH_FEATURE_PGID64) == 0 ||
(features & CEPH_FEATURE_PGPOOL3) == 0 ||
Expand All @@ -93,6 +103,7 @@ class MOSDMap : public Message {
header.version = 1; // old old_client version
else if ((features & CEPH_FEATURE_OSDENC) == 0)
header.version = 2; // old pg_pool_t
header.compat_version = 0;

// reencode maps using old format
//
Expand Down Expand Up @@ -138,13 +149,18 @@ class MOSDMap : public Message {
::encode(oldest_map, payload);
::encode(newest_map, payload);
}
if (header.version >= 4) {
::encode(gap_removed_snaps, payload);
}
}

const char *get_type_name() const override { return "osdmap"; }
void print(ostream& out) const override {
out << "osd_map(" << get_first() << ".." << get_last();
if (oldest_map || newest_map)
out << " src has " << oldest_map << ".." << newest_map;
if (!gap_removed_snaps.empty())
out << " +gap_removed_snaps";
out << ")";
}
};
Expand Down
8 changes: 8 additions & 0 deletions src/mgr/ClusterState.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ class ClusterState
return std::forward<Callback>(cb)(pg_map, std::forward<Args>(args)...);
}

// Invoke cb(pg_map, args...) with `lock` taken through Mutex::Locker for the
// duration of the call, and return whatever the callback returns. This
// member is non-const, so the callback receives pg_map as a mutable object.
template<typename Callback, typename...Args>
auto with_mutable_pgmap(Callback&& cb, Args&&...args) ->
  decltype(cb(pg_map, std::forward<Args>(args)...))
{
  Mutex::Locker locker(lock);
  return std::forward<Callback>(cb)(pg_map, std::forward<Args>(args)...);
}

template<typename... Args>
void with_monmap(Args &&... args) const
{
Expand Down
2 changes: 1 addition & 1 deletion src/mgr/DaemonServer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1422,7 +1422,7 @@ void DaemonServer::send_report()
auto m = new MMonMgrReport();
py_modules.get_health_checks(&m->health_checks);

cluster_state.with_pgmap([&](const PGMap& pg_map) {
cluster_state.with_mutable_pgmap([&](PGMap& pg_map) {
cluster_state.update_delta_stats();

if (pending_service_map.epoch) {
Expand Down
5 changes: 5 additions & 0 deletions src/mon/MgrStatMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,11 @@ bool MgrStatMonitor::prepare_report(MonOpRequestRef op)
}
dout(10) << __func__ << " " << pending_digest << ", "
<< pending_health_checks.checks.size() << " health checks" << dendl;
dout(20) << "pending_digest:\n";
JSONFormatter jf(true);
pending_digest.dump(&jf);
jf.flush(*_dout);
*_dout << dendl;
return true;
}

Expand Down
4 changes: 4 additions & 0 deletions src/mon/MgrStatMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ class MgrStatMonitor : public PaxosService {
return nullptr;
}

// Read-only access to the `digest` member. Declared const because it only
// hands out a const reference and mutates nothing, so it can be called
// through const references/pointers to the monitor as well.
const PGMapDigest& get_digest() const {
  return digest;
}

ceph_statfs get_statfs(OSDMap& osdmap,
boost::optional<int64_t> data_pool) const {
return digest.get_statfs(osdmap, data_pool);
Expand Down
4 changes: 2 additions & 2 deletions src/mon/MonCommands.h
Original file line number Diff line number Diff line change
Expand Up @@ -728,11 +728,11 @@ COMMAND("osd erasure-code-profile ls", \
"list all erasure code profiles", \
"osd", "r", "cli,rest")
COMMAND("osd set " \
"name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds " \
"name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|nosnaptrim|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds " \
"name=sure,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
"set <key>", "osd", "rw", "cli,rest")
COMMAND("osd unset " \
"name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent", \
"name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|nosnaptrim", \
"unset <key>", "osd", "rw", "cli,rest")
COMMAND("osd require-osd-release "\
"name=release,type=CephChoices,strings=luminous|mimic " \
Expand Down
Loading