Skip to content

Commit

Permalink
osd: use -1 for deep scrub digest seed on new OSDs
Browse files Browse the repository at this point in the history
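0 is a weak initial value for a CRC: while the running value is still 0,
feeding it zero bytes leaves it at 0, so a leading run of zero bytes (which
is relatively common) has no effect on the digest.  -1 is a better seed.
Use -1 when everyone in the acting set supports it, and fall back to 0
otherwise for compatibility with older OSDs.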

Signed-off-by: Sage Weil <sage@redhat.com>
  • Loading branch information
liewegas committed Dec 20, 2014
1 parent e68d771 commit 7d73f41
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 30 deletions.
4 changes: 3 additions & 1 deletion src/include/ceph_features.h
Expand Up @@ -55,6 +55,7 @@
#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
#define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */
#define CEPH_FEATURE_MDS_QUOTA (1ULL<<47)

#define CEPH_FEATURE_RESERVED2 (1ULL<<61) /* slow down, we are almost out... */
Expand Down Expand Up @@ -134,7 +135,8 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
CEPH_FEATURE_OSD_POOLRESEND | \
CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 | \
CEPH_FEATURE_OSD_SET_ALLOC_HINT | \
CEPH_FEATURE_OSD_FADVISE_FLAGS | \
CEPH_FEATURE_OSD_FADVISE_FLAGS | \
CEPH_FEATURE_OSD_OBJECT_DIGEST | \
CEPH_FEATURE_MDS_QUOTA | \
0ULL)

Expand Down
24 changes: 18 additions & 6 deletions src/messages/MOSDRepScrub.h
Expand Up @@ -24,7 +24,7 @@

struct MOSDRepScrub : public Message {

static const int HEAD_VERSION = 5;
static const int HEAD_VERSION = 6;
static const int COMPAT_VERSION = 2;

spg_t pgid; // PG to scrub
Expand All @@ -35,10 +35,13 @@ struct MOSDRepScrub : public Message {
hobject_t start; // lower bound of scrub, inclusive
hobject_t end; // upper bound of scrub, exclusive
bool deep; // true if scrub should be deep
uint32_t seed; // seed value for digest calculation

MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
MOSDRepScrub()
: Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
chunky(false),
deep(false) { }
deep(false),
seed(0) { }

MOSDRepScrub(spg_t pgid, eversion_t scrub_from, eversion_t scrub_to,
epoch_t map_epoch)
Expand All @@ -48,18 +51,20 @@ struct MOSDRepScrub : public Message {
scrub_to(scrub_to),
map_epoch(map_epoch),
chunky(false),
deep(false) { }
deep(false),
seed(0) { }

MOSDRepScrub(spg_t pgid, eversion_t scrub_to, epoch_t map_epoch,
hobject_t start, hobject_t end, bool deep)
hobject_t start, hobject_t end, bool deep, uint32_t seed)
: Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
pgid(pgid),
scrub_to(scrub_to),
map_epoch(map_epoch),
chunky(true),
start(start),
end(end),
deep(deep) { }
deep(deep),
seed(seed) { }


private:
Expand All @@ -73,6 +78,7 @@ struct MOSDRepScrub : public Message {
<< ",epoch:" << map_epoch << ",start:" << start << ",end:" << end
<< ",chunky:" << chunky
<< ",deep:" << deep
<< ",seed:" << seed
<< ",version:" << header.version;
out << ")";
}
Expand All @@ -87,6 +93,7 @@ struct MOSDRepScrub : public Message {
::encode(end, payload);
::encode(deep, payload);
::encode(pgid.shard, payload);
::encode(seed, payload);
}
void decode_payload() {
bufferlist::iterator p = payload.begin();
Expand Down Expand Up @@ -114,6 +121,11 @@ struct MOSDRepScrub : public Message {
} else {
pgid.shard = shard_id_t::NO_SHARD;
}
if (header.version >= 6) {
::decode(seed, p);
} else {
seed = 0;
}
}
};

Expand Down
3 changes: 2 additions & 1 deletion src/osd/ECBackend.cc
Expand Up @@ -1746,9 +1746,10 @@ void ECBackend::rollback_append(

void ECBackend::be_deep_scrub(
const hobject_t &poid,
uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle) {
bufferhash h(-1);
bufferhash h(-1); // we always used -1
int r;
uint64_t stride = cct->_conf->osd_deep_scrub_stride;
if (stride % sinfo.get_chunk_size())
Expand Down
1 change: 1 addition & 0 deletions src/osd/ECBackend.h
Expand Up @@ -469,6 +469,7 @@ class ECBackend : public PGBackend {

void be_deep_scrub(
const hobject_t &obj,
uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle);
uint64_t be_get_ondisk_size(uint64_t logical_size) {
Expand Down
31 changes: 20 additions & 11 deletions src/osd/PG.cc
Expand Up @@ -3288,14 +3288,15 @@ void PG::_request_scrub_map_classic(pg_shard_t replica, eversion_t version)
void PG::_request_scrub_map(
pg_shard_t replica, eversion_t version,
hobject_t start, hobject_t end,
bool deep)
bool deep, uint32_t seed)
{
assert(replica != pg_whoami);
dout(10) << "scrub requesting scrubmap from osd." << replica << dendl;
dout(10) << "scrub requesting scrubmap from osd." << replica
<< " deep " << (int)deep << " seed " << seed << dendl;
MOSDRepScrub *repscrubop = new MOSDRepScrub(
spg_t(info.pgid.pgid, replica.shard), version,
get_osdmap()->get_epoch(),
start, end, deep);
start, end, deep, seed);
osd->send_message_osd_cluster(
replica.osd, repscrubop, get_osdmap()->get_epoch());
}
Expand Down Expand Up @@ -3561,10 +3562,11 @@ void PG::_scan_snaps(ScrubMap &smap)
*/
int PG::build_scrub_map_chunk(
ScrubMap &map,
hobject_t start, hobject_t end, bool deep,
hobject_t start, hobject_t end, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle)
{
dout(10) << __func__ << " [" << start << "," << end << ")" << dendl;
dout(10) << __func__ << " [" << start << "," << end << ") "
<< " seed " << seed << dendl;

map.valid_through = info.last_update;

Expand All @@ -3583,7 +3585,7 @@ int PG::build_scrub_map_chunk(
}


get_pgbackend()->be_scan_list(map, ls, deep, handle);
get_pgbackend()->be_scan_list(map, ls, deep, seed, handle);
_scan_rollback_obs(rollback_obs, handle);
_scan_snaps(map);

Expand Down Expand Up @@ -3612,7 +3614,7 @@ void PG::build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle)
vector<hobject_t> ls;
osd->store->collection_list(coll, ls);

get_pgbackend()->be_scan_list(map, ls, false, handle);
get_pgbackend()->be_scan_list(map, ls, false, 0, handle);
lock();
_scan_snaps(map);

Expand Down Expand Up @@ -3658,7 +3660,7 @@ void PG::build_inc_scrub_map(
}
}

get_pgbackend()->be_scan_list(map, ls, false, handle);
get_pgbackend()->be_scan_list(map, ls, false, 0, handle);
}

void PG::repair_object(
Expand Down Expand Up @@ -3723,7 +3725,7 @@ void PG::replica_scrub(
}

build_scrub_map_chunk(
map, msg->start, msg->end, msg->deep,
map, msg->start, msg->end, msg->deep, msg->seed,
handle);

vector<OSDOp> scrub(1);
Expand Down Expand Up @@ -3926,6 +3928,12 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
oss << info.pgid.pgid << " " << mode << " starts" << std::endl;
osd->clog->info(oss);
}

if (peer_features & CEPH_FEATURE_OSD_OBJECT_DIGEST)
scrubber.seed = -1; // better, and enables oi digest checks
else
scrubber.seed = 0; // compat

break;

case PG::Scrubber::NEW_CHUNK:
Expand Down Expand Up @@ -4007,7 +4015,8 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
++i) {
if (*i == pg_whoami) continue;
_request_scrub_map(*i, scrubber.subset_last_update,
scrubber.start, scrubber.end, scrubber.deep);
scrubber.start, scrubber.end, scrubber.deep,
scrubber.seed);
scrubber.waiting_on_whom.insert(*i);
++scrubber.waiting_on;
}
Expand Down Expand Up @@ -4041,7 +4050,7 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
// build my own scrub map
ret = build_scrub_map_chunk(scrubber.primary_scrubmap,
scrubber.start, scrubber.end,
scrubber.deep,
scrubber.deep, scrubber.seed,
handle);
if (ret < 0) {
dout(5) << "error building scrub map: " << ret << ", aborting" << dendl;
Expand Down
10 changes: 7 additions & 3 deletions src/osd/PG.h
Expand Up @@ -1030,7 +1030,8 @@ class PG {
active_rep_scrub(0),
must_scrub(false), must_deep_scrub(false), must_repair(false),
state(INACTIVE),
deep(false)
deep(false),
seed(0)
{
}

Expand Down Expand Up @@ -1081,6 +1082,7 @@ class PG {

// deep scrub
bool deep;
uint32_t seed;

list<Context*> callbacks;
void add_callback(Context *context) {
Expand Down Expand Up @@ -1151,6 +1153,7 @@ class PG {
deep_errors = 0;
fixed = 0;
deep = false;
seed = 0;
run_callbacks();
inconsistent.clear();
missing.clear();
Expand Down Expand Up @@ -1183,10 +1186,11 @@ class PG {
ThreadPool::TPHandle &handle);
void _request_scrub_map_classic(pg_shard_t replica, eversion_t version);
void _request_scrub_map(pg_shard_t replica, eversion_t version,
hobject_t start, hobject_t end, bool deep);
hobject_t start, hobject_t end, bool deep,
uint32_t seed);
int build_scrub_map_chunk(
ScrubMap &map,
hobject_t start, hobject_t end, bool deep,
hobject_t start, hobject_t end, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle);
void build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle);
void build_inc_scrub_map(
Expand Down
4 changes: 2 additions & 2 deletions src/osd/PGBackend.cc
Expand Up @@ -320,7 +320,7 @@ PGBackend *PGBackend::build_pg_backend(
* pg lock may or may not be held
*/
void PGBackend::be_scan_list(
ScrubMap &map, const vector<hobject_t> &ls, bool deep,
ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle)
{
dout(10) << __func__ << " scanning " << ls.size() << " objects"
Expand Down Expand Up @@ -351,7 +351,7 @@ void PGBackend::be_scan_list(

// calculate the CRC32 on deep scrubs
if (deep) {
be_deep_scrub(*p, o, handle);
be_deep_scrub(*p, seed, o, handle);
}

dout(25) << __func__ << " " << poid << dendl;
Expand Down
3 changes: 2 additions & 1 deletion src/osd/PGBackend.h
Expand Up @@ -585,7 +585,7 @@

virtual bool scrub_supported() { return false; }
void be_scan_list(
ScrubMap &map, const vector<hobject_t> &ls, bool deep,
ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle);
enum scrub_error_type be_compare_scrub_objects(
const ScrubMap::object &auth,
Expand All @@ -607,6 +607,7 @@
uint64_t logical_size) { assert(0); return 0; }
virtual void be_deep_scrub(
const hobject_t &poid,
uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle) { assert(0); }

Expand Down
19 changes: 14 additions & 5 deletions src/osd/ReplicatedBackend.cc
Expand Up @@ -693,10 +693,12 @@ void ReplicatedBackend::sub_op_modify_reply(OpRequestRef op)

void ReplicatedBackend::be_deep_scrub(
const hobject_t &poid,
uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle)
{
bufferhash h, oh;
dout(10) << __func__ << " " << poid << " seed " << seed << dendl;
bufferhash h(seed), oh(seed);
bufferlist bl, hdrbl;
int r;
__u64 pos = 0;
Expand Down Expand Up @@ -726,12 +728,19 @@ void ReplicatedBackend::be_deep_scrub(
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
&hdrbl, true);
if (r == 0) {
// NOTE: bobtail to giant, we would crc the head as (len, head).
// that changes at the same time we start using a non-zero seed.
if (r == 0 && hdrbl.length()) {
dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length())
<< dendl;
::encode(hdrbl, bl);
oh << bl;
bl.clear();
if (seed == 0) {
// legacy
bufferlist bl;
::encode(hdrbl, bl);
oh << bl;
} else {
oh << hdrbl;
}
} else if (r == -EIO) {
dout(25) << __func__ << " " << poid << " got "
<< r << " on omap header read, read_error" << dendl;
Expand Down
1 change: 1 addition & 0 deletions src/osd/ReplicatedBackend.h
Expand Up @@ -413,6 +413,7 @@ class ReplicatedBackend : public PGBackend {

void be_deep_scrub(
const hobject_t &obj,
uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle);
uint64_t be_get_ondisk_size(uint64_t logical_size) { return logical_size; }
Expand Down

0 comments on commit 7d73f41

Please sign in to comment.