Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Omap small bugs #6046

Closed
wants to merge 10 commits into from
22 changes: 18 additions & 4 deletions src/osd/ECBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2065,7 +2065,24 @@ void ECBackend::be_deep_scrub(
o.read_error = true;
}

ECUtil::HashInfoRef hinfo = get_hash_info(poid, false);
//copy from get_hash_info. Because we already got xattrs, so it don't need
//do stat and getattr.
ECUtil::HashInfoRef hinfo = unstable_hashinfo_registry.lookup(poid);
{
if (!hinfo) {
map<string, bufferptr>::iterator k = o.attrs.find(ECUtil::get_hinfo_key());
if (k != o.attrs.end()) {
bufferlist bl;
ECUtil::HashInfo info(ec_impl->get_chunk_count());
bl.push_back(k->second);
bufferlist::iterator bliter = bl.begin();
::decode(info, bliter);
hinfo = unstable_hashinfo_registry.lookup_or_create(poid, info);
} else
dout(10) << __func__ << " " << poid << " don't find hinfo attr" << dendl;
}
}

if (!hinfo) {
dout(0) << "_scan_list " << poid << " could not retrieve hash info" << dendl;
o.read_error = true;
Expand All @@ -2090,7 +2107,4 @@ void ECBackend::be_deep_scrub(
o.digest = hinfo->get_chunk_hash(0);
o.digest_present = true;
}

o.omap_digest = seed;
o.omap_digest_present = true;
}
17 changes: 11 additions & 6 deletions src/osd/PGBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,8 @@ enum scrub_error_type PGBackend::be_compare_scrub_objects(
if (auth.size != candidate.size) {
if (error != CLEAN)
errorstream << ", ";
error = SHALLOW_ERROR;
if (error != DEEP_ERROR)
error = SHALLOW_ERROR;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a comment above that says that the shallow error here takes precedence because this can be seen by both kinds of scrubs. This is a bit odd, but I'd say remove this entire commit unless you are seeing a particular problem.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... making shallow take precedence doesn't make sense to me. Anybody remember why it says that? @athanatos ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@liewegas In addition to your change at line 412 we should not set read_error in be_scan_list() on stat() errors and set the stat size to something invalid instead. Also, remove the comments at line 374 and at line 407.

This way a regular scrub will never get a DEEP_ERROR from this function, but a deep-scrub could possibly see a SHALLOW_ERROR if only stat() fails. This keeps the error counts from getting messed up.

errorstream << "size " << candidate.size
<< " != known size " << auth.size;
}
Expand All @@ -419,12 +420,14 @@ enum scrub_error_type PGBackend::be_compare_scrub_objects(
if (!candidate.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
error = SHALLOW_ERROR;
if (error != DEEP_ERROR)
error = SHALLOW_ERROR;
errorstream << "missing attr " << i->first;
} else if (candidate.attrs.find(i->first)->second.cmp(i->second)) {
if (error != CLEAN)
errorstream << ", ";
error = SHALLOW_ERROR;
if (error != DEEP_ERROR)
error = SHALLOW_ERROR;
errorstream << "attr value mismatch " << i->first;
}
}
Expand All @@ -434,7 +437,8 @@ enum scrub_error_type PGBackend::be_compare_scrub_objects(
if (!auth.attrs.count(i->first)) {
if (error != CLEAN)
errorstream << ", ";
error = SHALLOW_ERROR;
if (error != DEEP_ERROR)
error = SHALLOW_ERROR;
errorstream << "extra attr " << i->first;
}
}
Expand Down Expand Up @@ -654,11 +658,12 @@ void PGBackend::be_compare_scrubmaps(
update = FORCE;
}

if (auth_object.digest_present && auth_object.omap_digest_present &&
(!auth_oi.is_data_digest() || !auth_oi.is_omap_digest())) {
if ((auth_object.digest_present && !auth_oi.is_data_digest()) ||
(auth_object.omap_digest_present && !auth_oi.is_omap_digest())) {
dout(20) << __func__ << " missing digest on " << *k << dendl;
update = MAYBE;
}

if (g_conf->osd_debug_scrub_chance_rewrite_digest &&
(((unsigned)rand() % 100) >
g_conf->osd_debug_scrub_chance_rewrite_digest)) {
Expand Down
135 changes: 79 additions & 56 deletions src/osd/ReplicatedBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -746,17 +746,22 @@ void ReplicatedBackend::be_deep_scrub(
bufferlist bl, hdrbl;
int r;
__u64 pos = 0;
bool omap_exist = false;

uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL | CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;

while ( (r = store->read(
coll,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
pos,
cct->_conf->osd_deep_scrub_stride, bl,
fadvise_flags, true)) > 0) {
while (true) {
handle.reset_tp_timeout();
r = store->read(
coll,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
pos,
cct->_conf->osd_deep_scrub_stride, bl,
fadvise_flags, true);
if (r <= 0)
break;

h << bl;
pos += bl.length();
bl.clear();
Expand All @@ -769,61 +774,79 @@ void ReplicatedBackend::be_deep_scrub(
o.digest = h.digest();
o.digest_present = true;

bl.clear();
r = store->omap_get_header(
coll,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
&hdrbl, true);
// NOTE: bobtail to giant, we would crc the head as (len, head).
// that changes at the same time we start using a non-zero seed.
if (r == 0 && hdrbl.length()) {
dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length())
<< dendl;
if (seed == 0) {
// legacy
bufferlist bl;
::encode(hdrbl, bl);
oh << bl;
} else {
oh << hdrbl;
map<string, bufferptr>::iterator k = o.attrs.find(OI_ATTR);
if (k != o.attrs.end()) {
bufferlist bl;
bl.push_back(k->second);
object_info_t oi;
try {
bufferlist::iterator bliter = bl.begin();
::decode(oi, bliter);
if (oi.is_omap())
omap_exist = true;
} catch (...) {
dout(10) << __func__ << " " << poid << " get corrupt oi attr" << dendl;
}
} else if (r == -EIO) {
dout(25) << __func__ << " " << poid << " got "
<< r << " on omap header read, read_error" << dendl;
o.read_error = true;
}
} else
dout(10) << __func__ << " " << poid << " not found oi attr" << dendl;

ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(
coll,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
assert(iter);
uint64_t keys_scanned = 0;
for (iter->seek_to_first(); iter->valid() ; iter->next()) {
if (cct->_conf->osd_scan_list_ping_tp_interval &&
(keys_scanned % cct->_conf->osd_scan_list_ping_tp_interval == 0)) {
handle.reset_tp_timeout();
if (omap_exist) {
bl.clear();
r = store->omap_get_header(
coll,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
&hdrbl, true);
// NOTE: bobtail to giant, we would crc the head as (len, head).
// that changes at the same time we start using a non-zero seed.
if (r == 0 && hdrbl.length()) {
dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length())
<< dendl;
if (seed == 0) {
// legacy
bufferlist bl;
::encode(hdrbl, bl);
oh << bl;
} else {
oh << hdrbl;
}
} else if (r == -EIO) {
dout(25) << __func__ << " " << poid << " got "
<< r << " on omap header read, read_error" << dendl;
o.read_error = true;
}
++keys_scanned;

dout(25) << "CRC key " << iter->key() << " value "
<< string(iter->value().c_str(), iter->value().length()) << dendl;
ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(
coll,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard));
assert(iter);
uint64_t keys_scanned = 0;
for (iter->seek_to_first(); iter->valid() ; iter->next()) {
if (cct->_conf->osd_scan_list_ping_tp_interval &&
(keys_scanned % cct->_conf->osd_scan_list_ping_tp_interval == 0)) {
handle.reset_tp_timeout();
}
++keys_scanned;

::encode(iter->key(), bl);
::encode(iter->value(), bl);
oh << bl;
bl.clear();
}
if (iter->status() == -EIO) {
dout(25) << __func__ << " " << poid << " got "
<< r << " on omap scan, read_error" << dendl;
o.read_error = true;
}
dout(25) << "CRC key " << iter->key() << " value "
<< string(iter->value().c_str(), iter->value().length()) << dendl;

//Store final calculated CRC32 of omap header & key/values
o.omap_digest = oh.digest();
o.omap_digest_present = true;
::encode(iter->key(), bl);
::encode(iter->value(), bl);
oh << bl;
bl.clear();
}
if (iter->status() == -EIO) {
dout(25) << __func__ << " " << poid << " got "
<< r << " on omap scan, read_error" << dendl;
o.read_error = true;
}

//Store final calculated CRC32 of omap header & key/values
o.omap_digest = oh.digest();
o.omap_digest_present = true;
}
}

void ReplicatedBackend::_do_push(OpRequestRef op)
Expand Down
9 changes: 2 additions & 7 deletions src/osd/ReplicatedPG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5369,7 +5369,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
ctx->delta_stats.num_wr++;
}
obs.oi.set_flag(object_info_t::FLAG_OMAP);
obs.oi.clear_omap_digest();
break;

case CEPH_OSD_OP_OMAPCLEAR:
Expand All @@ -5385,12 +5384,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = -ENOENT;
break;
}
t->touch(soid);
t->omap_clear(soid);
ctx->delta_stats.num_wr++;
}
obs.oi.set_flag(object_info_t::FLAG_OMAP);
obs.oi.set_omap_digest(-1);
obs.oi.clear_omap_digest();
break;

case CEPH_OSD_OP_OMAPRMKEYS:
Expand All @@ -5407,7 +5404,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
tracepoint(osd, do_osd_op_pre_omaprmkeys, soid.oid.name.c_str(), soid.snap.val, "???");
break;
}
t->touch(soid);
set<string> to_rm;
try {
::decode(to_rm, bp);
Expand All @@ -5421,7 +5417,6 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
t->omap_rmkeys(soid, to_rm);
ctx->delta_stats.num_wr++;
}
obs.oi.set_flag(object_info_t::FLAG_OMAP);
obs.oi.clear_omap_digest();
break;

Expand Down Expand Up @@ -6536,7 +6531,7 @@ int ReplicatedPG::fill_in_copy_get(

// omap
uint32_t omap_keys = 0;
if (!pool.info.supports_omap()) {
if (!pool.info.supports_omap() || !oi.is_omap()) {
cursor.omap_complete = true;
} else {
if (left > 0 && !cursor.omap_complete) {
Expand Down