Skip to content

Commit

Permalink
osd: Log shard errors in be_select_auth_object() as other errors
Browse files (browse the repository at this point in the history)
Signed-off-by: David Zafman <dzafman@redhat.com>
  • Loading branch information
dzafman committed Aug 23, 2018
1 parent c789fd1 commit cadf727
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 32 deletions.
86 changes: 55 additions & 31 deletions src/osd/PGBackend.cc
Expand Up @@ -628,15 +628,6 @@ bool PGBackend::be_compare_scrub_objects(
bool has_snapset)
{
enum { CLEAN, FOUND_ERROR } error = CLEAN;
if (candidate.stat_error) {
assert(shard_result.has_stat_error());
error = FOUND_ERROR;
errorstream << "candidate had a stat error";
}
if (candidate.read_error || candidate.ec_hash_mismatch || candidate.ec_size_mismatch) {
error = FOUND_ERROR;
errorstream << "candidate had a read error";
}
if (auth.digest_present && candidate.digest_present) {
if (auth.digest != candidate.digest) {
if (error != CLEAN)
Expand Down Expand Up @@ -818,7 +809,9 @@ map<pg_shard_t, ScrubMap *>::const_iterator
const map<pg_shard_t,ScrubMap*> &maps,
object_info_t *auth_oi,
map<pg_shard_t, shard_info_wrapper> &shard_map,
bool &digest_match)
bool &digest_match,
spg_t pgid,
ostream &errorstream)
{
eversion_t auth_version;

Expand All @@ -837,27 +830,37 @@ map<pg_shard_t, ScrubMap *>::const_iterator
map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
digest_match = true;
for (auto &l : shards) {
ostringstream shard_errorstream;
bool error = false;
map<pg_shard_t, ScrubMap *>::const_iterator j = maps.find(l);
map<hobject_t, ScrubMap::object>::iterator i =
j->second->objects.find(obj);
if (i == j->second->objects.end()) {
continue;
}
string error_string;
auto& shard_info = shard_map[j->first];
if (j->first == get_parent()->whoami_shard())
shard_info.primary = true;
if (i->second.read_error) {
shard_info.set_read_error();
error_string += " read_error";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had a read error";
}
if (i->second.ec_hash_mismatch) {
shard_info.set_ec_hash_mismatch();
error_string += " ec_hash_mismatch";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had an ec hash mismatch";
}
if (i->second.ec_size_mismatch) {
shard_info.set_ec_size_mismatch();
error_string += " ec_size_mismatch";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had an ec size mismatch";
}

object_info_t oi;
Expand All @@ -868,7 +871,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator

if (i->second.stat_error) {
shard_info.set_stat_error();
error_string += " stat_error";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had a stat error";
// With stat_error no further checking
// We don't need to also see a missing_object_info_attr
goto out;
Expand All @@ -880,7 +886,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
k = i->second.attrs.find(SS_ATTR);
if (k == i->second.attrs.end()) {
shard_info.set_snapset_missing();
error_string += " snapset_missing";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had a missing snapset key";
} else {
ss_bl.push_back(k->second);
try {
Expand All @@ -889,7 +898,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
} catch (...) {
// invalid snapset, probably corrupt
shard_info.set_snapset_corrupted();
error_string += " snapset_corrupted";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had a corrupt snapset";
}
}
}
Expand All @@ -899,7 +911,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
k = i->second.attrs.find(ECUtil::get_hinfo_key());
if (k == i->second.attrs.end()) {
shard_info.set_hinfo_missing();
error_string += " hinfo_key_missing";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had a missing hinfo key";
} else {
hk_bl.push_back(k->second);
try {
Expand All @@ -908,7 +923,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
} catch (...) {
// invalid hinfo, probably corrupt
shard_info.set_hinfo_corrupted();
error_string += " hinfo_corrupted";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had a corrupt hinfo";
}
}
}
Expand All @@ -917,7 +935,10 @@ map<pg_shard_t, ScrubMap *>::const_iterator
if (k == i->second.attrs.end()) {
// no object info on object, probably corrupt
shard_info.set_info_missing();
error_string += " info_missing";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had a missing info key";
goto out;
}
bl.push_back(k->second);
Expand All @@ -927,17 +948,23 @@ map<pg_shard_t, ScrubMap *>::const_iterator
} catch (...) {
// invalid object info, probably corrupt
shard_info.set_info_corrupted();
error_string += " info_corrupted";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate had a corrupt info";
goto out;
}

// This is automatically corrected in PG::_repair_oinfo_oid()
assert(oi.soid == obj);

if (i->second.size != be_get_ondisk_size(oi.size)) {
dout(5) << __func__ << " size " << i->second.size << " oi size " << oi.size << dendl;
shard_info.set_obj_size_info_mismatch();
error_string += " obj_size_info_mismatch";
if (error)
shard_errorstream << ", ";
error = true;
shard_errorstream << "candidate size " << i->second.size << " info size "
<< oi.size << " mismatch";
}

// digest_match will only be true if computed digests are the same
Expand All @@ -964,13 +991,9 @@ map<pg_shard_t, ScrubMap *>::const_iterator
}

out:
// Check error_string because some errors already generated messages
if (error_string != "") {
dout(10) << __func__ << ": error(s) osd " << j->first
<< " for obj " << obj
<< "," << error_string
<< dendl;
}
if (error)
errorstream << pgid.pgid << " shard " << l << ": soid " << obj
<< " " << shard_errorstream.str() << "\n";
// Keep scanning other shards
}
dout(10) << __func__ << ": selecting osd " << auth->first
Expand Down Expand Up @@ -1008,7 +1031,8 @@ void PGBackend::be_compare_scrubmaps(

bool digest_match;
map<pg_shard_t, ScrubMap *>::const_iterator auth =
be_select_auth_object(*k, maps, &auth_oi, shard_map, digest_match);
be_select_auth_object(*k, maps, &auth_oi, shard_map, digest_match,
pgid, errorstream);

list<pg_shard_t> auth_list;
set<pg_shard_t> object_errors;
Expand Down
4 changes: 3 additions & 1 deletion src/osd/PGBackend.h
Expand Up @@ -577,7 +577,9 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
const map<pg_shard_t,ScrubMap*> &maps,
object_info_t *auth_oi,
map<pg_shard_t, shard_info_wrapper> &shard_map,
bool &digest_match);
bool &digest_match,
spg_t pgid,
ostream &errorstream);
void be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps,
const set<hobject_t> &master_set,
Expand Down

0 comments on commit cadf727

Please sign in to comment.