Skip to content

Commit

Permalink
Merge pull request #20450 from dzafman/wip-22996
Browse files Browse the repository at this point in the history
Snapset inconsistency is detected with its own error

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
  • Loading branch information
dzafman committed Feb 20, 2018
2 parents 379897e + 33e7477 commit 648a4c3
Show file tree
Hide file tree
Showing 6 changed files with 179 additions and 6 deletions.
3 changes: 2 additions & 1 deletion doc/rados/command/list-inconsistent-obj.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@
"omap_digest_mismatch",
"size_mismatch",
"attr_value_mismatch",
"attr_name_mismatch"
"attr_name_mismatch",
"snapset_inconsistency"
]
},
"minItems": 0,
Expand Down
147 changes: 147 additions & 0 deletions qa/standalone/scrub/osd-scrub-repair.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2767,6 +2767,153 @@ function TEST_periodic_scrub_replicated() {
rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1
}

#
# Corrupt snapset in replicated pool
#
function TEST_corrupt_snapset_scrub_rep() {
# Scrub test for snapset corruption: for every test object, run
# "clear-snapset corrupt" against its head on one of the two OSDs, scrub the
# PG, and check that "rados list-inconsistent-obj" reports a
# "snapset_inconsistency" error for each object.
#
# $1 - directory passed to setup/run_mon/run_mgr/run_osd/teardown; the
#      scratch files change, json, checkcsjson and csjson are written there.
#
# Returns 1 as soon as any step guarded by "|| return 1" fails.
# Relies on jqfilter, sortkeys, sedfilter, getjson, DIFFCOLOPTS and CEPH_ROOT
# being set outside this function.
local dir=$1
local poolname=csr_pool
local total_objs=2

# Bring up a cluster with one mon (default pool size 2), one mgr and two OSDs.
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
run_mgr $dir x || return 1
run_osd $dir 0 || return 1
run_osd $dir 1 || return 1
create_rbd_pool || return 1
wait_for_clean || return 1

create_pool foo 1 || return 1
create_pool $poolname 1 1 || return 1
wait_for_clean || return 1

# Create ROBJ1..ROBJ$total_objs, each with an omap header and one omap key.
# Note: objname is not declared local.
for i in $(seq 1 $total_objs) ; do
objname=ROBJ${i}
add_something $dir $poolname $objname || return 1

rados --pool $poolname setomapheader $objname hdr-$objname || return 1
rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1
done

# NOTE(review): ROBJ0 is never created (objects are ROBJ1..ROBJ$total_objs).
# Presumably any name works because the pool is created with
# "create_pool $poolname 1 1" -- confirm that this yields a single PG.
local pg=$(get_pg $poolname ROBJ0)

for i in $(seq 1 $total_objs) ; do
objname=ROBJ${i}

# Alternate corruption between osd.0 and osd.1
local osd=$(expr $i % 2)

# mksnap is issued on every iteration with the same snapshot name and its
# exit status is not checked.
rados -p $poolname mksnap snap1
echo -n head_of_snapshot_data > $dir/change

# Both branches currently run the same commands: overwrite the object, then
# run clear-snapset on the head copy held by the selected OSD.
case $i in
1)
rados --pool $poolname put $objname $dir/change
objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1
;;

2)
rados --pool $poolname put $objname $dir/change
objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1
;;

esac
done
rm $dir/change

# The exit status of pg_scrub is not checked.
pg_scrub $pg

rados list-inconsistent-pg $poolname > $dir/json || return 1
# Check pg count
test $(jq '. | length' $dir/json) = "1" || return 1
# Check pgid
test $(jq -r '.[0]' $dir/json) = $pg || return 1

rados list-inconsistent-obj $pg > $dir/json || return 1

# Expected report. The literal JSON below goes through the same
# jq / python / sed pipeline as the actual output further down, so the two
# files can be compared with diff.
jq "$jqfilter" << EOF | python -c "$sortkeys" | sed -e "$sedfilter" > $dir/checkcsjson
{
"epoch": 34,
"inconsistents": [
{
"object": {
"name": "ROBJ1",
"nspace": "",
"locator": "",
"snap": "head",
"version": 8
},
"errors": [
"snapset_inconsistency"
],
"union_shard_errors": [],
"selected_object_info": "3:ce3f1d6a:::ROBJ1:head(27'8 client.4143.0:1 dirty|omap|data_digest s 21 uv 8 dd 53acb008 alloc_hint [0 0 0])",
"shards": [
{
"osd": 0,
"primary": false,
"errors": [],
"size": 21,
"snapset": "1=[1]:{1=[1]}"
},
{
"osd": 1,
"primary": true,
"errors": [],
"size": 21,
"snapset": "0=[]:{1=[1]}"
}
]
},
{
"object": {
"name": "ROBJ2",
"nspace": "",
"locator": "",
"snap": "head",
"version": 10
},
"errors": [
"snapset_inconsistency"
],
"union_shard_errors": [],
"selected_object_info": "3:e97ce31e:::ROBJ2:head(31'10 client.4155.0:1 dirty|omap|data_digest s 21 uv 10 dd 53acb008 alloc_hint [0 0 0])",
"shards": [
{
"osd": 0,
"primary": false,
"errors": [],
"size": 21,
"snapset": "0=[]:{1=[1]}"
},
{
"osd": 1,
"primary": true,
"errors": [],
"size": 21,
"snapset": "1=[1]:{1=[1]}"
}
]
}
]
}
EOF

# Normalize the actual report and compare. A mismatch is tolerated when
# getjson is "yes"; in that mode the raw report is saved to save6.json.
jq "$jqfilter" $dir/json | python -c "$sortkeys" | sed -e "$sedfilter" > $dir/csjson
diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
then
jq '.' $dir/json > save6.json
fi

# Validate the raw report against the JSON schema only when the jsonschema
# command is available.
if which jsonschema > /dev/null;
then
jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
fi

# Pool removal is best effort: its exit status is not checked.
rados rmpool $poolname $poolname --yes-i-really-really-mean-it
teardown $dir || return 1
}

main osd-scrub-repair "$@"

Expand Down
3 changes: 3 additions & 0 deletions src/common/scrub_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t {
// Sets the ATTR_NAME_MISMATCH bit in this object's error mask; bits that are
// already set are left untouched.
void set_attr_name_mismatch() {
errors |= obj_err_t::ATTR_NAME_MISMATCH;
}
// Sets the SNAPSET_INCONSISTENCY bit in this object's error mask; bits that
// are already set are left untouched.
void set_snapset_inconsistency() {
errors |= obj_err_t::SNAPSET_INCONSISTENCY;
}
void add_shard(const pg_shard_t& pgs, const shard_info_wrapper& shard);
void set_auth_missing(const hobject_t& hoid,
const map<pg_shard_t, ScrubMap*>&,
Expand Down
6 changes: 5 additions & 1 deletion src/include/rados/rados_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,11 @@ struct obj_err_t {
SIZE_MISMATCH = 1 << 6,
ATTR_VALUE_MISMATCH = 1 << 7,
ATTR_NAME_MISMATCH = 1 << 8,
SNAPSET_INCONSISTENCY = 1 << 9,
// When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS
};
uint64_t errors = 0;
static constexpr uint64_t SHALLOW_ERRORS = OBJECT_INFO_INCONSISTENCY|SIZE_MISMATCH|ATTR_VALUE_MISMATCH|ATTR_NAME_MISMATCH;
static constexpr uint64_t SHALLOW_ERRORS = OBJECT_INFO_INCONSISTENCY|SIZE_MISMATCH|ATTR_VALUE_MISMATCH|ATTR_NAME_MISMATCH|SNAPSET_INCONSISTENCY;
static constexpr uint64_t DEEP_ERRORS = DATA_DIGEST_MISMATCH|OMAP_DIGEST_MISMATCH;
bool has_object_info_inconsistency() const {
return errors & OBJECT_INFO_INCONSISTENCY;
Expand All @@ -180,6 +181,9 @@ struct obj_err_t {
// True when at least one bit of the DEEP_ERRORS mask is set in `errors`.
bool has_deep_errors() const {
  return (errors & DEEP_ERRORS) != 0;
}
// True when the SNAPSET_INCONSISTENCY bit is set in `errors`.
bool has_snapset_inconsistency() const {
  return (errors & SNAPSET_INCONSISTENCY) != 0;
}
};

struct inconsistent_obj_t : obj_err_t {
Expand Down
14 changes: 10 additions & 4 deletions src/osd/PGBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ map<pg_shard_t, ScrubMap *>::const_iterator
inconsistent_obj_wrapper &object_error)
{
eversion_t auth_version;
bufferlist first_bl;
bufferlist first_oi_bl, first_ss_bl;

// Create list of shards with primary first so it will be auth copy all
// other things being equal.
Expand Down Expand Up @@ -826,6 +826,12 @@ map<pg_shard_t, ScrubMap *>::const_iterator
try {
bufferlist::iterator bliter = ss_bl.begin();
decode(ss, bliter);
if (first_ss_bl.length() == 0) {
first_ss_bl.append(ss_bl);
} else if (!object_error.has_snapset_inconsistency() && !ss_bl.contents_equal(first_ss_bl)) {
object_error.set_snapset_inconsistency();
error_string += " snapset_inconsistency";
}
} catch (...) {
// invalid snapset, probably corrupt
shard_info.set_ss_attr_corrupted();
Expand Down Expand Up @@ -855,9 +861,9 @@ map<pg_shard_t, ScrubMap *>::const_iterator
// This is automatically corrected in PG::_repair_oinfo_oid()
assert(oi.soid == obj);

if (first_bl.length() == 0) {
first_bl.append(bl);
} else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_bl)) {
if (first_oi_bl.length() == 0) {
first_oi_bl.append(bl);
} else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_oi_bl)) {
object_error.set_object_info_inconsistency();
error_string += " object_info_inconsistency";
}
Expand Down
12 changes: 12 additions & 0 deletions src/tools/rados/rados.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1379,6 +1379,16 @@ static void dump_shard(const shard_info_t& shard,
decode(oi, bliter); // Can't be corrupted
f.dump_stream("object_info") << oi;
}
if (!shard.has_ss_attr_missing() && !shard.has_ss_attr_corrupted() &&
inc.has_snapset_inconsistency()) {
SnapSet ss;
bufferlist bl;
map<std::string, ceph::bufferlist>::iterator k = (const_cast<shard_info_t&>(shard)).attrs.find(SS_ATTR);
assert(k != shard.attrs.end()); // Can't be missing
bufferlist::iterator bliter = k->second.begin();
decode(ss, bliter); // Can't be corrupted
f.dump_stream("snapset") << ss;
}
if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()
|| inc.union_shards.has_oi_attr_missing()
|| inc.union_shards.has_oi_attr_corrupted()
Expand Down Expand Up @@ -1412,6 +1422,8 @@ static void dump_obj_errors(const obj_err_t &err, Formatter &f)
f.dump_string("error", "attr_value_mismatch");
if (err.has_attr_name_mismatch())
f.dump_string("error", "attr_name_mismatch");
if (err.has_snapset_inconsistency())
f.dump_string("error", "snapset_inconsistency");
f.close_section();
}

Expand Down

0 comments on commit 648a4c3

Please sign in to comment.