diff --git a/doc/dev/placement-group.rst b/doc/dev/placement-group.rst
index 2994752f04bbb..e29be2fa69c85 100644
--- a/doc/dev/placement-group.rst
+++ b/doc/dev/placement-group.rst
@@ -186,6 +186,9 @@ User-visible PG States
 *forced_backfill*
   the PG has been marked for highest priority backfill
 
+*failed_repair*
+  an attempt to repair the PG has failed.  Manual intervention is required.
+
 OMAP STATISTICS
 ===============
 
diff --git a/doc/rados/configuration/osd-config-ref.rst b/doc/rados/configuration/osd-config-ref.rst
index 937c8b848a37d..935f4075c1f4b 100644
--- a/doc/rados/configuration/osd-config-ref.rst
+++ b/doc/rados/configuration/osd-config-ref.rst
@@ -361,8 +361,8 @@ scrubbing operations.
 ``osd scrub auto repair``
 
 :Description: Setting this to ``true`` will enable automatic pg repair when errors
-              are found in deep-scrub. However, if more than ``osd scrub auto repair num errors``
-              errors are found a repair is NOT performed.
+              are found in scrub or deep-scrub. However, if more than
+              ``osd scrub auto repair num errors`` errors are found, a repair is NOT performed.
 :Type: Boolean
 :Default: ``false``
 
diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh
index de35bc18b0700..6b501bc875c27 100755
--- a/qa/standalone/osd/osd-rep-recov-eio.sh
+++ b/qa/standalone/osd/osd-rep-recov-eio.sh
@@ -110,18 +110,30 @@ function rados_get_data() {
     local poolname=pool-rep
     local objname=obj-$inject-$$
 
+    local pgid=$(get_pg $poolname $objname)
+
     rados_put $dir $poolname $objname || return 1
     inject_$inject rep data $poolname $objname $dir 0 || return 1
     rados_get $dir $poolname $objname || return 1
 
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "1" || return 1
+
     inject_$inject rep data $poolname $objname $dir 0 || return 1
     inject_$inject rep data $poolname $objname $dir 1 || return 1
     rados_get $dir $poolname $objname || return 1
 
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "2" || return 1
+
     inject_$inject rep data $poolname $objname $dir 0 || return 1
     inject_$inject rep data $poolname $objname $dir 1 || return 1
     inject_$inject rep data $poolname $objname $dir 2 || return 1
     rados_get $dir $poolname $objname hang || return 1
+
+    # After the hang no further repair could have happened, so the count stays the same
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "2" || return 1
 }
 
 function TEST_rados_get_with_eio() {
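The documentation hunk above broadens auto repair from deep scrub only to plain scrub as well, still capped by ``osd scrub auto repair num errors``. A minimal C++ sketch of that gating decision follows; `should_auto_repair` and its parameters are invented for illustration (the real logic lives in `PG::sched_scrub` and `PG::scrub_finish` further down in this patch):

```cpp
#include <cstddef>

// Hypothetical condensation of the auto-repair gate; the three inputs mirror
// osd_scrub_auto_repair, the backend capability check, and the
// osd_scrub_auto_repair_num_errors threshold from this PR.
bool should_auto_repair(bool conf_auto_repair, bool backend_supported,
                        bool user_requested, std::size_t errors_found,
                        std::size_t max_auto_repair_errors) {
  if (!conf_auto_repair || !backend_supported)
    return false;
  if (user_requested)          // explicit scrub/repair commands are respected as-is
    return false;
  return errors_found <= max_auto_repair_errors;  // too many errors: stay inconsistent
}
```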
diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh
index a81ad10e366c2..b62e2c086c813 100755
--- a/qa/standalone/scrub/osd-scrub-repair.sh
+++ b/qa/standalone/scrub/osd-scrub-repair.sh
@@ -193,7 +193,9 @@ function corrupt_and_repair_erasure_coded() {
 
 function create_ec_pool() {
     local pool_name=$1
-    local allow_overwrites=$2
+    shift
+    local allow_overwrites=$1
+    shift
 
     ceph osd erasure-code-profile set myprofile crush-failure-domain=osd "$@" || return 1
 
@@ -266,6 +268,383 @@ function TEST_auto_repair_erasure_coded_overwrites() {
     fi
 }
 
+function TEST_auto_repair_bluestore_basic() {
+    local dir=$1
+    local poolname=testpool
+
+    # Launch a cluster with auto repair enabled and deterministic scrub scheduling
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    local ceph_osd_args="--osd-scrub-auto-repair=true \
+            --osd_deep_scrub_randomize_ratio=0 \
+            --osd-scrub-interval-randomize-ratio=0"
+    for id in $(seq 0 2) ; do
+        run_osd_bluestore $dir $id $ceph_osd_args || return 1
+    done
+
+    create_pool $poolname 1 1 || return 1
+    ceph osd pool set $poolname size 2
+    wait_for_clean || return 1
+
+    # Put an object
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
+
+    # Remove the object from one shard physically
+    # Restarted OSDs get $ceph_osd_args passed
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
+
+    local pgid=$(get_pg $poolname SOMETHING)
+    local primary=$(get_primary $poolname SOMETHING)
+    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_deep_scrub $pgid
+    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid
+
+    # Wait for auto repair
+    wait_for_scrub $pgid "$last_scrub_stamp" || return 1
+    wait_for_clean || return 1
+    ceph pg dump pgs
+    # Verify - the file should be back
+    # Restarted OSDs get $ceph_osd_args passed
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1
+    diff $dir/ORIGINAL $dir/COPY || return 1
+    grep scrub_finish $dir/osd.${primary}.log
+
+    # Tear down
+    teardown $dir || return 1
+}
+
+function TEST_auto_repair_bluestore_scrub() {
+    local dir=$1
+    local poolname=testpool
+
+    # Launch a cluster with auto repair enabled and deterministic scrub scheduling
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    local ceph_osd_args="--osd-scrub-auto-repair=true \
+            --osd_deep_scrub_randomize_ratio=0 \
+            --osd-scrub-interval-randomize-ratio=0"
+    for id in $(seq 0 2) ; do
+        run_osd_bluestore $dir $id $ceph_osd_args || return 1
+    done
+
+    create_pool $poolname 1 1 || return 1
+    ceph osd pool set $poolname size 2
+    wait_for_clean || return 1
+
+    # Put an object
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
+
+    # Remove the object from one shard physically
+    # Restarted OSDs get $ceph_osd_args passed
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
+
+    local pgid=$(get_pg $poolname SOMETHING)
+    local primary=$(get_primary $poolname SOMETHING)
+    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid
+
+    # Wait for scrub -> auto repair
+    wait_for_scrub $pgid "$last_scrub_stamp" || return 1
+    ceph pg dump pgs
+    # This actually triggers two scrubs, so wait a little longer
+    sleep 5
+    wait_for_clean || return 1
+    ceph pg dump pgs
+    # Verify - the file should be back
+    # Restarted OSDs get $ceph_osd_args passed
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
+    rados --pool $poolname get SOMETHING $dir/COPY || return 1
+    diff $dir/ORIGINAL $dir/COPY || return 1
+    grep scrub_finish $dir/osd.${primary}.log
+
+    # This should have caused 1 object to be repaired
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "1" || return 1
+
+    # Tear down
+    teardown $dir || return 1
+}
+
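The two tests above differ only in the trigger: with a plain scrub, auto repair now goes through an intermediate step, which is why the scrub variant waits for a second scrub. A condensed sketch of that hand-off (illustrative struct and function, distilled from the `PG::scrub_finish` changes later in this patch):

```cpp
#include <cstddef>

// Hypothetical condensation of the hand-off added to PG::scrub_finish();
// the names here are illustrative, not the real Ceph interfaces.
struct ShallowScrubResult {
  std::size_t errors_found = 0;
  bool deep_scrub_on_error = false;  // set by sched_scrub for shallow auto-repair
};

bool wants_followup_deep_scrub(const ShallowScrubResult& r,
                               std::size_t max_auto_repair_errors) {
  // A shallow scrub never repairs in place; if it saw a repairable number of
  // errors, the PG is immediately rescheduled for a deep scrub that runs with
  // repair enabled (the scrub stamps are backdated to make the PG "overdue").
  return r.deep_scrub_on_error && r.errors_found <= max_auto_repair_errors;
}
```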
+function TEST_auto_repair_bluestore_failed() {
+    local dir=$1
+    local poolname=testpool
+
+    # Launch a cluster with auto repair enabled and deterministic scrub scheduling
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    local ceph_osd_args="--osd-scrub-auto-repair=true \
+            --osd_deep_scrub_randomize_ratio=0 \
+            --osd-scrub-interval-randomize-ratio=0"
+    for id in $(seq 0 2) ; do
+        run_osd_bluestore $dir $id $ceph_osd_args || return 1
+    done
+
+    create_pool $poolname 1 1 || return 1
+    ceph osd pool set $poolname size 2
+    wait_for_clean || return 1
+
+    # Put objects
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    for i in $(seq 1 10)
+    do
+        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+    done
+
+    # Remove obj1 from one shard physically
+    # Restarted OSDs get $ceph_osd_args passed
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1
+    # obj2 can't be repaired
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1
+    objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1
+
+    local pgid=$(get_pg $poolname obj1)
+    local primary=$(get_primary $poolname obj1)
+    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_deep_scrub $pgid
+    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid
+
+    # Wait for auto repair
+    wait_for_scrub $pgid "$last_scrub_stamp" || return 1
+    wait_for_clean || return 1
+    flush_pg_stats
+    grep scrub_finish $dir/osd.${primary}.log
+    grep -q "scrub_finish.*still present after re-scrub" $dir/osd.${primary}.log || return 1
+    ceph pg dump pgs
+    ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1
+
+    # Verify - obj1 should be back
+    # Restarted OSDs get $ceph_osd_args passed
+    objectstore_tool $dir $(get_not_primary $poolname obj1) obj1 list-attrs || return 1
+    rados --pool $poolname get obj1 $dir/COPY || return 1
+    diff $dir/ORIGINAL $dir/COPY || return 1
+    grep scrub_finish $dir/osd.${primary}.log
+
+    # Make it repairable
+    objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 remove || return 1
+    repair $pgid
+    sleep 2
+
+    ceph pg dump pgs
+    ceph pg dump pgs | grep -q "^${pgid}.* active+clean " || return 1
+    grep scrub_finish $dir/osd.${primary}.log
+
+    # Tear down
+    teardown $dir || return 1
+}
+
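What the test above pins down is the lifecycle of the new state bit: `failed_repair` is sticky across scrubs but not permanent. A condensed sketch of the transitions (the real ones are spread across `PG::scrub_finish` and `PG::publish_stats_to_osd` below; the function and the `INCONSISTENT` bit value here are illustrative, the bit-32 value matches `osd_types.h`):

```cpp
#include <cstdint>

constexpr std::uint64_t PG_STATE_INCONSISTENT  = 1ULL << 11;  // illustrative
constexpr std::uint64_t PG_STATE_FAILED_REPAIR = 1ULL << 32;  // matches osd_types.h

// Applied whenever PG stats are republished:
std::uint64_t update_repair_state(std::uint64_t state, unsigned num_scrub_errors,
                                  bool repair_attempted, bool errors_unfixable) {
  if (num_scrub_errors == 0) {
    // a clean (re-)scrub retires both flags
    state &= ~(PG_STATE_INCONSISTENT | PG_STATE_FAILED_REPAIR);
  } else {
    state |= PG_STATE_INCONSISTENT;
    if (repair_attempted && errors_unfixable)
      state |= PG_STATE_FAILED_REPAIR;  // surfaces as +failed_repair in pg dump
  }
  return state;
}
```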
+function TEST_auto_repair_bluestore_failed_norecov() {
+    local dir=$1
+    local poolname=testpool
+
+    # Launch a cluster with auto repair enabled and deterministic scrub scheduling
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    local ceph_osd_args="--osd-scrub-auto-repair=true \
+            --osd_deep_scrub_randomize_ratio=0 \
+            --osd-scrub-interval-randomize-ratio=0"
+    for id in $(seq 0 2) ; do
+        run_osd_bluestore $dir $id $ceph_osd_args || return 1
+    done
+
+    create_pool $poolname 1 1 || return 1
+    ceph osd pool set $poolname size 2
+    wait_for_clean || return 1
+
+    # Put objects
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    for i in $(seq 1 10)
+    do
+        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+    done
+
+    # Remove objects from one shard physically
+    # Restarted OSDs get $ceph_osd_args passed
+    # obj1 can't be repaired
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1
+    objectstore_tool $dir $(get_primary $poolname SOMETHING) obj1 rm-attr _ || return 1
+    # obj2 can't be repaired
+    objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1
+    objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1
+
+    local pgid=$(get_pg $poolname obj1)
+    local primary=$(get_primary $poolname obj1)
+    local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
+    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_deep_scrub $pgid
+    CEPH_ARGS='' ceph daemon $(get_asok_path osd.$primary) trigger_scrub $pgid
+
+    # Wait for auto repair
+    wait_for_scrub $pgid "$last_scrub_stamp" || return 1
+    wait_for_clean || return 1
+    flush_pg_stats
+    grep -q "scrub_finish.*present with no repair possible" $dir/osd.${primary}.log || return 1
+    ceph pg dump pgs
+    ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1
+
+    # Tear down
+    teardown $dir || return 1
+}
+
+function TEST_repair_stats() {
+    local dir=$1
+    local poolname=testpool
+    local OSDS=2
+    local OBJS=30
+    # This needs to be an even number
+    local REPAIRS=20
+
+    # Launch a cluster with deterministic scrub scheduling
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+            --osd-scrub-interval-randomize-ratio=0"
+    for id in $(seq 0 $(expr $OSDS - 1)) ; do
+        run_osd_bluestore $dir $id $ceph_osd_args || return 1
+    done
+
+    create_pool $poolname 1 1 || return 1
+    ceph osd pool set $poolname size 2
+    wait_for_clean || return 1
+
+    # Put objects
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    for i in $(seq 1 $OBJS)
+    do
+        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+    done
+
+    # Remove object shards physically while the OSDs are down
+    # Restarted OSDs get $ceph_osd_args passed
+    local other=$(get_not_primary $poolname obj1)
+    local pgid=$(get_pg $poolname obj1)
+    local primary=$(get_primary $poolname obj1)
+
+    kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+    kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
+    for i in $(seq 1 $REPAIRS)
+    do
+        # Alternate removals between osd.0 and osd.1
+        OSD=$(expr $i % 2)
+        _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
+    done
+    run_osd_bluestore $dir $primary $ceph_osd_args || return 1
+    run_osd_bluestore $dir $other $ceph_osd_args || return 1
+    wait_for_clean || return 1
+
+    repair $pgid
+    wait_for_clean || return 1
+    ceph pg dump pgs
+
+    # This should have caused $REPAIRS objects to be repaired
+    ceph pg $pgid query | jq '.info.stats.stat_sum'
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "$REPAIRS" || return 1
+
+    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )"
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired")
+    test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )"
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired")
+    test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    test "$COUNT" = "$REPAIRS" || return 1
+
+    # Tear down
+    teardown $dir || return 1
+}
+
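The assertions above are easier to read with the accounting spelled out. The PR adds two counters at different granularity: `num_objects_repaired` (per PG, in `object_stat_sum_t`) counts objects, while `num_shards_repaired` (per OSD, in `osd_stat_t`) counts repaired shard copies. For the replicated test the numbers work out as follows (a worked sketch of the expectations, not test code):

```cpp
#include <cassert>

int main() {
  const int REPAIRS = 20;               // objects removed, alternating between 2 OSDs
  int num_objects_repaired = REPAIRS;   // PG-level: one per damaged object
  int shards_on_primary    = REPAIRS / 2;  // even i went to one OSD...
  int shards_on_other      = REPAIRS / 2;  // ...odd i to the other
  int osd_stats_sum        = shards_on_primary + shards_on_other;
  // replicated pool, one lost shard per object => the sums coincide
  assert(osd_stats_sum == num_objects_repaired);
  return 0;
}
```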
+function TEST_repair_stats_ec() {
+    local dir=$1
+    local poolname=testpool
+    local OSDS=3
+    local OBJS=30
+    # This needs to be an even number
+    local REPAIRS=26
+    local allow_overwrites=false
+
+    # Launch a cluster with deterministic scrub scheduling
+    setup $dir || return 1
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+            --osd-scrub-interval-randomize-ratio=0"
+    for id in $(seq 0 $(expr $OSDS - 1)) ; do
+        run_osd_bluestore $dir $id $ceph_osd_args || return 1
+    done
+
+    # Create an EC pool
+    create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1
+
+    # Put objects
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    for i in $(seq 1 $OBJS)
+    do
+        rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+    done
+
+    # Remove object shards physically while the OSDs are down
+    # Restarted OSDs get $ceph_osd_args passed
+    local other=$(get_not_primary $poolname obj1)
+    local pgid=$(get_pg $poolname obj1)
+    local primary=$(get_primary $poolname obj1)
+
+    kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+    kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
+    for i in $(seq 1 $REPAIRS)
+    do
+        # Alternate removals between osd.0 and osd.1
+        OSD=$(expr $i % 2)
+        _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
+    done
+    run_osd_bluestore $dir $primary $ceph_osd_args || return 1
+    run_osd_bluestore $dir $other $ceph_osd_args || return 1
+    wait_for_clean || return 1
+
+    repair $pgid
+    wait_for_clean || return 1
+    ceph pg dump pgs
+
+    # This should have caused $REPAIRS objects to be repaired
+    ceph pg $pgid query | jq '.info.stats.stat_sum'
+    COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+    test "$COUNT" = "$REPAIRS" || return 1
+
+    for osd in $(seq 0 $(expr $OSDS - 1)) ; do
+        if [ $osd = $other -o $osd = $primary ]; then
+            repair=$(expr $REPAIRS / 2)
+        else
+            repair="0"
+        fi
+
+        ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd )"
+        COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired")
+        test "$COUNT" = "$repair" || return 1
+    done
+
+    ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+    COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+    test "$COUNT" = "$REPAIRS" || return 1
+
+    # Tear down
+    teardown $dir || return 1
+}
+
 function corrupt_and_repair_jerasure() {
     local dir=$1
     local allow_overwrites=$2
@@ -291,7 +670,7 @@ function corrupt_and_repair_jerasure() {
 }
 
 function TEST_corrupt_and_repair_jerasure_appends() {
-    corrupt_and_repair_jerasure $1
+    corrupt_and_repair_jerasure $1 false
 }
 
 function TEST_corrupt_and_repair_jerasure_overwrites() {
@@ -325,12 +704,12 @@ function corrupt_and_repair_lrc() {
 }
 
 function TEST_corrupt_and_repair_lrc_appends() {
-    corrupt_and_repair_jerasure $1
+    corrupt_and_repair_lrc $1 false
 }
 
 function TEST_corrupt_and_repair_lrc_overwrites() {
     if [ "$use_ec_overwrite" = "true" ]; then
-        corrupt_and_repair_jerasure $1 true
+        corrupt_and_repair_lrc $1 true
     fi
 }
 
@@ -393,7 +772,7 @@ function unfound_erasure_coded() {
 }
 
 function TEST_unfound_erasure_coded_appends() {
-    unfound_erasure_coded $1
+    unfound_erasure_coded $1 false
 }
 
 function TEST_unfound_erasure_coded_overwrites() {
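The shell changes end here; the remaining hunks are OSD-side plumbing. First the wire format: the primary has to tell pushed-to peers that a push belongs to a repair, so `MOSDPGPush` grows a flag guarded by a message-version bump. The decode side defaults the flag when the sender is older, which is the standard pattern; a self-contained sketch with simplified stand-in types (not the real Message classes):

```cpp
#include <cstdint>

struct Decoder {               // stand-in for ceph::decode over a payload iterator
  bool next = false;
  bool read_bool() { return next; }
};

// Versioned decode: only consume bytes the peer actually wrote.
bool decode_is_repair(Decoder& p, std::uint8_t header_version) {
  if (header_version >= 4)     // HEAD_VERSION bumped 3 -> 4 in this PR
    return p.read_bool();      // new trailing field is present
  return false;                // older peer: safe default, not a repair
}
```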
diff --git a/src/messages/MOSDPGPush.h b/src/messages/MOSDPGPush.h
index d7da913306728..3960ad70ea387 100644
--- a/src/messages/MOSDPGPush.h
+++ b/src/messages/MOSDPGPush.h
@@ -21,7 +21,7 @@ class MOSDPGPush : public MessageInstance<MOSDPGPush, MOSDFastDispatchOp> {
 public:
   friend factory;
 private:
-  static constexpr int HEAD_VERSION = 3;
+  static constexpr int HEAD_VERSION = 4;
   static constexpr int COMPAT_VERSION = 2;
 
 public:
@@ -29,6 +29,7 @@ class MOSDPGPush : public MessageInstance<MOSDPGPush, MOSDFastDispatchOp> {
   spg_t pgid;
   epoch_t map_epoch = 0, min_epoch = 0;
   vector<PushOp> pushes;
+  bool is_repair = false;
 
 private:
   uint64_t cost;
@@ -79,6 +80,11 @@ class MOSDPGPush : public MessageInstance<MOSDPGPush, MOSDFastDispatchOp> {
     } else {
       min_epoch = map_epoch;
     }
+    if (header.version >= 4) {
+      decode(is_repair, p);
+    } else {
+      is_repair = false;
+    }
   }
 
   void encode_payload(uint64_t features) override {
@@ -90,6 +96,7 @@ class MOSDPGPush : public MessageInstance<MOSDPGPush, MOSDFastDispatchOp> {
     encode(pgid.shard, payload);
    encode(from, payload);
     encode(min_epoch, payload);
+    encode(is_repair, payload);
   }
 
   std::string_view get_type_name() const override { return "MOSDPGPush"; }
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index 9967cad86234a..78731fd707911 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -2380,7 +2380,7 @@ void PGMap::get_health_checks(
 
   // Specialized state printer that takes account of inversion of
   // ACTIVE, CLEAN checks.
-  auto state_name = [](const uint32_t &state) {
+  auto state_name = [](const uint64_t &state) {
     // Special cases for the states that are inverted checks
     if (state == PG_STATE_CLEAN) {
       return std::string("unclean");
diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index c20e10a25ac24..36a77cc7417a9 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -285,7 +285,8 @@ struct RecoveryMessages {
 
 void ECBackend::handle_recovery_push(
   const PushOp &op,
-  RecoveryMessages *m)
+  RecoveryMessages *m,
+  bool is_repair)
 {
   if (get_parent()->check_failsafe_full()) {
     dout(10) << __func__ << " Out of space (failsafe) processing push request." << dendl;
@@ -361,6 +362,8 @@ void ECBackend::handle_recovery_push(
   if ((get_parent()->pgb_is_primary())) {
     ceph_assert(recovery_ops.count(op.soid));
     ceph_assert(recovery_ops[op.soid].obc);
+    if (get_parent()->pg_is_repair())
+      get_parent()->inc_osd_stat_repaired();
     get_parent()->on_local_recover(
       op.soid,
       op.recovery_info,
@@ -368,6 +371,9 @@ void ECBackend::handle_recovery_push(
       false,
       &m->t);
   } else {
+    // If the primary told us this is a repair, bump osd_stat_t::num_shards_repaired
+    if (is_repair)
+      get_parent()->inc_osd_stat_repaired();
     get_parent()->on_local_recover(
       op.soid,
       op.recovery_info,
       ObjectContextRef(),
       false,
       &m->t);
   }
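Note the asymmetry the hunk above encodes: the primary can consult its own PG state, while replicas have no `PG_STATE_REPAIR` of their own and must trust the flag carried by the push message. Schematically (an illustrative free function, not the real backend interface):

```cpp
// Who decides that a completed push counts as a repair:
void account_push(bool i_am_primary, bool my_pg_is_repair, bool msg_is_repair,
                  unsigned long& num_shards_repaired) {
  if (i_am_primary) {
    if (my_pg_is_repair)        // primary: authoritative local state
      ++num_shards_repaired;
  } else if (msg_is_repair) {   // replica: the primary's word via MOSDPGPush
    ++num_shards_repaired;
  }
}
```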
@@ -517,6 +523,7 @@ void ECBackend::dispatch_recovery_messages(RecoveryMessages &m, int priority)
     msg->pgid = spg_t(get_parent()->get_info().pgid.pgid, i->first.shard);
     msg->pushes.swap(i->second);
     msg->compute_cost(cct);
+    msg->is_repair = get_parent()->pg_is_repair();
     get_parent()->send_message(
       i->first.osd,
       msg);
@@ -682,6 +689,8 @@ void ECBackend::continue_recovery_op(
       stat.num_bytes_recovered = op.recovery_info.size;
       stat.num_keys_recovered = 0; // ??? op ... omap_entries.size(); ?
       stat.num_objects_recovered = 1;
+      if (get_parent()->pg_is_repair())
+        stat.num_objects_repaired = 1;
       get_parent()->on_global_recover(op.hoid, stat, false);
       dout(10) << __func__ << ": WRITING return " << op << dendl;
       recovery_ops.erase(op.hoid);
@@ -823,7 +832,7 @@ bool ECBackend::_handle_message(
     for (vector<PushOp>::const_iterator i = op->pushes.begin();
          i != op->pushes.end();
          ++i) {
-      handle_recovery_push(*i, &rm);
+      handle_recovery_push(*i, &rm, op->is_repair);
     }
     dispatch_recovery_messages(rm, priority);
     return true;
diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h
index 89d5dcbcb5240..e003a08c73667 100644
--- a/src/osd/ECBackend.h
+++ b/src/osd/ECBackend.h
@@ -306,7 +306,8 @@ class ECBackend : public PGBackend {
     RecoveryMessages *m);
   void handle_recovery_push(
     const PushOp &op,
-    RecoveryMessages *m);
+    RecoveryMessages *m,
+    bool is_repair);
   void handle_recovery_push_reply(
     const PushReplyOp &op,
     pg_shard_t from,
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 020fb437294e1..d32c73b25d568 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -945,6 +945,12 @@ osd_stat_t OSDService::set_osd_stat(vector<int>& hb_peers,
   return osd_stat;
 }
 
+void OSDService::inc_osd_stat_repaired()
+{
+  std::lock_guard l(stat_lock);
+  osd_stat.num_shards_repaired++;
+}
+
 float OSDService::compute_adjusted_ratio(osd_stat_t new_stat, float *pratio,
                                          uint64_t adjust_used)
 {
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 12ceefc9f5f0d..ba01a8eb46379 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -907,6 +907,7 @@ class OSDService {
   void set_statfs(const struct store_statfs_t &stbuf,
     osd_alert_list_t& alerts);
   osd_stat_t set_osd_stat(vector<int>& hb_peers, int num_pgs);
+  void inc_osd_stat_repaired(void);
   float compute_adjusted_ratio(osd_stat_t new_stat, float *pratio,
                                uint64_t adjust_used = 0);
   osd_stat_t get_osd_stat() {
     std::lock_guard l(stat_lock);
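`inc_osd_stat_repaired()` takes `stat_lock` because `osd_stat` is concurrently read and rewritten by the reporting path (`set_osd_stat`). A minimal sketch of the same discipline, with assumed names standing in for `OSDService`:

```cpp
#include <cstdint>
#include <mutex>

struct StatHolder {
  std::mutex stat_lock;                  // guards every osd_stat access
  std::uint64_t num_shards_repaired = 0;
  void inc_repaired() {
    std::lock_guard<std::mutex> l(stat_lock);
    ++num_shards_repaired;               // recovery threads bump this...
  }
  std::uint64_t snapshot() {
    std::lock_guard<std::mutex> l(stat_lock);
    return num_shards_repaired;          // ...while the reporting path reads it
  }
};
```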
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 1a39044bef8cd..4859fc04b0d49 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1122,6 +1122,8 @@ PG::Scrubber::Scrubber()
    shallow_errors(0), deep_errors(0), fixed(0),
    must_scrub(false), must_deep_scrub(false), must_repair(false),
    auto_repair(false),
+   check_repair(false),
+   deep_scrub_on_error(false),
    num_digest_updates_pending(0),
    state(INACTIVE),
    deep(false)
@@ -2330,6 +2332,8 @@ bool PG::queue_scrub()
   if (is_scrubbing()) {
     return false;
   }
+  // An interrupted recovery repair could leave this set.
+  state_clear(PG_STATE_REPAIR);
   scrubber.priority = scrubber.must_scrub ?
          cct->_conf->osd_requested_scrub_priority : get_scrub_priority();
   scrubber.must_scrub = false;
@@ -2539,6 +2543,8 @@ Context *PG::finish_recovery()
 void PG::_finish_recovery(Context *c)
 {
   lock();
+  // When recovery is initiated by a repair, that flag is left on
+  state_clear(PG_STATE_REPAIR);
   if (deleting) {
     unlock();
     return;
@@ -2554,6 +2560,7 @@ void PG::_finish_recovery(Context *c)
       dout(10) << "_finish_recovery requeueing for scrub" << dendl;
       scrub_after_recovery = false;
       scrubber.must_deep_scrub = true;
+      scrubber.check_repair = true;
       queue_scrub();
     }
   } else {
@@ -3443,8 +3450,10 @@ void PG::publish_stats_to_osd()
 
   if (info.stats.stats.sum.num_scrub_errors)
     state_set(PG_STATE_INCONSISTENT);
-  else
+  else {
     state_clear(PG_STATE_INCONSISTENT);
+    state_clear(PG_STATE_FAILED_REPAIR);
+  }
 
   utime_t now = ceph_clock_now();
   if (info.stats.state != state) {
@@ -4311,19 +4320,23 @@ bool PG::sched_scrub()
     }
   }
 
+  // Clear these in case the user issued a scrub/repair command during
+  // the scheduling of the scrub/repair (e.g. request reservation)
+  scrubber.deep_scrub_on_error = false;
+  scrubber.auto_repair = false;
   if (cct->_conf->osd_scrub_auto_repair
       && get_pgbackend()->auto_repair_supported()
-      && time_for_deep
       // respect the command from user, and not do auto-repair
       && !scrubber.must_repair
       && !scrubber.must_scrub
      && !scrubber.must_deep_scrub) {
-    dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl;
-    scrubber.auto_repair = true;
-  } else {
-    // this happens when user issue the scrub/repair command during
-    // the scheduling of the scrub/repair (e.g. request reservation)
-    scrubber.auto_repair = false;
+    if (time_for_deep) {
+      dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl;
+      scrubber.auto_repair = true;
+    } else {
+      dout(20) << __func__ << ": auto repair with scrubbing, rescrub if errors found" << dendl;
+      scrubber.deep_scrub_on_error = true;
+    }
   }
 
   bool ret = true;
@@ -5533,11 +5546,12 @@ bool PG::range_intersects_scrub(const hobject_t &start, const hobject_t& end)
           end >= scrubber.start);
 }
 
-void PG::scrub_clear_state()
+void PG::scrub_clear_state(bool has_error)
 {
   ceph_assert(is_locked());
   state_clear(PG_STATE_SCRUBBING);
-  state_clear(PG_STATE_REPAIR);
+  if (!has_error)
+    state_clear(PG_STATE_REPAIR);
   state_clear(PG_STATE_DEEP_SCRUB);
   publish_stats_to_osd();
 
@@ -5722,7 +5736,9 @@ bool PG::ops_blocked_by_scrub() const {
 // the part that actually finalizes a scrub
 void PG::scrub_finish()
 {
+  dout(20) << __func__ << dendl;
   bool repair = state_test(PG_STATE_REPAIR);
+  bool do_deep_scrub = false;
   // if the repair request comes from auto-repair and large number of errors,
   // we would like to cancel auto-repair
   if (repair && scrubber.auto_repair
@@ -5733,6 +5749,15 @@ void PG::scrub_finish()
   bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB);
   const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub"));
 
+  // if a regular scrub had errors within the limit, do a deep scrub to auto repair.
+  if (scrubber.deep_scrub_on_error
+      && scrubber.authoritative.size() <= cct->_conf->osd_scrub_auto_repair_num_errors) {
+    ceph_assert(!deep_scrub);
+    scrubber.deep_scrub_on_error = false;
+    do_deep_scrub = true;
+    dout(20) << __func__ << " Try to auto repair after scrub errors" << dendl;
+  }
+
   // type-specific finish (can tally more errors)
   _scrub_finish();
 
@@ -5772,10 +5797,17 @@ void PG::scrub_finish()
     if (scrubber.fixed == scrubber.shallow_errors + scrubber.deep_errors) {
       ceph_assert(deep_scrub);
       scrubber.shallow_errors = scrubber.deep_errors = 0;
-    } else {
+      dout(20) << __func__ << " All may be fixed" << dendl;
+    } else if (has_error) {
       // Deep scrub in order to get corrected error counts
       scrub_after_recovery = true;
-    }
+      dout(20) << __func__ << " Set scrub_after_recovery" << dendl;
+    } else if (scrubber.shallow_errors || scrubber.deep_errors) {
+      // We have errors but nothing can be fixed, so there is no repair
+      // possible.
+      state_set(PG_STATE_FAILED_REPAIR);
+      dout(10) << __func__ << " " << (scrubber.shallow_errors + scrubber.deep_errors)
+               << " error(s) present with no repair possible" << dendl;
+    }
   }
   if (deep_scrub) {
     if ((scrubber.shallow_errors == 0) && (scrubber.deep_errors == 0))
@@ -5788,7 +5820,6 @@ void PG::scrub_finish()
     dout(25) << __func__ << " shard " << pg_whoami << " num_omap_bytes = "
              << info.stats.stats.sum.num_omap_bytes << " num_omap_keys = "
             << info.stats.stats.sum.num_omap_keys << dendl;
-    publish_stats_to_osd();
   } else {
     info.stats.stats.sum.num_shallow_scrub_errors = scrubber.shallow_errors;
     // XXX: last_clean_scrub_stamp doesn't mean the pg is not inconsistent
@@ -5799,6 +5830,22 @@ void PG::scrub_finish()
   info.stats.stats.sum.num_scrub_errors =
     info.stats.stats.sum.num_shallow_scrub_errors +
     info.stats.stats.sum.num_deep_scrub_errors;
+  if (scrubber.check_repair) {
+    scrubber.check_repair = false;
+    if (info.stats.stats.sum.num_scrub_errors) {
+      state_set(PG_STATE_FAILED_REPAIR);
+      dout(10) << __func__ << " " << info.stats.stats.sum.num_scrub_errors
+               << " error(s) still present after re-scrub" << dendl;
+    }
+  }
+  publish_stats_to_osd();
+  if (do_deep_scrub) {
+    // XXX: Auto scrub won't activate if must_scrub is set, but
+    // setting the scrub stamps affects what users see.
+    utime_t stamp = utime_t(0,1);
+    set_last_scrub_stamp(stamp);
+    set_last_deep_scrub_stamp(stamp);
+  }
   reg_next_scrub();
 
   {
@@ -5819,7 +5866,7 @@ void PG::scrub_finish()
             DoRecovery())));
   }
 
-  scrub_clear_state();
+  scrub_clear_state(has_error);
   scrub_unreserve_replicas();
 
   if (is_active() && is_primary()) {
@@ -6460,6 +6507,10 @@ ostream& operator<<(ostream& out, const PG& pg)
       out << " MUST_REPAIR";
     if (pg.scrubber.auto_repair)
       out << " AUTO_REPAIR";
+    if (pg.scrubber.check_repair)
+      out << " CHECK_REPAIR";
+    if (pg.scrubber.deep_scrub_on_error)
+      out << " DEEP_SCRUB_ON_ERROR";
     if (pg.scrubber.must_deep_scrub)
       out << " MUST_DEEP_SCRUB";
     if (pg.scrubber.must_scrub)
@@ -7641,6 +7692,7 @@ PG::RecoveryState::NotBackfilling::NotBackfilling(my_context ctx)
 {
   context< RecoveryMachine >().log_enter(state_name);
   PG *pg = context< RecoveryMachine >().pg;
+  pg->state_clear(PG_STATE_REPAIR);
   pg->publish_stats_to_osd();
 }
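A side effect of `PG_STATE_FAILED_REPAIR` being the first flag past bit 31 is the type widening sprinkled through these hunks: `get_state()` and the PGMap `state_name` lambda move from 32-bit to `uint64_t`. With a 32-bit holder the new flag would be silently truncated, as this small demonstration shows:

```cpp
#include <cstdint>
#include <iostream>

int main() {
  constexpr std::uint64_t PG_STATE_FAILED_REPAIR = 1ULL << 32;
  std::uint32_t narrow = static_cast<std::uint32_t>(PG_STATE_FAILED_REPAIR);
  std::uint64_t wide = PG_STATE_FAILED_REPAIR;
  std::cout << (narrow & PG_STATE_FAILED_REPAIR) << "\n";  // 0: flag lost
  std::cout << (wide & PG_STATE_FAILED_REPAIR) << "\n";    // non-zero: flag kept
  return 0;
}
```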
diff --git a/src/osd/PG.h b/src/osd/PG.h
index c9203d1b65c1c..53861b3aac9e1 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1702,6 +1702,11 @@ class PG : public DoutPrefixProvider {
 
     // this flag indicates whether we would like to do auto-repair of the PG or not
     bool auto_repair;
+    // this flag indicates that we are scrubbing post-repair to verify everything is fixed
+    bool check_repair;
+    // this flag indicates that if a regular scrub detects errors <= osd_scrub_auto_repair_num_errors,
+    // we should deep scrub in order to auto repair
+    bool deep_scrub_on_error;
 
     // Maps from objects with errors to missing/inconsistent peers
     map<hobject_t, set<pg_shard_t>> missing;
@@ -1814,6 +1819,8 @@ class PG : public DoutPrefixProvider {
       must_deep_scrub = false;
       must_repair = false;
       auto_repair = false;
+      check_repair = false;
+      deep_scrub_on_error = false;
 
       state = PG::Scrubber::INACTIVE;
       start = hobject_t();
@@ -1872,7 +1879,7 @@ class PG : public DoutPrefixProvider {
   bool scrub_process_inconsistent();
   bool ops_blocked_by_scrub() const;
   void scrub_finish();
-  void scrub_clear_state();
+  void scrub_clear_state(bool keep_repair = false);
   void _scan_snaps(ScrubMap &map);
   void _repair_oinfo_oid(ScrubMap &map);
   void _scan_rollback_obs(const vector<ghobject_t> &rollback_obs);
@@ -2932,7 +2939,7 @@ class PG : public DoutPrefixProvider {
   bool is_complete() const {
    return info.last_complete == info.last_update;
  }
  bool should_send_notify() const { return send_notify; }
 
-  int get_state() const { return state; }
+  uint64_t get_state() const { return state; }
   bool is_active() const { return state_test(PG_STATE_ACTIVE); }
   bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
   bool is_peering() const { return state_test(PG_STATE_PEERING); }
@@ -2950,6 +2957,7 @@ class PG : public DoutPrefixProvider {
   }
   bool is_recovering() const { return state_test(PG_STATE_RECOVERING); }
   bool is_premerge() const { return state_test(PG_STATE_PREMERGE); }
+  bool is_repair() const { return state_test(PG_STATE_REPAIR); }
 
   bool is_empty() const { return info.last_update == eversion_t(0,0); }
diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h
index 6fdf6dd05b583..fa1354c70b269 100644
--- a/src/osd/PGBackend.h
+++ b/src/osd/PGBackend.h
@@ -227,6 +227,7 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
        const hobject_t &hoid) = 0;
 
      virtual bool pg_is_undersized() const = 0;
+     virtual bool pg_is_repair() const = 0;
 
      virtual void log_operation(
        const vector<pg_log_entry_t> &logv,
@@ -293,6 +294,8 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef;
 
      virtual bool check_osdmap_full(const set<pg_shard_t> &missing_on) = 0;
 
+     virtual bool pg_is_repair() = 0;
+     virtual void inc_osd_stat_repaired() = 0;
      virtual bool pg_is_remote_backfilling() = 0;
      virtual void pg_add_local_num_bytes(int64_t num_bytes) = 0;
      virtual void pg_sub_local_num_bytes(int64_t num_bytes) = 0;
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 9d206ecfcc6c3..66017e9b7f5ed 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -11495,6 +11495,8 @@ int PrimaryLogPG::recover_missing(
   if (!object_missing) {
     object_stat_sum_t stat_diff;
     stat_diff.num_objects_recovered = 1;
+    if (scrub_after_recovery)
+      stat_diff.num_objects_repaired = 1;
     on_global_recover(soid, stat_diff, true);
   } else {
     auto recovery_handle = pgbackend->open_recovery_op();
@@ -15232,6 +15234,7 @@ int PrimaryLogPG::rep_repair_primary_object(const hobject_t& soid, OpContext *ctx)
   if (!eio_errors_to_process) {
     eio_errors_to_process = true;
     ceph_assert(is_clean());
+    state_set(PG_STATE_REPAIR);
     queue_peering_event(
       PGPeeringEventRef(
         std::make_shared<PGPeeringEvent>(
diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h
index 1fdc11d4b11e6..c0f4afb1846ad 100644
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h
@@ -402,6 +402,12 @@ class PrimaryLogPG : public PG, public PGBackend::Listener {
     release_object_locks(manager);
   }
 
+  bool pg_is_repair() override {
+    return is_repair();
+  }
+  void inc_osd_stat_repaired() override {
+    osd->inc_osd_stat_repaired();
+  }
   bool pg_is_remote_backfilling() override {
     return is_remote_backfilling();
   }
@@ -454,6 +460,10 @@ class PrimaryLogPG : public PG, public PGBackend::Listener {
     return is_undersized();
   }
 
+  bool pg_is_repair() const override {
+    return is_repair();
+  }
+
   void update_peer_last_complete_ondisk(
     pg_shard_t fromosd,
     eversion_t lcod) override {
diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc
index d471e6b7a09ab..9614a58997343 100644
--- a/src/osd/ReplicatedBackend.cc
+++ b/src/osd/ReplicatedBackend.cc
@@ -750,7 +750,7 @@ void ReplicatedBackend::_do_push(OpRequestRef op)
        i != m->pushes.end();
        ++i) {
     replies.push_back(PushReplyOp());
-    handle_push(from, *i, &(replies.back()), &t);
+    handle_push(from, *i, &(replies.back()), &t, m->is_repair);
   }
 
   MOSDPGPushReply *reply = new MOSDPGPushReply;
@@ -1725,6 +1725,11 @@ bool ReplicatedBackend::handle_pull_response(
 
   if (complete) {
     pi.stat.num_objects_recovered++;
+    // XXX: This could overcount if regular recovery is needed right after a repair
+    if (get_parent()->pg_is_repair()) {
+      pi.stat.num_objects_repaired++;
+      get_parent()->inc_osd_stat_repaired();
+    }
     clear_pull_from(piter);
     to_continue->push_back({hoid, pi.stat});
     get_parent()->on_local_recover(
@@ -1740,7 +1745,7 @@ bool ReplicatedBackend::handle_pull_response(
 void ReplicatedBackend::handle_push(
   pg_shard_t from, const PushOp &pop, PushReplyOp *response,
-  ObjectStore::Transaction *t)
+  ObjectStore::Transaction *t, bool is_repair)
 {
   dout(10) << "handle_push "
            << pop.recovery_info
@@ -1764,13 +1769,18 @@ void ReplicatedBackend::handle_push(
                       pop.omap_entries,
                       t);
 
-  if (complete)
+  if (complete) {
+    if (is_repair) {
+      get_parent()->inc_osd_stat_repaired();
+      dout(20) << __func__ << " repair complete" << dendl;
+    }
     get_parent()->on_local_recover(
       pop.recovery_info.soid,
       pop.recovery_info,
       ObjectContextRef(), // ok, is replica
       false,
       t);
+  }
 }
 
 void ReplicatedBackend::send_pushes(int prio, map<pg_shard_t, vector<PushOp>> &pushes)
@@ -1793,6 +1803,7 @@ void ReplicatedBackend::send_pushes(int prio, map<pg_shard_t, vector<PushOp>> &pushes)
       msg->map_epoch = get_osdmap_epoch();
       msg->min_epoch = get_parent()->get_last_peering_reset_epoch();
       msg->set_priority(prio);
+      msg->is_repair = get_parent()->pg_is_repair();
       for (;
            (j != i->second.end() &&
             cost < cct->_conf->osd_max_push_cost &&
@@ -1996,8 +2007,11 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
 
   if (new_progress.is_complete(recovery_info)) {
     new_progress.data_complete = true;
-    if (stat)
+    if (stat) {
       stat->num_objects_recovered++;
+      if (get_parent()->pg_is_repair())
+        stat->num_objects_repaired++;
+    }
   }
 
   if (stat) {
diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h
index 35feef16bce7b..8f447495a4ed1 100644
--- a/src/osd/ReplicatedBackend.h
+++ b/src/osd/ReplicatedBackend.h
@@ -244,7 +244,7 @@ class ReplicatedBackend : public PGBackend {
     list<pull_complete_info> *to_continue,
     ObjectStore::Transaction *t);
   void handle_push(pg_shard_t from, const PushOp &op, PushReplyOp *response,
                    ObjectStore::Transaction *t);
-                   ObjectStore::Transaction *t);
+                   ObjectStore::Transaction *t, bool is_repair);
 
   static void trim_pushed_data(const interval_set<uint64_t> &copy_subset,
                                const interval_set<uint64_t> &intervals_received,
@@ -416,7 +416,7 @@ class ReplicatedBackend : public PGBackend {
   struct C_OSD_RepModifyCommit;
 
   void repop_commit(RepModifyRef rm);
-  bool auto_repair_supported() const override { return false; }
+  bool auto_repair_supported() const override { return store->has_builtin_csum(); }
 
   int be_deep_scrub(
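`auto_repair_supported()` turning on for replicated pools is deliberately conditional: picking an authoritative replica is only safe when the objectstore itself checksums data, which is why the gate is BlueStore's built-in CRC. `has_builtin_csum()` is the `ObjectStore` hook used above; the surrounding function below is only a sketch of the decision:

```cpp
// Replicated auto repair is gated on store-level checksums: with them, a read
// error or CRC mismatch identifies the bad shard unambiguously.
struct StoreCaps { bool builtin_csum; };  // stand-in for ObjectStore
bool replicated_auto_repair_supported(const StoreCaps& store) {
  return store.builtin_csum;  // BlueStore: true; FileStore-style stores: false
}
```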
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 130a8f2e2f86a..af3f0d70a01c3 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -381,6 +381,7 @@ void osd_stat_t::dump(Formatter *f) const
   f->close_section();
   f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
   f->dump_int("num_snap_trimming", num_snap_trimming);
+  f->dump_int("num_shards_repaired", num_shards_repaired);
   f->open_object_section("op_queue_age_hist");
   op_queue_age_hist.dump(f);
   f->close_section();
@@ -394,7 +395,7 @@ void osd_stat_t::dump(Formatter *f) const
 
 void osd_stat_t::encode(bufferlist &bl, uint64_t features) const
 {
-  ENCODE_START(10, 2, bl);
+  ENCODE_START(11, 2, bl);
 
   //////// for compatibility ////////
   int64_t kb = statfs.kb();
@@ -425,6 +426,7 @@ void osd_stat_t::encode(bufferlist &bl, uint64_t features) const
   encode(statfs, bl);
   ///////////////////////////////////
   encode(os_alerts, bl);
+  encode(num_shards_repaired, bl);
   ENCODE_FINISH(bl);
}

@@ -432,7 +434,7 @@ void osd_stat_t::decode(bufferlist::const_iterator &bl)
 {
   int64_t kb, kb_used, kb_avail;
   int64_t kb_used_data, kb_used_omap, kb_used_meta;
-  DECODE_START_LEGACY_COMPAT_LEN(10, 2, 2, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(11, 2, 2, bl);
   decode(kb, bl);
   decode(kb_used, bl);
   decode(kb_avail, bl);
@@ -487,6 +489,11 @@ void osd_stat_t::decode(bufferlist::const_iterator &bl)
   } else {
     os_alerts.clear();
   }
+  if (struct_v >= 11) {
+    decode(num_shards_repaired, bl);
+  } else {
+    num_shards_repaired = 0;
+  }
   DECODE_FINISH(bl);
 }
 
@@ -501,6 +508,7 @@ void osd_stat_t::generate_test_instances(std::list<osd_stat_t*>& o)
   o.back()->hb_peers.push_back(7);
   o.back()->snap_trim_queue_len = 8;
   o.back()->num_snap_trimming = 99;
+  o.back()->num_shards_repaired = 101;
   o.back()->os_alerts[0].emplace(
     "some alert", "some alert details");
   o.back()->os_alerts[1].emplace(
@@ -976,6 +984,8 @@ std::string pg_state_string(uint64_t state)
     oss << "snaptrim_wait+";
   if (state & PG_STATE_SNAPTRIM_ERROR)
     oss << "snaptrim_error+";
+  if (state & PG_STATE_FAILED_REPAIR)
+    oss << "failed_repair+";
   string ret(oss.str());
   if (ret.length() > 0)
     ret.resize(ret.length() - 1);
@@ -1047,6 +1057,8 @@ boost::optional<uint64_t> pg_string_state(const std::string& state)
     type = PG_STATE_SNAPTRIM_ERROR;
   else if (state == "creating")
     type = PG_STATE_CREATING;
+  else if (state == "failed_repair")
+    type = PG_STATE_FAILED_REPAIR;
   else if (state == "unknown")
     type = 0;
   else
@@ -2225,11 +2237,12 @@ void object_stat_sum_t::dump(Formatter *f) const
   f->dump_int("num_objects_manifest", num_objects_manifest);
   f->dump_int("num_omap_bytes", num_omap_bytes);
   f->dump_int("num_omap_keys", num_omap_keys);
+  f->dump_int("num_objects_repaired", num_objects_repaired);
 }
 
 void object_stat_sum_t::encode(bufferlist& bl) const
 {
-  ENCODE_START(19, 14, bl);
+  ENCODE_START(20, 14, bl);
 #if defined(CEPH_LITTLE_ENDIAN)
   bl.append((char *)(&num_bytes), sizeof(object_stat_sum_t));
 #else
@@ -2272,6 +2285,7 @@ void object_stat_sum_t::encode(bufferlist& bl) const
   encode(num_objects_manifest, bl);
   encode(num_omap_bytes, bl);
   encode(num_omap_keys, bl);
+  encode(num_objects_repaired, bl);
 #endif
   ENCODE_FINISH(bl);
 }
 
@@ -2279,7 +2293,7 @@ void object_stat_sum_t::encode(bufferlist& bl) const
 void object_stat_sum_t::decode(bufferlist::const_iterator& bl)
 {
   bool decode_finish = false;
-  DECODE_START(19, bl);  // make sure to also update fast decode below
+  DECODE_START(20, bl);  // make sure to also update fast decode below
 #if defined(CEPH_LITTLE_ENDIAN)
-  if (struct_v >= 19) {  // this must match newest decode version
+  if (struct_v >= 20) {  // this must match newest decode version
     bl.copy(sizeof(object_stat_sum_t), (char*)(&num_bytes));
@@ -2336,6 +2350,9 @@ void object_stat_sum_t::decode(bufferlist::const_iterator& bl)
       decode(num_omap_bytes, bl);
       decode(num_omap_keys, bl);
     }
+    if (struct_v >= 20) {
+      decode(num_objects_repaired, bl);
+    }
   }
   DECODE_FINISH(bl);
 }
@@ -2379,6 +2396,7 @@ void object_stat_sum_t::generate_test_instances(list<object_stat_sum_t*>& o)
   a.num_objects_manifest = 2;
   a.num_omap_bytes = 20000;
   a.num_omap_keys = 200;
+  a.num_objects_repaired = 300;
   o.push_back(new object_stat_sum_t(a));
 }
 
@@ -2423,6 +2441,7 @@ void object_stat_sum_t::add(const object_stat_sum_t& o)
   num_objects_manifest += o.num_objects_manifest;
   num_omap_bytes += o.num_omap_bytes;
   num_omap_keys += o.num_omap_keys;
+  num_objects_repaired += o.num_objects_repaired;
 }
 
 void object_stat_sum_t::sub(const object_stat_sum_t& o)
@@ -2466,6 +2485,7 @@ void object_stat_sum_t::sub(const object_stat_sum_t& o)
   num_objects_manifest -= o.num_objects_manifest;
   num_omap_bytes -= o.num_omap_bytes;
   num_omap_keys -= o.num_omap_keys;
+  num_objects_repaired -= o.num_objects_repaired;
 }
 
 bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
@@ -2509,7 +2529,8 @@ bool operator==(const object_stat_sum_t& l, const object_stat_sum_t& r)
     l.num_large_omap_objects == r.num_large_omap_objects &&
     l.num_objects_manifest == r.num_objects_manifest &&
     l.num_omap_bytes == r.num_omap_bytes &&
-    l.num_omap_keys == r.num_omap_keys;
+    l.num_omap_keys == r.num_omap_keys &&
+    l.num_objects_repaired == r.num_objects_repaired;
 }
 
 // -- object_stat_collection_t --
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index e2cd7721ec9ce..440d43357d388 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -960,6 +960,7 @@ WRITE_CLASS_ENCODER_FEATURES(objectstore_perf_stat_t)
 #define PG_STATE_SNAPTRIM_ERROR   (1ULL << 29) // error stopped trimming snaps
 #define PG_STATE_FORCED_RECOVERY  (1ULL << 30) // force recovery of this pg before any other
 #define PG_STATE_FORCED_BACKFILL  (1ULL << 31) // force backfill of this pg before any other
+#define PG_STATE_FAILED_REPAIR    (1ULL << 32) // A repair failed to fix all errors
 
 std::string pg_state_string(uint64_t state);
 std::string pg_vector_string(const vector<int32_t> &a);
@@ -1772,6 +1773,7 @@ struct object_stat_sum_t {
   int64_t num_objects_manifest = 0;
   int64_t num_omap_bytes = 0;
   int64_t num_omap_keys = 0;
+  int64_t num_objects_repaired = 0;
 
   object_stat_sum_t()
     : num_bytes(0),
@@ -1844,6 +1846,7 @@ struct object_stat_sum_t {
     FLOOR(num_evict_mode_full);
     FLOOR(num_objects_pinned);
     FLOOR(num_legacy_snapsets);
+    FLOOR(num_objects_repaired);
 #undef FLOOR
   }
 
@@ -1880,6 +1883,7 @@ struct object_stat_sum_t {
     SPLIT(num_objects_manifest);
     SPLIT(num_omap_bytes);
     SPLIT(num_omap_keys);
+    SPLIT(num_objects_repaired);
     SPLIT_PRESERVE_NONZERO(num_shallow_scrub_errors);
     SPLIT_PRESERVE_NONZERO(num_deep_scrub_errors);
     for (unsigned i = 0; i < out.size(); ++i) {
@@ -1944,6 +1948,7 @@ struct object_stat_sum_t {
       sizeof(num_objects_manifest) +
       sizeof(num_omap_bytes) +
       sizeof(num_omap_keys) +
+      sizeof(num_objects_repaired) +
       sizeof(num_objects_recovered) +
       sizeof(num_bytes_recovered) +
       sizeof(num_keys_recovered) +
@@ -2316,6 +2321,7 @@ struct osd_stat_t {
   store_statfs_t statfs;
   vector<int> hb_peers;
   int32_t snap_trim_queue_len, num_snap_trimming;
+  uint64_t num_shards_repaired;
   pow2_hist_t op_queue_age_hist;
 
@@ -2327,12 +2333,14 @@ struct osd_stat_t {
 
   uint32_t num_pgs = 0;
 
-  osd_stat_t() : snap_trim_queue_len(0), num_snap_trimming(0) {}
+  osd_stat_t() : snap_trim_queue_len(0), num_snap_trimming(0),
+                 num_shards_repaired(0) {}
 
   void add(const osd_stat_t& o) {
     statfs.add(o.statfs);
     snap_trim_queue_len += o.snap_trim_queue_len;
     num_snap_trimming += o.num_snap_trimming;
+    num_shards_repaired += o.num_shards_repaired;
     op_queue_age_hist.add(o.op_queue_age_hist);
     os_perf_stat.add(o.os_perf_stat);
     num_pgs += o.num_pgs;
@@ -2347,6 +2355,7 @@ struct osd_stat_t {
     statfs.sub(o.statfs);
     snap_trim_queue_len -= o.snap_trim_queue_len;
     num_snap_trimming -= o.num_snap_trimming;
+    num_shards_repaired -= o.num_shards_repaired;
     op_queue_age_hist.sub(o.op_queue_age_hist);
     os_perf_stat.sub(o.os_perf_stat);
     num_pgs -= o.num_pgs;
@@ -2371,6 +2380,7 @@ inline bool operator==(const osd_stat_t& l, const osd_stat_t& r) {
   return l.statfs == r.statfs &&
     l.snap_trim_queue_len == r.snap_trim_queue_len &&
     l.num_snap_trimming == r.num_snap_trimming &&
+    l.num_shards_repaired == r.num_shards_repaired &&
    l.hb_peers == r.hb_peers &&
    l.op_queue_age_hist == r.op_queue_age_hist &&
    l.os_perf_stat == r.os_perf_stat &&
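Both stat structs follow the same wire-evolution recipe used throughout these hunks: bump the version in `ENCODE_START`/`DECODE_START`, append the new field last, and guard its decode on `struct_v` with a default for old encoders. Abstracted into a self-contained sketch over a plain byte buffer instead of Ceph's `bufferlist`:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// v11 writer: the new field is appended after every v10 field.
void encode_v11(std::vector<std::uint8_t>& out, std::uint64_t num_shards_repaired) {
  std::uint8_t struct_v = 11;                  // was 10 before this PR
  out.push_back(struct_v);
  // ... all pre-existing v10 fields would be written here, unchanged ...
  std::uint8_t buf[8];
  std::memcpy(buf, &num_shards_repaired, sizeof(buf));
  out.insert(out.end(), buf, buf + sizeof(buf));  // new field goes last
}

// Reader: tolerate both old and new encoders.
std::uint64_t decode_num_shards_repaired(const std::uint8_t* p, std::uint8_t struct_v) {
  if (struct_v < 11)
    return 0;                                  // an old peer never wrote it
  std::uint64_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}
```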