Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

osd: Cancel in-progress scrubs (not user requested) #35909

Merged
merged 5 commits into from Jul 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions PendingReleaseNotes
Expand Up @@ -96,3 +96,7 @@
* OSD: A new configuration option ``osd_compact_on_start`` has been added which triggers
an OSD compaction on start. Setting this option to ``true`` and restarting an OSD
will result in an offline compaction of the OSD prior to booting.

* When the noscrub and/or nodeep-scrub flags are set globally or per pool,
  scheduled scrubs of the disabled type are now aborted. User-initiated
  scrubs are NOT interrupted.
4 changes: 2 additions & 2 deletions qa/standalone/mon/mon-last-epoch-clean.sh
Expand Up @@ -181,8 +181,8 @@ function TEST_mon_last_clean_epoch() {

sleep 5

ceph tell osd.* injectargs '--osd-beacon-report-interval 10' || exit 1
ceph tell mon.* injectargs \
ceph tell 'osd.*' injectargs '--osd-beacon-report-interval 10' || exit 1
ceph tell 'mon.*' injectargs \
'--mon-min-osdmap-epochs 2 --paxos-service-trim-min 1' || exit 1

create_pool foo 32
Expand Down
114 changes: 114 additions & 0 deletions qa/standalone/scrub/osd-scrub-test.sh
Expand Up @@ -230,6 +230,120 @@ function TEST_scrub_extented_sleep() {
teardown $dir || return 1
}

# Shared driver for the scrub-abort tests.
#
# Starts a scheduled $2-type scrub ("scrub" or "deep_scrub") on a one-PG
# pool, sets the corresponding no(deep-)scrub flag mid-scrub, verifies the
# OSD aborts the scrub (via its log message), then clears the flag and
# verifies scrubbing can complete afterwards.
#
# $1 - test directory
# $2 - scrub type: "scrub" or "deep_scrub"
function _scrub_abort() {
    local dir=$1
    local poolname=test
    local OSDS=3
    local objects=1000
    local type=$2

    TESTDATA="testdata.$$"
    if test "$type" = "scrub";
    then
        # Flag to set on the cluster, and the log token the OSD emits on abort.
        stopscrub="noscrub"
        check="noscrub"
    else
        stopscrub="nodeep-scrub"
        check="nodeep_scrub"
    fi

    setup "$dir" || return 1
    run_mon "$dir" a --osd_pool_default_size=3 || return 1
    run_mgr "$dir" x || return 1
    for osd in $(seq 0 $(expr $OSDS - 1))
    do
        # Long scrub sleep keeps the scrub in progress so we can abort it;
        # randomize ratios are zeroed for deterministic scheduling.
        run_osd "$dir" "$osd" --osd_pool_default_pg_autoscale_mode=off \
            --osd_deep_scrub_randomize_ratio=0.0 \
            --osd_scrub_sleep=5.0 \
            --osd_scrub_interval_randomize_ratio=0 || return 1
    done

    # Create a pool with a single pg
    create_pool "$poolname" 1 1
    wait_for_clean || return 1
    poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }')

    dd if=/dev/urandom of="$TESTDATA" bs=1032 count=1
    for i in $(seq 1 $objects)
    do
        rados -p "$poolname" put obj${i} "$TESTDATA"
    done
    rm -f "$TESTDATA"

    local primary=$(get_primary "$poolname" obj1)
    local pgid="${poolid}.0"

    ceph tell "$pgid" "$type" || return 1
    # deep-scrub won't start without scrub noticing
    if [ "$type" = "deep_scrub" ];
    then
        ceph tell "$pgid" scrub || return 1
    fi

    # Wait for scrubbing to start
    set -o pipefail
    found="no"
    for i in $(seq 0 200)
    do
        flush_pg_stats
        if ceph pg dump pgs | grep "^$pgid" | grep -q "scrubbing"
        then
            found="yes"
            #ceph pg dump pgs
            break
        fi
    done
    set +o pipefail

    if test "$found" = "no";
    then
        echo "Scrubbing never started"
        return 1
    fi

    # Setting the flag should make the OSD abort the in-progress scrub.
    ceph osd set "$stopscrub"

    # Wait for scrubbing to end
    set -o pipefail
    for i in $(seq 0 200)
    do
        flush_pg_stats
        if ceph pg dump pgs | grep "^$pgid" | grep -q "scrubbing"
        then
            continue
        fi
        #ceph pg dump pgs
        break
    done
    set +o pipefail

    sleep 5

    if ! grep "$check set, aborting" "$dir/osd.${primary}.log"
    then
        echo "Abort not seen in log"
        return 1
    fi

    local last_scrub=$(get_last_scrub_stamp "$pgid")
    # Unset the flag we set above (was hard-coded to "noscrub", which left
    # nodeep-scrub set in the deep_scrub case) and confirm scrubbing resumes.
    ceph osd unset "$stopscrub"
    TIMEOUT=$(($objects / 2))
    wait_for_scrub "$pgid" "$last_scrub" || return 1

    teardown "$dir" || return 1
}

# Verify that an in-progress scheduled shallow scrub is aborted when the
# noscrub flag is set. ($dir is quoted to survive word-splitting.)
function TEST_scrub_abort() {
    local dir=$1
    _scrub_abort "$dir" scrub
}

# Verify that an in-progress scheduled deep scrub is aborted when the
# nodeep-scrub flag is set. ($dir is quoted to survive word-splitting.)
function TEST_deep_scrub_abort() {
    local dir=$1
    _scrub_abort "$dir" deep_scrub
}

main osd-scrub-test "$@"

# Local Variables:
Expand Down
15 changes: 7 additions & 8 deletions src/osd/OSD.cc
Expand Up @@ -3743,7 +3743,7 @@ void OSD::final_init()
ceph_assert(r == 0);
r = admin_socket->register_command("dump_scrub_reservations",
asok_hook,
"show recovery reservations");
"show scrub reservations");
ceph_assert(r == 0);
r = admin_socket->register_command("get_latest_osdmap",
asok_hook,
Expand Down Expand Up @@ -7447,16 +7447,15 @@ void OSD::sched_scrub()
return;
}
bool allow_requested_repair_only = false;
if (service.is_recovery_active()) {
if (!cct->_conf->osd_scrub_during_recovery && cct->_conf->osd_repair_during_recovery) {
dout(10) << __func__
<< " will only schedule explicitly requested repair due to active recovery"
<< dendl;
allow_requested_repair_only = true;
} else if (!cct->_conf->osd_scrub_during_recovery && !cct->_conf->osd_repair_during_recovery) {
if (service.is_recovery_active() && !cct->_conf->osd_scrub_during_recovery) {
if (!cct->_conf->osd_repair_during_recovery) {
dout(20) << __func__ << " not scheduling scrubs due to active recovery" << dendl;
return;
}
dout(10) << __func__
<< " will only schedule explicitly requested repair due to active recovery"
<< dendl;
allow_requested_repair_only = true;
}

utime_t now = ceph_clock_now();
Expand Down
42 changes: 37 additions & 5 deletions src/osd/PG.cc
Expand Up @@ -213,6 +213,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
pg_stats_publish_valid(false),
finish_sync_event(NULL),
scrub_after_recovery(false),
save_req_scrub(false),
active_pushes(0),
recovery_state(
o->cct,
Expand Down Expand Up @@ -365,6 +366,7 @@ void PG::clear_primary_state()

scrubber.reserved_peers.clear();
scrub_after_recovery = false;
save_req_scrub = false;

agent_clear();
}
Expand All @@ -375,7 +377,7 @@ PG::Scrubber::Scrubber()
active(false),
shallow_errors(0), deep_errors(0), fixed(0),
must_scrub(false), must_deep_scrub(false), must_repair(false),
need_auto(false), time_for_deep(false),
need_auto(false), req_scrub(false), time_for_deep(false),
auto_repair(false),
check_repair(false),
deep_scrub_on_error(false),
Expand Down Expand Up @@ -529,6 +531,8 @@ void PG::_finish_recovery(Context *c)
scrub_after_recovery = false;
scrubber.must_deep_scrub = true;
scrubber.check_repair = true;
// We remember whether req_scrub was set when scrub_after_recovery set to true
scrubber.req_scrub = save_req_scrub;
queue_scrub();
}
} else {
Expand Down Expand Up @@ -1564,6 +1568,7 @@ void PG::scrub_requested(bool deep, bool repair, bool need_auto)
scrubber.must_repair = repair;
// User might intervene, so clear this
scrubber.need_auto = false;
scrubber.req_scrub = true;
}
reg_next_scrub();
}
Expand Down Expand Up @@ -2566,6 +2571,12 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
chunky_scrub(handle);
}

// Abort an in-progress scrub on this PG: clear the local scrub state
// (per chunky_scrub callers, used when the pg interval changes or when a
// no(deep-)scrub flag is detected mid-scrub) and then release the scrub
// reservations held on the replica shards.
void PG::abort_scrub()
{
  scrub_clear_state();
  scrub_unreserve_replicas();
}

/*
* Chunky scrub scrubs objects one chunk at a time with writes blocked for that
* chunk.
Expand Down Expand Up @@ -2646,12 +2657,29 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
*/
void PG::chunky_scrub(ThreadPool::TPHandle &handle)
{
// Since repair is only by request and we need to scrub afterward
// treat the same as req_scrub.
if (!scrubber.req_scrub) {
if (state_test(PG_STATE_DEEP_SCRUB)) {
if (get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) ||
pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)) {
dout(10) << "nodeep_scrub set, aborting" << dendl;
abort_scrub();
return;
}
} else if (state_test(PG_STATE_SCRUBBING)) {
if (get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) {
dout(10) << "noscrub set, aborting" << dendl;
abort_scrub();
return;
}
}
}
// check for map changes
if (scrubber.is_chunky_scrub_active()) {
if (scrubber.epoch_start != info.history.same_interval_since) {
dout(10) << "scrub pg changed, aborting" << dendl;
scrub_clear_state();
scrub_unreserve_replicas();
dout(10) << "scrub pg changed, aborting" << dendl;
abort_scrub();
return;
}
}
Expand Down Expand Up @@ -3036,6 +3064,7 @@ void PG::scrub_clear_state(bool has_error)
state_clear(PG_STATE_DEEP_SCRUB);
publish_stats_to_osd();

scrubber.req_scrub = false;
// local -> nothing.
if (scrubber.local_reserved) {
osd->dec_scrubs_local();
Expand Down Expand Up @@ -3268,7 +3297,8 @@ void PG::scrub_finish()
} else if (has_error) {
// Deep scrub in order to get corrected error counts
scrub_after_recovery = true;
dout(20) << __func__ << " Set scrub_after_recovery" << dendl;
save_req_scrub = scrubber.req_scrub;
dout(20) << __func__ << " Set scrub_after_recovery, req_scrub=" << save_req_scrub << dendl;
} else if (scrubber.shallow_errors || scrubber.deep_errors) {
// We have errors but nothing can be fixed, so there is no repair
// possible.
Expand Down Expand Up @@ -3414,6 +3444,8 @@ ostream& operator<<(ostream& out, const PG& pg)
out << " TIME_FOR_DEEP";
if (pg.scrubber.need_auto)
out << " NEED_AUTO";
if (pg.scrubber.req_scrub)
out << " REQ_SCRUB";

if (pg.recovery_ops_active)
out << " rops=" << pg.recovery_ops_active;
Expand Down
5 changes: 4 additions & 1 deletion src/osd/PG.h
Expand Up @@ -1038,7 +1038,7 @@ class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
utime_t sleep_start;

// flags to indicate explicitly requested scrubs (by admin)
bool must_scrub, must_deep_scrub, must_repair, need_auto;
bool must_scrub, must_deep_scrub, must_repair, need_auto, req_scrub;

// Priority to use for scrub scheduling
unsigned priority = 0;
Expand Down Expand Up @@ -1163,6 +1163,7 @@ class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
must_deep_scrub = false;
must_repair = false;
need_auto = false;
req_scrub = false;
time_for_deep = false;
auto_repair = false;
check_repair = false;
Expand Down Expand Up @@ -1199,6 +1200,7 @@ class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {

protected:
bool scrub_after_recovery;
bool save_req_scrub; // Saved for scrub_after_recovery

int active_pushes;

Expand All @@ -1218,6 +1220,7 @@ class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
const std::list<std::pair<ScrubMap::object, pg_shard_t> > &ok_peers,
const std::set<pg_shard_t> &bad_peers);

void abort_scrub();
void chunky_scrub(ThreadPool::TPHandle &handle);
void scrub_compare_maps();
/**
Expand Down