Skip to content

Commit

Permalink
osd, test: Add test case with osd support for overdue PG scrubs and d…
Browse files Browse the repository at this point in the history
…eep scrubs

Add trigger_deep_scrub osd command for testing
Publish stats when trigger_scrub/trigger_deep_scrub is used for testing
Add optional argument to trigger_scrub/trigger_deep_scrub
for amount of extra time to change last scrub stamps

Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit ef2dc05)

Conflicts:
        src/osd/OSD.cc : Resolved in test_ops

Modify osd-scrub-repair.sh from master branch version to use mimic
mon_warn_not_scrubbed and mon_warn_not_deep_scrubbed config values.

(cherry picked from commit fcd582f)

Conflicts:
	src/osd/OSD.cc (trivial)
	src/osd/PG.h (trivial, location of set_last_scrub_stamp())
  • Loading branch information
dzafman committed Feb 12, 2019
1 parent d2d5165 commit dabec3d
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 9 deletions.
85 changes: 85 additions & 0 deletions qa/standalone/scrub/osd-scrub-repair.sh
Expand Up @@ -5274,6 +5274,91 @@ function TEST_periodic_scrub_replicated() {
rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1
}

#
# Check that overdue scrubs and overdue deep scrubs are reported by
# "ceph health" / "ceph health detail".
#
# The test starts one mon, one mgr and a single OSD, creates
# $scrubs + $deep_scrubs pools, and then uses the OSD admin socket
# commands trigger_scrub / trigger_deep_scrub to push the last scrub
# stamps of PG <i>.0 into the past.  The first pool of each group has a
# pool-level interval of 14 days; the others rely on the 7 day global
# configuration, so the amount of back-dating differs accordingly.
#
# NOTE(review): the PG ids ${i}.0 assume the pools created here receive
# pool ids 1..N in creation order -- confirm against create_pool.
#
# @param dir path name of the test environment
# @return 0 on success, 1 on error
#
function TEST_scrub_warning() {
    local dir=$1
    local poolname=psr_pool
    local objname=POBJ
    local scrubs=5
    local deep_scrubs=5
    local i1_day=86400
    local i7_days=$(calc $i1_day \* 7)
    local i14_days=$(calc $i1_day \* 14)
    local overdue=$i1_day
    local conf_overdue_seconds=$(calc $i7_days + $overdue )
    local pool_overdue_seconds=$(calc $i14_days + $overdue )
    local total=$(expr $scrubs + $deep_scrubs)
    local first_deep=$(expr $scrubs + 1)
    # Keep loop/scratch variables out of the caller's scope.
    local i overdue_seconds count

    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 || return 1
    run_mgr $dir x --mon_warn_not_scrubbed=${overdue} --mon_warn_not_deep_scrubbed=${overdue} || return 1
    run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1

    for i in $(seq 1 $total)
    do
        create_pool $poolname-$i 1 1 || return 1
        wait_for_clean || return 1
        # First "scrub" pool gets a pool-specific scrub interval.
        if [ "$i" = "1" ]; then
            ceph osd pool set $poolname-$i scrub_max_interval $i14_days || return 1
        fi
        # First "deep scrub" pool gets a pool-specific deep scrub interval.
        if [ "$i" = "$first_deep" ]; then
            ceph osd pool set $poolname-$i deep_scrub_interval $i14_days || return 1
        fi
    done

    # Only 1 osd
    local primary=0

    ceph osd set noscrub || return 1
    ceph osd set nodeep-scrub || return 1
    # A failure here would leave the intervals at their defaults and make
    # the health checks below fail for a non-obvious reason, so bail early.
    ceph config set global osd_scrub_interval_randomize_ratio 0 || return 1
    ceph config set global osd_deep_scrub_randomize_ratio 0 || return 1
    ceph config set global osd_scrub_max_interval ${i7_days} || return 1
    ceph config set global osd_deep_scrub_interval ${i7_days} || return 1

    # Fake schedule scrubs
    for i in $(seq 1 $scrubs)
    do
        if [ "$i" = "1" ]; then
            overdue_seconds=$pool_overdue_seconds
        else
            overdue_seconds=$conf_overdue_seconds
        fi
        CEPH_ARGS='' ceph daemon $(get_asok_path osd.${primary}) \
            trigger_scrub ${i}.0 $(expr ${overdue_seconds} + ${i}00) || return 1
    done
    # Fake schedule deep scrubs
    for i in $(seq $first_deep $total)
    do
        if [ "$i" = "$first_deep" ]; then
            overdue_seconds=$pool_overdue_seconds
        else
            overdue_seconds=$conf_overdue_seconds
        fi
        CEPH_ARGS='' ceph daemon $(get_asok_path osd.${primary}) \
            trigger_deep_scrub ${i}.0 $(expr ${overdue_seconds} + ${i}00) || return 1
    done
    flush_pg_stats || return 1

    # Unchecked on purpose: these two calls only put the health state in the log.
    ceph health
    ceph health detail
    ceph health | grep -q "$deep_scrubs pgs not deep-scrubbed in time" || return 1
    ceph health | grep -q "$scrubs pgs not scrubbed in time" || return 1

    # grep -c yields a bare number, and -ne compares numerically, so the
    # check does not depend on how wc pads its output.
    count=$(ceph health detail | grep -c "not scrubbed since")
    if [ "$count" -ne "$scrubs" ]; then
        ceph health detail | grep "not scrubbed since"
        return 1
    fi
    count=$(ceph health detail | grep -c "not deep-scrubbed since")
    if [ "$count" -ne "$deep_scrubs" ]; then
        ceph health detail | grep "not deep-scrubbed since"
        return 1
    fi
    return 0
}

#
# Corrupt snapset in replicated pool
#
Expand Down
46 changes: 37 additions & 9 deletions src/osd/OSD.cc
Expand Up @@ -2987,10 +2987,19 @@ void OSD::final_init()
r = admin_socket->register_command(
"trigger_scrub",
"trigger_scrub " \
"name=pgid,type=CephString ",
"name=pgid,type=CephString " \
"name=time,type=CephInt,req=false",
test_ops_hook,
"Trigger a scheduled scrub ");
assert(r == 0);
r = admin_socket->register_command(
"trigger_deep_scrub",
"trigger_deep_scrub " \
"name=pgid,type=CephString " \
"name=time,type=CephInt,req=false",
test_ops_hook,
"Trigger a scheduled deep scrub ");
ceph_assert(r == 0);
r = admin_socket->register_command(
"injectfull",
"injectfull " \
Expand Down Expand Up @@ -5613,8 +5622,9 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
<< "to " << service->cct->_conf->osd_recovery_delay_start;
return;
}
if (command == "trigger_scrub") {
if (command == "trigger_scrub" || command == "trigger_deep_scrub") {
spg_t pgid;
bool deep = (command == "trigger_deep_scrub");
OSDMapRef curmap = service->get_osdmap();

string pgidstr;
Expand All @@ -5625,6 +5635,9 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
return;
}

int64_t time;
cmd_getval(service->cct, cmdmap, "time", time, (int64_t)0);

PG *pg = service->osd->_lookup_lock_pg(pgid);
if (pg == nullptr) {
ss << "Can't find pg " << pgid;
Expand All @@ -5635,16 +5648,31 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store,
pg->unreg_next_scrub();
const pg_pool_t *p = curmap->get_pg_pool(pgid.pool());
double pool_scrub_max_interval = 0;
p->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &pool_scrub_max_interval);
double scrub_max_interval = pool_scrub_max_interval > 0 ?
pool_scrub_max_interval : g_conf->osd_scrub_max_interval;
double scrub_max_interval;
if (deep) {
p->opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &pool_scrub_max_interval);
scrub_max_interval = pool_scrub_max_interval > 0 ?
pool_scrub_max_interval : g_conf->osd_deep_scrub_interval;
} else {
p->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &pool_scrub_max_interval);
scrub_max_interval = pool_scrub_max_interval > 0 ?
pool_scrub_max_interval : g_conf->osd_scrub_max_interval;
}
// Instead of marking must_scrub force a schedule scrub
utime_t stamp = ceph_clock_now();
stamp -= scrub_max_interval;
stamp -= 100.0; // push back last scrub more for good measure
pg->set_last_scrub_stamp(stamp);
if (time == 0)
stamp -= scrub_max_interval;
else
stamp -= (float)time;
stamp -= 100.0; // push back last scrub more for good measure
if (deep) {
pg->set_last_deep_scrub_stamp(stamp);
} else {
pg->set_last_scrub_stamp(stamp);
}
pg->reg_next_scrub();
ss << "ok";
pg->publish_stats_to_osd();
ss << "ok - set" << (deep ? " deep" : "" ) << " stamp " << stamp;
} else {
ss << "Not primary";
}
Expand Down
8 changes: 8 additions & 0 deletions src/osd/PG.h
Expand Up @@ -1059,6 +1059,7 @@ class PG : public DoutPrefixProvider {

void _update_calc_stats();
void _update_blocked_by();
friend class TestOpsSocketHook;
void publish_stats_to_osd();
void clear_publish_stats();

Expand Down Expand Up @@ -2497,8 +2498,15 @@ class PG : public DoutPrefixProvider {
// Read-only accessor: returns a const reference to this PG's pg_id member.
const spg_t& get_pgid() const { return pg_id; }

// Overwrite the recorded time of the last scrub with t.
// Both copies kept in info (history and stats) are set to the same value.
void set_last_scrub_stamp(utime_t t) {
  info.history.last_scrub_stamp = t;
  info.stats.last_scrub_stamp = t;
}

// Overwrite the recorded time of the last deep scrub with t.
// Both copies kept in info (history and stats) are set to the same value.
void set_last_deep_scrub_stamp(utime_t t) {
  info.history.last_deep_scrub_stamp = t;
  info.stats.last_deep_scrub_stamp = t;
}

// Reset peer_features to the CEPH_FEATURES_SUPPORTED_DEFAULT value.
void reset_min_peer_features() {
peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
}
Expand Down

0 comments on commit dabec3d

Please sign in to comment.