Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kraken: osd: Implement asynchronous scrub sleep #15526

Merged
merged 2 commits into from Aug 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/osd/OSD.cc
Expand Up @@ -262,6 +262,9 @@ OSDService::OSDService(OSD *osd) :
osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */),
snap_reserver(&reserver_finisher,
cct->_conf->osd_max_trimming_pgs),
scrub_sleep_lock("OSDService::scrub_sleep_lock"),
scrub_sleep_timer(
osd->client_messenger->cct, scrub_sleep_lock, false /* relax locking */),
recovery_lock("OSDService::recovery_lock"),
recovery_ops_active(0),
recovery_ops_reserved(0),
Expand Down Expand Up @@ -537,6 +540,11 @@ void OSDService::shutdown()
snap_sleep_timer.shutdown();
}

{
Mutex::Locker l(scrub_sleep_lock);
scrub_sleep_timer.shutdown();
}

osdmap = OSDMapRef();
next_osdmap = OSDMapRef();
}
Expand All @@ -549,6 +557,7 @@ void OSDService::init()
watch_timer.init();
agent_timer.init();
snap_sleep_timer.init();
scrub_sleep_timer.init();

agent_thread.create("osd_srv_agent");

Expand Down Expand Up @@ -3334,6 +3343,11 @@ PG *OSD::_lookup_lock_pg(spg_t pgid)
return pg;
}

// Publicly callable entry point for looking up a PG by id; all of the
// work is delegated to _lookup_lock_pg() and its result is handed back
// to the caller unchanged.
//
// NOTE(review): callers appear to treat a null return as "PG not found"
// and to call unlock() on a non-null result once they are done with it
// -- confirm against _lookup_lock_pg().
PG *OSD::lookup_lock_pg(spg_t pgid)
{
  PG *found = _lookup_lock_pg(pgid);
  return found;
}

PG *OSD::_lookup_lock_pg_with_map_lock_held(spg_t pgid)
{
assert(pg_map.count(pgid));
Expand Down
9 changes: 9 additions & 0 deletions src/osd/OSD.h
Expand Up @@ -894,6 +894,10 @@ class OSDService {
SafeTimer snap_sleep_timer;

AsyncReserver<spg_t> snap_reserver;

Mutex scrub_sleep_lock;
SafeTimer scrub_sleep_timer;

void queue_for_snap_trim(PG *pg);

void queue_for_scrub(PG *pg) {
Expand Down Expand Up @@ -2012,6 +2016,11 @@ class OSD : public Dispatcher,
Session *session);
PG *_lookup_lock_pg_with_map_lock_held(spg_t pgid);
PG *_lookup_lock_pg(spg_t pgid);

public:
PG *lookup_lock_pg(spg_t pgid);

protected:
PG *_open_lock_pg(OSDMapRef createmap,
spg_t pg, bool no_lockdep_check=false);
enum res_result {
Expand Down
45 changes: 36 additions & 9 deletions src/osd/PG.cc
Expand Up @@ -4065,22 +4065,49 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
{
if (g_conf->osd_scrub_sleep > 0 &&
(scrubber.state == PG::Scrubber::NEW_CHUNK ||
scrubber.state == PG::Scrubber::INACTIVE)) {
scrubber.state == PG::Scrubber::INACTIVE) &&
scrubber.needs_sleep) {
ceph_assert(!scrubber.sleeping);
dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;
unlock();
utime_t t;
t.set_from_double(g_conf->osd_scrub_sleep);
handle.suspend_tp_timeout();
t.sleep();
handle.reset_tp_timeout();
lock();
dout(20) << __func__ << " slept for " << t << dendl;

// Do an async sleep so we don't block the op queue
OSDService *osds = osd;
spg_t pgid = get_pgid();
int state = scrubber.state;
auto scrub_requeue_callback =
new FunctionContext([osds, pgid, state](int r) {
PG *pg = osds->osd->lookup_lock_pg(pgid);
if (pg == nullptr) {
lgeneric_dout(osds->osd->cct, 20)
<< "scrub_requeue_callback: Could not find "
<< "PG " << pgid << " can't complete scrub requeue after sleep"
<< dendl;
return;
}
pg->scrubber.sleeping = false;
pg->scrubber.needs_sleep = false;
lgeneric_dout(pg->cct, 20)
<< "scrub_requeue_callback: slept for "
<< ceph_clock_now() - pg->scrubber.sleep_start
<< ", re-queuing scrub with state " << state << dendl;
pg->scrub_queued = false;
pg->requeue_scrub();
pg->scrubber.sleep_start = utime_t();
pg->unlock();
});
Mutex::Locker l(osd->scrub_sleep_lock);
osd->scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep,
scrub_requeue_callback);
scrubber.sleeping = true;
scrubber.sleep_start = ceph_clock_now();
return;
}
if (pg_has_reset_since(queued)) {
return;
}
assert(scrub_queued);
scrub_queued = false;
scrubber.needs_sleep = true;

if (!is_primary() || !is_active() || !is_clean() || !is_scrubbing()) {
dout(10) << "scrub -- not primary or active or not clean" << dendl;
Expand Down
10 changes: 9 additions & 1 deletion src/osd/PG.h
Expand Up @@ -34,6 +34,7 @@
#include "osd_types.h"
#include "include/xlist.h"
#include "SnapMapper.h"
#include "common/Timer.h"

#include "PGLog.h"
#include "OSDMap.h"
Expand Down Expand Up @@ -872,7 +873,6 @@ class PG : protected DoutPrefixProvider {
public:
void clear_primary_state();

public:
bool is_actingbackfill(pg_shard_t osd) const {
return actingbackfill.count(osd);
}
Expand Down Expand Up @@ -1101,6 +1101,11 @@ class PG : protected DoutPrefixProvider {
OpRequestRef active_rep_scrub;
utime_t scrub_reg_stamp; // stamp we registered for

// For async sleep
bool sleeping = false;
bool needs_sleep = true;
utime_t sleep_start;

// flags to indicate explicitly requested scrubs (by admin)
bool must_scrub, must_deep_scrub, must_repair;

Expand Down Expand Up @@ -1219,6 +1224,9 @@ class PG : protected DoutPrefixProvider {
authoritative.clear();
num_digest_updates_pending = 0;
cleaned_meta_map = ScrubMap();
sleeping = false;
needs_sleep = true;
sleep_start = utime_t();
}

void create_results(const hobject_t& obj);
Expand Down