Skip to content

Commit

Permalink
Merge pull request #15526 from badone/wip-async-sleep-timer-fix-kraken
Browse files Browse the repository at this point in the history
kraken: osd: Implement asynchronous scrub sleep

Reviewed-by: Josh Durgin <jdurgin@redhat.com>
  • Loading branch information
smithfarm committed Aug 1, 2017
2 parents 72e5d61 + 719ed01 commit d0d9836
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 10 deletions.
14 changes: 14 additions & 0 deletions src/osd/OSD.cc
Expand Up @@ -262,6 +262,9 @@ OSDService::OSDService(OSD *osd) :
osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */),
snap_reserver(&reserver_finisher,
cct->_conf->osd_max_trimming_pgs),
scrub_sleep_lock("OSDService::scrub_sleep_lock"),
scrub_sleep_timer(
osd->client_messenger->cct, scrub_sleep_lock, false /* relax locking */),
recovery_lock("OSDService::recovery_lock"),
recovery_ops_active(0),
recovery_ops_reserved(0),
Expand Down Expand Up @@ -537,6 +540,11 @@ void OSDService::shutdown()
snap_sleep_timer.shutdown();
}

{
Mutex::Locker l(scrub_sleep_lock);
scrub_sleep_timer.shutdown();
}

osdmap = OSDMapRef();
next_osdmap = OSDMapRef();
}
Expand All @@ -549,6 +557,7 @@ void OSDService::init()
watch_timer.init();
agent_timer.init();
snap_sleep_timer.init();
scrub_sleep_timer.init();

agent_thread.create("osd_srv_agent");

Expand Down Expand Up @@ -3334,6 +3343,11 @@ PG *OSD::_lookup_lock_pg(spg_t pgid)
return pg;
}

// Public entry point for looking up a PG by id.
//
// Forwards pgid to _lookup_lock_pg() and hands its result back unchanged;
// no extra checks or state changes happen here.
// NOTE(review): the helper's name suggests the PG is returned locked and
// that the result may be null when the PG is not found -- confirm against
// _lookup_lock_pg() before relying on either.
PG *OSD::lookup_lock_pg(spg_t pgid)
{
  PG *const found_pg = _lookup_lock_pg(pgid);
  return found_pg;
}

PG *OSD::_lookup_lock_pg_with_map_lock_held(spg_t pgid)
{
assert(pg_map.count(pgid));
Expand Down
9 changes: 9 additions & 0 deletions src/osd/OSD.h
Expand Up @@ -894,6 +894,10 @@ class OSDService {
SafeTimer snap_sleep_timer;

AsyncReserver<spg_t> snap_reserver;

Mutex scrub_sleep_lock;
SafeTimer scrub_sleep_timer;

void queue_for_snap_trim(PG *pg);

void queue_for_scrub(PG *pg) {
Expand Down Expand Up @@ -2012,6 +2016,11 @@ class OSD : public Dispatcher,
Session *session);
PG *_lookup_lock_pg_with_map_lock_held(spg_t pgid);
PG *_lookup_lock_pg(spg_t pgid);

public:
PG *lookup_lock_pg(spg_t pgid);

protected:
PG *_open_lock_pg(OSDMapRef createmap,
spg_t pg, bool no_lockdep_check=false);
enum res_result {
Expand Down
45 changes: 36 additions & 9 deletions src/osd/PG.cc
Expand Up @@ -4065,22 +4065,49 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
{
if (g_conf->osd_scrub_sleep > 0 &&
(scrubber.state == PG::Scrubber::NEW_CHUNK ||
scrubber.state == PG::Scrubber::INACTIVE)) {
scrubber.state == PG::Scrubber::INACTIVE) &&
scrubber.needs_sleep) {
ceph_assert(!scrubber.sleeping);
dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;
unlock();
utime_t t;
t.set_from_double(g_conf->osd_scrub_sleep);
handle.suspend_tp_timeout();
t.sleep();
handle.reset_tp_timeout();
lock();
dout(20) << __func__ << " slept for " << t << dendl;

// Do an async sleep so we don't block the op queue
OSDService *osds = osd;
spg_t pgid = get_pgid();
int state = scrubber.state;
auto scrub_requeue_callback =
new FunctionContext([osds, pgid, state](int r) {
PG *pg = osds->osd->lookup_lock_pg(pgid);
if (pg == nullptr) {
lgeneric_dout(osds->osd->cct, 20)
<< "scrub_requeue_callback: Could not find "
<< "PG " << pgid << " can't complete scrub requeue after sleep"
<< dendl;
return;
}
pg->scrubber.sleeping = false;
pg->scrubber.needs_sleep = false;
lgeneric_dout(pg->cct, 20)
<< "scrub_requeue_callback: slept for "
<< ceph_clock_now() - pg->scrubber.sleep_start
<< ", re-queuing scrub with state " << state << dendl;
pg->scrub_queued = false;
pg->requeue_scrub();
pg->scrubber.sleep_start = utime_t();
pg->unlock();
});
Mutex::Locker l(osd->scrub_sleep_lock);
osd->scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep,
scrub_requeue_callback);
scrubber.sleeping = true;
scrubber.sleep_start = ceph_clock_now();
return;
}
if (pg_has_reset_since(queued)) {
return;
}
assert(scrub_queued);
scrub_queued = false;
scrubber.needs_sleep = true;

if (!is_primary() || !is_active() || !is_clean() || !is_scrubbing()) {
dout(10) << "scrub -- not primary or active or not clean" << dendl;
Expand Down
10 changes: 9 additions & 1 deletion src/osd/PG.h
Expand Up @@ -34,6 +34,7 @@
#include "osd_types.h"
#include "include/xlist.h"
#include "SnapMapper.h"
#include "common/Timer.h"

#include "PGLog.h"
#include "OSDMap.h"
Expand Down Expand Up @@ -872,7 +873,6 @@ class PG : protected DoutPrefixProvider {
public:
void clear_primary_state();

public:
bool is_actingbackfill(pg_shard_t osd) const {
return actingbackfill.count(osd);
}
Expand Down Expand Up @@ -1101,6 +1101,11 @@ class PG : protected DoutPrefixProvider {
OpRequestRef active_rep_scrub;
utime_t scrub_reg_stamp; // stamp we registered for

// For async sleep
bool sleeping = false;
bool needs_sleep = true;
utime_t sleep_start;

// flags to indicate explicitly requested scrubs (by admin)
bool must_scrub, must_deep_scrub, must_repair;

Expand Down Expand Up @@ -1219,6 +1224,9 @@ class PG : protected DoutPrefixProvider {
authoritative.clear();
num_digest_updates_pending = 0;
cleaned_meta_map = ScrubMap();
sleeping = false;
needs_sleep = true;
sleep_start = utime_t();
}

void create_results(const hobject_t& obj);
Expand Down

0 comments on commit d0d9836

Please sign in to comment.