Skip to content

Commit

Permalink
osd: Move scrub sleep timer to osdservice
Browse files Browse the repository at this point in the history
PR 14886 erroneously creates a scrub sleep timer for every PG, resulting
in a proliferation of threads. Move the timer to the OSD service so
there can be only one.

Fixes: http://tracker.ceph.com/issues/19986

Signed-off-by: Brad Hubbard <bhubbard@redhat.com>
(cherry picked from commit f110a82)
  • Loading branch information
badone committed Jul 25, 2017
1 parent 460a820 commit 89131e3
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 26 deletions.
11 changes: 11 additions & 0 deletions src/osd/OSD.cc
Expand Up @@ -262,6 +262,9 @@ OSDService::OSDService(OSD *osd) :
osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */),
snap_reserver(&reserver_finisher,
cct->_conf->osd_max_trimming_pgs),
scrub_sleep_lock("OSDService::scrub_sleep_lock"),
scrub_sleep_timer(
osd->client_messenger->cct, scrub_sleep_lock, false /* relax locking */),
recovery_lock("OSDService::recovery_lock"),
recovery_ops_active(0),
recovery_ops_reserved(0),
Expand Down Expand Up @@ -535,6 +538,8 @@ void OSDService::shutdown()
// Shut down each sleep timer while holding that timer's own lock.
// Every timer gets a separate scope: two Mutex::Locker guards named `l`
// cannot be declared in the same block (that is a redeclaration error), and
// scoping them separately also releases snap_sleep_lock before
// scrub_sleep_lock is acquired instead of nesting the two locks.
{
  Mutex::Locker l(snap_sleep_lock);
  snap_sleep_timer.shutdown();
}

{
  Mutex::Locker l(scrub_sleep_lock);
  scrub_sleep_timer.shutdown();
}

osdmap = OSDMapRef();
Expand All @@ -549,6 +554,7 @@ void OSDService::init()
watch_timer.init();
agent_timer.init();
snap_sleep_timer.init();
scrub_sleep_timer.init();

agent_thread.create("osd_srv_agent");

Expand Down Expand Up @@ -3334,6 +3340,11 @@ PG *OSD::_lookup_lock_pg(spg_t pgid)
return pg;
}

// Forwarding wrapper around _lookup_lock_pg(): looks up the PG for `pgid`
// and returns whatever the underlying helper returns.
// NOTE(review): the scrub requeue callback treats a nullptr result as
// "PG not found" and unlocks a non-null result when done -- confirm that
// _lookup_lock_pg() returns the PG locked.
PG *OSD::lookup_lock_pg(spg_t pgid)
{
  PG *found = _lookup_lock_pg(pgid);
  return found;
}

PG *OSD::_lookup_lock_pg_with_map_lock_held(spg_t pgid)
{
assert(pg_map.count(pgid));
Expand Down
9 changes: 9 additions & 0 deletions src/osd/OSD.h
Expand Up @@ -894,6 +894,10 @@ class OSDService {
SafeTimer snap_sleep_timer;

AsyncReserver<spg_t> snap_reserver;

Mutex scrub_sleep_lock;
SafeTimer scrub_sleep_timer;

void queue_for_snap_trim(PG *pg);

void queue_for_scrub(PG *pg) {
Expand Down Expand Up @@ -2012,6 +2016,11 @@ class OSD : public Dispatcher,
Session *session);
PG *_lookup_lock_pg_with_map_lock_held(spg_t pgid);
PG *_lookup_lock_pg(spg_t pgid);

public:
PG *lookup_lock_pg(spg_t pgid);

protected:
PG *_open_lock_pg(OSDMapRef createmap,
spg_t pg, bool no_lockdep_check=false);
enum res_result {
Expand Down
53 changes: 31 additions & 22 deletions src/osd/PG.cc
Expand Up @@ -254,9 +254,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
do_sort_bitwise(false),
last_epoch(0),
scrub_sleep_lock("PG::scrub_sleep_lock"),
scrub_sleep_timer(o->cct, scrub_sleep_lock, false /* relax locking */)
last_epoch(0)
{
#ifdef PG_DEBUG_REFS
osd->add_pgid(p, this);
Expand All @@ -266,8 +264,6 @@ PG::PG(OSDService *o, OSDMapRef curmap,

PG::~PG()
{
Mutex::Locker l(scrub_sleep_lock);
scrub_sleep_timer.shutdown();
#ifdef PG_DEBUG_REFS
osd->remove_pgid(info.pgid, this);
#endif
Expand Down Expand Up @@ -2820,8 +2816,6 @@ void PG::init(
dirty_info = true;
dirty_big_info = true;
write_if_dirty(*t);

scrub_sleep_timer.init();
}

#pragma GCC diagnostic ignored "-Wpragmas"
Expand Down Expand Up @@ -4071,24 +4065,39 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
{
if (g_conf->osd_scrub_sleep > 0 &&
(scrubber.state == PG::Scrubber::NEW_CHUNK ||
scrubber.state == PG::Scrubber::INACTIVE) && scrubber.needs_sleep) {
scrubber.state == PG::Scrubber::INACTIVE) &&
scrubber.needs_sleep) {
ceph_assert(!scrubber.sleeping);
dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;

// Do an async sleep so we don't block the op queue
auto scrub_requeue_callback = new FunctionContext([this](int r) {
lock();
scrubber.sleeping = false;
scrubber.needs_sleep = false;
dout(20) << __func__ << " slept for "
<< ceph_clock_now() - scrubber.sleep_start
<< ", re-queuing scrub" << dendl;
scrub_queued = false;
requeue_scrub();
scrubber.sleep_start = utime_t();
unlock();
});
Mutex::Locker l(scrub_sleep_lock);
scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, scrub_requeue_callback);
OSDService *osds = osd;
spg_t pgid = get_pgid();
int state = scrubber.state;
// Timer callback run once the scrub sleep has elapsed. It captures the
// OSDService pointer, the pgid and the scrubber state by value and looks the
// PG up again when it fires, rather than capturing a PG pointer.
auto scrub_requeue_callback =
new FunctionContext([osds, pgid, state](int r) {
// NOTE(review): lookup_lock_pg() presumably returns the PG locked, since
// the callback calls pg->unlock() at the end -- confirm against OSD.cc.
PG *pg = osds->osd->lookup_lock_pg(pgid);
if (pg == nullptr) {
// The lookup returned nothing: log it and abandon the requeue.
lgeneric_dout(osds->osd->cct, 20)
<< "scrub_requeue_callback: Could not find "
<< "PG " << pgid << " can't complete scrub requeue after sleep"
<< dendl;
return;
}
// Clear the sleep bookkeeping before the scrub is queued again.
pg->scrubber.sleeping = false;
pg->scrubber.needs_sleep = false;
// `state` is the scrubber state captured when the sleep was scheduled; it is
// used only in this log message.
lgeneric_dout(pg->cct, 20)
<< "scrub_requeue_callback: slept for "
<< ceph_clock_now() - pg->scrubber.sleep_start
<< ", re-queuing scrub with state " << state << dendl;
// Reset scrub_queued before calling requeue_scrub().
pg->scrub_queued = false;
pg->requeue_scrub();
// sleep_start is reset only after it has been used for the log line above.
pg->scrubber.sleep_start = utime_t();
pg->unlock();
});
Mutex::Locker l(osd->scrub_sleep_lock);
osd->scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep,
scrub_requeue_callback);
scrubber.sleeping = true;
scrubber.sleep_start = ceph_clock_now();
return;
Expand Down
4 changes: 0 additions & 4 deletions src/osd/PG.h
Expand Up @@ -873,7 +873,6 @@ class PG : protected DoutPrefixProvider {
public:
void clear_primary_state();

public:
// Reports whether the given shard is a member of actingbackfill.
bool is_actingbackfill(pg_shard_t osd) const {
  const bool present = actingbackfill.count(osd) != 0;
  return present;
}
Expand Down Expand Up @@ -2088,9 +2087,6 @@ class PG : protected DoutPrefixProvider {
bool do_sort_bitwise;
epoch_t last_epoch;

Mutex scrub_sleep_lock;
SafeTimer scrub_sleep_timer;

public:
// Read-only accessor: returns a const reference to this PG's pg_id member.
const spg_t& get_pgid() const {
  return this->pg_id;
}

Expand Down

0 comments on commit 89131e3

Please sign in to comment.