From 460a820a3b2fbd48c8a7966502b235aae8d5d298 Mon Sep 17 00:00:00 2001 From: Brad Hubbard Date: Mon, 24 Apr 2017 14:10:47 +1000 Subject: [PATCH 1/2] osd: Implement asynchronous scrub sleep Rather than blocking the main op queue just do an async sleep. Fixes: http://tracker.ceph.com/issues/19497 Signed-off-by: Brad Hubbard (cherry picked from commit 7af3e86c2e4992db35637864b83832535c94d0e6) --- src/osd/PG.cc | 38 ++++++++++++++++++++++++++++---------- src/osd/PG.h | 12 ++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index a29576aa5cea1..bf71f8e1a80a0 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -254,7 +254,9 @@ PG::PG(OSDService *o, OSDMapRef curmap, acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), do_sort_bitwise(false), - last_epoch(0) + last_epoch(0), + scrub_sleep_lock("PG::scrub_sleep_lock"), + scrub_sleep_timer(o->cct, scrub_sleep_lock, false /* relax locking */) { #ifdef PG_DEBUG_REFS osd->add_pgid(p, this); @@ -264,6 +266,8 @@ PG::PG(OSDService *o, OSDMapRef curmap, PG::~PG() { + Mutex::Locker l(scrub_sleep_lock); + scrub_sleep_timer.shutdown(); #ifdef PG_DEBUG_REFS osd->remove_pgid(info.pgid, this); #endif @@ -2816,6 +2820,8 @@ void PG::init( dirty_info = true; dirty_big_info = true; write_if_dirty(*t); + + scrub_sleep_timer.init(); } #pragma GCC diagnostic ignored "-Wpragmas" @@ -4065,22 +4071,34 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle) { if (g_conf->osd_scrub_sleep > 0 && (scrubber.state == PG::Scrubber::NEW_CHUNK || - scrubber.state == PG::Scrubber::INACTIVE)) { + scrubber.state == PG::Scrubber::INACTIVE) && scrubber.needs_sleep) { + ceph_assert(!scrubber.sleeping); dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl; - unlock(); - utime_t t; - t.set_from_double(g_conf->osd_scrub_sleep); - handle.suspend_tp_timeout(); - t.sleep(); - handle.reset_tp_timeout(); - lock(); - dout(20) << __func__ << " slept for " << t << dendl; + // Do an async sleep so we don't block the op queue + auto scrub_requeue_callback = new FunctionContext([this](int r) { + lock(); + scrubber.sleeping = false; + scrubber.needs_sleep = false; + dout(20) << __func__ << " slept for " + << ceph_clock_now() - scrubber.sleep_start + << ", re-queuing scrub" << dendl; + scrub_queued = false; + requeue_scrub(); + scrubber.sleep_start = utime_t(); + unlock(); + }); + Mutex::Locker l(scrub_sleep_lock); + scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, scrub_requeue_callback); + scrubber.sleeping = true; + scrubber.sleep_start = ceph_clock_now(); + return; } if (pg_has_reset_since(queued)) { return; } assert(scrub_queued); scrub_queued = false; + scrubber.needs_sleep = true; if (!is_primary() || !is_active() || !is_clean() || !is_scrubbing()) { dout(10) << "scrub -- not primary or active or not clean" << dendl; diff --git a/src/osd/PG.h b/src/osd/PG.h index 76327121b8d97..862a98ec988cd 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -34,6 +34,7 @@ #include "osd_types.h" #include "include/xlist.h" #include "SnapMapper.h" +#include "common/Timer.h" #include "PGLog.h" #include "OSDMap.h" @@ -1101,6 +1102,11 @@ class PG : protected DoutPrefixProvider { OpRequestRef active_rep_scrub; utime_t scrub_reg_stamp; // stamp we registered for + // For async sleep + bool sleeping = false; + bool needs_sleep = true; + utime_t sleep_start; + // flags to indicate explicitly requested scrubs (by admin) bool must_scrub, must_deep_scrub, must_repair; @@ -1219,6 +1225,9 @@ class PG : protected DoutPrefixProvider { authoritative.clear(); num_digest_updates_pending = 0; cleaned_meta_map = ScrubMap(); + sleeping = false; + needs_sleep = true; + sleep_start = utime_t(); } void create_results(const hobject_t& obj); @@ -2079,6 +2088,9 @@ class PG : protected DoutPrefixProvider { bool do_sort_bitwise; epoch_t last_epoch; + Mutex scrub_sleep_lock; + SafeTimer scrub_sleep_timer; + public: const spg_t& get_pgid() const { return pg_id; } From 719ed0101b1bfdd4b71ef84101515492597153f9 Mon Sep 17 00:00:00 2001 From: Brad Hubbard Date: Mon, 22 May 2017 13:21:25 +1000 Subject: [PATCH 2/2] osd: Move scrub sleep timer to osdservice PR 14886 erroneously creates a scrub sleep timer for every pg resulting in a proliferation of threads. Move the timer to the osd service so there can be only one. Fixes: http://tracker.ceph.com/issues/19986 Signed-off-by: Brad Hubbard (cherry picked from commit f110a82437df79dc20207d296e8229fc0e9ce18b) --- src/osd/OSD.cc | 14 +++++++++++++ src/osd/OSD.h | 9 +++++++++ src/osd/PG.cc | 53 +++++++++++++++++++++++++++++--------------------- src/osd/PG.h | 4 ---- 4 files changed, 54 insertions(+), 26 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 579625cec88ad..6bc97e55d9f77 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -262,6 +262,9 @@ OSDService::OSDService(OSD *osd) : osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */), snap_reserver(&reserver_finisher, cct->_conf->osd_max_trimming_pgs), + scrub_sleep_lock("OSDService::scrub_sleep_lock"), + scrub_sleep_timer( + osd->client_messenger->cct, scrub_sleep_lock, false /* relax locking */), recovery_lock("OSDService::recovery_lock"), recovery_ops_active(0), recovery_ops_reserved(0), @@ -537,6 +540,11 @@ void OSDService::shutdown() snap_sleep_timer.shutdown(); } + { + Mutex::Locker l(scrub_sleep_lock); + scrub_sleep_timer.shutdown(); + } + osdmap = OSDMapRef(); next_osdmap = OSDMapRef(); } @@ -549,6 +557,7 @@ void OSDService::init() watch_timer.init(); agent_timer.init(); snap_sleep_timer.init(); + scrub_sleep_timer.init(); agent_thread.create("osd_srv_agent"); @@ -3334,6 +3343,11 @@ PG *OSD::_lookup_lock_pg(spg_t pgid) return pg; } +PG *OSD::lookup_lock_pg(spg_t pgid) +{ + return _lookup_lock_pg(pgid); +} + PG *OSD::_lookup_lock_pg_with_map_lock_held(spg_t pgid) { assert(pg_map.count(pgid)); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 715ea1a57b884..696ddaa579a51 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -894,6 +894,10 @@ class OSDService { SafeTimer snap_sleep_timer; AsyncReserver snap_reserver; + + Mutex scrub_sleep_lock; + SafeTimer scrub_sleep_timer; + void queue_for_snap_trim(PG *pg); void queue_for_scrub(PG *pg) { @@ -2012,6 +2016,11 @@ class OSD : public Dispatcher, Session *session); PG *_lookup_lock_pg_with_map_lock_held(spg_t pgid); PG *_lookup_lock_pg(spg_t pgid); + +public: + PG *lookup_lock_pg(spg_t pgid); + +protected: PG *_open_lock_pg(OSDMapRef createmap, spg_t pg, bool no_lockdep_check=false); enum res_result { diff --git a/src/osd/PG.cc b/src/osd/PG.cc index bf71f8e1a80a0..744c51aa01d7a 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -254,9 +254,7 @@ PG::PG(OSDService *o, OSDMapRef curmap, acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), do_sort_bitwise(false), - last_epoch(0), - scrub_sleep_lock("PG::scrub_sleep_lock"), - scrub_sleep_timer(o->cct, scrub_sleep_lock, false /* relax locking */) + last_epoch(0) { #ifdef PG_DEBUG_REFS osd->add_pgid(p, this); @@ -266,8 +264,6 @@ PG::PG(OSDService *o, OSDMapRef curmap, PG::~PG() { - Mutex::Locker l(scrub_sleep_lock); - scrub_sleep_timer.shutdown(); #ifdef PG_DEBUG_REFS osd->remove_pgid(info.pgid, this); #endif @@ -2820,8 +2816,6 @@ void PG::init( dirty_info = true; dirty_big_info = true; write_if_dirty(*t); - - scrub_sleep_timer.init(); } #pragma GCC diagnostic ignored "-Wpragmas" @@ -4071,24 +4065,39 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle) { if (g_conf->osd_scrub_sleep > 0 && (scrubber.state == PG::Scrubber::NEW_CHUNK || - scrubber.state == PG::Scrubber::INACTIVE) && scrubber.needs_sleep) { + scrubber.state == PG::Scrubber::INACTIVE) && + scrubber.needs_sleep) { ceph_assert(!scrubber.sleeping); dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl; + // Do an async sleep so we don't block the op queue - auto scrub_requeue_callback = new FunctionContext([this](int r) { - lock(); - scrubber.sleeping = false; - scrubber.needs_sleep = false; - dout(20) << __func__ << " slept for " - << ceph_clock_now() - scrubber.sleep_start - << ", re-queuing scrub" << dendl; - scrub_queued = false; - requeue_scrub(); - scrubber.sleep_start = utime_t(); - unlock(); - }); - Mutex::Locker l(scrub_sleep_lock); - scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, scrub_requeue_callback); + OSDService *osds = osd; + spg_t pgid = get_pgid(); + int state = scrubber.state; + auto scrub_requeue_callback = + new FunctionContext([osds, pgid, state](int r) { + PG *pg = osds->osd->lookup_lock_pg(pgid); + if (pg == nullptr) { + lgeneric_dout(osds->osd->cct, 20) + << "scrub_requeue_callback: Could not find " + << "PG " << pgid << " can't complete scrub requeue after sleep" + << dendl; + return; + } + pg->scrubber.sleeping = false; + pg->scrubber.needs_sleep = false; + lgeneric_dout(pg->cct, 20) + << "scrub_requeue_callback: slept for " + << ceph_clock_now() - pg->scrubber.sleep_start + << ", re-queuing scrub with state " << state << dendl; + pg->scrub_queued = false; + pg->requeue_scrub(); + pg->scrubber.sleep_start = utime_t(); + pg->unlock(); + }); + Mutex::Locker l(osd->scrub_sleep_lock); + osd->scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, + scrub_requeue_callback); scrubber.sleeping = true; scrubber.sleep_start = ceph_clock_now(); return; diff --git a/src/osd/PG.h b/src/osd/PG.h index 862a98ec988cd..8c0ebbd26a845 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -873,7 +873,6 @@ class PG : protected DoutPrefixProvider { public: void clear_primary_state(); - public: bool is_actingbackfill(pg_shard_t osd) const { return actingbackfill.count(osd); } @@ -2088,9 +2087,6 @@ class PG : protected DoutPrefixProvider { bool do_sort_bitwise; epoch_t last_epoch; - Mutex scrub_sleep_lock; - SafeTimer scrub_sleep_timer; - public: const spg_t& get_pgid() const { return pg_id; }