Skip to content

Commit

Permalink
osd: add multiple objecter finishers
Browse files Browse the repository at this point in the history
Bluestore and Filestore already adopt multiple finishers to improve IO performance.
But the Objecter finisher (for the tier approach) is still single-threaded. This can be a
bottleneck if we generate as many IOs as Bluestore and Filestore do, because most of
the completion processing (proxy, writeback) is handled by an objecter finisher.

Signed-off-by: Myoungwon Oh <omwmw@sk.com>
  • Loading branch information
myoungwon committed Jul 25, 2017
1 parent 62b6d2d commit 66bb4d7
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 9 deletions.
1 change: 1 addition & 0 deletions src/common/legacy_config_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,7 @@ OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT, 1) // numbe
OPTION(osd_tier_default_cache_min_write_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on write)
OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT, 20)
OPTION(osd_tier_default_cache_hit_set_search_last_n, OPT_INT, 1)
OPTION(osd_objecter_finishers, OPT_INT, 1)

OPTION(osd_map_dedup, OPT_BOOL, true)
OPTION(osd_map_max_advance, OPT_INT, 40) // make this < cache_size!
Expand Down
4 changes: 4 additions & 0 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2494,6 +2494,10 @@ const std::vector<Option> ceph_options = {
.set_default(1)
.set_description(""),

Option("osd_objecter_finishers", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(1)
.set_description(""),

Option("osd_map_dedup", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description(""),
Expand Down
24 changes: 20 additions & 4 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ OSDService::OSDService(OSD *osd) :
promote_max_objects(0),
promote_max_bytes(0),
objecter(new Objecter(osd->client_messenger->cct, osd->objecter_messenger, osd->monc, NULL, 0, 0)),
objecter_finisher(osd->client_messenger->cct),
m_objecter_finishers(cct->_conf->osd_objecter_finishers),
watch_lock("OSDService::watch_lock"),
watch_timer(osd->client_messenger->cct, watch_lock),
next_notif_id(0),
Expand Down Expand Up @@ -280,11 +280,23 @@ OSDService::OSDService(OSD *osd) :
#endif
{
objecter->init();

for (int i = 0; i < m_objecter_finishers; i++) {
ostringstream str;
str << "objecter-finisher-" << i;
Finisher *fin = new Finisher(osd->client_messenger->cct, str.str(), "finisher");
objecter_finishers.push_back(fin);
}
}

// Destroys the Objecter and every Finisher held in objecter_finishers.
OSDService::~OSDService()
{
  delete objecter;

  // Iterate by reference: with a by-value loop variable the reset below
  // would only touch a local copy and leave dangling pointers in the vector.
  for (auto& f : objecter_finishers) {
    delete f;
    f = nullptr;
  }
}


Expand Down Expand Up @@ -526,8 +538,10 @@ void OSDService::shutdown()
}

objecter->shutdown();
objecter_finisher.wait_for_empty();
objecter_finisher.stop();
for (auto f : objecter_finishers) {
f->wait_for_empty();
f->stop();
}

{
Mutex::Locker l(recovery_request_lock);
Expand All @@ -551,7 +565,9 @@ void OSDService::shutdown()
void OSDService::init()
{
reserver_finisher.start();
objecter_finisher.start();
for (auto f : objecter_finishers) {
f->start();
}
objecter->set_client_incarnation(0);

// deprioritize objecter in daemonperf output
Expand Down
3 changes: 2 additions & 1 deletion src/osd/OSD.h
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,8 @@ class OSDService {

// -- Objecter, for tiering reads/writes from/to other OSDs --
Objecter *objecter;
Finisher objecter_finisher;
int m_objecter_finishers;
vector<Finisher*> objecter_finishers;

// -- Watch --
Mutex watch_lock;
Expand Down
12 changes: 8 additions & 4 deletions src/osd/PrimaryLogPG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2797,10 +2797,11 @@ void PrimaryLogPG::do_proxy_read(OpRequestRef op, ObjectContextRef obc)

C_ProxyRead *fin = new C_ProxyRead(this, soid, get_last_peering_reset(),
prdop);
unsigned n = info.pgid.hash_to_shard(osd->m_objecter_finishers);
ceph_tid_t tid = osd->objecter->read(
soid.oid, oloc, obj_op,
m->get_snapid(), NULL,
flags, new C_OnFinisher(fin, &osd->objecter_finisher),
flags, new C_OnFinisher(fin, osd->objecter_finishers[n]),
&prdop->user_version,
&prdop->data_offset,
m->get_features());
Expand Down Expand Up @@ -2985,10 +2986,11 @@ void PrimaryLogPG::do_proxy_write(OpRequestRef op, const hobject_t& missing_oid,

C_ProxyWrite_Commit *fin = new C_ProxyWrite_Commit(
this, soid, get_last_peering_reset(), pwop);
unsigned n = info.pgid.hash_to_shard(osd->m_objecter_finishers);
ceph_tid_t tid = osd->objecter->mutate(
soid.oid, oloc, obj_op, snapc,
ceph::real_clock::from_ceph_timespec(pwop->mtime),
flags, new C_OnFinisher(fin, &osd->objecter_finisher),
flags, new C_OnFinisher(fin, osd->objecter_finishers[n]),
&pwop->user_version, pwop->reqid);
fin->tid = tid;
pwop->objecter_tid = tid;
Expand Down Expand Up @@ -7878,8 +7880,9 @@ void PrimaryLogPG::_copy_some(ObjectContextRef obc, CopyOpRef cop)

C_Copyfrom *fin = new C_Copyfrom(this, obc->obs.oi.soid,
get_last_peering_reset(), cop);
unsigned n = info.pgid.hash_to_shard(osd->m_objecter_finishers);
gather.set_finisher(new C_OnFinisher(fin,
&osd->objecter_finisher));
osd->objecter_finishers[n]));

ceph_tid_t tid = osd->objecter->read(cop->src.oid, cop->oloc, op,
cop->src.snap, NULL,
Expand Down Expand Up @@ -8701,12 +8704,13 @@ int PrimaryLogPG::start_flush(
}
C_Flush *fin = new C_Flush(this, soid, get_last_peering_reset());

unsigned n = info.pgid.hash_to_shard(osd->m_objecter_finishers);
ceph_tid_t tid = osd->objecter->mutate(
soid.oid, base_oloc, o, snapc,
ceph::real_clock::from_ceph_timespec(oi.mtime),
CEPH_OSD_FLAG_IGNORE_OVERLAY | CEPH_OSD_FLAG_ENFORCE_SNAPC,
new C_OnFinisher(fin,
&osd->objecter_finisher));
osd->objecter_finishers[n]));
/* we're under the pg lock and fin->finish() is grabbing that */
fin->tid = tid;
fop->objecter_tid = tid;
Expand Down

0 comments on commit 66bb4d7

Please sign in to comment.