Skip to content

Commit

Permalink
rgw: meta sync thread crash at RGWMetaSyncShardCR
Browse files Browse the repository at this point in the history
Fixes: http://tracker.ceph.com/issues/20251

Signed-off-by: fang yuxiang <fang.yuxiang@eisoo.com>
(cherry picked from commit 45877d3)

Conflicts:
	src/rgw/rgw_data_sync.cc
          - kraken RGWContinuousLeaseCR() has slightly different options
	src/rgw/rgw_sync.cc
          - kraken RGWContinuousLeaseCR() has slightly different options
          - added "override" specifier to ~RGWFetchAllMetaCR() definition
  • Loading branch information
fangyuxiangGL authored and smithfarm committed Jul 6, 2017
1 parent e12eae9 commit 8937145
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 52 deletions.
28 changes: 13 additions & 15 deletions src/rgw/rgw_data_sync.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1060,8 +1060,8 @@ class RGWDataSyncShardCR : public RGWCoroutine {

set<string> spawned_keys;

RGWContinuousLeaseCR *lease_cr;
RGWCoroutinesStack *lease_stack;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
string status_oid;


Expand Down Expand Up @@ -1102,7 +1102,6 @@ class RGWDataSyncShardCR : public RGWCoroutine {
delete marker_tracker;
if (lease_cr) {
lease_cr->abort();
lease_cr->put();
}
if (error_repo) {
error_repo->put();
Expand Down Expand Up @@ -1150,13 +1149,12 @@ class RGWDataSyncShardCR : public RGWCoroutine {
string lock_name = "sync_lock";
if (lease_cr) {
lease_cr->abort();
lease_cr->put();
}
RGWRados *store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, status_oid,
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
store->get_zone_params().log_pool, status_oid,
lock_name, lock_duration, this));
lease_stack.reset(spawn(lease_cr, false));
}

int full_sync() {
Expand Down Expand Up @@ -1328,7 +1326,7 @@ class RGWDataSyncShardCR : public RGWCoroutine {
set_status() << "num_spawned() > spawn_window";
yield wait_for_child();
int ret;
while (collect(&ret, lease_stack)) {
while (collect(&ret, lease_stack.get())) {
if (ret < 0) {
ldout(sync_env->cct, 0) << "ERROR: a sync operation returned error" << dendl;
/* we have reported this error */
Expand Down Expand Up @@ -2663,12 +2661,12 @@ int RGWRunBucketSyncCoroutine::operate()
yield {
set_status("acquiring sync lock");
auto store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store,
store->get_zone_params().log_pool,
status_oid, "sync_lock",
cct->_conf->rgw_sync_lease_period,
this);
lease_stack = spawn(lease_cr.get(), false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
store->get_zone_params().log_pool,
status_oid, "sync_lock",
cct->_conf->rgw_sync_lease_period,
this));
lease_stack.reset(spawn(lease_cr.get(), false));
}
while (!lease_cr->is_locked()) {
if (lease_cr->is_done()) {
Expand Down
65 changes: 28 additions & 37 deletions src/rgw/rgw_sync.cc
Original file line number Diff line number Diff line change
Expand Up @@ -592,8 +592,8 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {

rgw_meta_sync_info status;
vector<RGWMetadataLogInfo> shards_info;
RGWContinuousLeaseCR *lease_cr;
RGWCoroutinesStack *lease_stack;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
public:
RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env,
const rgw_meta_sync_info &status)
Expand All @@ -604,7 +604,6 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {
~RGWInitSyncStatusCoroutine() {
if (lease_cr) {
lease_cr->abort();
lease_cr->put();
}
}

Expand All @@ -616,10 +615,10 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
RGWRados *store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, sync_env->status_oid(),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
store->get_zone_params().log_pool, sync_env->status_oid(),
lock_name, lock_duration, this));
lease_stack.reset(spawn(lease_cr.get(), false));
}
while (!lease_cr->is_locked()) {
if (lease_cr->is_done()) {
Expand Down Expand Up @@ -652,7 +651,7 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {
}
}

drain_all_but_stack(lease_stack); /* the lease cr still needs to run */
drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */

yield {
set_status("updating sync status");
Expand Down Expand Up @@ -737,8 +736,8 @@ class RGWFetchAllMetaCR : public RGWCoroutine {

std::unique_ptr<RGWShardedOmapCRManager> entries_index;

RGWContinuousLeaseCR *lease_cr;
RGWCoroutinesStack *lease_stack;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
bool lost_lock;
bool failed;

Expand All @@ -752,10 +751,7 @@ class RGWFetchAllMetaCR : public RGWCoroutine {
lost_lock(false), failed(false), markers(_markers) {
}

~RGWFetchAllMetaCR() {
if (lease_cr) {
lease_cr->put();
}
~RGWFetchAllMetaCR() override {
}

void append_section_from_set(set<string>& all_sections, const string& name) {
Expand Down Expand Up @@ -791,10 +787,11 @@ class RGWFetchAllMetaCR : public RGWCoroutine {
set_status(string("acquiring lock (") + sync_env->status_oid() + ")");
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, sync_env->store, sync_env->store->get_zone_params().log_pool, sync_env->status_oid(),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, sync_env->store,
sync_env->store->get_zone_params().log_pool,
sync_env->status_oid(), lock_name,
lock_duration, this));
lease_stack = spawn(lease_cr.get(), false);
}
while (!lease_cr->is_locked()) {
if (lease_cr->is_done()) {
Expand Down Expand Up @@ -875,7 +872,7 @@ class RGWFetchAllMetaCR : public RGWCoroutine {
}
}

drain_all_but_stack(lease_stack); /* the lease cr still needs to run */
drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */

yield lease_cr->go_down();

Expand Down Expand Up @@ -1267,8 +1264,9 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
boost::asio::coroutine incremental_cr;
boost::asio::coroutine full_cr;

RGWContinuousLeaseCR *lease_cr = nullptr;
RGWCoroutinesStack *lease_stack = nullptr;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;

bool lost_lock = false;

bool *reset_backoff;
Expand Down Expand Up @@ -1299,7 +1297,6 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
delete marker_tracker;
if (lease_cr) {
lease_cr->abort();
lease_cr->put();
}
}

Expand Down Expand Up @@ -1391,15 +1388,11 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
yield {
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
if (lease_cr) {
lease_cr->put();
}
RGWRados *store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, pool,
sync_env->shard_obj_name(shard_id),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, pool,
sync_env->shard_obj_name(shard_id),
lock_name, lock_duration, this));
lease_stack.reset(spawn(lease_cr.get(), false));
lost_lock = false;
}
while (!lease_cr->is_locked()) {
Expand Down Expand Up @@ -1492,8 +1485,7 @@ class RGWMetaSyncShardCR : public RGWCoroutine {

yield lease_cr->go_down();

lease_cr->put();
lease_cr = NULL;
lease_cr.reset();

drain_all();

Expand Down Expand Up @@ -1525,11 +1517,10 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
RGWRados *store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, pool,
sync_env->shard_obj_name(shard_id),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store, pool,
sync_env->shard_obj_name(shard_id),
lock_name, lock_duration, this));
lease_stack.reset(spawn(lease_cr.get(), false));
lost_lock = false;
}
while (!lease_cr->is_locked()) {
Expand Down

0 comments on commit 8937145

Please sign in to comment.