From b2229bcb62f28cde7a3bcff6cd94ca9a0e423f1b Mon Sep 17 00:00:00 2001
From: root
Date: Wed, 20 Apr 2016 14:15:55 +0530
Subject: [PATCH] rgw: Have a flavor of bucket deletion to bypass GC and to
 trigger object deletions async.

Fixes: http://tracker.ceph.com/issues/15557

Signed-off-by: Pavan Rallabhandi
(cherry picked from commit b7a69fca248afeef1de1278890076693b16cf6d3)
---
 src/rgw/rgw_admin.cc              |  14 ++-
 src/rgw/rgw_bucket.cc             | 195 ++++++++++++++++++++++++++++--
 src/rgw/rgw_bucket.h              |   9 +-
 src/rgw/rgw_rados.cc              |  45 +++++++
 src/rgw/rgw_rados.h               |   2 +
 src/test/cli/radosgw-admin/help.t |   4 +
 6 files changed, 257 insertions(+), 12 deletions(-)

diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index f6de39f7e5cf4..24435dde6126d 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -227,6 +227,10 @@ void _usage()
   cout << " --caps= list of caps (e.g., \"usage=read, write; user=read\"\n";
   cout << " --yes-i-really-mean-it required for certain operations\n";
   cout << " --reset-regions reset regionmap when regionmap update\n";
+  cout << " --bypass-gc when specified with bucket deletion, triggers\n";
+  cout << " object deletions by not involving GC\n";
+  cout << " --inconsistent-index when specified with bucket deletion and bypass-gc set to true,\n";
+  cout << " ignores bucket index consistency\n";
   cout << "\n";
   cout << " := \"YYYY-MM-DD[ hh:mm:ss]\"\n";
   cout << "\nQuota options:\n";
@@ -2036,6 +2040,8 @@ int main(int argc, char **argv)
 
   int sync_stats = false;
   int reset_regions = false;
+  int bypass_gc = false;
+  int inconsistent_index = false;
 
   int extra_info = false;
 
@@ -2247,7 +2253,11 @@ int main(int argc, char **argv)
      // do nothing
     } else if (ceph_argparse_binary_flag(args, i, &extra_info, NULL, "--extra-info", (char*)NULL)) {
      // do nothing
+    } else if (ceph_argparse_binary_flag(args, i, &bypass_gc, NULL, "--bypass-gc", (char*)NULL)) {
+     // do nothing
+    } else if (ceph_argparse_binary_flag(args, i, &inconsistent_index, NULL, "--inconsistent-index", (char*)NULL)) {
+     // do nothing
     } else if (ceph_argparse_binary_flag(args, i, &reset_regions, NULL, "--reset-regions", (char*)NULL)) {
     } else if (ceph_argparse_witharg(args, i, &val, "--caps", (char*)NULL)) {
       caps = val;
     } else if (ceph_argparse_witharg(args, i, &val, "-i", "--infile", (char*)NULL)) {
@@ -3687,6 +3697,7 @@ int main(int argc, char **argv)
   bucket_op.set_check_objects(check_objects);
   bucket_op.set_delete_children(delete_child_objects);
   bucket_op.set_fix_index(fix);
+  bucket_op.set_max_aio(max_concurrent_ios);
 
   // required to gather errors from operations
   std::string err_msg;
@@ -4634,7 +4645,8 @@ int main(int argc, char **argv)
   }
 
   if (opt_cmd == OPT_BUCKET_RM) {
-    RGWBucketAdminOp::remove_bucket(store, bucket_op);
+    // --inconsistent-index asks the removal not to keep the bucket index consistent
+    RGWBucketAdminOp::remove_bucket(store, bucket_op, bypass_gc, !inconsistent_index);
   }
 
   if (opt_cmd == OPT_GC_LIST) {
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
index 685968cd66c17..60b8cc6999d91 100644
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -17,6 +17,7 @@
 #include "rgw_user.h"
 #include "rgw_string.h"
 
+#include "include/rados/librados.hpp"
 // until everything is moved from rgw_common
 #include "rgw_common.h"
 
@@ -459,9 +460,7 @@ int rgw_remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_children)
   map stats;
   std::vector objs;
   map common_prefixes;
-  rgw_obj obj;
   RGWBucketInfo info;
-  bufferlist bl;
   RGWObjectCtx obj_ctx(store);
 
   string bucket_ver, master_ver;
@@ -470,8 +469,6 @@ int rgw_remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_children)
   if (ret < 0)
     return ret;
 
-  obj.bucket = bucket;
-
   ret = store->get_bucket_info(obj_ctx, bucket.tenant, bucket.name, info, NULL);
   if (ret < 0)
     return ret;
@@ -490,7 +487,7 @@ int rgw_remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_children)
 
     while (!objs.empty()) {
       std::vector::iterator it = objs.begin();
-      for (it = objs.begin(); it != 
objs.end(); ++it) {
+      for (; it != objs.end(); ++it) {
         ret = rgw_remove_object(store, info, bucket, (*it).key);
         if (ret < 0)
           return ret;
@@ -524,6 +521,216 @@ int rgw_remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_children)
   return ret;
 }
 
+// Wait for a single async operation to complete, release its completion
+// handle and return the operation's result.
+static int aio_wait(librados::AioCompletion *handle)
+{
+  handle->wait_for_complete();
+  int ret = handle->get_return_value();
+  handle->release();
+  return ret;
+}
+
+// Wait for and release every completion in 'pending', leaving it empty.
+// Returns 0 when all operations succeeded, otherwise the result of the last
+// operation that failed.
+static int drain_handles(list<librados::AioCompletion *>& pending)
+{
+  int ret = 0;
+  while (!pending.empty()) {
+    librados::AioCompletion *handle = pending.front();
+    pending.pop_front();
+    int r = aio_wait(handle);
+    if (r < 0) {
+      ret = r;
+    }
+  }
+  return ret;
+}
+
+// Drain 'pending' once it holds 'limit' or more completions, so that the
+// caller can queue another operation without exceeding the limit.
+static int throttle_handles(RGWRados *store,
+                            list<librados::AioCompletion *>& pending,
+                            size_t limit)
+{
+  if (pending.size() < limit)
+    return 0;
+
+  int ret = drain_handles(pending);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: could not drain handles as aio completion returned with " << ret << dendl;
+  }
+  return ret;
+}
+
+// List every object (all versions) in the bucket and queue asynchronous
+// deletes for the objects named by each entry's manifest: tail objects
+// first, the head object last.  'handles' is drained whenever it already
+// holds 'aio_limit' completions.  On return, also on error, 'handles' may
+// still hold completions that the caller has to drain.
+static int remove_bucket_objs_aio(RGWRados *store, rgw_bucket& bucket,
+                                  RGWBucketInfo& info, RGWObjectCtx& obj_ctx,
+                                  size_t aio_limit, bool keep_index_consistent,
+                                  list<librados::AioCompletion *>& handles)
+{
+  std::vector<RGWObjEnt> objs;
+  map<string, bool> common_prefixes;
+
+  RGWRados::Bucket target(store, info);
+  RGWRados::Bucket::List list_op(&target);
+
+  list_op.params.list_versions = true;
+
+  int max = 1000;
+  int ret = list_op.list_objects(max, &objs, &common_prefixes, NULL);
+  if (ret < 0)
+    return ret;
+
+  while (!objs.empty()) {
+    std::vector<RGWObjEnt>::iterator it = objs.begin();
+    for (; it != objs.end(); ++it) {
+      RGWObjState *astate = NULL;
+      rgw_obj obj(bucket, (*it).key.name);
+      obj.set_instance((*it).key.instance);
+
+      ret = store->get_obj_state(&obj_ctx, obj, &astate, NULL);
+      if (ret == -ENOENT) {
+        dout(1) << "WARNING: cannot find obj state for obj " << obj.get_object() << dendl;
+        continue;
+      }
+      if (ret < 0) {
+        lderr(store->ctx()) << "ERROR: get obj state returned with error " << ret << dendl;
+        return ret;
+      }
+
+      if (!astate->has_manifest) {
+        // only objects described by a manifest are removed here
+        continue;
+      }
+
+      rgw_obj head_obj;
+      RGWObjManifest& manifest = astate->manifest;
+      RGWObjManifest::obj_iterator miter = manifest.obj_begin();
+
+      if (miter.get_location().ns.empty()) {
+        head_obj = miter.get_location();
+      }
+
+      for (; miter != manifest.obj_end(); ++miter) {
+        rgw_obj last_obj = miter.get_location();
+        if (last_obj == head_obj) {
+          // have the head obj deleted at the end
+          continue;
+        }
+
+        ret = throttle_handles(store, handles, aio_limit);
+        if (ret < 0)
+          return ret;
+
+        ret = store->delete_obj_aio(last_obj, bucket, info, astate, handles, keep_index_consistent);
+        if (ret < 0) {
+          lderr(store->ctx()) << "ERROR: delete obj aio failed with " << ret << dendl;
+          return ret;
+        }
+      } // for all shadow objs
+
+      ret = throttle_handles(store, handles, aio_limit);
+      if (ret < 0)
+        return ret;
+
+      ret = store->delete_obj_aio(head_obj, bucket, info, astate, handles, keep_index_consistent);
+      if (ret < 0) {
+        lderr(store->ctx()) << "ERROR: delete obj aio failed with " << ret << dendl;
+        return ret;
+      }
+    } // for all RGW objects
+    objs.clear();
+
+    ret = list_op.list_objects(max, &objs, &common_prefixes, NULL);
+    if (ret < 0)
+      return ret;
+  }
+
+  return 0;
+}
+
+// Remove a bucket and all of its objects without involving GC: objects are
+// deleted directly through asynchronous operations, then the bucket object,
+// the bucket instance entry (unless bucket metadata is being synced) and the
+// owner's link to the bucket are removed.
+//
+// concurrent_max: upper bound on outstanding async deletes; values below 1
+//                 are treated as 1.
+// keep_index_consistent: passed through to RGWRados::delete_obj_aio().
+// Returns 0 on success or a negative error code.
+int rgw_remove_bucket_bypass_gc(RGWRados *store, rgw_bucket& bucket,
+                                int concurrent_max, bool keep_index_consistent)
+{
+  int ret;
+  map<RGWObjCategory, RGWStorageStats> stats;
+  RGWBucketInfo info;
+  RGWObjectCtx obj_ctx(store);
+
+  string bucket_ver, master_ver;
+
+  ret = store->get_bucket_stats(bucket, RGW_NO_SHARD, &bucket_ver, &master_ver, stats, NULL);
+  if (ret < 0)
+    return ret;
+
+  ret = store->get_bucket_info(obj_ctx, bucket.tenant, bucket.name, info, NULL);
+  if (ret < 0)
+    return ret;
+
+  // clamp non-positive values: a negative limit converted to size_t would
+  // effectively disable throttling
+  const size_t aio_limit = (concurrent_max > 0) ? static_cast<size_t>(concurrent_max) : 1;
+
+  std::list<librados::AioCompletion*> handles;
+  ret = remove_bucket_objs_aio(store, bucket, info, obj_ctx, aio_limit,
+                               keep_index_consistent, handles);
+
+  // always reap what was queued, so that no completion is leaked on error
+  int drain_ret = drain_handles(handles);
+  if (ret < 0)
+    return ret;
+  if (drain_ret < 0) {
+    lderr(store->ctx()) << "ERROR: could not drain handles as aio completion returned with " << drain_ret << dendl;
+    return drain_ret;
+  }
+
+  ret = rgw_bucket_sync_user_stats(store, bucket.tenant, bucket.name);
+  if (ret < 0) {
+    dout(1) << "WARNING: failed sync user stats before bucket delete. ret=" << ret << dendl;
+  }
+
+  RGWObjVersionTracker objv_tracker;
+
+  ret = rgw_bucket_delete_bucket_obj(store, bucket.tenant, bucket.name, objv_tracker);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: could not remove bucket " << bucket.name << " with ret as " << ret << dendl;
+    return ret;
+  }
+
+  if (!store->is_syncing_bucket_meta(bucket)) {
+    RGWObjVersionTracker instance_objv_tracker;
+    string entry;
+    store->get_bucket_instance_entry(bucket, entry);
+    ret = rgw_bucket_instance_remove_entry(store, entry, &instance_objv_tracker);
+    if (ret < 0) {
+      lderr(store->ctx()) << "ERROR: could not remove bucket instance entry " << bucket.name << " with ret as " << ret << dendl;
+      return ret;
+    }
+  }
+
+  ret = rgw_unlink_bucket(store, info.owner, bucket.tenant, bucket.name, false);
+  if (ret < 0) {
+    lderr(store->ctx()) << "ERROR: unable to remove user bucket information" << dendl;
+  }
+
+  return ret;
+}
+
 int rgw_bucket_delete_bucket_obj(RGWRados *store,
                                  const string& tenant_name,
                                  const string& bucket_name,
@@ -689,12 +896,24 @@ int RGWBucket::unlink(RGWBucketAdminOpState& op_state, std::string *err_msg)
   return r;
 }
 
-int RGWBucket::remove(RGWBucketAdminOpState& op_state, std::string *err_msg)
+int RGWBucket::remove(RGWBucketAdminOpState& op_state, bool bypass_gc,
+                      bool keep_index_consistent, std::string *err_msg)
 {
   bool delete_children = op_state.will_delete_children();
   rgw_bucket bucket = op_state.get_bucket();
+  int ret;
+
+  if (bypass_gc) {
+    if (delete_children) {
+      ret = rgw_remove_bucket_bypass_gc(store, bucket, op_state.get_max_aio(), keep_index_consistent);
+    } else {
+      set_err_msg(err_msg, "purge objects should be set for gc to be bypassed");
+      return -EINVAL;
+    }
+  } else {
+    ret = rgw_remove_bucket(store, bucket, delete_children);
+  }
 
-  int ret = rgw_remove_bucket(store, bucket, delete_children);
   if (ret < 0) {
     set_err_msg(err_msg, "unable to remove bucket" + cpp_strerror(-ret));
     return ret;
@@ -1101,7 +1320,8 @@ int RGWBucketAdminOp::check_index(RGWRados *store, RGWBucketAdminOpState& op_sta
   return 0;
 }
 
-int RGWBucketAdminOp::remove_bucket(RGWRados *store, RGWBucketAdminOpState& op_state)
+int RGWBucketAdminOp::remove_bucket(RGWRados *store, RGWBucketAdminOpState& op_state,
+                                    bool bypass_gc, bool keep_index_consistent)
 {
   RGWBucket bucket;
 
@@ -1109,7 +1329,7 @@ int RGWBucketAdminOp::remove_bucket(RGWRados *store, RGWBucketAdminOpState& op_s
   if (ret < 0)
     return ret;
 
-  return bucket.remove(op_state);
+  return bucket.remove(op_state, bypass_gc, keep_index_consistent);
 }
 
 int RGWBucketAdminOp::remove_object(RGWRados *store, RGWBucketAdminOpState& op_state)
diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h
index 8a2c28c0ead9d..c4f67b0782710 100644
--- a/src/rgw/rgw_bucket.h
+++ b/src/rgw/rgw_bucket.h
@@ -174,6 +174,7 @@ extern int rgw_unlink_bucket(RGWRados *store, const rgw_user& user_id,
 
 extern int rgw_remove_object(RGWRados *store, RGWBucketInfo& bucket_info, rgw_bucket& bucket, rgw_obj_key& key);
 extern int rgw_remove_bucket(RGWRados *store, rgw_bucket& bucket, bool delete_children);
+extern int rgw_remove_bucket_bypass_gc(RGWRados *store, rgw_bucket& bucket, int concurrent_max, bool keep_index_consistent);
 
 extern int rgw_bucket_set_attrs(RGWRados *store, RGWBucketInfo& bucket_info,
                                 map& attrs,
@@ -194,6 +195,7 @@ struct RGWBucketAdminOpState {
   bool fix_index;
   bool delete_child_objects;
   bool bucket_stored;
+  int max_aio;
 
   rgw_bucket bucket;
 
@@ -202,6 +204,8 @@ struct RGWBucketAdminOpState {
   void set_fix_index(bool value) { fix_index = value; }
   void set_delete_children(bool value) { delete_child_objects = value; }
 
+  void set_max_aio(int value) { max_aio = value; }
+
   void set_user_id(rgw_user& user_id) {
     if (!user_id.empty())
       uid = user_id;
@@ -236,6 +240,7 @@ struct RGWBucketAdminOpState {
   bool is_user_op() { return !uid.empty(); }
   bool is_system_op() { return uid.empty(); }
   bool has_bucket_stored() { return bucket_stored; }
+  int get_max_aio() { return max_aio; }
 
   RGWBucketAdminOpState() : list_buckets(false), stat_buckets(false), 
check_objects(false),
                             fix_index(false), delete_child_objects(false),
@@ -275,7 +280,7 @@ class RGWBucket
           map& calculated_stats,
           std::string *err_msg = NULL);
 
-  int remove(RGWBucketAdminOpState& op_state, std::string *err_msg = NULL);
+  int remove(RGWBucketAdminOpState& op_state, bool bypass_gc = false, bool keep_index_consistent = true, std::string *err_msg = NULL);
   int link(RGWBucketAdminOpState& op_state, std::string *err_msg = NULL);
   int unlink(RGWBucketAdminOpState& op_state, std::string *err_msg = NULL);
 
@@ -302,7 +307,7 @@ class RGWBucketAdminOp
   static int check_index(RGWRados *store, RGWBucketAdminOpState& op_state,
                   RGWFormatterFlusher& flusher);
 
-  static int remove_bucket(RGWRados *store, RGWBucketAdminOpState& op_state);
+  static int remove_bucket(RGWRados *store, RGWBucketAdminOpState& op_state, bool bypass_gc = false, bool keep_index_consistent = true);
   static int remove_object(RGWRados *store, RGWBucketAdminOpState& op_state);
   static int info(RGWRados *store, RGWBucketAdminOpState& op_state, RGWFormatterFlusher& flusher);
 };
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index a76c276bb2cf6..e56ccfe832ecd 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -12111,3 +12111,61 @@ librados::Rados* RGWRados::get_rados_handle()
   }
 }
 
+/**
+ * Queue an asynchronous removal of obj and append its completion to handles.
+ *
+ * Every completion appended to handles has to be waited for and released by
+ * the caller.  When keep_index_consistent is set, a CLS_RGW_OP_DEL index
+ * operation is prepared before the removal is queued and delete_obj_index()
+ * is called once it has been queued.
+ *
+ * Returns a negative error code on failure; if the removal could not be
+ * queued, nothing is appended to handles.
+ */
+int RGWRados::delete_obj_aio(rgw_obj& obj, rgw_bucket& bucket,
+                             RGWBucketInfo& bucket_info, RGWObjState *astate,
+                             list<librados::AioCompletion *>& handles, bool keep_index_consistent)
+{
+  rgw_rados_ref ref;
+  int ret = get_obj_ref(obj, &ref, &bucket);
+  if (ret < 0) {
+    lderr(cct) << "ERROR: failed to get obj ref with ret=" << ret << dendl;
+    return ret;
+  }
+
+  if (keep_index_consistent) {
+    RGWRados::Bucket bop(this, bucket_info);
+    RGWRados::Bucket::UpdateIndex index_op(&bop, obj, astate);
+
+    ret = index_op.prepare(CLS_RGW_OP_DEL);
+    if (ret < 0) {
+      lderr(cct) << "ERROR: failed to prepare index op with ret=" << ret << dendl;
+      return ret;
+    }
+  }
+
+  ObjectWriteOperation op;
+  list<string> prefixes;
+  cls_rgw_remove_obj(op, prefixes);
+
+  AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+  ret = ref.ioctx.aio_operate(ref.oid, c, &op);
+  if (ret < 0) {
+    lderr(cct) << "ERROR: AioOperate failed with ret=" << ret << dendl;
+    // c is not handed to the caller on this path, so release it here
+    c->release();
+    return ret;
+  }
+
+  handles.push_back(c);
+
+  if (keep_index_consistent) {
+    ret = delete_obj_index(obj);
+    if (ret < 0) {
+      lderr(cct) << "ERROR: failed to delete obj index with ret=" << ret << dendl;
+      return ret;
+    }
+  }
+  return ret;
+}
+
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index 9704fb6d1d61b..9caeadf885519 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -2929,6 +2929,8 @@ class RGWRados
 
   librados::Rados* get_rados_handle();
 
+  int delete_obj_aio(rgw_obj& obj, rgw_bucket& bucket, RGWBucketInfo& info, RGWObjState *astate,
+                     list<librados::AioCompletion *>& handles, bool keep_index_consistent);
  private:
   /**
    * This is a helper method, it generates a list of bucket index objects with the given
diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
index b38106e33b2db..ac32c878d21df 100644
--- a/src/test/cli/radosgw-admin/help.t
+++ b/src/test/cli/radosgw-admin/help.t
@@ -179,6 +179,10 @@
   --caps= list of caps (e.g., "usage=read, write; user=read"
   --yes-i-really-mean-it required for certain operations
   --reset-regions reset regionmap when regionmap update
+  --bypass-gc when specified with bucket deletion, triggers
+  object deletions by not involving GC
+  --inconsistent-index when specified with bucket deletion and bypass-gc set to true,
+  ignores bucket index consistency
 
   := "YYYY-MM-DD[ hh:mm:ss]"