diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 5bf3ceec312346..f66e4a78d91d03 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -269,8 +269,16 @@ void usage() cout << " --index-pool= placement target index pool\n"; cout << " --data-pool= placement target data pool\n"; cout << " --data-extra-pool= placement target data extra (non-ec) pool\n"; + cout << " --current-tail-pool=\n"; + cout << " placement target tail pool\n"; + cout << " --data-tail-pools=[,...]\n"; + cout << " placement candidate tail pools\n"; + cout << " --new-tail-pool=\n"; + cout << " pool to be added to existing placement candidate tail pool list\n"; cout << " --placement-index-type=\n"; cout << " placement target index type (normal, indexless, or #id)\n"; + cout << " --placement-data-layout-type=\n"; + cout << " placement target data layout type (normal, splitted, or #id)\n"; cout << " --compression= placement target compression type (plugin name or empty/none)\n"; cout << " --tier-type= zone tier type\n"; cout << " --tier-config==[,...]\n"; @@ -2501,8 +2509,13 @@ int main(int argc, const char **argv) boost::optional index_pool; boost::optional data_pool; boost::optional data_extra_pool; + boost::optional current_tail_pool; + boost::optional new_tail_pool; + boost::optional< list > tail_pools; RGWBucketIndexType placement_index_type = RGWBIType_Normal; + RGWBucketDataLayoutType placement_data_layout_type = RGWDLType_SinglePool; bool index_type_specified = false; + bool data_layout_type_specified = false; boost::optional compression_type; @@ -2832,6 +2845,27 @@ int main(int argc, const char **argv) perm_policy_doc = val; } else if (ceph_argparse_witharg(args, i, &val, "--path-prefix", (char*)NULL)) { path_prefix = val; + } else if (ceph_argparse_witharg(args, i, &val, "--placement-data-layout-type", (char*)NULL)) { + if (val == "single-pool") { + placement_data_layout_type= RGWDLType_SinglePool; + } else if (val == "split-pool") { + placement_data_layout_type = 
RGWDLType_SplitPool;
+      } else {
+        placement_data_layout_type = (RGWBucketDataLayoutType)strict_strtol(val.c_str(), 10, &err);  /* NOTE(review): numeric #id is cast without a range check */
+        if (!err.empty()) {
+          cerr << "ERROR: failed to parse data layout type: " << err << std::endl;
+          return EINVAL;
+        }
+      }
+      data_layout_type_specified = true;
+    } else if (ceph_argparse_witharg(args, i, &val, "--current-tail-pool", (char*)NULL)) {
+      current_tail_pool = val;
+    } else if (ceph_argparse_witharg(args, i, &val, "--data-tail-pools", (char*)NULL)) {
+      list tmp_tail_pools;
+      get_str_list(val, tmp_tail_pools);
+      tail_pools = tmp_tail_pools;
+    } else if (ceph_argparse_witharg(args, i, &val, "--new-tail-pool", (char*)NULL)) {
+      new_tail_pool = val;
     } else if (strncmp(*i, "-", 1) == 0) {
       cerr << "ERROR: invalid flag " << *i << std::endl;
       return EINVAL;
@@ -4323,6 +4357,36 @@ int main(int argc, const char **argv)
         if (compression_type) {
           info.compression_type = *compression_type;
         }
+        if (data_layout_type_specified && current_tail_pool && tail_pools) {  /* all three split-pool options were given */
+          if (placement_data_layout_type != RGWDLType_SplitPool) {
+            cerr << "ERROR: --placement-data-layout-type should be split-pool to make "
+              "--current-tail-pool and --data-tail-pools valid." << std::endl;
+            return EINVAL;
+          }
+          /* tail_pools is engaged here (tested by the enclosing condition), so only membership is checked */
+          if (std::find(tail_pools->begin(),
+                        tail_pools->end(),
+                        *current_tail_pool) == tail_pools->end()) {
+            cerr << "ERROR: the value specified by --current-tail-pool should be "
+                 << "in the list specified by --data-tail-pools"
+                 << std::endl;
+            return EINVAL;
+          }
+          info.data_layout_type = placement_data_layout_type;
+          info.current_tail_pool = *current_tail_pool;
+          info.data_tail_pools.clear();
+          for (auto &p : *tail_pools) {
+            info.data_tail_pools.push_back(rgw_pool(p));
+          }
+        } else if ((data_layout_type_specified &&
+                    placement_data_layout_type==RGWDLType_SplitPool) ||
+                   current_tail_pool ||
+                   tail_pools) {  /* only part of the split-pool option set was given */
+          cerr << "ERROR: --placement-data-layout-type, --current-tail-pool or "
+               << "--data-tail-pools is missing."
+               << std::endl;
+          return EINVAL;
+        }
 
         ret = check_pool_support_omap(info.get_data_extra_pool());
         if (ret < 0) {
@@ -4353,6 +4417,42 @@ int main(int argc, const char **argv)
         if (compression_type) {
           info.compression_type = *compression_type;
         }
+        if (data_layout_type_specified) {
+          info.data_layout_type = placement_data_layout_type;
+        }
+        if (current_tail_pool) {
+          info.current_tail_pool = *current_tail_pool;
+        }
+        if (tail_pools) {
+          /* --data-tail-pools names the whole candidate list, so replace the
+           * stored list (as the add path does) instead of appending to it */
+          info.data_tail_pools.clear();
+          for (auto &p : *tail_pools) {
+            info.data_tail_pools.push_back(rgw_pool(p));
+          }
+        }
+        if (new_tail_pool) {
+          /* --new-tail-pool extends the list; a pool that is already a candidate is not added twice */
+          const rgw_pool added_pool(*new_tail_pool);
+          if (std::find(info.data_tail_pools.begin(),
+                        info.data_tail_pools.end(),
+                        added_pool) == info.data_tail_pools.end()) {
+            info.data_tail_pools.push_back(added_pool);
+          }
+        }
+        if (info.data_layout_type == RGWDLType_SplitPool) {
+          /* after applying the options, a split-pool placement needs a current
+           * tail pool that is one of its candidate tail pools */
+          if (info.current_tail_pool.empty() || info.data_tail_pools.empty() ||
+              std::find(info.data_tail_pools.begin(),
+                        info.data_tail_pools.end(),
+                        info.current_tail_pool) == info.data_tail_pools.end()) {
+            cerr << "ERROR: --current-tail-pool or --data-tail-pools "
+                 << "is improper for split-pool data layout"
+                 << std::endl;
+            return EINVAL;
+          }
+        }
 
         ret = check_pool_support_omap(info.get_data_extra_pool());
         if (ret < 0) {
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 2e111423168729..8d5b84b8a63808 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -1197,6 +1197,24 @@ inline ostream& operator<<(ostream& out, const RGWBucketIndexType &index_type)
   }
 }
 
+enum RGWBucketDataLayoutType {
+  RGWDLType_SinglePool = 0,  /* default layout (see the RGWZonePlacementInfo constructor) */
+  RGWDLType_SplitPool = 1,   /* layout that enables the separate tail data pool */
+};
+
+inline ostream& operator<<(ostream& out, const RGWBucketDataLayoutType &data_layout_type)
+{
+  switch (data_layout_type) {
+    case RGWDLType_SinglePool:
+      return out << "SinglePool";
+    case RGWDLType_SplitPool:
+      return out << "SplitPool";
+    default:  /* values outside the enum, e.g. a numeric #id from the command line */
+      return out << "Unknown";
+  }
+}
+
+
 struct RGWBucketInfo
 {
   enum BIShardsHashType {
@@ -2111,6 +2129,39 @@ struct rgw_cache_entry_info {
   rgw_cache_entry_info() : gen(0) {}
 };
 
+struct rgw_data_placement_volatile_config {
+  RGWBucketDataLayoutType data_layout_type;
+  rgw_pool tail_data_pool;
+
rgw_data_placement_volatile_config(): data_layout_type(RGWDLType_SinglePool) {} + rgw_data_placement_volatile_config(const rgw_data_placement_volatile_config&) = default; + rgw_data_placement_volatile_config(rgw_data_placement_volatile_config&&) = default; + + rgw_data_placement_volatile_config(RGWBucketDataLayoutType data_layout_type, + const rgw_pool& tail_data_pool) + : data_layout_type(data_layout_type), tail_data_pool(tail_data_pool) { + } + + rgw_data_placement_volatile_config& + operator=(const rgw_data_placement_volatile_config&) = default; + + RGWBucketDataLayoutType get_data_layout_type() const { + return data_layout_type; + } + + const rgw_pool& get_tail_data_pool() const { + return tail_data_pool; + } + + bool empty() const { + return (data_layout_type == RGWDLType_SinglePool); + } + + void dump(Formatter *f) const; + void decode_json(JSONObj *obj); +}; + + inline ostream& operator<<(ostream& out, const rgw_obj &o) { return out << o.bucket.name << ":" << o.get_oid(); } diff --git a/src/rgw/rgw_dencoder.cc b/src/rgw/rgw_dencoder.cc index 891d66141cffaf..c81ff23ec5a635 100644 --- a/src/rgw/rgw_dencoder.cc +++ b/src/rgw/rgw_dencoder.cc @@ -173,9 +173,12 @@ void RGWObjManifest::get_implicit_location(uint64_t cur_part_id, uint64_t cur_st oid = *override_prefix; } + location->set_head(false); + location->set_data_placement_volatile_config(data_placement_vc); if (!cur_part_id) { if (ofs < max_head_size) { location->set_placement_rule(head_placement_rule); + location->set_head(true); *location = obj; return; } else { diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc index 798970723f51d0..5d729ebfc08940 100644 --- a/src/rgw/rgw_json_enc.cc +++ b/src/rgw/rgw_json_enc.cc @@ -107,6 +107,7 @@ void RGWObjManifest::dump(Formatter *f) const ::encode_json("rules", rules, f); ::encode_json("tail_instance", tail_instance, f); ::encode_json("tail_placement", tail_placement, f); + ::encode_json("data_placement_vc", data_placement_vc, f); } void 
rgw_log_entry::dump(Formatter *f) const @@ -583,6 +584,19 @@ void rgw_data_placement_target::decode_json(JSONObj *obj) { JSONDecoder::decode_json("index_pool", index_pool, obj); } +void rgw_data_placement_volatile_config::dump(Formatter *f) const +{ + encode_json("data_layout_type", (uint32_t)data_layout_type, f); + encode_json("tail_data_pool", tail_data_pool, f); +} + +void rgw_data_placement_volatile_config::decode_json(JSONObj *obj) { + uint32_t it(0); + JSONDecoder::decode_json("data_layout_type", it, obj); + data_layout_type = (RGWBucketDataLayoutType)it; + JSONDecoder::decode_json("tail_data_pool", tail_data_pool, obj); +} + void rgw_bucket::dump(Formatter *f) const { encode_json("name", name, f); @@ -930,8 +944,11 @@ void RGWZonePlacementInfo::dump(Formatter *f) const encode_json("index_pool", index_pool, f); encode_json("data_pool", data_pool, f); encode_json("data_extra_pool", data_extra_pool, f); + encode_json("current_tail_pool", current_tail_pool, f); + encode_json("data_tail_pools", data_tail_pools, f); encode_json("index_type", (uint32_t)index_type, f); encode_json("compression", compression_type, f); + encode_json("data_layout_type", (uint32_t)data_layout_type, f); } void RGWZonePlacementInfo::decode_json(JSONObj *obj) diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 253bc2a0ccf1d2..b92a81a8d6e41c 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -3038,7 +3038,12 @@ int RGWPutObjProcessor_Multipart::prepare(RGWRados *store, string *oid_rand) manifest.set_multipart_part_rule(store->ctx()->_conf->rgw_obj_stripe_size, num); - int r = manifest_gen.create_begin(store->ctx(), &manifest, s->bucket_info.placement_rule, bucket, target_obj); + int r = manifest_gen.create_begin(store->ctx(), + &manifest, + s->bucket_info.placement_rule, + data_placement_vc, + bucket, + target_obj); if (r < 0) { return r; } @@ -3141,6 +3146,9 @@ RGWPutObjProcessor *RGWPutObj::select_processor(RGWObjectCtx& obj_ctx, bool *is_ processor = new 
RGWPutObjProcessor_Atomic(obj_ctx, s->bucket_info, s->bucket, s->object.name, part_size, s->req_id, s->bucket_info.versioning_enabled()); (static_cast(processor))->set_olh_epoch(olh_epoch); (static_cast(processor))->set_version_id(version_id); + rgw_data_placement_volatile_config dpvc; + store->get_zone_params().get_data_placement_volatile_config(s->bucket_info.placement_rule, &dpvc); + (static_cast(processor))->set_data_placement_volatile_config(dpvc); } else { processor = new RGWPutObjProcessor_Multipart(obj_ctx, s->bucket_info, part_size, s); } diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index cca45ea3814348..07a26689bff24c 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -146,11 +146,22 @@ static bool rgw_obj_to_raw(const RGWZoneGroup& zonegroup, const RGWZoneParams& z return rgw_get_obj_data_pool(zonegroup, zone_params, placement_id, obj, &raw_obj->pool); } +static bool rgw_obj_to_raw(const rgw_pool& pool, const rgw_obj& obj, rgw_raw_obj *raw_obj) +{ + get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc); + raw_obj->pool = pool; + return true; +} + rgw_raw_obj rgw_obj_select::get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const { if (!is_raw) { rgw_raw_obj r; - rgw_obj_to_raw(zonegroup, zone_params, placement_rule, obj, &r); + if (in_volatile_tail_pool()) { + rgw_obj_to_raw(data_placement_vc.get_tail_data_pool(), obj, &r); + } else { + rgw_obj_to_raw(zonegroup, zone_params, placement_rule, obj, &r); + } return r; } return raw_obj; @@ -160,7 +171,11 @@ rgw_raw_obj rgw_obj_select::get_raw_obj(RGWRados *store) const { if (!is_raw) { rgw_raw_obj r; - store->obj_to_raw(placement_rule, obj, &r); + if (in_volatile_tail_pool()) { + store->obj_to_raw(data_placement_vc.get_tail_data_pool(), obj, &r); + } else { + store->obj_to_raw(placement_rule, obj, &r); + } return r; } return raw_obj; @@ -2182,11 +2197,18 @@ void RGWObjManifest::obj_iterator::operator++() update_location(); } -int 
RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m, const string& placement_rule, rgw_bucket& _b, rgw_obj& _obj) +int RGWObjManifest::generator::create_begin( + CephContext *cct, + RGWObjManifest *_m, + const string& placement_rule, + const rgw_data_placement_volatile_config& dpvc, + rgw_bucket& _b, + rgw_obj& _obj) { manifest = _m; manifest->set_tail_placement(placement_rule, _b); + manifest->set_data_placement_volatile_config(dpvc); manifest->set_head(placement_rule, _obj, 0); last_ofs = 0; @@ -2714,7 +2736,12 @@ int RGWPutObjProcessor_Atomic::prepare(RGWRados *store, string *oid_rand) manifest.set_trivial_rule(max_chunk_size, store->ctx()->_conf->rgw_obj_stripe_size); - r = manifest_gen.create_begin(store->ctx(), &manifest, bucket_info.placement_rule, head_obj.bucket, head_obj); + r = manifest_gen.create_begin(store->ctx(), + &manifest, + bucket_info.placement_rule, + data_placement_vc, + head_obj.bucket, + head_obj); if (r < 0) { return r; } @@ -6104,6 +6131,14 @@ bool RGWRados::obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_ return get_obj_data_pool(placement_rule, obj, &raw_obj->pool); } +bool RGWRados::obj_to_raw(const rgw_pool& pool, const rgw_obj& obj, rgw_raw_obj *raw_obj) +{ + get_obj_bucket_and_oid_loc(obj, raw_obj->oid, raw_obj->loc); + raw_obj->pool = pool; + return true; +} + + int RGWRados::update_placement_map() { bufferlist header; @@ -6270,6 +6305,19 @@ int RGWRados::get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj return 0; } +int RGWRados::get_obj_ioctx(const rgw_obj& obj, const rgw_pool& pool, librados::IoCtx *ioctx) +{ + string oid, key; + get_obj_bucket_and_oid_loc(obj, oid, key); + int r = open_pool_ctx(pool, *ioctx); + if (r < 0) { + return r; + } + ioctx->locator_set_key(key); + return 0; +} + + int RGWRados::get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref) { get_obj_bucket_and_oid_loc(obj, ref->oid, ref->key); @@ -9831,6 +9879,17 @@ 
int RGWRados::Object::Read::prepare()
   if (r < 0) {
     return r;
   }
+  /* tail parts may live in a pool other than the head's; give them their own IoCtx */
+  const rgw_data_placement_volatile_config& dpvc = astate->manifest.get_data_placement_volatile_config();
+  if (astate->has_manifest && astate->manifest.has_tail() &&
+      dpvc.get_data_layout_type() == RGWDLType_SplitPool &&
+      !dpvc.get_tail_data_pool().empty() && dpvc.get_tail_data_pool() != state.head_obj.pool) {
+    r = store->get_obj_ioctx(state.obj, dpvc.get_tail_data_pool(), &state.tail_io_ctx);
+    if (r < 0) return r;  /* do not carry on with a tail IoCtx that failed to open */
+  } else {
+    state.tail_io_ctx.dup(state.io_ctx);  /* tail shares the head pool */
+  }
+
   if (params.attrs) {
     *params.attrs = astate->attrset;
     if (cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) {
@@ -10194,11 +10253,13 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl)
     len = max_chunk_size;
 
 
-  state.io_ctx.locator_set_key(read_obj.loc);
+  librados::IoCtx effective_io_ctx;  /* IoCtx this read is issued on: head pool or tail pool */
 
   read_len = len;
 
   if (reading_from_head) {
+    effective_io_ctx.dup(state.io_ctx);
+    effective_io_ctx.locator_set_key(read_obj.loc);
     /* only when reading from the head object do we need to do the atomic test */
     r = store->append_atomic_test(&source->get_ctx(), source->get_bucket_info(), state.obj, op, &astate);
     if (r < 0)
@@ -10222,12 +10283,15 @@ int RGWRados::Object::Read::read(int64_t ofs, int64_t end, bufferlist& bl)
         pbl = &read_bl;
       }
     }
+  } else {
+    effective_io_ctx.dup(state.tail_io_ctx);  /* not the head: read through the tail IoCtx set up in prepare() */
+    effective_io_ctx.locator_set_key(read_obj.loc);
   }
 
   ldout(cct, 20) << "rados->read obj-ofs=" << ofs << " read_ofs=" << read_ofs << " read_len=" << read_len << dendl;
   op.read(read_ofs, read_len, pbl, NULL);
 
-  r = state.io_ctx.operate(read_obj.oid, &op, NULL);
+  r = effective_io_ctx.operate(read_obj.oid, &op, NULL);
   ldout(cct, 20) << "rados->read r=" << r << " bl.length=" << bl.length() << dendl;
 
   if (r < 0) {
@@ -10347,6 +10411,7 @@ struct get_obj_data : public RefCountedObject {
   RGWRados *rados;
   RGWObjectCtx *ctx;
   IoCtx io_ctx;
+  IoCtx tail_io_ctx;
   map io_map;
   map
completion_map;
   uint64_t total_read;
@@ -10613,6 +10678,7 @@ int RGWRados::get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate,
   string oid, key;
   bufferlist *pbl;
   AioCompletion *c;
+  librados::IoCtx effective_io_ctx;  /* IoCtx the async read is issued on: head pool or tail pool */
 
   int r;
 
@@ -10642,6 +10708,9 @@ int RGWRados::get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate,
       if (!len)
         return 0;
     }
+    effective_io_ctx = d->io_ctx;
+  } else {
+    effective_io_ctx = d->tail_io_ctx;
   }
 
   d->throttle.get(len);
@@ -10657,10 +10726,9 @@ int RGWRados::get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate,
   ldout(cct, 20) << "rados->get_obj_iterate_cb oid=" << read_obj.oid << " obj-ofs=" << obj_ofs << " read_ofs=" << read_ofs << " len=" << len << dendl;
   op.read(read_ofs, len, pbl, NULL);
 
-  librados::IoCtx io_ctx(d->io_ctx);
-  io_ctx.locator_set_key(read_obj.loc);
+  effective_io_ctx.locator_set_key(read_obj.loc);
 
-  r = io_ctx.aio_operate(read_obj.oid, c, &op, NULL);
+  r = effective_io_ctx.aio_operate(read_obj.oid, c, &op, NULL);
   if (r < 0) {
     ldout(cct, 0) << "rados->aio_operate r=" << r << dendl;
     goto done_err;
@@ -10693,6 +10761,7 @@ int RGWRados::Object::Read::iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb)
 
   data->rados = store;
   data->io_ctx.dup(state.io_ctx);
+  data->tail_io_ctx.dup(state.tail_io_ctx);
   data->client_cb = cb;
 
   int r = store->iterate_obj(obj_ctx, source->get_bucket_info(), state.obj, ofs, end, cct->_conf->rgw_get_obj_max_req_size, _get_obj_iterate_cb, (void *)data);
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index a3682eb147d324..ef16d02eef4e8d 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -141,6 +141,8 @@ class rgw_obj_select {
   rgw_obj obj;
   rgw_raw_obj raw_obj;
   bool is_raw;
+  bool is_head = false;  /* initialised here because the constructors do not set it; NOTE(review): confirm copy/assignment carry it over */
+  rgw_data_placement_volatile_config data_placement_vc;
 
 public:
   rgw_obj_select() : is_raw(false) {}
@@ -174,6 +176,20 @@ class rgw_obj_select {
   void set_placement_rule(const string& rule) {
     placement_rule = rule;
   }
+
+  bool in_volatile_tail_pool() const {
+    return !
is_head && + data_placement_vc.get_data_layout_type() == RGWDLType_SplitPool && + ! data_placement_vc.get_tail_data_pool().empty(); + } + + void set_head(bool v) { + is_head = v; + } + + void set_data_placement_volatile_config(const rgw_data_placement_volatile_config& v) { + data_placement_vc = v; + } }; struct compression_block { @@ -425,6 +441,7 @@ class RGWObjManifest { string prefix; rgw_bucket_placement tail_placement; /* might be different than the original bucket, as object might have been copied across pools */ + rgw_data_placement_volatile_config data_placement_vc; map rules; string tail_instance; /* tail object's instance */ @@ -455,6 +472,7 @@ class RGWObjManifest { tail_placement = rhs.tail_placement; rules = rhs.rules; tail_instance = rhs.tail_instance; + data_placement_vc = rhs.data_placement_vc; begin_iter.set_manifest(this); end_iter.set_manifest(this); @@ -492,7 +510,7 @@ class RGWObjManifest { } void encode(bufferlist& bl) const { - ENCODE_START(7, 6, bl); + ENCODE_START(8, 6, bl); encode(obj_size, bl); encode(objs, bl); encode(explicit_objs, bl); @@ -513,11 +531,17 @@ class RGWObjManifest { } encode(head_placement_rule, bl); encode(tail_placement.placement_rule, bl); + bool encode_data_placement_vc = ! 
data_placement_vc.empty(); + encode(encode_data_placement_vc, bl); + if (encode_data_placement_vc) { + encode((uint32_t)data_placement_vc.data_layout_type, bl); + encode(data_placement_vc.tail_data_pool, bl); + } ENCODE_FINISH(bl); } void decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN_32(8, 2, 2, bl); decode(obj_size, bl); decode(objs, bl); if (struct_v >= 3) { @@ -584,6 +608,16 @@ class RGWObjManifest { decode(head_placement_rule, bl); decode(tail_placement.placement_rule, bl); } + if (struct_v >= 8) { + bool need_to_decode; + decode(need_to_decode, bl); + if (need_to_decode) { + uint32_t it; + decode(it, bl); + data_placement_vc.data_layout_type = (RGWBucketDataLayoutType)it; + decode(data_placement_vc.tail_data_pool, bl); + } + } update_iterators(); DECODE_FINISH(bl); @@ -643,6 +677,14 @@ class RGWObjManifest { return tail_placement; } + void set_data_placement_volatile_config(const rgw_data_placement_volatile_config& dpvc) { + data_placement_vc = dpvc; + } + + const rgw_data_placement_volatile_config& get_data_placement_volatile_config() const { + return data_placement_vc; + } + const string& get_head_placement_rule() { return head_placement_rule; } @@ -821,7 +863,13 @@ class RGWObjManifest { public: generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0), cur_stripe(0), cur_stripe_size(0) {} - int create_begin(CephContext *cct, RGWObjManifest *manifest, const string& placement_rule, rgw_bucket& bucket, rgw_obj& obj); + int create_begin( + CephContext *cct, + RGWObjManifest *manifest, + const string& placement_rule, + const rgw_data_placement_volatile_config& dpvc, + rgw_bucket& bucket, + rgw_obj& obj); int create_next(uint64_t ofs); @@ -1122,23 +1170,30 @@ struct RGWZonePlacementInfo { rgw_pool index_pool; rgw_pool data_pool; rgw_pool data_extra_pool; /* if not set we should use data_pool */ + rgw_pool current_tail_pool; + std::vector data_tail_pools; RGWBucketIndexType 
index_type; std::string compression_type; + RGWBucketDataLayoutType data_layout_type; - RGWZonePlacementInfo() : index_type(RGWBIType_Normal) {} + RGWZonePlacementInfo() : index_type(RGWBIType_Normal), + data_layout_type(RGWDLType_SinglePool) {} void encode(bufferlist& bl) const { - ENCODE_START(6, 1, bl); + ENCODE_START(7, 1, bl); encode(index_pool.to_str(), bl); encode(data_pool.to_str(), bl); encode(data_extra_pool.to_str(), bl); encode((uint32_t)index_type, bl); encode(compression_type, bl); + encode((uint32_t)data_layout_type, bl); + encode(current_tail_pool.to_str(), bl); + encode(data_tail_pools, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator& bl) { - DECODE_START(6, bl); + DECODE_START(7, bl); string index_pool_str; string data_pool_str; decode(index_pool_str, bl); @@ -1158,6 +1213,16 @@ struct RGWZonePlacementInfo { if (struct_v >= 6) { decode(compression_type, bl); } + if (struct_v >= 7) { + uint32_t it; + decode(it, bl); + data_layout_type = (RGWBucketDataLayoutType)it; + string current_tail_pool_str; + decode(current_tail_pool_str, bl); + current_tail_pool = rgw_pool(current_tail_pool_str); + decode(data_tail_pools, bl); + } + DECODE_FINISH(bl); } const rgw_pool& get_data_extra_pool() const { @@ -1166,6 +1231,10 @@ struct RGWZonePlacementInfo { } return data_extra_pool; } + const rgw_pool& get_tail_data_pool() const { + return current_tail_pool; + } + void dump(Formatter *f) const; void decode_json(JSONObj *obj); }; @@ -1330,6 +1399,20 @@ struct RGWZoneParams : RGWSystemMetaObj { } return true; } + + bool get_data_placement_volatile_config(const string& placement_id, + rgw_data_placement_volatile_config *pvc) const { + if (placement_id.empty()) { + return false; + } + auto iter = placement_pools.find(placement_id); + if (iter == placement_pools.end()) { + return false; + } + pvc->data_layout_type = iter->second.data_layout_type; + pvc->tail_data_pool = iter->second.get_tail_data_pool(); + return true; + } }; 
WRITE_CLASS_ENCODER(RGWZoneParams)
@@ -2311,6 +2394,7 @@ class RGWRados
   uint32_t bucket_index_max_shards;
 
   int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
+  int get_obj_ioctx(const rgw_obj& obj, const rgw_pool& pool, librados::IoCtx *ioctx);  /* like get_obj_head_ioctx, but for an explicitly given pool */
   int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
   int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
   uint64_t max_bucket_id;
@@ -2635,6 +2719,7 @@ class RGWRados
 
   bool get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool);
   bool obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
+  bool obj_to_raw(const rgw_pool& pool, const rgw_obj& obj, rgw_raw_obj *raw_obj);  /* overload that takes the target pool directly */
 
   int create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
                     const string& zonegroup_id,
@@ -2784,6 +2869,7 @@ class RGWRados
         librados::IoCtx io_ctx;
         rgw_obj obj;
         rgw_raw_obj head_obj;
+        librados::IoCtx tail_io_ctx;  /* IoCtx used for reads that are not from the head object */
       } state;
 
       struct ConditionParams {
@@ -3937,6 +4023,7 @@ class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio
   rgw_raw_obj cur_obj;
   RGWObjManifest manifest;
   RGWObjManifest::generator manifest_gen;
+  rgw_data_placement_volatile_config data_placement_vc;  /* handed to manifest_gen.create_begin() in prepare() */
 
   int write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool exclusive);
   int do_complete(size_t accounted_size, const string& etag,
@@ -3981,6 +4068,10 @@ class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio
   const string& get_version_id() const {
     return version_id;
   }
+
+  void set_data_placement_volatile_config(const rgw_data_placement_volatile_config &vc) {
+    data_placement_vc = vc;
+  }
 }; /* RGWPutObjProcessor_Atomic */
 
 #define MP_META_SUFFIX ".meta"
diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
index f4d0e0849ddb6e..2cb9159a8c02b3 100644
--- a/src/test/cli/radosgw-admin/help.t
+++ b/src/test/cli/radosgw-admin/help.t
@@ -212,6 +212,14 @@
      --index-pool=             placement target index pool
      --data-pool=              placement target data pool
      --data-extra-pool=        placement target data extra (non-ec) pool
+     --current-tail-pool=
+                               placement target tail pool
+     --data-tail-pools=[,...]
+                               placement candidate tail pools
+     --new-tail-pool=
+                               pool to be added to existing placement candidate tail pool list
      --placement-index-type=
                                placement target index type (normal, indexless, or #id)
+     --placement-data-layout-type=
+                               placement target data layout type (normal, splitted, or #id)
      --compression=            placement target compression type (plugin name or empty/none)
diff --git a/src/test/rgw/test_rgw_manifest.cc b/src/test/rgw/test_rgw_manifest.cc
index e72ecc50c77058..de45bc1ed87cca 100644
--- a/src/test/rgw/test_rgw_manifest.cc
+++ b/src/test/rgw/test_rgw_manifest.cc
@@ -136,7 +136,8 @@ static void gen_obj(test_rgw_env& env, uint64_t obj_size, uint64_t head_max_size
   test_rgw_init_bucket(bucket, "buck");
 
   *head = rgw_obj(*bucket, "oid");
-  gen->create_begin(g_ceph_context, manifest, placement_id, *bucket, *head);
+  rgw_data_placement_volatile_config dpvc;  /* default-constructed: RGWDLType_SinglePool */
+  gen->create_begin(g_ceph_context, manifest, placement_id, dpvc, *bucket, *head);
 
   append_head(test_objs, *head);
   cout << "test_objs.size()=" << test_objs->size() << std::endl;
@@ -311,9 +312,10 @@ TEST(TestRGWManifest, multipart) {
 
   uint64_t ofs;
   rgw_obj head;
+  rgw_data_placement_volatile_config dpvc;  /* default-constructed: RGWDLType_SinglePool */
   for (ofs = 0; ofs < part_size; ofs += stripe_size) {
     if (ofs == 0) {
-      int r = gen.create_begin(g_ceph_context, &manifest, env.zonegroup.default_placement, bucket, head);
+      int r = gen.create_begin(g_ceph_context, &manifest, env.zonegroup.default_placement, dpvc, bucket, head);
       ASSERT_EQ(r, 0);
       continue;
     }