diff --git a/engines/ep/src/collections/manifest.cc b/engines/ep/src/collections/manifest.cc index de2d9b2cc7..e86b1ea93c 100644 --- a/engines/ep/src/collections/manifest.cc +++ b/engines/ep/src/collections/manifest.cc @@ -59,6 +59,9 @@ static constexpr nlohmann::json::value_t DataSizeType = static constexpr char const* MeteredKey = "metered"; static constexpr nlohmann::json::value_t MeteredType = nlohmann::json::value_t::boolean; +static constexpr char const* HistoryKey = "history"; +static constexpr nlohmann::json::value_t HistoryType = + nlohmann::json::value_t::boolean; /** * Get json sub-object from the json object for key and check the type. @@ -144,6 +147,15 @@ Manifest::Manifest(std::string_view json, size_t numVbuckets) } } + // Does the scope disable deduplication? + CanDeduplicate scopeCanDeduplicate = CanDeduplicate::Yes; + auto scopeHistoryConfigured = + cb::getOptionalJsonObject(scope, HistoryKey, HistoryType); + if (scopeHistoryConfigured && + scopeHistoryConfigured.value().get()) { + scopeCanDeduplicate = CanDeduplicate::No; + } + std::vector scopeCollections = {}; // Read the collections within this scope @@ -210,6 +222,15 @@ Manifest::Manifest(std::string_view json, size_t numVbuckets) maxTtl = std::chrono::seconds(value); } + // Does the collection (re)define a history setting + CanDeduplicate collectionCanDeduplicate = scopeCanDeduplicate; + auto historyConfigured = cb::getOptionalJsonObject( + collection, HistoryKey, HistoryType); + // Does the collection disable deduplication? + if (historyConfigured && historyConfigured.value().get()) { + collectionCanDeduplicate = CanDeduplicate::No; + } + Metered meteredState{Metered::Yes}; if (metered && !metered.value()) { // metered:false present in JSON @@ -217,8 +238,12 @@ Manifest::Manifest(std::string_view json, size_t numVbuckets) } enableDefaultCollection(cidValue); - scopeCollections.push_back(CollectionEntry{ - cidValue, cnameValue, maxTtl, sidValue, meteredState}); + scopeCollections.push_back(CollectionEntry{cidValue, + cnameValue, + maxTtl, + sidValue, + collectionCanDeduplicate, + meteredState}); } // Check for limits - only support for data_size @@ -230,7 +255,8 @@ Manifest::Manifest(std::string_view json, size_t numVbuckets) Scope{dataLimit.first, dataLimit.second, nameValue, - std::move(scopeCollections)}); + std::move(scopeCollections), + scopeCanDeduplicate}); } // Now build the collection id to collection-entry map @@ -372,6 +398,10 @@ nlohmann::json Manifest::to_json( if (c.metered == Metered::No) { collection[MeteredKey] = false; } + if (getHistoryFromCanDeduplicate(c.canDeduplicate)) { + // Only include when the value is true + collection[HistoryKey] = true; + } scope[CollectionsKey].push_back(collection); } } @@ -384,6 +414,11 @@ nlohmann::json Manifest::to_json( scopeMeta.dataLimitFromCluster; scope[LimitsKey] = jsonDataLimit; } + + if (getHistoryFromCanDeduplicate(scopeMeta.canDeduplicate)) { + // Only include when the value is true + scope[HistoryKey] = true; + } manifest[ScopesKey].push_back(scope); } } @@ -405,6 +440,7 @@ flatbuffers::DetachedBuffer Manifest::toFlatbuffer() const { c.maxTtl.has_value(), c.maxTtl.value_or(std::chrono::seconds(0)).count(), builder.CreateString(c.name), + getHistoryFromCanDeduplicate(c.canDeduplicate), c.metered == Metered::Yes); fbCollections.push_back(newEntry); } @@ -421,14 +457,17 @@ flatbuffers::DetachedBuffer Manifest::toFlatbuffer() const { uint32_t(sid), builder.CreateString(scope.name), collectionVector, - limits); + limits, + getHistoryFromCanDeduplicate(scope.canDeduplicate)); fbScopes.push_back(newEntry); } else { auto newEntry = Collections::Persist::CreateScope( builder, uint32_t(sid), builder.CreateString(scope.name), - collectionVector); + collectionVector, + 0, // No limits + getHistoryFromCanDeduplicate(scope.canDeduplicate)); fbScopes.push_back(newEntry); } } @@ -478,6 +517,7 @@ Manifest::Manifest(std::string_view flatbufferData, Manifest::FlatBuffers tag) collection->name()->str(), maxTtl, scope->scopeId(), + getCanDeduplicateFromHistory(collection->history()), collection->metered() ? Metered::Yes : Metered::No}); } @@ -489,11 +529,13 @@ Manifest::Manifest(std::string_view flatbufferData, Manifest::FlatBuffers tag) pristineValue = scope->limits()->dataSizeClusterValue(); } - this->scopes.emplace(scope->scopeId(), - Scope{dataSize, - pristineValue, - scope->name()->str(), - std::move(scopeCollections)}); + this->scopes.emplace( + scope->scopeId(), + Scope{dataSize, + pristineValue, + scope->name()->str(), + std::move(scopeCollections), + getCanDeduplicateFromHistory(scope->history())}); } // Now build the collection id to collection-entry map @@ -530,6 +572,10 @@ void Manifest::addCollectionStats(KVBucket& bucket, if (entry.metered == Metered::No) { collectionC.addStat(Key::collection_metered, "no"); } + + collectionC.addStat( + Key::collection_history, + getHistoryFromCanDeduplicate(entry.canDeduplicate)); } } } catch (const std::exception& e) { @@ -683,20 +729,30 @@ std::optional Manifest::getCollectionEntry( return {}; } +CanDeduplicate Manifest::getCanDeduplicate(CollectionID cid) const { + auto itr = collections.find(cid); + if (itr != collections.end()) { + return itr->second.canDeduplicate; + } + return CanDeduplicate::Yes; +} + void Manifest::dump() const { std::cerr << *this << std::endl; } bool CollectionEntry::operator==(const CollectionEntry& other) const { return cid == other.cid && name == other.name && sid == other.sid && - maxTtl == other.maxTtl && metered == other.metered; + maxTtl == other.maxTtl && metered == other.metered && + canDeduplicate == other.canDeduplicate; } bool Scope::operator==(const Scope& other) const { bool equal = name == other.name && collections.size() == other.collections.size() && dataLimit == other.dataLimit && - dataLimitFromCluster == other.dataLimitFromCluster; + dataLimitFromCluster == other.dataLimitFromCluster && + canDeduplicate == other.canDeduplicate; if (equal) { for (const auto& c : collections) { equal &= std::find(other.collections.begin(), @@ -837,12 +893,13 @@ std::ostream& operator<<(std::ostream& os, const Manifest& manifest) { std::string to_string(const CollectionEntry& collection) { return fmt::format( - "cid:{}, name:{}, ttl:{{{}, {}}}, sid:{}, metered:{}", + "cid:{}, name:{}, ttl:{{{}, {}}}, sid:{}, {}, {}", collection.cid.to_string(), collection.name, collection.maxTtl.has_value(), collection.maxTtl.value_or(std::chrono::seconds(0)).count(), collection.sid.to_string(), + collection.canDeduplicate, collection.metered); } @@ -853,12 +910,14 @@ std::ostream& operator<<(std::ostream& os, const CollectionEntry& collection) { std::string to_string(const Scope& scope) { // not descending into the collections vector as caller can choose how to // space that out. - return fmt::format("name:{}, limit:{{{},{}}}, limitFromCluster:{}, size:{}", - scope.name, - scope.dataLimit.has_value(), - scope.dataLimit.value_or(0), - scope.dataLimitFromCluster, - scope.collections.size()); + return fmt::format( + "name:{}, limit:{{{},{}}}, limitFromCluster:{}, size:{}, {}", + scope.name, + scope.dataLimit.has_value(), + scope.dataLimit.value_or(0), + scope.dataLimitFromCluster, + scope.collections.size(), + scope.canDeduplicate); } std::ostream& operator<<(std::ostream& os, const Scope& scope) { diff --git a/engines/ep/src/collections/manifest.fbs b/engines/ep/src/collections/manifest.fbs index dd94cbecbe..7eda12ad57 100644 --- a/engines/ep/src/collections/manifest.fbs +++ b/engines/ep/src/collections/manifest.fbs @@ -21,6 +21,7 @@ table Collection { ttlValid:bool; maxTtl:uint; name:string; + history:bool; metered:bool; } @@ -35,6 +36,7 @@ table Scope { name:string; collections:[Collection]; limits:ScopeLimits; + history:bool; } table Manifest { diff --git a/engines/ep/src/collections/manifest.h b/engines/ep/src/collections/manifest.h index ffc99ccc40..3f875d7e52 100644 --- a/engines/ep/src/collections/manifest.h +++ b/engines/ep/src/collections/manifest.h @@ -18,6 +18,7 @@ #include #include "collections/collections_types.h" +#include "ep_types.h" #include "memcached/engine_common.h" #include "memcached/engine_error.h" @@ -38,7 +39,9 @@ struct CollectionEntry { std::string name; cb::ExpiryLimit maxTtl; ScopeID sid; + CanDeduplicate canDeduplicate; Metered metered; + bool operator==(const CollectionEntry& other) const; bool operator!=(const CollectionEntry& other) const { return !(*this == other); @@ -50,6 +53,7 @@ const CollectionEntry DefaultCollectionEntry = {CollectionID::Default, DefaultCollectionName, cb::NoExpiryLimit, ScopeID::Default, + CanDeduplicate::Yes, Metered::Yes}; std::string to_string(const CollectionEntry&); @@ -71,6 +75,8 @@ struct Scope { std::string name; std::vector collections; + CanDeduplicate canDeduplicate; + bool operator==(const Scope& other) const; bool operator!=(const Scope& other) const { return !(*this == other); @@ -303,6 +309,13 @@ class Manifest { void addScopeStats(KVBucket& bucket, const BucketStatCollector& collector) const; + /** + * The use-case for this method is not to fail for unknown collection + * + * @return if the collection exists return its setting, else return Yes + */ + CanDeduplicate getCanDeduplicate(CollectionID cid) const; + /** * Write to std::cerr this */ @@ -363,9 +376,12 @@ class Manifest { * scopes stores all of the known scopes and the 'epoch' Manifest i.e. * default initialisation stores just the default scope. */ - scopeContainer scopes = { - {ScopeID::Default, - {NoDataLimit, 0, DefaultScopeName, {DefaultCollectionEntry}}}}; + scopeContainer scopes = {{ScopeID::Default, + {NoDataLimit, + 0, + DefaultScopeName, + {DefaultCollectionEntry}, + CanDeduplicate::Yes}}}; collectionContainer collections; ManifestUid uid{0}; }; diff --git a/engines/ep/src/ep_types.cc b/engines/ep/src/ep_types.cc index 9ab4b725b2..8a1bff66d5 100644 --- a/engines/ep/src/ep_types.cc +++ b/engines/ep/src/ep_types.cc @@ -139,3 +139,17 @@ std::ostream& operator<<(std::ostream& os, const snapshot_range_t& range) { std::ostream& operator<<(std::ostream& os, const snapshot_info_t& info) { return os << "start:" << info.start << ", range:" << info.range; } + +std::string to_string(CanDeduplicate value) { + switch (value) { + case CanDeduplicate::Yes: + return "CanDeduplicate::Yes"; + case CanDeduplicate::No: + return "CanDeduplicate::No"; + } + folly::assume_unreachable(); +} + +std::ostream& operator<<(std::ostream& os, CanDeduplicate value) { + return os << to_string(value); +} diff --git a/engines/ep/src/ep_types.h b/engines/ep/src/ep_types.h index 3683cda7a8..5ef290f829 100644 --- a/engines/ep/src/ep_types.h +++ b/engines/ep/src/ep_types.h @@ -48,6 +48,20 @@ enum class IsDeleted : char { No, Yes }; enum class IsCommitted : char { No, Yes }; enum class IsCompaction : char { No, Yes }; enum class IsPiTR : char { No, Yes }; +enum class CanDeduplicate : char { No, Yes }; + +static inline CanDeduplicate getCanDeduplicateFromHistory(bool value) { + // history:true => CanDeduplicate::No + return value ? CanDeduplicate::No : CanDeduplicate::Yes; +} + +static inline bool getHistoryFromCanDeduplicate(CanDeduplicate value) { + // CanDeduplicate::No => history:true + return value == CanDeduplicate::No; +} + +std::string to_string(CanDeduplicate); +std::ostream& operator<<(std::ostream&, CanDeduplicate); // Is the compaction callback invoked for the latest revision only, or any // revision? diff --git a/engines/ep/tests/module_tests/collections/evp_store_collections_eraser_test.cc b/engines/ep/tests/module_tests/collections/evp_store_collections_eraser_test.cc index 9c46b20211..54a69a6ddb 100644 --- a/engines/ep/tests/module_tests/collections/evp_store_collections_eraser_test.cc +++ b/engines/ep/tests/module_tests/collections/evp_store_collections_eraser_test.cc @@ -996,7 +996,10 @@ void CollectionsEraserTest::testScopePurgedItemsCorrectAfterDrop( TEST_P(CollectionsEraserTest, ScopePurgedItemsCorrectAfterDrop) { CollectionsManifest cm; cm.add(ScopeEntry::shop1); - cm.add(CollectionEntry::dairy, cb::ExpiryLimit{0}, ScopeEntry::shop1); + cm.add(CollectionEntry::dairy, + cb::ExpiryLimit{0}, + {/*history*/}, + ScopeEntry::shop1); setCollections(cookie, cm); flushVBucketToDiskIfPersistent(vbid, 2); diff --git a/engines/ep/tests/module_tests/collections/evp_store_collections_test.cc b/engines/ep/tests/module_tests/collections/evp_store_collections_test.cc index 43d9a41101..7f80fc21e5 100644 --- a/engines/ep/tests/module_tests/collections/evp_store_collections_test.cc +++ b/engines/ep/tests/module_tests/collections/evp_store_collections_test.cc @@ -4328,6 +4328,7 @@ TEST_P(CollectionsParameterizedTest, MB_45899) { "_default", {}, ScopeID::Default, + CanDeduplicate::Yes, Collections::Metered::Yes}); Collections::Summary summary; auto vb0 = store->getVBucket(vbid0); diff --git a/engines/ep/tests/module_tests/collections/manifest_test.cc b/engines/ep/tests/module_tests/collections/manifest_test.cc index 0bf830fc96..8aecd7a534 100644 --- a/engines/ep/tests/module_tests/collections/manifest_test.cc +++ b/engines/ep/tests/module_tests/collections/manifest_test.cc @@ -451,7 +451,17 @@ TEST(ManifestTest, validation) { "collections":[{"name":"test0","uid":"8", "metered":true}]}]})", R"({"uid" : "1", "scopes":[{"name":"_default", "uid":"0", - "collections":[{"name":"test1","uid":"8", "metered":false}]}]})"}; + "collections":[{"name":"test1","uid":"8", "metered":false}]}]})", + // scope which supports change history + R"({"uid" : "1", + "scopes":[{"name":"_default", "uid":"0", "collections":[]}, + {"name":"s1", "uid":"8", + "history": true, + "collections":[{"name":"c1","uid":"8","history":true}]}]})", + // collection which supports change history + R"({"uid" : "1", + "scopes":[{"name":"_default", "uid":"0", + "collections":[{"name":"brewery","uid":"8","history":true}]}]})"}; for (auto& manifest : invalidManifests) { try { @@ -613,7 +623,10 @@ TEST(ManifestTest, to_json) { cm.add(collection.scope); scopesAdded.insert(collection.scope.uid); } - cm.add(collection.collection, collection.maxTtl, collection.scope); + cm.add(collection.collection, + collection.maxTtl, + {/*history*/}, + collection.scope); } cm.setUid(manifest.first); @@ -990,3 +1003,25 @@ TEST(ManifestTest, scopeDataSize) { ASSERT_NE(scope, cm.endScopes()); EXPECT_EQ(123456, scope->second.dataLimitFromCluster); } + +TEST(ManifestTest, configureHistory) { + CollectionsManifest cm; + // A collection in default scope and then a new scope with history + cm.add(CollectionEntry::fruit); // expect no history + cm.add(CollectionEntry::vegetable, cb::NoExpiryLimit, true /*history*/); + + // dairy collection will get 'history' from the scope + cm.add(ScopeEntry::shop1, {/*no datalimit*/}, true /*history*/); + cm.add(CollectionEntry::dairy, ScopeEntry::shop1); + + Collections::Manifest manifest{std::string{cm}}; + EXPECT_EQ(CanDeduplicate::Yes, + manifest.getCanDeduplicate(CollectionEntry::fruit)); + + // Both the following are defined to keep history. Can these collections be + // deduplicated? No + EXPECT_EQ(CanDeduplicate::No, + manifest.getCanDeduplicate(CollectionEntry::vegetable)); + EXPECT_EQ(CanDeduplicate::No, + manifest.getCanDeduplicate(CollectionEntry::dairy)); +} diff --git a/statistics/stat_definitions.json b/statistics/stat_definitions.json index ede4df5629..aa44aae5dc 100644 --- a/statistics/stat_definitions.json +++ b/statistics/stat_definitions.json @@ -2483,7 +2483,12 @@ "unit": "seconds", "cbstat": "maxTTL" }, - { + { + "key": "collection_history", + "unit": "none", + "cbstat": "history" + }, + { "key": "collection_metered", "unit": "none", "cbstat": "metered" diff --git a/utilities/test_manifest.cc b/utilities/test_manifest.cc index d738157f12..c099ff80f5 100644 --- a/utilities/test_manifest.cc +++ b/utilities/test_manifest.cc @@ -33,7 +33,9 @@ CollectionsManifest::CollectionsManifest(const CollectionEntry::Entry& entry) add(entry); } -CollectionsManifest& CollectionsManifest::add(const ScopeEntry::Entry& entry) { +CollectionsManifest& CollectionsManifest::add(const ScopeEntry::Entry& entry, + std::optional dataLimit, + std::optional history) { updateUid(); nlohmann::json jsonEntry; @@ -44,31 +46,25 @@ CollectionsManifest& CollectionsManifest::add(const ScopeEntry::Entry& entry) { jsonEntry["uid"] = ss.str(); jsonEntry["collections"] = std::vector(); - json["scopes"].push_back(jsonEntry); + if (history) { + jsonEntry["history"] = history.value(); + } - return *this; -} + if (dataLimit) { + nlohmann::json limitObject; + limitObject["kv"]["data_size"] = dataLimit.value(); + jsonEntry["limits"] = limitObject; + } -CollectionsManifest& CollectionsManifest::add(const ScopeEntry::Entry& entry, - size_t limit) { - add(entry); + json["scopes"].push_back(jsonEntry); - for (auto& scope : json["scopes"]) { - if (scope["name"] == entry.name) { - nlohmann::json jsonEntry; - jsonEntry["kv"]["data_size"] = limit; - scope["limits"] = jsonEntry; - return *this; - } - } - throw std::logic_error( - "CollectionsManifest::add(scope, limit) could not find the new " - "scope"); + return *this; } CollectionsManifest& CollectionsManifest::add( const CollectionEntry::Entry& collectionEntry, cb::ExpiryLimit maxTtl, + std::optional history, const ScopeEntry::Entry& scopeEntry) { updateUid(); @@ -87,6 +83,10 @@ CollectionsManifest& CollectionsManifest::add( jsonEntry["metered"] = false; } + if (history) { + jsonEntry["history"] = history.value(); + } + // Add the new collection to the set of collections belonging to the // given scope for (auto itr = json["scopes"].begin(); itr != json["scopes"].end(); @@ -103,7 +103,10 @@ CollectionsManifest& CollectionsManifest::add( CollectionsManifest& CollectionsManifest::add( const CollectionEntry::Entry& collectionEntry, const ScopeEntry::Entry& scopeEntry) { - return add(collectionEntry, {/*no ttl*/}, scopeEntry); + return add(collectionEntry, + {/*no ttl*/}, + {/* no history setting defined*/}, + scopeEntry); } CollectionsManifest& CollectionsManifest::remove( diff --git a/utilities/test_manifest.h b/utilities/test_manifest.h index 69487c8b6c..5b86c424c7 100644 --- a/utilities/test_manifest.h +++ b/utilities/test_manifest.h @@ -152,10 +152,9 @@ class CollectionsManifest { explicit CollectionsManifest(const CollectionEntry::Entry& entry); /// Add the scope entry - allows duplicates - CollectionsManifest& add(const ScopeEntry::Entry& entry); - - /// Add the scope entry and set a limit - CollectionsManifest& add(const ScopeEntry::Entry& entry, size_t dataLimit); + CollectionsManifest& add(const ScopeEntry::Entry& entry, + std::optional dataLimit = {}, + std::optional history = {}); /// Add the collection entry to the given scope - allows duplicates /// caller specifies the collection maxTTL @@ -163,6 +162,7 @@ class CollectionsManifest { CollectionsManifest& add( const CollectionEntry::Entry& collectionEntry, cb::ExpiryLimit maxTtl, + std::optional history = {}, const ScopeEntry::Entry& scopeEntry = ScopeEntry::defaultS); /// Add the collection entry to the given scope - allows duplicates