diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 57d49ddf12af..abf5dec0318e 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -53,6 +53,13 @@ minor_behavior_changes: Added new configuration field :ref:`rate_limited_as_resource_exhausted ` to allow for setting if rate limit grpc response should be RESOURCE_EXHAUSTED instead of the default UNAVAILABLE. +- area: config parsing, http cache filter + change: | + Replaces protobuf hashing by human-readable string with a dedicated deterministic hashing algorithm. + The performance of the hash operation is improved by 2-10x depending on the structure of the message, + which is expected to reduce config update time or startup time by 10-25%. The new algorithm is also + used for http_cache_filter hashing, which will effectively cause a one-time cache flush on update + for users with a persistent cache. To enable this behavior set ``envoy.restart_features.use_fast_protobuf_hash`` to true. - area: filter state change: | Added config name of filter sending a local reply in filter state with key diff --git a/source/common/protobuf/BUILD b/source/common/protobuf/BUILD index e398f49d5eaa..fa252699ff10 100644 --- a/source/common/protobuf/BUILD +++ b/source/common/protobuf/BUILD @@ -199,6 +199,17 @@ envoy_cc_library( ]), ) +envoy_cc_library( + name = "deterministic_hash_lib", + srcs = ["deterministic_hash.cc"], + hdrs = ["deterministic_hash.h"], + deps = [ + ":protobuf", + "//source/common/common:assert_lib", + "//source/common/common:hash_lib", + ], +) + envoy_cc_library( name = "utility_lib", srcs = ["utility.cc"], @@ -206,6 +217,7 @@ envoy_cc_library( "protobuf", ], deps = [ + ":deterministic_hash_lib", ":message_validator_lib", ":protobuf", ":utility_lib_header", diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc new file mode 100644 index 000000000000..52e18c553297 --- /dev/null +++ b/source/common/protobuf/deterministic_hash.cc @@ -0,0 +1,233 @@ +#if defined(ENVOY_ENABLE_FULL_PROTOS) +#include "source/common/protobuf/deterministic_hash.h" + +#include "source/common/common/assert.h" +#include "source/common/common/hash.h" + +namespace Envoy { +namespace DeterministicProtoHash { +namespace { + +// Get a scalar field from protobuf reflection field definition. The return +// type must be specified by the caller. Every implementation is a specialization +// because the reflection interface did separate named functions instead of a +// template. +template +T reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field); + +template <> +uint32_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetUInt32(message, &field); +} + +template <> +int32_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetInt32(message, &field); +} + +template <> +uint64_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetUInt64(message, &field); +} + +template <> +int64_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetInt64(message, &field); +} + +template <> +float reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetFloat(message, &field); +} + +template <> +double reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetDouble(message, &field); +} + +template <> +bool reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetBool(message, &field); +} + +// Takes a field of scalar type, and hashes it. In case the field is a repeated field, +// the function hashes each of its elements. +template , bool> = true> +uint64_t hashScalarField(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field, uint64_t seed) { + if (field.is_repeated()) { + for (const T& scalar : reflection.GetRepeatedFieldRef(message, &field)) { + seed = HashUtil::xxHash64Value(scalar, seed); + } + } else { + seed = HashUtil::xxHash64Value(reflectionGet(reflection, message, field), seed); + } + return seed; +} + +uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = 0); +uint64_t reflectionHashField(const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field, uint64_t seed); + +// To make a map serialize deterministically we need to ignore the order of +// the map fields. To do that, we simply combine the hashes of each entry +// using an unordered operator (addition), and then apply that combined hash to +// the seed. +uint64_t reflectionHashMapField(const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field, uint64_t seed) { + const Protobuf::Reflection& reflection = *message.GetReflection(); + ASSERT(field.is_map()); + const auto& entries = reflection.GetRepeatedFieldRef(message, &field); + ASSERT(!entries.empty()); + const Protobuf::Descriptor& map_descriptor = *entries.begin()->GetDescriptor(); + const Protobuf::FieldDescriptor& key_field = *map_descriptor.map_key(); + const Protobuf::FieldDescriptor& value_field = *map_descriptor.map_value(); + uint64_t combined_hash = 0; + for (const Protobuf::Message& entry : entries) { + uint64_t entry_hash = reflectionHashField(entry, key_field, 0); + entry_hash = reflectionHashField(entry, value_field, entry_hash); + combined_hash += entry_hash; + } + return HashUtil::xxHash64Value(combined_hash, seed); +} + +uint64_t reflectionHashField(const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field, uint64_t seed) { + using Protobuf::FieldDescriptor; + const Protobuf::Reflection& reflection = *message.GetReflection(); + seed = HashUtil::xxHash64Value(field.number(), seed); + switch (field.cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + seed = hashScalarField(reflection, message, field, seed); + break; + case FieldDescriptor::CPPTYPE_UINT32: + seed = hashScalarField(reflection, message, field, seed); + break; + case FieldDescriptor::CPPTYPE_INT64: + seed = hashScalarField(reflection, message, field, seed); + break; + case FieldDescriptor::CPPTYPE_UINT64: + seed = hashScalarField(reflection, message, field, seed); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + seed = hashScalarField(reflection, message, field, seed); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + seed = hashScalarField(reflection, message, field, seed); + break; + case FieldDescriptor::CPPTYPE_BOOL: + seed = hashScalarField(reflection, message, field, seed); + break; + case FieldDescriptor::CPPTYPE_ENUM: + if (field.is_repeated()) { + const int c = reflection.FieldSize(message, &field); + for (int i = 0; i < c; i++) { + seed = HashUtil::xxHash64Value(reflection.GetRepeatedEnumValue(message, &field, i), seed); + } + } else { + seed = HashUtil::xxHash64Value(reflection.GetEnumValue(message, &field), seed); + } + break; + case FieldDescriptor::CPPTYPE_STRING: + if (field.is_repeated()) { + for (const std::string& str : reflection.GetRepeatedFieldRef(message, &field)) { + seed = HashUtil::xxHash64(str, seed); + } + } else { + // Scratch may be used by GetStringReference if the field is not already a std::string. + std::string scratch; + seed = HashUtil::xxHash64(reflection.GetStringReference(message, &field, &scratch), seed); + } + break; + case FieldDescriptor::CPPTYPE_MESSAGE: + if (field.is_map()) { + seed = reflectionHashMapField(message, field, seed); + } else if (field.is_repeated()) { + for (const Protobuf::Message& submsg : + reflection.GetRepeatedFieldRef(message, &field)) { + seed = reflectionHashMessage(submsg, seed); + } + } else { + seed = reflectionHashMessage(reflection.GetMessage(message, &field), seed); + } + break; + } + return seed; +} + +// Converts from type urls OR descriptor full names to descriptor full names. +// Type urls are as used in envoy yaml config, e.g. +// "type.googleapis.com/envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig" +// becomes +// "envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig" +absl::string_view typeUrlToDescriptorFullName(absl::string_view url) { + const size_t pos = url.rfind('/'); + if (pos != absl::string_view::npos) { + return url.substr(pos + 1); + } + return url; +} + +std::unique_ptr unpackAnyForReflection(const ProtobufWkt::Any& any) { + const Protobuf::Descriptor* descriptor = + Protobuf::DescriptorPool::generated_pool()->FindMessageTypeByName( + typeUrlToDescriptorFullName(any.type_url())); + // If the type name refers to an unknown type, we treat it the same as other + // unknown fields - not including its contents in the hash. + if (descriptor == nullptr) { + return nullptr; + } + const Protobuf::Message* prototype = + Protobuf::MessageFactory::generated_factory()->GetPrototype(descriptor); + ASSERT(prototype != nullptr, "should be impossible since the descriptor is known"); + std::unique_ptr msg(prototype->New()); + any.UnpackTo(msg.get()); + return msg; +} + +// This is intentionally ignoring unknown fields. +uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) { + using Protobuf::FieldDescriptor; + std::string scratch; + const Protobuf::Reflection* reflection = message.GetReflection(); + const Protobuf::Descriptor* descriptor = message.GetDescriptor(); + seed = HashUtil::xxHash64(descriptor->full_name(), seed); + if (descriptor->well_known_type() == Protobuf::Descriptor::WELLKNOWNTYPE_ANY) { + const ProtobufWkt::Any* any = Protobuf::DynamicCastToGenerated(&message); + ASSERT(any != nullptr, "casting to any should always work for WELLKNOWNTYPE_ANY"); + std::unique_ptr submsg = unpackAnyForReflection(*any); + if (submsg == nullptr) { + // If we wanted to handle unknown types in Any, this is where we'd have to do it. + // Since we don't know the type to introspect it, we hash just its type name. + return HashUtil::xxHash64(any->type_url(), seed); + } + return reflectionHashMessage(*submsg, seed); + } + std::vector fields; + // ListFields returned the fields ordered by field number. + reflection->ListFields(message, &fields); + // If we wanted to handle unknown fields, we'd need to also GetUnknownFields here. + for (const FieldDescriptor* field : fields) { + seed = reflectionHashField(message, *field, seed); + } + // Hash one extra character to signify end of message, so that + // msg{} field2=2 + // hashes differently from + // msg{field2=2} + return HashUtil::xxHash64("\x17", seed); +} +} // namespace + +uint64_t hash(const Protobuf::Message& message) { return reflectionHashMessage(message, 0); } + +} // namespace DeterministicProtoHash +} // namespace Envoy +#endif diff --git a/source/common/protobuf/deterministic_hash.h b/source/common/protobuf/deterministic_hash.h new file mode 100644 index 000000000000..64ea0ff76bc1 --- /dev/null +++ b/source/common/protobuf/deterministic_hash.h @@ -0,0 +1,26 @@ +#pragma once + +#include "source/common/protobuf/protobuf.h" + +#if defined(ENVOY_ENABLE_FULL_PROTOS) +namespace Envoy { +namespace DeterministicProtoHash { + +// Note: this ignores unknown fields and unrecognized types in Any fields. +// An alternative approach might treat such fields as "raw data" and include +// them in the hash, which would risk breaking the deterministic behavior, +// versus this way risks ignoring significant data. +// +// Ignoring unknown fields was chosen as the implementation because the +// TextFormat-based hashing this replaces was explicitly ignoring unknown +// fields. +// +// If this is used as part of making a hash table, it may result in +// collisions if unknown fields are present and are not ignored by the +// corresponding comparator. A `MessageDifferencer` can be configured to +// ignore unknown fields, or not to. +uint64_t hash(const Protobuf::Message& message); + +} // namespace DeterministicProtoHash +} // namespace Envoy +#endif diff --git a/source/common/protobuf/utility.cc b/source/common/protobuf/utility.cc index 18eb9f01e515..00e674df3842 100644 --- a/source/common/protobuf/utility.cc +++ b/source/common/protobuf/utility.cc @@ -10,6 +10,7 @@ #include "source/common/common/assert.h" #include "source/common/common/documentation_url.h" #include "source/common/common/fmt.h" +#include "source/common/protobuf/deterministic_hash.h" #include "source/common/protobuf/message_validator_impl.h" #include "source/common/protobuf/protobuf.h" #include "source/common/protobuf/visitor.h" @@ -129,22 +130,22 @@ void ProtoExceptionUtil::throwProtoValidationException(const std::string& valida } size_t MessageUtil::hash(const Protobuf::Message& message) { - std::string text_format; - #if defined(ENVOY_ENABLE_FULL_PROTOS) - { + if (Runtime::runtimeFeatureEnabled("envoy.restart_features.use_fast_protobuf_hash")) { + return DeterministicProtoHash::hash(message); + } else { + std::string text_format; Protobuf::TextFormat::Printer printer; printer.SetExpandAny(true); printer.SetUseFieldNumber(true); printer.SetSingleLineMode(true); printer.SetHideUnknownFields(true); printer.PrintToString(message, &text_format); + return HashUtil::xxHash64(text_format); } #else - absl::StrAppend(&text_format, message.SerializeAsString()); + return HashUtil::xxHash64(message.SerializeAsString()); #endif - - return HashUtil::xxHash64(text_format); } #if !defined(ENVOY_ENABLE_FULL_PROTOS) diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 60a58519b118..e5529145ce3c 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -129,6 +129,8 @@ FALSE_RUNTIME_GUARD(envoy_restart_features_use_eds_cache_for_ads); FALSE_RUNTIME_GUARD(envoy_reloadable_features_enable_universal_header_validator); // TODO(pksohn): enable after fixing https://github.com/envoyproxy/envoy/issues/29930 FALSE_RUNTIME_GUARD(envoy_reloadable_features_quic_defer_logging_to_ack_listener); +// TODO(#31276): flip this to true after some test time. +FALSE_RUNTIME_GUARD(envoy_restart_features_use_fast_protobuf_hash); // Block of non-boolean flags. Use of int flags is deprecated. Do not add more. ABSL_FLAG(uint64_t, re2_max_program_size_error_level, 100, ""); // NOLINT diff --git a/source/extensions/filters/http/cache/BUILD b/source/extensions/filters/http/cache/BUILD index fbe778851c59..7eaf3a4114f4 100644 --- a/source/extensions/filters/http/cache/BUILD +++ b/source/extensions/filters/http/cache/BUILD @@ -102,7 +102,7 @@ envoy_cc_library( "//source/common/common:assert_lib", "//source/common/http:header_utility_lib", "//source/common/http:headers_lib", - "//source/common/protobuf:utility_lib", + "//source/common/protobuf:deterministic_hash_lib", "@envoy_api//envoy/extensions/filters/http/cache/v3:pkg_cc_proto", ], ) diff --git a/source/extensions/filters/http/cache/http_cache.cc b/source/extensions/filters/http/cache/http_cache.cc index 3f5659e88495..73a2365a5f99 100644 --- a/source/extensions/filters/http/cache/http_cache.cc +++ b/source/extensions/filters/http/cache/http_cache.cc @@ -10,7 +10,7 @@ #include "source/common/http/header_utility.h" #include "source/common/http/headers.h" #include "source/common/http/utility.h" -#include "source/common/protobuf/utility.h" +#include "source/common/protobuf/deterministic_hash.h" #include "source/extensions/filters/http/cache/cache_custom_headers.h" #include "source/extensions/filters/http/cache/cache_headers_utils.h" @@ -54,7 +54,13 @@ LookupRequest::LookupRequest(const Http::RequestHeaderMap& request_headers, Syst // Unless this API is still alpha, calls to stableHashKey() must always return // the same result, or a way must be provided to deal with a complete cache // flush. -size_t stableHashKey(const Key& key) { return MessageUtil::hash(key); } +size_t stableHashKey(const Key& key) { + if (Runtime::runtimeFeatureEnabled("envoy.restart_features.use_fast_protobuf_hash")) { + return DeterministicProtoHash::hash(key); + } else { + return MessageUtil::hash(key); + } +} void LookupRequest::initializeRequestCacheControl(const Http::RequestHeaderMap& request_headers) { const absl::string_view cache_control = diff --git a/source/extensions/filters/http/cache/http_cache.h b/source/extensions/filters/http/cache/http_cache.h index f5b670c25199..177d12863e33 100644 --- a/source/extensions/filters/http/cache/http_cache.h +++ b/source/extensions/filters/http/cache/http_cache.h @@ -69,8 +69,6 @@ using LookupResultPtr = std::unique_ptr; // // When providing a cached response, Caches must ensure that the keys (and not // just their hashes) match. -// -// TODO(toddmgreer): Ensure that stability guarantees above are accurate. size_t stableHashKey(const Key& key); // LookupRequest holds everything about a request that's needed to look for a diff --git a/test/common/protobuf/BUILD b/test/common/protobuf/BUILD index 60a5840eaa8f..befefa24b657 100644 --- a/test/common/protobuf/BUILD +++ b/test/common/protobuf/BUILD @@ -1,5 +1,7 @@ load( "//bazel:envoy_build_system.bzl", + "envoy_benchmark_test", + "envoy_cc_benchmark_binary", "envoy_cc_fuzz_test", "envoy_cc_test", "envoy_package", @@ -21,6 +23,20 @@ envoy_cc_test( ], ) +envoy_proto_library( + name = "deterministic_hash_test_proto", + srcs = ["deterministic_hash_test.proto"], +) + +envoy_cc_test( + name = "deterministic_hash_test", + srcs = ["deterministic_hash_test.cc"], + deps = [ + ":deterministic_hash_test_proto_cc_proto", + "//source/common/protobuf:deterministic_hash_lib", + ], +) + envoy_proto_library( name = "utility_test_protos", srcs = [ @@ -82,3 +98,19 @@ envoy_cc_fuzz_test( tags = ["no_fuzz"], deps = ["//source/common/protobuf:utility_lib"], ) + +envoy_cc_benchmark_binary( + name = "utility_speed_test", + srcs = ["utility_speed_test.cc"], + external_deps = ["benchmark"], + deps = [ + ":deterministic_hash_test_proto_cc_proto", + "//source/common/protobuf:utility_lib", + "//test/test_common:test_runtime_lib", + ], +) + +envoy_benchmark_test( + name = "utility_speed_test_benchmark_test", + benchmark_binary = "utility_speed_test", +) diff --git a/test/common/protobuf/deterministic_hash_test.cc b/test/common/protobuf/deterministic_hash_test.cc new file mode 100644 index 000000000000..f5ebf94a1806 --- /dev/null +++ b/test/common/protobuf/deterministic_hash_test.cc @@ -0,0 +1,503 @@ +#include "source/common/protobuf/deterministic_hash.h" + +#include "test/common/protobuf/deterministic_hash_test.pb.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace Envoy { +namespace DeterministicProtoHash { + +TEST(HashTest, EmptyMessageHashDiffersByMessageType) { + deterministichashtest::Maps empty1; + deterministichashtest::SingleFields empty2; + EXPECT_NE(hash(empty1), hash(empty2)); +} + +TEST(HashTest, EmptyMessageHashMatches) { + deterministichashtest::Maps empty1, empty2; + EXPECT_EQ(hash(empty1), hash(empty2)); +} + +TEST(HashTest, MapWithRemovedValueBehavesTheSameAsEmptyMap) { + // This test is from an abundance of caution, to make sure that + // reflection can't be driven to traverse a map field with no values + // in it (there would be an ASSERT in that path.) + deterministichashtest::Maps empty1, empty2; + (*empty1.mutable_bool_string())[false] = "false"; + (*empty1.mutable_bool_string()).erase(false); + EXPECT_EQ(hash(empty1), hash(empty2)); +} + +TEST(HashTest, BoolKeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_bool_string())[false] = "false"; + (*map1.mutable_bool_string())[true] = "true"; + (*map2.mutable_bool_string())[true] = "true"; + (*map2.mutable_bool_string())[false] = "false"; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, StringKeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_string_bool())["false"] = false; + (*map1.mutable_string_bool())["true"] = true; + (*map2.mutable_string_bool())["true"] = true; + (*map2.mutable_string_bool())["false"] = false; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, Int32KeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_int32_uint32())[-5] = 5; + (*map1.mutable_int32_uint32())[-8] = 8; + (*map2.mutable_int32_uint32())[-8] = 8; + (*map2.mutable_int32_uint32())[-5] = 5; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, UInt32KeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_uint32_int32())[5] = -5; + (*map1.mutable_uint32_int32())[8] = -8; + (*map2.mutable_uint32_int32())[8] = -8; + (*map2.mutable_uint32_int32())[5] = -5; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, Int64KeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_int64_uint64())[-5] = 5; + (*map1.mutable_int64_uint64())[-8] = 8; + (*map2.mutable_int64_uint64())[-8] = 8; + (*map2.mutable_int64_uint64())[-5] = 5; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, UInt64KeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_uint64_int64())[5] = -5; + (*map1.mutable_uint64_int64())[8] = -8; + (*map2.mutable_uint64_int64())[8] = -8; + (*map2.mutable_uint64_int64())[5] = -5; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, MapWithSameKeysAndValuesPairedDifferentlyDoesNotMatch) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_string_bool())["false"] = false; + (*map1.mutable_string_bool())["true"] = true; + (*map2.mutable_string_bool())["true"] = false; + (*map2.mutable_string_bool())["false"] = true; + EXPECT_NE(hash(map1), hash(map2)); +} + +TEST(HashTest, RecursiveMessageMatchesWhenSame) { + deterministichashtest::Recursion r1, r2; + r1.set_index(0); + r1.mutable_child()->set_index(1); + r2.set_index(0); + r2.mutable_child()->set_index(1); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RecursiveMessageDoesNotMatchWhenSameValuesInDifferentOrder) { + deterministichashtest::Recursion r1, r2; + r1.set_index(0); + r1.mutable_child()->set_index(1); + r2.set_index(1); + r2.mutable_child()->set_index(0); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RecursiveMessageDoesNotMatchWithDifferentDepth) { + deterministichashtest::Recursion r1, r2; + r1.set_index(0); + r1.mutable_child()->set_index(1); + r2.set_index(0); + r2.mutable_child()->set_index(1); + r2.mutable_child()->mutable_child(); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, MatchingRepeatedBoolsMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_bools(false); + r1.add_bools(true); + r2.add_bools(false); + r2.add_bools(true); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedBoolsDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_bools(false); + r1.add_bools(true); + r2.add_bools(true); + r2.add_bools(false); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedBoolsDifferentLengthMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_bools(false); + r2.add_bools(false); + r2.add_bools(false); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedStringsMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_strings("foo"); + r1.add_strings("bar"); + r2.add_strings("foo"); + r2.add_strings("bar"); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedStringsDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_strings("foo"); + r1.add_strings("bar"); + r2.add_strings("bar"); + r2.add_strings("foo"); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedBytesMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_byteses("\x01\x02\x03\x04"); + r1.add_byteses("\x01\x02\x03\x04\x05"); + r2.add_byteses("\x01\x02\x03\x04"); + r2.add_byteses("\x01\x02\x03\x04\x05"); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedBytesDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_byteses("\x01\x02\x03\x04"); + r1.add_byteses("\x01\x02\x03\x04\x05"); + r2.add_byteses("\x01\x02\x03\x04\x05"); + r2.add_byteses("\x01\x02\x03\x04"); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedInt32Match) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_int32s(5); + r1.add_int32s(8); + r2.add_int32s(5); + r2.add_int32s(8); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedInt32DifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_int32s(5); + r1.add_int32s(8); + r2.add_int32s(8); + r2.add_int32s(5); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedUInt32Match) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_uint32s(5); + r1.add_uint32s(8); + r2.add_uint32s(5); + r2.add_uint32s(8); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedUInt32DifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_uint32s(5); + r1.add_uint32s(8); + r2.add_uint32s(8); + r2.add_uint32s(5); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedInt64Match) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_int64s(5); + r1.add_int64s(8); + r2.add_int64s(5); + r2.add_int64s(8); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedInt64DifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_int64s(5); + r1.add_int64s(8); + r2.add_int64s(8); + r2.add_int64s(5); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedUInt64Match) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_uint64s(5); + r1.add_uint64s(8); + r2.add_uint64s(5); + r2.add_uint64s(8); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedUInt64DifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_uint64s(5); + r1.add_uint64s(8); + r2.add_uint64s(8); + r2.add_uint64s(5); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedEnumMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_enums(deterministichashtest::FOO); + r1.add_enums(deterministichashtest::BAR); + r2.add_enums(deterministichashtest::FOO); + r2.add_enums(deterministichashtest::BAR); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedEnumDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_enums(deterministichashtest::FOO); + r1.add_enums(deterministichashtest::BAR); + r2.add_enums(deterministichashtest::BAR); + r2.add_enums(deterministichashtest::FOO); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedDoubleMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_doubles(1.84); + r1.add_doubles(-4.88); + r2.add_doubles(1.84); + r2.add_doubles(-4.88); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedDoubleDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_doubles(1.84); + r1.add_doubles(-4.88); + r2.add_doubles(-4.88); + r2.add_doubles(1.84); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedFloatMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_floats(1.84f); + r1.add_floats(-4.88f); + r2.add_floats(1.84f); + r2.add_floats(-4.88f); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedFloatDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_floats(1.84f); + r1.add_floats(-4.88f); + r2.add_floats(-4.88f); + r2.add_floats(1.84f); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedMessageMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_messages()->set_index(1); + r1.add_messages()->set_index(2); + r2.add_messages()->set_index(1); + r2.add_messages()->set_index(2); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedMessageOrderMisMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_messages()->set_index(1); + r1.add_messages()->set_index(2); + r2.add_messages()->set_index(2); + r2.add_messages()->set_index(1); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, SingleInt32Match) { + deterministichashtest::SingleFields s1, s2; + s1.set_int32(5); + s2.set_int32(5); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleInt32Mismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_int32(5); + s2.set_int32(8); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleUInt32Match) { + deterministichashtest::SingleFields s1, s2; + s1.set_uint32(5); + s2.set_uint32(5); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleUInt32Mismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_uint32(5); + s2.set_uint32(8); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleInt64Match) { + deterministichashtest::SingleFields s1, s2; + s1.set_int64(5); + s2.set_int64(5); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleInt64Mismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_int64(5); + s2.set_int64(8); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleUInt64Match) { + deterministichashtest::SingleFields s1, s2; + s1.set_uint64(5); + s2.set_uint64(5); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleUInt64Mismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_uint64(5); + s2.set_uint64(8); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleBoolMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_b(true); + s2.set_b(true); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleBoolMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_b(true); + s2.set_b(false); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleStringMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_string("true"); + s2.set_string("true"); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleStringMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_string("true"); + s2.set_string("false"); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleBytesMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_bytes("\x01\x02"); + s2.set_bytes("\x01\x02"); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleBytesMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_bytes("\x01\x02"); + s2.set_bytes("\x01\x02\x03"); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleDoubleMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_db(3.9); + s2.set_db(3.9); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleDoubleMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_db(3.9); + s2.set_db(3.91); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleFloatMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_f(3.9f); + s2.set_f(3.9f); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleFloatMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_f(3.9f); + s2.set_f(3.91f); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleEnumMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_e(deterministichashtest::FOO); + s2.set_e(deterministichashtest::FOO); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleEnumMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_e(deterministichashtest::FOO); + s2.set_e(deterministichashtest::BAR); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, AnyWithUnknownTypeMatch) { + deterministichashtest::AnyContainer a1, a2; + a1.mutable_any()->set_type_url("invalid_type"); + a2.mutable_any()->set_type_url("invalid_type"); + EXPECT_EQ(hash(a1), hash(a2)); +} + +TEST(HashTest, AnyWithUnknownTypeMismatch) { + deterministichashtest::AnyContainer a1, a2; + a1.mutable_any()->set_type_url("invalid_type"); + a2.mutable_any()->set_type_url("different_invalid_type"); + EXPECT_NE(hash(a1), hash(a2)); +} + +TEST(HashTest, AnyWithKnownTypeMatch) { + deterministichashtest::AnyContainer a1, a2; + deterministichashtest::Recursion value; + value.set_index(1); + a1.mutable_any()->PackFrom(value); + a2.mutable_any()->PackFrom(value); + EXPECT_EQ(hash(a1), hash(a2)); +} + +TEST(HashTest, AnyWithKnownTypeMismatch) { + deterministichashtest::AnyContainer a1, a2; + deterministichashtest::Recursion value; + value.set_index(1); + a1.mutable_any()->PackFrom(value); + value.set_index(2); + a2.mutable_any()->PackFrom(value); + EXPECT_NE(hash(a1), hash(a2)); +} + +} // namespace DeterministicProtoHash +} // namespace Envoy diff --git a/test/common/protobuf/deterministic_hash_test.proto b/test/common/protobuf/deterministic_hash_test.proto new file mode 100644 index 000000000000..5f56d35d41b0 --- /dev/null +++ b/test/common/protobuf/deterministic_hash_test.proto @@ -0,0 +1,56 @@ +syntax = "proto3"; + +package deterministichashtest; + +import "google/protobuf/any.proto"; + +enum FooEnum { + ZERO = 0; + FOO = 1; + BAR = 2; +} + +message Maps { + map bool_string = 1; + map string_bool = 2; + map int32_uint32 = 3; + map uint32_int32 = 4; + map int64_uint64 = 5; + map uint64_int64 = 6; +}; + +message Recursion { + Recursion child = 1; + uint32 index = 2; +}; + +message RepeatedFields { + repeated bool bools = 1; + repeated string strings = 2; + repeated int32 int32s = 3; + repeated uint32 uint32s = 4; + repeated int64 int64s = 5; + repeated uint64 uint64s = 6; + repeated bytes byteses = 7; + repeated double doubles = 8; + repeated float floats = 9; + repeated FooEnum enums = 10; + repeated Recursion messages = 11; +}; + +message SingleFields { + bool b = 1; + string string = 2; + int32 int32 = 3; + uint32 uint32 = 4; + int64 int64 = 5; + uint64 uint64 = 6; + bytes bytes = 7; + double db = 8; + float f = 9; + FooEnum e = 10; +}; + +message AnyContainer { + google.protobuf.Any any = 1; +}; diff --git a/test/common/protobuf/utility_speed_test.cc b/test/common/protobuf/utility_speed_test.cc new file mode 100644 index 000000000000..491d2f71ed5b --- /dev/null +++ b/test/common/protobuf/utility_speed_test.cc @@ -0,0 +1,85 @@ +// Note: this should be run with --compilation_mode=opt, and would benefit from a +// quiescent system with disabled cstate power management. + +#include "source/common/protobuf/utility.h" + +#include "test/common/protobuf/deterministic_hash_test.pb.h" +#include "test/test_common/test_runtime.h" + +#include "benchmark/benchmark.h" + +namespace Envoy { + +// NOLINT(namespace-envoy) + +static std::unique_ptr testProtoWithMaps() { + auto msg = std::make_unique(); + (*msg->mutable_bool_string())[false] = "abcdefghijklmnopqrstuvwxyz"; + (*msg->mutable_bool_string())[true] = "abcdefghijklmnopqrstuvwxyz"; + for (int i = 0; i < 100; i++) { + (*msg->mutable_string_bool())[absl::StrCat(i)] = true; + (*msg->mutable_int32_uint32())[i] = i; + (*msg->mutable_uint32_int32())[i] = i; + (*msg->mutable_uint64_int64())[i + 1000000000000L] = i + 1000000000000L; + (*msg->mutable_int64_uint64())[i + 1000000000000L] = i + 1000000000000L; + } + return msg; +} + +static std::unique_ptr testProtoWithRecursion() { + auto msg = std::make_unique(); + deterministichashtest::Recursion* p = msg.get(); + for (int i = 0; i < 100; i++) { + p->set_index(i); + p = p->mutable_child(); + } + return msg; +} + +static std::unique_ptr testProtoWithRepeatedFields() { + auto msg = std::make_unique(); + for (int i = 0; i < 100; i++) { + msg->add_bools(true); + msg->add_strings("abcdefghijklmnopqrstuvwxyz"); + msg->add_int32s(-12345); + msg->add_uint32s(12345); + msg->add_int64s(123456789012345L); + msg->add_uint64s(-123456789012345UL); + msg->add_byteses("abcdefghijklmnopqrstuvwxyz"); + msg->add_doubles(123456789.12345); + msg->add_floats(12345.12345); + msg->add_enums(deterministichashtest::FOO); + } + return msg; +} + +static void bmHashByTextFormat(benchmark::State& state, std::unique_ptr msg) { + TestScopedRuntime runtime; + runtime.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); + uint64_t hash = 0; + for (auto _ : state) { + UNREFERENCED_PARAMETER(_); + hash += MessageUtil::hash(*msg); + } + benchmark::DoNotOptimize(hash); +} + +static void bmHashByDeterministicHash(benchmark::State& state, + std::unique_ptr msg) { + TestScopedRuntime runtime; + runtime.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "true"}}); + uint64_t hash = 0; + for (auto _ : state) { + UNREFERENCED_PARAMETER(_); + hash += MessageUtil::hash(*msg); + } + benchmark::DoNotOptimize(hash); +} +BENCHMARK_CAPTURE(bmHashByDeterministicHash, map, testProtoWithMaps()); +BENCHMARK_CAPTURE(bmHashByTextFormat, map, testProtoWithMaps()); +BENCHMARK_CAPTURE(bmHashByDeterministicHash, recursion, testProtoWithRecursion()); +BENCHMARK_CAPTURE(bmHashByTextFormat, recursion, testProtoWithRecursion()); +BENCHMARK_CAPTURE(bmHashByDeterministicHash, repeatedFields, testProtoWithRepeatedFields()); +BENCHMARK_CAPTURE(bmHashByTextFormat, repeatedFields, testProtoWithRepeatedFields()); + +} // namespace Envoy diff --git a/test/common/protobuf/utility_test.cc b/test/common/protobuf/utility_test.cc index d347dcb391df..80743b0d1185 100644 --- a/test/common/protobuf/utility_test.cc +++ b/test/common/protobuf/utility_test.cc @@ -175,6 +175,12 @@ TEST_F(ProtobufUtilityTest, MessageUtilHash) { ProtobufWkt::Struct s; (*s.mutable_fields())["ab"].set_string_value("fgh"); (*s.mutable_fields())["cde"].set_string_value("ij"); + ProtobufWkt::Struct s2; + (*s2.mutable_fields())["ab"].set_string_value("ij"); + (*s2.mutable_fields())["cde"].set_string_value("fgh"); + ProtobufWkt::Struct s3; + (*s3.mutable_fields())["ac"].set_string_value("fgh"); + (*s3.mutable_fields())["cdb"].set_string_value("ij"); ProtobufWkt::Any a1; a1.PackFrom(s); @@ -184,11 +190,26 @@ TEST_F(ProtobufUtilityTest, MessageUtilHash) { a2.set_value(Base64::decode("CgsKA2NkZRIEGgJpagoLCgJhYhIFGgNmZ2g=")); ProtobufWkt::Any a3 = a1; a3.set_value(Base64::decode("CgsKAmFiEgUaA2ZnaAoLCgNjZGUSBBoCaWo=")); - - EXPECT_EQ(MessageUtil::hash(a1), MessageUtil::hash(a2)); - EXPECT_EQ(MessageUtil::hash(a2), MessageUtil::hash(a3)); - EXPECT_NE(0, MessageUtil::hash(a1)); - EXPECT_NE(MessageUtil::hash(s), MessageUtil::hash(a1)); + ProtobufWkt::Any a4, a5; + a4.PackFrom(s2); + a5.PackFrom(s3); + + TestScopedRuntime runtime_; + for (std::string runtime_value : {"true", "false"}) { + // TODO(ravenblack): when the runtime flag is removed, keep the expects + // but remove the loop around them and the extra output. + runtime_.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", runtime_value}}); + EXPECT_EQ(MessageUtil::hash(a1), MessageUtil::hash(a2)) << runtime_value; + EXPECT_EQ(MessageUtil::hash(a2), MessageUtil::hash(a3)) << runtime_value; + EXPECT_NE(0, MessageUtil::hash(a1)) << runtime_value; + // Same keys and values but with the values in a different order should not have + // the same hash. + EXPECT_NE(MessageUtil::hash(a1), MessageUtil::hash(a4)) << runtime_value; + // Different keys with the values in the same order should not have the same hash. + EXPECT_NE(MessageUtil::hash(a1), MessageUtil::hash(a5)) << runtime_value; + // Struct without 'any' around it should not hash the same as struct inside 'any'. + EXPECT_NE(MessageUtil::hash(s), MessageUtil::hash(a1)) << runtime_value; + } } TEST_F(ProtobufUtilityTest, RepeatedPtrUtilDebugString) { diff --git a/test/extensions/filters/http/cache/BUILD b/test/extensions/filters/http/cache/BUILD index 65930a8771f3..81ab2735dae1 100644 --- a/test/extensions/filters/http/cache/BUILD +++ b/test/extensions/filters/http/cache/BUILD @@ -69,6 +69,7 @@ envoy_extension_cc_test( "//source/extensions/http/cache/simple_http_cache:config", "//test/mocks/http:http_mocks", "//test/test_common:simulated_time_system_lib", + "//test/test_common:test_runtime_lib", "//test/test_common:utility_lib", ], ) diff --git a/test/extensions/filters/http/cache/http_cache_test.cc b/test/extensions/filters/http/cache/http_cache_test.cc index fa13cf30b4a1..f8cd99266b12 100644 --- a/test/extensions/filters/http/cache/http_cache_test.cc +++ b/test/extensions/filters/http/cache/http_cache_test.cc @@ -7,6 +7,7 @@ #include "test/extensions/filters/http/cache/common.h" #include "test/mocks/http/mocks.h" #include "test/test_common/simulated_time_system.h" +#include "test/test_common/test_runtime.h" #include "test/test_common/utility.h" #include "gtest/gtest.h" @@ -294,6 +295,17 @@ TEST_F(LookupRequestTest, PragmaNoFallback) { } TEST(HttpCacheTest, StableHashKey) { + TestScopedRuntime runtime; + runtime.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "true"}}); + Key key; + key.set_host("example.com"); + ASSERT_EQ(stableHashKey(key), 6153940628716543519u); +} + +TEST(HttpCacheTest, StableHashKeyWithSlowHash) { + // TODO(ravenblack): This test should be removed when the runtime guard is removed. + TestScopedRuntime runtime; + runtime.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); Key key; key.set_host("example.com"); ASSERT_EQ(stableHashKey(key), 9582653837550152292u); diff --git a/tools/code_format/check_format.py b/tools/code_format/check_format.py index ac723f7c0cbe..916e4e8f6db6 100755 --- a/tools/code_format/check_format.py +++ b/tools/code_format/check_format.py @@ -384,7 +384,9 @@ def allow_listed_for_grpc_init(self, file_path): def allow_listed_for_unpack_to(self, file_path): return file_path.startswith("./test") or file_path in [ - "./source/common/protobuf/utility.cc", "./source/common/protobuf/utility.h" + "./source/common/protobuf/deterministic_hash.cc", + "./source/common/protobuf/utility.cc", + "./source/common/protobuf/utility.h", ] def allow_listed_for_raw_try(self, file_path):