Skip to content

Commit

Permalink
Reflection-based deterministic message hashing (#30761)
Browse files Browse the repository at this point in the history
Signed-off-by: Raven Black <ravenblack@dropbox.com>
  • Loading branch information
ravenblackx committed Dec 13, 2023
1 parent 874ccb7 commit 1fbc9e5
Show file tree
Hide file tree
Showing 17 changed files with 1,014 additions and 17 deletions.
7 changes: 7 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ minor_behavior_changes:
Added new configuration field :ref:`rate_limited_as_resource_exhausted
<envoy_v3_api_field_extensions.filters.http.local_ratelimit.v3.LocalRateLimit.rate_limited_as_resource_exhausted>`
to allow for setting if rate limit grpc response should be RESOURCE_EXHAUSTED instead of the default UNAVAILABLE.
- area: config parsing, http cache filter
change: |
Replaces protobuf hashing based on a human-readable text serialization with a dedicated deterministic hashing algorithm.
The performance of the hash operation is improved by 2-10x depending on the structure of the message,
which is expected to reduce config update time or startup time by 10-25%. The new algorithm is also
used for http_cache_filter hashing, which will effectively cause a one-time cache flush on update
for users with a persistent cache. To enable this behavior set ``envoy.restart_features.use_fast_protobuf_hash`` to true.
- area: filter state
change: |
Added config name of filter sending a local reply in filter state with key
Expand Down
12 changes: 12 additions & 0 deletions source/common/protobuf/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,25 @@ envoy_cc_library(
]),
)

# Library target for the reflection-based deterministic protobuf message hash
# (deterministic_hash.cc / deterministic_hash.h).
envoy_cc_library(
name = "deterministic_hash_lib",
srcs = ["deterministic_hash.cc"],
hdrs = ["deterministic_hash.h"],
deps = [
":protobuf",
"//source/common/common:assert_lib",
"//source/common/common:hash_lib",
],
)

envoy_cc_library(
name = "utility_lib",
srcs = ["utility.cc"],
external_deps = [
"protobuf",
],
deps = [
":deterministic_hash_lib",
":message_validator_lib",
":protobuf",
":utility_lib_header",
Expand Down
233 changes: 233 additions & 0 deletions source/common/protobuf/deterministic_hash.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
#if defined(ENVOY_ENABLE_FULL_PROTOS)
#include "source/common/protobuf/deterministic_hash.h"

#include "source/common/common/assert.h"
#include "source/common/common/hash.h"

namespace Envoy {
namespace DeterministicProtoHash {
namespace {

// Reads one singular scalar field from `message` through protobuf reflection.
// The reflection interface offers a separately named getter per scalar type
// instead of a template, so the caller spells out the wanted type `T` and the
// matching getter is selected at compile time. Only the seven scalar types
// listed below are supported; any other `T` fails to build.
template <typename T>
T reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
                const Protobuf::FieldDescriptor& field) {
  if constexpr (std::is_same_v<T, uint32_t>) {
    return reflection.GetUInt32(message, &field);
  } else if constexpr (std::is_same_v<T, int32_t>) {
    return reflection.GetInt32(message, &field);
  } else if constexpr (std::is_same_v<T, uint64_t>) {
    return reflection.GetUInt64(message, &field);
  } else if constexpr (std::is_same_v<T, int64_t>) {
    return reflection.GetInt64(message, &field);
  } else if constexpr (std::is_same_v<T, float>) {
    return reflection.GetFloat(message, &field);
  } else if constexpr (std::is_same_v<T, double>) {
    return reflection.GetDouble(message, &field);
  } else {
    static_assert(std::is_same_v<T, bool>, "reflectionGet: unsupported scalar type");
    return reflection.GetBool(message, &field);
  }
}

// Folds the value(s) of a scalar-typed field into `seed` and returns the result.
// A singular field contributes its one value; a repeated field contributes every
// element in iteration order, with each intermediate hash seeding the next.
template <typename T, std::enable_if_t<std::is_scalar_v<T>, bool> = true>
uint64_t hashScalarField(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
                         const Protobuf::FieldDescriptor& field, uint64_t seed) {
  if (!field.is_repeated()) {
    return HashUtil::xxHash64Value(reflectionGet<T>(reflection, message, field), seed);
  }
  uint64_t running = seed;
  for (const T& element : reflection.GetRepeatedFieldRef<T>(message, &field)) {
    running = HashUtil::xxHash64Value(element, running);
  }
  return running;
}

// Forward declarations. Message hashing and field hashing call each other:
// a message is hashed field by field, and a message-typed field is hashed as a
// message.
// Hashes a whole message, chaining from `seed` (0 when omitted).
uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = 0);
// Hashes a single field of `message`, chaining from `seed`.
uint64_t reflectionHashField(const Protobuf::Message& message,
const Protobuf::FieldDescriptor& field, uint64_t seed);

// Hashes a map field so that the result does not depend on the order in which
// the map entries are visited. Each entry is hashed on its own (key first, then
// value, starting from a seed of 0), the per-entry hashes are summed -- addition
// being order-insensitive -- and only that sum is mixed into `seed`.
// The field must be a map and must contain at least one entry.
uint64_t reflectionHashMapField(const Protobuf::Message& message,
                                const Protobuf::FieldDescriptor& field, uint64_t seed) {
  ASSERT(field.is_map());
  const Protobuf::Reflection& reflection = *message.GetReflection();
  const auto& map_entries = reflection.GetRepeatedFieldRef<Protobuf::Message>(message, &field);
  ASSERT(!map_entries.empty());
  // Every entry shares one descriptor, so the first entry supplies the key and
  // value field descriptors for all of them.
  const Protobuf::Descriptor& entry_descriptor = *map_entries.begin()->GetDescriptor();
  const Protobuf::FieldDescriptor& key_field = *entry_descriptor.map_key();
  const Protobuf::FieldDescriptor& value_field = *entry_descriptor.map_value();
  uint64_t entry_hash_sum = 0;
  for (const Protobuf::Message& map_entry : map_entries) {
    const uint64_t key_hash = reflectionHashField(map_entry, key_field, 0);
    entry_hash_sum += reflectionHashField(map_entry, value_field, key_hash);
  }
  return HashUtil::xxHash64Value(entry_hash_sum, seed);
}

// Hashes one field of `message`, chaining from `seed`, and returns the new hash.
// The field number is folded in first, followed by the field's value(s)
// according to its C++ type. Repeated fields contribute each element in turn;
// map fields are delegated to reflectionHashMapField and message-typed fields
// to reflectionHashMessage.
uint64_t reflectionHashField(const Protobuf::Message& message,
                             const Protobuf::FieldDescriptor& field, uint64_t seed) {
  using Protobuf::FieldDescriptor;
  const Protobuf::Reflection& reflection = *message.GetReflection();
  const uint64_t tagged = HashUtil::xxHash64Value(field.number(), seed);
  switch (field.cpp_type()) {
  case FieldDescriptor::CPPTYPE_INT32:
    return hashScalarField<int32_t>(reflection, message, field, tagged);
  case FieldDescriptor::CPPTYPE_UINT32:
    return hashScalarField<uint32_t>(reflection, message, field, tagged);
  case FieldDescriptor::CPPTYPE_INT64:
    return hashScalarField<int64_t>(reflection, message, field, tagged);
  case FieldDescriptor::CPPTYPE_UINT64:
    return hashScalarField<uint64_t>(reflection, message, field, tagged);
  case FieldDescriptor::CPPTYPE_DOUBLE:
    return hashScalarField<double>(reflection, message, field, tagged);
  case FieldDescriptor::CPPTYPE_FLOAT:
    return hashScalarField<float>(reflection, message, field, tagged);
  case FieldDescriptor::CPPTYPE_BOOL:
    return hashScalarField<bool>(reflection, message, field, tagged);
  case FieldDescriptor::CPPTYPE_ENUM: {
    if (!field.is_repeated()) {
      return HashUtil::xxHash64Value(reflection.GetEnumValue(message, &field), tagged);
    }
    uint64_t running = tagged;
    const int count = reflection.FieldSize(message, &field);
    for (int index = 0; index < count; ++index) {
      running = HashUtil::xxHash64Value(reflection.GetRepeatedEnumValue(message, &field, index),
                                        running);
    }
    return running;
  }
  case FieldDescriptor::CPPTYPE_STRING: {
    if (!field.is_repeated()) {
      // Scratch may be used by GetStringReference if the field is not already a std::string.
      std::string scratch;
      return HashUtil::xxHash64(reflection.GetStringReference(message, &field, &scratch), tagged);
    }
    uint64_t running = tagged;
    for (const std::string& element :
         reflection.GetRepeatedFieldRef<std::string>(message, &field)) {
      running = HashUtil::xxHash64(element, running);
    }
    return running;
  }
  case FieldDescriptor::CPPTYPE_MESSAGE: {
    if (field.is_map()) {
      return reflectionHashMapField(message, field, tagged);
    }
    if (!field.is_repeated()) {
      return reflectionHashMessage(reflection.GetMessage(message, &field), tagged);
    }
    uint64_t running = tagged;
    for (const Protobuf::Message& element :
         reflection.GetRepeatedFieldRef<Protobuf::Message>(message, &field)) {
      running = reflectionHashMessage(element, running);
    }
    return running;
  }
  }
  // No case matched: only the field number has been mixed in.
  return tagged;
}

// Accepts either a type url or a descriptor full name and yields the descriptor
// full name: everything after the last '/' if there is one, otherwise the input
// unchanged. For example, the type url used in envoy yaml config
// "type.googleapis.com/envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig"
// becomes
// "envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig"
absl::string_view typeUrlToDescriptorFullName(absl::string_view url) {
  const size_t last_slash = url.rfind('/');
  return last_slash == absl::string_view::npos ? url : url.substr(last_slash + 1);
}

// Materializes the message packed inside `any` so it can be walked by
// reflection. The type is looked up by name in the generated descriptor pool.
// Returns nullptr when that lookup fails; otherwise returns a fresh instance of
// the type that `any` has been unpacked into. The result of UnpackTo itself is
// not inspected.
std::unique_ptr<Protobuf::Message> unpackAnyForReflection(const ProtobufWkt::Any& any) {
  const Protobuf::Descriptor* packed_descriptor =
      Protobuf::DescriptorPool::generated_pool()->FindMessageTypeByName(
          typeUrlToDescriptorFullName(any.type_url()));
  if (packed_descriptor == nullptr) {
    // If the type name refers to an unknown type, we treat it the same as other
    // unknown fields - not including its contents in the hash.
    return nullptr;
  }
  const Protobuf::Message* prototype =
      Protobuf::MessageFactory::generated_factory()->GetPrototype(packed_descriptor);
  ASSERT(prototype != nullptr, "should be impossible since the descriptor is known");
  std::unique_ptr<Protobuf::Message> unpacked(prototype->New());
  any.UnpackTo(unpacked.get());
  return unpacked;
}

// Hashes `message` by reflection, chaining from `seed`, and returns the new hash.
//
// The descriptor's full name is hashed first. An `Any` message is then unpacked
// and the packed message is hashed in its place; when the packed type cannot be
// resolved, only the type url is hashed. For any other message, every field
// reported by ListFields is hashed in turn, followed by an end-of-message marker.
//
// This is intentionally ignoring unknown fields.
uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) {
  using Protobuf::FieldDescriptor;
  const Protobuf::Reflection* reflection = message.GetReflection();
  const Protobuf::Descriptor* descriptor = message.GetDescriptor();
  seed = HashUtil::xxHash64(descriptor->full_name(), seed);
  if (descriptor->well_known_type() == Protobuf::Descriptor::WELLKNOWNTYPE_ANY) {
    const ProtobufWkt::Any* any = Protobuf::DynamicCastToGenerated<ProtobufWkt::Any>(&message);
    ASSERT(any != nullptr, "casting to any should always work for WELLKNOWNTYPE_ANY");
    std::unique_ptr<Protobuf::Message> submsg = unpackAnyForReflection(*any);
    if (submsg == nullptr) {
      // If we wanted to handle unknown types in Any, this is where we'd have to do it.
      // Since we don't know the type to introspect it, we hash just its type name.
      return HashUtil::xxHash64(any->type_url(), seed);
    }
    return reflectionHashMessage(*submsg, seed);
  }
  std::vector<const FieldDescriptor*> fields;
  // ListFields returns the fields ordered by field number.
  reflection->ListFields(message, &fields);
  // If we wanted to handle unknown fields, we'd need to also GetUnknownFields here.
  for (const FieldDescriptor* field : fields) {
    seed = reflectionHashField(message, *field, seed);
  }
  // Hash one extra character to signify end of message, so that
  //   msg{} field2=2
  // hashes differently from
  //   msg{field2=2}
  return HashUtil::xxHash64("\x17", seed);
}
} // namespace

// Public entry point: hashes `message` by reflection, starting from a seed of 0.
uint64_t hash(const Protobuf::Message& message) {
  constexpr uint64_t initial_seed = 0;
  return reflectionHashMessage(message, initial_seed);
}

} // namespace DeterministicProtoHash
} // namespace Envoy
#endif
26 changes: 26 additions & 0 deletions source/common/protobuf/deterministic_hash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#pragma once

#include "source/common/protobuf/protobuf.h"

#if defined(ENVOY_ENABLE_FULL_PROTOS)
namespace Envoy {
namespace DeterministicProtoHash {

// Computes a 64-bit hash of `message` by walking its fields through protobuf
// reflection.
//
// Note: this ignores unknown fields and unrecognized types in Any fields.
// An alternative approach might treat such fields as "raw data" and include
// them in the hash, which would risk breaking the deterministic behavior,
// versus this way risks ignoring significant data.
//
// Ignoring unknown fields was chosen as the implementation because the
// TextFormat-based hashing this replaces was explicitly ignoring unknown
// fields.
//
// If this is used as part of making a hash table, it may result in
// collisions if unknown fields are present and are not ignored by the
// corresponding comparator. A `MessageDifferencer` can be configured to
// ignore unknown fields, or not to.
uint64_t hash(const Protobuf::Message& message);

} // namespace DeterministicProtoHash
} // namespace Envoy
#endif
13 changes: 7 additions & 6 deletions source/common/protobuf/utility.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "source/common/common/assert.h"
#include "source/common/common/documentation_url.h"
#include "source/common/common/fmt.h"
#include "source/common/protobuf/deterministic_hash.h"
#include "source/common/protobuf/message_validator_impl.h"
#include "source/common/protobuf/protobuf.h"
#include "source/common/protobuf/visitor.h"
Expand Down Expand Up @@ -129,22 +130,22 @@ void ProtoExceptionUtil::throwProtoValidationException(const std::string& valida
}

// Returns a hash of `message`.
//
// With full protos enabled, the runtime guard
// `envoy.restart_features.use_fast_protobuf_hash` selects the reflection-based
// DeterministicProtoHash. When the guard is off, the message is printed with
// TextFormat (Any expanded, field numbers in place of names, single-line mode,
// unknown fields hidden) and the resulting text is hashed.
// Without full protos, the serialized bytes of the message are hashed.
size_t MessageUtil::hash(const Protobuf::Message& message) {
#if defined(ENVOY_ENABLE_FULL_PROTOS)
  if (Runtime::runtimeFeatureEnabled("envoy.restart_features.use_fast_protobuf_hash")) {
    return DeterministicProtoHash::hash(message);
  } else {
    std::string text_format;
    Protobuf::TextFormat::Printer printer;
    printer.SetExpandAny(true);
    printer.SetUseFieldNumber(true);
    printer.SetSingleLineMode(true);
    printer.SetHideUnknownFields(true);
    printer.PrintToString(message, &text_format);
    return HashUtil::xxHash64(text_format);
  }
#else
  return HashUtil::xxHash64(message.SerializeAsString());
#endif
}

#if !defined(ENVOY_ENABLE_FULL_PROTOS)
Expand Down
2 changes: 2 additions & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ FALSE_RUNTIME_GUARD(envoy_restart_features_use_eds_cache_for_ads);
FALSE_RUNTIME_GUARD(envoy_reloadable_features_enable_universal_header_validator);
// TODO(pksohn): enable after fixing https://github.com/envoyproxy/envoy/issues/29930
FALSE_RUNTIME_GUARD(envoy_reloadable_features_quic_defer_logging_to_ack_listener);
// TODO(#31276): flip this to true after some test time.
FALSE_RUNTIME_GUARD(envoy_restart_features_use_fast_protobuf_hash);

// Block of non-boolean flags. Use of int flags is deprecated. Do not add more.
ABSL_FLAG(uint64_t, re2_max_program_size_error_level, 100, ""); // NOLINT
Expand Down
2 changes: 1 addition & 1 deletion source/extensions/filters/http/cache/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ envoy_cc_library(
"//source/common/common:assert_lib",
"//source/common/http:header_utility_lib",
"//source/common/http:headers_lib",
"//source/common/protobuf:utility_lib",
"//source/common/protobuf:deterministic_hash_lib",
"@envoy_api//envoy/extensions/filters/http/cache/v3:pkg_cc_proto",
],
)
Expand Down
10 changes: 8 additions & 2 deletions source/extensions/filters/http/cache/http_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "source/common/http/header_utility.h"
#include "source/common/http/headers.h"
#include "source/common/http/utility.h"
#include "source/common/protobuf/utility.h"
#include "source/common/protobuf/deterministic_hash.h"
#include "source/extensions/filters/http/cache/cache_custom_headers.h"
#include "source/extensions/filters/http/cache/cache_headers_utils.h"

Expand Down Expand Up @@ -54,7 +54,13 @@ LookupRequest::LookupRequest(const Http::RequestHeaderMap& request_headers, Syst
// Unless this API is still alpha, calls to stableHashKey() must always return
// the same result, or a way must be provided to deal with a complete cache
// flush.
// Returns the hash of a cache `key`. When the runtime guard
// `envoy.restart_features.use_fast_protobuf_hash` is enabled the key is hashed
// with DeterministicProtoHash; otherwise hashing is delegated to
// MessageUtil::hash.
size_t stableHashKey(const Key& key) {
  if (Runtime::runtimeFeatureEnabled("envoy.restart_features.use_fast_protobuf_hash")) {
    return DeterministicProtoHash::hash(key);
  } else {
    return MessageUtil::hash(key);
  }
}

void LookupRequest::initializeRequestCacheControl(const Http::RequestHeaderMap& request_headers) {
const absl::string_view cache_control =
Expand Down
2 changes: 0 additions & 2 deletions source/extensions/filters/http/cache/http_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@ using LookupResultPtr = std::unique_ptr<LookupResult>;
//
// When providing a cached response, Caches must ensure that the keys (and not
// just their hashes) match.
//
// TODO(toddmgreer): Ensure that stability guarantees above are accurate.
size_t stableHashKey(const Key& key);

// LookupRequest holds everything about a request that's needed to look for a
Expand Down

0 comments on commit 1fbc9e5

Please sign in to comment.