-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
xds-failover: plumbing the API and adding integration test #34761
Changes from 4 commits
2b60cab
061fa52
ac098f3
896f921
6ad52b7
a5dc27a
df3eced
5fdf0f5
5de301a
be6e38f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,8 @@ | |
#include "source/common/common/assert.h" | ||
#include "source/common/protobuf/utility.h" | ||
|
||
#include "absl/status/status.h" | ||
|
||
namespace Envoy { | ||
namespace Config { | ||
|
||
|
@@ -68,11 +70,13 @@ namespace { | |
/** | ||
* Check the grpc_services and cluster_names for API config sanity. Throws on error. | ||
* @param api_config_source the config source to validate. | ||
* @param max_grpc_services the maximal number of grpc services allowed. | ||
* @return an invalid status when an API config has the wrong number of gRPC | ||
* services or cluster names, depending on expectations set by its API type. | ||
*/ | ||
absl::Status | ||
checkApiConfigSourceNames(const envoy::config::core::v3::ApiConfigSource& api_config_source) { | ||
checkApiConfigSourceNames(const envoy::config::core::v3::ApiConfigSource& api_config_source, | ||
int max_grpc_services) { | ||
const bool is_grpc = | ||
(api_config_source.api_type() == envoy::config::core::v3::ApiConfigSource::GRPC || | ||
api_config_source.api_type() == envoy::config::core::v3::ApiConfigSource::DELTA_GRPC); | ||
|
@@ -89,10 +93,10 @@ checkApiConfigSourceNames(const envoy::config::core::v3::ApiConfigSource& api_co | |
fmt::format("{}::(DELTA_)GRPC must not have a cluster name specified: {}", | ||
api_config_source.GetTypeName(), api_config_source.DebugString())); | ||
} | ||
if (api_config_source.grpc_services().size() > 1) { | ||
return absl::InvalidArgumentError( | ||
fmt::format("{}::(DELTA_)GRPC must have a single gRPC service specified: {}", | ||
api_config_source.GetTypeName(), api_config_source.DebugString())); | ||
if (api_config_source.grpc_services_size() > max_grpc_services) { | ||
return absl::InvalidArgumentError(fmt::format( | ||
"{}::(DELTA_)GRPC must have no more than {} gRPC services specified: {}", | ||
api_config_source.GetTypeName(), max_grpc_services, api_config_source.DebugString())); | ||
} | ||
} else { | ||
if (!api_config_source.grpc_services().empty()) { | ||
|
@@ -133,7 +137,9 @@ absl::Status Utility::checkApiConfigSourceSubscriptionBackingCluster( | |
envoy::config::core::v3::ApiConfigSource::AGGREGATED_DELTA_GRPC) { | ||
return absl::OkStatus(); | ||
} | ||
RETURN_IF_NOT_OK(checkApiConfigSourceNames(api_config_source)); | ||
RETURN_IF_NOT_OK(checkApiConfigSourceNames( | ||
api_config_source, | ||
Runtime::runtimeFeatureEnabled("envoy.restart_features.xds_failover_support") ? 2 : 1)); | ||
|
||
const bool is_grpc = | ||
(api_config_source.api_type() == envoy::config::core::v3::ApiConfigSource::GRPC); | ||
|
@@ -153,6 +159,14 @@ absl::Status Utility::checkApiConfigSourceSubscriptionBackingCluster( | |
primary_clusters, api_config_source.grpc_services()[0].envoy_grpc().cluster_name(), | ||
api_config_source.GetTypeName())); | ||
} | ||
if (Runtime::runtimeFeatureEnabled("envoy.restart_features.xds_failover_support") && | ||
api_config_source.grpc_services_size() == 2 && | ||
api_config_source.grpc_services()[1].has_envoy_grpc()) { | ||
// If an Envoy failover gRPC exists, we validate its cluster name. | ||
RETURN_IF_NOT_OK(Utility::validateClusterName( | ||
primary_clusters, api_config_source.grpc_services()[1].envoy_grpc().cluster_name(), | ||
api_config_source.GetTypeName())); | ||
} | ||
} | ||
// Otherwise, there is no cluster name to validate. | ||
return absl::OkStatus(); | ||
|
@@ -161,15 +175,23 @@ absl::Status Utility::checkApiConfigSourceSubscriptionBackingCluster( | |
absl::optional<std::string> | ||
Utility::getGrpcControlPlane(const envoy::config::core::v3::ApiConfigSource& api_config_source) { | ||
if (api_config_source.grpc_services_size() > 0) { | ||
// Only checking for the first entry in grpc_services, because Envoy's xDS implementation | ||
// currently only considers the first gRPC endpoint and ignores any other xDS management servers | ||
// specified in an ApiConfigSource. | ||
std::string res = ""; | ||
// In case more than one grpc service is defined, concatenate the names for | ||
// a unique GrpcControlPlane identifier. | ||
if (api_config_source.grpc_services(0).has_envoy_grpc()) { | ||
return api_config_source.grpc_services(0).envoy_grpc().cluster_name(); | ||
res = api_config_source.grpc_services(0).envoy_grpc().cluster_name(); | ||
} else if (api_config_source.grpc_services(0).has_google_grpc()) { | ||
res = api_config_source.grpc_services(0).google_grpc().target_uri(); | ||
} | ||
if (api_config_source.grpc_services(0).has_google_grpc()) { | ||
return api_config_source.grpc_services(0).google_grpc().target_uri(); | ||
// Concatenate the failover gRPC service. | ||
if (api_config_source.grpc_services_size() == 2) { | ||
if (api_config_source.grpc_services(1).has_envoy_grpc()) { | ||
absl::StrAppend(&res, ",", api_config_source.grpc_services(1).envoy_grpc().cluster_name()); | ||
} else if (api_config_source.grpc_services(1).has_google_grpc()) { | ||
absl::StrAppend(&res, ",", api_config_source.grpc_services(1).google_grpc().target_uri()); | ||
} | ||
} | ||
return res; | ||
} | ||
return absl::nullopt; | ||
} | ||
|
@@ -204,8 +226,10 @@ Utility::parseRateLimitSettings(const envoy::config::core::v3::ApiConfigSource& | |
absl::StatusOr<Grpc::AsyncClientFactoryPtr> Utility::factoryForGrpcApiConfigSource( | ||
Grpc::AsyncClientManager& async_client_manager, | ||
const envoy::config::core::v3::ApiConfigSource& api_config_source, Stats::Scope& scope, | ||
bool skip_cluster_check) { | ||
RETURN_IF_NOT_OK(checkApiConfigSourceNames(api_config_source)); | ||
bool skip_cluster_check, int grpc_service_idx) { | ||
RETURN_IF_NOT_OK(checkApiConfigSourceNames( | ||
api_config_source, | ||
Runtime::runtimeFeatureEnabled("envoy.restart_features.xds_failover_support") ? 2 : 1)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as above. |
||
|
||
if (api_config_source.api_type() != envoy::config::core::v3::ApiConfigSource::GRPC && | ||
api_config_source.api_type() != envoy::config::core::v3::ApiConfigSource::DELTA_GRPC) { | ||
|
@@ -214,8 +238,13 @@ absl::StatusOr<Grpc::AsyncClientFactoryPtr> Utility::factoryForGrpcApiConfigSour | |
api_config_source.DebugString())); | ||
} | ||
|
||
if (grpc_service_idx >= api_config_source.grpc_services_size()) { | ||
// No returned factory in case there's no entry. | ||
return nullptr; | ||
} | ||
|
||
envoy::config::core::v3::GrpcService grpc_service; | ||
grpc_service.MergeFrom(api_config_source.grpc_services(0)); | ||
grpc_service.MergeFrom(api_config_source.grpc_services(grpc_service_idx)); | ||
|
||
return async_client_manager.factoryForGrpcService(grpc_service, scope, skip_cluster_check); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,16 +88,25 @@ getOrigin(const Network::TransportSocketOptionsConstSharedPtr& options, HostCons | |
|
||
bool isBlockingAdsCluster(const envoy::config::bootstrap::v3::Bootstrap& bootstrap, | ||
absl::string_view cluster_name) { | ||
bool blocking_ads_cluster = false; | ||
if (bootstrap.dynamic_resources().has_ads_config()) { | ||
const auto& ads_config_source = bootstrap.dynamic_resources().ads_config(); | ||
// We only care about EnvoyGrpc, not GoogleGrpc, because we only need to delay ADS mux | ||
// initialization if it uses an Envoy cluster that needs to be initialized first. We don't | ||
// depend on the same cluster initialization when opening a gRPC stream for GoogleGrpc. | ||
return (ads_config_source.grpc_services_size() > 0 && | ||
ads_config_source.grpc_services(0).has_envoy_grpc() && | ||
ads_config_source.grpc_services(0).envoy_grpc().cluster_name() == cluster_name); | ||
blocking_ads_cluster = | ||
(ads_config_source.grpc_services_size() > 0 && | ||
ads_config_source.grpc_services(0).has_envoy_grpc() && | ||
ads_config_source.grpc_services(0).envoy_grpc().cluster_name() == cluster_name); | ||
if (Runtime::runtimeFeatureEnabled("envoy.restart_features.xds_failover_support")) { | ||
// Validate the failover server if there is one. | ||
blocking_ads_cluster |= | ||
(ads_config_source.grpc_services_size() == 2 && | ||
ads_config_source.grpc_services(1).has_envoy_grpc() && | ||
ads_config_source.grpc_services(1).envoy_grpc().cluster_name() == cluster_name); | ||
} | ||
} | ||
return false; | ||
return blocking_ads_cluster; | ||
} | ||
|
||
} // namespace | ||
|
@@ -447,14 +456,22 @@ ClusterManagerImpl::initialize(const envoy::config::bootstrap::v3::Bootstrap& bo | |
if (!factory) { | ||
return absl::InvalidArgumentError(fmt::format("{} not found", name)); | ||
} | ||
auto factory_or_error = Config::Utility::factoryForGrpcApiConfigSource( | ||
*async_client_manager_, dyn_resources.ads_config(), *stats_.rootScope(), false); | ||
THROW_IF_STATUS_NOT_OK(factory_or_error, throw); | ||
ads_mux_ = factory->create(factory_or_error.value()->createUncachedRawAsyncClient(), nullptr, | ||
dispatcher_, random_, *stats_.rootScope(), | ||
dyn_resources.ads_config(), local_info_, | ||
std::move(custom_config_validators), std::move(backoff_strategy), | ||
makeOptRefFromPtr(xds_config_tracker_.get()), {}, use_eds_cache); | ||
auto factory_primary_or_error = Config::Utility::factoryForGrpcApiConfigSource( | ||
*async_client_manager_, dyn_resources.ads_config(), *stats_.rootScope(), false, 0); | ||
THROW_IF_STATUS_NOT_OK(factory_primary_or_error, throw); | ||
Grpc::AsyncClientFactoryPtr factory_failover = nullptr; | ||
if (Runtime::runtimeFeatureEnabled("envoy.restart_features.xds_failover_support")) { | ||
auto factory_failover_or_error = Config::Utility::factoryForGrpcApiConfigSource( | ||
*async_client_manager_, dyn_resources.ads_config(), *stats_.rootScope(), false, 1); | ||
THROW_IF_STATUS_NOT_OK(factory_failover_or_error, throw); | ||
factory_failover = std::move(factory_failover_or_error.value()); | ||
} | ||
ads_mux_ = factory->create( | ||
factory_primary_or_error.value()->createUncachedRawAsyncClient(), | ||
factory_failover ? factory_failover->createUncachedRawAsyncClient() : nullptr, | ||
dispatcher_, random_, *stats_.rootScope(), dyn_resources.ads_config(), local_info_, | ||
std::move(custom_config_validators), std::move(backoff_strategy), | ||
makeOptRefFromPtr(xds_config_tracker_.get()), {}, use_eds_cache); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this large code block is is identical to one below, could be factored out into a dedicated function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are a few differences between the initializations of SotW and delta, and in this PR I don't want to make major changes - I'm keeping the same pattern as was prior to this PR. |
||
} else { | ||
absl::Status status = Config::Utility::checkTransportVersion(dyn_resources.ads_config()); | ||
RETURN_IF_NOT_OK(status); | ||
|
@@ -470,12 +487,20 @@ ClusterManagerImpl::initialize(const envoy::config::bootstrap::v3::Bootstrap& bo | |
if (!factory) { | ||
return absl::InvalidArgumentError(fmt::format("{} not found", name)); | ||
} | ||
auto factory_or_error = Config::Utility::factoryForGrpcApiConfigSource( | ||
*async_client_manager_, dyn_resources.ads_config(), *stats_.rootScope(), false); | ||
THROW_IF_STATUS_NOT_OK(factory_or_error, throw); | ||
auto factory_primary_or_error = Config::Utility::factoryForGrpcApiConfigSource( | ||
*async_client_manager_, dyn_resources.ads_config(), *stats_.rootScope(), false, 0); | ||
THROW_IF_STATUS_NOT_OK(factory_primary_or_error, throw); | ||
Grpc::AsyncClientFactoryPtr factory_failover = nullptr; | ||
if (Runtime::runtimeFeatureEnabled("envoy.restart_features.xds_failover_support")) { | ||
auto factory_failover_or_error = Config::Utility::factoryForGrpcApiConfigSource( | ||
*async_client_manager_, dyn_resources.ads_config(), *stats_.rootScope(), false, 1); | ||
THROW_IF_STATUS_NOT_OK(factory_failover_or_error, throw); | ||
factory_failover = std::move(factory_failover_or_error.value()); | ||
} | ||
ads_mux_ = factory->create( | ||
factory_or_error.value()->createUncachedRawAsyncClient(), nullptr, dispatcher_, random_, | ||
*stats_.rootScope(), dyn_resources.ads_config(), local_info_, | ||
factory_primary_or_error.value()->createUncachedRawAsyncClient(), | ||
factory_failover ? factory_failover->createUncachedRawAsyncClient() : nullptr, | ||
dispatcher_, random_, *stats_.rootScope(), dyn_resources.ads_config(), local_info_, | ||
std::move(custom_config_validators), std::move(backoff_strategy), | ||
makeOptRefFromPtr(xds_config_tracker_.get()), xds_delegate_opt_ref, use_eds_cache); | ||
} | ||
|
@@ -567,7 +592,7 @@ absl::Status ClusterManagerImpl::initializeSecondaryClusters( | |
absl::Status status = Config::Utility::checkTransportVersion(load_stats_config); | ||
RETURN_IF_NOT_OK(status); | ||
auto factory_or_error = Config::Utility::factoryForGrpcApiConfigSource( | ||
*async_client_manager_, load_stats_config, *stats_.rootScope(), false); | ||
*async_client_manager_, load_stats_config, *stats_.rootScope(), false, 0); | ||
THROW_IF_STATUS_NOT_OK(factory_or_error, throw); | ||
load_stats_reporter_ = std::make_unique<LoadStatsReporter>( | ||
local_info_, *this, *stats_.rootScope(), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can this be computed in the `checkApiConfigSourceNames function instead of being passed as function param?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will require the
checkApiConfigSourceNames()
function to become a method (rather than a function), or the runtime passed to it, and I'm not sure if it's worth it as this runtime-flag will disappear in the future.If you feel strongly about this, then I'll pass the runtime as a second param.