From 3f13f086c4ff78e9d022e37208cd23f4916098e1 Mon Sep 17 00:00:00 2001 From: June Kim Date: Sat, 9 May 2026 10:50:29 -0700 Subject: [PATCH 1/2] lb: fix segfault in validateEndpoints with non-contiguous EDS priorities TypedHashLbConfigBase::validateEndpoints() dereferences hosts without checking for null. Non-contiguous EDS priorities (e.g. 0, 1, 5) leave null entries in the PriorityState vector for the gaps (2, 3, 4), causing a segfault on clusters using ring_hash or maglev LB policies. Add a null guard to skip empty priority levels, matching the existing pattern in eds.cc which already checks `priority_state[i].first != nullptr`. Risk Level: Low Testing: Unit test added Docs Changes: N/A Release Notes: N/A Fixes #44349 Signed-off-by: June Kim Signed-off-by: June Kim --- .../common/thread_aware_lb_impl.cc | 4 +++ .../ring_hash/ring_hash_lb_test.cc | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc b/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc index dcfcc8d1f62de..ec06dabbd9846 100644 --- a/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc +++ b/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc @@ -379,6 +379,10 @@ TypedHashLbConfigBase::TypedHashLbConfigBase(absl::Span(); + hosts_p0->push_back(makeTestHost(info_, "tcp://127.0.0.1:80")); + priorities.emplace_back(std::move(hosts_p0), LocalityWeightsMap{}); + + // Priority 1: null (gap). + priorities.emplace_back(nullptr, LocalityWeightsMap{}); + + // Priority 2: one host. + auto hosts_p2 = std::make_unique(); + hosts_p2->push_back(makeTestHost(info_, "tcp://127.0.0.1:81")); + priorities.emplace_back(std::move(hosts_p2), LocalityWeightsMap{}); + + absl::Status creation_status; + TypedRingHashLbConfig typed_config(config_, context_.regex_engine_, creation_status); + ASSERT_TRUE(creation_status.ok()); + + // This must not crash (was a segfault before the fix). + EXPECT_TRUE(typed_config.validateEndpoints(priorities).ok()); +} + } // namespace } // namespace Upstream } // namespace Envoy From 06adbb02ce29bec44c37a0acd426dad6f3568ebb Mon Sep 17 00:00:00 2001 From: June Kim Date: Sat, 9 May 2026 10:51:58 -0700 Subject: [PATCH 2/2] codex review: tighten null guard to preserve locality-weight validation Per codex feedback: restructure the null check to only skip host-weight iteration while still validating locality weights for null priority entries. Rename test, trim comments, use a larger gap (4 null entries) to match the reported shape (priority 0 and 5). Signed-off-by: June Kim --- .../common/thread_aware_lb_impl.cc | 24 +++++++++---------- .../ring_hash/ring_hash_lb_test.cc | 22 +++++++---------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc b/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc index ec06dabbd9846..3db19f9826b51 100644 --- a/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc +++ b/source/extensions/load_balancing_policies/common/thread_aware_lb_impl.cc @@ -380,18 +380,18 @@ absl::Status TypedHashLbConfigBase::validateEndpoints(const PriorityState& prior for (const auto& [hosts, locality_weights_map] : priorities) { // Non-contiguous priorities can leave null entries in the priority vector. - if (hosts == nullptr) { - continue; - } - // Sum should be at most uint32_t max value, so we can validate it by accumulating into uint64_t - // and making sure there was no overflow. - uint64_t host_sum = 0; - for (const auto& host : *hosts) { - host_sum += host->weight(); - if (host_sum > std::numeric_limits::max()) { - return absl::InvalidArgumentError( - fmt::format("The sum of weights of all upstream hosts in a locality exceeds {}", - std::numeric_limits::max())); + // Only skip the host-weight check; locality weights are still validated. + if (hosts != nullptr) { + // Sum should be at most uint32_t max value, so we can validate it by accumulating into + // uint64_t and making sure there was no overflow. + uint64_t host_sum = 0; + for (const auto& host : *hosts) { + host_sum += host->weight(); + if (host_sum > std::numeric_limits::max()) { + return absl::InvalidArgumentError( + fmt::format("The sum of weights of all upstream hosts in a locality exceeds {}", + std::numeric_limits::max())); + } } } diff --git a/test/extensions/load_balancing_policies/ring_hash/ring_hash_lb_test.cc b/test/extensions/load_balancing_policies/ring_hash/ring_hash_lb_test.cc index 1e5bdda71b6fc..978361f6dd892 100644 --- a/test/extensions/load_balancing_policies/ring_hash/ring_hash_lb_test.cc +++ b/test/extensions/load_balancing_policies/ring_hash/ring_hash_lb_test.cc @@ -1252,31 +1252,27 @@ TEST(RingHashMidBatchInitializeCrashTest, NoOobOnNewPriority) { } // Regression test for https://github.com/envoyproxy/envoy/issues/44349. -// Non-contiguous EDS priorities leave null entries in the PriorityState vector. -// validateEndpoints must skip them instead of crashing. -TEST_P(RingHashLoadBalancerTest, ValidateEndpointsWithNonContiguousPriorities) { - // Build a PriorityState with a null entry at index 1 (simulates gap between - // priority 0 and priority 2). +// Null entries in PriorityState (from non-contiguous priority levels) must not segfault. +TEST_P(RingHashLoadBalancerTest, ValidateEndpointsSkipsNullPriorityEntries) { PriorityState priorities; - // Priority 0: one host. auto hosts_p0 = std::make_unique(); hosts_p0->push_back(makeTestHost(info_, "tcp://127.0.0.1:80")); priorities.emplace_back(std::move(hosts_p0), LocalityWeightsMap{}); - // Priority 1: null (gap). - priorities.emplace_back(nullptr, LocalityWeightsMap{}); + // Gaps at priorities 1-4 (null host lists, as produced by non-contiguous EDS assignments). + for (int i = 0; i < 4; ++i) { + priorities.emplace_back(nullptr, LocalityWeightsMap{}); + } - // Priority 2: one host. - auto hosts_p2 = std::make_unique(); - hosts_p2->push_back(makeTestHost(info_, "tcp://127.0.0.1:81")); - priorities.emplace_back(std::move(hosts_p2), LocalityWeightsMap{}); + auto hosts_p5 = std::make_unique(); + hosts_p5->push_back(makeTestHost(info_, "tcp://127.0.0.1:81")); + priorities.emplace_back(std::move(hosts_p5), LocalityWeightsMap{}); absl::Status creation_status; TypedRingHashLbConfig typed_config(config_, context_.regex_engine_, creation_status); ASSERT_TRUE(creation_status.ok()); - // This must not crash (was a segfault before the fix). EXPECT_TRUE(typed_config.validateEndpoints(priorities).ok()); }