diff --git a/source/server/BUILD b/source/server/BUILD index 49f2efc73908..13d06d2f18cd 100644 --- a/source/server/BUILD +++ b/source/server/BUILD @@ -165,6 +165,7 @@ envoy_cc_library( hdrs = ["overload_manager_impl.h"], deps = [ "//include/envoy/server:overload_manager_interface", + "//include/envoy/stats:stats_interface", "//source/common/common:logger_lib", "//source/common/config:utility_lib", "//source/server:resource_monitor_config_lib", diff --git a/source/server/overload_manager_impl.cc b/source/server/overload_manager_impl.cc index 1f9c78ddf7bf..e4c6d3c302d0 100644 --- a/source/server/overload_manager_impl.cc +++ b/source/server/overload_manager_impl.cc @@ -6,6 +6,8 @@ #include "server/resource_monitor_config_impl.h" +#include "absl/strings/str_cat.h" + namespace Envoy { namespace Server { @@ -29,9 +31,15 @@ class ThresholdTriggerImpl : public OverloadAction::Trigger { absl::optional value_; }; +std::string StatsName(const std::string& a, const std::string& b) { + return absl::StrCat("overload.", a, b); +} + } // namespace -OverloadAction::OverloadAction(const envoy::config::overload::v2alpha::OverloadAction& config) { +OverloadAction::OverloadAction(const envoy::config::overload::v2alpha::OverloadAction& config, + Stats::Scope& stats_scope) + : active_gauge_(stats_scope.gauge(StatsName(config.name(), ".active"))) { for (const auto& trigger_config : config.triggers()) { TriggerPtr trigger; @@ -57,9 +65,11 @@ bool OverloadAction::updateResourcePressure(const std::string& name, double pres ASSERT(it != triggers_.end()); if (it->second->updateValue(pressure)) { if (it->second->isFired()) { + active_gauge_.set(1); const auto result = fired_triggers_.insert(name); ASSERT(result.second); } else { + active_gauge_.set(0); const auto result = fired_triggers_.erase(name); ASSERT(result == 1); } @@ -71,7 +81,8 @@ bool OverloadAction::updateResourcePressure(const std::string& name, double pres bool OverloadAction::isActive() const { return !fired_triggers_.empty(); } OverloadManagerImpl::OverloadManagerImpl( - Event::Dispatcher& dispatcher, const envoy::config::overload::v2alpha::OverloadManager& config) + Event::Dispatcher& dispatcher, Stats::Scope& stats_scope, + const envoy::config::overload::v2alpha::OverloadManager& config) : started_(false), dispatcher_(dispatcher), refresh_interval_( std::chrono::milliseconds(PROTOBUF_GET_MS_OR_DEFAULT(config, refresh_interval, 1000))) { @@ -84,8 +95,9 @@ OverloadManagerImpl::OverloadManagerImpl( auto config = Config::Utility::translateToFactoryConfig(resource, factory); auto monitor = factory.createResourceMonitor(*config, context); - auto result = resources_.emplace(std::piecewise_construct, std::forward_as_tuple(name), - std::forward_as_tuple(name, std::move(monitor), *this)); + auto result = + resources_.emplace(std::piecewise_construct, std::forward_as_tuple(name), + std::forward_as_tuple(name, std::move(monitor), *this, stats_scope)); if (!result.second) { throw EnvoyException(fmt::format("Duplicate resource monitor {}", name)); } @@ -95,7 +107,7 @@ OverloadManagerImpl::OverloadManagerImpl( const auto& name = action.name(); ENVOY_LOG(debug, "Adding overload action {}", name); auto result = actions_.emplace(std::piecewise_construct, std::forward_as_tuple(name), - std::forward_as_tuple(action)); + std::forward_as_tuple(action, stats_scope)); if (!result.second) { throw EnvoyException(fmt::format("Duplicate overload action {}", name)); } @@ -163,6 +175,13 @@ void OverloadManagerImpl::updateResourcePressure(const std::string& resource, do }); } +OverloadManagerImpl::Resource::Resource(const std::string& name, ResourceMonitorPtr monitor, + OverloadManagerImpl& manager, Stats::Scope& stats_scope) + : name_(name), monitor_(std::move(monitor)), manager_(manager), pending_update_(false), + pressure_gauge_(stats_scope.gauge(StatsName(name, ".pressure"))), + failed_updates_counter_(stats_scope.counter(StatsName(name, ".failed_updates"))), + skipped_updates_counter_(stats_scope.counter(StatsName(name, ".skipped_updates"))) {} + void OverloadManagerImpl::Resource::update() { if (!pending_update_) { pending_update_ = true; @@ -170,19 +189,19 @@ void OverloadManagerImpl::Resource::update() { return; } ENVOY_LOG(debug, "Skipping update for resource {} which has pending update", name_); - // TODO(eziskind) add stat + skipped_updates_counter_.inc(); } void OverloadManagerImpl::Resource::onSuccess(const ResourceUsage& usage) { pending_update_ = false; manager_.updateResourcePressure(name_, usage.resource_pressure_); + pressure_gauge_.set(usage.resource_pressure_ * 100); // convert to percent } void OverloadManagerImpl::Resource::onFailure(const EnvoyException& error) { pending_update_ = false; ENVOY_LOG(info, "Failed to update resource {}: {}", name_, error.what()); - - // TODO(eziskind): add stat + failed_updates_counter_.inc(); } } // namespace Server diff --git a/source/server/overload_manager_impl.h b/source/server/overload_manager_impl.h index 439933602e6c..de528efa67ed 100644 --- a/source/server/overload_manager_impl.h +++ b/source/server/overload_manager_impl.h @@ -9,6 +9,7 @@ #include "envoy/event/dispatcher.h" #include "envoy/server/overload_manager.h" #include "envoy/server/resource_monitor.h" +#include "envoy/stats/stats.h" #include "common/common/logger.h" @@ -17,7 +18,8 @@ namespace Server { class OverloadAction { public: - OverloadAction(const envoy::config::overload::v2alpha::OverloadAction& config); + OverloadAction(const envoy::config::overload::v2alpha::OverloadAction& config, + Stats::Scope& stats_scope); // Updates the current pressure for the given resource and returns whether the action // has changed state. @@ -41,11 +43,12 @@ class OverloadAction { private: std::unordered_map triggers_; std::unordered_set fired_triggers_; + Stats::Gauge& active_gauge_; }; class OverloadManagerImpl : Logger::Loggable, public OverloadManager { public: - OverloadManagerImpl(Event::Dispatcher& dispatcher, + OverloadManagerImpl(Event::Dispatcher& dispatcher, Stats::Scope& stats_scope, const envoy::config::overload::v2alpha::OverloadManager& config); void start(); @@ -57,8 +60,8 @@ class OverloadManagerImpl : Logger::Loggable, public OverloadM private: class Resource : public ResourceMonitor::Callbacks { public: - Resource(const std::string& name, ResourceMonitorPtr monitor, OverloadManagerImpl& manager) - : name_(name), monitor_(std::move(monitor)), manager_(manager), pending_update_(false) {} + Resource(const std::string& name, ResourceMonitorPtr monitor, OverloadManagerImpl& manager, + Stats::Scope& stats_scope); // ResourceMonitor::Callbacks void onSuccess(const ResourceUsage& usage) override; @@ -71,6 +74,9 @@ class OverloadManagerImpl : Logger::Loggable, public OverloadM ResourceMonitorPtr monitor_; OverloadManagerImpl& manager_; bool pending_update_; + Stats::Gauge& pressure_gauge_; + Stats::Counter& failed_updates_counter_; + Stats::Counter& skipped_updates_counter_; }; struct ActionCallback { diff --git a/test/server/BUILD b/test/server/BUILD index 4ceb534c4463..82032f60b1ae 100644 --- a/test/server/BUILD +++ b/test/server/BUILD @@ -118,6 +118,7 @@ envoy_cc_test( srcs = ["overload_manager_impl_test.cc"], deps = [ "//include/envoy/registry", + "//source/common/stats:stats_lib", "//source/extensions/resource_monitors/common:factory_base_lib", "//source/server:overload_manager_lib", "//test/mocks/event:event_mocks", diff --git a/test/server/overload_manager_impl_test.cc b/test/server/overload_manager_impl_test.cc index 352a9bf6ec8b..cc7aa8acc7da 100644 --- a/test/server/overload_manager_impl_test.cc +++ b/test/server/overload_manager_impl_test.cc @@ -1,6 +1,8 @@ #include "envoy/server/resource_monitor.h" #include "envoy/server/resource_monitor_config.h" +#include "common/stats/stats_impl.h" + #include "server/overload_manager_impl.h" #include "extensions/resource_monitors/common/factory_base.h" @@ -87,45 +89,48 @@ class OverloadManagerImplTest : public testing::Test { return proto; } + std::string getConfig() { + return R"EOF( + refresh_interval { + seconds: 1 + } + resource_monitors { + name: "envoy.resource_monitors.fake_resource1" + } + resource_monitors { + name: "envoy.resource_monitors.fake_resource2" + } + actions { + name: "envoy.overload_actions.dummy_action" + triggers { + name: "envoy.resource_monitors.fake_resource1" + threshold { + value: 0.9 + } + } + triggers { + name: "envoy.resource_monitors.fake_resource2" + threshold { + value: 0.8 + } + } + } + )EOF"; + } + FakeResourceMonitorFactory factory1_; FakeResourceMonitorFactory factory2_; Registry::InjectFactory register_factory1_; Registry::InjectFactory register_factory2_; NiceMock dispatcher_; + Stats::IsolatedStoreImpl stats_; Event::TimerCb timer_cb_; }; TEST_F(OverloadManagerImplTest, CallbackOnlyFiresWhenStateChanges) { setDispatcherExpectation(); - const std::string config = R"EOF( - refresh_interval { - seconds: 1 - } - resource_monitors { - name: "envoy.resource_monitors.fake_resource1" - } - resource_monitors { - name: "envoy.resource_monitors.fake_resource2" - } - actions { - name: "envoy.overload_actions.dummy_action" - triggers { - name: "envoy.resource_monitors.fake_resource1" - threshold { - value: 0.9 - } - } - triggers { - name: "envoy.resource_monitors.fake_resource2" - threshold { - value: 0.8 - } - } - } - )EOF"; - - OverloadManagerImpl manager(dispatcher_, parseConfig(config)); + OverloadManagerImpl manager(dispatcher_, stats_, parseConfig(getConfig())); bool is_active = false; int cb_count = 0; manager.registerForAction("envoy.overload_actions.dummy_action", dispatcher_, @@ -134,24 +139,35 @@ TEST_F(OverloadManagerImplTest, CallbackOnlyFiresWhenStateChanges) { cb_count++; }); manager.registerForAction("envoy.overload_actions.unknown_action", dispatcher_, - [&](OverloadActionState) { ASSERT(false); }); + [&](OverloadActionState) { EXPECT_TRUE(false); }); manager.start(); + Stats::Gauge& active_gauge = stats_.gauge("overload.envoy.overload_actions.dummy_action.active"); + Stats::Gauge& pressure_gauge1 = + stats_.gauge("overload.envoy.resource_monitors.fake_resource1.pressure"); + Stats::Gauge& pressure_gauge2 = + stats_.gauge("overload.envoy.resource_monitors.fake_resource2.pressure"); + factory1_.monitor_->setPressure(0.5); timer_cb_(); EXPECT_FALSE(is_active); EXPECT_EQ(0, cb_count); + EXPECT_EQ(0, active_gauge.value()); + EXPECT_EQ(50, pressure_gauge1.value()); factory1_.monitor_->setPressure(0.95); timer_cb_(); EXPECT_TRUE(is_active); EXPECT_EQ(1, cb_count); + EXPECT_EQ(1, active_gauge.value()); + EXPECT_EQ(95, pressure_gauge1.value()); // Callback should not be invoked if action active state has not changed factory1_.monitor_->setPressure(0.94); timer_cb_(); EXPECT_TRUE(is_active); EXPECT_EQ(1, cb_count); + EXPECT_EQ(94, pressure_gauge1.value()); // Different triggers firing but overall action remains active so no callback expected factory1_.monitor_->setPressure(0.5); @@ -159,17 +175,52 @@ TEST_F(OverloadManagerImplTest, CallbackOnlyFiresWhenStateChanges) { timer_cb_(); EXPECT_TRUE(is_active); EXPECT_EQ(1, cb_count); + EXPECT_EQ(50, pressure_gauge1.value()); + EXPECT_EQ(90, pressure_gauge2.value()); factory2_.monitor_->setPressure(0.4); timer_cb_(); EXPECT_FALSE(is_active); EXPECT_EQ(2, cb_count); + EXPECT_EQ(0, active_gauge.value()); + EXPECT_EQ(40, pressure_gauge2.value()); +} + +TEST_F(OverloadManagerImplTest, FailedUpdates) { + setDispatcherExpectation(); + OverloadManagerImpl manager(dispatcher_, stats_, parseConfig(getConfig())); + manager.start(); + Stats::Counter& failed_updates = + stats_.counter("overload.envoy.resource_monitors.fake_resource1.failed_updates"); - factory1_.monitor_->setPressure(0.95); factory1_.monitor_->setError(); timer_cb_(); - EXPECT_FALSE(is_active); - EXPECT_EQ(2, cb_count); + EXPECT_EQ(1, failed_updates.value()); + timer_cb_(); + EXPECT_EQ(2, failed_updates.value()); +} + +TEST_F(OverloadManagerImplTest, SkippedUpdates) { + setDispatcherExpectation(); + + // Save the post callback instead of executing it. + Event::PostCb post_cb; + ON_CALL(dispatcher_, post(_)).WillByDefault(Invoke([&](Event::PostCb cb) { post_cb = cb; })); + + OverloadManagerImpl manager(dispatcher_, stats_, parseConfig(getConfig())); + manager.start(); + Stats::Counter& skipped_updates = + stats_.counter("overload.envoy.resource_monitors.fake_resource1.skipped_updates"); + + timer_cb_(); + EXPECT_EQ(0, skipped_updates.value()); + timer_cb_(); + EXPECT_EQ(1, skipped_updates.value()); + timer_cb_(); + EXPECT_EQ(2, skipped_updates.value()); + post_cb(); + timer_cb_(); + EXPECT_EQ(2, skipped_updates.value()); } TEST_F(OverloadManagerImplTest, DuplicateResourceMonitor) { @@ -182,8 +233,8 @@ TEST_F(OverloadManagerImplTest, DuplicateResourceMonitor) { } )EOF"; - EXPECT_THROW_WITH_REGEX(OverloadManagerImpl(dispatcher_, parseConfig(config)), EnvoyException, - "Duplicate resource monitor .*"); + EXPECT_THROW_WITH_REGEX(OverloadManagerImpl(dispatcher_, stats_, parseConfig(config)), + EnvoyException, "Duplicate resource monitor .*"); } TEST_F(OverloadManagerImplTest, DuplicateOverloadAction) { @@ -196,8 +247,8 @@ TEST_F(OverloadManagerImplTest, DuplicateOverloadAction) { } )EOF"; - EXPECT_THROW_WITH_REGEX(OverloadManagerImpl(dispatcher_, parseConfig(config)), EnvoyException, - "Duplicate overload action .*"); + EXPECT_THROW_WITH_REGEX(OverloadManagerImpl(dispatcher_, stats_, parseConfig(config)), + EnvoyException, "Duplicate overload action .*"); } TEST_F(OverloadManagerImplTest, UnknownTrigger) { @@ -213,8 +264,8 @@ TEST_F(OverloadManagerImplTest, UnknownTrigger) { } )EOF"; - EXPECT_THROW_WITH_REGEX(OverloadManagerImpl(dispatcher_, parseConfig(config)), EnvoyException, - "Unknown trigger resource .*"); + EXPECT_THROW_WITH_REGEX(OverloadManagerImpl(dispatcher_, stats_, parseConfig(config)), + EnvoyException, "Unknown trigger resource .*"); } TEST_F(OverloadManagerImplTest, DuplicateTrigger) { @@ -239,8 +290,8 @@ TEST_F(OverloadManagerImplTest, DuplicateTrigger) { } )EOF"; - EXPECT_THROW_WITH_REGEX(OverloadManagerImpl(dispatcher_, parseConfig(config)), EnvoyException, - "Duplicate trigger .*"); + EXPECT_THROW_WITH_REGEX(OverloadManagerImpl(dispatcher_, stats_, parseConfig(config)), + EnvoyException, "Duplicate trigger .*"); } } // namespace } // namespace Server