Skip to content

Commit

Permalink
Watchdog Extension: Profile Action (envoyproxy#12636)
Browse files Browse the repository at this point in the history
Added a watchdog extension that triggers profiling.

Risk Level: Medium (new extension that is optional)
Testing: Unit tests
Docs Changes: Included (added a reference to the generated extension proto.rst)
Release Notes: Included

Fixes envoyproxy#11388

Signed-off-by: Kevin Baichoo <kbaichoo@google.com>
Signed-off-by: Clara Andrew-Wani <candrewwani@gmail.com>
  • Loading branch information
KBaichoo authored and clarakosi committed Sep 3, 2020
1 parent 6809828 commit 0500fc0
Show file tree
Hide file tree
Showing 22 changed files with 851 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Expand Up @@ -128,6 +128,8 @@ extensions/filters/common/original_src @snowp @klarose
/*/extensions/compression/common @junr03 @rojkov
/*/extensions/compression/gzip @junr03 @rojkov
/*/extensions/filters/http/decompressor @rojkov @dio
# Watchdog Extensions
/*/extensions/watchdog/profile_action @kbaichoo @htuch
# Core upstream code
extensions/upstreams/http @alyssawilk @snowp @mattklein123
extensions/upstreams/http/http @alyssawilk @snowp @mattklein123
Expand Down
1 change: 1 addition & 0 deletions api/BUILD
Expand Up @@ -245,6 +245,7 @@ proto_library(
"//envoy/extensions/upstreams/http/http/v3:pkg",
"//envoy/extensions/upstreams/http/tcp/v3:pkg",
"//envoy/extensions/wasm/v3:pkg",
"//envoy/extensions/watchdog/profile_action/v3alpha:pkg",
"//envoy/service/accesslog/v3:pkg",
"//envoy/service/auth/v3:pkg",
"//envoy/service/cluster/v3:pkg",
Expand Down
9 changes: 9 additions & 0 deletions api/envoy/extensions/watchdog/profile_action/v3alpha/BUILD
@@ -0,0 +1,9 @@
# DO NOT EDIT. This file is generated by tools/proto_format/proto_sync.py.

load("@envoy_api//bazel:api_build_system.bzl", "api_proto_package")

licenses(["notice"]) # Apache 2

api_proto_package(
deps = ["@com_github_cncf_udpa//udpa/annotations:pkg"],
)
@@ -0,0 +1,35 @@
syntax = "proto3";

package envoy.extensions.watchdog.profile_action.v3alpha;

import "google/protobuf/duration.proto";

import "udpa/annotations/status.proto";
import "udpa/annotations/versioning.proto";
import "validate/validate.proto";

option java_package = "io.envoyproxy.envoy.extensions.watchdog.profile_action.v3alpha";
option java_outer_classname = "ProfileActionProto";
option java_multiple_files = true;
option (udpa.annotations.file_status).work_in_progress = true;
option (udpa.annotations.file_status).package_version_status = ACTIVE;

// [#protodoc-title: Watchdog Action that does CPU profiling.]
// [#extension: envoy.watchdog.profile_action]

// Configuration for the profile watchdog action.
//
// When the action fires it starts the CPU profiler, lets it run for
// ``profile_duration`` and then stops it, writing the profile into
// ``profile_path``.
message ProfileActionConfig {
// How long the profile should last. If not set defaults to 5 seconds.
google.protobuf.Duration profile_duration = 1;

// File path to the directory to output profiles. The directory must already
// exist; if it does not, the action logs an error and no profile is taken.
// Each profile is written to a file named ``ProfileAction.<timestamp>``
// inside this directory, where the timestamp is in seconds since the epoch.
string profile_path = 2 [(validate.rules).string = {min_bytes: 1}];

// Limits the max number of profiles that can be generated by a thread over
// its lifetime to avoid filling the disk. We keep a map of <tid, count>
// to track the number of profiles triggered by a particular thread. Only one
// thread is counted as triggering the profile even though multiple threads
// might have been eligible for triggering the profile.
// If not set (i.e. it's 0), a default of 10 will be used.
uint64 max_profiles_per_thread = 3;
}
1 change: 1 addition & 0 deletions api/versioning/BUILD
Expand Up @@ -128,6 +128,7 @@ proto_library(
"//envoy/extensions/upstreams/http/http/v3:pkg",
"//envoy/extensions/upstreams/http/tcp/v3:pkg",
"//envoy/extensions/wasm/v3:pkg",
"//envoy/extensions/watchdog/profile_action/v3alpha:pkg",
"//envoy/service/accesslog/v3:pkg",
"//envoy/service/auth/v3:pkg",
"//envoy/service/cluster/v3:pkg",
Expand Down
1 change: 1 addition & 0 deletions docs/root/api-v3/config/config.rst
Expand Up @@ -21,3 +21,4 @@ Extensions
endpoint/endpoint
upstream/upstream
wasm/wasm
watchdog/watchdog
8 changes: 8 additions & 0 deletions docs/root/api-v3/config/watchdog/watchdog.rst
@@ -0,0 +1,8 @@
Watchdog
========

.. toctree::
:glob:
:maxdepth: 2

../../extensions/watchdog/profile_action/v3alpha/*
1 change: 1 addition & 0 deletions docs/root/version_history/current.rst
Expand Up @@ -100,6 +100,7 @@ New Features
* udp_proxy: added :ref:`use_original_src_ip <envoy_v3_api_msg_extensions.filters.udp.udp_proxy.v3.UdpProxyConfig>` option to replicate the downstream remote address of the packets on the upstream side of Envoy. It is similar to :ref:`original source filter <envoy_v3_api_msg_extensions.filters.listener.original_src.v3.OriginalSrc>`.
* watchdog: support randomizing the watchdog's kill timeout to prevent synchronized kills via a maximum jitter parameter :ref:`max_kill_timeout_jitter<envoy_v3_api_field_config.bootstrap.v3.Watchdog.max_kill_timeout_jitter>`.
* watchdog: supports an extension point where actions can be registered to fire on watchdog events such as miss, megamiss, kill and multikill. See :ref:`watchdog actions<envoy_v3_api_field_config.bootstrap.v3.Watchdog.actions>`.
* watchdog: watchdog action extension that does CPU profiling. See :ref:`Profile Action <envoy_v3_api_file_envoy/extensions/watchdog/profile_action/v3alpha/profile_action.proto>`.
* xds: added :ref:`extension config discovery<envoy_v3_api_msg_config.core.v3.ExtensionConfigSource>` support for HTTP filters.
* zlib: added option to use `zlib-ng <https://github.com/zlib-ng/zlib-ng>`_ as zlib library.

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion include/envoy/server/guarddog_config.h
Expand Up @@ -32,7 +32,7 @@ class GuardDogAction {
* @param now the current time.
*/
virtual void run(envoy::config::bootstrap::v3::Watchdog::WatchdogAction::WatchdogEvent event,
std::vector<std::pair<Thread::ThreadId, MonotonicTime>> thread_ltt_pairs,
const std::vector<std::pair<Thread::ThreadId, MonotonicTime>>& thread_ltt_pairs,
MonotonicTime now) PURE;
};

Expand Down
6 changes: 5 additions & 1 deletion source/extensions/extensions_build_config.bzl
Expand Up @@ -196,10 +196,14 @@ EXTENSIONS = {
#
# Http Upstreams (excepting envoy.upstreams.http.generic which is hard-coded into the build so not registered here)
#

"envoy.upstreams.http.http": "//source/extensions/upstreams/http/http:config",
"envoy.upstreams.http.tcp": "//source/extensions/upstreams/http/tcp:config",

#
# Watchdog actions
#
"envoy.watchdog.profile_action": "//source/extensions/watchdog/profile_action:config",

}

# These can be changed to ["//visibility:public"], for downstream builds which
Expand Down
46 changes: 46 additions & 0 deletions source/extensions/watchdog/profile_action/BUILD
@@ -0,0 +1,46 @@
load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_extension",
"envoy_cc_library",
"envoy_extension_package",
)

licenses(["notice"]) # Apache 2

envoy_extension_package()

# Implementation of the profile watchdog action (profile_action.cc / profile_action.h).
# Kept separate from the ":config" target below, which depends on this library.
envoy_cc_library(
name = "profile_action_lib",
srcs = ["profile_action.cc"],
hdrs = ["profile_action.h"],
external_deps = [
"abseil_optional",
],
deps = [
"//include/envoy/api:api_interface",
"//include/envoy/common:time_interface",
"//include/envoy/event:timer_interface",
"//include/envoy/server:guarddog_config_interface",
"//include/envoy/thread:thread_interface",
"//source/common/profiler:profiler_lib",
"//source/common/protobuf:utility_lib",
# Generated C++ bindings for the ProfileActionConfig proto.
"@envoy_api//envoy/extensions/watchdog/profile_action/v3alpha:pkg_cc_proto",
],
)

# Factory and static registration for the profile action (config.cc / config.h).
# This is the label listed for "envoy.watchdog.profile_action" in
# source/extensions/extensions_build_config.bzl.
envoy_cc_extension(
name = "config",
srcs = ["config.cc"],
hdrs = ["config.h"],
security_posture = "robust_to_untrusted_downstream_and_upstream",
status = "alpha",
deps = [
":profile_action_lib",
"//include/envoy/registry",
"//source/common/common:assert_lib",
"//source/common/config:utility_lib",
"//source/common/protobuf",
"//source/common/protobuf:message_validator_lib",
"@envoy_api//envoy/extensions/watchdog/profile_action/v3alpha:pkg_cc_proto",
],
)
32 changes: 32 additions & 0 deletions source/extensions/watchdog/profile_action/config.cc
@@ -0,0 +1,32 @@
#include "extensions/watchdog/profile_action/config.h"

#include "envoy/registry/registry.h"

#include "common/config/utility.h"
#include "common/protobuf/message_validator_impl.h"

#include "extensions/watchdog/profile_action/profile_action.h"

namespace Envoy {
namespace Extensions {
namespace Watchdog {
namespace ProfileAction {

/**
 * Builds a ProfileAction from a generic watchdog action config.
 * @param config the watchdog action entry whose typed_config carries a ProfileActionConfig.
 * @param context factory context handed through to the created action.
 * @return the newly created action.
 */
Server::Configuration::GuardDogActionPtr ProfileActionFactory::createGuardDogActionFromProto(
    const envoy::config::bootstrap::v3::Watchdog::WatchdogAction& config,
    Server::Configuration::GuardDogActionFactoryContext& context) {
  // Start from an empty ProfileActionConfig and fill it in from the opaque typed_config.
  ProtobufTypes::MessagePtr typed_config = createEmptyConfigProto();
  Config::Utility::translateOpaqueConfig(config.config().typed_config(), ProtobufWkt::Struct(),
                                         ProtobufMessage::getStrictValidationVisitor(),
                                         *typed_config);

  // createEmptyConfigProto() always hands back a ProfileActionConfig, so the downcast is expected
  // to succeed.
  auto& profile_config = dynamic_cast<ProfileActionConfig&>(*typed_config);
  return std::make_unique<ProfileAction>(profile_config, context);
}

/**
* Static registration for the ProfileAction factory. @see RegistryFactory.
*/
REGISTER_FACTORY(ProfileActionFactory, Server::Configuration::GuardDogActionFactory);

} // namespace ProfileAction
} // namespace Watchdog
} // namespace Extensions
} // namespace Envoy
37 changes: 37 additions & 0 deletions source/extensions/watchdog/profile_action/config.h
@@ -0,0 +1,37 @@
#pragma once

#include "envoy/extensions/watchdog/profile_action/v3alpha/profile_action.pb.h"
#include "envoy/server/guarddog_config.h"

#include "common/protobuf/protobuf.h"

namespace Envoy {
namespace Extensions {
namespace Watchdog {
namespace ProfileAction {

class ProfileActionFactory : public Server::Configuration::GuardDogActionFactory {
public:
ProfileActionFactory() : name_("envoy.watchdog.profile_action"){};

Server::Configuration::GuardDogActionPtr createGuardDogActionFromProto(
const envoy::config::bootstrap::v3::Watchdog::WatchdogAction& config,
Server::Configuration::GuardDogActionFactoryContext& context) override;

ProtobufTypes::MessagePtr createEmptyConfigProto() override {
return std::make_unique<ProfileActionConfig>();
}

std::string name() const override { return name_; }

private:
using ProfileActionConfig =
envoy::extensions::watchdog::profile_action::v3alpha::ProfileActionConfig;

const std::string name_;
};

} // namespace ProfileAction
} // namespace Watchdog
} // namespace Extensions
} // namespace Envoy
109 changes: 109 additions & 0 deletions source/extensions/watchdog/profile_action/profile_action.cc
@@ -0,0 +1,109 @@
#include "extensions/watchdog/profile_action/profile_action.h"

#include <chrono>

#include "envoy/thread/thread.h"

#include "common/profiler/profiler.h"
#include "common/protobuf/utility.h"

#include "absl/strings/str_format.h"

namespace Envoy {
namespace Extensions {
namespace Watchdog {
namespace ProfileAction {
namespace {
// Per-thread profile cap applied when the config leaves max_profiles_per_thread at 0.
constexpr uint64_t DefaultMaxProfilePerTid{10};

/**
 * Builds the output path for a profile taken at the given time.
 * @param directory the directory to place the profile in, with or without a trailing '/'.
 * @param now the time used to stamp the file name.
 * @return "<directory>/ProfileAction.<seconds since epoch>", with exactly one '/' inserted
 *         between the directory and the file name when the directory lacks a trailing one.
 */
std::string generateProfileFilePath(const std::string& directory, const SystemTime& now) {
  const auto epoch_seconds =
      std::chrono::duration_cast<std::chrono::seconds>(now.time_since_epoch()).count();
  // Only add a separator when the configured directory does not already end with one.
  return absl::EndsWith(directory, "/")
             ? absl::StrFormat("%s%s.%d", directory, "ProfileAction", epoch_seconds)
             : absl::StrFormat("%s/%s.%d", directory, "ProfileAction", epoch_seconds);
}
} // namespace

/**
 * Constructs the action from its config and creates the timer used to stop a running profile.
 * @param config the action's configuration. An unset profile_duration falls back to 5000ms and a
 *        max_profiles_per_thread of 0 falls back to DefaultMaxProfilePerTid.
 * @param context factory context providing the dispatcher the stop timer is created on and the
 *        API used for file system and time access.
 */
ProfileAction::ProfileAction(
    envoy::extensions::watchdog::profile_action::v3alpha::ProfileActionConfig& config,
    Server::Configuration::GuardDogActionFactoryContext& context)
    : path_(config.profile_path()),
      duration_(
          std::chrono::milliseconds(PROTOBUF_GET_MS_OR_DEFAULT(config, profile_duration, 5000))),
      max_profiles_per_tid_(config.max_profiles_per_thread() == 0
                                ? DefaultMaxProfilePerTid
                                : config.max_profiles_per_thread()),
      running_profile_(false), profiles_started_(0), context_(context),
      timer_cb_(context_.dispatcher_.createTimer([this] {
        // Fires once duration_ has elapsed after run() started a profile.
        if (Profiler::Cpu::profilerEnabled()) {
          Profiler::Cpu::stopProfiler();
        } else {
          ENVOY_LOG_MISC(error,
                         "Profile Action's stop() was scheduled, but profiler isn't running!");
        }

        // Whether we stopped the profiler or it had already been stopped elsewhere, the profile
        // this action started is over. Clear the flag unconditionally: leaving it set in the
        // "already stopped" case would make run() return early forever.
        running_profile_ = false;

        if (!context_.api_.fileSystem().fileExists(profile_filename_)) {
          ENVOY_LOG_MISC(error, "Profile file {} wasn't created!", profile_filename_);
        }
      })) {}

/**
 * Starts a CPU profile in response to a watchdog event, unless this action already has one
 * running, no supplied thread is still under its profile limit, the output directory is missing,
 * or the profiler is already enabled. The event type and current time are not used.
 * @param thread_ltt_pairs the (thread id, time) pairs supplied by the watchdog; only the thread
 *        ids are consulted.
 */
void ProfileAction::run(
    envoy::config::bootstrap::v3::Watchdog::WatchdogAction::WatchdogEvent /*event*/,
    const std::vector<std::pair<Thread::ThreadId, MonotonicTime>>& thread_ltt_pairs,
    MonotonicTime /*now*/) {
  // A profile started by this action is still in flight; nothing to do.
  if (running_profile_) {
    return;
  }

  // Pick the thread that this profile will be attributed to, if any is still eligible.
  const auto trigger_tid = getTidTriggeringProfile(thread_ltt_pairs);
  if (!trigger_tid.has_value()) {
    ENVOY_LOG_MISC(warn, "Profile Action: None of the provided tids justify profiling");
    return;
  }

  if (!context_.api_.fileSystem().directoryExists(path_)) {
    ENVOY_LOG_MISC(error, "Profile Action: Directory path {} doesn't exist.", path_);
    return;
  }

  // The output file name is stamped with the current system time.
  profile_filename_ = generateProfileFilePath(path_, context_.api_.timeSource().systemTime());

  if (Profiler::Cpu::profilerEnabled()) {
    ENVOY_LOG_MISC(error, "Profile Action unable to start the profiler as it is in use elsewhere.");
    return;
  }

  if (!Profiler::Cpu::startProfiler(profile_filename_)) {
    ENVOY_LOG_MISC(error, "Profile Action failed to start the profiler.");
    return;
  }

  // The profiler is running: record it and charge the profile to the triggering thread.
  running_profile_ = true;
  ++profiles_started_;
  tid_to_profile_count_[*trigger_tid] += 1;

  // Arrange for the profile to be stopped after the configured duration.
  timer_cb_->enableTimer(duration_);
}

/**
 * Helper to determine if we have a valid tid to start profiling.
 * @param thread_ltt_pairs the (thread id, time) pairs supplied by the watchdog; only the thread
 *        ids are consulted.
 * @return the first thread id, in the order given, whose profile count is still below
 *         max_profiles_per_tid_, or absl::nullopt if every thread has reached the limit (or the
 *         list is empty).
 */
absl::optional<Thread::ThreadId> ProfileAction::getTidTriggeringProfile(
    const std::vector<std::pair<Thread::ThreadId, MonotonicTime>>& thread_ltt_pairs) {

  // Find a TID not over the max_profiles threshold
  for (const auto& [tid, ltt] : thread_ltt_pairs) {
    // Look the tid up without inserting: operator[] would add a zero entry for every thread
    // inspected, growing the map with threads that never triggered a profile. A tid with no
    // entry has triggered no profiles, so it is always under the (non-zero) limit.
    const auto entry = tid_to_profile_count_.find(tid);
    if (entry == tid_to_profile_count_.end() || entry->second < max_profiles_per_tid_) {
      return tid;
    }
  }

  return absl::nullopt;
}
} // namespace ProfileAction
} // namespace Watchdog
} // namespace Extensions
} // namespace Envoy

0 comments on commit 0500fc0

Please sign in to comment.