From 6c6518e3b89a666c6d4bb5e891e18fdd606dd925 Mon Sep 17 00:00:00 2001 From: alyssawilk Date: Mon, 13 May 2024 08:50:26 -0400 Subject: [PATCH] Regex: removing some exceptions (#33853) regex: removing exceptions from cc file Signed-off-by: Alyssa Wilk --- .../input_matchers/test/matcher_speed_test.cc | 4 +- .../hyperscan/regex_engines/source/regex.cc | 3 +- .../hyperscan/regex_engines/source/regex.h | 2 +- .../regex_engines/test/config_test.cc | 2 +- .../regex_engines/test/regex_test.cc | 2 +- envoy/common/regex.h | 2 +- source/common/common/regex.cc | 47 +++++++++++++++---- source/common/common/regex.h | 41 ++++++++++++---- source/common/config/well_known_names.cc | 4 +- source/server/admin/prometheus_stats.cc | 2 +- tools/code_format/config.yaml | 2 +- 11 files changed, 81 insertions(+), 30 deletions(-) diff --git a/contrib/hyperscan/matching/input_matchers/test/matcher_speed_test.cc b/contrib/hyperscan/matching/input_matchers/test/matcher_speed_test.cc index 07b40b00ce2f..5ffc3d230576 100644 --- a/contrib/hyperscan/matching/input_matchers/test/matcher_speed_test.cc +++ b/contrib/hyperscan/matching/input_matchers/test/matcher_speed_test.cc @@ -30,11 +30,11 @@ static void BM_CompiledGoogleReMatcher(benchmark::State& state) { envoy::type::matcher::v3::RegexMatcher config; config.mutable_google_re2(); config.set_regex(std::string(ClusterRePattern)); - const auto matcher = Regex::CompiledGoogleReMatcher(config); + const auto matcher = Regex::CompiledGoogleReMatcher::create(config).value(); uint32_t passes = 0; for (auto _ : state) { // NOLINT for (const std::string& cluster_input : clusterInputs()) { - if (matcher.match(cluster_input)) { + if (matcher->match(cluster_input)) { ++passes; } } diff --git a/contrib/hyperscan/regex_engines/source/regex.cc b/contrib/hyperscan/regex_engines/source/regex.cc index 4b30a48f88a8..2ffa239eabb7 100644 --- a/contrib/hyperscan/regex_engines/source/regex.cc +++ b/contrib/hyperscan/regex_engines/source/regex.cc @@ -8,7 +8,8 @@ namespace Hyperscan { HyperscanEngine::HyperscanEngine(Event::Dispatcher& dispatcher, ThreadLocal::SlotAllocator& tls) : dispatcher_(dispatcher), tls_(tls) {} -Envoy::Regex::CompiledMatcherPtr HyperscanEngine::matcher(const std::string& regex) const { +absl::StatusOr +HyperscanEngine::matcher(const std::string& regex) const { std::vector expressions{regex.c_str()}; std::vector flags{HS_FLAG_UTF8}; std::vector ids{0}; diff --git a/contrib/hyperscan/regex_engines/source/regex.h b/contrib/hyperscan/regex_engines/source/regex.h index 2a55b816d2dd..b1ac92345085 100644 --- a/contrib/hyperscan/regex_engines/source/regex.h +++ b/contrib/hyperscan/regex_engines/source/regex.h @@ -12,7 +12,7 @@ namespace Hyperscan { class HyperscanEngine : public Envoy::Regex::Engine { public: explicit HyperscanEngine(Event::Dispatcher& dispatcher, ThreadLocal::SlotAllocator& tls); - Envoy::Regex::CompiledMatcherPtr matcher(const std::string& regex) const override; + absl::StatusOr matcher(const std::string& regex) const override; private: Event::Dispatcher& dispatcher_; diff --git a/contrib/hyperscan/regex_engines/test/config_test.cc b/contrib/hyperscan/regex_engines/test/config_test.cc index 425b5e3c9548..8d9e68d51e98 100644 --- a/contrib/hyperscan/regex_engines/test/config_test.cc +++ b/contrib/hyperscan/regex_engines/test/config_test.cc @@ -31,7 +31,7 @@ TEST_F(ConfigTest, IncompatibleArchitecture) { TEST_F(ConfigTest, Regex) { setup(); - Envoy::Regex::CompiledMatcherPtr matcher = engine_->matcher("^/asdf/.+"); + Envoy::Regex::CompiledMatcherPtr matcher = engine_->matcher("^/asdf/.+").value(); EXPECT_TRUE(matcher->match("/asdf/1")); EXPECT_FALSE(matcher->match("/ASDF/1")); diff --git a/contrib/hyperscan/regex_engines/test/regex_test.cc b/contrib/hyperscan/regex_engines/test/regex_test.cc index f027d4a60595..c53fac7dbb79 100644 --- a/contrib/hyperscan/regex_engines/test/regex_test.cc +++ b/contrib/hyperscan/regex_engines/test/regex_test.cc @@ -30,7 +30,7 @@ class EngineTest : public ::testing::Test { TEST_F(EngineTest, Matcher) { setup(); - EXPECT_NO_THROW(engine_->matcher("^/asdf/.+")); + EXPECT_TRUE(engine_->matcher("^/asdf/.+").status().ok()); } } // namespace Hyperscan diff --git a/envoy/common/regex.h b/envoy/common/regex.h index d9f593e20c9b..b5261476708d 100644 --- a/envoy/common/regex.h +++ b/envoy/common/regex.h @@ -37,7 +37,7 @@ class Engine { * Create a @ref CompiledMatcher with the given regex expression. * @param regex the regex expression match string */ - virtual CompiledMatcherPtr matcher(const std::string& regex) const PURE; + virtual absl::StatusOr matcher(const std::string& regex) const PURE; }; using EnginePtr = std::shared_ptr; diff --git a/source/common/common/regex.cc b/source/common/common/regex.cc index 086de371b7b3..4827ae134556 100644 --- a/source/common/common/regex.cc +++ b/source/common/common/regex.cc @@ -13,11 +13,41 @@ namespace Envoy { namespace Regex { +absl::StatusOr> +CompiledGoogleReMatcher::createAndSizeCheck(const std::string& regex) { + absl::Status creation_status = absl::OkStatus(); + auto ret = std::unique_ptr( + new CompiledGoogleReMatcher(regex, creation_status, true)); + RETURN_IF_NOT_OK(creation_status); + return ret; +} + +absl::StatusOr> +CompiledGoogleReMatcher::create(const envoy::type::matcher::v3::RegexMatcher& config) { + absl::Status creation_status = absl::OkStatus(); + auto ret = std::unique_ptr( + new CompiledGoogleReMatcher(config, creation_status)); + RETURN_IF_NOT_OK(creation_status); + return ret; +} + +absl::StatusOr> +CompiledGoogleReMatcher::create(const xds::type::matcher::v3::RegexMatcher& config) { + absl::Status creation_status = absl::OkStatus(); + auto ret = std::unique_ptr( + new CompiledGoogleReMatcher(config, creation_status)); + RETURN_IF_NOT_OK(creation_status); + return ret; +} + +REGISTER_FACTORY(GoogleReEngineFactory, EngineFactory); CompiledGoogleReMatcher::CompiledGoogleReMatcher(const std::string& regex, + absl::Status& creation_status, bool do_program_size_check) : regex_(regex, re2::RE2::Quiet) { if (!regex_.ok()) { - throwEnvoyExceptionOrPanic(regex_.error()); + creation_status = absl::InvalidArgumentError(regex_.error()); + return; } if (do_program_size_check && Runtime::isRuntimeInitialized()) { @@ -25,7 +55,7 @@ CompiledGoogleReMatcher::CompiledGoogleReMatcher(const std::string& regex, const uint32_t max_program_size_error_level = Runtime::getInteger("re2.max_program_size.error_level", 100); if (regex_program_size > max_program_size_error_level) { - throwEnvoyExceptionOrPanic( + creation_status = absl::InvalidArgumentError( fmt::format("regex '{}' RE2 program size of {} > max program size of " "{} set for the error level threshold. Increase " "configured max program size if necessary.", @@ -44,8 +74,9 @@ CompiledGoogleReMatcher::CompiledGoogleReMatcher(const std::string& regex, } CompiledGoogleReMatcher::CompiledGoogleReMatcher( - const envoy::type::matcher::v3::RegexMatcher& config) - : CompiledGoogleReMatcher(config.regex(), !config.google_re2().has_max_program_size()) { + const envoy::type::matcher::v3::RegexMatcher& config, absl::Status& creation_status) + : CompiledGoogleReMatcher(config.regex(), creation_status, + !config.google_re2().has_max_program_size()) { const uint32_t regex_program_size = static_cast(regex_.ProgramSize()); // Check if the deprecated field max_program_size is set first, and follow the old logic if so. @@ -53,7 +84,7 @@ CompiledGoogleReMatcher::CompiledGoogleReMatcher( const uint32_t max_program_size = PROTOBUF_GET_WRAPPED_OR_DEFAULT(config.google_re2(), max_program_size, 100); if (regex_program_size > max_program_size) { - throwEnvoyExceptionOrPanic( + creation_status = absl::InvalidArgumentError( fmt::format("regex '{}' RE2 program size of {} > max program size of " "{}. Increase configured max program size if necessary.", config.regex(), regex_program_size, max_program_size)); @@ -61,8 +92,8 @@ CompiledGoogleReMatcher::CompiledGoogleReMatcher( } } -CompiledMatcherPtr GoogleReEngine::matcher(const std::string& regex) const { - return std::make_unique(regex, true); +absl::StatusOr GoogleReEngine::matcher(const std::string& regex) const { + return CompiledGoogleReMatcher::createAndSizeCheck(regex); } EnginePtr GoogleReEngineFactory::createEngine(const Protobuf::Message&, @@ -74,7 +105,5 @@ ProtobufTypes::MessagePtr GoogleReEngineFactory::createEmptyConfigProto() { return std::make_unique(); } -REGISTER_FACTORY(GoogleReEngineFactory, EngineFactory); - } // namespace Regex } // namespace Envoy diff --git a/source/common/common/regex.h b/source/common/common/regex.h index c728d6d9cc29..5a78b5026628 100644 --- a/source/common/common/regex.h +++ b/source/common/common/regex.h @@ -20,12 +20,13 @@ namespace Regex { class CompiledGoogleReMatcher : public CompiledMatcher { public: - explicit CompiledGoogleReMatcher(const std::string& regex, bool do_program_size_check); - - explicit CompiledGoogleReMatcher(const xds::type::matcher::v3::RegexMatcher& config) - : CompiledGoogleReMatcher(config.regex(), false) {} - - explicit CompiledGoogleReMatcher(const envoy::type::matcher::v3::RegexMatcher& config); + // Create, applying re2.max_program_size.error_level and re2.max_program_size.warn_level. + static absl::StatusOr> + createAndSizeCheck(const std::string& regex); + static absl::StatusOr> + create(const envoy::type::matcher::v3::RegexMatcher& config); + static absl::StatusOr> + create(const xds::type::matcher::v3::RegexMatcher& config); // CompiledMatcher bool match(absl::string_view value) const override { return re2::RE2::FullMatch(value, regex_); } @@ -37,13 +38,32 @@ class CompiledGoogleReMatcher : public CompiledMatcher { return result; } -private: +protected: + explicit CompiledGoogleReMatcher(const std::string& regex) : regex_(regex, re2::RE2::Quiet) { + ENVOY_BUG(regex_.ok(), "Invalid regex"); + } + explicit CompiledGoogleReMatcher(const std::string& regex, absl::Status& creation_status, + bool do_program_size_check); + explicit CompiledGoogleReMatcher(const envoy::type::matcher::v3::RegexMatcher& config, + absl::Status& creation_status); + explicit CompiledGoogleReMatcher(const xds::type::matcher::v3::RegexMatcher& config, + absl::Status& creation_status) + : CompiledGoogleReMatcher(config.regex(), creation_status, false) {} + const re2::RE2 regex_; }; +// Allow creating CompiledGoogleReMatcher without checking for status failures +// for call sites which really really want to. +class CompiledGoogleReMatcherNoSafetyChecks : public CompiledGoogleReMatcher { +public: + CompiledGoogleReMatcherNoSafetyChecks(const std::string& regex) + : CompiledGoogleReMatcher(regex) {} +}; + class GoogleReEngine : public Engine { public: - CompiledMatcherPtr matcher(const std::string& regex) const override; + absl::StatusOr matcher(const std::string& regex) const override; }; class GoogleReEngineFactory : public EngineFactory { @@ -69,10 +89,11 @@ class Utility { static CompiledMatcherPtr parseRegex(const RegexMatcherType& matcher, Engine& engine) { // Fallback deprecated engine type in regex matcher. if (matcher.has_google_re2()) { - return std::make_unique(matcher); + return THROW_OR_RETURN_VALUE(CompiledGoogleReMatcher::create(matcher), + std::unique_ptr); } - return engine.matcher(matcher.regex()); + return THROW_OR_RETURN_VALUE(engine.matcher(matcher.regex()), CompiledMatcherPtr); } }; diff --git a/source/common/config/well_known_names.cc b/source/common/config/well_known_names.cc index 9b5078d8c1bf..1cc7913d77f4 100644 --- a/source/common/config/well_known_names.cc +++ b/source/common/config/well_known_names.cc @@ -31,8 +31,8 @@ std::string expandRegex(const std::string& regex) { } const Regex::CompiledGoogleReMatcher& validTagValueRegex() { - CONSTRUCT_ON_FIRST_USE(Regex::CompiledGoogleReMatcher, absl::StrCat("^", TAG_VALUE_REGEX, "$"), - false); + CONSTRUCT_ON_FIRST_USE(Regex::CompiledGoogleReMatcherNoSafetyChecks, + absl::StrCat("^", TAG_VALUE_REGEX, "$")); } } // namespace diff --git a/source/server/admin/prometheus_stats.cc b/source/server/admin/prometheus_stats.cc index a882031a0f8f..f807a6bd6ab3 100644 --- a/source/server/admin/prometheus_stats.cc +++ b/source/server/admin/prometheus_stats.cc @@ -17,7 +17,7 @@ namespace Server { namespace { const Regex::CompiledGoogleReMatcher& promRegex() { - CONSTRUCT_ON_FIRST_USE(Regex::CompiledGoogleReMatcher, "[^a-zA-Z0-9_]", false); + CONSTRUCT_ON_FIRST_USE(Regex::CompiledGoogleReMatcherNoSafetyChecks, "[^a-zA-Z0-9_]"); } /** diff --git a/tools/code_format/config.yaml b/tools/code_format/config.yaml index 11802d3e1be9..e84caaee0ddf 100644 --- a/tools/code_format/config.yaml +++ b/tools/code_format/config.yaml @@ -137,7 +137,7 @@ paths: - source/common/filesystem/kqueue/watcher_impl.cc - source/common/filesystem/win32/directory_iterator_impl.cc - source/common/common/utility.cc - - source/common/common/regex.cc + - source/common/common/regex.h - source/common/common/matchers.cc - source/exe/stripped_main_base.cc - source/server/options_impl.cc