Skip to content

Commit

Permalink
Move SubstringSetMatcher into //base.
Browse files Browse the repository at this point in the history
As discussed in email earlier, move the Aho-Corasick tree (SubstringSetMatcher)
into //base, so that it is directly available to e.g. Blink code without
adding extra exceptions. Update everything depending on it to take into
account the new paths and namespace.

Note that some of the unit tests have been clang-formatted when they
were edited for the move.

I've copied the existing OWNERS file, and added myself to help with
maintenance/review if needed.

Change-Id: I90971a29f1de0fb8b5a90a95a3437f73339df423
Bug: 1318773
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3641524
Reviewed-by: Justin DeWitt <dewittj@chromium.org>
Commit-Queue: Steinar H Gunderson <sesse@chromium.org>
Reviewed-by: Devlin Cronin <rdevlin.cronin@chromium.org>
Reviewed-by: Dominic Battré <battre@chromium.org>
Reviewed-by: danakj <danakj@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1002568}
  • Loading branch information
Steinar H. Gunderson authored and Chromium LUCI CQ committed May 12, 2022
1 parent 57ef100 commit 5570feb
Show file tree
Hide file tree
Showing 20 changed files with 380 additions and 452 deletions.
7 changes: 7 additions & 0 deletions base/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,10 @@ mixed_component("base") {
"strings/utf_string_conversion_utils.h",
"strings/utf_string_conversions.cc",
"strings/utf_string_conversions.h",
"substring_set_matcher/string_pattern.cc",
"substring_set_matcher/string_pattern.h",
"substring_set_matcher/substring_set_matcher.cc",
"substring_set_matcher/substring_set_matcher.h",
"supports_user_data.cc",
"supports_user_data.h",
"sync_socket.cc",
Expand Down Expand Up @@ -2706,6 +2710,7 @@ test("base_perftests") {
"observer_list_perftest.cc",
"rand_util_perftest.cc",
"strings/string_util_perftest.cc",
"substring_set_matcher/substring_set_matcher_perftest.cc",
"task/job_perftest.cc",
"task/sequence_manager/sequence_manager_perftest.cc",
"task/thread_pool/thread_pool_perftest.cc",
Expand Down Expand Up @@ -3184,6 +3189,8 @@ test("base_unittests") {
"strings/sys_string_conversions_unittest.cc",
"strings/utf_offset_string_conversions_unittest.cc",
"strings/utf_string_conversions_unittest.cc",
"substring_set_matcher/string_pattern_unittest.cc",
"substring_set_matcher/substring_set_matcher_unittest.cc",
"supports_user_data_unittest.cc",
"sync_socket_unittest.cc",
"synchronization/atomic_flag_unittest.cc",
Expand Down
2 changes: 2 additions & 0 deletions base/substring_set_matcher/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
battre@chromium.org
sesse@chromium.org
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_matcher/string_pattern.h"
#include "base/substring_set_matcher/string_pattern.h"

#include <tuple>
#include <utility>

#include "base/check_op.h"

namespace url_matcher {
namespace base {

StringPattern::StringPattern(std::string pattern, StringPattern::ID id)
: pattern_(std::move(pattern)), id_(id) {
Expand All @@ -25,4 +25,4 @@ bool StringPattern::operator<(const StringPattern& rhs) const {
return std::tie(id_, pattern_) < std::tie(rhs.id_, rhs.pattern_);
}

} // namespace url_matcher
} // namespace base
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_URL_MATCHER_STRING_PATTERN_H_
#define COMPONENTS_URL_MATCHER_STRING_PATTERN_H_
#ifndef BASE_SUBSTRING_SET_MATCHER_STRING_PATTERN_H_
#define BASE_SUBSTRING_SET_MATCHER_STRING_PATTERN_H_

#include <string>

#include "components/url_matcher/url_matcher_export.h"
#include "base/base_export.h"

namespace url_matcher {
namespace base {

// An individual pattern of a substring or regex matcher. A pattern consists of
// a string (interpreted as individual bytes, no character encoding) and an
Expand All @@ -18,7 +18,7 @@ namespace url_matcher {
// RegexMatcher::MatchURL() to help the caller to figure out what
// patterns matched a string. All patterns registered to a matcher
// need to contain unique IDs.
class URL_MATCHER_EXPORT StringPattern {
class BASE_EXPORT StringPattern {
public:
typedef int ID;

Expand All @@ -43,6 +43,6 @@ class URL_MATCHER_EXPORT StringPattern {
ID id_;
};

} // namespace url_matcher
} // namespace base

#endif // COMPONENTS_URL_MATCHER_STRING_PATTERN_H_
#endif // BASE_SUBSTRING_SET_MATCHER_STRING_PATTERN_H_
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_matcher/string_pattern.h"
#include "base/substring_set_matcher/string_pattern.h"

#include <string>

#include "testing/gtest/include/gtest/gtest.h"

namespace url_matcher {
namespace base {

TEST(StringPatternTest, StringPattern) {
StringPattern r1("Test", 2);
Expand All @@ -22,4 +22,4 @@ TEST(StringPatternTest, StringPattern) {
EXPECT_TRUE(r1 < r3);
}

} // namespace url_matcher
} // namespace base
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_matcher/substring_set_matcher.h"
#include "base/substring_set_matcher/substring_set_matcher.h"

#include <stddef.h>

Expand All @@ -18,9 +18,9 @@
#include "base/containers/contains.h"
#include "base/containers/queue.h"
#include "base/numerics/checked_math.h"
#include "base/trace_event/memory_usage_estimator.h"
#include "base/trace_event/memory_usage_estimator.h" // no-presubmit-check

namespace url_matcher {
namespace base {

namespace {

Expand Down Expand Up @@ -477,4 +477,4 @@ size_t SubstringSetMatcher::AhoCorasickNode::EstimateMemoryUsage() const {
}
}

} // namespace url_matcher
} // namespace base
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_
#define COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_
#ifndef BASE_SUBSTRING_SET_MATCHER_SUBSTRING_SET_MATCHER_H_
#define BASE_SUBSTRING_SET_MATCHER_SUBSTRING_SET_MATCHER_H_

#include <stdint.h>

Expand All @@ -12,15 +12,15 @@
#include <string>
#include <vector>

#include "base/base_export.h"
#include "base/check_op.h"
#include "components/url_matcher/string_pattern.h"
#include "components/url_matcher/url_matcher_export.h"
#include "base/substring_set_matcher/string_pattern.h"

namespace url_matcher {
namespace base {

// Class that store a set of string patterns and can find for a string S,
// which string patterns occur in S.
class URL_MATCHER_EXPORT SubstringSetMatcher {
class BASE_EXPORT SubstringSetMatcher {
public:
SubstringSetMatcher() = default;
SubstringSetMatcher(const SubstringSetMatcher&) = delete;
Expand Down Expand Up @@ -318,6 +318,6 @@ class URL_MATCHER_EXPORT SubstringSetMatcher {
bool is_empty_ = true;
};

} // namespace url_matcher
} // namespace base

#endif // COMPONENTS_URL_MATCHER_SUBSTRING_SET_MATCHER_H_
#endif // BASE_SUBSTRING_SET_MATCHER_SUBSTRING_SET_MATCHER_H_
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_matcher/substring_set_matcher.h"
#include "base/substring_set_matcher/substring_set_matcher.h"

#include <limits>
#include <string>
Expand All @@ -12,11 +12,11 @@
#include "base/rand_util.h"
#include "base/time/time.h"
#include "base/timer/elapsed_timer.h"
#include "base/trace_event/memory_usage_estimator.h"
#include "base/trace_event/memory_usage_estimator.h" // no-presubmit-check
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/perf/perf_result_reporter.h"

namespace url_matcher {
namespace base {

namespace {

Expand Down Expand Up @@ -83,4 +83,4 @@ TEST(SubstringSetMatcherPerfTest, RandomKeys) {

} // namespace

} // namespace url_matcher
} // namespace base
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/url_matcher/substring_set_matcher.h"
#include "base/substring_set_matcher/substring_set_matcher.h"

#include <stddef.h>

Expand All @@ -13,7 +13,7 @@
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace url_matcher {
namespace base {

namespace {

Expand Down Expand Up @@ -198,4 +198,4 @@ TEST(SubstringSetMatcherTest, TestEmptyMatcher) {
EXPECT_TRUE(matcher.IsEmpty());
}

} // namespace url_matcher
} // namespace base
2 changes: 0 additions & 2 deletions components/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,6 @@ if (!is_ios) {
"omnibox/browser/history_quick_provider_performance_unittest.cc",
"subresource_filter/core/common/perftests/indexed_ruleset_perftest.cc",
"test/run_all_perftests.cc",
"url_matcher/substring_set_matcher_perftest.cc",
"visitedlink/test/visitedlink_perftest.cc",
]

Expand All @@ -974,7 +973,6 @@ if (!is_ios) {
"//components/subresource_filter/core/common",
"//components/subresource_filter/tools:tools_lib",
"//components/test:test_support",
"//components/url_matcher",
"//components/visitedlink/browser",
"//testing/perf",
"//url",
Expand Down
12 changes: 6 additions & 6 deletions components/feed/core/v2/web_feed_subscriptions/web_feed_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
#include "base/containers/flat_map.h"
#include "base/strings/strcat.h"
#include "base/strings/string_piece.h"
#include "base/substring_set_matcher/string_pattern.h"
#include "components/feed/core/proto/v2/store.pb.h"
#include "components/feed/core/proto/v2/wire/web_feed_matcher.pb.h"
#include "components/feed/core/v2/feedstore_util.h"
#include "components/url_matcher/string_pattern.h"
#include "components/url_matcher/url_matcher.h"
#include "url/gurl.h"
namespace feed {
Expand Down Expand Up @@ -168,8 +168,8 @@ class EntrySet {

// Members that just hold on to memory used in matchers.
url_matcher::URLMatcherConditionFactory condition_factory_;
std::vector<url_matcher::StringPattern> host_match_patterns_;
std::vector<url_matcher::StringPattern> path_match_patterns_;
std::vector<base::StringPattern> host_match_patterns_;
std::vector<base::StringPattern> path_match_patterns_;

// List of `MultiConditionSet`. Each one knows how to aggregate match IDs
// reported from matchers below into a WebFeed match.
Expand Down Expand Up @@ -363,9 +363,9 @@ class EntrySetBuilder {
return true;
}

static std::vector<const url_matcher::StringPattern*> MakeVectorOfPointers(
const std::vector<url_matcher::StringPattern>& patterns) {
std::vector<const url_matcher::StringPattern*> result(patterns.size());
static std::vector<const base::StringPattern*> MakeVectorOfPointers(
const std::vector<base::StringPattern>& patterns) {
std::vector<const base::StringPattern*> result(patterns.size());
for (size_t i = 0; i < patterns.size(); ++i) {
result[i] = &patterns[i];
}
Expand Down
18 changes: 0 additions & 18 deletions components/url_matcher/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,6 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

component("substring_set_matcher") {
sources = [
"string_pattern.cc",
"string_pattern.h",
"substring_set_matcher.cc",
"substring_set_matcher.h",
"url_matcher_export.h",
]

defines = [ "URL_MATCHER_IMPLEMENTATION" ]

public_deps = [ "//base" ]
}

component("url_matcher") {
sources = [
"regex_set_matcher.cc",
Expand All @@ -34,7 +20,6 @@ component("url_matcher") {
defines = [ "URL_MATCHER_IMPLEMENTATION" ]

public_deps = [
":substring_set_matcher",
"//base",
"//base/third_party/dynamic_annotations",
"//components/google/core/common",
Expand All @@ -49,14 +34,11 @@ source_set("unit_tests") {
testonly = true
sources = [
"regex_set_matcher_unittest.cc",
"string_pattern_unittest.cc",
"substring_set_matcher_unittest.cc",
"url_matcher_factory_unittest.cc",
"url_matcher_unittest.cc",
"url_util_unittest.cc",
]
deps = [
":substring_set_matcher",
":url_matcher",
"//testing/gmock",
"//testing/gtest",
Expand Down
12 changes: 7 additions & 5 deletions components/url_matcher/regex_set_matcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@

#include "base/logging.h"
#include "base/strings/string_util.h"
#include "components/url_matcher/substring_set_matcher.h"
#include "base/substring_set_matcher/substring_set_matcher.h"
#include "third_party/re2/src/re2/filtered_re2.h"
#include "third_party/re2/src/re2/re2.h"

using base::StringPattern;

namespace url_matcher {

RegexSetMatcher::RegexSetMatcher() = default;
Expand Down Expand Up @@ -79,8 +81,8 @@ void RegexSetMatcher::RebuildMatcher() {

for (auto it = regexes_.begin(); it != regexes_.end(); ++it) {
RE2ID re2_id;
RE2::ErrorCode error = filtered_re2_->Add(
it->second->pattern(), RE2::DefaultOptions, &re2_id);
RE2::ErrorCode error =
filtered_re2_->Add(it->second->pattern(), RE2::DefaultOptions, &re2_id);
if (error == RE2::NoError) {
DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id);
re2_id_map_.push_back(it->first);
Expand All @@ -95,14 +97,14 @@ void RegexSetMatcher::RebuildMatcher() {
std::vector<std::string> strings_to_match;
filtered_re2_->Compile(&strings_to_match);

std::vector<url_matcher::StringPattern> substring_patterns;
std::vector<StringPattern> substring_patterns;
substring_patterns.reserve(strings_to_match.size());

// Build SubstringSetMatcher from |strings_to_match|.
for (size_t i = 0; i < strings_to_match.size(); ++i)
substring_patterns.emplace_back(std::move(strings_to_match[i]), i);

substring_matcher_ = std::make_unique<SubstringSetMatcher>();
substring_matcher_ = std::make_unique<base::SubstringSetMatcher>();
bool success = substring_matcher_->Build(substring_patterns);
CHECK(success);
}
Expand Down

0 comments on commit 5570feb

Please sign in to comment.