
Fix FastWildcardGroup bug. We insert all-wildcard patterns in *reverse* order,

and should match them forwards as a result.  Add a couple of tests beyond
Steve's initial repro.

#1294
jmaessen authored and crowell committed Jul 21, 2016
1 parent 921fd71 commit 0654743
Showing 3 changed files with 63 additions and 13 deletions.
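
Aside (not part of this commit): the heart of the fix is the traversal
direction over wildcard_only_indices_. Later patterns override earlier ones,
so Match() wants the *highest* matching index; because the indices are stored
in reverse index order, a forward scan visits that highest index first and can
stop at the first hit, whereas the old reverse scan stopped at the lowest one.
A minimal standalone sketch of that logic, using hypothetical match results
and indices rather than the real FastWildcardGroup data structures:

// Sketch only: models a group built as Disallow("*"), eleven Allow("a")
// padding patterns (indices 1..11), then Allow("*") at index 12.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Whether each all-wildcard pattern matches the input (both "*"s do).
  std::vector<bool> matches(13, false);
  matches[0] = true;   // Disallow("*")
  matches[12] = true;  // Allow("*")

  // Indices of all-wildcard patterns, stored in reverse insertion order.
  std::vector<int> wildcard_only_indices = {12, 0};

  int max_effective_index = -1;
  // Forward scan: the first hit is the highest index, so stop there.
  for (size_t i = 0; i < wildcard_only_indices.size(); ++i) {
    int index = wildcard_only_indices[i];
    if (matches[index]) {
      max_effective_index = index;
      break;
    }
  }
  // Prints 12: the later Allow("*") wins, as the new AllowDisallowCompiled
  // test expects. The old reverse scan would have stopped at index 0.
  std::cout << "winning index: " << max_effective_index << "\n";
  return 0;
}
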
21 changes: 9 additions & 12 deletions pagespeed/kernel/base/fast_wildcard_group.cc
@@ -33,10 +33,6 @@
namespace net_instaweb {

namespace {
// Don't generate a hash unless there are this many
// non-wildcard-only patterns.
const int kMinPatterns = 11;

// Maximum rolling hash window size
const int32 kMaxRollingHashWindow = 256;

@@ -266,23 +262,24 @@ bool FastWildcardGroup::Match(const StringPiece& str, bool allow) const {
return allow;
}
int max_effective_index = kNoEntry;
// Start by matching against all-wildcard patterns in reverse order,
// stopping if a match is found (since earlier matches will have
// a smaller index and be overridden by the already-found match).
// TODO(jmaessen): These patterns all devolve to
// a string length check (== or >=). Consider optimizing them.
for (int i = wildcard_only_indices_.size() - 1; i >= 0; --i) {
// Start by matching against all-wildcard patterns in reverse order. Their
// indices are stored in reverse index order in wildcard_only_indices, so
// traverse it forwards. Stop if a match is found (since earlier matches will
// have a smaller index and be overridden by the already-found match).
// TODO(jmaessen): These patterns all devolve to a string length check (== or
// >=). Consider optimizing them.
for (int i = 0, sz = wildcard_only_indices_.size(); i < sz; ++i) {
int index = wildcard_only_indices_[i];
if (wildcards_[index]->Match(str)) {
max_effective_index = effective_indices_[index];
break;
}
}
int exit_effective_index = wildcards_.size() - 1;
int rolling_end = str.size() - rolling_hash_length;
if (rolling_end >= 0) {
if (max_effective_index < exit_effective_index && rolling_end >= 0) {
// Do a Rabin-Karp rolling match through the string.
uint64 rolling_hash = RollingHash(str.data(), 0, rolling_hash_length);
int exit_effective_index = wildcards_.size() - 1;
// Uses signed arithmetic for correct comparison below.
for (int ofs = 0;
max_effective_index < exit_effective_index && ofs <= rolling_end; ) {
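Aside (also not part of this commit): the loop above slides a Rabin-Karp
rolling hash across str so the pattern hash table can be probed at every
offset in O(1) per step. A minimal sketch of a polynomial rolling hash in that
spirit; the window length, multiplier, and input below are made up, and this
is not the library's RollingHash():

#include <cstdint>
#include <iostream>
#include <string>

int main() {
  const std::string str = "example input";
  const int window = 4;        // stands in for rolling_hash_length
  const uint64_t base = 257;   // arbitrary multiplier; wraps mod 2^64

  // base^(window-1), used to remove the outgoing character.
  uint64_t high = 1;
  for (int i = 1; i < window; ++i) high *= base;

  // Hash of the first window.
  uint64_t hash = 0;
  for (int i = 0; i < window; ++i) {
    hash = hash * base + static_cast<unsigned char>(str[i]);
  }
  std::cout << "offset 0: " << hash << "\n";

  // Roll one character at a time: drop the old leading char, add the new one.
  for (size_t ofs = 1; ofs + window <= str.size(); ++ofs) {
    hash -= high * static_cast<unsigned char>(str[ofs - 1]);
    hash = hash * base + static_cast<unsigned char>(str[ofs + window - 1]);
    std::cout << "offset " << ofs << ": " << hash << "\n";
  }
  return 0;
}
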
7 changes: 6 additions & 1 deletion pagespeed/kernel/base/fast_wildcard_group.h
@@ -93,6 +93,11 @@ don't get long, and all failed probes terminate in an empty bucket.

class FastWildcardGroup {
public:
// Don't generate a hash unless there are this many non-wildcard-only
// patterns. Exposed for testing purposes (we can't use FRIEND_TEST here for
// open-source dependency reasons).
static const int kMinPatterns = 11;

FastWildcardGroup()
: rolling_hash_length_(kUncompiled) { }
FastWildcardGroup(const FastWildcardGroup& src)
@@ -150,7 +155,7 @@ class FastWildcardGroup {
// Information that is computed during compilation.
mutable std::vector<uint64> rolling_hashes_; // One per wildcard
mutable std::vector<int> effective_indices_; // One per wildcard
mutable std::vector<int> wildcard_only_indices_;
mutable std::vector<int> wildcard_only_indices_; // Reverse order
mutable std::vector<int> pattern_hash_index_; // hash table
mutable AtomicInt32 rolling_hash_length_;

48 changes: 48 additions & 0 deletions pagespeed/kernel/base/fast_wildcard_group_test.cc
@@ -139,6 +139,54 @@ TEST_F(FastWildcardGroupTest, AppendSequenceLarge) {
Append();
}

TEST_F(FastWildcardGroupTest, AllowDisallowCompiled) {
FastWildcardGroup group;
group.Disallow("*");
// Pad the group with irrelevant stuff to force it to be compiled.
for (int i = 0; i < FastWildcardGroup::kMinPatterns; ++i) {
group.Allow("a");
}
group.Allow("*");

EXPECT_TRUE(group.Match("a", true));
EXPECT_TRUE(group.Match("b", true));
EXPECT_TRUE(group.Match("c", true));
}

TEST_F(FastWildcardGroupTest, AllowDisallowCompiledLarger) {
FastWildcardGroup group;
group.Allow("*");
// Pad the group with irrelevant stuff to force it to be compiled.
for (int i = 0; i < FastWildcardGroup::kMinPatterns; ++i) {
group.Disallow("a");
}
group.Disallow("*");
for (int i = 0; i < FastWildcardGroup::kMinPatterns; ++i) {
group.Allow("c");
}

EXPECT_FALSE(group.Match("a", true));
EXPECT_FALSE(group.Match("b", true));
EXPECT_TRUE(group.Match("c", true));
}

TEST_F(FastWildcardGroupTest, AllowDisallowLargeWildcardOnly) {
FastWildcardGroup group;
group.Allow("?");
// Pad the group with irrelevant stuff to force it to be compiled.
for (int i = 0; i < FastWildcardGroup::kMinPatterns; ++i) {
group.Allow("aa");
}
group.Disallow("??");

EXPECT_TRUE(group.Match("a", true));
EXPECT_FALSE(group.Match("aa", true));
EXPECT_TRUE(group.Match("aaa", true));
EXPECT_TRUE(group.Match("a", false));
EXPECT_FALSE(group.Match("aa", false));
EXPECT_FALSE(group.Match("aaa", false));
}

TEST_F(FastWildcardGroupTest, HardCodedDefault) {
HardCodedDefault();
}
