Skip to content
This repository has been archived by the owner. It is now read-only.

Add support for conditional matching filters based on tags #192

Merged
merged 2 commits into from Mar 13, 2019
Merged
Changes from 1 commit
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

Next

Add condition tagging support

  • Loading branch information
bbondy committed Feb 26, 2019
commit f076a017a867a8925ee5a39acb39d28a780c6133
@@ -620,10 +620,13 @@ bool AdBlockClient::hasMatchingFilters(Filter *filter, int numFilters,
for (int i = 0; i < numFilters; i++) {
if (filter->matches(input, inputLen, contextOption,
contextDomain, inputBloomFilter, inputHost, inputHostLen)) {
if (matchingFilter) {
*matchingFilter = filter;
if (filter->tagLen == 0 ||
tagExists(std::string(filter->tag, filter->tagLen))) {
if (matchingFilter) {
*matchingFilter = filter;
}
return true;
}
return true;
}
filter++;
}
@@ -685,13 +688,13 @@ bool isNoFingerprintDomainHashSetMiss(HashSet<NoFingerprintDomain> *hashSet,
static_cast<int>(host + hostLen - start)));
}

bool isHostAnchoredHashSetMiss(const char *input, int inputLen,
bool AdBlockClient::isHostAnchoredHashSetMiss(const char *input, int inputLen,
HashSet<Filter> *hashSet,
const char *inputHost,
int inputHostLen,
FilterOption contextOption,
const char *contextDomain,
Filter **foundFilter = nullptr) {
Filter **foundFilter) {
if (!hashSet) {
return false;
}
@@ -712,10 +715,13 @@ bool isHostAnchoredHashSetMiss(const char *input, int inputLen,
nullptr, start, inputHostLen - (start - inputHost)));
if (filter && filter->matches(input, inputLen,
contextOption, contextDomain)) {
if (foundFilter) {
*foundFilter = filter;
if (filter->tagLen == 0 ||
tagExists(std::string(filter->tag, filter->tagLen))) {
if (foundFilter) {
*foundFilter = filter;
}
return false;
}
return false;
}
}
start--;
@@ -728,8 +734,14 @@ bool isHostAnchoredHashSetMiss(const char *input, int inputLen,
return true;
}
bool result = !filter->matches(input, inputLen, contextOption, contextDomain);
if (!result && foundFilter) {
*foundFilter = filter;
if (!result) {
if (filter->tagLen > 0 &&
!tagExists(std::string(filter->tag, filter->tagLen))) {
return true;
}
if (foundFilter) {
*foundFilter = filter;
}
}
return result;
}
@@ -1451,6 +1463,23 @@ bool AdBlockClient::parse(const char *input, bool preserveRules) {
return true;
}

// Adds `tag` to this client's `tags` set. Adding a tag that is already
// present leaves the set unchanged.
void AdBlockClient::addTag(const std::string &tag) {
  // std::set::insert is a no-op when the key already exists, so a separate
  // find() beforehand only costs a second lookup.
  tags.insert(tag);
}

// Removes `tag` from this client's `tags` set. Removing a tag that is not
// present has no effect.
void AdBlockClient::removeTag(const std::string &tag) {
  // Erase-by-key performs the lookup itself and simply removes nothing when
  // the key is absent, so no explicit find()/end() check is required.
  tags.erase(tag);
}

// Reports whether `tag` is currently a member of this client's `tags` set.
bool AdBlockClient::tagExists(const std::string &tag) const {
  // A std::set holds each key at most once, so count() is either 0 or 1.
  return tags.count(tag) != 0;
}

// Fills the specified buffer if specified, returns the number of characters
// written or needed
int serializeFilters(char * buffer, size_t bufferSizeAvail,
@@ -1476,6 +1505,15 @@ int serializeFilters(char * buffer, size_t bufferSizeAvail,
}
bufferSize++;

if (f->tagLen > 0) {
if (buffer) {
buffer[bufferSize] = '#';

This comment has been minimized.

Copy link
@emerick

emerick Mar 3, 2019

Contributor

Is it safe to assume the buffer has the space available for this additional data? Wasn't quite sure how that side of things works.

This comment has been minimized.

Copy link
@bbondy

bbondy Mar 3, 2019

Author Member

Yep how it works is it calls the function 2 times. The first with nullptr buffer to get the size. The second it specifies a buffer of that size.

This comment has been minimized.

Copy link
@emerick

emerick Mar 3, 2019

Contributor

OK, got it!

memcpy(buffer + bufferSize + 1, f->tag, f->tagLen);
buffer[bufferSize + 1 + f->tagLen] = ',';
}
bufferSize += f->tagLen + 2;
}

if (f->domainList) {
if (buffer) {
snprintf(buffer + bufferSize, bufferSizeAvail, "%s", f->domainList);
@@ -1700,6 +1738,21 @@ int deserializeFilters(char *buffer, Filter *f, int numFilters) {
}
pos++;

// If the domain section starts with a # then we're in a tag
// block.
if (buffer[pos] == '#') {
pos++;
f->tag = buffer + pos;
f->tagLen = 0;
while (buffer[pos + f->tagLen] != '\0') {
if (buffer[pos + f->tagLen] == ',') {
pos += f->tagLen + 1;
break;
}
f->tagLen++;
}
}

if (*(buffer + pos) == '\0') {
f->domainList = nullptr;
} else {
@@ -1721,6 +1774,7 @@ int deserializeFilters(char *buffer, Filter *f, int numFilters) {
}

bool AdBlockClient::deserialize(char *buffer) {
clear();
deserializedBuffer = buffer;
int bloomFilterSize = 0, exceptionBloomFilterSize = 0,
hostAnchoredHashSetSize = 0, hostAnchoredExceptionHashSetSize = 0,
@@ -36,6 +36,9 @@ class AdBlockClient {
const char *contextDomain,
Filter **matchingFilter,
Filter **matchingExceptionFilter);
void addTag(const std::string &tag);
void removeTag(const std::string &tag);
bool tagExists(const std::string &tag) const;
// Serializes the parsed data and bloom filter data into a single buffer.
// The returned buffer should be deleted.
char * serialize(int *size,
@@ -106,10 +109,19 @@ class AdBlockClient {
int inputLen, FilterOption contextOption, const char *contextDomain,
BloomFilter *inputBloomFilter, const char *inputHost, int inputHostLen,
Filter **matchingFilter = nullptr);
bool isHostAnchoredHashSetMiss(const char *input, int inputLen,
HashSet<Filter> *hashSet,
const char *inputHost,
int inputHostLen,
FilterOption contextOption,
const char *contextDomain,
Filter **foundFilter = nullptr);

void initBloomFilter(BloomFilter**, const char *buffer, int len);
template<class T>
bool initHashSet(HashSet<T>**, char *buffer, int len);
char *deserializedBuffer;
std::set<std::string> tags;
};

extern std::set<std::string> unknownOptions;
@@ -129,6 +129,8 @@ void AdBlockClientWrap::Init(Local<Object> exports) {
NODE_SET_PROTOTYPE_METHOD(tpl, "generateRegionalManifestFiles",
AdBlockClientWrap::GenerateRegionalManifestFiles);
NODE_SET_PROTOTYPE_METHOD(tpl, "cleanup", AdBlockClientWrap::Cleanup);
NODE_SET_PROTOTYPE_METHOD(tpl, "addTag", AdBlockClientWrap::AddTag);
NODE_SET_PROTOTYPE_METHOD(tpl, "removeTag", AdBlockClientWrap::RemoveTag);

// filter options
Local<Object> filterOptions = Object::New(isolate);
@@ -345,6 +347,26 @@ void AdBlockClientWrap::Deserialize(const FunctionCallbackInfo<Value>& args) {
obj->deserialize(deserializedData)));
}

// JavaScript binding for addTag: converts the first argument to a UTF-8
// string and adds it to the wrapped client's tag set.
void AdBlockClientWrap::AddTag(const FunctionCallbackInfo<Value>& args) {
  Isolate* isolate = args.GetIsolate();
  String::Utf8Value str(isolate, args[0]->ToString());
  const char * buffer = *str;
  // Utf8Value yields a null pointer when the conversion to a string fails;
  // building a std::string from a null pointer is undefined behaviour, so
  // bail out without touching the tag set.
  if (buffer == nullptr) {
    return;
  }

  AdBlockClientWrap* obj =
      ObjectWrap::Unwrap<AdBlockClientWrap>(args.Holder());
  obj->addTag(buffer);
}

// JavaScript binding for removeTag: converts the first argument to a UTF-8
// string and removes it from the wrapped client's tag set.
void AdBlockClientWrap::RemoveTag(const FunctionCallbackInfo<Value>& args) {
  Isolate* isolate = args.GetIsolate();
  String::Utf8Value str(isolate, args[0]->ToString());
  const char * buffer = *str;
  // Utf8Value yields a null pointer when the conversion to a string fails;
  // building a std::string from a null pointer is undefined behaviour, so
  // bail out without touching the tag set.
  if (buffer == nullptr) {
    return;
  }

  AdBlockClientWrap* obj =
      ObjectWrap::Unwrap<AdBlockClientWrap>(args.Holder());
  obj->removeTag(buffer);
}

void AdBlockClientWrap::GetParsingStats(
const FunctionCallbackInfo<Value>& args) {
Isolate* isolate = args.GetIsolate();
@@ -506,6 +528,10 @@ void AdBlockClientWrap::GetFilters(
result->Set(String::NewFromUtf8(isolate, "domainList"), domain_list);
result->Set(String::NewFromUtf8(isolate,
"antiDomainList"), anti_domain_list);
result->Set(String::NewFromUtf8(isolate, "tag"),
String::NewFromUtf8(isolate,
std::string(filter->tagLen > 0 ? filter->tag : "",
filter->tagLen).c_str()));

result_list->Set(i, result);
filter++;
@@ -32,6 +32,8 @@ class AdBlockClientWrap : public AdBlockClient, public node::ObjectWrap {
static void Serialize(const v8::FunctionCallbackInfo<v8::Value>& args);
static void Deserialize(const v8::FunctionCallbackInfo<v8::Value>& args);
static void Cleanup(const v8::FunctionCallbackInfo<v8::Value>& args);
static void AddTag(const v8::FunctionCallbackInfo<v8::Value>& args);
static void RemoveTag(const v8::FunctionCallbackInfo<v8::Value>& args);
static void GetParsingStats(const v8::FunctionCallbackInfo<v8::Value>& args);
static void GetMatchingStats(const v8::FunctionCallbackInfo<v8::Value>& args);
static void GetFilters(const v8::FunctionCallbackInfo<v8::Value>& args);
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.