Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter user-tracking parameters from query string #3239

Merged
merged 1 commit into from Sep 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions browser/net/BUILD.gn
Expand Up @@ -52,6 +52,7 @@ source_set("net") {
"//net",
"//services/network/public/cpp",
"//services/network/public/mojom",
"//third_party/re2",
"//url",
]

Expand Down
68 changes: 68 additions & 0 deletions browser/net/brave_site_hacks_network_delegate_helper.cc
Expand Up @@ -7,7 +7,11 @@

#include <memory>
#include <string>
#include <vector>

#include "base/lazy_instance.h"
#include "base/metrics/histogram_macros.h"
#include "base/no_destructor.h"
#include "base/sequenced_task_runner.h"
#include "base/strings/string_util.h"
#include "brave/common/network_constants.h"
Expand All @@ -21,6 +25,7 @@
#include "content/public/common/referrer.h"
#include "extensions/common/url_pattern.h"
#include "net/url_request/url_request.h"
#include "third_party/re2/src/re2/re2.h"

using content::BrowserThread;
using content::Referrer;
Expand All @@ -29,6 +34,42 @@ namespace brave {

namespace {

const std::string& GetQueryStringTrackers() {
static const base::NoDestructor<std::string> trackers(base::JoinString(
std::vector<std::string>({"fbclid", "gclid", "msclkid", "mc_eid"}), "|"));
return *trackers;
}

// From src/components/autofill/content/renderer/page_passwords_analyser.cc
// and password_form_conversion_utils.cc:
//
// DECLARE_LAZY_MATCHER(NAME, PATTERN) declares a base::LazyInstance<re2::RE2>
// called NAME. The custom traits struct overrides New() so that, when the
// instance is first requested, an re2::RE2 is placement-constructed from
// PATTERN with case-insensitive matching enabled, and a DCHECK verifies that
// the pattern compiled successfully. PATTERN is only evaluated inside New(),
// so it may be a runtime expression (e.g. a std::string concatenation).
#define DECLARE_LAZY_MATCHER(NAME, PATTERN) \
struct LabelPatternLazyInstanceTraits_##NAME \
: public base::internal::DestructorAtExitLazyInstanceTraits<re2::RE2> { \
static re2::RE2* New(void* instance) { \
re2::RE2::Options options; \
options.set_case_sensitive(false); \
re2::RE2* matcher = new (instance) re2::RE2(PATTERN, options); \
DCHECK(matcher->ok()); \
return matcher; \
} \
}; \
base::LazyInstance<re2::RE2, LabelPatternLazyInstanceTraits_##NAME> NAME = \
LAZY_INSTANCE_INITIALIZER

// The three matchers below share the alternation produced by
// GetQueryStringTrackers(). Each one requires a non-empty value
// ("=[^&]+"), so a bare "fbclid" or an empty "fbclid=" is not matched.

// Matches a query that consists of exactly one tracker parameter,
// e.g. "?fbclid=1234"
DECLARE_LAZY_MATCHER(tracker_only_matcher,
"^(" + GetQueryStringTrackers() + ")=[^&]+$");

// Matches a tracker parameter at the very start of the query together with
// the '&' that follows it, e.g. "?fbclid=1234&foo=1"
DECLARE_LAZY_MATCHER(tracker_first_matcher,
"^(" + GetQueryStringTrackers() + ")=[^&]+&");

// Matches a tracker parameter together with the '&' that precedes it,
// e.g. "?foo=1&fbclid=1234" or "?foo=1&fbclid=1234&bar=2"
DECLARE_LAZY_MATCHER(tracker_appended_matcher,
"&(" + GetQueryStringTrackers() + ")=[^&]+");

// The helper macro is only needed for the declarations above.
#undef DECLARE_LAZY_MATCHER

bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
GURL target_origin = ctx->request_url.GetOrigin();
Expand All @@ -50,12 +91,39 @@ bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
return false;
}

void ApplyPotentialQueryStringFilter(const GURL& request_url,
std::string* new_url_spec) {
DCHECK(new_url_spec);
SCOPED_UMA_HISTOGRAM_TIMER("Brave.SiteHacks.QueryFilter");
std::string new_query = request_url.query();
fmarier marked this conversation as resolved.
Show resolved Hide resolved
// Note: the ordering of these replacements is important.
const int replacement_count =
re2::RE2::GlobalReplace(&new_query, tracker_appended_matcher.Get(), "") +
re2::RE2::GlobalReplace(&new_query, tracker_first_matcher.Get(), "") +
re2::RE2::GlobalReplace(&new_query, tracker_only_matcher.Get(), "");

if (replacement_count > 0) {
url::Replacements<char> replacements;
if (new_query.empty()) {
replacements.ClearQuery();
} else {
replacements.SetQuery(new_query.c_str(),
url::Component(0, new_query.size()));
}
*new_url_spec = request_url.ReplaceComponents(replacements).spec();
}
}

} // namespace

// Applies the site-specific request tweaks to |ctx|: first the potential
// referrer block, then, for URLs that carry a query string, the
// tracking-parameter filter (which may populate |ctx->new_url_spec|).
// |next_callback| is not used here. Always returns net::OK.
int OnBeforeURLRequest_SiteHacksWork(
    const ResponseCallback& next_callback,
    std::shared_ptr<BraveRequestInfo> ctx) {
  ApplyPotentialReferrerBlock(ctx);

  // Without a query string there is nothing to filter.
  if (!ctx->request_url.has_query()) {
    return net::OK;
  }

  ApplyPotentialQueryStringFilter(ctx->request_url, &ctx->new_url_spec);
  return net::OK;
}

Expand Down
Expand Up @@ -7,6 +7,7 @@

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "brave/browser/net/url_context.h"
Expand Down Expand Up @@ -203,4 +204,85 @@ TEST_F(BraveSiteHacksNetworkDelegateHelperTest,
});
}

// URLs whose query string contains no removable tracking parameter must pass
// through OnBeforeURLRequest_SiteHacksWork without a rewritten URL being set.
TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringUntouched) {
  // Element type is deliberately non-const: containers of const elements are
  // not permitted by the standard allocator requirements.
  const std::vector<std::string> urls({
      "https://example.com/",
      "https://example.com/?",
      "https://example.com/?+%20",
      "https://user:pass@example.com/path/file.html?foo=1#fragment",
      "http://user:pass@example.com/path/file.html?foo=1&bar=2#fragment",
      "https://example.com/?file=https%3A%2F%2Fexample.com%2Ftest.pdf",
      "https://example.com/?title=1+2&caption=1%202",
      "https://example.com/?foo=1&&bar=2#fragment",
      "https://example.com/?foo&bar=&#fragment",
      "https://example.com/?foo=1&fbcid=no&gcid=no&mc_cid=no&bar=&#frag",
      "https://example.com/?fbclid=&gclid&=mc_eid&msclkid=",
      "https://example.com/?value=fbclid=1&not-gclid=2&foo+mc_eid=3",
      "https://example.com/?+fbclid=1",
      "https://example.com/?%20fbclid=1",
      "https://example.com/#fbclid=1",
  });
  for (const auto& url : urls) {
    // Make a failing expectation report which URL it belongs to.
    SCOPED_TRACE(url);

    net::TestDelegate test_delegate;
    std::unique_ptr<net::URLRequest> request = context()->CreateRequest(
        GURL(url), net::IDLE, &test_delegate, TRAFFIC_ANNOTATION_FOR_TESTS);

    auto brave_request_info = std::make_shared<brave::BraveRequestInfo>();
    brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
                                                brave_request_info);
    brave::ResponseCallback callback;
    int ret =
        brave::OnBeforeURLRequest_SiteHacksWork(callback, brave_request_info);
    EXPECT_EQ(ret, net::OK);
    // new_url should not be set
    EXPECT_TRUE(brave_request_info->new_url_spec.empty());
    EXPECT_EQ(request->url(), GURL(url));
  }
}

// URLs carrying one or more tracking parameters with a non-empty value must
// come out of OnBeforeURLRequest_SiteHacksWork with new_url_spec set to the
// same URL minus those parameters.
TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) {
  // Element type is deliberately non-const: containers of const elements are
  // not permitted by the standard allocator requirements.
  const std::vector<std::pair<std::string, std::string>> urls({
      // { original url, expected url after filtering }
      {"https://example.com/?fbclid=1234", "https://example.com/"},
      {"https://example.com/?fbclid=1234&", "https://example.com/"},
      {"https://example.com/?&fbclid=1234", "https://example.com/"},
      {"https://example.com/?gclid=1234", "https://example.com/"},
      {"https://example.com/?fbclid=0&gclid=1&msclkid=a&mc_eid=a1",
       "https://example.com/"},
      {"https://example.com/?fbclid=&foo=1&bar=2&gclid=abc",
       "https://example.com/?fbclid=&foo=1&bar=2"},
      {"https://example.com/?fbclid=&foo=1&gclid=1234&bar=2",
       "https://example.com/?fbclid=&foo=1&bar=2"},
      {"http://u:p@example.com/path/file.html?foo=1&fbclid=abcd#fragment",
       "http://u:p@example.com/path/file.html?foo=1#fragment"},
      // Obscure edge cases that break most parsers:
      {"https://example.com/?fbclid&foo&&gclid=2&bar=&%20",
       "https://example.com/?fbclid&foo&&bar=&%20"},
      {"https://example.com/?fbclid=1&1==2&=msclkid&foo=bar&&a=b=c&",
       "https://example.com/?1==2&=msclkid&foo=bar&&a=b=c&"},
      {"https://example.com/?fbclid=1&=2&?foo=yes&bar=2+",
       "https://example.com/?=2&?foo=yes&bar=2+"},
      {"https://example.com/?fbclid=1&a+b+c=some%20thing&1%202=3+4",
       "https://example.com/?a+b+c=some%20thing&1%202=3+4"},
  });
  for (const auto& pair : urls) {
    // Make a failing expectation report which input URL it belongs to.
    SCOPED_TRACE(pair.first);

    net::TestDelegate test_delegate;
    std::unique_ptr<net::URLRequest> request =
        context()->CreateRequest(GURL(pair.first), net::IDLE, &test_delegate,
                                 TRAFFIC_ANNOTATION_FOR_TESTS);

    auto brave_request_info = std::make_shared<brave::BraveRequestInfo>();
    brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
                                                brave_request_info);
    brave::ResponseCallback callback;
    int ret =
        brave::OnBeforeURLRequest_SiteHacksWork(callback, brave_request_info);
    EXPECT_EQ(ret, net::OK);
    EXPECT_EQ(brave_request_info->new_url_spec, pair.second);
  }
}

} // namespace