Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cliqz-url-parser for URL processing. #410

Merged
merged 4 commits into from Jul 17, 2019
Merged
Changes from 1 commit
Commits
File filter
Filter file types
Jump to
Jump to file
Failed to load files.

Always

Just for now

Second iteration: Use URL object directly, benefit from lazy properties.

  • Loading branch information
sammacbeth committed Jul 5, 2019
commit b1628e7a154f5107ca38a70038e96b46c2e6eb11
@@ -378,7 +378,7 @@ class EventHandlers {

/* ** SMART BLOCKING - Privacy ** */
// block HTTP request on HTTPS page
if (this.policySmartBlock.isInsecureRequest(tab_id, page_protocol, processed.protocol, processed.host)) {
if (this.policySmartBlock.isInsecureRequest(tab_id, page_protocol, processed.scheme, processed.hostname)) {
return this._blockHelper(details, tab_id, null, null, request_id, from_redirect, true);
}

@@ -399,7 +399,7 @@ class EventHandlers {

/* ** SMART BLOCKING - Breakage ** */
// allow first party trackers
if (this.policySmartBlock.isFirstPartyRequest(tab_id, page_host, processed.host)) {
if (this.policySmartBlock.isFirstPartyRequest(tab_id, page_host, processed.hostname)) {
return { cancel: false };
}

@@ -715,11 +715,11 @@ class EventHandlers {
*
* @private
*
* @param {Object} parsedURL
* @param {URL} parsedURL
* @return {Boolean}
*/
_isValidUrl(parsedURL) {
if (parsedURL.protocol.startsWith('http') && parsedURL.host.includes('.') && /[A-Za-z]/.test(parsedURL.host) && !parsedURL.path.includes('_/chrome/newtab')) {
if (parsedURL && parsedURL.protocol.startsWith('http') && parsedURL.isValidHost() && !parsedURL.pathname.includes('_/chrome/newtab')) {
return true;
}

@@ -81,7 +81,7 @@ class PanelData {
const { url } = tab;

this._activeTab = tab;
this._activeTab.pageHost = url && processUrl(url).host || '';
this._activeTab.pageHost = url && processUrl(url).hostname || '';

this._attachListeners();

@@ -191,7 +191,7 @@ class PanelData {
// Android panel only
const { url } = tab;
this._activeTab = tab;
this._activeTab.pageHost = url && processUrl(url).host || '';
this._activeTab.pageHost = url && processUrl(url).hostname || '';
this._setTrackerListAndCategories();
switch (view) {
case 'panel':
@@ -63,7 +63,7 @@ class Policy {
*/
whitelisted(url) {
if (url) {
url = processUrl(url).host;
url = processUrl(url).hostname;
url = url.replace(/^www\./, '');
const sites = conf.site_whitelist || [];
const num_sites = sites.length;
@@ -87,7 +87,7 @@ class Policy {
*/
blacklisted(url) {
if (url) {
url = processUrl(url).host;
url = processUrl(url).hostname;
url = url.replace(/^www\./, '');
const sites = conf.site_blacklist || [];
const num_sites = sites.length;
@@ -177,10 +177,10 @@ class TabInfo {
_updateUrl(tab_id, tab_url) {
// Parses tab_url with processUrl and copies the raw url plus its parsed
// parts onto the this._tabInfo[tab_id] record.
// NOTE(review): this span is a diff hunk shown without +/- markers. The first
// group of part assignments (protocol/host/path/anchor) looks like the
// pre-change lines and the second group (scheme/hostname/pathname/hash) like
// the post-change lines -- confirm against the commit. Read literally, the
// second group overwrites the values written by the first.
const parsed = processUrl(tab_url);
this._tabInfo[tab_id].url = tab_url;
this._tabInfo[tab_id].protocol = parsed.protocol;
this._tabInfo[tab_id].host = parsed.host;
this._tabInfo[tab_id].path = parsed.path;
this._tabInfo[tab_id].hash = parsed.anchor;
this._tabInfo[tab_id].protocol = parsed.scheme;
this._tabInfo[tab_id].host = parsed.hostname;
this._tabInfo[tab_id].path = parsed.pathname;
this._tabInfo[tab_id].hash = parsed.hash;
// The partialScan flag is cleared on every url update.
this._tabInfo[tab_id].partialScan = false;
}
}
@@ -115,8 +115,8 @@ export function buildC2P(details, app_id) {
* @return {string} url of the internal template of the blocked redirect page
*/
export function buildRedirectC2P(requestId, redirectUrls, app_id) {
const host_url = processUrl(redirectUrls.url).host;
const redirect_url = processUrl(redirectUrls.redirectUrl).host;
const host_url = processUrl(redirectUrls.url).hostname;
const redirect_url = processUrl(redirectUrls.redirectUrl).hostname;
const app_name = bugDb.db.apps[app_id].name;

globals.BLOCKED_REDIRECT_DATA = {};
@@ -36,13 +36,13 @@ export function isBug(src, tab_url) {

found =
// pattern classification 2: check host+path hash
_matchesHost(db.patterns.host_path, processedSrc.host, processedSrc.path) ||
_matchesHost(db.patterns.host_path, processedSrc.hostname, processedSrc.pathname) ||
// class 1: check host hash
_matchesHost(db.patterns.host, processedSrc.host) ||
// class 3: check path hash
_matchesPath(processedSrc.path) ||
// class 4: check regex patterns
_matchesRegex(processedSrc.host_with_path);
_matchesRegex(processedSrc.host + processedSrc.pathname);

if (typeof tab_url !== 'undefined') {
// check firstPartyExceptions
@@ -69,9 +69,9 @@ export function isBug(src, tab_url) {
*/
export function fuzzyUrlMatcher(url, urls) {
const parsed = processUrl(url.toLowerCase());
let tab_host = parsed.host;
let tab_host = parsed.hostname;

const tab_path = parsed.path;
const tab_path = parsed.pathname;

if (tab_host.startsWith('www.')) {
tab_host = tab_host.slice(4);
@@ -165,25 +165,18 @@ export function processFpeUrl(src) {
* @memberOf BackgroundUtils
*
* @param {string} src the source url
* @return {Object} contains url parts as properties
* @return {URL} contains url parts as properties
*
*/
export function processUrl(src) {
// Builds a URL from src; if construction throws, the catch branch returns a
// plain object whose fields are all empty strings.
// NOTE(review): this span is a diff hunk shown without +/- markers. The
// object-literal return looks like the pre-change implementation and
// `return res;` like the post-change one -- confirm against the commit.
// Read literally, `return res;` is unreachable because it follows a return.
try {
const res = new URL(src);
return {
// Drops the last character of res.protocol (presumably the trailing ':' -- confirm).
protocol: res.protocol ? res.protocol.substr(0, res.protocol.length - 1) : '',
host: res.hostname || '',
// Drops the first character of res.pathname (presumably the leading '/' -- confirm).
path: res.pathname ? res.pathname.substr(1) : '',
// Uses res.host here, not res.hostname, concatenated with the full pathname.
host_with_path: (res.host || '') + (res.pathname || ''),
// Drops the first character of res.hash (presumably the leading '#' -- confirm).
anchor: res.hash ? res.hash.substr(1) : '',
};
return res;
} catch (e) {
// The error itself is not logged or rethrown; callers only see empty fields.
// NOTE(review): protocol/host/path/host_with_path/anchor look like the old
// fallback keys and hostname/pathname like the new ones -- confirm.
return {
protocol: '',
host: '',
path: '',
host_with_path: '',
anchor: '',
hostname: '',
pathname: '',
};
}
}
ProTip! Use n and p to navigate between commits in a pull request.