Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Use cliqz-url-parser for URL processing. #410

Merged
merged 4 commits into from Jul 17, 2019
Merged
Changes from all commits
Commits
File filter
Filter file types
Jump to
Jump to file
Failed to load files.

Always

Just for now

@@ -42,6 +42,7 @@
},
"homepage": "https://github.com/ghostery/ghostery-extension#readme",
"dependencies": {
"@cliqz/url-parser": "^1.0.2",
"base64-js": "^1.2.1",
"browser-core": "https://github.com/cliqz-oss/browser-core/releases/download/v7.37.4/browser-core-7.37.4.tgz",
"classnames": "^2.2.5",
@@ -378,7 +378,7 @@ class EventHandlers {

/* ** SMART BLOCKING - Privacy ** */
// block HTTP request on HTTPS page
if (this.policySmartBlock.isInsecureRequest(tab_id, page_protocol, processed.protocol, processed.host)) {
if (this.policySmartBlock.isInsecureRequest(tab_id, page_protocol, processed.scheme, processed.hostname)) {
return this._blockHelper(details, tab_id, null, null, request_id, from_redirect, true);
}

@@ -399,7 +399,7 @@ class EventHandlers {

/* ** SMART BLOCKING - Breakage ** */
// allow first party trackers
if (this.policySmartBlock.isFirstPartyRequest(tab_id, page_host, processed.host)) {
if (this.policySmartBlock.isFirstPartyRequest(tab_id, page_host, processed.hostname)) {
return { cancel: false };
}

@@ -715,11 +715,11 @@ class EventHandlers {
*
* @private
*
* @param {Object} parsedURL
* @param {URL} parsedURL
* @return {Boolean}
*/
_isValidUrl(parsedURL) {
if (parsedURL.protocol.startsWith('http') && parsedURL.host.includes('.') && /[A-Za-z]/.test(parsedURL.host) && !parsedURL.path.includes('_/chrome/newtab')) {
if (parsedURL && parsedURL.protocol.startsWith('http') && parsedURL.isValidHost() && !parsedURL.pathname.includes('_/chrome/newtab')) {
return true;
}

@@ -81,7 +81,7 @@ class PanelData {
const { url } = tab;

this._activeTab = tab;
this._activeTab.pageHost = url && processUrl(url).host || '';
this._activeTab.pageHost = url && processUrl(url).hostname || '';

this._attachListeners();

@@ -191,7 +191,7 @@ class PanelData {
// Android panel only
const { url } = tab;
this._activeTab = tab;
this._activeTab.pageHost = url && processUrl(url).host || '';
this._activeTab.pageHost = url && processUrl(url).hostname || '';
this._setTrackerListAndCategories();
switch (view) {
case 'panel':
@@ -63,7 +63,7 @@ class Policy {
*/
whitelisted(url) {
if (url) {
url = processUrl(url).host;
url = processUrl(url).hostname;
url = url.replace(/^www\./, '');
const sites = conf.site_whitelist || [];
const num_sites = sites.length;
@@ -87,7 +87,7 @@ class Policy {
*/
blacklisted(url) {
if (url) {
url = processUrl(url).host;
url = processUrl(url).hostname;
url = url.replace(/^www\./, '');
const sites = conf.site_blacklist || [];
const num_sites = sites.length;
@@ -177,10 +177,10 @@ class TabInfo {
_updateUrl(tab_id, tab_url) {
const parsed = processUrl(tab_url);
this._tabInfo[tab_id].url = tab_url;
this._tabInfo[tab_id].protocol = parsed.protocol;
this._tabInfo[tab_id].host = parsed.host;
this._tabInfo[tab_id].path = parsed.path;
this._tabInfo[tab_id].hash = parsed.anchor;
this._tabInfo[tab_id].protocol = parsed.scheme;
this._tabInfo[tab_id].host = parsed.hostname;
this._tabInfo[tab_id].path = parsed.pathname;
this._tabInfo[tab_id].hash = parsed.hash;
this._tabInfo[tab_id].partialScan = false;
}
}
@@ -115,8 +115,8 @@ export function buildC2P(details, app_id) {
* @return {string} url of the internal template of the blocked redirect page
*/
export function buildRedirectC2P(requestId, redirectUrls, app_id) {
const host_url = processUrl(redirectUrls.url).host;
const redirect_url = processUrl(redirectUrls.redirectUrl).host;
const host_url = processUrl(redirectUrls.url).hostname;
const redirect_url = processUrl(redirectUrls.redirectUrl).hostname;
const app_name = bugDb.db.apps[app_id].name;

globals.BLOCKED_REDIRECT_DATA = {};
@@ -34,15 +34,17 @@ export function isBug(src, tab_url) {
const processedSrc = processUrl(src.toLowerCase());
let found = false;

const path = processedSrc.pathname ? processedSrc.pathname.substring(1) : '';

found =
// pattern classification 2: check host+path hash
_matchesHost(db.patterns.host_path, processedSrc.host, processedSrc.path) ||
_matchesHost(db.patterns.host_path, processedSrc.hostname, path) ||
// class 1: check host hash
_matchesHost(db.patterns.host, processedSrc.host) ||
_matchesHost(db.patterns.host, processedSrc.hostname) ||
// class 3: check path hash
_matchesPath(processedSrc.path) ||
_matchesPath(path) ||
// class 4: check regex patterns
_matchesRegex(processedSrc.host_with_path);
_matchesRegex(processedSrc.host + processedSrc.pathname);

if (typeof tab_url !== 'undefined') {
// check firstPartyExceptions
@@ -69,9 +71,9 @@ export function isBug(src, tab_url) {
*/
export function fuzzyUrlMatcher(url, urls) {
const parsed = processUrl(url.toLowerCase());
let tab_host = parsed.host;
let tab_host = parsed.hostname;

const tab_path = parsed.path;
const tab_path = parsed.pathname ? parsed.pathname.substring(1) : '';

if (tab_host.startsWith('www.')) {
tab_host = tab_host.slice(4);
@@ -19,7 +19,7 @@
* @namespace BackgroundUtils
*/
import { debounce } from 'underscore';
import url from 'url';
import { URL } from '@cliqz/url-parser';
import tabInfo from '../classes/TabInfo';
import globals from '../classes/Globals';
import { log, objectEntries } from './common';
@@ -165,27 +165,20 @@ export function processFpeUrl(src) {
* @memberOf BackgroundUtils
*
* @param {string} src the source url
* @return {Object} contains url parts as properties
* @return {URL} contains url parts as properties
*
*/
export function processUrl(src) {
if (!src) {
try {
const res = new URL(src);
return res;
} catch (e) {
return {
protocol: '',
host: '',
path: '',
host_with_path: '',
anchor: '',
hostname: '',
pathname: '',
};
}
const res = url.parse(src);

return {
protocol: res.protocol ? res.protocol.substr(0, res.protocol.length - 1) : '',
host: res.hostname || '',
path: res.pathname ? res.pathname.substr(1) : '',
host_with_path: (res.host || '') + (res.pathname || ''),
anchor: res.hash ? res.hash.substr(1) : '',
};
}

/**
@@ -199,7 +192,15 @@ export function processUrlQuery(src) {
return {};
}

return url.parse(src, true).query;
try {
const res = {};
for (const [key, value] of new URL(src).searchParams.entries()) {
res[key] = value;
}
return res;
} catch (e) {
return {};
}
}

/**
@@ -311,6 +311,13 @@
tslib "^1.9.3"
tsmaz "^1.2.1"

"@cliqz/url-parser@^1.0.2":
version "1.0.2"
resolved "https://registry.yarnpkg.com/@cliqz/url-parser/-/url-parser-1.0.2.tgz#0c42d73dbe354efad572d9ef39c0aa5d7f6151ea"
integrity sha512-4Y5DQqUv41SWoP7nDRO9PBMH0sSor6aiBT/t1wvjCrUDpG4yhjvAvwRaBN2GoGuHPkWUxT7mZN/FKWtSRZ/FIQ==
dependencies:
tldts "^5.0.3"

"@cnakazawa/watch@^1.0.3":
version "1.0.3"
resolved "https://registry.yarnpkg.com/@cnakazawa/watch/-/watch-1.0.3.tgz#099139eaec7ebf07a27c1786a3ff64f39464d2ef"
ProTip! Use n and p to navigate between commits in a pull request.