Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Closes GH-711: Parse URLs more safely

This does 3 things:

1. Delimiters and "unwise" characters are never included in the
   hostname or path.
2. url.format will sanitize string URLs that are passed to it.
3. The parsed url's 'href' member will be the sanitized url, which may
   not match the argument to url.parse.
  • Loading branch information...
commit d118003307e858f6a1bdffe8bf75f379930d2bff 1 parent 3b34adf
@isaacs authored
Showing with 150 additions and 72 deletions.
  1. +65 −9 lib/url.js
  2. +85 −63 test/simple/test-url.js
View
74 lib/url.js
@@ -7,9 +7,30 @@ exports.format = urlFormat;
// compiled once on the first module load.
var protocolPattern = /^([a-z0-9]+:)/,
portPattern = /:[0-9]+$/,
- nonHostChars = ['/', '?', ';', '#'],
+ delims = ['<', '>', '"', '\'', '`', /\s/],
+ unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims),
+ nonHostChars = ['/', '?', ';', '#'].concat(unwise),
+ hostnameMaxLen = 255,
+ hostnamePartPattern = /^[a-z0-9][a-z0-9A-Z-]{0,62}$/,
+ unsafeProtocol = {
+ 'javascript': true,
+ 'javascript:': true
+ },
hostlessProtocol = {
+ 'javascript': true,
+ 'javascript:': true,
+ 'file': true,
+ 'file:': true
+ },
+ pathedProtocol = {
+ 'http': true,
+ 'https': true,
+ 'ftp': true,
+ 'gopher': true,
'file': true,
+ 'http:': true,
+ 'ftp:': true,
+ 'gopher:': true,
'file:': true
},
slashedProtocol = {
@@ -29,7 +50,7 @@ var protocolPattern = /^([a-z0-9]+:)/,
function urlParse(url, parseQueryString, slashesDenoteHost) {
if (url && typeof(url) === 'object' && url.href) return url;
- var out = { href: url },
+ var out = {},
rest = url;
var proto = protocolPattern.exec(rest);
@@ -50,6 +71,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
out.slashes = true;
}
}
+
if (!hostlessProtocol[proto] &&
(slashes || (proto && !slashedProtocol[proto]))) {
// there's a hostname.
@@ -79,9 +101,36 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
// we've indicated that there is a hostname,
// so even if it's empty, it has to be present.
out.hostname = out.hostname || '';
+
+ // validate a little.
+ if (out.hostname.length > hostnameMaxLen) {
+ out.hostname = '';
+ } else {
+ var hostparts = out.hostname.split(/\./);
+ for (var i = 0, l = hostparts.length; i < l; i++) {
+ var part = hostparts[i];
+ if (!part.match(hostnamePartPattern)) {
+ out.hostname = '';
+ break;
+ }
+ }
+ }
}
// now rest is set to the post-host stuff.
+ // chop off any delim chars.
+ if (!unsafeProtocol[proto]) {
+ var chop = rest.length;
+ for (var i = 0, l = delims.length; i < l; i++) {
+ var c = rest.indexOf(delims[i]);
+ if (c !== -1) {
+ chop = Math.min(c, chop);
+ }
+ }
+ rest = rest.substr(0, chop);
+ }
+
+
// chop off from the tail first.
var hash = rest.indexOf('#');
if (hash !== -1) {
@@ -99,9 +148,17 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
rest = rest.slice(0, qm);
} else if (parseQueryString) {
// no query string, but parseQueryString still requested
+ out.search = '';
out.query = {};
}
if (rest) out.pathname = rest;
+ if (slashedProtocol[proto] &&
+ out.hostname && !out.pathname) {
+ out.pathname = '/';
+ }
+
+ // finally, reconstruct the href based on what has been validated.
+ out.href = urlFormat(out);
return out;
}
@@ -123,13 +180,12 @@ function urlFormat(obj) {
) :
false,
pathname = obj.pathname || '',
- search = obj.search || (
- obj.query && ('?' + (
- typeof(obj.query) === 'object' ?
- querystring.stringify(obj.query) :
- String(obj.query)
- ))
- ) || '',
+ query = obj.query &&
+ ((typeof obj.query === 'object' &&
+ Object.keys(obj.query).length) ?
+ querystring.stringify(obj.query) :
+ '') || '',
+ search = obj.search || (query && ('?' + query)) || '',
hash = obj.hash || '';
if (protocol && protocol.substr(-1) !== ':') protocol += ':';
View
148 test/simple/test-url.js
@@ -28,7 +28,8 @@ var parseTests = {
'pathname': '/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s='
},
'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : {
- 'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=',
+ 'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api' +
+ '&x=2&y=2&z=3&s=',
'protocol': 'http:',
'host': 'mt0.google.com',
'hostname': 'mt0.google.com',
@@ -37,7 +38,8 @@ var parseTests = {
'pathname': '/vt/lyrs=m@114'
},
'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : {
- 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=',
+ 'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???' +
+ '&hl=en&src=api&x=2&y=2&z=3&s=',
'protocol': 'http:',
'host': 'user:pass@mt0.google.com',
'auth': 'user:pass',
@@ -84,49 +86,6 @@ var parseTests = {
'query': 'baz=quux',
'pathname': '/foo/bar'
},
- 'http://example.com?foo=bar#frag' : {
- 'href': 'http://example.com?foo=bar#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag',
- 'search': '?foo=bar',
- 'query': 'foo=bar'
- },
- 'http://example.com?foo=@bar#frag' : {
- 'href': 'http://example.com?foo=@bar#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag',
- 'search': '?foo=@bar',
- 'query': 'foo=@bar'
- },
- 'http://example.com?foo=/bar/#frag' : {
- 'href': 'http://example.com?foo=/bar/#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag',
- 'search': '?foo=/bar/',
- 'query': 'foo=/bar/'
- },
- 'http://example.com?foo=?bar/#frag' : {
- 'href': 'http://example.com?foo=?bar/#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag',
- 'search': '?foo=?bar/',
- 'query': 'foo=?bar/'
- },
- 'http://example.com#frag=?bar/#frag' : {
- 'href': 'http://example.com#frag=?bar/#frag',
- 'protocol': 'http:',
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'hash': '#frag=?bar/#frag'
- },
'/foo/bar?baz=quux#frag' : {
'href': '/foo/bar?baz=quux#frag',
'hash': '#frag',
@@ -154,9 +113,7 @@ var parseTests = {
'javascript:alert(\'hello\');' : {
'href': 'javascript:alert(\'hello\');',
'protocol': 'javascript:',
- 'host': 'alert(\'hello\')',
- 'hostname': 'alert(\'hello\')',
- 'pathname' : ';'
+ 'pathname': 'alert(\'hello\');'
},
'xmpp:isaacschlueter@jabber.org' : {
'href': 'xmpp:isaacschlueter@jabber.org',
@@ -194,21 +151,13 @@ var parseTestsWithQueryString = {
'pathname': '/foo/bar'
},
'http://example.com' : {
- 'href': 'http://example.com',
- 'protocol': 'http:',
- 'slashes': true,
- 'host': 'example.com',
- 'hostname': 'example.com',
- 'query': {}
- },
- 'http://example.com?' : {
- 'href': 'http://example.com?',
+ 'href': 'http://example.com/',
'protocol': 'http:',
'slashes': true,
'host': 'example.com',
'hostname': 'example.com',
- 'search': '?',
- 'query': {}
+ 'query': {},
+ 'pathname': '/'
}
};
for (var u in parseTestsWithQueryString) {
@@ -225,7 +174,72 @@ for (var u in parseTestsWithQueryString) {
// some extra formatting tests, just to verify
// that it'll format slightly wonky content to a valid url.
var formatTests = {
+ 'http://example.com?' : {
+ 'href': 'http://example.com/?',
+ 'protocol': 'http:',
+ 'slashes': true,
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'search': '?',
+ 'query': {},
+ 'pathname': '/'
+ },
+ 'http://example.com?foo=bar#frag' : {
+ 'href': 'http://example.com/?foo=bar#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag',
+ 'search': '?foo=bar',
+ 'query': 'foo=bar',
+ 'pathname': '/'
+ },
+ 'http://example.com?foo=@bar#frag' : {
+ 'href': 'http://example.com/?foo=@bar#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag',
+ 'search': '?foo=@bar',
+ 'query': 'foo=@bar',
+ 'pathname': '/'
+ },
+ 'http://example.com?foo=/bar/#frag' : {
+ 'href': 'http://example.com/?foo=/bar/#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag',
+ 'search': '?foo=/bar/',
+ 'query': 'foo=/bar/',
+ 'pathname': '/'
+ },
+ 'http://example.com?foo=?bar/#frag' : {
+ 'href': 'http://example.com/?foo=?bar/#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag',
+ 'search': '?foo=?bar/',
+ 'query': 'foo=?bar/',
+ 'pathname': '/'
+ },
+ 'http://example.com#frag=?bar/#frag' : {
+ 'href': 'http://example.com/#frag=?bar/#frag',
+ 'protocol': 'http:',
+ 'host': 'example.com',
+ 'hostname': 'example.com',
+ 'hash': '#frag=?bar/#frag',
+ 'pathname': '/'
+ },
+ 'http://google.com" onload="alert(42)/' : {
+ 'href': 'http://google.com/',
+ 'protocol': 'http:',
+ 'host': 'google.com',
+ 'pathname': '/'
+ },
'http://a.com/a/b/c?s#h' : {
+ 'href': 'http://a.com/a/b/c?s#h',
'protocol': 'http',
'host': 'a.com',
'pathname': 'a/b/c',
@@ -233,7 +247,7 @@ var formatTests = {
'search': 's'
},
'xmpp:isaacschlueter@jabber.org' : {
- 'href': 'xmpp://isaacschlueter@jabber.org',
+ 'href': 'xmpp:isaacschlueter@jabber.org',
'protocol': 'xmpp:',
'host': 'isaacschlueter@jabber.org',
'auth': 'isaacschlueter',
@@ -241,9 +255,17 @@ var formatTests = {
}
};
for (var u in formatTests) {
- var actual = url.format(formatTests[u]);
- assert.equal(actual, u,
- 'wonky format(' + u + ') == ' + u + '\nactual:' + actual);
+ var expect = formatTests[u].href;
+ delete formatTests[u].href;
+ var actual = url.format(u);
+ var actualObj = url.format(formatTests[u]);
+ assert.equal(actual, expect,
+ 'wonky format(' + u + ') == ' + expect +
+ '\nactual:' + actual);
+ assert.equal(actualObj, expect,
+ 'wonky format(' + JSON.stringify(formatTests[u]) +
+ ') == ' + expect +
+ '\nactual: ' + actualObj);
}
/*
Please sign in to comment.
Something went wrong with that request. Please try again.