diff --git a/README.md b/README.md
index 4a841b7..e3fcf3c 100644
--- a/README.md
+++ b/README.md
@@ -180,6 +180,10 @@ sanitizeHtml(
## Changelog
+1.4.3: `sanitizeHtml` now invokes itself recursively until the markup stops changing, to guard against [this issue](https://github.com/fb55/htmlparser2/issues/105). Bumped htmlparser2 to version 3.7.x.
+
+1.4.1, 1.4.2: more tests.
+
1.4.0: ability to allow all attributes or tags through by setting `allowedAttributes` and/or `allowedTags` to false. Thanks to Anand Thakker.
1.3.0: `attribs` now available on frames passed to exclusive filter.
diff --git a/index.js b/index.js
index 8a3c01c..3aba678 100644
--- a/index.js
+++ b/index.js
@@ -4,7 +4,11 @@ var he = require('he');
module.exports = sanitizeHtml;
-function sanitizeHtml(html, options) {
+// Callers should not pass the _recursing flag; it is set only on
+// internal recursive invocations, as a guard against this exploit:
+// https://github.com/fb55/htmlparser2/issues/105
+
+function sanitizeHtml(html, options, _recursing) {
var result = '';
function Frame(tag, attribs) {
@@ -87,8 +91,8 @@ function sanitizeHtml(html, options) {
var skipText = false;
var parser = new htmlparser.Parser({
onopentag: function(name, attribs) {
- var frame = new Frame(name, attribs);
- stack.push(frame);
+ var frame = new Frame(name, attribs);
+ stack.push(frame);
var skip = false;
if (_.has(transformTagsMap, name)) {
@@ -198,6 +202,18 @@ function sanitizeHtml(html, options) {
});
parser.write(html);
parser.end();
+
+ // Invoke recursively until we stop finding
+ // clever little nesting exploits
+ if (!_recursing) {
+ while (true) {
+ var newResult = sanitizeHtml(result, options, true);
+ if (newResult === result) {
+ return result;
+ }
+ result = newResult;
+ }
+ }
return result;
function escapeHtml(s) {
diff --git a/package.json b/package.json
index 44d5763..cf20799 100644
--- a/package.json
+++ b/package.json
@@ -22,7 +22,7 @@
"license": "MIT",
"dependencies": {
"he": "~0.4.1",
- "htmlparser2": "~3.3.0",
+ "htmlparser2": "3.7.x",
"lodash": "2.4.x"
}
}
diff --git a/test/test.js b/test/test.js
index 86e3b2a..d4c36ea 100644
--- a/test/test.js
+++ b/test/test.js
@@ -281,6 +281,21 @@ describe('sanitizeHtml', function() {
}
),
''
- )
+ );
+ });
+ it('should not be faked out by double <', function() {
+ assert.equal(
+ sanitizeHtml('<img src="javascript:evil"/>'
+ ),
+ ''
+ );
+ // I don't love what I get back here obviously, but
+ // it is not an attack vector, although it might be parsed
+ // by some browsers as containing an unbalanced close tag.
+ assert.equal(
+ sanitizeHtml('<a href="javascript:evil"/>'
+ ),
+ '<a href="javascript:evil"/>'
+ );
});
});