Skip to content
Browse files

decode entities in htmlparser2

fixes #248

This changes how text nodes are stored in the DOM: they are now stored
unescaped (entities decoded) by default and are re-encoded on output, which leads to a slight overhead when rendering.
  • Loading branch information...
1 parent e478de2 commit 826b9f85fe7f2a20ecbaf991258a7a31cb755a89 @fb55 fb55 committed
Showing with 37 additions and 48 deletions.
  1. +1 −1 lib/api/manipulation.js
  2. +1 −1 lib/cheerio.js
  3. +2 −2 lib/parse.js
  4. +3 −4 lib/render.js
  5. +1 −2 lib/static.js
  6. +0 −12 lib/utils.js
  7. +1 −1 test/api.manipulation.js
  8. +23 −22 test/parse.js
  9. +5 −3 test/render.js
View
2 lib/api/manipulation.js
@@ -246,7 +246,7 @@ var text = exports.text = function(str) {
}
var elem = {
- data: encode(str),
+ data: str,
type: 'text',
parent: null,
prev: null,
View
2 lib/cheerio.js
@@ -111,7 +111,7 @@ Cheerio.prototype.cheerio = '[cheerio object]';
Cheerio.prototype.options = {
normalizeWhitespace: false,
xmlMode: false,
- decodeEntities: false
+ decodeEntities: true
};
/*
View
4 lib/parse.js
@@ -4,7 +4,6 @@
var htmlparser = require('htmlparser2'),
_ = require('lodash'),
utils = require('./utils'),
- decode = utils.decode,
isTag = utils.isTag,
camelCase = require('./utils').camelCase;
@@ -41,6 +40,7 @@ var evaluate = exports.evaluate = function(content, options) {
dom = content;
}
+ if(!options.decodeEntities) throw Error('whoot');
_.forEach(dom, parseData);
return dom;
@@ -106,7 +106,7 @@ var parseData = exports.parseData = function(elem) {
if (elem.data === undefined) elem.data = {};
var value;
for (var key in elem.attribs) {
- value = decode(elem.attribs[key]);
+ value = elem.attribs[key];
if (key.substr(0, 5) === 'data-') {
key = key.slice(5);
View
7 lib/render.js
@@ -3,8 +3,7 @@
*/
var _ = require('lodash');
var utils = require('./utils');
-
-var encode = utils.encode;
+var encode = require('entities').encodeXML;
/*
Boolean Attributes
@@ -30,7 +29,7 @@ var formatAttrs = function(attributes) {
if (!value && (rboolean.test(key) || key === '/')) {
output += key;
} else {
- output += key + '="' + encode(value) + '"';
+ output += key + '="' + encode(value || '') + '"';
}
}
@@ -132,7 +131,7 @@ var renderDirective = function(elem) {
};
var renderText = function(elem) {
- return elem.data || '';
+ return encode(elem.data || '');
};
var renderCdata = function(elem) {
View
3 lib/static.js
@@ -5,7 +5,6 @@
var select = require('CSSselect'),
parse = require('./parse'),
render = require('./render'),
- decode = require('./utils').decode,
_ = require('lodash');
/**
@@ -76,7 +75,7 @@ var text = exports.text = function(elems) {
for (var i = 0; i < len; i ++) {
elem = elems[i];
- if (elem.type === 'text') ret += decode(elem.data);
+ if (elem.type === 'text') ret += elem.data;
else if (elem.children && elem.type !== 'comment') {
ret += text(elem.children);
}
View
12 lib/utils.js
@@ -1,9 +1,4 @@
/**
- * Module Dependencies
- */
-var entities = require('entities');
-
-/**
* HTML Tags
*/
@@ -33,13 +28,6 @@ exports.camelCase = function(str) {
};
/**
- * Expose encode and decode methods from FB55's node-entities library
- */
-
-exports.encode = function(str) { return entities.encodeXML(String(str)); };
-exports.decode = entities.decodeHTML;
-
-/**
* Iterate over each DOM element without creating intermediary Cheerio instances.
*
* This is intended for use internally to avoid otherwise unnecessary memory pressure introduced
View
2 test/api.manipulation.js
@@ -814,7 +814,7 @@ describe('$(...)', function() {
var $apple = $('.apple', fruits);
$apple.text('blah <script>alert("XSS!")</script> blah');
- expect($apple[0].children[0].data).to.equal('blah &lt;script&gt;alert(&quot;XSS!&quot;)&lt;/script&gt; blah');
+ expect($apple[0].children[0].data).to.equal('blah <script>alert("XSS!")</script> blah');
expect($apple.text()).to.equal('blah <script>alert("XSS!")</script> blah');
$apple.text('blah <script>alert("XSS!")</script> blah');
View
45 test/parse.js
@@ -1,5 +1,6 @@
var expect = require('expect.js'),
- parse = require('../lib/parse');
+ parse = require('../lib/parse'),
+ defaultOpts = require('..').prototype.options;
// Tags
@@ -42,14 +43,14 @@ describe('parse', function() {
describe('.eval', function() {
it('should parse basic empty tags: ' + basic, function() {
- var tag = parse.evaluate(basic)[0];
+ var tag = parse.evaluate(basic, defaultOpts)[0];
expect(tag.type).to.equal('tag');
expect(tag.name).to.equal('html');
expect(tag.children).to.be.empty();
});
it('should handle sibling tags: ' + siblings, function() {
- var dom = parse.evaluate(siblings),
+ var dom = parse.evaluate(siblings, defaultOpts),
h2 = dom[0],
p = dom[1];
@@ -59,21 +60,21 @@ describe('parse', function() {
});
it('should handle single tags: ' + single, function() {
- var tag = parse.evaluate(single)[0];
+ var tag = parse.evaluate(single, defaultOpts)[0];
expect(tag.type).to.equal('tag');
expect(tag.name).to.equal('br');
expect(tag.children).to.be.empty();
});
it('should handle malformatted single tags: ' + singleWrong, function() {
- var tag = parse.evaluate(singleWrong)[0];
+ var tag = parse.evaluate(singleWrong, defaultOpts)[0];
expect(tag.type).to.equal('tag');
expect(tag.name).to.equal('br');
expect(tag.children).to.be.empty();
});
it('should handle tags with children: ' + children, function() {
- var tag = parse.evaluate(children)[0];
+ var tag = parse.evaluate(children, defaultOpts)[0];
expect(tag.type).to.equal('tag');
expect(tag.name).to.equal('html');
expect(tag.children).to.be.ok();
@@ -81,44 +82,44 @@ describe('parse', function() {
});
it('should handle tags with children: ' + li, function() {
- var tag = parse.evaluate(li)[0];
+ var tag = parse.evaluate(li, defaultOpts)[0];
expect(tag.children).to.have.length(1);
expect(tag.children[0].data).to.equal('Durian');
});
it('should handle tags with attributes: ' + attributes, function() {
- var attrs = parse.evaluate(attributes)[0].attribs;
+ var attrs = parse.evaluate(attributes, defaultOpts)[0].attribs;
expect(attrs).to.be.ok();
expect(attrs.src).to.equal('hello.png');
expect(attrs.alt).to.equal('man waving');
});
it('should handle value-less attributes: ' + noValueAttribute, function() {
- var attrs = parse.evaluate(noValueAttribute)[0].attribs;
+ var attrs = parse.evaluate(noValueAttribute, defaultOpts)[0].attribs;
expect(attrs).to.be.ok();
expect(attrs.disabled).to.equal('');
});
it('should handle comments: ' + comment, function() {
- var elem = parse.evaluate(comment)[0];
+ var elem = parse.evaluate(comment, defaultOpts)[0];
expect(elem.type).to.equal('comment');
expect(elem.data).to.equal(' sexy ');
});
it('should handle conditional comments: ' + conditional, function() {
- var elem = parse.evaluate(conditional)[0];
+ var elem = parse.evaluate(conditional, defaultOpts)[0];
expect(elem.type).to.equal('comment');
expect(elem.data).to.equal(conditional.replace('<!--', '').replace('-->', ''));
});
it('should handle text: ' + text, function() {
- var text_ = parse.evaluate(text)[0];
+ var text_ = parse.evaluate(text, defaultOpts)[0];
expect(text_.type).to.equal('text');
expect(text_.data).to.equal('lorem ipsum');
});
it('should handle script tags: ' + script, function() {
- var script_ = parse.evaluate(script)[0];
+ var script_ = parse.evaluate(script, defaultOpts)[0];
expect(script_.type).to.equal('script');
expect(script_.name).to.equal('script');
expect(script_.attribs.type).to.equal('text/javascript');
@@ -128,7 +129,7 @@ describe('parse', function() {
});
it('should handle style tags: ' + style, function() {
- var style_ = parse.evaluate(style)[0];
+ var style_ = parse.evaluate(style, defaultOpts)[0];
expect(style_.type).to.equal('style');
expect(style_.name).to.equal('style');
expect(style_.attribs.type).to.equal('text/css');
@@ -138,7 +139,7 @@ describe('parse', function() {
});
it('should handle directives: ' + directive, function() {
- var elem = parse.evaluate(directive)[0];
+ var elem = parse.evaluate(directive, defaultOpts)[0];
expect(elem.type).to.equal('directive');
expect(elem.data).to.equal('!doctype html');
expect(elem.name).to.equal('!doctype');
@@ -162,14 +163,14 @@ describe('parse', function() {
}
it('should add root to: ' + basic, function() {
- var root = parse(basic);
+ var root = parse(basic, defaultOpts);
rootTest(root);
expect(root.children).to.have.length(1);
expect(root.children[0].name).to.equal('html');
});
it('should add root to: ' + siblings, function() {
- var root = parse(siblings);
+ var root = parse(siblings, defaultOpts);
rootTest(root);
expect(root.children).to.have.length(2);
expect(root.children[0].name).to.equal('h2');
@@ -178,35 +179,35 @@ describe('parse', function() {
});
it('should add root to: ' + comment, function() {
- var root = parse(comment);
+ var root = parse(comment, defaultOpts);
rootTest(root);
expect(root.children).to.have.length(1);
expect(root.children[0].type).to.equal('comment');
});
it('should add root to: ' + text, function() {
- var root = parse(text);
+ var root = parse(text, defaultOpts);
rootTest(root);
expect(root.children).to.have.length(1);
expect(root.children[0].type).to.equal('text');
});
it('should add root to: ' + scriptEmpty, function() {
- var root = parse(scriptEmpty);
+ var root = parse(scriptEmpty, defaultOpts);
rootTest(root);
expect(root.children).to.have.length(1);
expect(root.children[0].type).to.equal('script');
});
it('should add root to: ' + styleEmpty, function() {
- var root = parse(styleEmpty);
+ var root = parse(styleEmpty, defaultOpts);
rootTest(root);
expect(root.children).to.have.length(1);
expect(root.children[0].type).to.equal('style');
});
it('should add root to: ' + directive, function() {
- var root = parse(directive);
+ var root = parse(directive, defaultOpts);
rootTest(root);
expect(root.children).to.have.length(1);
expect(root.children[0].type).to.equal('directive');
View
8 test/render.js
@@ -1,18 +1,20 @@
var expect = require('expect.js'),
+ defaultOpts = require('..').prototype.options,
+ _ = require('lodash'),
parse = require('../lib/parse'),
render = require('../lib/render');
var html = function(str, options) {
- options = options || {};
+ options = _.defaults(options || {}, defaultOpts);
var dom = parse(str, options);
return render(dom);
};
var xml = function(str, options) {
- options = options || {};
+ options = _.defaults(options || {}, defaultOpts);
options.xmlMode = true;
var dom = parse(str, options);
- return render(dom, {xmlMode:true});
+ return render(dom, options);
};
describe('render', function() {

0 comments on commit 826b9f8

Please sign in to comment.
Something went wrong with that request. Please try again.