Skip to content

Commit

Permalink
disable decoding of attributes on parse
Browse files Browse the repository at this point in the history
  • Loading branch information
alanclarke committed Apr 21, 2015
1 parent 3c75647 commit da78227
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 2 deletions.
3 changes: 2 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ Difference with cheerio:
* Use `$.load(content)` to load HTML documents (e.g. missing `<html>` tags will be automatically emitted in this case).
* Use `$(content)` to create HTML-fragments which can be later added to the loaded document.
* Parser options (e.g. `xmlMode` and `normalizeWhitespace`) are missing since whacko is intended for spec-compliant HTML parsing.
* New parser option `encodeEntities` added. It disables HTML entities decoding on serialization.
* New parser option `encodeEntities` added. When false, it disables HTML entity encoding on serialization.
* New parser option `decodeHtmlEntities` added. When false, it disables HTML entity decoding during parsing.

In all other respects it is the same as the [cheerio API](https://github.com/cheeriojs/cheerio#api).

Expand Down
4 changes: 3 additions & 1 deletion lib/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
*/
var parse5 = require('parse5');

var parser = new parse5.Parser(parse5.TreeAdapters.htmlparser2);

/*
Parser
Expand All @@ -28,6 +27,9 @@ var shouldParseAsDocument = function (content) {
};

exports.evaluate = function (content, options, isDocument) {
var parser = new parse5.Parser(parse5.TreeAdapters.htmlparser2, {
decodeHtmlEntities: options.decodeHtmlEntities !== false
});
var dom = null;

if (Buffer.isBuffer(content))
Expand Down
23 changes: 23 additions & 0 deletions test/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ var li = '<li class="durian">Durian</li>';
// Attributes
var attributes = '<img src="hello.png" alt="man waving">';
var noValueAttribute = '<textarea disabled></textarea>';
var encodedAttributes = '<img data-object="{&quot;foo&quot;:&quot;bar&quot;}">';

// Comments
var comment = '<!-- sexy -->';
Expand Down Expand Up @@ -93,6 +94,28 @@ describe('parse', function() {
expect(attrs.alt).to.equal('man waving');
});

// Opt-out case: with `decodeHtmlEntities: false` the attribute value is
// expected to keep its entity references exactly as written in the markup.
it('should not decodeHtmlEntities if decodeHtmlEntities options is false', function() {
  var options = { decodeHtmlEntities: false };
  var nodes = parse.evaluate(encodedAttributes, options);
  var attribs = nodes[0].attribs;

  expect(attribs).to.be.ok();
  expect(attribs['data-object']).to.equal('{&quot;foo&quot;:&quot;bar&quot;}');
});

// Explicit opt-in case: with `decodeHtmlEntities: true` the `&quot;`
// references in the attribute value are expected to come back as `"`.
it('should decodeHtmlEntities if decodeHtmlEntities options is true', function() {
  var options = { decodeHtmlEntities: true };
  var nodes = parse.evaluate(encodedAttributes, options);
  var attribs = nodes[0].attribs;

  expect(attribs).to.be.ok();
  expect(attribs['data-object']).to.equal('{"foo":"bar"}');
});

// Default case: parsing with `defaultOpts` is expected to decode the
// `&quot;` references in the attribute value.
it('should decodeHtmlEntities by default', function() {
  var nodes = parse.evaluate(encodedAttributes, defaultOpts);
  var attribs = nodes[0].attribs;

  expect(attribs).to.be.ok();
  expect(attribs['data-object']).to.equal('{"foo":"bar"}');
});

it('should handle value-less attributes: ' + noValueAttribute, function() {
var attrs = parse.evaluate(noValueAttribute, defaultOpts)[0].attribs;
expect(attrs).to.be.ok();
Expand Down

0 comments on commit da78227

Please sign in to comment.