diff --git a/.gitignore b/.gitignore index 8225baa..7c08f18 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,31 @@ +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: +.idea/workspace.xml +.idea/tasks.xml +.idea/dictionaries +.idea/vcs.xml +.idea/jsLibraryMappings.xml + +# Sensitive or high-churn files: +.idea/dataSources.ids +.idea/dataSources.xml +.idea/dataSources.local.xml +.idea/sqlDataSources.xml +.idea/dynamic.xml +.idea/uiDesigner.xml + +# Gradle: +.idea/gradle.xml +.idea/libraries + +# Mongo Explorer plugin: +.idea/mongoSettings.xml + +## File-based project format: +*.iws + /node_modules -/dist +/dist \ No newline at end of file diff --git a/lib/simple-dom/html-parser.js b/lib/simple-dom/html-parser.js index 1fbe94c..63030c4 100644 --- a/lib/simple-dom/html-parser.js +++ b/lib/simple-dom/html-parser.js @@ -1,3 +1,5 @@ +import he from 'he'; + function HTMLParser(tokenize, document, voidMap) { this.tokenize = tokenize; this.document = document; @@ -14,6 +16,10 @@ HTMLParser.prototype.pushElement = function(token) { for (var i=0;i&]/g; +var REG_ESCAPE_PRESERVE_ENTITIES = /[<>]|&(?:#?[a-zA-Z0-9]+;)?/g; + function HTMLSerializer(voidMap) { this.voidMap = voidMap; } @@ -23,12 +26,14 @@ HTMLSerializer.prototype.attributes = function(namedNodeMap) { }; HTMLSerializer.prototype.escapeAttrValue = function(attrValue) { - return attrValue.replace(/[&"]/g, function(match) { + return attrValue.replace(/"|&(?:#?[a-zA-Z0-9]+;)?/g, function(match) { switch(match) { case '&': return '&'; case '\"': return '"'; + default: + return match; } }); }; @@ -38,13 +43,16 @@ HTMLSerializer.prototype.attr = function(attr) { return ''; } if (attr.value) { + if (attr.name === 'href' || attr.name === 'src') { + return ' ' + attr.name + '="' + attr.value + '"'; + } return ' ' + attr.name + '="' + this.escapeAttrValue(attr.value) + '"'; } return ' ' + attr.name; }; -HTMLSerializer.prototype.escapeText = function(textNodeValue) { - return textNodeValue.replace(/[&<>]/g, function(match) { +HTMLSerializer.prototype.escapeText = function(textNodeValue, escapeAll) { + return textNodeValue.replace(escapeAll ? REG_ESCAPE_ALL : REG_ESCAPE_PRESERVE_ENTITIES, function(match) { switch(match) { case '&': return '&'; @@ -52,6 +60,8 @@ HTMLSerializer.prototype.escapeText = function(textNodeValue) { return '<'; case '>': return '>'; + default: + return match; } }); }; @@ -97,7 +107,7 @@ HTMLSerializer.prototype.serialize = function(node) { next = next.nextSibling; } } else if(node.nodeType === 1 && node.textContent){ - buffer += node.textContent; + buffer += this.escapeText(node.textContent, true); } if (node.nodeType === 1 && !this.isVoid(node)) { diff --git a/package.json b/package.json index 8ecd956..c4ea575 100644 --- a/package.json +++ b/package.json @@ -32,18 +32,18 @@ }, "npmIgnore": [ "testee" - ], - "transpiler": "babel" + ] }, "devDependencies": { "steal": "^1.0.0-rc.0", "steal-qunit": "^0.1.0", "steal-tools": "^1.0.0-rc.0", "testee": "^0.2.0", - "jquery": "^2.2.0" + "jquery": "^2.2.0", + "simple-html-tokenizer": "^0.2.1" }, "dependencies": { - "micro-location": "^0.1.4", - "simple-html-tokenizer": "^0.2.1" + "he": "^1.1.0", + "micro-location": "^0.1.4" } } diff --git a/test/parser-test.js b/test/parser-test.js index c43a33c..cde1fea 100644 --- a/test/parser-test.js +++ b/test/parser-test.js @@ -1,18 +1,9 @@ import { document } from './support'; -import Parser from 'simple-dom/html-parser'; -import voidMap from 'simple-dom/void-map'; -import HTMLTokenizer from 'simple-html-tokenizer/tokenizer'; -import EntityParser from 'simple-html-tokenizer/entity-parser'; -// make test rebuilds fast -import namedCodepoints from 'simple-html-tokenizer/char-refs/min'; - -function tokenize(input) { - // TODO: make Tokenizer take input on the tokenize method like tokenizePart - // just init state, I'd rather pass in the tokenizer instance and call tokenize(input) - var tokenizer = new HTMLTokenizer(input, new EntityParser(namedCodepoints)); - return tokenizer.tokenize(); -} +import Parser from 'can-simple-dom/simple-dom/html-parser'; +import voidMap from 'can-simple-dom/simple-dom/void-map'; +import tokenize from 'can-simple-dom/simple-dom/default-tokenize'; +import QUnit from 'steal-qunit'; QUnit.module('Basic HTML parsing', { beforeEach: function() { @@ -78,7 +69,7 @@ QUnit.test('nested parse', function (assert) { }); QUnit.test('void tags', function (assert) { - var fragment = this.parser.parse('
Hello
World
'); + var fragment = this.parser.parse('
Hello
World
'); assert.ok(fragment); var node = fragment.firstChild; assert.ok(node); @@ -100,6 +91,49 @@ QUnit.test('void tags', function (assert) { assert.ok(node); assert.equal(node.nodeType, 1); assert.equal(node.nodeName, 'IMG'); - assert.equal(node.getAttribute('src'), 'http://example.com/image.png'); + assert.equal(node.getAttribute('src'), 'http://example.com/image.png?foo=bar&bar=foo'); assert.equal(node.nextSibling, null); }); + +QUnit.test('simple charater encode', function(assert) { + + var fragment = this.parser.parse('hello > world &   goodbye'); + assert.ok(fragment); + + var node = fragment.firstChild; + assert.ok(node); + assert.equal(node.nodeType, 3); + assert.equal(node.nodeValue, 'hello > world &   goodbye'); +}); + +QUnit.test('node child charater encode', function(assert) { + var fragment = this.parser.parse('
Foo & Bar & Baz < Buz > Biz
'); + assert.ok(fragment); + var node = fragment.firstChild; + assert.ok(node); + assert.equal(node.nodeType, 1); + assert.equal(node.nodeName, 'DIV'); + + node = node.firstChild; + assert.ok(node); + assert.equal(node.nodeType, 3); + assert.equal(node.nodeValue, 'Foo & Bar & Baz < Buz > Biz'); + +}); + +QUnit.test('node attribute charater encode', function(assert) { + + var fragment = this.parser.parse('
'); + assert.ok(fragment); + + var node = fragment.firstChild; + assert.ok(node); + assert.equal(node.nodeType, 1); + assert.equal(node.nodeName, 'DIV'); + + var attibutes = node.attributes; + assert.ok(attibutes.length); + var title = attibutes[0]; + assert.equal(title.name, 'title'); + assert.equal(title.value, ' foo & bar & baz < buz > biz'); +}); diff --git a/test/serializer-test.js b/test/serializer-test.js index 379ad58..dec7abf 100644 --- a/test/serializer-test.js +++ b/test/serializer-test.js @@ -9,15 +9,29 @@ QUnit.module('Serializer', { } }); +QUnit.test('simple text', function(assert) { + var actual = this.serializer.serialize(fragment( + text('hello > world &    & goodbye') + )); + assert.equal(actual, 'hello > world &    & goodbye'); +}); + QUnit.test('serializes correctly', function (assert) { var actual = this.serializer.serialize(fragment( - element('div', { id:'foo' }, + element('div', { id:'foo', title: '&&"'}, element('b', {}, - text('Foo & Bar') + text('Foo & Bar & Baz < Buz > Biz ©') ) ) )); - assert.equal(actual, '
Foo & Bar
'); + assert.equal(actual, '
Foo & Bar & Baz < Buz > Biz ©
'); +}); + +QUnit.test('serializes image correctly', function (assert) { + var actual = this.serializer.serialize(fragment( + element('img', { src:'https://foo.com/foobar.jpg?foo=bar&bar=foo'}) + )); + assert.equal(actual, ''); }); QUnit.test('serializes textContent', function(assert) { diff --git a/test/test.js b/test/test.js index 9c82de9..acd5263 100644 --- a/test/test.js +++ b/test/test.js @@ -2,3 +2,4 @@ import './element-test'; import './serializer-test'; import './element-sp-test'; import './element-event-test'; +import './parser-test';