Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,31 @@
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:
.idea/workspace.xml
.idea/tasks.xml
.idea/dictionaries
.idea/vcs.xml
.idea/jsLibraryMappings.xml

# Sensitive or high-churn files:
.idea/dataSources.ids
.idea/dataSources.xml
.idea/dataSources.local.xml
.idea/sqlDataSources.xml
.idea/dynamic.xml
.idea/uiDesigner.xml

# Gradle:
.idea/gradle.xml
.idea/libraries

# Mongo Explorer plugin:
.idea/mongoSettings.xml

## File-based project format:
*.iws

/node_modules
/dist
/dist
9 changes: 8 additions & 1 deletion lib/simple-dom/html-parser.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import he from 'he';

function HTMLParser(tokenize, document, voidMap) {
this.tokenize = tokenize;
this.document = document;
Expand All @@ -14,6 +16,10 @@ HTMLParser.prototype.pushElement = function(token) {

for (var i=0;i<token.attributes.length;i++) {
var attr = token.attributes[i];

if(attr[0] !== 'href' && attr[0] !== 'src') {
attr[1] = he.encode(attr[1]);
}
el.setAttribute(attr[0], attr[1]);
}

Expand All @@ -35,7 +41,8 @@ HTMLParser.prototype.popElement = function(token) {
};

/**
 * Appends the characters of a text token to the current parent as a text node.
 *
 * The characters are run through `he.encode` before the node is created, so the
 * node's value is the encoded form of the token text rather than the raw text.
 * Only one text node is created per token.
 *
 * @param {{chars: string}} token - tokenizer token; only `chars` is read here.
 */
HTMLParser.prototype.appendText = function(token) {
  var content = he.encode(token.chars);
  var text = this.document.createTextNode(content);
  this.appendChild(text);
};

Expand Down
18 changes: 14 additions & 4 deletions lib/simple-dom/html-serializer.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// Matches every `<`, `>` and `&` character, with no exceptions.
var REG_ESCAPE_ALL = /[<>&]/g;
// Matches `<`, `>`, or an `&` together with an optional entity-shaped tail
// (`name;`, `#123;`, `#x3E;`). An entity-shaped sequence is therefore matched
// as a single unit, which lets the replacer tell it apart from a bare `&`.
var REG_ESCAPE_PRESERVE_ENTITIES = /[<>]|&(?:#?[a-zA-Z0-9]+;)?/g;

/**
 * Serializer constructor.
 *
 * @param {Object} voidMap - stored as-is on the instance as `voidMap`.
 */
function HTMLSerializer(voidMap) {
  var serializer = this;
  serializer.voidMap = voidMap;
}
Expand All @@ -23,12 +26,14 @@ HTMLSerializer.prototype.attributes = function(namedNodeMap) {
};

/**
 * Escapes an attribute value for output between double quotes.
 *
 * A double quote becomes `&quot;` and a bare `&` becomes `&amp;`. An `&` that
 * already starts an entity-shaped sequence (`&name;`, `&#123;`, `&#x3E;`) is
 * matched together with that sequence and returned unchanged, so existing
 * entities are not escaped a second time.
 *
 * @param {string} attrValue - raw attribute value.
 * @returns {string} the escaped value.
 */
HTMLSerializer.prototype.escapeAttrValue = function(attrValue) {
  return attrValue.replace(/"|&(?:#?[a-zA-Z0-9]+;)?/g, function(match) {
    switch (match) {
      case '&':
        return '&amp;';
      case '"':
        return '&quot;';
      default:
        // The match is a complete entity-shaped sequence: keep it verbatim.
        return match;
    }
  });
};
Expand All @@ -38,20 +43,25 @@ HTMLSerializer.prototype.attr = function(attr) {
return '';
}
if (attr.value) {
if (attr.name === 'href' || attr.name === 'src') {
return ' ' + attr.name + '="' + attr.value + '"';
}
return ' ' + attr.name + '="' + this.escapeAttrValue(attr.value) + '"';
}
return ' ' + attr.name;
};

/**
 * Escapes markup-significant characters in text content.
 *
 * `<` becomes `&lt;`, `>` becomes `&gt;` and a bare `&` becomes `&amp;`.
 *
 * @param {string} textNodeValue - text to escape.
 * @param {boolean} [escapeAll] - when truthy, REG_ESCAPE_ALL is used and every
 *   `&` is escaped. Otherwise REG_ESCAPE_PRESERVE_ENTITIES is used and an `&`
 *   that starts an entity-shaped sequence (`&name;`, `&#123;`, `&#x3E;`) is
 *   returned unchanged together with that sequence.
 * @returns {string} the escaped text.
 */
HTMLSerializer.prototype.escapeText = function(textNodeValue, escapeAll) {
  var pattern = escapeAll ? REG_ESCAPE_ALL : REG_ESCAPE_PRESERVE_ENTITIES;
  return textNodeValue.replace(pattern, function(match) {
    switch (match) {
      case '&':
        return '&amp;';
      case '<':
        return '&lt;';
      case '>':
        return '&gt;';
      default:
        // Only reachable with the entity-preserving pattern: keep the entity.
        return match;
    }
  });
};
Expand Down Expand Up @@ -97,7 +107,7 @@ HTMLSerializer.prototype.serialize = function(node) {
next = next.nextSibling;
}
} else if(node.nodeType === 1 && node.textContent){
buffer += node.textContent;
buffer += this.escapeText(node.textContent, true);
}

if (node.nodeType === 1 && !this.isVoid(node)) {
Expand Down
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,18 @@
},
"npmIgnore": [
"testee"
],
"transpiler": "babel"
]
},
"devDependencies": {
"steal": "^1.0.0-rc.0",
"steal-qunit": "^0.1.0",
"steal-tools": "^1.0.0-rc.0",
"testee": "^0.2.0",
"jquery": "^2.2.0"
"jquery": "^2.2.0",
"simple-html-tokenizer": "^0.2.1"
},
"dependencies": {
"micro-location": "^0.1.4",
"simple-html-tokenizer": "^0.2.1"
"he": "^1.1.0",
"micro-location": "^0.1.4"
}
}
64 changes: 49 additions & 15 deletions test/parser-test.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,9 @@
import { document } from './support';

import Parser from 'simple-dom/html-parser';
import voidMap from 'simple-dom/void-map';
import HTMLTokenizer from 'simple-html-tokenizer/tokenizer';
import EntityParser from 'simple-html-tokenizer/entity-parser';
// make test rebuilds fast
import namedCodepoints from 'simple-html-tokenizer/char-refs/min';

/**
 * Test helper: tokenizes `input` with a freshly constructed HTMLTokenizer that
 * is backed by an EntityParser built from `namedCodepoints`.
 *
 * @param {string} input - markup to tokenize.
 * @returns the result of `HTMLTokenizer#tokenize()`.
 */
function tokenize(input) {
  // TODO: make Tokenizer take input on the tokenize method like tokenizePart
  // just init state, I'd rather pass in the tokenizer instance and call tokenize(input)
  var entityParser = new EntityParser(namedCodepoints);
  return new HTMLTokenizer(input, entityParser).tokenize();
}
import Parser from 'can-simple-dom/simple-dom/html-parser';
import voidMap from 'can-simple-dom/simple-dom/void-map';
import tokenize from 'can-simple-dom/simple-dom/default-tokenize';
import QUnit from 'steal-qunit';

QUnit.module('Basic HTML parsing', {
beforeEach: function() {
Expand Down Expand Up @@ -78,7 +69,7 @@ QUnit.test('nested parse', function (assert) {
});

QUnit.test('void tags', function (assert) {
var fragment = this.parser.parse('<div>Hello<br>World<img src="http://example.com/image.png"></div>');
var fragment = this.parser.parse('<div>Hello<br>World<img src="http://example.com/image.png?foo=bar&bar=foo"></div>');
assert.ok(fragment);
var node = fragment.firstChild;
assert.ok(node);
Expand All @@ -100,6 +91,49 @@ QUnit.test('void tags', function (assert) {
assert.ok(node);
assert.equal(node.nodeType, 1);
assert.equal(node.nodeName, 'IMG');
assert.equal(node.getAttribute('src'), 'http://example.com/image.png');
assert.equal(node.getAttribute('src'), 'http://example.com/image.png?foo=bar&bar=foo');
assert.equal(node.nextSibling, null);
});

// Top-level text: the parsed text node is expected to hold the encoded form
// of the input characters.
QUnit.test('simple charater encode', function(assert) {
  var parsed = this.parser.parse('hello > world &amp; &nbsp;&nbsp;goodbye');
  assert.ok(parsed);

  var textNode = parsed.firstChild;
  assert.ok(textNode);
  assert.equal(textNode.nodeType, 3);
  assert.equal(
    textNode.nodeValue,
    'hello &#x3E; world &#x26; &#xA0;&#xA0;goodbye'
  );
});

// Text nested inside an element: the element is checked first, then its text
// child is expected to hold the encoded form of the input characters.
QUnit.test('node child charater encode', function(assert) {
  var parsed = this.parser.parse('<div>Foo & Bar &amp; Baz &lt; Buz &gt; Biz</div>');
  assert.ok(parsed);

  var div = parsed.firstChild;
  assert.ok(div);
  assert.equal(div.nodeType, 1);
  assert.equal(div.nodeName, 'DIV');

  var textNode = div.firstChild;
  assert.ok(textNode);
  assert.equal(textNode.nodeType, 3);
  assert.equal(
    textNode.nodeValue,
    'Foo &#x26; Bar &#x26; Baz &#x3C; Buz &#x3E; Biz'
  );
});

// Attribute values: the first attribute of the parsed element is expected to
// be `title`, holding the encoded form of the input value.
QUnit.test('node attribute charater encode', function(assert) {
  var parsed = this.parser.parse('<div title="&nbsp;foo & bar &amp; baz < buz > biz"></div>');
  assert.ok(parsed);

  var div = parsed.firstChild;
  assert.ok(div);
  assert.equal(div.nodeType, 1);
  assert.equal(div.nodeName, 'DIV');

  var attrs = div.attributes;
  assert.ok(attrs.length);

  var titleAttr = attrs[0];
  assert.equal(titleAttr.name, 'title');
  assert.equal(
    titleAttr.value,
    '&#xA0;foo &#x26; bar &#x26; baz &#x3C; buz &#x3E; biz'
  );
});
20 changes: 17 additions & 3 deletions test/serializer-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,29 @@ QUnit.module('Serializer', {
}
});

// A lone text node: `>` and the bare `&` are escaped, while the sequences that
// are already entities (`&amp;`, `&nbsp;`) come out unchanged.
QUnit.test('simple text', function(assert) {
  var serializer = this.serializer;
  var input = fragment(
    text('hello > world &amp; &nbsp;&nbsp; & goodbye')
  );

  var actual = serializer.serialize(input);

  assert.equal(actual, 'hello &gt; world &amp; &nbsp;&nbsp; &amp; goodbye');
});

// Nested elements with an attribute and text that both mix bare `&`, existing
// entities, quotes and angle brackets. Existing entities must survive, bare
// `&`, `"`, `<` and `>` must be escaped, and `©` must pass through untouched.
QUnit.test('serializes correctly', function (assert) {
  var actual = this.serializer.serialize(fragment(
    element('div', { id:'foo', title: '&amp;&"'},
      element('b', {},
        text('Foo & Bar &amp; Baz < Buz > Biz ©')
      )
    )
  ));
  assert.equal(actual, '<div id="foo" title="&amp;&amp;&quot;"><b>Foo &amp; Bar &amp; Baz &lt; Buz &gt; Biz ©</b></div>');
});

// A `src` value containing `&` in its query string is expected to be emitted
// verbatim, without `&amp;` escaping.
QUnit.test('serializes image correctly', function (assert) {
  var serializer = this.serializer;
  var image = element('img', { src:'https://foo.com/foobar.jpg?foo=bar&bar=foo'});

  var actual = serializer.serialize(fragment(image));

  assert.equal(actual, '<img src="https://foo.com/foobar.jpg?foo=bar&bar=foo">');
});

QUnit.test('serializes textContent', function(assert) {
Expand Down
1 change: 1 addition & 0 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ import './element-test';
import './serializer-test';
import './element-sp-test';
import './element-event-test';
import './parser-test';