Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Script tag parsing, issue #26 #31

Merged
merged 1 commit into from

2 participants

@codders

In reference to our discussion earlier, here's a patch in the HTML5 parser that delays emitting the script tag until it's complete. It didn't seem like JSDom had much control over the tag emission in this case.

It needs some tests, but please let me know if you think it's going in the wrong direction. Good news is, it seems to solve my issue with Zombie.

@aredridel
Owner

Wow. That's really fantastic. Thank you! I'll add you to the contributors list and push out a new version! It looks like the jsdom-interaction-free approach works well there.

@aredridel aredridel merged commit fb4cd01 into from
@aredridel aredridel referenced this pull request from a commit
@gsnedders gsnedders Fix #31: remove sites directory.
It's not quite clear why these are here, and they certainly aren't test cases
per se (what's their pass condition?!). I think they were originally meant as some
sort of shared files for testing performance on? It also seems unlikely we have
the copyright holders' permission to distribute them under the MIT license in
the case of the Google results and the Python docs.
1c6c2c0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jun 29, 2011
This page is out of date. Refresh to see the latest.
View
3  lib/html5/constants.js
@@ -4,6 +4,7 @@ HTML5.CONTENT_MODEL_FLAGS = [
'PCDATA',
'RCDATA',
'CDATA',
+ 'SCRIPT_CDATA',
'PLAINTEXT'
];
@@ -1053,7 +1054,7 @@ HTML5.E = {
"Unexpected %(name). Expected table content."
};
-HTML5.Models = {PCDATA: 0, RCDATA: 1, CDATA: 2};
+HTML5.Models = {PCDATA: 0, RCDATA: 1, CDATA: 2, SCRIPT_CDATA: 3};
HTML5.PHASES = PHASES = {
initial: require('./parser/initial_phase').Phase,
View
11 lib/html5/parser.js
@@ -97,10 +97,17 @@ Parser.prototype.do_token = function(token) {
this.phase[method](token.data);
break;
case 'StartTag':
+ if (token.name == "script") {
+ this.inScript = true;
+ this.scriptBuffer = '';
+ }
this.phase[method](token.name, token.data, token.self_closing);
break;
case 'EndTag':
this.phase[method](token.name);
+ if (token.name == "script") {
+ this.inScript = false;
+ }
break;
case 'Doctype':
this.phase[method](token.name, token.publicId, token.systemId, token.correct);
@@ -138,8 +145,10 @@ Parser.prototype.setup = function(container, encoding) {
case 'textarea':
this.tokenizer.content_model = HTML5.Models.RCDATA;
break;
- case 'style':
case 'script':
+ this.tokenizer.content_model = HTML5.Models.SCRIPT_CDATA;
+ break;
+ case 'style':
case 'xmp':
case 'iframe':
case 'noembed':
View
2  lib/html5/parser/in_head_phase.js
@@ -103,7 +103,7 @@ p.prototype.startTagScript = function(name, attributes) {
this.tree.open_elements.last().appendChild(element);
}
this.tree.open_elements.push(element);
- this.parser.tokenizer.content_model = HTML5.Models.CDATA;
+ this.parser.tokenizer.content_model = HTML5.Models.SCRIPT_CDATA;
}
p.prototype.startTagBaseLinkMeta = function(name, attributes) {
View
28 lib/html5/tokenizer.js
@@ -81,7 +81,11 @@ t.prototype.tokenize = function() {
t.prototype.emitToken = function(tok) {
tok = this.normalize_token(tok);
HTML5.debug('tokenizer.token', tok)
- this.emit('token', tok);
+ if (this.content_model == Models.SCRIPT_CDATA && tok.type == 'Characters') {
+ this.script_buffer += tok.data;
+ } else {
+ this.emit('token', tok);
+ }
}
t.prototype.consume_entity = function(buffer, from_attr) {
@@ -211,26 +215,32 @@ t.prototype.process_solidus_in_tag = function(buffer) {
t.prototype.data_state = function(buffer) {
var c = buffer.char()
- if(c != HTML5.EOF && this.content_model == Models.CDATA || this.content_model == Models.RCDATA) {
+ if(c != HTML5.EOF && this.content_model == Models.CDATA || this.content_model == Models.RCDATA || this.content_model == Models.SCRIPT_CDATA) {
this.lastFourChars += c;
if(this.lastFourChars.length >= 4) {
this.lastFourChars = this.lastFourChars.substr(-4)
}
}
+ if (this.content_model == Models.SCRIPT_CDATA) {
+ if (this.script_buffer == null) {
+ this.script_buffer = '';
+ }
+ }
+
if(c == HTML5.EOF) {
this.emitToken(HTML5.EOF_TOK);
this.commit();
return false;
} else if(c == '&' && (this.content_model == Models.PCDATA || this.content_model == Models.RCDATA) && !this.escapeFlag) {
this.state = 'entity_data_state';
- } else if(c == '-' && (this.content_model == Models.CDATA || this.content_model == Models.RCDATA) && !this.escapeFlag && this.lastFourChars == '<!--') {
+ } else if(c == '-' && (this.content_model == Models.CDATA || this.content_model == Models.RCDATA || this.content_model == Models.SCRIPT_CDATA) && !this.escapeFlag && this.lastFourChars == '<!--') {
this.escapeFlag = true;
this.emitToken({type: 'Characters', data: c});
this.commit();
- } else if(c == '<' && !this.escapeFlag && (this.content_model == Models.PCDATA || this.content_model == Models.RCDATA || this.content_model == Models.CDATA)) {
+ } else if(c == '<' && !this.escapeFlag && (this.content_model == Models.PCDATA || this.content_model == Models.RCDATA || this.content_model == Models.CDATA || this.content_model == Models.SCRIPT_CDATA)) {
this.state = 'tag_open_state';
- } else if(c == '>' && this.escapeFlag && (this.content_model == Models.CDATA || this.content_model == Models.RCDATA) && this.lastFourChars.match(/-->$/)) {
+ } else if(c == '>' && this.escapeFlag && (this.content_model == Models.CDATA || this.content_model == Models.RCDATA || this.content_model == Models.SCRIPT_CDATA) && this.lastFourChars.match(/-->$/)) {
this.escapeFlag = false;
this.emitToken({type: 'Characters', data: c});
this.commit();
@@ -288,7 +298,7 @@ t.prototype.tag_open_state = function(buffer) {
this.state = 'data_state';
}
} else {
- // We know the content model flag is set to either RCDATA or CDATA
+ // We know the content model flag is set to either RCDATA or CDATA or SCRIPT_CDATA
// now because this state can never be entered with the PLAINTEXT
// flag.
if (data == '/') {
@@ -303,7 +313,7 @@ t.prototype.tag_open_state = function(buffer) {
}
t.prototype.close_tag_open_state = function(buffer) {
- if(this.content_model == Models.RCDATA || this.content_model == Models.CDATA) {
+ if(this.content_model == Models.RCDATA || this.content_model == Models.CDATA || this.content_model == Models.SCRIPT_CDATA) {
var chars = '';
if(this.current_token) {
for(var i = 0; i <= this.current_token.name.length; i++) {
@@ -811,6 +821,10 @@ t.prototype.emit_current_token = function() {
}
break;
}
+ if (this.current_token.name == "script") {
+ this.emitToken({ type: 'Characters', data: this.script_buffer });
+ this.script_buffer = null;
+ }
this.emitToken(tok);
this.state = 'data_state';
}
Something went wrong with that request. Please try again.