Skip to content

Commit

Permalink
added doctype
Browse files Browse the repository at this point in the history
  • Loading branch information
deanmao committed May 2, 2012
1 parent bc66fe3 commit 6bcd609
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
37 changes: 37 additions & 0 deletions lib/htmlparser.js
Expand Up @@ -49,6 +49,7 @@ var Mode = {
Tag: 'tag',
Attr: 'attr',
CData: 'cdata',
Doctype: 'doctype',
Comment: 'comment'
};

Expand Down Expand Up @@ -136,6 +137,8 @@ function Parser (builder, options) {
return this._parseAttr(this._state);
case Mode.CData:
return this._parseCData(this._state);
case Mode.Doctype:
return this._parseDoctype(this._state);
case Mode.Comment:
return this._parseComment(this._state);
}
Expand Down Expand Up @@ -224,6 +227,11 @@ function Parser (builder, options) {
state.pos += 8;
return;
}
if (!match[1] && match[2].substr(0, 8) === '!DOCTYPE') {
state.mode = Mode.Doctype;
state.pos += 8;
return;
}
if (!state.done && (state.pos + match[0].length) === state.data.length) {
//We're at the end of the data, might be incomplete
state.needData = true;
Expand Down Expand Up @@ -400,6 +408,35 @@ function Parser (builder, options) {
}
};

/**
 * Consumes the body of a doctype declaration while the parser is in
 * Mode.Doctype and emits it as a single `{ type: Mode.Doctype, data: ... }`
 * node through `this._write`.
 *
 * The body is everything from `state.pos` up to (not including) the next '>'.
 * If the terminator has not arrived yet, the available text is buffered in
 * `state.pendingText` and `state.needData` is set so that a later call can
 * finish the node. Once the node is written the parser switches back to
 * Mode.Text.
 *
 * Takes no parameters; it reads and updates `this._state`
 * (`data`, `pos`, `done`, `needData`, `pendingText`, `mode`).
 */
Parser.prototype._parseDoctype = function Parser$_parseDoctype () {
    var state = this._state;
    var foundPos = state.data.indexOf('>', state.pos);
    if (foundPos < 0 && state.done) {
        // No more input will arrive: treat the rest of the buffer as the body.
        foundPos = state.data.length;
    }
    if (foundPos < 0) {
        // Terminator not seen yet: stash what we have and ask for more data.
        if (!state.pendingText) {
            state.pendingText = [];
        }
        state.pendingText.push(state.data.substring(state.pos));
        state.pos = state.data.length;
        state.needData = true;
        return;
    }
    var text = state.data.substring(state.pos, foundPos);
    if (state.pendingText) {
        // Prepend whatever earlier chunks contributed to this doctype.
        state.pendingText.push(text);
        text = state.pendingText.join('');
        state.pendingText = null;
    }
    this._write({ type: Mode.Doctype, data: text });
    state.mode = Mode.Text;
    // Skip the '>' itself, but never move past the end of the buffer (the
    // unterminated-at-EOF case has no '>' to skip).
    state.pos = Math.min(foundPos + 1, state.data.length);
};

Parser.re_parseComment_findEnding = /\-{1,2}$/;
Parser.prototype._parseComment = function Parser$_parseComment () {
var state = this._state;
Expand Down
5 changes: 5 additions & 0 deletions tests/parser.js
Expand Up @@ -367,6 +367,11 @@ exports['html inside comment'] = {
, expected: [{ type: 'comment', data: ' <div>foo</div> '}]
};

// An HTML 4.01 Transitional doctype fed as one chunk is expected to produce a
// single 'doctype' node whose data is the text after '<!DOCTYPE' up to the '>'.
exports['transitional doctype'] = {
    data: [
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">'
    ],
    expected: [
        {
            type: 'doctype',
            data: ' HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"'
        }
    ]
};

exports['html inside cdata'] = {
data: ['<![CDATA[ <div>foo</div> ]]>']
, expected: [{ type: 'cdata', data: ' <div>foo</div> '}]
Expand Down

0 comments on commit 6bcd609

Please sign in to comment.