diff --git a/lib/node-htmlparser.js b/lib/node-htmlparser.js index b348790..ad0deeb 100644 --- a/lib/node-htmlparser.js +++ b/lib/node-htmlparser.js @@ -18,7 +18,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ***********************************************/ -/* v1.6.3 */ +/* v1.6.4 */ (function () { @@ -244,17 +244,18 @@ function Parser (handler) { element.type = ElementType.Text; //If the previous element is text, append the current text to it if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Text) { + var prevElement = this._elements[this._elements.length - 1]; if (element.raw != "") { - var prevElement = this._elements[this._elements.length - 1]; prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw; element.raw = element.data = ""; //This causes the current element to not be added to the element list - } - else //Element is empty, so just append the last tag marker found + } else { //Element is empty, so just append the last tag marker found prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep; - } - else //The previous element was not text - if (element.raw != "") + } + } else { //The previous element was not text + if (element.raw != "") { element.raw = element.data = element.raw; + } + } } } } diff --git a/lib/node-htmlparser.min.js b/lib/node-htmlparser.min.js index 5ab1e72..2eb2998 100644 --- a/lib/node-htmlparser.min.js +++ b/lib/node-htmlparser.min.js @@ -18,5 +18,5 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ***********************************************/ -/* v1.6.3 */ -(function(){function e(a){this.validateHandler(a);this._handler=a;this.reset()}function n(a){n.super_.call(this,a,{ignoreWhitespace:true,verbose:false,enforceEmptyTags:false})}function g(a,c){this.reset();this._options=c?c:{};if(this._options.ignoreWhitespace==undefined)this._options.ignoreWhitespace=false;if(this._options.verbose==undefined)this._options.verbose=true;if(this._options.enforceEmptyTags==undefined)this._options.enforceEmptyTags=true;if(typeof a=="function")this._callback=a}if(!(typeof require== "function"&&typeof exports=="object"&&typeof module=="object"&&typeof __filename=="string"&&typeof __dirname=="string")){if(this.Tautologistics){if(this.Tautologistics.NodeHtmlParser)return}else this.Tautologistics={};this.Tautologistics.NodeHtmlParser={};exports=this.Tautologistics.NodeHtmlParser}var d={Text:"text",Directive:"directive",Comment:"comment",Script:"script",Style:"style",Tag:"tag"};e._reTrim=/(^\s+|\s+$)/g;e._reTrimComment=/(^\!--|--$)/g;e._reWhitespace=/\s/g;e._reTagName=/^\s*(\/?)\s*([^\s\/]+)/; e._reAttrib=/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;e._reTags=/[\<\>]/g;e.prototype.parseComplete=function(a){this.reset();this.parseChunk(a);this.done()};e.prototype.parseChunk=function(a){this._done&&this.handleError(Error("Attempted to parse chunk after parsing already done"));this._buffer+=a;this.parseTags()};e.prototype.done=function(){if(!this._done){this._done=true;if(this._buffer.length){var a=this._buffer;this._buffer= "";a={raw:a,data:this._parseState==d.Text?a:a.replace(e._reTrim,""),type:this._parseState};if(this._parseState==d.Tag||this._parseState==d.Script||this._parseState==d.Style)a.name=this.parseTagName(a.data);this.parseAttribs(a);this._elements.push(a)}this.writeHandler();this._handler.done()}};e.prototype.reset=function(){this._buffer="";this._done=false;this._elements=[];this._next=this._current=this._elementsCurrent=0;this._parseState=d.Text;this._prevTagSep="";this._tagStack=[];this._handler.reset()}; e.prototype._handler=null;e.prototype._buffer=null;e.prototype._done=false;e.prototype._elements=null;e.prototype._elementsCurrent=0;e.prototype._current=0;e.prototype._next=0;e.prototype._parseState=d.Text;e.prototype._prevTagSep="";e.prototype._tagStack=null;e.prototype.parseTagAttribs=function(a){for(var c=a.length,b=0;b"){this._tagStack.pop();if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){i=this._elements[this._elements.length-1];i.raw=i.data=(i.raw+b.raw).replace(e._reTrimComment, "");b.raw=b.data="";b.type=d.Text}else b.type=d.Comment}else{b.type=d.Comment;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){i=this._elements[this._elements.length-1];i.raw=i.data=i.raw+b.raw+c;b.raw=b.data="";b.type=d.Text}else b.raw=b.data=b.raw+c}}if(b.type==d.Tag){b.name=h;if(b.raw.indexOf("!--")==0){b.type=d.Comment;delete b.name;j=b.raw.length;if(b.raw.charAt(j-1)=="-"&&b.raw.charAt(j-2)=="-"&&c==">")b.raw=b.data=b.raw.replace(e._reTrimComment,"");else{b.raw+= c;this._tagStack.push(d.Comment)}}else if(b.raw.indexOf("!")==0||b.raw.indexOf("?")==0)b.type=d.Directive;else if(b.name=="script"){b.type=d.Script;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Script)}else if(b.name=="/script")b.type=d.Script;else if(b.name=="style"){b.type=d.Style;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Style)}else if(b.name=="/style")b.type=d.Style;if(b.name&&b.name.charAt(0)=="/")b.data=b.name}if(b.raw!=""||b.type!=d.Text){this.parseAttribs(b); this._elements.push(b);b.type!=d.Text&&b.type!=d.Comment&&b.type!=d.Directive&&b.data.charAt(b.data.length-1)=="/"&&this._elements.push({raw:"/"+b.name,data:"/"+b.name,name:"/"+b.name,type:b.type})}this._parseState=c=="<"?d.Tag:d.Text;this._current=this._next+1;this._prevTagSep=c}this._buffer=this._current<=a?this._buffer.substring(this._current):"";this._current=0;this.writeHandler()};e.prototype.validateHandler=function(a){if(typeof a!="object")throw Error("Handler is not an object");if(typeof a.reset!= "function")throw Error("Handler method 'reset' is invalid");if(typeof a.done!="function")throw Error("Handler method 'done' is invalid");if(typeof a.writeTag!="function")throw Error("Handler method 'writeTag' is invalid");if(typeof a.writeText!="function")throw Error("Handler method 'writeText' is invalid");if(typeof a.writeComment!="function")throw Error("Handler method 'writeComment' is invalid");if(typeof a.writeDirective!="function")throw Error("Handler method 'writeDirective' is invalid");}; e.prototype.writeHandler=function(a){a=!!a;if(!(this._tagStack.length&&!a))for(;this._elements.length;){a=this._elements.shift();switch(a.type){case d.Comment:this._handler.writeComment(a);break;case d.Directive:this._handler.writeDirective(a);break;case d.Text:this._handler.writeText(a);break;default:this._handler.writeTag(a)}}};e.prototype.handleError=function(a){if(typeof this._handler.error=="function")this._handler.error(a);else throw a;};(function(a,c){var b=function(){};b.prototype=c.prototype; a.super_=c;a.prototype=new b;a.prototype.constructor=a})(n,g);n.prototype.done=function(){var a={},c,b=f.getElementsByTagName(function(k){return k=="rss"||k=="feed"},this.dom,false);if(b.length)c=b[0];if(c){if(c.name=="rss"){a.type="rss";c=c.children[0];a.id="";try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(h){}try{a.link=f.getElementsByTagName("link",c.children,false)[0].children[0].data}catch(i){}try{a.description=f.getElementsByTagName("description",c.children, false)[0].children[0].data}catch(j){}try{a.updated=new Date(f.getElementsByTagName("lastBuildDate",c.children,false)[0].children[0].data)}catch(m){}try{a.author=f.getElementsByTagName("managingEditor",c.children,false)[0].children[0].data}catch(o){}a.items=[];f.getElementsByTagName("item",c.children).forEach(function(k){var l={};try{l.id=f.getElementsByTagName("guid",k.children,false)[0].children[0].data}catch(q){}try{l.title=f.getElementsByTagName("title",k.children,false)[0].children[0].data}catch(r){}try{l.link= f.getElementsByTagName("link",k.children,false)[0].children[0].data}catch(s){}try{l.description=f.getElementsByTagName("description",k.children,false)[0].children[0].data}catch(t){}try{l.pubDate=new Date(f.getElementsByTagName("pubDate",k.children,false)[0].children[0].data)}catch(u){}a.items.push(l)})}else{a.type="atom";try{a.id=f.getElementsByTagName("id",c.children,false)[0].children[0].data}catch(p){}try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(v){}try{a.link= f.getElementsByTagName("link",c.children,false)[0].attribs.href}catch(w){}try{a.description=f.getElementsByTagName("subtitle",c.children,false)[0].children[0].data}catch(x){}try{a.updated=new Date(f.getElementsByTagName("updated",c.children,false)[0].children[0].data)}catch(y){}try{a.author=f.getElementsByTagName("email",c.children,true)[0].children[0].data}catch(z){}a.items=[];f.getElementsByTagName("entry",c.children).forEach(function(k){var l={};try{l.id=f.getElementsByTagName("id",k.children, false)[0].children[0].data}catch(q){}try{l.title=f.getElementsByTagName("title",k.children,false)[0].children[0].data}catch(r){}try{l.link=f.getElementsByTagName("link",k.children,false)[0].attribs.href}catch(s){}try{l.description=f.getElementsByTagName("summary",k.children,false)[0].children[0].data}catch(t){}try{l.pubDate=new Date(f.getElementsByTagName("updated",k.children,false)[0].children[0].data)}catch(u){}a.items.push(l)})}this.dom=a}n.super_.prototype.done.call(this)};g._emptyTags={area:1, base:1,basefont:1,br:1,col:1,frame:1,hr:1,img:1,input:1,isindex:1,link:1,meta:1,param:1,embed:1};g.reWhitespace=/^\s*$/;g.prototype.dom=null;g.prototype.reset=function(){this.dom=[];this._done=false;this._tagStack=[];this._tagStack.last=function(){return this.length?this[this.length-1]:null}};g.prototype.done=function(){this._done=true;this.handleCallback(null)};g.prototype.writeTag=function(a){this.handleElement(a)};g.prototype.writeText=function(a){if(this._options.ignoreWhitespace)if(g.reWhitespace.test(a.data))return; this.handleElement(a)};g.prototype.writeComment=function(a){this.handleElement(a)};g.prototype.writeDirective=function(a){this.handleElement(a)};g.prototype.error=function(a){this.handleCallback(a)};g.prototype._options=null;g.prototype._callback=null;g.prototype._done=false;g.prototype._tagStack=null;g.prototype.handleCallback=function(a){if(typeof this._callback!="function")if(a)throw a;else return;this._callback(a,this.dom)};g.prototype.handleElement=function(a){this._done&&this.handleCallback(Error("Writing to the handler after done() called is not allowed without a reset()")); if(!this._options.verbose){delete a.raw;if(a.type=="tag"||a.type=="script"||a.type=="style")delete a.data}if(this._tagStack.last())if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive)if(a.name.charAt(0)=="/"){a=a.name.substring(1);if(!this._options.enforceEmptyTags||!g._emptyTags[a]){for(var c=this._tagStack.length-1;c>-1&&this._tagStack[c--].name!=a;);if(c>-1||this._tagStack[0].name==a)for(;c=0&&j.length>=h)return j;if(b&&c.children)c=c.children;else if(c instanceof Array)c=c;else return j;for(m=0;m=0&&j.length>=h)break}return j},getElementById:function(a,c,b){a=f.getElements({id:a},c,b,1);return a.length?a[0]:null},getElementsByTagName:function(a,c,b,h){return f.getElements({tag_name:a},c,b,h)},getElementsByTagType:function(a, c,b,h){return f.getElements({tag_type:a},c,b,h)}};exports.Parser=e;exports.DefaultHandler=g;exports.RssHandler=n;exports.ElementType=d;exports.DomUtils=f})(); \ No newline at end of file +/* v1.6.4 */ +(function(){function e(a){this.validateHandler(a);this._handler=a;this.reset()}function n(a){n.super_.call(this,a,{ignoreWhitespace:true,verbose:false,enforceEmptyTags:false})}function h(a,c){this.reset();this._options=c?c:{};if(this._options.ignoreWhitespace==undefined)this._options.ignoreWhitespace=false;if(this._options.verbose==undefined)this._options.verbose=true;if(this._options.enforceEmptyTags==undefined)this._options.enforceEmptyTags=true;if(typeof a=="function")this._callback=a}if(!(typeof require== "function"&&typeof exports=="object"&&typeof module=="object"&&typeof __filename=="string"&&typeof __dirname=="string")){if(this.Tautologistics){if(this.Tautologistics.NodeHtmlParser)return}else this.Tautologistics={};this.Tautologistics.NodeHtmlParser={};exports=this.Tautologistics.NodeHtmlParser}var d={Text:"text",Directive:"directive",Comment:"comment",Script:"script",Style:"style",Tag:"tag"};e._reTrim=/(^\s+|\s+$)/g;e._reTrimComment=/(^\!--|--$)/g;e._reWhitespace=/\s/g;e._reTagName=/^\s*(\/?)\s*([^\s\/]+)/; e._reAttrib=/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;e._reTags=/[\<\>]/g;e.prototype.parseComplete=function(a){this.reset();this.parseChunk(a);this.done()};e.prototype.parseChunk=function(a){this._done&&this.handleError(Error("Attempted to parse chunk after parsing already done"));this._buffer+=a;this.parseTags()};e.prototype.done=function(){if(!this._done){this._done=true;if(this._buffer.length){var a=this._buffer;this._buffer= "";a={raw:a,data:this._parseState==d.Text?a:a.replace(e._reTrim,""),type:this._parseState};if(this._parseState==d.Tag||this._parseState==d.Script||this._parseState==d.Style)a.name=this.parseTagName(a.data);this.parseAttribs(a);this._elements.push(a)}this.writeHandler();this._handler.done()}};e.prototype.reset=function(){this._buffer="";this._done=false;this._elements=[];this._next=this._current=this._elementsCurrent=0;this._parseState=d.Text;this._prevTagSep="";this._tagStack=[];this._handler.reset()}; e.prototype._handler=null;e.prototype._buffer=null;e.prototype._done=false;e.prototype._elements=null;e.prototype._elementsCurrent=0;e.prototype._current=0;e.prototype._next=0;e.prototype._parseState=d.Text;e.prototype._prevTagSep="";e.prototype._tagStack=null;e.prototype.parseTagAttribs=function(a){for(var c=a.length,b=0;b"){this._tagStack.pop();if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){g=this._elements[this._elements.length-1];g.raw=g.data=(g.raw+b.raw).replace(e._reTrimComment, "");b.raw=b.data="";b.type=d.Text}else b.type=d.Comment}else{b.type=d.Comment;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){g=this._elements[this._elements.length-1];g.raw=g.data=g.raw+b.raw+c;b.raw=b.data="";b.type=d.Text}else b.raw=b.data=b.raw+c}}if(b.type==d.Tag){b.name=i;if(b.raw.indexOf("!--")==0){b.type=d.Comment;delete b.name;g=b.raw.length;if(b.raw.charAt(g-1)=="-"&&b.raw.charAt(g-2)=="-"&&c==">")b.raw=b.data=b.raw.replace(e._reTrimComment,"");else{b.raw+= c;this._tagStack.push(d.Comment)}}else if(b.raw.indexOf("!")==0||b.raw.indexOf("?")==0)b.type=d.Directive;else if(b.name=="script"){b.type=d.Script;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Script)}else if(b.name=="/script")b.type=d.Script;else if(b.name=="style"){b.type=d.Style;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Style)}else if(b.name=="/style")b.type=d.Style;if(b.name&&b.name.charAt(0)=="/")b.data=b.name}if(b.raw!=""||b.type!=d.Text){this.parseAttribs(b); this._elements.push(b);b.type!=d.Text&&b.type!=d.Comment&&b.type!=d.Directive&&b.data.charAt(b.data.length-1)=="/"&&this._elements.push({raw:"/"+b.name,data:"/"+b.name,name:"/"+b.name,type:b.type})}this._parseState=c=="<"?d.Tag:d.Text;this._current=this._next+1;this._prevTagSep=c}this._buffer=this._current<=a?this._buffer.substring(this._current):"";this._current=0;this.writeHandler()};e.prototype.validateHandler=function(a){if(typeof a!="object")throw Error("Handler is not an object");if(typeof a.reset!= "function")throw Error("Handler method 'reset' is invalid");if(typeof a.done!="function")throw Error("Handler method 'done' is invalid");if(typeof a.writeTag!="function")throw Error("Handler method 'writeTag' is invalid");if(typeof a.writeText!="function")throw Error("Handler method 'writeText' is invalid");if(typeof a.writeComment!="function")throw Error("Handler method 'writeComment' is invalid");if(typeof a.writeDirective!="function")throw Error("Handler method 'writeDirective' is invalid");}; e.prototype.writeHandler=function(a){a=!!a;if(!(this._tagStack.length&&!a))for(;this._elements.length;){a=this._elements.shift();switch(a.type){case d.Comment:this._handler.writeComment(a);break;case d.Directive:this._handler.writeDirective(a);break;case d.Text:this._handler.writeText(a);break;default:this._handler.writeTag(a)}}};e.prototype.handleError=function(a){if(typeof this._handler.error=="function")this._handler.error(a);else throw a;};(function(a,c){var b=function(){};b.prototype=c.prototype; a.super_=c;a.prototype=new b;a.prototype.constructor=a})(n,h);n.prototype.done=function(){var a={},c,b=f.getElementsByTagName(function(j){return j=="rss"||j=="feed"},this.dom,false);if(b.length)c=b[0];if(c){if(c.name=="rss"){a.type="rss";c=c.children[0];a.id="";try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(i){}try{a.link=f.getElementsByTagName("link",c.children,false)[0].children[0].data}catch(g){}try{a.description=f.getElementsByTagName("description",c.children, false)[0].children[0].data}catch(l){}try{a.updated=new Date(f.getElementsByTagName("lastBuildDate",c.children,false)[0].children[0].data)}catch(m){}try{a.author=f.getElementsByTagName("managingEditor",c.children,false)[0].children[0].data}catch(o){}a.items=[];f.getElementsByTagName("item",c.children).forEach(function(j){var k={};try{k.id=f.getElementsByTagName("guid",j.children,false)[0].children[0].data}catch(q){}try{k.title=f.getElementsByTagName("title",j.children,false)[0].children[0].data}catch(r){}try{k.link= f.getElementsByTagName("link",j.children,false)[0].children[0].data}catch(s){}try{k.description=f.getElementsByTagName("description",j.children,false)[0].children[0].data}catch(t){}try{k.pubDate=new Date(f.getElementsByTagName("pubDate",j.children,false)[0].children[0].data)}catch(u){}a.items.push(k)})}else{a.type="atom";try{a.id=f.getElementsByTagName("id",c.children,false)[0].children[0].data}catch(p){}try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(v){}try{a.link= f.getElementsByTagName("link",c.children,false)[0].attribs.href}catch(w){}try{a.description=f.getElementsByTagName("subtitle",c.children,false)[0].children[0].data}catch(x){}try{a.updated=new Date(f.getElementsByTagName("updated",c.children,false)[0].children[0].data)}catch(y){}try{a.author=f.getElementsByTagName("email",c.children,true)[0].children[0].data}catch(z){}a.items=[];f.getElementsByTagName("entry",c.children).forEach(function(j){var k={};try{k.id=f.getElementsByTagName("id",j.children, false)[0].children[0].data}catch(q){}try{k.title=f.getElementsByTagName("title",j.children,false)[0].children[0].data}catch(r){}try{k.link=f.getElementsByTagName("link",j.children,false)[0].attribs.href}catch(s){}try{k.description=f.getElementsByTagName("summary",j.children,false)[0].children[0].data}catch(t){}try{k.pubDate=new Date(f.getElementsByTagName("updated",j.children,false)[0].children[0].data)}catch(u){}a.items.push(k)})}this.dom=a}n.super_.prototype.done.call(this)};h._emptyTags={area:1, base:1,basefont:1,br:1,col:1,frame:1,hr:1,img:1,input:1,isindex:1,link:1,meta:1,param:1,embed:1};h.reWhitespace=/^\s*$/;h.prototype.dom=null;h.prototype.reset=function(){this.dom=[];this._done=false;this._tagStack=[];this._tagStack.last=function(){return this.length?this[this.length-1]:null}};h.prototype.done=function(){this._done=true;this.handleCallback(null)};h.prototype.writeTag=function(a){this.handleElement(a)};h.prototype.writeText=function(a){if(this._options.ignoreWhitespace)if(h.reWhitespace.test(a.data))return; this.handleElement(a)};h.prototype.writeComment=function(a){this.handleElement(a)};h.prototype.writeDirective=function(a){this.handleElement(a)};h.prototype.error=function(a){this.handleCallback(a)};h.prototype._options=null;h.prototype._callback=null;h.prototype._done=false;h.prototype._tagStack=null;h.prototype.handleCallback=function(a){if(typeof this._callback!="function")if(a)throw a;else return;this._callback(a,this.dom)};h.prototype.handleElement=function(a){this._done&&this.handleCallback(Error("Writing to the handler after done() called is not allowed without a reset()")); if(!this._options.verbose){delete a.raw;if(a.type=="tag"||a.type=="script"||a.type=="style")delete a.data}if(this._tagStack.last())if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive)if(a.name.charAt(0)=="/"){a=a.name.substring(1);if(!this._options.enforceEmptyTags||!h._emptyTags[a]){for(var c=this._tagStack.length-1;c>-1&&this._tagStack[c--].name!=a;);if(c>-1||this._tagStack[0].name==a)for(;c=0&&l.length>=i)return l;if(b&&c.children)c=c.children;else if(c instanceof Array)c=c;else return l;for(m=0;m=0&&l.length>=i)break}return l},getElementById:function(a,c,b){a=f.getElements({id:a},c,b,1);return a.length?a[0]:null},getElementsByTagName:function(a,c,b,i){return f.getElements({tag_name:a},c,b,i)},getElementsByTagType:function(a, c,b,i){return f.getElements({tag_type:a},c,b,i)}};exports.Parser=e;exports.DefaultHandler=h;exports.RssHandler=n;exports.ElementType=d;exports.DomUtils=f})(); \ No newline at end of file