Skip to content

Commit

Permalink
Added DomUtils
Browse files Browse the repository at this point in the history
  • Loading branch information
tautologistics committed May 4, 2010
1 parent 000de68 commit 514ad43
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 4 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,7 @@ becomes:
, children: [ { data: 'xxx', type: 'text' } ]
}
]

## DomUtils

### TBD (see utils_example.js for now)
78 changes: 78 additions & 0 deletions node-htmlparser.js
Original file line number Diff line number Diff line change
Expand Up @@ -547,10 +547,88 @@ function DefaultHandler (callback, options) {
}
}

// Query helpers for a parsed DOM: a tree of plain node objects (or arrays of
// them) carrying `type`, and optionally `name`, `data`, `attribs`, `children`.
var DomUtils = {
	/**
	 * Tests a single node against every criterion in `options`.
	 *
	 * Recognised option keys:
	 *   - tag_name:     node type must be "tag", "script" or "style" and
	 *                   `element.name` must equal the value.
	 *   - tag_type:     `element.type` must equal the value.
	 *   - tag_contains: node type must be "text", "comment" or "directive" and
	 *                   `element.data` must contain the value as a substring.
	 *   - anything else is treated as an attribute name whose value must equal
	 *     `element.attribs[key]`.
	 *
	 * @param {Object} options - Criteria; all own properties must match.
	 * @param {Object} element - Node to test.
	 * @returns {boolean} true when every criterion matches (vacuously true for
	 *   empty options), false for a falsy element or any mismatch.
	 */
	testElement: function DomUtils$testElement (options, element) {
		if (!element) {
			return(false);
		}

		for (var key in options) {
			// Only the caller's own criteria count. Without this guard any
			// enumerable property inherited through the prototype chain would be
			// treated as an attribute filter and reject every attribute-less node.
			if (!Object.prototype.hasOwnProperty.call(options, key)) {
				continue;
			}

			if (key === "tag_name") {
				if (element.type !== "tag" && element.type !== "script" && element.type !== "style") {
					return(false);
				}
				// Loose comparison kept on purpose for caller-supplied values.
				if (options["tag_name"] != element.name) {
					return(false);
				}
			} else if (key === "tag_type") {
				if (element.type != options["tag_type"]) {
					return(false);
				}
			} else if (key === "tag_contains") {
				if (element.type !== "text" && element.type !== "comment" && element.type !== "directive") {
					return(false);
				}
				if (!element.data) {
					return(false);
				}
				if (element.data.indexOf(options["tag_contains"]) < 0) {
					return(false);
				}
			} else {
				// Loose comparison kept on purpose so that e.g. a numeric id
				// still matches the string stored in the attribute map.
				if (!element.attribs || options[key] != element.attribs[key]) {
					return(false);
				}
			}
		}

		return(true);
	}

	/**
	 * Collects, in document (pre-order) order, every node at or below
	 * `currentElement` that satisfies `options` (see testElement).
	 *
	 * @param {Object} options - Criteria passed to testElement.
	 * @param {Object|Array} currentElement - A node, or an array of nodes.
	 * @returns {Array} Matching nodes; empty when nothing matches or the
	 *   starting point is falsy.
	 */
	, getElements: function DomUtils$getElements (options, currentElement) {
		var found = [];

		// Results are pushed into one shared array instead of concatenating the
		// result of every recursive call, which re-copied matches at each level.
		function collect (node) {
			if (!node) {
				return;
			}

			var isList = node instanceof Array;
			var elementList;

			// An array is only a container of nodes, never a node itself, so it
			// must not be reported as a match (it used to be when options had no
			// criteria).
			if (!isList && DomUtils.testElement(options, node)) {
				found.push(node);
			}

			if (node.children) {
				elementList = node.children;
			} else if (isList) {
				elementList = node;
			} else {
				return;
			}

			for (var i = 0; i < elementList.length; i++) {
				collect(elementList[i]);
			}
		}

		collect(currentElement);

		return(found);
	}

	/**
	 * Finds the first node (in document order) whose `id` attribute equals `id`.
	 *
	 * @param {*} id - Value compared against `attribs.id`.
	 * @param {Object|Array} currentElement - A node, or an array of nodes.
	 * @returns {Object|null} The first match, or null when there is none.
	 */
	, getElementById: function DomUtils$getElementById (id, currentElement) {
		var result = DomUtils.getElements({ id: id }, currentElement);
		return(result.length ? result[0] : null);
	}

	/**
	 * Finds every "tag", "script" or "style" node whose name equals `name`.
	 *
	 * @param {string} name - Element name to match.
	 * @param {Object|Array} currentElement - A node, or an array of nodes.
	 * @returns {Array} Matching nodes in document order.
	 */
	, getElementsByTagName: function DomUtils$getElementsByTagName (name, currentElement) {
		return(DomUtils.getElements({ tag_name: name }, currentElement));
	}

	/**
	 * Finds every node whose `type` equals `type`.
	 *
	 * @param {string} type - Node type to match (the example script uses "text").
	 * @param {Object|Array} currentElement - A node, or an array of nodes.
	 * @returns {Array} Matching nodes in document order.
	 */
	, getElementsByTagType: function DomUtils$getElementsByTagType (type, currentElement) {
		return(DomUtils.getElements({ tag_type: type }, currentElement));
	}
};

exports.Parser = Parser;

exports.DefaultHandler = DefaultHandler;

exports.ElementType = ElementType;

exports.DomUtils = DomUtils;

})();
8 changes: 4 additions & 4 deletions runtests.html
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@
}, test.options);
var parser = new Tautologistics.NodeHtmlParser.Parser(handler);
document.write("<b>" + test.name + "</b>: ");
parser.ParseComplete(test.html);
parser.parseComplete(test.html);
var resultComplete = handler.dom;
var chunkPos = 0;
parser.Reset();
parser.reset();
while (chunkPos < test.html.length) {
parser.ParseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
chunkPos += chunkSize;
}
parser.Done();
parser.done();
var resultChunk = handler.dom;
var testResult =
JSON.stringify(resultComplete).toString() === JSON.stringify(test.expected).toString()
Expand Down
29 changes: 29 additions & 0 deletions utils_example.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//node --prof --prof_auto profile.js
//deps/v8/tools/mac-tick-processor v8.log

// Example: parse a small HTML fragment and query the resulting DOM with the
// DomUtils helpers exported by node-htmlparser.
var sys = require("sys");
var htmlparser = require("./node-htmlparser");

var html = "<a>text a</a><b id='x'>text b</b><c class='y'>text c</c><d id='z' class='w'><e>text e</e></d>";

var handler = new htmlparser.DefaultHandler(function(err, dom) {
	if (err) {
		sys.debug("Error: " + err);
	}
	else {
		sys.debug(sys.inspect(dom, false, null));

		// Lookup by id attribute.
		var id = htmlparser.DomUtils.getElementById("x", dom);
		sys.debug("id: " + sys.inspect(id, false, null));

		// Lookup by an arbitrary attribute. `class` is a reserved word, so it
		// cannot be used as a variable name and is quoted when used as a key.
		var byClass = htmlparser.DomUtils.getElements({ "class": "y" }, dom);
		sys.debug("class: " + sys.inspect(byClass, false, null));

		// Lookup by element name.
		var name = htmlparser.DomUtils.getElementsByTagName("a", dom);
		sys.debug("name: " + sys.inspect(name, false, null));

		// Lookup by node type.
		var text = htmlparser.DomUtils.getElementsByTagType("text", dom);
		sys.debug("text: " + sys.inspect(text, false, null));

		// Chained queries: each result array is the starting point of the next.
		var nested = htmlparser.DomUtils.getElements({ tag_name: "d", id: "z", "class": "w" }, dom);
		nested = htmlparser.DomUtils.getElementsByTagName("e", nested);
		nested = htmlparser.DomUtils.getElementsByTagType("text", nested);
		sys.debug("nested: " + sys.inspect(nested, false, null));
	}
}, { verbose: false });
var parser = new htmlparser.Parser(handler);
parser.parseComplete(html);

0 comments on commit 514ad43

Please sign in to comment.