Skip to content
Browse files

Work started on test suite

  • Loading branch information...
1 parent a0fe8d7 commit a57aa9ee39b1d524d66e77f14f48b8bcda7f9f13 Harry Fuecks committed Oct 1, 2010
Showing with 108 additions and 28 deletions.
  1. +13 −6 lib/soupselect.js
  2. +31 −0 testdata/test.html
  3. +64 −22 tests/soupselect.js
View
19 lib/soupselect.js
@@ -5,7 +5,7 @@ http://www.opensource.org/licenses/mit-license.php
Right now just a working prototype...
*/
-var htmlparser = require("htmlparser");
+var domUtils = require("htmlparser").DomUtils;
var sys = require('sys');
var tagRe = /^[a-z0-9]+$/
@@ -72,7 +72,7 @@ exports.select = function(dom, selector) {
var found = [];
currentContext.forEach(function(context) {
- found = found.concat(htmlparser.DomUtils.getElements(options, context));
+ found = found.concat(domUtils.getElements(options, context));
});
currentContext = found
@@ -81,7 +81,7 @@ exports.select = function(dom, selector) {
// ID selector
else if ( token.indexOf('#') != -1 ) {
- var found = htmlparser.DomUtils.getElementById(token.split('#', 2)[1], currentContext[0]);
+ var found = domUtils.getElementById(token.split('#', 2)[1], currentContext[0]);
if (found[0] === null) {
currentContext = [];
throw StopIteration;
@@ -112,17 +112,24 @@ exports.select = function(dom, selector) {
else if ( token == '*' ) {
// nothing to do right?
}
- // Tag selector
+
+ // Tag selector
else {
if (!tagRe.test(token)) {
currentContext = [];
throw StopIteration;
}
-
+
var found = []
currentContext.forEach(function(context) {
- found = found.concat(htmlparser.DomUtils.getElementsByTagName(token, context));
+ if ( typeof context.children != 'undefined' ) {
+ found = found.concat(domUtils.getElementsByTagName(token, context.children));
+ } else {
+ found = found.concat(domUtils.getElementsByTagName(token, context));
+ }
+
});
+
currentContext = found;
}
});
View
31 testdata/test.html
@@ -0,0 +1,31 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>The title</title>
+ <link rel="stylesheet" href="blah.css" type="text/css" id="l1">
+</head>
+<body>
+
+<div id="main">
+ <div id="inner">
+ <h1 id="header1">An H1</h1>
+ <p>Some text</p>
+ <p class="onep" id="p1">Some more text</p>
+ <h2 id="header2">An H2</h2>
+ <p class="class1 class2 class3" id="pmulti">Another</p>
+ <a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
+ <h2 id="header3">Another H2</h2>
+ <a id="me" href="http://simonwillison.net/" rel="me">me</a>
+ </div>
+ <p lang="en" id="lang-en">English</p>
+ <p lang="en-gb" id="lang-en-gb">English UK</p>
+ <p lang="en-us" id="lang-en-us">English US</p>
+ <p lang="fr" id="lang-fr">French</p>
+</div>
+
+<div id="footer">
+</div>
+
+</body>
+</html>
View
86 tests/soupselect.js
@@ -1,26 +1,68 @@
-var soupselect = require('soupselect'),
- nodeunit = require('nodeunit'),
- htmlparser = require("htmlparser")
+var select = require('soupselect').select,
+ htmlparser = require("htmlparser"),
+ fs = require('fs'),
sys = require('sys');
-
-var select = soupselect.select;
-var html = "<a class='y'>text a</a><b id='x'>text b</b><c class='y'>text c</c><d id='z' class='w'><e>text e</e></d><g>bogus</g><g class='g h i'>hhh</g><g class='h'>foo</g>";
+var html = fs.readFileSync('testdata/test.html', 'utf-8');
-var handler = new htmlparser.DefaultHandler(function(err, dom) {
- if (err) {
- sys.debug("Error: " + err);
- } else {
- sys.puts("a : " + sys.inspect(select(dom, 'a')));
- sys.puts("g : " + sys.inspect(select(dom, 'g')));
- sys.puts("g.h : " + sys.inspect(select(dom, 'g.h')));
- sys.puts("g[class=h] : " + sys.inspect(select(dom, 'g[class=h]'))); // ISSUES
- sys.puts("g[class~=h] : " + sys.inspect(select(dom, 'g[class~=h]'))); // ISSUES
- sys.puts("#x : " + sys.inspect(select(dom, '#x')));
- sys.puts("* : " + sys.inspect(select(dom, '*')));
- sys.puts("g* : " + sys.inspect(select(dom, 'g *')));
- }
-});
+/**
+ * Parses the module-level `html` fixture and passes the resulting DOM to
+ * `callback`, then marks the test as finished.
+ *
+ * @param test     test object; must provide ifError() and done()
+ * @param callback function(dom) called with the parsed DOM when parsing succeeds
+ */
+function runTest(test, callback) {
+    var handler = new htmlparser.DefaultHandler(function(err, dom) {
+        if (err) {
+            // Log the error, then record it as a failed assertion. Logging
+            // alone would let the test finish green without checking anything.
+            sys.debug("Error: " + err);
+            test.ifError(err);
+        } else {
+            callback(dom);
+        }
+    });
+    var parser = new htmlparser.Parser(handler);
+    parser.parseComplete(html);
+    // NOTE(review): assumes the handler callback has already run by the time
+    // parseComplete() returns, so every assertion precedes done() - confirm.
+    test.done();
+}
-var parser = new htmlparser.Parser(handler);
-parser.parseComplete(html);
+/**
+ * Asserts that `selector`, applied to `dom`, matches exactly the elements
+ * whose id attributes are listed in `expected_ids` (order is ignored).
+ *
+ * @param test         test object; must provide deepEqual()
+ * @param dom          parsed DOM handed to select()
+ * @param selector     selector string to evaluate
+ * @param expected_ids array of the id values expected to match; not modified
+ */
+function assertSelects(test, dom, selector, expected_ids) {
+    var el_ids = [];
+    select(dom, selector).forEach(function(el) {
+        // A matched element without an attribs object yields `undefined`, so
+        // the comparison below fails with the descriptive message instead of
+        // a TypeError being thrown here.
+        el_ids.push(el.attribs ? el.attribs.id : undefined);
+    });
+    el_ids.sort();
+    // Sort a copy so the caller's array is left untouched.
+    var expected_sorted = expected_ids.slice().sort();
+    // Arguments are (actual, expected, message).
+    test.deepEqual(
+        el_ids,
+        expected_sorted,
+        "Selector " + selector + ", expected " + sys.inspect(expected_sorted)+ ", got " + sys.inspect(el_ids)
+    );
+}
+
+// Basic tag and descendant selector tests, run against testdata/test.html.
+// Each test calls runTest() exactly once, because runTest() itself calls
+// test.done() and a test must signal completion only once.
+exports.basicSelectors = {
+    // A tag selector that matches a single element.
+    one_tag_one: function(test) {
+        runTest(test, function(dom) {
+            var els = select(dom, 'title');
+            test.equal(els.length, 1);
+            test.equal(els[0].name, 'title');
+            test.equal(els[0].children[0].raw, 'The title');
+        });
+    },
+
+    // A tag selector that matches several elements (#main, #inner, #footer).
+    one_tag_many: function(test) {
+        runTest(test, function(dom) {
+            var els = select(dom, 'div');
+            test.equal(els.length, 3);
+            els.forEach(function(div) {
+                test.equal(div.name, 'div');
+            });
+        });
+    },
+
+    // Descendant selector matching a single nested element.
+    tag_in_tag_one: function(test) {
+        runTest(test, function(dom) {
+            assertSelects(test, dom, 'div div', ['inner']);
+        });
+    },
+
+    // Descendant selectors that should all match every div in the document.
+    tag_in_tag_many: function(test) {
+        // Parse once and check all selectors inside the single callback, so
+        // that test.done() is not called once per selector.
+        runTest(test, function(dom) {
+            ['html div', 'html body div', 'body div'].forEach(function(selector) {
+                assertSelects(test, dom, selector, ['main', 'inner', 'footer']);
+            });
+        });
+    }
+};

0 comments on commit a57aa9e

Please sign in to comment.
Something went wrong with that request. Please try again.