diff --git a/bin/cli.js b/bin/cli.js index e7dd1ce..e0f6681 100644 --- a/bin/cli.js +++ b/bin/cli.js @@ -30,7 +30,7 @@ if (!spiderStartUrl || !testDir) { optimist.showHelp(); process.exit(-1); } else { - spiderTest.runTests(spiderStartUrl, testDir, null, null, reporters); + spiderTest.runTests(spiderStartUrl, testDir, null, null, reporters, config); } var reporters = initialiseReporters(config.get("reporters")); diff --git a/bin/options.js b/bin/options.js index 19047de..8b15be1 100644 --- a/bin/options.js +++ b/bin/options.js @@ -3,7 +3,7 @@ exports.options = { "describe": "Path to a json configuration file defining custom options. All command line options - except this one and --help - will be used if present. Options specified directly on the command line override file loaded options." }, "failOnMissingRoute": { - "default": false, + "default": "false", "describe": "Set to true if spidering should stop when a route to an encountered link is not available" }, "help": { @@ -11,17 +11,20 @@ exports.options = { "describe": "This message." }, "reporters": { - "alias": "reporters", "default": "../lib/reporters/ConsoleReporter", "describe": "Comma separated list of paths to reporter.js Reporter implementations for reporting test results." }, "reportOptions": { "describe": "String of options passed into the createReporter() Reporter function. It is up to the reporter to determine what to do with it." }, + "spiderCrossDomain": { + "default": "false", + "describe": "Allow spidering to continue across different domains" + }, "spiderStartUrl": { "describe": "The full http url from which to start spidering." }, "testDir" : { - "describe": "Path to folder containing javascript test definitions " + "describe": "Absolute path to folder containing javascript test definitions " } }; \ No newline at end of file diff --git a/examples/tests/htmlTests.js b/examples/tests/htmlTests.js index 1c6fa1e..8296818 100644 --- a/examples/tests/htmlTests.js +++ b/examples/tests/htmlTests.js @@ -2,13 +2,13 @@ var should = require("should"); exports.tests = { "Common HTML Tests" : { - urlPattern: "\.html$", + urlPattern: "/", tests: { "HTML responses should have a statusCode of 200": function(spiderPayload) { should.equal(spiderPayload.response.statusCode, 200) }, - "HTML responses should have a content type of text/html; charset=UTF-8": function (spiderPayload) { - should.equal("text/html; charset=UTF-8", spiderPayload.response.headers['content-type']); + "HTML responses should have a content type of text/html": function (spiderPayload) { + spiderPayload.response.headers['content-type'].should.include("text/html"); } } } diff --git a/lib/spider.js b/lib/spider.js index 4e6c91b..8dd3f2f 100644 --- a/lib/spider.js +++ b/lib/spider.js @@ -77,6 +77,7 @@ function Spider(options) { this.userAgent = options.userAgent || firefoxUA; this.cache = options.cache || new NoCache(); this.pool = options.pool || {maxSockets: options.maxSockets}; + this.spiderCrossDomain = options.spiderCrossDomain || false; this.options = options; this.currentUrl = null; this.routers = {}; @@ -100,6 +101,9 @@ Spider.prototype.get = function (url, referer) { this.urls.push(url); var u = urlParse(url); + if(!this.domain) { + this.domain = u.hostname; + } if (!this.routers[u.host]) { if (this.throwOnMissingRoute) { errors.NO_ROUTES_FOR_HOST.thro("No routes for host '" + u.host + "'"); @@ -117,6 +121,9 @@ Spider.prototype.get = function (url, referer) { return this; } } + if(u.hostname !== this.domain && !options.spiderCrossDomain) { + return this; + } if (referer) { h.referer = referer; @@ -135,6 +142,7 @@ Spider.prototype.get = function (url, referer) { request.get({url:url, headers:h, pool:self.pool}, function (e, resp) { self.emit('log', debug, 'Response received for ' + url + '.'); + if (e || !resp) { console.log(e); //todo make sure this exception is visible when it is needed self.emit('log', debug, 'Error getting URL ' + url); @@ -162,37 +170,46 @@ Spider.prototype.route = function (hosts, pattern, cb) { if (!self.routers[host]) { self.routers[host] = new routes.Router(); } - self.routers[host].addRoute(pattern, cb); + self.routers[host].addRoute(new RegExp(pattern), cb); }); return self; }; Spider.prototype._handler = function (url, referer, response) { - var u = urlParse(url); - var $ = cheerio.load(response.body); - if (this.routers[u.host]) { - var r = this.routers[u.host].match(u.href.slice(u.href.indexOf(u.host) + u.host.length)); - r.spider = this; - r.response = response; - r.url = u; - - this.currentUrl = url; - r.fn.call(r, $, url); - this.currentUrl = null; - } - if (this.options.autoSpider) { - var auto = this.options.autoSpider; - if (auto & AUTO.ANCHORS) { - spiderUrls("a", this, response, $); + try { + var u = urlParse(url); + var $ = cheerio.load(response.body); + if (this.routers[u.host]) { + var r = this.routers[u.host].match(u.href.slice(u.href.indexOf(u.host) + u.host.length)); + r.spider = this; + r.response = response; + r.url = u; + + this.currentUrl = url; + r.fn.call(r, $, url); + this.currentUrl = null; } - if (auto & AUTO.IMAGES) { - spiderUrls("img", this, response, $, "src"); - } - if (auto & AUTO.LINKS) { - spiderUrls("link", this, response, $); + if (this.options.autoSpider) { + var auto = this.options.autoSpider; + if (auto & AUTO.ANCHORS) { + spiderUrls("a", this, response, $); + } + if (auto & AUTO.IMAGES) { + spiderUrls("img", this, response, $, "src"); + } + if (auto & AUTO.LINKS) { + spiderUrls("link", this, response, $); + } + if (auto & AUTO.SCRIPTS) { + spiderUrls("script", this, response, $, "src"); + } } - if (auto & AUTO.SCRIPTS) { - spiderUrls("script", this, response, $, "src"); + } catch(e) { + if (e.message === "Maximum call stack size exceeded") { + this.emit('log', info, e.message); + } else { + this.emit('log', error, e); + throw e; } } }; diff --git a/lib/spiderTest.js b/lib/spiderTest.js index e764193..81b876f 100644 --- a/lib/spiderTest.js +++ b/lib/spiderTest.js @@ -19,8 +19,12 @@ var timeout; * relative to this dir. * @param {Reporter | Reporter []} reporter one or more instantiated reporters to use for generating the test report. If not specified then * the console reporter is used. This argument can be a single Reporter or an array of Reporters. + * @config {Config} config a Config object from nconf describing the options for running the tests */ -exports.runTests = function (startUrl, testsDir, callback, baseDir, reporter, failOnMissingRoute) { +exports.runTests = function (startUrl, testsDir, callback, baseDir, reporter, config) { + + var failOnMissingRoute = config && config.get("failOnMissingRoute") !== "false"; + var spiderCrossDomain = config && config.get("spiderCrossDomain") !== "false"; var origDir; @@ -48,6 +52,7 @@ exports.runTests = function (startUrl, testsDir, callback, baseDir, reporter, fa var spider = spiderModule({ throwOnMissingRoute: failOnMissingRoute, + spiderCrossDomain: spiderCrossDomain, autoSpider: spiderOptions.AUTO.ANCHORS | spiderOptions.AUTO.LINKS | spiderOptions.AUTO.IMAGES | spiderOptions.AUTO.SCRIPTS }); @@ -63,8 +68,6 @@ exports.runTests = function (startUrl, testsDir, callback, baseDir, reporter, fa }) .get(startUrl.href) .log("error"); - - }; /**