Permalink
Browse files

detect non-html responses

  • Loading branch information...
sylvinus committed Feb 26, 2013
1 parent 3e90506 commit 2d1a0706107e499c4a3fae289d8a6d5ebc1765e9
Showing with 25 additions and 1 deletion.
  1. +4 −1 lib/crawler.js
  2. +21 −0 test/units/simple.js
View
@@ -236,7 +236,10 @@ exports.Crawler = function(options) {
response.options = toQueue;
- if (toQueue.jQuery && toQueue.method!="HEAD") {
+ // This could definitely be improved by *also* matching content-type headers
+ var isHTML = response.body.match(/^\s*</);
+
+ if (isHTML && toQueue.jQuery && toQueue.method!="HEAD") {
// TODO support for non-HTML content
// https://github.com/joshfire/node-crawler/issues/9
View
@@ -108,6 +108,27 @@ test("one request + user agent", function() {
});
+test("Auto-disabling of jQuery if no html tag first", function() {
+ expect( 2 );
+
+ stop();
+
+ var c = new Crawler({
+ "debug":DEBUG,
+ "userAgent":"test/1.2",
+ "forceUTF8":true,
+ "callback":function(error,result,$) {
+ equal(error,null);
+ ok(result.body=="Your user agent: test/1.2");
+ start();
+ }
+ });
+
+ c.queue(["http://127.0.0.1:"+MOCKPORT+"/echo_useragent"]);
+
+});
+
+
test("from the readme",function() {
expect( 2 );

0 comments on commit 2d1a070

Please sign in to comment.