Skip to content
This repository has been archived by the owner on Mar 7, 2021. It is now read-only.

Commit

Permalink
Change: return synchronously from queue.
Browse files Browse the repository at this point in the history
When gathering statistics about the queue, many of the calculations are
synchronous. In these cases it makes sense to return the values
synchronously as well.

Add: slow thresholds as needed by tests.

Change: increase test cookie expiration date year so tests pass.
  • Loading branch information
nisaacson committed Oct 22, 2013
1 parent 39dc343 commit bf7d613
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 20 deletions.
50 changes: 31 additions & 19 deletions lib/queue.js
Expand Up @@ -75,39 +75,46 @@ FetchQueue.prototype.exists = function(protocol,domain,port,path,callback) {

if (!!this.scanIndex[url]) {
callback(null,1);
return 1;
} else {
this.scanIndex[url] = true;
callback(null,0);
return 1;
}
};

// Get last item in queue...
// The item is passed to `callback(err, item)` (when a function is supplied)
// and is also returned synchronously. The callback fires exactly once.
FetchQueue.prototype.last = function(callback) {
    // Fall back to a no-op so callers may rely on the return value alone.
    callback = callback && callback instanceof Function ? callback : function(){};
    var self = this;

    // With length 0 this reads index -1, which yields undefined unless such
    // a property has been set on the queue.
    var item = self[self.length-1];

    callback(null, item);
    return item;
};

// Get item from queue
// When `id` is numeric and below the queue length, the item at that index
// is passed to `callback(err, item)` and also returned synchronously.
// Otherwise the callback is NOT invoked and the function returns undefined.
FetchQueue.prototype.get = function(id, callback) {
    // Fall back to a no-op so callers may rely on the return value alone.
    callback = callback && callback instanceof Function ? callback : function(){};
    var item, self = this;

    if (!isNaN(id) && self.length > id) {
        item = self[id];
        callback(null, item);
        // Return the item itself, never whatever the callback returned.
        return item;
    }
};

// Get first unfetched item in the queue (and return its index)
FetchQueue.prototype.oldestUnfetchedItem = function(callback) {
callback = callback && callback instanceof Function ? callback : function(){};
var self = this;
var item, self = this;

for (var itemIndex = self.oldestUnfetchedIndex; itemIndex < self.length; itemIndex ++) {
if (self[itemIndex].status === "queued") {
self.oldestUnfetchedIndex = itemIndex;
return callback(null,self[itemIndex]);
item = self[itemIndex];
callback(null, item);
return item;
}
}

Expand All @@ -131,12 +138,13 @@ FetchQueue.prototype.max = function(statisticName,callback) {
});

callback(null,maxStatisticValue);
return maxStatisticValue;
};

// Gets the minimum total request time, request latency, or download time
FetchQueue.prototype.min = function(statisticName,callback) {
callback = callback && callback instanceof Function ? callback : function(){};
var minStatisticValue = Infinity, self = this;
var minimum, minStatisticValue = Infinity, self = this;

if (allowedStatistics.join().indexOf(statisticName) === -1) {
// Not a recognised statistic!
Expand All @@ -148,14 +156,15 @@ FetchQueue.prototype.min = function(statisticName,callback) {
minStatisticValue = item.stateData[statisticName];
}
});

callback(null,minStatisticValue === Infinity? 0 : minStatisticValue);
minimum = minStatisticValue === Infinity? 0 : minStatisticValue;
callback(null, minimum);
return minimum;
};

// Gets the average total request time, request latency, or download time
FetchQueue.prototype.avg = function(statisticName,callback) {
callback = callback && callback instanceof Function ? callback : function(){};
var NumberSum = 0, NumberCount = 0, self = this;
var average, NumberSum = 0, NumberCount = 0, self = this;

if (allowedStatistics.join().indexOf(statisticName) === -1) {
// Not a recognised statistic!
Expand All @@ -168,8 +177,9 @@ FetchQueue.prototype.avg = function(statisticName,callback) {
NumberCount ++;
}
});

callback(null,NumberSum / NumberCount);
average = NumberSum / NumberCount;
callback(null, average);
return average;
};

// Gets the number of requests which have been completed.
Expand Down Expand Up @@ -199,6 +209,7 @@ FetchQueue.prototype.countWithStatus = function(status,callback) {
});

callback(null,queueItemsMatched);
return queueItemsMatched;
};

// Gets the queue items with the given status
Expand All @@ -214,18 +225,19 @@ FetchQueue.prototype.getWithStatus = function(status,callback) {
});

callback(null,subqueue);
return subqueue;
};

// Gets the number of requests which have failed for some reason
// Adds the synchronous results of countWithStatus("failed") and
// countWithStatus("notfound"). The total is passed to `callback(err, total)`
// exactly once and is also returned.
FetchQueue.prototype.errors = function(callback) {
    // Fall back to a no-op so callers may rely on the return value alone.
    callback = callback && callback instanceof Function ? callback : function(){};
    var self = this;

    // Use the return values directly; no nested callbacks are needed.
    var failedCount = self.countWithStatus("failed");
    var notFoundCount = self.countWithStatus("notfound");
    var total = failedCount + notFoundCount;

    callback(null, total);
    return total;
};

// Writes the queue to disk
Expand Down
2 changes: 1 addition & 1 deletion test/cookies.js
Expand Up @@ -9,7 +9,7 @@ var cookies = [
"Set-Cookie: RMID=007f010019155170d6ca005f; Expires=Sat, 19 Apr 2014 05:31:54 GMT; Path=/; Domain=.nytimes.com;",
"Set-cookie: adxcs=-; path=/; domain=.nytimes.com",
"Set-Cookie: PREF=ID=8c63f2522e22574d:FF=0:TM=1366349569:LM=1366349569:S=p1Urbmfwfs-R573P; expires=Sun, 19-Apr-2015 05:32:49 GMT; path=/; domain=.google.com",
"Set-Cookie: NID=67=DhLO04YPAMlhETrTIe2oFPqWZfypQXLZfCIPItOvf70zhtUEMEItYfdVh6aROEzRHqtd9jHT6HJ7Oo93eqP3cjYNp8GgShfa6r0WVbsmQQRUvutbjBOPwzo7ybwYcWdB; expires=Sat, 19-Oct-2013 05:32:49 GMT; path=/; domain=.google.com; HttpOnly",
"Set-Cookie: NID=67=DhLO04YPAMlhETrTIe2oFPqWZfypQXLZfCIPItOvf70zhtUEMEItYfdVh6aROEzRHqtd9jHT6HJ7Oo93eqP3cjYNp8GgShfa6r0WVbsmQQRUvutbjBOPwzo7ybwYcWdB; expires=Sat, 19-Oct-2015 05:32:49 GMT; path=/; domain=.google.com; HttpOnly",
"Set-Cookie: fpc=d=Yq1z8hbA9WextmPFlb7suMTfMRgtSc2FyzAB7now1ExfUZ.eW7s4QSwSKlB6ZB0juN8OLZxWf_XXEIcspYaQmVVD0mD0xJ.xpXBCSw5Dl_Ql6n.RLoM.7CnTbNSsiSr2fkNiCN47tRUB4j8iWevNwQdFDn1hB8z8t1xwWt76n.sLIRY9p2_jTBhukfSD4SBpBkJhI1o-&v=2; expires=Sat, 19-Apr-2014 05:48:42 GMT; path=/; domain=www.yahoo.com",
"Set-Cookie: test=test; path=/test; domain=test.com"
];
Expand Down
3 changes: 3 additions & 0 deletions test/jshint.js
Expand Up @@ -25,6 +25,9 @@ describe("Core code",function() {

it("module `" + item + "` should pass JSHint with no errors",function() {

var slowThresholdMilliseconds = 200;
this.slow(slowThresholdMilliseconds);

JSHINT(code,{
"indent": 4,
"undef": true
Expand Down
2 changes: 2 additions & 0 deletions test/reliability.js
Expand Up @@ -12,6 +12,8 @@ describe("Crawler reliability",function() {

it("should be able to handle a timeout",function(done) {

this.slow('1s')

var localCrawler = Crawler.crawl("http://127.0.0.1:3000/timeout");
localCrawler.timeout = 200;

Expand Down
2 changes: 2 additions & 0 deletions test/testcrawl.js
Expand Up @@ -46,6 +46,8 @@ describe("Test Crawl",function() {

it("should support async event listeners for manual discovery",function(done) {

this.slow('1s')

// Use a different crawler this time
asyncCrawler.discoverResources = false;
asyncCrawler.queueURL("http://127.0.0.1:3000/async-stage1");
Expand Down

0 comments on commit bf7d613

Please sign in to comment.