Permalink
Browse files

added uriPath

  • Loading branch information...
1 parent e624162 commit deb52393bd1c56c77db784a9214cc7dd0396a223 @gregmolnar gregmolnar committed Apr 10, 2013
Showing with 5 additions and 3 deletions.
  1. +1 −0 README.markdown
  2. +4 −3 lib/crawler.js
View
@@ -219,6 +219,7 @@ var conditionID = myCrawler.addFetchCondition(function(parsedURL) {
return !parsedURL.path.match(/\.pdf$/i);
});
```
+NOTE: simplecrawler's terminology differs slightly from that of URIjs. `parsedURL.path` includes the query string as well as the path (for example, `/search?q=pdf`). If you want the path without the query string (`/search`), use `parsedURL.uriPath` instead.
##### Removing a fetch condition
View
@@ -257,9 +257,10 @@ Crawler.prototype.processURL = function(URL,context) {
// simplecrawler uses slightly different terminology to URIjs. Sorry!
return {
"protocol": newURL.protocol() || "http",
- "host": newURL.hostname(),
- "port": newURL.port() || 80,
- "path": newURL.resource()
+ "host": newURL.hostname(),
+ "port": newURL.port() || 80,
+ "path": newURL.resource(),
+ "uriPath": newURL.path()
};
};

0 comments on commit deb5239

Please sign in to comment.