Document plugin.

1 parent 4f1d8fc commit 91a534d7e869837e62b0781732960d76f7e673c3 @jaredhanson committed May 4, 2013
Showing with 27 additions and 1 deletion.
  1. +27 −1 lib/index.js
lib/index.js
@@ -1,7 +1,28 @@
+/**
+ * Module dependencies.
+ */
var url = require('url');
-// http://www.robotstxt.org/robotstxt.html
+/**
+ * robots.txt plugin.
+ *
+ * This plugin adds a `/robots.txt` page to a site, giving instructions to web
+ * crawlers using the Robots Exclusion Protocol.
+ *
+ * Examples:
+ *
+ * site.plug(require('kerouac-robotstxt')());
+ *
+ * References:
+ * - [About /robots.txt](http://www.robotstxt.org/robotstxt.html)
+ * - [A Standard for Robot Exclusion](http://www.robotstxt.org/orig.html)
+ * - [A Method for Web Robots Control](http://www.robotstxt.org/norobots-rfc.txt)
+ * - [Specifying the Sitemap location in your robots.txt file](http://www.sitemaps.org/protocol.html#submit_robots)
+ *
+ * @return {Function}
+ * @api public
+ */
exports = module.exports = function() {
return function robots(site, pages) {
@@ -10,11 +31,16 @@ exports = module.exports = function() {
var uri = url.parse(site.get('base url'));
site.page('/robots.txt', function(page, next) {
+ // By default, be permissive and allow any crawler to crawl all pages.
+ // This is done by using the '*' wildcard for the user agent and setting
+ // the disallow field to an empty value.
var txt = '';
txt += 'User-agent: *\r\n';
txt += 'Disallow:\r\n';
txt += '\r\n';
+ // Add the location of any sitemaps to the robots.txt file, as specified
+ // by: http://www.sitemaps.org/protocol.html#submit_robots
var paths = Object.keys(pages).sort()
, pg;
for (var i = 0, len = paths.length; i < len; i++) {
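The diff is truncated at the opening of this loop. Below is a minimal sketch of how the rest of the handler might read, assuming each entry in `pages` exposes `sitemap` and `url` properties and that a kerouac page is finished with `write()`/`end()`; none of these names are confirmed by the diff shown here.

    // Continue from the truncated loop: scan every page and, for any page
    // marked as a sitemap, emit a Sitemap directive with its absolute URL.
    for (var i = 0, len = paths.length; i < len; i++) {
      pg = pages[paths[i]];
      if (pg.sitemap) {
        // `uri` was parsed from `site.get('base url')` earlier in the handler.
        txt += 'Sitemap: ' + uri.protocol + '//' + uri.host + pg.url + '\r\n';
      }
    }
    page.write(txt);
    page.end();

Sorting the paths before the loop keeps the generated robots.txt stable from build to build, so the output only changes when the set of sitemap pages actually changes.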
