Permalink
Browse files

Allow for fetching limited range of webpages, to avoid massive files

  • Loading branch information...
1 parent f0e99df commit 52d95b712087e221818b42537f5c857c305b4b23 @pmyteh pmyteh committed Jul 22, 2012
Showing with 4 additions and 0 deletions.
  1. +4 −0 CONFIG_db_example.php
View
@@ -21,6 +21,10 @@
$whitelistdomainlevel=2;
// List of whitelisted domains. The list must begin and end with ':' and use ':' to separate entries.
$whitelistdomainlist=":gov.uk:.org.uk:";
+//Fetch only first part of each page, to avoid huge files?
+$fetchrangeonly=true;
+// If $fetchrangeonly is true, which range to fetch? Here "0-99999" specifies the first 100KB (bytes 0 through 99999).
+$fetchrange="0-99999";
// Set spider penetration depth. If 0, crawl only the pages already in the database.
$MAX_PENETRATION = 5;

0 comments on commit 52d95b7

Please sign in to comment.