Skip to content

Commit

Permalink
Respect max_items_read parameter for FOLLOW pagination
Browse files: browse the repository at this point in the history
  • Loading branch information
holgerd77 authored and holgerd77 committed Jun 26, 2017
1 parent 87d5080 commit a97ef00
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion dynamic_scraper/spiders/django_spider.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -756,7 +756,11 @@ def parse(self, response):
self.log("Value after: " + value, logging.DEBUG)
else:
self.log("Item could not be read!", logging.ERROR)
if self.scraper.follow_pages_url_xpath:

mir_reached = False
if self.conf['MAX_ITEMS_READ'] and (self.conf['MAX_ITEMS_READ'] - self.items_read_count <= 0):
mir_reached = True
if self.scraper.follow_pages_url_xpath and not mir_reached:
if not self.scraper.num_pages_follow or follow_page_num < self.scraper.num_pages_follow:
url = response.xpath(self.scraper.follow_pages_url_xpath).extract_first()
if url is not None:
Expand Down

0 comments on commit a97ef00

Please sign in to comment.