Commit 4898d96
Add tests for start requests, filtered and non-filtered
redapple committed Apr 26, 2014
1 parent eb07e09 commit 4898d96
Showing 2 changed files with 39 additions and 1 deletion.
23 changes: 23 additions & 0 deletions scrapy/tests/spiders.py
@@ -157,3 +157,26 @@ def on_error(self, failure):
        self.meta['failure'] = failure
        if callable(self.errback_func):
            return self.errback_func(failure)


class DuplicateStartRequestsSpider(Spider):
    dont_filter = True
    name = 'duplicatestartrequests'
    distinct_urls = 2
    dupe_factor = 3

    def start_requests(self):
        for i in range(0, self.distinct_urls):
            for j in range(0, self.dupe_factor):
                url = "http://localhost:8998/echo?headers=1&body=test%d" % i
                yield self.make_requests_from_url(url)

    def make_requests_from_url(self, url):
        return Request(url, dont_filter=self.dont_filter)

    def __init__(self, url="http://localhost:8998", *args, **kwargs):
        super(DuplicateStartRequestsSpider, self).__init__(*args, **kwargs)
        self.visited = 0

    def parse(self, response):
        self.visited += 1
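
Aside (editorial, not part of the commit): the flag this spider toggles is honoured by Scrapy's scheduler, which skips duplicate filtering for requests created with dont_filter=True and deduplicates everything else by request fingerprint. A minimal sketch of that decision follows, assuming a toy URL-keyed filter; ToyDupeFilter and should_enqueue are hypothetical names, and the real filter hashes method, URL, and body rather than keying on the bare URL.

from scrapy.http import Request

class ToyDupeFilter(object):
    """Toy stand-in for Scrapy's fingerprint-based dupefilter (hypothetical)."""

    def __init__(self):
        self.seen = set()

    def request_seen(self, request):
        # Key on the bare URL; the real filter hashes method, URL and body.
        if request.url in self.seen:
            return True
        self.seen.add(request.url)
        return False

def should_enqueue(request, dupefilter):
    # Scheduler-style check: dont_filter bypasses the dupefilter.
    return request.dont_filter or not dupefilter.request_seen(request)

url = "http://localhost:8998/echo?headers=1&body=test0"
df = ToyDupeFilter()
assert sum(should_enqueue(Request(url), df) for _ in range(3)) == 1

df = ToyDupeFilter()
assert sum(should_enqueue(Request(url, dont_filter=True), df)
           for _ in range(3)) == 3
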
17 changes: 16 additions & 1 deletion scrapy/tests/test_crawl.py
@@ -5,7 +5,7 @@
from twisted.trial.unittest import TestCase
from scrapy.utils.test import docrawl, get_testlog
 from scrapy.tests.spiders import FollowAllSpider, DelaySpider, SimpleSpider, \
-    BrokenStartRequestsSpider, SingleRequestSpider
+    BrokenStartRequestsSpider, SingleRequestSpider, DuplicateStartRequestsSpider
from scrapy.tests.mockserver import MockServer
from scrapy.http import Request

@@ -113,6 +113,21 @@ def test_start_requests_lazyness(self):
        #self.assertTrue(spider.seedsseen.index(None) < spider.seedsseen.index(99),
        #                spider.seedsseen)

    @defer.inlineCallbacks
    def test_start_requests_dupes(self):
        settings = {"CONCURRENT_REQUESTS": 1}
        spider = DuplicateStartRequestsSpider(dont_filter=True,
                                              distinct_urls=2,
                                              dupe_factor=3)
        yield docrawl(spider, settings)
        self.assertEqual(spider.visited, 6)

        spider = DuplicateStartRequestsSpider(dont_filter=False,
                                              distinct_urls=3,
                                              dupe_factor=4)
        yield docrawl(spider, settings)
        self.assertEqual(spider.visited, 3)

    @defer.inlineCallbacks
    def test_unbounded_response(self):
        # Completeness of responses without Content-Length or Transfer-Encoding
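
Editorial note on the expected counts in test_start_requests_dupes above: with dont_filter=True every duplicate start request is crawled, so visited equals distinct_urls * dupe_factor (2 * 3 = 6); with dont_filter=False the dupefilter collapses duplicates to one request per distinct URL, so visited equals distinct_urls (3) and dupe_factor (4) has no effect.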
