
SEP-017 contracts: various minor changes

commit 6f1a5d8db6adf7e84e506321fb47f6636e4ea8aa (1 parent: 9019871), committed by @alexcepoi on Aug 29, 2012
.gitignore (6 lines changed)
@@ -1,12 +1,6 @@
*.pyc
-*swp
-*~
-
_trial_temp
dropin.cache
docs/build
*egg-info
.tox
-
-build/
-dist/
scrapy/commands/check.py (38 lines changed)
@@ -1,3 +1,4 @@
+from collections import defaultdict
from functools import wraps
from scrapy.conf import settings
@@ -7,6 +8,7 @@
from scrapy.utils import display
from scrapy.utils.misc import load_object
from scrapy.utils.spider import iterate_spider_output
+from scrapy.utils.conf import build_component_list
def _generate(cb):
""" create a callback which does not return anything """
@@ -19,34 +21,48 @@ def wrapper(response):
class Command(ScrapyCommand):
requires_project = True
+ default_settings = {'LOG_ENABLED': False}
def syntax(self):
return "[options] <spider>"
def short_desc(self):
return "Check contracts for given spider"
- def run(self, args, opts):
- self.conman = ContractsManager()
+ def add_options(self, parser):
+ ScrapyCommand.add_options(self, parser)
+ parser.add_option("-l", "--list", dest="list", action="store_true", \
+ help="only list contracts, without checking them")
- # load contracts
- contracts = settings['SPIDER_CONTRACTS_BASE'] + \
- settings['SPIDER_CONTRACTS']
- for contract in contracts:
- concls = load_object(contract)
- self.conman.register(concls)
+ def run(self, args, opts):
+ # load contracts
+ contracts = build_component_list(settings['SPIDER_CONTRACTS_BASE'],
+ settings['SPIDER_CONTRACTS'])
+ self.conman = ContractsManager([load_object(c) for c in contracts])
- # schedule requests
+ # contract requests
+ contract_reqs = defaultdict(list)
self.crawler.engine.has_capacity = lambda: True
for spider in args or self.crawler.spiders.list():
spider = self.crawler.spiders.create(spider)
requests = self.get_requests(spider)
- self.crawler.crawl(spider, requests)
+
+ if opts.list:
+ for req in requests:
+ contract_reqs[spider.name].append(req.callback.__name__)
+ else:
+ self.crawler.crawl(spider, requests)
# start checks
- self.crawler.start()
+ if opts.list:
+ for spider, methods in sorted(contract_reqs.iteritems()):
+ print spider
+ for method in sorted(methods):
+ print ' * %s' % method
+ else:
+ self.crawler.start()
def get_requests(self, spider):
requests = []
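
For context, scrapy check operates on contracts declared in callback docstrings. A minimal sketch of the kind of spider it consumes; the spider name, URL, and field name below are hypothetical:

    from scrapy.spider import BaseSpider

    class ExampleSpider(BaseSpider):
        name = 'example'

        def parse(self, response):
            """ Hypothetical callback with contracts in its docstring.

            @url http://www.example.com/some-page
            @returns items 1
            @scrapes title
            """
            pass

With the new flag added above, "scrapy check -l" would only print each spider name followed by its contracted callback names (here, "example" and " * parse"), while a plain "scrapy check" schedules the requests and runs the checks.
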
scrapy/contracts/__init__.py (47 lines changed)
@@ -8,10 +8,11 @@
from scrapy.exceptions import ContractFail
class ContractsManager(object):
- registered = {}
+ contracts = {}
- def register(self, contract):
- self.registered[contract.name] = contract
+ def __init__(self, contracts):
+ for contract in contracts:
+ self.contracts[contract.name] = contract
def extract_contracts(self, method):
contracts = []
@@ -20,28 +21,33 @@ def extract_contracts(self, method):
if line.startswith('@'):
name, args = re.match(r'@(\w+)\s*(.*)', line).groups()
- args = re.split(r'\s*\,\s*', args)
+ args = re.split(r'\s+', args)
- contracts.append(self.registered[name](method, *args))
+ contracts.append(self.contracts[name](method, *args))
return contracts
def from_method(self, method):
contracts = self.extract_contracts(method)
if contracts:
# calculate request args
- args = get_spec(Request.__init__)[1]
- args['callback'] = method
+ args, kwargs = get_spec(Request.__init__)
+ kwargs['callback'] = method
for contract in contracts:
- args = contract.adjust_request_args(args)
+ kwargs = contract.adjust_request_args(kwargs)
# create and prepare request
- assert 'url' in args, "Method '%s' does not have an url contract" % method.__name__
- request = Request(**args)
- for contract in contracts:
- request = contract.prepare_request(request)
+ args.remove('self')
+ if set(args).issubset(set(kwargs)):
+ request = Request(**kwargs)
+
+ # execute pre and post hooks in order
+ for contract in reversed(contracts):
+ request = contract.add_pre_hook(request)
+ for contract in contracts:
+ request = contract.add_post_hook(request)
- return request
+ return request
class Contract(object):
""" Abstract class for contracts """
@@ -50,25 +56,30 @@ def __init__(self, method, *args):
self.method = method
self.args = args
- def prepare_request(self, request):
+ def add_pre_hook(self, request):
cb = request.callback
@wraps(cb)
def wrapper(response):
self.pre_process(response)
+ return list(iterate_spider_output(cb(response)))
+
+ request.callback = wrapper
+ return request
+
+ def add_post_hook(self, request):
+ cb = request.callback
+ @wraps(cb)
+ def wrapper(response):
output = list(iterate_spider_output(cb(response)))
self.post_process(output)
return output
request.callback = wrapper
- request = self.modify_request(request)
return request
def adjust_request_args(self, args):
return args
- def modify_request(self, request):
- return request
-
def pre_process(self, response):
pass
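
A sketch of how the reworked manager is driven after this change (previously contracts were added one at a time via register()); it assumes an already-instantiated spider whose parse callback declares contracts:

    from scrapy.contracts import ContractsManager
    from scrapy.contracts.default import UrlContract, ReturnsContract, ScrapesContract

    conman = ContractsManager([UrlContract, ReturnsContract, ScrapesContract])
    # returns a prepared Request, or None if a required argument
    # such as 'url' was never filled in by any contract
    request = conman.from_method(spider.parse)

Note that pre hooks are applied in reverse and post hooks in order, so at call time both pre_process and post_process run in the order the contracts appear in the docstring.
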
scrapy/contracts/default.py (56 lines changed)
@@ -17,61 +17,73 @@ def adjust_request_args(self, args):
args['url'] = self.args[0]
return args
+
class ReturnsContract(Contract):
""" Contract to check the output of a callback
- @returns items, 1
- @returns requests, 1+
+
+ general form:
+ @returns request(s)/item(s) [min=1 [max]]
+
+ e.g.:
+ @returns request
+ @returns request 2
+ @returns request 2 10
+ @returns request 0 10
"""
name = 'returns'
objects = {
+ 'request': Request,
'requests': Request,
+ 'item': BaseItem,
'items': BaseItem,
}
def __init__(self, *args, **kwargs):
super(ReturnsContract, self).__init__(*args, **kwargs)
- if len(self.args) != 2:
- raise ContractError("Returns Contract must have two arguments")
- self.obj_name, self.raw_num = self.args
-
- # validate input
+ assert len(self.args) in [1, 2, 3]
+ self.obj_name = self.args[0] or None
self.obj_type = self.objects[self.obj_name]
- self.modifier = self.raw_num[-1]
- if self.modifier in ['+', '-']:
- self.num = int(self.raw_num[:-1])
- else:
- self.num = int(self.raw_num)
- self.modifier = None
+ try:
+ self.min_bound = int(self.args[1])
+ except IndexError:
+ self.min_bound = 1
+
+ try:
+ self.max_bound = int(self.args[2])
+ except IndexError:
+ self.max_bound = float('inf')
def post_process(self, output):
occurences = 0
for x in output:
if isinstance(x, self.obj_type):
occurences += 1
- if self.modifier == '+':
- assertion = (occurences >= self.num)
- elif self.modifier == '-':
- assertion = (occurences <= self.num)
- else:
- assertion = (occurences == self.num)
+ assertion = (self.min_bound <= occurences <= self.max_bound)
if not assertion:
+ if self.min_bound == self.max_bound:
+ expected = self.min_bound
+ else:
+ expected = '%s..%s' % (self.min_bound, self.max_bound)
+
raise ContractFail("Returned %s %s, expected %s" % \
- (occurences, self.obj_name, self.raw_num))
+ (occurences, self.obj_name, expected))
+
class ScrapesContract(Contract):
""" Contract to check presence of fields in scraped items
- @scrapes page_name, page_body
+ @scrapes page_name page_body
"""
+
name = 'scrapes'
def post_process(self, output):
for x in output:
if isinstance(x, BaseItem):
for arg in self.args:
if not arg in x:
- raise ContractFail('%r field is missing' % arg)
+ raise ContractFail("'%s' field is missing" % arg)
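
Taken together, @returns moves from a comma-separated count with +/- modifiers to space-separated optional min/max bounds. A hypothetical callback docstring using the new forms (the URL and field names are made up):

    def parse_item(self, response):
        """
        @url http://www.example.com/item
        @returns items 1 1
        @returns requests 0 0
        @scrapes name price
        """

Here "@returns items 1 1" demands exactly one item, "@returns requests 0 0" forbids follow-up requests, and omitting the bounds entirely defaults to at least one occurrence with no upper limit.
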
scrapy/exceptions.py (4 lines changed)
@@ -50,6 +50,6 @@ class ScrapyDeprecationWarning(Warning):
"""
pass
-class ContractFail(Exception):
- """Error in constructing contracts for a method"""
+class ContractFail(AssertionError):
+ """Error raised in case of a failing contract"""
pass
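
Since ContractFail now subclasses AssertionError, failing contracts surface like ordinary test assertions. A hypothetical custom contract (not part of this commit) raising it from a pre-processing hook:

    from scrapy.contracts import Contract
    from scrapy.exceptions import ContractFail

    class StatusContract(Contract):
        """ Hypothetical contract checking the response status:
        @status 200
        """
        name = 'status'

        def pre_process(self, response):
            if response.status != int(self.args[0]):
                raise ContractFail("Expected status %s, got %d"
                                   % (self.args[0], response.status))
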
scrapy/settings/default_settings.py (12 lines changed)
@@ -242,9 +242,9 @@
'scrapy.contrib.webservice.stats.StatsResource': 1,
}
-SPIDER_CONTRACTS = []
-SPIDER_CONTRACTS_BASE = [
- 'scrapy.contracts.default.UrlContract',
- 'scrapy.contracts.default.ReturnsContract',
- 'scrapy.contracts.default.ScrapesContract',
-]
+SPIDER_CONTRACTS = {}
+SPIDER_CONTRACTS_BASE = {
+ 'scrapy.contracts.default.UrlContract' : 1,
+ 'scrapy.contracts.default.ReturnsContract': 2,
+ 'scrapy.contracts.default.ScrapesContract': 3,
+}
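
With the settings now dicts, a project can plug in its own contracts through SPIDER_CONTRACTS, assuming build_component_list merges the base and project dicts and orders components by their values, as the analogous middleware settings do. A hypothetical settings.py entry (the contract path is made up, reusing the sketch above):

    SPIDER_CONTRACTS = {
        'myproject.contracts.StatusContract': 4,
    }
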
scrapy/utils/misc.py (2 lines changed)
@@ -106,7 +106,7 @@ def md5sum(file):
return m.hexdigest()
def get_spec(func):
- """Returns (args, kwargs) touple for a function
+ """Returns (args, kwargs) tuple for a function
>>> import re
>>> get_spec(re.match)
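
For illustration, a hedged sketch of what the corrected docstring describes, assuming inspect.getargspec semantics (the function is hypothetical and dict ordering may vary):

    def fetch(url, callback=None, method='GET'):
        pass

    get_spec(fetch)   # -> (['url'], {'callback': None, 'method': 'GET'})

This is the helper from_method relies on to split Request.__init__ into required positional arguments and keyword defaults.
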
