forked from scrapy/scrapy
/
item.py
91 lines (66 loc) · 2.29 KB
/
item.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Scrapy Item
See documentation in docs/topics/item.rst
"""
from UserDict import DictMixin
from scrapy.utils.trackref import object_ref
class BaseItem(object_ref):
    """Common ancestor of every scraped item type.

    Adds no behaviour of its own; it only derives from ``object_ref``.
    """
class Field(dict):
    """Metadata container for a single item field.

    A plain ``dict`` subclass: the keys and values are whatever metadata
    the declaring class supplies. Within this module, ``ItemMeta`` uses the
    type to recognise field declarations and ``DictItem.__getitem__``
    consults the ``'default'`` key when a value has not been set.
    """
class ItemMeta(type):
    """Metaclass that gathers ``Field`` attributes into a ``fields`` dict.

    Attributes of the class body that are ``Field`` instances are taken out
    of the class namespace and stored, keyed by attribute name, in the
    class-level ``fields`` mapping. That mapping starts as a copy of the
    ``fields`` attribute the new class inherits, so at least one base must
    already provide ``fields``.
    """

    def __new__(mcs, class_name, bases, attrs):
        """Create the class and attach its merged ``fields`` mapping.

        :param class_name: name of the class being created
        :param bases: tuple of base classes
        :param attrs: namespace dict of the class body
        :returns: the newly created class
        """
        declared = {}   # Field instances found in the class body
        remaining = {}  # everything else, passed on to ``type``
        for attr_name, attr_value in attrs.iteritems():
            target = declared if isinstance(attr_value, Field) else remaining
            target[attr_name] = attr_value

        cls = type.__new__(mcs, class_name, bases, remaining)

        # Work on a copy so the dict inherited from the bases is not
        # mutated by this class's own declarations.
        merged = cls.fields.copy()
        merged.update(declared)
        cls.fields = merged
        return cls
class DictItem(DictMixin, BaseItem):
    """Dict-like item whose allowed keys are limited to the declared fields.

    Values live in the private ``_values`` dict, while ``fields`` maps each
    permitted key to its metadata. Field values are reached with item
    syntax (``item['name']``) only: attribute access to a field, and
    assignment to any attribute whose name does not start with an
    underscore, raise ``AttributeError``.
    """

    # Field name -> field metadata; empty on this base class.
    fields = {}

    def __init__(self, *args, **kwargs):
        """Populate the item from ``dict``-style constructor arguments.

        Every pair is stored through ``__setitem__``, so a key that is not
        a declared field raises ``KeyError``.
        """
        self._values = {}
        if not (args or kwargs):
            # Most common case: nothing to copy, so skip building a dict.
            return
        initial = dict(*args, **kwargs)
        for name, value in initial.iteritems():
            self[name] = value

    def __getitem__(self, key):
        """Return the stored value for ``key``, or the field's default.

        Raises ``KeyError`` when no value is stored and the key is either
        not a declared field or its metadata has no ``'default'`` entry.
        """
        try:
            return self._values[key]
        except KeyError:
            # An undeclared key raises its own KeyError on this lookup.
            meta = self.fields[key]
            if 'default' not in meta:
                raise
            return meta['default']

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``; only declared fields are accepted."""
        if key not in self.fields:
            raise KeyError("%s does not support field: %s" %
                           (self.__class__.__name__, key))
        self._values[key] = value

    def __delitem__(self, key):
        """Remove the stored value for ``key`` (``KeyError`` if not set)."""
        del self._values[key]

    def __getattr__(self, name):
        """Reject attribute lookups that fall through to this hook.

        The error message for a declared field name points the caller at
        the item syntax; any other name gets a plain ``AttributeError``.
        """
        if name not in self.fields:
            raise AttributeError(name)
        raise AttributeError("Use item[%r] to get field value" % name)

    def __setattr__(self, name, value):
        """Allow assignment to underscore-prefixed attributes only."""
        if name.startswith('_'):
            super(DictItem, self).__setattr__(name, value)
            return
        raise AttributeError("Use item[%r] = %r to set field value" %
                             (name, value))

    def keys(self):
        """Return the keys that have an explicitly stored value.

        Fields that only have a default are not listed.
        """
        return self._values.keys()

    def __repr__(self):
        """Build a ``ClassName(key=value, ...)`` representation.

        The intent is that evaluating the string reconstructs the item.
        """
        pairs = ['%s=%r' % pair for pair in self.iteritems()]
        return "%s(%s)" % (self.__class__.__name__, ', '.join(pairs))
class Item(DictItem):
    """``DictItem`` whose subclasses are created through ``ItemMeta``.

    ``Field`` instances assigned in a class body are collected by the
    metaclass into that class's ``fields`` mapping.
    """

    # Python 2 style metaclass declaration.
    __metaclass__ = ItemMeta