Skip to content

Commit

Permalink
rename XPathSelectorList as SelectorList scrapy#176
Browse files Browse the repository at this point in the history
  • Loading branch information
dangra committed Sep 23, 2013
1 parent c08991c commit 0ea8c38
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 22 deletions.
15 changes: 13 additions & 2 deletions scrapy/selector/__init__.py
Expand Up @@ -7,11 +7,10 @@
Two backends are currently available: lxml (default) and libxml2.
"""

import os

backend = os.environ.get('SCRAPY_SELECTORS_BACKEND')

backend = os.environ.get('SCRAPY_SELECTORS_BACKEND')
if backend == 'libxml2':
from scrapy.selector.libxml2sel import *
elif backend == 'lxml':
Expand All @@ -26,3 +25,15 @@
from scrapy.selector.lxmlsel import *

from scrapy.selector.csssel import *
from scrapy.selector.list import SelectorList


class XPathSelectorList(SelectorList):
    """Deprecated alias of ``SelectorList`` kept for backward compatibility.

    Instantiating this class emits a ``ScrapyDeprecationWarning`` and then
    delegates construction to ``SelectorList`` with the same arguments.
    """

    def __init__(self, *a, **kw):
        # Imports are kept local to the constructor, as in the original code.
        import warnings
        from scrapy.exceptions import ScrapyDeprecationWarning
        # stacklevel=2 attributes the warning to the code that instantiates
        # the deprecated class, not to this line inside the shim, so users
        # can locate the call site they need to update.
        warnings.warn('XPathSelectorList is deprecated, use '
                      'scrapy.selector.SelectorList instead',
                      category=ScrapyDeprecationWarning, stacklevel=2)
        super(XPathSelectorList, self).__init__(*a, **kw)
9 changes: 7 additions & 2 deletions scrapy/selector/csssel.py
@@ -1,8 +1,10 @@
from cssselect import GenericTranslator, HTMLTranslator
from scrapy.utils.python import flatten
from scrapy.selector import HtmlXPathSelector, XmlXPathSelector, XPathSelectorList
from scrapy.selector import HtmlXPathSelector, XmlXPathSelector
from .list import SelectorList

class CSSSelectorList(XPathSelectorList):

class CSSSelectorList(SelectorList):
def xpath(self, xpath):
return self.__class__(flatten([x.xpath(xpath) for x in self]))

Expand All @@ -12,6 +14,7 @@ def get(self, attr):
def text(self, all=False):
return self.__class__(flatten([x.text(all) for x in self]))


class CSSSelectorMixin(object):
def select(self, css):
return CSSSelectorList(super(CSSSelectorMixin, self).select(self.translator.css_to_xpath(css)))
Expand All @@ -25,8 +28,10 @@ def text(self, all=False):
def get(self, attr):
return self.xpath('@' + attr)


# Selector class combining CSSSelectorMixin with XmlXPathSelector. Its
# `translator` (cssselect's GenericTranslator) is the object whose
# css_to_xpath() CSSSelectorMixin.select() calls to turn a CSS expression
# into XPath.
class XmlCSSSelector(CSSSelectorMixin, XmlXPathSelector):
translator = GenericTranslator()


# Selector class combining CSSSelectorMixin with HtmlXPathSelector. Its
# `translator` (cssselect's HTMLTranslator) is the object whose
# css_to_xpath() CSSSelectorMixin.select() calls to turn a CSS expression
# into XPath.
class HtmlCSSSelector(CSSSelectorMixin, HtmlXPathSelector):
translator = HTMLTranslator()
15 changes: 8 additions & 7 deletions scrapy/selector/libxml2sel.py
Expand Up @@ -12,10 +12,11 @@
from scrapy.utils.trackref import object_ref
from scrapy.utils.decorator import deprecated
from .libxml2document import Libxml2Document, xmlDoc_from_html, xmlDoc_from_xml
from .list import XPathSelectorList
from .list import SelectorList


__all__ = ['HtmlXPathSelector', 'XmlXPathSelector', 'XPathSelector']

__all__ = ['HtmlXPathSelector', 'XmlXPathSelector', 'XPathSelector', \
'XPathSelectorList']

class XPathSelector(object_ref):

Expand Down Expand Up @@ -44,13 +45,13 @@ def select(self, xpath):
except libxml2.xpathError:
raise ValueError("Invalid XPath: %s" % xpath)
if hasattr(xpath_result, '__iter__'):
return XPathSelectorList([self.__class__(node=node, parent=self, \
return SelectorList([self.__class__(node=node, parent=self, \
expr=xpath) for node in xpath_result])
else:
return XPathSelectorList([self.__class__(node=xpath_result, \
return SelectorList([self.__class__(node=xpath_result, \
parent=self, expr=xpath)])
else:
return XPathSelectorList([])
return SelectorList([])

def re(self, regex):
return extract_regex(regex, self.extract())
Expand All @@ -62,7 +63,7 @@ def extract(self):
if isinstance(self.xmlNode, libxml2.xmlDoc):
data = self.xmlNode.getRootElement().serialize('utf-8')
text = unicode(data, 'utf-8', errors='ignore') if data else u''
elif isinstance(self.xmlNode, libxml2.xmlAttr):
elif isinstance(self.xmlNode, libxml2.xmlAttr):
# serialization doesn't work sometimes for xmlAttr types
text = unicode(self.xmlNode.content, 'utf-8', errors='ignore')
else:
Expand Down
5 changes: 3 additions & 2 deletions scrapy/selector/list.py
@@ -1,7 +1,8 @@
from scrapy.utils.python import flatten
from scrapy.utils.decorator import deprecated

class XPathSelectorList(list):

class SelectorList(list):

def __getslice__(self, i, j):
return self.__class__(list.__getslice__(self, i, j))
Expand All @@ -18,6 +19,6 @@ def extract(self):
def extract_unquoted(self):
return [x.extract_unquoted() for x in self]

@deprecated(use_instead='XPathSelectorList.select')
@deprecated(use_instead='SelectorList.select')
def x(self, xpath):
return self.select(xpath)
15 changes: 6 additions & 9 deletions scrapy/selector/lxmlsel.py
Expand Up @@ -10,11 +10,10 @@
from scrapy.utils.decorator import deprecated
from scrapy.http import TextResponse
from .lxmldocument import LxmlDocument
from .list import XPathSelectorList
from .list import SelectorList


__all__ = ['HtmlXPathSelector', 'XmlXPathSelector', 'XPathSelector', \
'XPathSelectorList']
__all__ = ['HtmlXPathSelector', 'XmlXPathSelector', 'XPathSelector']


class XPathSelector(object_ref):
Expand All @@ -25,8 +24,8 @@ class XPathSelector(object_ref):

def __init__(self, response=None, text=None, namespaces=None, _root=None, _expr=None):
if text is not None:
response = TextResponse(url='about:blank', \
body=unicode_to_str(text, 'utf-8'), encoding='utf-8')
response = TextResponse(url='about:blank', encoding='utf-8',
body=unicode_to_str(text, 'utf-8'))
if response is not None:
_root = LxmlDocument(response, self._parser)

Expand All @@ -39,7 +38,7 @@ def select(self, xpath):
try:
xpathev = self._root.xpath
except AttributeError:
return XPathSelectorList([])
return SelectorList([])

try:
result = xpathev(xpath, namespaces=self.namespaces)
Expand All @@ -51,7 +50,7 @@ def select(self, xpath):

result = [self.__class__(_root=x, _expr=xpath, namespaces=self.namespaces)
for x in result]
return XPathSelectorList(result)
return SelectorList(result)

def re(self, regex):
return extract_regex(regex, self.extract())
Expand Down Expand Up @@ -84,10 +83,8 @@ def __nonzero__(self):
def __str__(self):
data = repr(self.extract()[:40])
return "<%s xpath=%r data=%s>" % (type(self).__name__, self._expr, data)

__repr__ = __str__


@deprecated(use_instead='XPathSelector.extract')
def extract_unquoted(self):
return self.extract()
Expand Down

0 comments on commit 0ea8c38

Please sign in to comment.