Merge pull request scrapy#1218 from Curita/move-base-to-packages
Move base classes to their packages
dangra committed May 14, 2015
2 parents 65aa9cc + 53fdaa3 commit 1195f90
Showing 84 changed files with 450 additions and 430 deletions.
3 changes: 3 additions & 0 deletions conftest.py
@@ -16,6 +16,9 @@ def _py_files(folder):
"scrapy/squeue.py",
"scrapy/log.py",
"scrapy/dupefilter.py",
"scrapy/command.py",
"scrapy/linkextractor.py",
"scrapy/spider.py",
] + _py_files("scrapy/contrib") + _py_files("scrapy/contrib_exp")


14 changes: 7 additions & 7 deletions docs/intro/tutorial.rst
@@ -95,26 +95,26 @@ domain (or group of domains).
They define an initial list of URLs to download, how to follow links, and how
to parse the contents of pages to extract :ref:`items <topics-items>`.

To create a Spider, you must subclass :class:`scrapy.Spider <scrapy.spider.Spider>` and
define some attributes:
To create a Spider, you must subclass :class:`scrapy.Spider
<scrapy.spiders.Spider>` and define some attributes:

* :attr:`~scrapy.spider.Spider.name`: identifies the Spider. It must be
* :attr:`~scrapy.spiders.Spider.name`: identifies the Spider. It must be
unique, that is, you can't set the same name for different Spiders.

* :attr:`~scrapy.spider.Spider.start_urls`: a list of URLs where the
* :attr:`~scrapy.spiders.Spider.start_urls`: a list of URLs where the
Spider will begin to crawl from. The first pages downloaded will be those
listed here. The subsequent URLs will be generated successively from data
contained in the start URLs.

* :meth:`~scrapy.spider.Spider.parse`: a method of the spider, which will
* :meth:`~scrapy.spiders.Spider.parse`: a method of the spider, which will
be called with the downloaded :class:`~scrapy.http.Response` object of each
start URL. The response is passed to the method as the first and only
argument.

This method is responsible for parsing the response data and extracting
scraped data (as scraped items) and more URLs to follow.

The :meth:`~scrapy.spider.Spider.parse` method is in charge of processing
The :meth:`~scrapy.spiders.Spider.parse` method is in charge of processing
the response and returning scraped data (as :class:`~scrapy.item.Item`
objects) and more URLs to follow (as :class:`~scrapy.http.Request` objects).
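
Put together, a first spider under the new module layout looks roughly like this (an illustrative sketch, not the tutorial's exact code; the spider name and URLs are placeholders)::

    import scrapy

    class ExampleSpider(scrapy.Spider):
        # "name" must be unique across the project's spiders
        name = "example"
        # crawling starts here; parse() is the default callback for these URLs
        start_urls = [
            "http://example.com/page1.html",
            "http://example.com/page2.html",
        ]

        def parse(self, response):
            # extract data and/or follow further links from each response
            for href in response.xpath('//a/@href').extract():
                yield scrapy.Request(response.urljoin(href), callback=self.parse)
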

@@ -178,7 +178,7 @@ them the ``parse`` method of the spider as their callback function.

These Requests are scheduled, then executed, and :class:`scrapy.http.Response`
objects are returned and then fed back to the spider, through the
:meth:`~scrapy.spider.Spider.parse` method.
:meth:`~scrapy.spiders.Spider.parse` method.

Extracting Items
----------------
2 changes: 1 addition & 1 deletion docs/topics/api.rst
@@ -31,7 +31,7 @@ how you :ref:`configure the downloader middlewares
.. class:: Crawler(spidercls, settings)

The Crawler object must be instantiated with a
:class:`scrapy.spider.Spider` subclass and a
:class:`scrapy.spiders.Spider` subclass and a
:class:`scrapy.settings.Settings` object.

.. attribute:: settings
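
For reference, constructing a Crawler directly could look like the sketch below (illustrative only; the spider class and setting are placeholders, and most code goes through ``CrawlerProcess`` or the ``scrapy crawl`` command instead)::

    from scrapy.crawler import Crawler
    from scrapy.settings import Settings
    from scrapy.spiders import Spider

    class ExampleSpider(Spider):
        name = 'example'

    # a Crawler is built from a Spider *class* (not an instance) plus Settings
    crawler = Crawler(ExampleSpider, Settings({'USER_AGENT': 'example-bot'}))
    print(crawler.settings.get('USER_AGENT'))
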
6 changes: 3 additions & 3 deletions docs/topics/downloader-middleware.rst
@@ -91,7 +91,7 @@ more of the following methods:
:type request: :class:`~scrapy.http.Request` object

:param spider: the spider for which this request is intended
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

.. method:: process_response(request, response, spider)

@@ -118,7 +118,7 @@ more of the following methods:
:type response: :class:`~scrapy.http.Response` object

:param spider: the spider for which this response is intended
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

.. method:: process_exception(request, exception, spider)

@@ -149,7 +149,7 @@ more of the following methods:
:type exception: an ``Exception`` object

:param spider: the spider for which this request is intended
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

.. _topics-downloader-middleware-ref:

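
A skeleton downloader middleware implementing these hooks might look like this (an illustrative sketch; the class name and logging are made up, and it would still need to be enabled through the ``DOWNLOADER_MIDDLEWARES`` setting)::

    class LoggingDownloaderMiddleware(object):

        def process_request(self, request, spider):
            # returning None lets the request continue down the chain
            spider.logger.debug('requesting %s', request.url)
            return None

        def process_response(self, request, response, spider):
            # must return a Response or Request (or raise IgnoreRequest)
            spider.logger.debug('got %s for %s', response.status, request.url)
            return response

        def process_exception(self, request, exception, spider):
            # returning None lets other middlewares handle the exception
            spider.logger.warning('failed %s: %s', request.url, exception)
            return None
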
6 changes: 3 additions & 3 deletions docs/topics/item-pipeline.rst
@@ -36,7 +36,7 @@ Each item pipeline component is a Python class that must implement the following
:type item: :class:`~scrapy.item.Item` object or a dict

:param spider: the spider which scraped the item
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

Additionally, they may also implement the following methods:

@@ -45,14 +45,14 @@ Additionally, they may also implement the following methods:
This method is called when the spider is opened.

:param spider: the spider which was opened
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

.. method:: close_spider(self, spider)

This method is called when the spider is closed.

:param spider: the spider which was closed
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

.. method:: from_crawler(cls, crawler)

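
A minimal pipeline implementing the three methods above could look like this (a sketch assuming a JSON-lines output file; the class and file names are placeholders)::

    import json

    class JsonWriterPipeline(object):

        def open_spider(self, spider):
            # called when the spider is opened
            self.file = open('items-%s.jl' % spider.name, 'w')

        def close_spider(self, spider):
            # called when the spider is closed
            self.file.close()

        def process_item(self, item, spider):
            # must return the item (or raise DropItem) for later pipeline stages
            self.file.write(json.dumps(dict(item)) + '\n')
            return item
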
4 changes: 2 additions & 2 deletions docs/topics/leaks.rst
@@ -92,7 +92,7 @@ subclasses):
* :class:`scrapy.http.Response`
* :class:`scrapy.item.Item`
* :class:`scrapy.selector.Selector`
* :class:`scrapy.spider.Spider`
* :class:`scrapy.spiders.Spider`

A real example
--------------
@@ -155,7 +155,7 @@ For this reason, that function has a ``ignore`` argument which can be used to
ignore a particular class (and all its subclases). For
example, this won't show any live references to spiders::

>>> from scrapy.spider import Spider
>>> from scrapy.spiders import Spider
>>> prefs(ignore=Spider)

.. module:: scrapy.utils.trackref
6 changes: 3 additions & 3 deletions docs/topics/link-extractors.rst
@@ -78,8 +78,8 @@ LxmlLinkExtractor
:param deny_extensions: a single value or list of strings containing
extensions that should be ignored when extracting links.
If not given, it will default to the
``IGNORED_EXTENSIONS`` list defined in the `scrapy.linkextractor`_
module.
``IGNORED_EXTENSIONS`` list defined in the
`scrapy.linkextractors`_ module.
:type deny_extensions: list

:param restrict_xpaths: is an XPath (or list of XPath's) which defines
@@ -132,4 +132,4 @@ LxmlLinkExtractor

:type process_value: callable

.. _scrapy.linkextractor: https://github.com/scrapy/scrapy/blob/master/scrapy/linkextractor.py
.. _scrapy.linkextractors: https://github.com/scrapy/scrapy/blob/master/scrapy/linkextractors/__init__.py
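
Overriding ``deny_extensions`` from a spider callback could look like this (an illustrative sketch; it assumes the ``LinkExtractor`` alias exported by the new ``scrapy.linkextractors`` package, and the spider name and URL are placeholders)::

    import scrapy
    from scrapy.linkextractors import LinkExtractor

    class LinkSpider(scrapy.Spider):
        name = 'links'
        start_urls = ['http://example.com/']

        def parse(self, response):
            # deny_extensions replaces the IGNORED_EXTENSIONS default
            extractor = LinkExtractor(deny_extensions=['pdf', 'zip'])
            for link in extractor.extract_links(response):
                yield scrapy.Request(link.url, callback=self.parse)
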
2 changes: 1 addition & 1 deletion docs/topics/logging.rst
@@ -94,7 +94,7 @@ path::
Logging from Spiders
====================

Scrapy provides a :data:`~scrapy.spider.Spider.logger` within each Spider
Scrapy provides a :data:`~scrapy.spiders.Spider.logger` within each Spider
instance, that can be accessed and used like this::

import scrapy
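
A spider using that logger might look like this (an illustrative sketch, not the docs' exact text; the spider name and URL are placeholders)::

    import scrapy

    class MySpider(scrapy.Spider):
        name = 'myspider'
        start_urls = ['http://example.com/']

        def parse(self, response):
            # self.logger is a standard logging.Logger named after the spider
            self.logger.info('Parse function called on %s', response.url)
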
2 changes: 1 addition & 1 deletion docs/topics/request-response.rst
@@ -37,7 +37,7 @@ Request objects
request (once its downloaded) as its first parameter. For more information
see :ref:`topics-request-response-ref-request-callback-arguments` below.
If a Request doesn't specify a callback, the spider's
:meth:`~scrapy.spider.Spider.parse` method will be used.
:meth:`~scrapy.spiders.Spider.parse` method will be used.
Note that if exceptions are raised during processing, errback is called instead.

:type callback: callable
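
A callback other than the default ``parse`` can be wired up like this (a sketch; the spider, URLs and callback name are placeholders)::

    import scrapy

    class CallbackSpider(scrapy.Spider):
        name = 'callback-example'
        start_urls = ['http://example.com/']

        def parse(self, response):
            # explicit callback; if omitted, parse() would be used by default
            yield scrapy.Request(response.urljoin('detail.html'),
                                 callback=self.parse_detail)

        def parse_detail(self, response):
            self.logger.info('got detail page %s', response.url)
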
2 changes: 1 addition & 1 deletion docs/topics/settings.rst
@@ -67,7 +67,7 @@ Example::

Spiders (See the :ref:`topics-spiders` chapter for reference) can define their
own settings that will take precedence and override the project ones. They can
do so by setting their :attr:`scrapy.spider.Spider.custom_settings` attribute.
do so by setting their :attr:`scrapy.spiders.Spider.custom_settings` attribute.
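
For example, a spider-level override might look like this (illustrative; the spider name and setting values are arbitrary)::

    import scrapy

    class ApiSpider(scrapy.Spider):
        name = 'api'
        # evaluated before the spider is instantiated; overrides project settings
        custom_settings = {
            'DOWNLOAD_DELAY': 2,
            'CONCURRENT_REQUESTS_PER_DOMAIN': 4,
        }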

3. Project settings module
--------------------------
2 changes: 1 addition & 1 deletion docs/topics/shell.rst
@@ -74,7 +74,7 @@ Those objects are:
* ``crawler`` - the current :class:`~scrapy.crawler.Crawler` object.

* ``spider`` - the Spider which is known to handle the URL, or a
:class:`~scrapy.spider.Spider` object if there is no spider found for
:class:`~scrapy.spiders.Spider` object if there is no spider found for
the current URL

* ``request`` - a :class:`~scrapy.http.Request` object of the last fetched
20 changes: 10 additions & 10 deletions docs/topics/signals.rst
@@ -74,7 +74,7 @@ item_scraped
:type item: dict or :class:`~scrapy.item.Item` object

:param spider: the spider which scraped the item
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

:param response: the response from where the item was scraped
:type response: :class:`~scrapy.http.Response` object
@@ -94,7 +94,7 @@ item_dropped
:type item: dict or :class:`~scrapy.item.Item` object

:param spider: the spider which scraped the item
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

:param response: the response from where the item was dropped
:type response: :class:`~scrapy.http.Response` object
@@ -116,7 +116,7 @@ spider_closed
This signal supports returning deferreds from their handlers.

:param spider: the spider which has been closed
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

:param reason: a string which describes the reason why the spider was closed. If
it was closed because the spider has completed scraping, the reason
@@ -140,7 +140,7 @@ spider_opened
This signal supports returning deferreds from their handlers.

:param spider: the spider which has been opened
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

spider_idle
-----------
@@ -164,7 +164,7 @@ spider_idle
This signal does not support returning deferreds from their handlers.

:param spider: the spider which has gone idle
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

spider_error
------------
@@ -181,7 +181,7 @@ spider_error
:type response: :class:`~scrapy.http.Response` object

:param spider: the spider which raised the exception
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

request_scheduled
-----------------
@@ -198,7 +198,7 @@ request_scheduled
:type request: :class:`~scrapy.http.Request` object

:param spider: the spider that yielded the request
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

request_dropped
-----------------
@@ -215,7 +215,7 @@ request_dropped
:type request: :class:`~scrapy.http.Request` object

:param spider: the spider that yielded the request
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

response_received
-----------------
@@ -235,7 +235,7 @@ response_received
:type request: :class:`~scrapy.http.Request` object

:param spider: the spider for which the response is intended
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

response_downloaded
-------------------
@@ -254,6 +254,6 @@ response_downloaded
:type request: :class:`~scrapy.http.Request` object

:param spider: the spider for which the response is intended
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

.. _Failure: http://twistedmatrix.com/documents/current/api/twisted.python.failure.Failure.html
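
Handlers are typically connected through a crawler's signal manager, for example from an extension's ``from_crawler`` (a sketch; the class name is made up, and the extension would still need to be listed in the ``EXTENSIONS`` setting)::

    from scrapy import signals

    class SpiderLifecycleLogger(object):

        @classmethod
        def from_crawler(cls, crawler):
            ext = cls()
            # connect handlers to the signals documented above
            crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
            crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
            return ext

        def spider_opened(self, spider):
            spider.logger.info('spider opened: %s', spider.name)

        def spider_closed(self, spider, reason):
            # reason is the closing reason string described above
            spider.logger.info('spider closed: %s (%s)', spider.name, reason)
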
12 changes: 6 additions & 6 deletions docs/topics/spider-middleware.rst
@@ -81,7 +81,7 @@ following methods:
:type response: :class:`~scrapy.http.Response` object

:param spider: the spider for which this response is intended
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object


.. method:: process_spider_output(response, result, spider)
@@ -102,7 +102,7 @@ following methods:
or :class:`~scrapy.item.Item` objects

:param spider: the spider whose result is being processed
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object


.. method:: process_spider_exception(response, exception, spider)
@@ -130,7 +130,7 @@ following methods:
:type exception: `Exception`_ object

:param spider: the spider which raised the exception
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object

.. method:: process_start_requests(start_requests, spider)

@@ -157,7 +157,7 @@ following methods:
:type start_requests: an iterable of :class:`~scrapy.http.Request`

:param spider: the spider to whom the start requests belong
:type spider: :class:`~scrapy.spider.Spider` object
:type spider: :class:`~scrapy.spiders.Spider` object


.. _Exception: https://docs.python.org/2/library/exceptions.html#exceptions.Exception
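
A skeleton spider middleware covering these hooks might look like this (illustrative; the class name is made up, and it would be enabled via the ``SPIDER_MIDDLEWARES`` setting)::

    class PassThroughSpiderMiddleware(object):

        def process_spider_input(self, response, spider):
            # return None to continue processing this response
            return None

        def process_spider_output(self, response, result, spider):
            # must return an iterable of Request and/or Item objects
            for element in result:
                yield element

        def process_start_requests(self, start_requests, spider):
            # runs once, over the spider's start requests
            for request in start_requests:
                yield request
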
@@ -272,7 +272,7 @@ OffsiteMiddleware
Filters out Requests for URLs outside the domains covered by the spider.

This middleware filters out every request whose host names aren't in the
spider's :attr:`~scrapy.spider.Spider.allowed_domains` attribute.
spider's :attr:`~scrapy.spiders.Spider.allowed_domains` attribute.

When your spider returns a request for a domain not belonging to those
covered by the spider, this middleware will log a debug message similar to
@@ -287,7 +287,7 @@ OffsiteMiddleware
will be printed (but only for the first request filtered).

If the spider doesn't define an
:attr:`~scrapy.spider.Spider.allowed_domains` attribute, or the
:attr:`~scrapy.spiders.Spider.allowed_domains` attribute, or the
attribute is empty, the offsite middleware will allow all requests.

If the request has the :attr:`~scrapy.http.Request.dont_filter` attribute
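
A spider opting into this filtering simply declares the attribute (illustrative; the spider name and domain are placeholders)::

    from scrapy.spiders import Spider

    class OnSiteSpider(Spider):
        name = 'onsite'
        # requests to other hosts will be filtered out by OffsiteMiddleware
        allowed_domains = ['example.com']
        start_urls = ['http://www.example.com/']
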
11 changes: 5 additions & 6 deletions docs/topics/spiders.rst
@@ -17,10 +17,10 @@ For spiders, the scraping cycle goes through something like this:
those requests.

The first requests to perform are obtained by calling the
:meth:`~scrapy.spider.Spider.start_requests` method which (by default)
:meth:`~scrapy.spiders.Spider.start_requests` method which (by default)
generates :class:`~scrapy.http.Request` for the URLs specified in the
:attr:`~scrapy.spider.Spider.start_urls` and the
:attr:`~scrapy.spider.Spider.parse` method as callback function for the
:attr:`~scrapy.spiders.Spider.start_urls` and the
:attr:`~scrapy.spiders.Spider.parse` method as callback function for the
Requests.

2. In the callback function, you parse the response (web page) and return either
Expand All @@ -42,7 +42,7 @@ Even though this cycle applies (more or less) to any kind of spider, there are
different kinds of default spiders bundled into Scrapy for different purposes.
We will talk about those types here.

.. module:: scrapy.spider
.. module:: scrapy.spiders
:synopsis: Spiders base class, spider manager and spider middleware

.. _topics-spiders-ref:
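
Overriding that default entry point looks roughly like this (a sketch; the spider name, URL and callback are placeholders)::

    from scrapy.http import Request
    from scrapy.spiders import Spider

    class LoginFirstSpider(Spider):
        name = 'login-first'

        def start_requests(self):
            # by default Scrapy builds Requests from start_urls with parse()
            # as callback; overriding lets you customise the first requests
            yield Request('http://example.com/login', callback=self.parse_login)

        def parse_login(self, response):
            self.logger.info('login page fetched: %s', response.url)
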
@@ -319,8 +319,7 @@ with a ``TestItem`` declared in a ``myproject.items`` module::
description = scrapy.Field()


.. module:: scrapy.spiders
:synopsis: Collection of generic spiders
.. currentmodule:: scrapy.spiders

CrawlSpider
-----------
2 changes: 1 addition & 1 deletion extras/qpsclient.py
@@ -7,7 +7,7 @@
"""

from scrapy.spider import Spider
from scrapy.spiders import Spider
from scrapy.http import Request


2 changes: 1 addition & 1 deletion scrapy/__init__.py
@@ -45,7 +45,7 @@
optional_features.add('http11')

# Declare top-level shortcuts
from scrapy.spider import Spider
from scrapy.spiders import Spider
from scrapy.http import Request, FormRequest
from scrapy.selector import Selector
from scrapy.item import Item, Field
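
After this change the top-level shortcut and the packaged class are the same object, e.g. (a quick illustrative check)::

    import scrapy
    from scrapy.spiders import Spider

    assert scrapy.Spider is Spider
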
2 changes: 1 addition & 1 deletion scrapy/cmdline.py
@@ -8,7 +8,7 @@
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.xlib import lsprofcalltree
from scrapy.command import ScrapyCommand
from scrapy.commands import ScrapyCommand
from scrapy.exceptions import UsageError
from scrapy.utils.misc import walk_modules
from scrapy.utils.project import inside_project, get_project_settings
