Commit 8fd27d5: allow redirects

jadbin committed Aug 28, 2018
1 parent 4799165

Showing 7 changed files with 63 additions and 9 deletions.
5 changes: 4 additions & 1 deletion docs/req_resp.rst
@@ -97,6 +97,10 @@ Request Meta Keys

- ``timeout`` : The timeout of each request can be controlled individually by setting ``timeout``.

- ``verify_ssl`` : Whether to verify the SSL certificate.

- ``allow_redirects`` : Whether to allow redirects.

- ``auth`` : Sets the HTTP Basic Auth of the request; can be a ``str``, a ``tuple``, or an ``aiohttp.helpers.BasicAuth``.

- ``proxy`` : Sets the proxy used by the request.
@@ -109,7 +113,6 @@ Request Meta Keys

- ``depth`` : Records the depth of the current request when :class:`~xpaw.spidermws.DepthMiddleware` is used.


Response API
------------

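These meta keys apply per request. A minimal sketch of the pattern, mirroring the usage in tests/test_downloader.py further down; the top-level HttpRequest import path is an assumption:

from xpaw import HttpRequest

req = HttpRequest('http://example.com/')
req.meta['timeout'] = 10             # per-request timeout, overrides downloader_timeout
req.meta['verify_ssl'] = False       # skip SSL certificate verification for this request
req.meta['allow_redirects'] = False  # return the 3xx response instead of following it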
10 changes: 9 additions & 1 deletion docs/settings.rst
@@ -88,11 +88,19 @@ downloader_timeout
verify_ssl
^^^^^^^^^^

- ``--verify-ssl``
- Default: ``False``

Whether to verify the SSL certificate.

.. _allow_redirects:

allow_redirects
^^^^^^^^^^^^^^^

- Default: ``True``

Whether to allow redirects.

.. _cookie_jar_enabled:

cookie_jar_enabled
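Both settings are read with config.getbool and passed straight into the Downloader constructor (see the xpaw/cluster.py change below). A sketch of the equivalent direct construction; the timeout value is arbitrary:

import asyncio

from xpaw.downloader import Downloader

loop = asyncio.get_event_loop()
# mirrors the defaults documented above: verify_ssl=False, allow_redirects=True
downloader = Downloader(timeout=20.0, verify_ssl=False, allow_redirects=True, loop=loop)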
32 changes: 32 additions & 0 deletions tests/test_downloader.py
@@ -293,6 +293,38 @@ async def post_bytes():
await post_bytes()


async def make_redirect_server(aiohttp_server):
async def process(request):
return web.Response(headers={'Location': 'http://python.org/'}, status=302)

app = web.Application()
app.router.add_route("GET", "/{tail:.*}", process)
server = await aiohttp_server(app)
return server


async def test_allow_redirects(aiohttp_server, loop):
server = await make_redirect_server(aiohttp_server)
downloader = Downloader(loop=loop, allow_redirects=True)
downloader2 = Downloader(loop=loop, allow_redirects=False)

resp = await downloader.download(HttpRequest('http://{}:{}/'.format(server.host, server.port)))
assert resp.status // 100 == 2 and 'python.org' in str(resp.url)

resp = await downloader2.download(HttpRequest('http://{}:{}/'.format(server.host, server.port)))
assert resp.status // 100 == 3

req = HttpRequest('http://{}:{}/'.format(server.host, server.port))
req.meta['allow_redirects'] = False
resp = await downloader.download(req)
assert resp.status // 100 == 3

req = HttpRequest('http://{}:{}/'.format(server.host, server.port))
req.meta['allow_redirects'] = True
resp = await downloader2.download(req)
assert resp.status // 100 == 2 and 'python.org' in str(resp.url)


class FooDownloadermw:
def __init__(self, d):
self.d = d
1 change: 1 addition & 0 deletions xpaw/cluster.py
@@ -36,6 +36,7 @@ def __init__(self, config):
self.dupe_filter = self._new_object_from_cluster(self.config.get('dupe_filter'), self)
self.downloader = Downloader(timeout=self.config.getfloat('downloader_timeout'),
verify_ssl=self.config.getbool('verify_ssl'),
allow_redirects=self.config.getbool('allow_redirects'),
loop=self.loop)
self.spider = self._new_object_from_cluster(self.config.get('spider'), self)
assert isinstance(self.spider, Spider), 'spider must inherit from the Spider class'
2 changes: 1 addition & 1 deletion xpaw/commands.py
@@ -101,7 +101,7 @@ def _import_settings(self):
config.LogLevel, config.LogFile,
config.DumpDir,
config.DownloaderClients, config.DownloaderTimeout,
config.VerifySsl, config.CookieJarEnabled,
config.CookieJarEnabled,
config.MaxDepth)

def add_arguments(self, parser):
8 changes: 5 additions & 3 deletions xpaw/config.py
@@ -210,10 +210,12 @@ class DownloaderTimeout(Setting):

class VerifySsl(Setting):
name = 'verify_ssl'
cli = ['--verify-ssl']
action = 'store_true'
default = False
short_desc = 'verify ssl certifications'


class AllowRedirects(Setting):
name = 'allow_redirects'
default = True


class CookieJarEnabled(Setting):
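With cli and action = 'store_true', passing --verify-ssl on the command line flips the False default to True, while AllowRedirects has no cli entry and can only be changed through configuration. A sketch of the equivalent argparse wiring (assumed; this commit does not show the CLI plumbing itself):

import argparse

parser = argparse.ArgumentParser()
# equivalent of VerifySsl: cli=['--verify-ssl'], action='store_true', default=False
parser.add_argument('--verify-ssl', dest='verify_ssl', action='store_true', default=False,
                    help='verify ssl certifications')
assert parser.parse_args(['--verify-ssl']).verify_ssl is True
assert parser.parse_args([]).verify_ssl is False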
14 changes: 11 additions & 3 deletions xpaw/downloader.py
@@ -20,21 +20,28 @@


class Downloader:
def __init__(self, timeout=None, verify_ssl=True, loop=None):
def __init__(self, timeout=None, verify_ssl=False, allow_redirects=True, loop=None):
self._timeout = timeout
self._verify_ssl = verify_ssl
self._allow_redirects = allow_redirects
self._loop = loop or asyncio.get_event_loop()

async def download(self, request):
log.debug("HTTP request: %s", request)
timeout = request.meta.get("timeout")
if timeout is None:
timeout = self._timeout
verify_ssl = request.meta.get('verify_ssl')
if verify_ssl is None:
verify_ssl = self._verify_ssl
allow_redirects = request.meta.get('allow_redirects')
if allow_redirects is None:
allow_redirects = self._allow_redirects
cookie_jar = request.meta.get('cookie_jar')
auth = parse_request_auth(request.meta.get('auth'))
proxy = parse_request_url(request.meta.get('proxy'))
proxy_auth = parse_request_auth(request.meta.get('proxy_auth'))
async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=self._verify_ssl, loop=self._loop),
async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=verify_ssl, loop=self._loop),
cookies=request.cookies,
cookie_jar=cookie_jar,
loop=self._loop) as session:
@@ -51,7 +58,8 @@ async def download(self, request):
data=data,
json=json,
proxy=proxy,
proxy_auth=proxy_auth) as resp:
proxy_auth=proxy_auth,
allow_redirects=allow_redirects) as resp:
body = await resp.read()
cookies = resp.cookies
response = HttpResponse(resp.url,
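download() resolves each option the same way: a value in request.meta wins, and only None falls through to the Downloader-wide default, so an explicit allow_redirects = False in meta is respected. A standalone sketch of that lookup (a refactoring suggestion, not part of the commit):

def _resolve(meta, key, default):
    # Per-request meta overrides the downloader-wide default; only None
    # falls through, so an explicit False set in meta is kept.
    value = meta.get(key)
    return default if value is None else value

# e.g. allow_redirects = _resolve(request.meta, 'allow_redirects', self._allow_redirects)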
