From 95f7b5a8655365955987e5dd88da2553daa06885 Mon Sep 17 00:00:00 2001
From: jadbin
Date: Fri, 20 Oct 2017 11:27:47 +0800
Subject: [PATCH] use pytest-aiohttp

---
 requirements-ci.txt         |   1 +
 requirements.txt            |   2 +-
 setup.py                    |   2 +-
 tests/helpers.py            |  18 ----
 tests/test_downloader.py    |  46 +++------
 tests/test_downloadermws.py | 199 +++++++++++++++++-------------------
 xpaw/__init__.py            |   1 +
 xpaw/cluster.py             |   4 +-
 xpaw/version.py             |   2 +-
 9 files changed, 114 insertions(+), 161 deletions(-)
 delete mode 100644 tests/helpers.py

diff --git a/requirements-ci.txt b/requirements-ci.txt
index 4127910..13f1a66 100644
--- a/requirements-ci.txt
+++ b/requirements-ci.txt
@@ -3,3 +3,4 @@ pytest
 pytest-cov
 coverage
 coveralls
+pytest-aiohttp>=0.1.3
diff --git a/requirements.txt b/requirements.txt
index 7df82e8..369223f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-aiohttp>=2.0.1
+aiohttp>=2.2.0
 pyyaml
 lxml
diff --git a/setup.py b/setup.py
index 4515d20..3debc97 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@ def main():
         "pyyaml",
         "lxml",
     ]
-    tests_requires = install_requires + ["pytest"]
+    tests_requires = install_requires + ["pytest", "pytest-aiohttp"]
     setup(
         name="xpaw",
         version=read_version(),
diff --git a/tests/helpers.py b/tests/helpers.py
deleted file mode 100644
index 29d9774..0000000
--- a/tests/helpers.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# coding=utf-8
-
-import time
-import socket
-
-
-def wait_server_start(addr):
-    host, port = addr.split(":")
-    port = int(port)
-    not_ready = True
-    while not_ready:
-        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        res = s.connect_ex((host, port))
-        s.close()
-        if res == 0:
-            not_ready = False
-        else:
-            time.sleep(0.1)
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index 74eb6a7..b7554cc 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -1,45 +1,31 @@
 # coding=utf-8
 
-import pytest
 import json
-
-import asyncio
+import random
 
 from xpaw import HttpRequest
 from xpaw.downloader import Downloader, DownloaderMiddlewareManager
 from xpaw.downloadermws import CookieJarMiddleware
 
 
-@pytest.fixture(scope="module")
-def loop(request):
-    def close():
-        loop.close()
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    request.addfinalizer(close)
-    return loop
-
-
-@pytest.fixture(scope="module")
-def downloader(loop):
-    return Downloader(timeout=20, loop=loop)
-
-
-def test_cookies(downloader, loop):
-    req = HttpRequest("http://httpbin.org/cookies", cookies={"k1": "v1", "k2": "v2"})
-    resp = loop.run_until_complete(downloader.download(req))
+async def test_cookies(loop):
+    downloader = Downloader(timeout=20, loop=loop)
+    seed = str(random.randint(0, 2147483647))
+    req = HttpRequest("http://httpbin.org/cookies", cookies={"seed": seed})
+    resp = await downloader.download(req)
     cookies = json.loads(resp.text)["cookies"]
-    assert len(cookies) == 2 and cookies.get("k1") == "v1" and cookies.get("k2") == "v2"
+    assert len(cookies) == 1 and cookies.get("seed") == seed
 
 
-def test_cookie_jar(downloader, loop):
+async def test_cookie_jar(loop):
+    downloader = Downloader(timeout=20, loop=loop)
     dmm = DownloaderMiddlewareManager(CookieJarMiddleware(loop=loop))
-    loop.run_until_complete(dmm.download(downloader, HttpRequest("http://httpbin.org/cookies/set?k1=v1&k2=v2")))
-    resp = loop.run_until_complete(dmm.download(downloader, HttpRequest("http://httpbin.org/cookies")))
+    seed = str(random.randint(0, 2147483647))
+    await dmm.download(downloader, HttpRequest("http://httpbin.org/cookies/set?seed={}".format(seed)))
+    resp = await dmm.download(downloader, HttpRequest("http://httpbin.org/cookies"))
     cookies = json.loads(resp.text)["cookies"]
-    assert len(cookies) == 2 and cookies.get("k1") == "v1" and cookies.get("k2") == "v2"
+    assert len(cookies) == 1 and cookies.get("seed") == seed
-    loop.run_until_complete(dmm.download(downloader, HttpRequest("http://httpbin.org/cookies/delete?k1=")))
-    resp = loop.run_until_complete(dmm.download(downloader, HttpRequest("http://httpbin.org/cookies")))
+    await dmm.download(downloader, HttpRequest("http://httpbin.org/cookies/delete?seed="))
+    resp = await dmm.download(downloader, HttpRequest("http://httpbin.org/cookies"))
     cookies = json.loads(resp.text)["cookies"]
-    assert len(cookies) == 1 and cookies.get("k2") == "v2"
+    assert len(cookies) == 0
diff --git a/tests/test_downloadermws.py b/tests/test_downloadermws.py
index 195d27e..10e6715 100644
--- a/tests/test_downloadermws.py
+++ b/tests/test_downloadermws.py
@@ -1,8 +1,9 @@
 # coding=utf-8
 
 import re
+import json
+import random
 import asyncio
-import threading
 
 import pytest
 from aiohttp import web
@@ -13,121 +14,100 @@
 from xpaw.downloadermws import *
 from xpaw.errors import IgnoreRequest, ResponseNotMatch
 
-from .helpers import wait_server_start
-
-
-@pytest.fixture(scope="module")
-def loop(request):
-    def close():
-        loop.close()
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    request.addfinalizer(close)
-    return loop
-
 
 class TestForwardedForMiddleware:
-    def test_handle_request(self, loop):
+    async def test_handle_request(self):
         mw = ForwardedForMiddleware()
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
+        req = HttpRequest("http://httpbin.org")
+        await mw.handle_request(req)
         assert re.search(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", req.headers["X-Forwarded-For"])
 
 
 class TestRequestHeadersMiddleware:
-    def test_handle_request(self, loop):
+    async def test_handle_request(self):
         headers = {"Content-Type": "text/html", "User-Agent": "xpaw", "Connection": "keep-alive"}
         mw = RequestHeadersMiddleware.from_config(dict(request_headers=headers))
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
+        req = HttpRequest("http://httpbin.org")
+        await mw.handle_request(req)
         assert headers == req.headers
 
 
-@pytest.fixture(scope="module")
-def agent(request):
-    async def handle_request(request):
-        return web.Response(body=b'["127.0.0.1:3128", "127.0.0.1:8080"]')
+def make_proxy_list():
+    return ["127.0.0.1:3128", "127.0.0.1:8080"]
 
-    def handle_error(loop, context):
-        pass
 
-    def start_loop():
-        app = web.Application(loop=loop)
-        app.router.add_resource("/").add_route("GET", handle_request)
-        loop.run_until_complete(loop.create_server(app.make_handler(access_log=None, loop=loop), "0.0.0.0", 7340))
-        try:
-            loop.run_forever()
-        except Exception:
-            pass
-        finally:
-            loop.close()
+def make_another_proxy_list():
+    return ["127.0.0.1:8888", "127.0.0.2:9090"]
+
 
-    def stop_loop():
-        loop.call_soon_threadsafe(loop.stop)
+async def make_proxy_agent(test_server):
+    def get_proxies(request):
+        return web.Response(body=json.dumps(server.proxy_list).encode("utf-8"),
+                            charset="utf-8",
+                            content_type="application/json")
 
-    loop = asyncio.new_event_loop()
-    loop.set_exception_handler(handle_error)
-    t = threading.Thread(target=start_loop)
-    t.start()
-    wait_server_start("127.0.0.1:7340")
-    request.addfinalizer(stop_loop)
+    app = web.Application()
+    app.router.add_route("GET", "/", get_proxies)
+    server = await test_server(app)
+    server.proxy_list = make_proxy_list()
+    return server
+
+
+class Random:
+    def __init__(self):
+        self.iter = 0
+
+    def randint(self, a, b):
+        res = a + self.iter % (b - a + 1)
+        self.iter += 1
+        return res
 
 
 class TestProxyMiddleware:
-    def test_hanle_request(self, loop):
-        mw = ProxyMiddleware(["127.0.0.1"])
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
-        assert req.proxy == "http://127.0.0.1"
+    async def test_handle_request(self, monkeypatch):
+        monkeypatch.setattr(random, 'randint', Random().randint)
+        proxy_list = make_proxy_list()
+        mw = ProxyMiddleware(proxy_list)
+        target_list = proxy_list * 2
+        req = HttpRequest("http://httpbin.org")
+        for i in range(len(target_list)):
+            await mw.handle_request(req)
+            assert req.proxy == "http://{}".format(target_list[i])
 
 
 class TestProxyAgentMiddleware:
-    def test_handle_request(self, loop, monkeypatch):
-        async def _pick_proxy():
-            return "http://127.0.0.1"
-
-        mw = ProxyAgentMiddleware.from_config(Config({"proxy_agent": {"addr": "http://127.0.0.1:7340"}}))
-        monkeypatch.setattr(mw, "_pick_proxy", _pick_proxy)
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
-        assert req.proxy == "http://127.0.0.1"
-
-    def test_pick_proxy(self, loop, monkeypatch):
-        async def _update_proxy_list():
-            while self.index < 2:
-                await asyncio.sleep(0.1)
-            self.index += 1
-            mw._proxy_list = proxy_list[self.index - 1]
-
-        self.index = 0
-        proxy_list = [[], ["127.0.0.1", "127.0.0.2"]]
-        res = ["http://127.0.0.1", "http://127.0.0.2"]
-        mw = ProxyAgentMiddleware.from_config(
-            Config({"proxy_agent": {"addr": "127.0.0.1:7340", "update_interval": 0.1}}))
-        monkeypatch.setattr(mw, "_update_proxy_list", _update_proxy_list)
+    async def test_handle_request(self, monkeypatch, test_server, loop):
+        monkeypatch.setattr(random, 'randint', Random().randint)
+        server = await make_proxy_agent(test_server)
+        mw = ProxyAgentMiddleware.from_config(Config({"proxy_agent":
+                                                          {"addr": "http://{}:{}".format(server.host, server.port)},
+                                                      "downloader_loop": loop}))
         mw.open()
-        for i in range(len(res)):
-            req = HttpRequest("http://www.example.com")
-            loop.run_until_complete(mw.handle_request(req))
-            assert req.proxy in res
-
-    def test_update_proxy_list(self, loop, agent):
-        async def _func():
-            while mw._proxy_list is None:
-                await asyncio.sleep(0.1)
+        await asyncio.sleep(0.1, loop=loop)
+        req = HttpRequest("http://httpbin.org")
+        target_list = make_proxy_list() * 2
+        for i in range(len(target_list)):
+            await mw.handle_request(req)
+            assert req.proxy == "http://{}".format(target_list[i])
+        mw.close()
 
+    async def test_update_proxy_list(self, test_server, loop):
+        server = await make_proxy_agent(test_server)
         mw = ProxyAgentMiddleware.from_config(
-            Config({"proxy_agent": {"addr": "http://127.0.0.1:7340", "update_interval": 0.1}}))
+            Config({"proxy_agent": {"addr": "http://{}:{}".format(server.host, server.port),
+                                    "update_interval": 0.05},
+                    "downloader_loop": loop}))
         mw.open()
-        loop.run_until_complete(_func())
-        assert mw._proxy_list == ["127.0.0.1:3128", "127.0.0.1:8080"]
+        await asyncio.sleep(0.1, loop=loop)
+        assert mw._proxy_list == make_proxy_list()
+        server.proxy_list = make_another_proxy_list()
+        await asyncio.sleep(0.1, loop=loop)
+        assert mw._proxy_list == make_another_proxy_list()
         mw.close()
-        loop.run_until_complete(asyncio.sleep(0.1))
 
 
 class TestRetryMiddleware:
-    def test_handle_reponse(self, loop, monkeypatch):
+    async def test_handle_response(self, monkeypatch, loop):
         class ErrorFlag(Exception):
             pass
@@ -135,16 +115,16 @@
         def _retry(request, reason):
             assert isinstance(request, HttpRequest) and isinstance(reason, str)
             raise ErrorFlag
 
-        mw = RetryMiddleware.from_config(Config())
+        mw = RetryMiddleware.from_config(Config({"downloader_loop": loop}))
         monkeypatch.setattr(mw, "retry", _retry)
-        req = HttpRequest("http://www.example.com")
-        resp = HttpResponse(URL("http://www.example.com"), 400)
-        loop.run_until_complete(mw.handle_response(req, resp))
+        req = HttpRequest("http://httpbin.org")
+        resp = HttpResponse(URL("http://httpbin.org"), 400)
+        await mw.handle_response(req, resp)
         with pytest.raises(ErrorFlag):
-            resp = HttpResponse(URL("http://www.example.com"), 503)
-            loop.run_until_complete(mw.handle_response(req, resp))
+            resp = HttpResponse(URL("http://httpbin.org"), 503)
+            await mw.handle_response(req, resp)
 
-    def test_handle_error(self, loop, monkeypatch):
+    async def test_handle_error(self, loop, monkeypatch):
         class ErrorFlag(Exception):
             pass
@@ -152,19 +132,20 @@ def _retry(request, reason):
             assert isinstance(request, HttpRequest) and isinstance(reason, str)
             raise ErrorFlag
 
-        mw = RetryMiddleware.from_config(Config())
+        mw = RetryMiddleware.from_config(Config({"downloader_loop": loop}))
         monkeypatch.setattr(mw, "retry", _retry)
-        req = HttpRequest("http://www.example.com")
+        req = HttpRequest("http://httpbin.org")
         err = ValueError()
-        loop.run_until_complete(mw.handle_error(req, err))
+        await mw.handle_error(req, err)
         with pytest.raises(ErrorFlag):
             err = ResponseNotMatch()
-            loop.run_until_complete(mw.handle_error(req, err))
+            await mw.handle_error(req, err)
 
-    def test_retry(self):
+    async def test_retry(self, loop):
         max_retry_times = 2
-        mw = RetryMiddleware.from_config(Config({"retry": {"max_retry_times": max_retry_times}}))
-        req = HttpRequest("http://www.example.com")
+        mw = RetryMiddleware.from_config(Config({"retry": {"max_retry_times": max_retry_times},
+                                                 "downloader_loop": loop}))
+        req = HttpRequest("http://httpbin.org")
         for i in range(max_retry_times):
             req = mw.retry(req, "")
             assert isinstance(req, HttpRequest)
@@ -180,26 +161,28 @@ def test_match_status(self):
         assert RetryMiddleware.match_status("50x", 403) is False
         assert RetryMiddleware.match_status("~20X", 200) is False
         assert RetryMiddleware.match_status("!20x", 400) is True
+        assert RetryMiddleware.match_status("0200", 200) is False
 
 
 class TestResponseMatchMiddleware:
-    def test_handle_response(self, loop):
+    async def test_handle_response(self, loop):
         req_baidu = HttpRequest("http://www.baidu.com")
         req_qq = HttpRequest("http://www.qq.com")
         resp_baidu = HttpResponse(URL("http://www.baidu.com"), 200, body="百度一下,你就知道".encode("utf-8"))
         resp_qq = HttpResponse(URL("http://www.qq.com"), 200, body="腾讯QQ".encode("utf-8"))
         mw = ResponseMatchMiddleware.from_config(Config({"response_match": [{"url_pattern": "baidu\\.com",
                                                                              "body_pattern": "百度",
-                                                                             "encoding": "utf-8"}]}))
-        loop.run_until_complete(mw.handle_response(req_baidu, resp_baidu))
+                                                                             "encoding": "utf-8"}],
+                                                         "downloader_loop": loop}))
+        await mw.handle_response(req_baidu, resp_baidu)
         with pytest.raises(ResponseNotMatch):
-            loop.run_until_complete(mw.handle_response(req_baidu, resp_qq))
-        loop.run_until_complete(mw.handle_response(req_qq, resp_qq))
+            await mw.handle_response(req_baidu, resp_qq)
+        await mw.handle_response(req_qq, resp_qq)
 
 
 class TestCookieJarMiddleware:
-    def test_handle_request(self, loop):
+    async def test_handle_request(self, loop):
         mw = CookieJarMiddleware.from_config(Config({"downloader_loop": loop}))
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
+        req = HttpRequest("http://httpbin.org")
+        await mw.handle_request(req)
         assert req.meta.get("cookie_jar") is mw._cookie_jar
diff --git a/xpaw/__init__.py b/xpaw/__init__.py
index b470ce1..e507d93 100644
--- a/xpaw/__init__.py
+++ b/xpaw/__init__.py
@@ -12,3 +12,4 @@
 from xpaw.spider import Spider
 from xpaw.http import HttpRequest, HttpResponse
 from xpaw.selector import Selector
+from xpaw.downloader import Downloader
diff --git a/xpaw/cluster.py b/xpaw/cluster.py
index 4d1c9e6..f9661f2 100644
--- a/xpaw/cluster.py
+++ b/xpaw/cluster.py
@@ -38,7 +38,7 @@ async def _push_start_requests(self):
                     self._queue.push(res)
                 await asyncio.sleep(0.01, loop=self._downloader_loop)
         except Exception:
-            log.warning("Error while handling start requests", exc_info=True)
+            log.warning("Error occurred while handling start requests", exc_info=True)
 
     def _start_downloader_loop(self):
         self._futures = []
@@ -54,7 +54,7 @@ def _start_downloader_loop(self):
             log.info("Start event loop")
             self._downloader_loop.run_forever()
         except Exception:
-            log.error("Error while running event loop", exc_info=True)
+            log.error("Error occurred while running event loop", exc_info=True)
             raise
         finally:
             log.info("Close event loop")
diff --git a/xpaw/version.py b/xpaw/version.py
index 1a707c1..4f6de1c 100644
--- a/xpaw/version.py
+++ b/xpaw/version.py
@@ -1,3 +1,3 @@
 # coding=utf-8
 
-__version__ = "0.6.6"
+__version__ = "0.7.0a1"