use pytest-aiohttp
jadbin committed Oct 20, 2017
1 parent 97da2fd commit 95f7b5a
Showing 9 changed files with 114 additions and 161 deletions.
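This commit moves the test suite onto the pytest-aiohttp plugin: coroutine test functions are collected directly, and each one runs on the event loop supplied by the plugin's loop fixture, replacing the hand-rolled new_event_loop()/run_until_complete() boilerplate deleted below. A minimal sketch of that style (the test name is illustrative, not from this repo):

    import asyncio

    async def test_event_loop_fixture(loop):
        # pytest-aiohttp runs this coroutine on the loop it injects,
        # so no manual loop setup or teardown is needed.
        await asyncio.sleep(0.01, loop=loop)
        assert loop.is_running()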
1 change: 1 addition & 0 deletions requirements-ci.txt
@@ -3,3 +3,4 @@ pytest
 pytest-cov
 coverage
 coveralls
+pytest-aiohttp>=0.1.3
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,3 +1,3 @@
-aiohttp>=2.0.1
+aiohttp>=2.2.0
 pyyaml
 lxml
2 changes: 1 addition & 1 deletion setup.py
@@ -34,7 +34,7 @@ def main():
         "pyyaml",
         "lxml",
     ]
-    tests_requires = install_requires + ["pytest"]
+    tests_requires = install_requires + ["pytest", "pytest-aiohttp"]
    setup(
        name="xpaw",
        version=read_version(),
18 changes: 0 additions & 18 deletions tests/helpers.py

This file was deleted.
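The deleted module provided wait_server_start, which the old tests below imported to block until a thread-hosted aiohttp server accepted connections. Its body is not shown in this diff, but such a helper is typically a connect-poll loop, roughly like the hypothetical reconstruction here; pytest-aiohttp's test_server fixture makes it redundant, since await test_server(app) returns only once the server is listening.

    import socket
    import time

    # Hypothetical reconstruction of the deleted helper (the real body is
    # not shown in this diff).
    def wait_server_start(addr, timeout=10):
        host, port = addr.split(":")
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                # Poll until a TCP connection to the server succeeds.
                with socket.create_connection((host, int(port)), timeout=1):
                    return
            except OSError:
                time.sleep(0.1)
        raise RuntimeError("server {} did not start in time".format(addr))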

46 changes: 16 additions & 30 deletions tests/test_downloader.py
@@ -1,45 +1,31 @@
 # coding=utf-8
 
-import pytest
 import json
-
-import asyncio
+import random
 
 from xpaw import HttpRequest
 from xpaw.downloader import Downloader, DownloaderMiddlewareManager
 from xpaw.downloadermws import CookieJarMiddleware
 
 
-@pytest.fixture(scope="module")
-def loop(request):
-    def close():
-        loop.close()
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    request.addfinalizer(close)
-    return loop
-
-
-@pytest.fixture(scope="module")
-def downloader(loop):
-    return Downloader(timeout=20, loop=loop)
-
-
-def test_cookies(downloader, loop):
-    req = HttpRequest("http://httpbin.org/cookies", cookies={"k1": "v1", "k2": "v2"})
-    resp = loop.run_until_complete(downloader.download(req))
+async def test_cookies(loop):
+    downloader = Downloader(timeout=20, loop=loop)
+    seed = str(random.randint(0, 2147483647))
+    req = HttpRequest("http://httpbin.org/cookies", cookies={"seed": seed})
+    resp = await downloader.download(req)
     cookies = json.loads(resp.text)["cookies"]
-    assert len(cookies) == 2 and cookies.get("k1") == "v1" and cookies.get("k2") == "v2"
+    assert len(cookies) == 1 and cookies.get("seed") == seed
 
 
-def test_cookie_jar(downloader, loop):
+async def test_cookie_jar(loop):
+    downloader = Downloader(timeout=20, loop=loop)
     dmm = DownloaderMiddlewareManager(CookieJarMiddleware(loop=loop))
-    loop.run_until_complete(dmm.download(downloader, HttpRequest("http://httpbin.org/cookies/set?k1=v1&k2=v2")))
-    resp = loop.run_until_complete(dmm.download(downloader, HttpRequest("http://httpbin.org/cookies")))
+    seed = str(random.randint(0, 2147483647))
+    await dmm.download(downloader, HttpRequest("http://httpbin.org/cookies/set?seed={}".format(seed)))
+    resp = await dmm.download(downloader, HttpRequest("http://httpbin.org/cookies"))
     cookies = json.loads(resp.text)["cookies"]
-    assert len(cookies) == 2 and cookies.get("k1") == "v1" and cookies.get("k2") == "v2"
-    loop.run_until_complete(dmm.download(downloader, HttpRequest("http://httpbin.org/cookies/delete?k1=")))
-    resp = loop.run_until_complete(dmm.download(downloader, HttpRequest("http://httpbin.org/cookies")))
+    assert len(cookies) == 1 and cookies.get("seed") == seed
+    await dmm.download(downloader, HttpRequest("http://httpbin.org/cookies/delete?seed="))
+    resp = await dmm.download(downloader, HttpRequest("http://httpbin.org/cookies"))
     cookies = json.loads(resp.text)["cookies"]
-    assert len(cookies) == 1 and cookies.get("k2") == "v2"
+    assert len(cookies) == 0
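Note that the module-scoped downloader fixture is dropped rather than converted: pytest-aiohttp creates a fresh event loop for each test, so a Downloader bound to one loop cannot safely be shared across a module, and each test now builds its own. If the shared-fixture convenience were wanted back, a function-scoped fixture would fit (a sketch, not part of this commit):

    import pytest

    from xpaw.downloader import Downloader

    @pytest.fixture
    def downloader(loop):
        # Rebuilt for every test, so it always binds to that test's loop.
        return Downloader(timeout=20, loop=loop)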
199 changes: 91 additions & 108 deletions tests/test_downloadermws.py
@@ -1,8 +1,9 @@
 # coding=utf-8
 
 import re
+import json
+import random
 import asyncio
-import threading
 
 import pytest
 from aiohttp import web
@@ -13,158 +14,138 @@
 from xpaw.downloadermws import *
 from xpaw.errors import IgnoreRequest, ResponseNotMatch
 
-from .helpers import wait_server_start
-
-
-@pytest.fixture(scope="module")
-def loop(request):
-    def close():
-        loop.close()
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    request.addfinalizer(close)
-    return loop
 
 
 class TestForwardedForMiddleware:
-    def test_handle_request(self, loop):
+    async def test_handle_request(self):
         mw = ForwardedForMiddleware()
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
+        req = HttpRequest("http://httpbin.org")
+        await mw.handle_request(req)
         assert re.search(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", req.headers["X-Forwarded-For"])
 
 
 class TestRequestHeadersMiddleware:
-    def test_handle_request(self, loop):
+    async def test_handle_request(self):
         headers = {"Content-Type": "text/html", "User-Agent": "xpaw", "Connection": "keep-alive"}
         mw = RequestHeadersMiddleware.from_config(dict(request_headers=headers))
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
+        req = HttpRequest("http://httpbin.org")
+        await mw.handle_request(req)
         assert headers == req.headers
 
 
-@pytest.fixture(scope="module")
-def agent(request):
-    async def handle_request(request):
-        return web.Response(body=b'["127.0.0.1:3128", "127.0.0.1:8080"]')
+def make_proxy_list():
+    return ["127.0.0.1:3128", "127.0.0.1:8080"]
 
-    def handle_error(loop, context):
-        pass
-
-    def start_loop():
-        app = web.Application(loop=loop)
-        app.router.add_resource("/").add_route("GET", handle_request)
-        loop.run_until_complete(loop.create_server(app.make_handler(access_log=None, loop=loop), "0.0.0.0", 7340))
-        try:
-            loop.run_forever()
-        except Exception:
-            pass
-        finally:
-            loop.close()
+def make_another_proxy_list():
+    return ["127.0.0.1:8888", "127.0.0.2:9090"]
+
 
-    def stop_loop():
-        loop.call_soon_threadsafe(loop.stop)
+async def make_proxy_agent(test_server):
+    def get_proxies(request):
+        return web.Response(body=json.dumps(server.proxy_list).encode("utf-8"),
+                            charset="utf-8",
+                            content_type="application/json")
 
-    loop = asyncio.new_event_loop()
-    loop.set_exception_handler(handle_error)
-    t = threading.Thread(target=start_loop)
-    t.start()
-    wait_server_start("127.0.0.1:7340")
-    request.addfinalizer(stop_loop)
+    app = web.Application()
+    app.router.add_route("GET", "/", get_proxies)
+    server = await test_server(app)
+    server.proxy_list = make_proxy_list()
+    return server
 
 
+class Random:
+    def __init__(self):
+        self.iter = 0
+
+    def randint(self, a, b):
+        res = a + self.iter % (b - a + 1)
+        self.iter += 1
+        return res
+
+
 class TestProxyMiddleware:
-    def test_hanle_request(self, loop):
-        mw = ProxyMiddleware(["127.0.0.1"])
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
-        assert req.proxy == "http://127.0.0.1"
+    async def test_hanle_request(self, monkeypatch):
+        monkeypatch.setattr(random, 'randint', Random().randint)
+        proxy_list = make_proxy_list()
+        mw = ProxyMiddleware(proxy_list)
+        target_list = proxy_list * 2
+        req = HttpRequest("http://httpbin.org")
+        for i in range(len(target_list)):
+            await mw.handle_request(req)
+            assert req.proxy == "http://{}".format(target_list[i])
 
 
 class TestProxyAgentMiddleware:
-    def test_handle_request(self, loop, monkeypatch):
-        async def _pick_proxy():
-            return "http://127.0.0.1"
-
-        mw = ProxyAgentMiddleware.from_config(Config({"proxy_agent": {"addr": "http://127.0.0.1:7340"}}))
-        monkeypatch.setattr(mw, "_pick_proxy", _pick_proxy)
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
-        assert req.proxy == "http://127.0.0.1"
-
-    def test_pick_proxy(self, loop, monkeypatch):
-        async def _update_proxy_list():
-            while self.index < 2:
-                await asyncio.sleep(0.1)
-            self.index += 1
-            mw._proxy_list = proxy_list[self.index - 1]
-
-        self.index = 0
-        proxy_list = [[], ["127.0.0.1", "127.0.0.2"]]
-        res = ["http://127.0.0.1", "http://127.0.0.2"]
-        mw = ProxyAgentMiddleware.from_config(
-            Config({"proxy_agent": {"addr": "127.0.0.1:7340", "update_interval": 0.1}}))
-        monkeypatch.setattr(mw, "_update_proxy_list", _update_proxy_list)
+    async def test_handle_request(self, monkeypatch, test_server, loop):
+        monkeypatch.setattr(random, 'randint', Random().randint)
+        server = await make_proxy_agent(test_server)
+        mw = ProxyAgentMiddleware.from_config(Config({"proxy_agent":
+                                                      {"addr": "http://{}:{}".format(server.host, server.port)},
+                                                      "downloader_loop": loop}))
         mw.open()
-        for i in range(len(res)):
-            req = HttpRequest("http://www.example.com")
-            loop.run_until_complete(mw.handle_request(req))
-            assert req.proxy in res
-
-    def test_update_proxy_list(self, loop, agent):
-        async def _func():
-            while mw._proxy_list is None:
-                await asyncio.sleep(0.1)
+        await asyncio.sleep(0.1, loop=loop)
+        req = HttpRequest("http://httpbin.org")
+        target_list = make_proxy_list() * 2
+        for i in range(len(target_list)):
+            await mw.handle_request(req)
+            assert req.proxy == "http://{}".format(target_list[i])
+        mw.close()
 
+    async def test_update_proxy_list(self, test_server, loop):
+        server = await make_proxy_agent(test_server)
         mw = ProxyAgentMiddleware.from_config(
-            Config({"proxy_agent": {"addr": "http://127.0.0.1:7340", "update_interval": 0.1}}))
+            Config({"proxy_agent": {"addr": "http://{}:{}".format(server.host, server.port),
+                                    "update_interval": 0.05},
+                    "downloader_loop": loop}))
         mw.open()
-        loop.run_until_complete(_func())
-        assert mw._proxy_list == ["127.0.0.1:3128", "127.0.0.1:8080"]
+        await asyncio.sleep(0.1, loop=loop)
+        assert mw._proxy_list == make_proxy_list()
+        server.proxy_list = make_another_proxy_list()
+        await asyncio.sleep(0.1, loop=loop)
+        assert mw._proxy_list == make_another_proxy_list()
         mw.close()
-        loop.run_until_complete(asyncio.sleep(0.1))
 
 
 class TestRetryMiddleware:
-    def test_handle_reponse(self, loop, monkeypatch):
+    async def test_handle_reponse(self, monkeypatch, loop):
         class ErrorFlag(Exception):
             pass
 
         def _retry(request, reason):
             assert isinstance(request, HttpRequest) and isinstance(reason, str)
             raise ErrorFlag
 
-        mw = RetryMiddleware.from_config(Config())
+        mw = RetryMiddleware.from_config(Config({"downloader_loop": loop}))
         monkeypatch.setattr(mw, "retry", _retry)
-        req = HttpRequest("http://www.example.com")
-        resp = HttpResponse(URL("http://www.example.com"), 400)
-        loop.run_until_complete(mw.handle_response(req, resp))
+        req = HttpRequest("http://httpbin.org")
+        resp = HttpResponse(URL("http://httpbin.org"), 400)
+        await mw.handle_response(req, resp)
         with pytest.raises(ErrorFlag):
-            resp = HttpResponse(URL("http://www.example.com"), 503)
-            loop.run_until_complete(mw.handle_response(req, resp))
+            resp = HttpResponse(URL("http://httpbin.org"), 503)
+            await mw.handle_response(req, resp)
 
-    def test_handle_error(self, loop, monkeypatch):
+    async def test_handle_error(self, loop, monkeypatch):
         class ErrorFlag(Exception):
             pass
 
         def _retry(request, reason):
             assert isinstance(request, HttpRequest) and isinstance(reason, str)
             raise ErrorFlag
 
-        mw = RetryMiddleware.from_config(Config())
+        mw = RetryMiddleware.from_config(Config({"downloader_loop": loop}))
        monkeypatch.setattr(mw, "retry", _retry)
-        req = HttpRequest("http://www.example.com")
+        req = HttpRequest("http://httpbin.org")
         err = ValueError()
-        loop.run_until_complete(mw.handle_error(req, err))
+        await mw.handle_error(req, err)
         with pytest.raises(ErrorFlag):
             err = ResponseNotMatch()
-            loop.run_until_complete(mw.handle_error(req, err))
+            await mw.handle_error(req, err)
 
-    def test_retry(self):
+    async def test_retry(self, loop):
         max_retry_times = 2
-        mw = RetryMiddleware.from_config(Config({"retry": {"max_retry_times": max_retry_times}}))
-        req = HttpRequest("http://www.example.com")
+        mw = RetryMiddleware.from_config(Config({"retry": {"max_retry_times": max_retry_times},
+                                                 "downloader_loop": loop}))
+        req = HttpRequest("http://httpbin.org")
         for i in range(max_retry_times):
             req = mw.retry(req, "")
             assert isinstance(req, HttpRequest)
@@ -180,26 +161,28 @@ def test_match_status(self):
         assert RetryMiddleware.match_status("50x", 403) is False
         assert RetryMiddleware.match_status("~20X", 200) is False
         assert RetryMiddleware.match_status("!20x", 400) is True
+        assert RetryMiddleware.match_status("0200", 200) is False
 
 
 class TestResponseMatchMiddleware:
-    def test_handle_response(self, loop):
+    async def test_handle_response(self, loop):
         req_baidu = HttpRequest("http://www.baidu.com")
         req_qq = HttpRequest("http://www.qq.com")
         resp_baidu = HttpResponse(URL("http://www.baidu.com"), 200, body="<title>百度一下,你就知道</title>".encode("utf-8"))
         resp_qq = HttpResponse(URL("http://www.qq.com"), 200, body="<title>腾讯QQ</title>".encode("utf-8"))
         mw = ResponseMatchMiddleware.from_config(Config({"response_match": [{"url_pattern": "baidu\\.com",
                                                                              "body_pattern": "百度",
-                                                                             "encoding": "utf-8"}]}))
-        loop.run_until_complete(mw.handle_response(req_baidu, resp_baidu))
+                                                                             "encoding": "utf-8"}],
+                                                         "downloader_loop": loop}))
+        await mw.handle_response(req_baidu, resp_baidu)
         with pytest.raises(ResponseNotMatch):
-            loop.run_until_complete(mw.handle_response(req_baidu, resp_qq))
-        loop.run_until_complete(mw.handle_response(req_qq, resp_qq))
+            await mw.handle_response(req_baidu, resp_qq)
+        await mw.handle_response(req_qq, resp_qq)
 
 
 class TestCookieJarMiddleware:
-    def test_handle_request(self, loop):
+    async def test_handle_request(self, loop):
         mw = CookieJarMiddleware.from_config(Config({"downloader_loop": loop}))
-        req = HttpRequest("http://www.example.com")
-        loop.run_until_complete(mw.handle_request(req))
+        req = HttpRequest("http://httpbin.org")
+        await mw.handle_request(req)
         assert req.meta.get("cookie_jar") is mw._cookie_jar
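make_proxy_agent above illustrates the new pattern for in-process test servers: build an aiohttp web.Application, pass it to pytest-aiohttp's test_server fixture, and await the started server; the fixture picks a free port and tears the server down after the test. A self-contained sketch of the same pattern (the route and payload are illustrative only):

    import json

    from aiohttp import web

    async def test_server_sketch(test_server):
        async def ping(request):
            return web.Response(body=json.dumps({"ok": True}).encode("utf-8"),
                                content_type="application/json")

        app = web.Application()
        app.router.add_route("GET", "/ping", ping)
        server = await test_server(app)
        # The started server exposes the host and port it bound to.
        assert server.host and server.port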
1 change: 1 addition & 0 deletions xpaw/__init__.py
@@ -12,3 +12,4 @@
 from xpaw.spider import Spider
 from xpaw.http import HttpRequest, HttpResponse
 from xpaw.selector import Selector
+from xpaw.downloader import Downloader
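With Downloader exported from the package root, callers can import it directly; a usage sketch (fetch is a hypothetical helper built from the same calls the tests above use):

    from xpaw import Downloader, HttpRequest

    async def fetch(url, loop):
        # Same construction the tests use; the caller supplies the event loop.
        downloader = Downloader(timeout=20, loop=loop)
        return await downloader.download(HttpRequest(url))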
(2 of the 9 changed files did not render and are not shown.)