Skip to content

Commit

Permalink
add CacheOnlyClient, CacheSkipClient, WebClient
Browse files Browse the repository at this point in the history
fix bug with CrawlerClient
  • Loading branch information
eugen1j committed Oct 14, 2019
1 parent 1428177 commit 050a5fb
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 24 deletions.
6 changes: 5 additions & 1 deletion aioscrapy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
WebByteClient,
Client,
CacheClient,
RetryClient
CacheOnlyClient,
CacheSkipClient,
RetryClient,
CrawlerClient,
WebClient
)

from .cache import (
Expand Down
67 changes: 50 additions & 17 deletions aioscrapy/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from abc import ABC, abstractmethod
from typing import Generic, Optional, Tuple, Iterable

from aiohttp import ClientResponse

from .cache import Cache
from .typedefs import KT, VT
from .session import SessionPool
Expand Down Expand Up @@ -36,16 +38,40 @@ async def fetch(self, key: str) -> Optional[VT]:
return new_value


class WebTextClient(Client[str, str]):
class CacheOnlyClient(Client[str, VT]):
    """Client whose ``fetch`` is answered solely by the cache.

    The wrapped client is stored on the instance, but ``fetch`` never
    calls it: only ``cache.get`` is consulted.
    """

    def __init__(self, client: Client[str, VT], cache: Cache[str, VT]):
        """Keep references to the wrapped client and the cache."""
        self._cache = cache
        self._client = client

    async def fetch(self, key: str) -> Optional[VT]:
        """Return whatever the cache's ``get`` yields for ``key``."""
        cached = self._cache.get(key)
        return cached


class CacheSkipClient(Client[str, VT]):
    """Client that skips keys which already have a cached value.

    ``fetch`` returns ``None`` when the cache holds a non-``None`` value
    for the key. Otherwise it delegates to the wrapped client and stores
    any non-``None`` result in the cache before returning it.
    """

    def __init__(self, client: Client[str, VT], cache: Cache[str, VT]):
        """Keep references to the wrapped client and the cache."""
        self._cache = cache
        self._client = client

    async def fetch(self, key: str) -> Optional[VT]:
        """Fetch ``key`` through the wrapped client unless it is cached."""
        # Already cached: signal "nothing new" instead of returning the value.
        if self._cache.get(key) is not None:
            return None

        fetched = await self._client.fetch(key)
        if fetched is not None:
            self._cache.set(key, fetched)
        return fetched


class WebClient(Client[str, Tuple[ClientResponse, bytes]]):
def __init__(self, session_pool: SessionPool):
self._session_pool = session_pool

async def fetch(self, url: str) -> Optional[str]:
async def fetch(self, key: str) -> Optional[Tuple[ClientResponse, bytes]]:
proxy, session = self._session_pool.rand()
try:
response: aiohttp.ClientResponse = await session.get(url, proxy=proxy)
data = await response.text()
return data
response: aiohttp.ClientResponse = await session.get(key, proxy=proxy)
return response, await response.read()
except (aiohttp.ClientHttpProxyError, aiohttp.ClientProxyConnectionError):
if proxy is not None:
self._session_pool.pop(proxy)
Expand All @@ -55,23 +81,30 @@ async def fetch(self, url: str) -> Optional[str]:
return None


class WebByteClient(Client[str, str]):
class WebTextClient(Client[str, str]):
def __init__(self, session_pool: SessionPool):
self._session_pool = session_pool

async def fetch(self, url: str) -> Optional[bytes]:
proxy, session = self._session_pool.rand()
try:
response: aiohttp.ClientResponse = await session.get(url, proxy=proxy)
data = await response.read()
return data
except (aiohttp.ClientHttpProxyError, aiohttp.ClientProxyConnectionError):
if proxy is not None:
self._session_pool.pop(proxy)
async def fetch(self, key: str) -> Optional[str]:
client = WebClient(self._session_pool)
response_and_body = await client.fetch(key)
if response_and_body is None:
return None
except aiohttp.ClientError:
print(traceback.format_exc())
response, body = response_and_body
return body.decode(response.get_encoding())


class WebByteClient(Client[str, bytes]):
    """Client that returns only the body bytes produced by ``WebClient``."""

    def __init__(self, session_pool: SessionPool):
        """Remember the session pool handed to each ``WebClient``."""
        self._session_pool = session_pool

    async def fetch(self, key: str) -> Optional[bytes]:
        """Fetch ``key`` via a fresh ``WebClient`` and return the body part.

        Returns ``None`` when the underlying ``WebClient.fetch`` returns
        ``None``.
        """
        result = await WebClient(self._session_pool).fetch(key)
        if result is None:
            return None
        # The result is a (response, body) pair; only the body is exposed.
        _response, payload = result
        return payload


class RetryClient(Client[KT, VT]):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
EMAIL = "eugenij.bondar@gmail.com"
AUTHOR = "eugen1j"
REQUIRES_PYTHON = ">=3.7.0"
VERSION = "0.1.4"
VERSION = "0.1.5"

here = os.path.abspath(os.path.dirname(__file__))
with open(f"{here}/README.md") as f:
Expand Down
36 changes: 31 additions & 5 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from aioscrapy.cache import FakeCache
from aioscrapy.client import Client, FakeClient, CacheClient, RetryClient
from aioscrapy.client import Client, FakeClient, CacheClient, RetryClient, CacheOnlyClient, CacheSkipClient


class ForRetryClient(Client[str, str]):
Expand All @@ -25,7 +25,7 @@ async def fetch(self, key: str) -> Optional[str]:
async def test_fake_client():
client = FakeClient()
key = 'key'
assert key == await client.fetch(key)
assert await client.fetch(key) == key


@pytest.mark.asyncio
Expand All @@ -36,8 +36,35 @@ async def test_cache_client():
)

key = 'key'
assert key == await client.fetch(key)
assert key == await client.fetch(key)
assert await client.fetch(key) == key
assert await client.fetch(key) == key


@pytest.mark.asyncio
async def test_cache_only_client():
    """CacheOnlyClient yields None until the cache is populated."""
    key = 'key'
    cache = FakeCache()
    fake_client = FakeClient()
    client = CacheOnlyClient(FakeClient(), cache)

    # Nothing cached yet, so the cache-only client has nothing to return.
    assert await client.fetch(key) is None

    # Populate the cache by hand with a value from a separate fake client.
    fetched = await fake_client.fetch(key)
    cache.set(key, fetched)
    assert await client.fetch(key) == key


@pytest.mark.asyncio
async def test_cache_skip_client():
    """CacheSkipClient returns a value once, then None for the cached key."""
    key = 'key'
    client = CacheSkipClient(FakeClient(), FakeCache())

    # First call misses the cache and goes through to the wrapped client.
    first = await client.fetch(key)
    assert first == key

    # Second call finds the key cached and is therefore skipped.
    second = await client.fetch(key)
    assert second is None


@pytest.mark.asyncio
Expand Down Expand Up @@ -67,4 +94,3 @@ async def test_retry_client_enough_tries():
)
key = 'key'
assert await client.fetch(key) is key

0 comments on commit 050a5fb

Please sign in to comment.