-
Notifications
You must be signed in to change notification settings - Fork 64
/
requests_client.py
76 lines (55 loc) · 2.26 KB
/
requests_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""requests-based implementation of web client class."""
from typing import Optional
import requests
from requests import Response
from .abstract_client import AbstractWebClientResponse, AbstractWebClient
from usp.__about__ import __version__
class RequestsWebClientResponse(AbstractWebClientResponse):
    """
    Web client response backed by a ``requests`` ``Response`` object.

    Wraps the underlying response together with an optional cap on how many
    bytes of the body are handed back by :meth:`raw_data`.
    """

    __slots__ = [
        '__requests_response',
        '__max_response_data_length',
    ]

    def __init__(self, requests_response: Response, max_response_data_length: Optional[int]):
        """
        Store the wrapped response and the body length limit.

        :param requests_response: Response object returned by ``requests``.
        :param max_response_data_length: Maximum number of body bytes to return
            from :meth:`raw_data`; a falsy value (``None`` or ``0``) means the
            body is returned untruncated.
        """
        self.__max_response_data_length = max_response_data_length
        self.__requests_response = requests_response

    def status_code(self) -> int:
        """Return the HTTP status code of the wrapped response as an ``int``."""
        code = self.__requests_response.status_code
        return int(code)

    def status_message(self) -> str:
        """Return the ``reason`` attribute of the wrapped response."""
        wrapped = self.__requests_response
        return wrapped.reason

    def header(self, case_insensitive_name: str) -> Optional[str]:
        """
        Look up a response header by name.

        The name is lower-cased before the lookup.

        :param case_insensitive_name: Header name in any letter case.
        :return: Header value, or ``None`` if the header is not present.
        """
        lookup_key = case_insensitive_name.lower()
        # dict-style .get() already yields None for a missing key.
        return self.__requests_response.headers.get(lookup_key)

    def raw_data(self) -> bytes:
        """
        Return the response body as bytes.

        :return: The body, cut down to the configured maximum length when a
            truthy limit is set; the complete body otherwise.
        """
        limit = self.__max_response_data_length
        body = self.__requests_response.content
        return body[:limit] if limit else body
class RequestsWebClient(AbstractWebClient):
    """Web client built on ``requests``, to be used by the sitemap fetcher."""

    # Value sent in the "User-Agent" header of every request.
    __USER_AGENT = 'ultimate_sitemap_parser/{}'.format(__version__)

    # HTTP request timeout, handed to requests as its ``timeout`` argument.
    # It is deliberately rather big because some webservers might be
    # generating huge sitemaps on the fly.
    __HTTP_REQUEST_TIMEOUT = 60

    __slots__ = [
        '__max_response_data_length',
    ]

    def __init__(self):
        """Create a client with no limit on the returned response data length."""
        self.__max_response_data_length = None

    def set_max_response_data_length(self, max_response_data_length: int) -> None:
        """
        Set the length limit passed on to every response created by :meth:`get`.

        :param max_response_data_length: Maximum number of body bytes a
            response should return.
        """
        self.__max_response_data_length = max_response_data_length

    def get(self, url: str) -> RequestsWebClientResponse:
        """
        Fetch a URL with an HTTP GET request.

        :param url: URL to fetch.
        :return: Response wrapper carrying the currently configured maximum
            response data length.
        """
        request_headers = {'User-Agent': self.__USER_AGENT}

        # stream=True: requests defers downloading the body until it is read.
        fetched = requests.get(
            url,
            timeout=self.__HTTP_REQUEST_TIMEOUT,
            stream=True,
            headers=request_headers,
        )

        length_limit = self.__max_response_data_length
        return RequestsWebClientResponse(
            requests_response=fetched,
            max_response_data_length=length_limit,
        )