Skip to content

Commit

Permalink
linkcheck builder: begin using session-based HTTP requests
Browse files Browse the repository at this point in the history
  • Loading branch information
jayaddison committed Apr 18, 2023
1 parent 8269c6d commit ad12d25
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 36 deletions.
16 changes: 10 additions & 6 deletions sphinx/builders/linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ def __init__(self, env: BuildEnvironment, config: Config, rqueue: Queue[CheckRes
for doc in self.config.linkcheck_exclude_documents]
self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info
in self.config.linkcheck_auth]
self.session = requests.Session()

super().__init__(daemon=True)

Expand Down Expand Up @@ -316,8 +317,8 @@ def check_uri() -> tuple[str, str, int]:
try:
if anchor and self.config.linkcheck_anchors:
# Read the whole document and see if #anchor exists
with requests.get(req_url, stream=True, config=self.config, auth=auth_info,
**kwargs) as response:
with self.session.get(req_url, stream=True, config=self.config,
auth=auth_info, **kwargs) as response:
response.raise_for_status()
found = check_anchor(response, unquote(anchor))

Expand All @@ -327,8 +328,9 @@ def check_uri() -> tuple[str, str, int]:
try:
# try a HEAD request first, which should be easier on
# the server and the network
with requests.head(req_url, allow_redirects=True, config=self.config,
auth=auth_info, **kwargs) as response:
with self.session.head(req_url, allow_redirects=True,
config=self.config, auth=auth_info,
**kwargs) as response:
response.raise_for_status()
# Servers drop the connection on HEAD requests, causing
# ConnectionError.
Expand All @@ -337,8 +339,9 @@ def check_uri() -> tuple[str, str, int]:
raise
# retry with GET request if that fails, some servers
# don't like HEAD requests.
with requests.get(req_url, stream=True, config=self.config,
auth=auth_info, **kwargs) as response:
with self.session.get(req_url, stream=True,
config=self.config,
auth=auth_info, **kwargs) as response:
response.raise_for_status()
except HTTPError as err:
if err.response.status_code == 401:
Expand Down Expand Up @@ -422,6 +425,7 @@ def check(docname: str) -> tuple[str, str, int]:
check_request = self.wqueue.get()
next_check, hyperlink = check_request
if hyperlink is None:
self.session.close()
break

uri, docname, lineno = hyperlink
Expand Down
62 changes: 32 additions & 30 deletions sphinx/util/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,33 +60,35 @@ def _get_user_agent(config: Config) -> str:
])


def get(url: str, **kwargs: Any) -> requests.Response:
    """Send a GET request like ``requests.get()``.

    This sets up the User-Agent header and TLS verification automatically.

    :param url: URL to request.
    :param kwargs: Keyword arguments forwarded to ``requests.get()``.  The
        optional ``config`` entry is consumed here and never forwarded.  When
        it is truthy, ``verify`` defaults to ``_get_tls_cacert(url, config)``
        and the User-Agent defaults to ``_get_user_agent(config)``; otherwise
        the User-Agent defaults to ``useragent_header[0][1]``.  Values the
        caller already supplied are left alone.
    :return: The response produced by ``requests.get()``.

    .. note:: A ``headers`` mapping supplied by the caller is updated in
       place when it lacks a ``User-Agent`` entry.
    """
    config = kwargs.pop('config', None)

    # ``requests`` accepts ``headers=None``; treat it as "no headers" rather
    # than failing on ``None.setdefault`` below.
    headers = kwargs.get('headers')
    if headers is None:
        headers = kwargs['headers'] = {}

    if config:
        kwargs.setdefault('verify', _get_tls_cacert(url, config))
        headers.setdefault('User-Agent', _get_user_agent(config))
    else:
        headers.setdefault('User-Agent', useragent_header[0][1])

    with ignore_insecure_warning(**kwargs):
        return requests.get(url, **kwargs)


def head(url: str, **kwargs: Any) -> requests.Response:
    """Send a HEAD request like ``requests.head()``.

    This sets up the User-Agent header and TLS verification automatically.

    :param url: URL to request.
    :param kwargs: Keyword arguments forwarded to ``requests.head()``.  The
        optional ``config`` entry is consumed here and never forwarded.  When
        it is truthy, ``verify`` defaults to ``_get_tls_cacert(url, config)``
        and the User-Agent defaults to ``_get_user_agent(config)``; otherwise
        the User-Agent defaults to ``useragent_header[0][1]``.  Values the
        caller already supplied are left alone.
    :return: The response produced by ``requests.head()``.

    .. note:: A ``headers`` mapping supplied by the caller is updated in
       place when it lacks a ``User-Agent`` entry.
    """
    config = kwargs.pop('config', None)

    # ``requests`` accepts ``headers=None``; treat it as "no headers" rather
    # than failing on ``None.setdefault`` below.
    headers = kwargs.get('headers')
    if headers is None:
        headers = kwargs['headers'] = {}

    if config:
        kwargs.setdefault('verify', _get_tls_cacert(url, config))
        headers.setdefault('User-Agent', _get_user_agent(config))
    else:
        headers.setdefault('User-Agent', useragent_header[0][1])

    with ignore_insecure_warning(**kwargs):
        return requests.head(url, **kwargs)
class Session(requests.Session):
    """A ``requests.Session`` whose ``get``/``head`` apply Sphinx defaults.

    Both methods accept an extra ``config`` keyword argument.  It is consumed
    before the call is delegated to ``requests.Session`` and is used to fill
    in the ``verify`` setting and the User-Agent header when the caller has
    not provided them.
    """

    @staticmethod
    def _apply_defaults(url: str, kwargs: dict[str, Any]) -> None:
        """Fill in the User-Agent and TLS defaults of *kwargs* in place.

        Removes the ``config`` entry from *kwargs*.  When it is truthy,
        ``verify`` defaults to ``_get_tls_cacert(url, config)`` and the
        User-Agent to ``_get_user_agent(config)``; otherwise the User-Agent
        defaults to ``useragent_header[0][1]``.  A ``headers`` mapping
        supplied by the caller is updated in place.
        """
        config = kwargs.pop('config', None)

        # ``requests`` accepts ``headers=None``; treat it as "no headers"
        # rather than failing on ``None.setdefault`` below.
        headers = kwargs.get('headers')
        if headers is None:
            headers = kwargs['headers'] = {}

        if config:
            kwargs.setdefault('verify', _get_tls_cacert(url, config))
            headers.setdefault('User-Agent', _get_user_agent(config))
        else:
            headers.setdefault('User-Agent', useragent_header[0][1])

    def get(self, url: str, **kwargs: Any) -> requests.Response:
        """Send a GET request like ``requests.Session.get()``.

        This sets up the User-Agent header and TLS verification
        automatically.

        :param url: URL to request.
        :param kwargs: Forwarded to ``requests.Session.get()`` after the
            optional ``config`` entry has been consumed.
        :return: The response produced by ``requests.Session.get()``.
        """
        self._apply_defaults(url, kwargs)
        with ignore_insecure_warning(**kwargs):
            return super().get(url, **kwargs)

    def head(self, url: str, **kwargs: Any) -> requests.Response:
        """Send a HEAD request like ``requests.Session.head()``.

        This sets up the User-Agent header and TLS verification
        automatically.

        :param url: URL to request.
        :param kwargs: Forwarded to ``requests.Session.head()`` after the
            optional ``config`` entry has been consumed.
        :return: The response produced by ``requests.Session.head()``.
        """
        self._apply_defaults(url, kwargs)
        with ignore_insecure_warning(**kwargs):
            return super().head(url, **kwargs)

0 comments on commit ad12d25

Please sign in to comment.