Skip to content

Commit

Permalink
Implement granular URL error hierarchy in the HTTP client
Browse files Browse the repository at this point in the history
This patch introduces 5 granular user-facing exceptions that may occur
when HTTP requests are made:
* `InvalidUrlClientError`
* `RedirectClientError`
* `NonHttpUrlClientError`
* `InvalidUrlRedirectClientError`
* `NonHttpUrlRedirectClientError`

Previously `ValueError` or `InvalidURL` was raised and screening out was
complicated (a valid URL that redirects to invalid one raised the same error
as an invalid URL).

Ref: #6722 (comment)

PR #6722

Resolves #2507
Resolves #2630
Resolves #3315

Co-authored-by: Sviatoslav Sydorenko <sviat@redhat.com>
  • Loading branch information
setla and webknjaz committed Feb 13, 2024
1 parent 0ec65c0 commit fb465e1
Show file tree
Hide file tree
Showing 10 changed files with 324 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGES/2507.feature.rst
1 change: 1 addition & 0 deletions CHANGES/3315.feature.rst
12 changes: 12 additions & 0 deletions CHANGES/6722.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Added 5 new exceptions: :py:exc:`~aiohttp.InvalidUrlClientError`, :py:exc:`~aiohttp.RedirectClientError`,
:py:exc:`~aiohttp.NonHttpUrlClientError`, :py:exc:`~aiohttp.InvalidUrlRedirectClientError`,
:py:exc:`~aiohttp.NonHttpUrlRedirectClientError`

:py:exc:`~aiohttp.InvalidUrlRedirectClientError`, :py:exc:`~aiohttp.NonHttpUrlRedirectClientError`
are raised instead of :py:exc:`ValueError` or :py:exc:`~aiohttp.InvalidURL` when the redirect URL is invalid. Classes
:py:exc:`~aiohttp.InvalidUrlClientError`, :py:exc:`~aiohttp.RedirectClientError`,
:py:exc:`~aiohttp.NonHttpUrlClientError` are base for them.

The :py:exc:`~aiohttp.InvalidURL` now exposes a ``description`` property with the text explanation of the error details.

-- by :user:`setla`
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -375,5 +375,6 @@ Yuvi Panda
Zainab Lawal
Zeal Wierslee
Zlatan Sičanica
Łukasz Setla
Марк Коренберг
Семён Марьясин
10 changes: 10 additions & 0 deletions aiohttp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,12 @@
ContentTypeError,
Fingerprint,
InvalidURL,
InvalidUrlClientError,
InvalidUrlRedirectClientError,
NamedPipeConnector,
NonHttpUrlClientError,
NonHttpUrlRedirectClientError,
RedirectClientError,
RequestInfo,
ServerConnectionError,
ServerDisconnectedError,
Expand Down Expand Up @@ -129,6 +134,11 @@
"ContentTypeError",
"Fingerprint",
"InvalidURL",
"InvalidUrlClientError",
"InvalidUrlRedirectClientError",
"NonHttpUrlClientError",
"NonHttpUrlRedirectClientError",
"RedirectClientError",
"RequestInfo",
"ServerConnectionError",
"ServerDisconnectedError",
Expand Down
57 changes: 45 additions & 12 deletions aiohttp/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@
ConnectionTimeoutError,
ContentTypeError,
InvalidURL,
InvalidUrlClientError,
InvalidUrlRedirectClientError,
NonHttpUrlClientError,
NonHttpUrlRedirectClientError,
RedirectClientError,
ServerConnectionError,
ServerDisconnectedError,
ServerFingerprintMismatch,
Expand Down Expand Up @@ -108,6 +113,11 @@
"ConnectionTimeoutError",
"ContentTypeError",
"InvalidURL",
"InvalidUrlClientError",
"RedirectClientError",
"NonHttpUrlClientError",
"InvalidUrlRedirectClientError",
"NonHttpUrlRedirectClientError",
"ServerConnectionError",
"ServerDisconnectedError",
"ServerFingerprintMismatch",
Expand Down Expand Up @@ -167,6 +177,7 @@ class ClientTimeout:

# https://www.rfc-editor.org/rfc/rfc9110#section-9.2.2
IDEMPOTENT_METHODS = frozenset({"GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE"})
HTTP_SCHEMA_SET = frozenset({"http", "https", ""})

_RetType = TypeVar("_RetType")
_CharsetResolver = Callable[[ClientResponse, bytes], str]
Expand Down Expand Up @@ -404,7 +415,10 @@ async def _request(
try:
url = self._build_url(str_or_url)
except ValueError as e:
raise InvalidURL(str_or_url) from e
raise InvalidUrlClientError(str_or_url) from e

if url.scheme not in HTTP_SCHEMA_SET:
raise NonHttpUrlClientError(url)

skip_headers = set(self._skip_auto_headers)
if skip_auto_headers is not None:
Expand Down Expand Up @@ -459,6 +473,15 @@ async def _request(
retry_persistent_connection = method in IDEMPOTENT_METHODS
while True:
url, auth_from_url = strip_auth_from_url(url)
if not url.raw_host:
# NOTE: Bail early, otherwise, causes `InvalidURL` through
# NOTE: `self._request_class()` below.
err_exc_cls = (
InvalidUrlRedirectClientError
if redirects
else InvalidUrlClientError
)
raise err_exc_cls(url)
if auth and auth_from_url:
raise ValueError(
"Cannot combine AUTH argument with "
Expand Down Expand Up @@ -611,34 +634,44 @@ async def _request(
resp.release()

try:
parsed_url = URL(
parsed_redirect_url = URL(
r_url, encoded=not self._requote_redirect_url
)

except ValueError as e:
raise InvalidURL(r_url) from e
raise InvalidUrlRedirectClientError(
r_url,
"Server attempted redirecting to a location that does not look like a URL",
) from e

scheme = parsed_url.scheme
if scheme not in ("http", "https", ""):
scheme = parsed_redirect_url.scheme
if scheme not in HTTP_SCHEMA_SET:
resp.close()
raise ValueError("Can redirect only to http or https")
raise NonHttpUrlRedirectClientError(r_url)
elif not scheme:
parsed_url = url.join(parsed_url)
parsed_redirect_url = url.join(parsed_redirect_url)

is_same_host_https_redirect = (
url.host == parsed_url.host
and parsed_url.scheme == "https"
url.host == parsed_redirect_url.host
and parsed_redirect_url.scheme == "https"
and url.scheme == "http"
)

try:
redirect_origin = parsed_redirect_url.origin()
except ValueError as origin_val_err:
raise InvalidUrlRedirectClientError(
parsed_redirect_url,
"Invalid redirect URL origin",
) from origin_val_err

if (
url.origin() != parsed_url.origin()
url.origin() != redirect_origin
and not is_same_host_https_redirect
):
auth = None
headers.pop(hdrs.AUTHORIZATION, None)

url = parsed_url
url = parsed_redirect_url
params = {}
resp.release()
continue
Expand Down
54 changes: 47 additions & 7 deletions aiohttp/client_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""HTTP related errors."""

import asyncio
from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
from typing import TYPE_CHECKING, Optional, Tuple, Union

from .http_parser import RawResponseMessage
from .typedefs import LooseHeaders
from .typedefs import LooseHeaders, StrOrURL

try:
import ssl
Expand Down Expand Up @@ -40,6 +40,11 @@
"ContentTypeError",
"ClientPayloadError",
"InvalidURL",
"InvalidUrlClientError",
"RedirectClientError",
"NonHttpUrlClientError",
"InvalidUrlRedirectClientError",
"NonHttpUrlRedirectClientError",
)


Expand Down Expand Up @@ -248,17 +253,52 @@ class InvalidURL(ClientError, ValueError):

# Derive from ValueError for backward compatibility

def __init__(self, url: Any) -> None:
def __init__(self, url: StrOrURL, description: Union[str, None] = None) -> None:
# The type of url is not yarl.URL because the exception can be raised
# on URL(url) call
super().__init__(url)
self._url = url
self._description = description

if description:
super().__init__(url, description)
else:
super().__init__(url)

@property
def url(self) -> StrOrURL:
return self._url

@property
def url(self) -> Any:
return self.args[0]
def description(self) -> "str | None":
return self._description

def __repr__(self) -> str:
return f"<{self.__class__.__name__} {self.url}>"
return f"<{self.__class__.__name__} {self}>"

def __str__(self) -> str:
if self._description:
return f"{self._url} - {self._description}"
return str(self._url)


class InvalidUrlClientError(InvalidURL):
"""Invalid URL client error."""


class RedirectClientError(ClientError):
"""Client redirect error."""


class NonHttpUrlClientError(ClientError):
"""Non http URL client error."""


class InvalidUrlRedirectClientError(InvalidUrlClientError, RedirectClientError):
"""Invalid URL redirect client error."""


class NonHttpUrlRedirectClientError(NonHttpUrlClientError, RedirectClientError):
"""Non http URL redirect client error."""


class ClientSSLError(ClientConnectorError):
Expand Down
49 changes: 49 additions & 0 deletions docs/client_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2099,6 +2099,41 @@ All exceptions are available as members of *aiohttp* module.

Invalid URL, :class:`yarl.URL` instance.

.. attribute:: description

Invalid URL description, :class:`str` instance or :data:`None`.

.. exception:: InvalidUrlClientError

Base class for all errors related to client url.

Derived from :exc:`InvalidURL`

.. exception:: RedirectClientError

Base class for all errors related to client redirects.

Derived from :exc:`ClientError`

.. exception:: NonHttpUrlClientError

Base class for all errors related to non http client urls.

Derived from :exc:`ClientError`

.. exception:: InvalidUrlRedirectClientError

Redirect URL is malformed, e.g. it does not contain host part.

Derived from :exc:`InvalidUrlClientError` and :exc:`RedirectClientError`

.. exception:: NonHttpUrlRedirectClientError

Redirect URL does not contain http schema.

Derived from :exc:`RedirectClientError` and :exc:`NonHttpUrlClientError`


.. class:: ContentDisposition

Represent Content-Disposition header
Expand Down Expand Up @@ -2315,3 +2350,17 @@ Hierarchy of exceptions
* :exc:`WSServerHandshakeError`

* :exc:`InvalidURL`

* :exc:`InvalidUrlClientError`

* :exc:`InvalidUrlRedirectClientError`

* :exc:`NonHttpUrlClientError`

* :exc:`NonHttpUrlRedirectClientError`

* :exc:`RedirectClientError`

* :exc:`InvalidUrlRedirectClientError`

* :exc:`NonHttpUrlRedirectClientError`
26 changes: 23 additions & 3 deletions tests/test_client_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import pickle
from typing import Any

from yarl import URL

from aiohttp import client, client_reqrep


Expand Down Expand Up @@ -268,8 +270,9 @@ def test_repr(self) -> None:

class TestInvalidURL:
def test_ctor(self) -> None:
err = client.InvalidURL(url=":wrong:url:")
err = client.InvalidURL(url=":wrong:url:", description=":description:")
assert err.url == ":wrong:url:"
assert err.description == ":description:"

def test_pickle(self) -> None:
err = client.InvalidURL(url=":wrong:url:")
Expand All @@ -280,10 +283,27 @@ def test_pickle(self) -> None:
assert err2.url == ":wrong:url:"
assert err2.foo == "bar"

def test_repr(self) -> None:
def test_repr_no_description(self) -> None:
err = client.InvalidURL(url=":wrong:url:")
assert err.args == (":wrong:url:",)
assert repr(err) == "<InvalidURL :wrong:url:>"

def test_str(self) -> None:
def test_repr_yarl_URL(self) -> None:
err = client.InvalidURL(url=URL(":wrong:url:"))
assert repr(err) == "<InvalidURL :wrong:url:>"

def test_repr_with_description(self) -> None:
err = client.InvalidURL(url=":wrong:url:", description=":description:")
assert repr(err) == "<InvalidURL :wrong:url: - :description:>"

def test_str_no_description(self) -> None:
err = client.InvalidURL(url=":wrong:url:")
assert str(err) == ":wrong:url:"

def test_none_description(self) -> None:
err = client.InvalidURL(":wrong:url:")
assert err.description is None

def test_str_with_description(self) -> None:
err = client.InvalidURL(url=":wrong:url:", description=":description:")
assert str(err) == ":wrong:url: - :description:"

0 comments on commit fb465e1

Please sign in to comment.