diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index e16e812391..ac1c6271bc 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -433,3 +433,19 @@ def quote(string: str, safe: str = "/") -> str: return "".join( [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string] ) + + +def urlencode(items: typing.List[typing.Tuple[str, str]]) -> str: + # We can use a much simpler version of the stdlib urlencode here because + # we don't need to handle a bunch of different typing cases, such as bytes vs str. + # + # https://github.com/python/cpython/blob/b2f7b2ef0b5421e01efb8c7bee2ef95d3bab77eb/Lib/urllib/parse.py#L926 + # + # Note that we use '%20' encoding for spaces, and treat '/' as a safe + # character. This means our query params have the same escaping as other + # characters in the URL path. This is slightly different to `requests`, + # but is the behaviour that browsers use. + # + # See https://github.com/encode/httpx/issues/2536 and + # https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode + return "&".join([quote(k) + "=" + quote(v) for k, v in items]) diff --git a/httpx/_urls.py b/httpx/_urls.py index 1bcbc8b29a..6f5dc6a543 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -1,10 +1,10 @@ import typing -from urllib.parse import parse_qs, unquote, urlencode +from urllib.parse import parse_qs, unquote import idna from ._types import PrimitiveData, QueryParamTypes, RawURL, URLTypes -from ._urlparse import urlparse +from ._urlparse import urlencode, urlparse from ._utils import primitive_value_to_str @@ -616,6 +616,13 @@ def __eq__(self, other: typing.Any) -> bool: return sorted(self.multi_items()) == sorted(other.multi_items()) def __str__(self) -> str: + """ + Note that we use '%20' encoding for spaces, and treat '/' as a safe + character. 
+ + See https://github.com/encode/httpx/issues/2536 and + https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode + """ return urlencode(self.multi_items()) def __repr__(self) -> str: diff --git a/tests/models/test_url.py b/tests/models/test_url.py index cbd8d64269..170066826a 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -350,6 +350,22 @@ def test_url_with_empty_query(): assert url.raw_path == b"/path?" +def test_url_query_encoding(): + """ + URL query parameters should use '%20' to encode spaces, + and should treat '/' as a safe character. This behaviour differs + across clients, but we're matching browser behaviour here. + + See https://github.com/encode/httpx/issues/2536 + and https://github.com/encode/httpx/discussions/2460 + """ + url = httpx.URL("https://www.example.com/?a=b c&d=e/f") + assert url.raw_path == b"/?a=b%20c&d=e/f" + + url = httpx.URL("https://www.example.com/", params={"a": "b c", "d": "e/f"}) + assert url.raw_path == b"/?a=b%20c&d=e/f" + + def test_url_with_url_encoded_path(): url = httpx.URL("https://www.example.com/path%20to%20somewhere") assert url.path == "/path to somewhere"