From 9c669b78bfab5fea6c1d36f44c5c537a31998b4e Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 7 Dec 2023 11:46:01 +0000 Subject: [PATCH 1/9] Replace path_query_fragment encoding tests --- tests/models/test_url.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 4683a321d6..b96cde1819 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -70,6 +70,43 @@ def test_url_no_authority(): # Tests for percent encoding across path, query, and fragment... +@pytest.mark.parametrize( + "url,raw_path,path,query,fragment", + [ + # URL encoding characters in path. + ( + "https://example.com/!$&'()*+,;= abc ABC 123 :/[]@", + b"/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@", + "/!$&'()*+,;= abc ABC 123 :/[]@", + b"", + "", + ), + # URL encoding characters in query. + ( + "https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@" + "?", + b"/?!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?", + "/", + b"!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?", + "", + ), + # URL encoding characters in fragment. + ( + "https://example.com/#!$&'()*+,;= abc ABC 123 :/[]@" + "?#", + b"/", + "/", + b"", + "!$&'()*+,;= abc ABC 123 :/[]@?#", + ), + ], +) +def test_path_query_fragment(url, raw_path, path, query, fragment): + url = httpx.URL(url) + assert url.raw_path == raw_path + assert url.path == path + assert url.query == query + assert url.fragment == fragment + + def test_path_percent_encoding(): # Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS url = httpx.URL("https://example.com/!$&'()*+,;= abc ABC 123 :/[]@") From 6cc75553961deb7f13e4b8e489333b0de1a4b8a5 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 7 Dec 2023 11:47:42 +0000 Subject: [PATCH 2/9] Remove replaced test cases --- tests/models/test_url.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/tests/models/test_url.py b/tests/models/test_url.py index b96cde1819..fe0b59475a 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -107,33 +107,6 @@ def test_path_query_fragment(url, raw_path, path, query, fragment): assert url.fragment == fragment -def test_path_percent_encoding(): - # Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS - url = httpx.URL("https://example.com/!$&'()*+,;= abc ABC 123 :/[]@") - assert url.raw_path == b"/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@" - assert url.path == "/!$&'()*+,;= abc ABC 123 :/[]@" - assert url.query == b"" - assert url.fragment == "" - - -def test_query_percent_encoding(): - # Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS - url = httpx.URL("https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@" + "?") - assert url.raw_path == b"/?!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?" - assert url.path == "/" - assert url.query == b"!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?" - assert url.fragment == "" - - -def test_fragment_percent_encoding(): - # Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS - url = httpx.URL("https://example.com/#!$&'()*+,;= abc ABC 123 :/[]@" + "?#") - assert url.raw_path == b"/" - assert url.path == "/" - assert url.query == b"" - assert url.fragment == "!$&'()*+,;= abc ABC 123 :/[]@?#" - - def test_url_query_encoding(): """ URL query parameters should use '%20' to encoding spaces, From b7d42587e57edd10f48433e145f8cc136b5f9d32 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 7 Dec 2023 18:08:19 +0000 Subject: [PATCH 3/9] Fix test case to use correct hex sequence for 'abc' --- tests/models/test_url.py | 48 +++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/tests/models/test_url.py b/tests/models/test_url.py index fe0b59475a..06e239f5e8 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -73,7 +73,7 @@ def test_url_no_authority(): @pytest.mark.parametrize( "url,raw_path,path,query,fragment", [ - # URL encoding characters in path. + # URL with unescaped chars in path. ( "https://example.com/!$&'()*+,;= abc ABC 123 :/[]@", b"/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@", @@ -81,17 +81,50 @@ def test_url_no_authority(): b"", "", ), - # URL encoding characters in query. + # URL with escaped chars in path. ( - "https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@" + "?", + "https://example.com/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@", + b"/!$&'()*+,;=%20abc%20ABC%20123%20:/[]@", + "/!$&'()*+,;= abc ABC 123 :/[]@", + b"", + "", + ), + # URL with mix of unescaped and escaped chars in path. + # WARNING: This has the incorrect behaviour, adding the test as an interim step. + ( + "https://example.com/ %61%62%63", + b"/%20%61%62%63", + "/ abc", + b"", + "", + ), + # URL with unescaped chars in query. + ( + "https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@?", b"/?!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?", "/", b"!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?", "", ), + # URL with escaped chars in query. + ( + "https://example.com/?!$&%27()*+,;=%20abc%20ABC%20123%20:%2F[]@?", + b"/?!$&%27()*+,;=%20abc%20ABC%20123%20:%2F[]@?", + "/", + b"!$&%27()*+,;=%20abc%20ABC%20123%20:%2F[]@?", + "", + ), + # URL with mix of unescaped and escaped chars in query. + ( + "https://example.com/?%20%97%98%99", + b"/?%20%97%98%99", + "/", + b"%20%97%98%99", + "", + ), # URL encoding characters in fragment. ( - "https://example.com/#!$&'()*+,;= abc ABC 123 :/[]@" + "?#", + "https://example.com/#!$&'()*+,;= abc ABC 123 :/[]@?#", b"/", "/", b"", @@ -123,13 +156,6 @@ def test_url_query_encoding(): assert url.raw_path == b"/?a=b%20c&d=e%2Ff" -def test_url_with_url_encoded_path(): - url = httpx.URL("https://www.example.com/path%20to%20somewhere") - assert url.path == "/path to somewhere" - assert url.query == b"" - assert url.raw_path == b"/path%20to%20somewhere" - - def test_url_params(): url = httpx.URL("https://example.org:123/path/to/somewhere", params={"a": "123"}) assert str(url) == "https://example.org:123/path/to/somewhere?a=123" From 412a880a2f418a259ad7405ac2b16d6e3c78070a Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 7 Dec 2023 18:13:32 +0000 Subject: [PATCH 4/9] Fix 'quote' behaviour so we don't double-escape. --- httpx/_urlparse.py | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index c44e3519a0..41728a541a 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -432,13 +432,12 @@ def is_safe(string: str, safe: str = "/") -> bool: if char not in NON_ESCAPED_CHARS: return False - # Any '%' characters must be valid '%xx' escape sequences. - return string.count("%") == len(PERCENT_ENCODED_REGEX.findall(string)) + return True -def quote(string: str, safe: str = "/") -> str: +def percent_encoded(string: str, safe: str = "/") -> str: """ - Use percent-encoding to quote a string if required. + Use percent-encoding to quote a string. """ if is_safe(string, safe=safe): return string @@ -449,6 +448,32 @@ def quote(string: str, safe: str = "/") -> str: ) +def quote(string: str, safe: str = "/") -> str: + """ + Use percent-encoding to quote a string, omitting existing '%xx' escape sequences. + """ + parts = [] + current_position = 0 + for match in re.finditer(PERCENT_ENCODED_REGEX, string): + start_position, end_position = match.start(), match.end() + matched_text = match.group(0) + # Add any text up to the '%xx' escape sequence. + if start_position != current_position: + leading_text = string[current_position:start_position] + parts.append(percent_encoded(leading_text, safe=safe)) + + # Add the '%xx' escape sequence. + parts.append(matched_text) + current_position = end_position + + # Add any text after the final '%xx' escape sequence. + if current_position != len(string): + trailing_text = string[current_position:] + parts.append(percent_encoded(trailing_text, safe=safe)) + + return "".join(parts) + + def urlencode(items: typing.List[typing.Tuple[str, str]]) -> str: """ We can use a much simpler version of the stdlib urlencode here because @@ -464,4 +489,9 @@ def urlencode(items: typing.List[typing.Tuple[str, str]]) -> str: - https://github.com/encode/httpx/issues/2721 - https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode """ - return "&".join([quote(k, safe="") + "=" + quote(v, safe="") for k, v in items]) + return "&".join( + [ + percent_encoded(k, safe="") + "=" + percent_encoded(v, safe="") + for k, v in items + ] + ) From a1b4afcc24e6420be8403176d98d2bc500864af6 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 7 Dec 2023 18:26:05 +0000 Subject: [PATCH 5/9] Add '/' to safe chars in query strings --- httpx/_urlparse.py | 2 +- tests/models/test_url.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 41728a541a..20842738a2 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -263,7 +263,7 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # We also exclude '/' because it is more robust to replace it with a percent # encoding despite it not being a requirement of the spec. parsed_query: typing.Optional[str] = ( - None if query is None else quote(query, safe=SUB_DELIMS + ":?[]@") + None if query is None else quote(query, safe=SUB_DELIMS + ":/?[]@") ) # For 'fragment' we can include all of the GEN_DELIMS set. parsed_fragment: typing.Optional[str] = ( diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 06e239f5e8..79e1605a5a 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -101,9 +101,9 @@ def test_url_no_authority(): # URL with unescaped chars in query. ( "https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@?", - b"/?!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?", + b"/?!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?", "/", - b"!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?", + b"!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?", "", ), # URL with escaped chars in query. @@ -142,7 +142,7 @@ def test_path_query_fragment(url, raw_path, path, query, fragment): def test_url_query_encoding(): """ - URL query parameters should use '%20' to encoding spaces, + URL query parameters should use '%20' for encoding spaces, and should treat '/' as a safe character. This behaviour differs across clients, but we're matching browser behaviour here. @@ -150,7 +150,7 @@ def test_url_query_encoding(): and https://github.com/encode/httpx/discussions/2460 """ url = httpx.URL("https://www.example.com/?a=b c&d=e/f") - assert url.raw_path == b"/?a=b%20c&d=e%2Ff" + assert url.raw_path == b"/?a=b%20c&d=e/f" url = httpx.URL("https://www.example.com/", params={"a": "b c", "d": "e/f"}) assert url.raw_path == b"/?a=b%20c&d=e%2Ff" From de9968a849c30b51e17b06d6e72e4cfadc2e0453 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Mon, 11 Dec 2023 09:35:50 +0000 Subject: [PATCH 6/9] Update docstring --- httpx/_urlparse.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 20842738a2..021f669136 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -451,6 +451,13 @@ def percent_encoded(string: str, safe: str = "/") -> str: def quote(string: str, safe: str = "/") -> str: """ Use percent-encoding to quote a string, omitting existing '%xx' escape sequences. + + See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1 + + * `string`: The string to be percent-escaped. + * `safe`: A string containing characters that may be treated as safe, and do not + need to be escaped. Unreserved characters are always treated as safe. + See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3 """ parts = [] current_position = 0 From 7bdf2277ccab31b1e3921d32b035122dbdaaa565 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Mon, 11 Dec 2023 09:37:07 +0000 Subject: [PATCH 7/9] Linting --- httpx/_transports/asgi.py | 4 ++-- httpx/_urlparse.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/httpx/_transports/asgi.py b/httpx/_transports/asgi.py index f67f0fbd5b..5b6b34446d 100644 --- a/httpx/_transports/asgi.py +++ b/httpx/_transports/asgi.py @@ -1,7 +1,5 @@ import typing -import sniffio - from .._models import Request, Response from .._types import AsyncByteStream from .base import AsyncBaseTransport @@ -25,6 +23,8 @@ def create_event() -> "Event": + import sniffio + if sniffio.current_async_library() == "trio": import trio diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 021f669136..9f17fb172f 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -453,7 +453,7 @@ def quote(string: str, safe: str = "/") -> str: Use percent-encoding to quote a string, omitting existing '%xx' escape sequences. See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1 - + * `string`: The string to be percent-escaped. * `safe`: A string containing characters that may be treated as safe, and do not need to be escaped. Unreserved characters are always treated as safe. From 0d8cc4b001e46e290550e02862a4d040bce0be43 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 13 Dec 2023 07:36:18 +0000 Subject: [PATCH 8/9] Update outdated comment. --- httpx/_urlparse.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 9f17fb172f..07bbea9070 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -260,8 +260,6 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult: # For 'path' we need to drop ? and # from the GEN_DELIMS set. parsed_path: str = quote(path, safe=SUB_DELIMS + ":/[]@") # For 'query' we need to drop '#' from the GEN_DELIMS set. - # We also exclude '/' because it is more robust to replace it with a percent - # encoding despite it not being a requirement of the spec. parsed_query: typing.Optional[str] = ( None if query is None else quote(query, safe=SUB_DELIMS + ":/?[]@") ) From e3532aa68043b5a52d3ef6c2e8b7b6ba09bcf638 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Wed, 13 Dec 2023 10:28:16 +0000 Subject: [PATCH 9/9] Revert unrelated change --- httpx/_transports/asgi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/httpx/_transports/asgi.py b/httpx/_transports/asgi.py index fd9ffdce81..08cd392f75 100644 --- a/httpx/_transports/asgi.py +++ b/httpx/_transports/asgi.py @@ -1,5 +1,7 @@ import typing +import sniffio + from .._models import Request, Response from .._types import AsyncByteStream from .base import AsyncBaseTransport @@ -23,8 +25,6 @@ def create_event() -> "Event": - import sniffio - if sniffio.current_async_library() == "trio": import trio