encode · elupus · Dec 4, 2023 · Dec 4, 2023 · Dec 4, 2023 · Dec 4, 2023
diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py
@@ -26,10 +26,41 @@
 MAX_URL_LENGTH = 65536
 
 # https://datatracker.ietf.org/doc/html/rfc3986.html#section-2.3
-UNRESERVED_CHARACTERS = (
+UNRESERVED_CHARACTERS = set(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
 )
-SUB_DELIMS = "!$&'()*+,;="
+
+SUB_DELIMS = set("!$&'()*+,;=")  # RFC 3986 section 2.2 "sub-delims"
+GEN_DELIMS = set(":/?#[]@")  # RFC 3986 section 2.2 "gen-delims"
+
+RESERVED = SUB_DELIMS.union(GEN_DELIMS)
+
+# Safe characters in a full URL: every reserved character plus "%".
+URL_RAW_SAFE = RESERVED.union(set("%"))
+
+# Safe characters in the path section: URL_RAW_SAFE without "?" and "#".
+PATH_RAW_SAFE = URL_RAW_SAFE.difference(set("?#"))
+
+# Safe characters in a single path element: PATH_RAW_SAFE without "/" and "%".
+PATH_VAL_SAFE = PATH_RAW_SAFE.difference(set("/%"))
+
+# Safe characters in the query section: URL_RAW_SAFE without "#".
+QUERY_RAW_SAFE = URL_RAW_SAFE.difference(set("#"))
+
+# Safe characters in a query name or value: QUERY_RAW_SAFE without "&", "=", "%".
+QUERY_VAL_SAFE = QUERY_RAW_SAFE.difference(set("&=%"))
+
+# Safe characters in the fragment section (an alias of the URL_RAW_SAFE set).
+FRAGMENT_RAW_SAFE = URL_RAW_SAFE
+
+# Safe characters in a single fragment value: FRAGMENT_RAW_SAFE without "&", "=", "%".
+FRAGMENT_VAL_SAFE = FRAGMENT_RAW_SAFE.difference(set("&=%"))
+
+# Safe characters in the user info section: the sub-delims plus ":" and "%".
+USERINFO_RAW_SAFE = SUB_DELIMS.union(set(":%"))
+
+# Safe characters in a username or password: USERINFO_RAW_SAFE without ":" and "%".
+USERINFO_VAL_SAFE = USERINFO_RAW_SAFE.difference(set(":%"))
 
 PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}")
 
@@ -174,8 +205,8 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
 
     # Replace "username" and/or "password" with "userinfo".
     if "username" in kwargs or "password" in kwargs:
-        username = quote(kwargs.pop("username", "") or "")
-        password = quote(kwargs.pop("password", "") or "")
+        username = quote(kwargs.pop("username", "") or "", safe=USERINFO_VAL_SAFE)
+        password = quote(kwargs.pop("password", "") or "", safe=USERINFO_VAL_SAFE)
         kwargs["userinfo"] = f"{username}:{password}" if password else username
 
     # Replace "raw_path" with "path" and "query".
@@ -241,7 +272,7 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
     # We end up with a parsed representation of the URL,
     # with components that are plain ASCII bytestrings.
     parsed_scheme: str = scheme.lower()
-    parsed_userinfo: str = quote(userinfo, safe=SUB_DELIMS + ":")
+    parsed_userinfo: str = quote(userinfo, safe=USERINFO_RAW_SAFE)
     parsed_host: str = encode_host(host)
     parsed_port: typing.Optional[int] = normalize_port(port, scheme)
 
@@ -258,16 +289,14 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
     # specific component.
 
     # For 'path' we need to drop ? and # from the GEN_DELIMS set.
-    parsed_path: str = quote(path, safe=SUB_DELIMS + ":/[]@")
+    parsed_path: str = quote(path, safe=PATH_RAW_SAFE)
     # For 'query' we need to drop '#' from the GEN_DELIMS set.
-    # We also exclude '/' because it is more robust to replace it with a percent
-    # encoding despite it not being a requirement of the spec.
     parsed_query: typing.Optional[str] = (
-        None if query is None else quote(query, safe=SUB_DELIMS + ":?[]@")
+        None if query is None else quote(query, safe=QUERY_RAW_SAFE)
     )
     # For 'fragment' we can include all of the GEN_DELIMS set.
     parsed_fragment: typing.Optional[str] = (
-        None if fragment is None else quote(fragment, safe=SUB_DELIMS + ":/?#[]@")
+        None if fragment is None else quote(fragment, safe=FRAGMENT_RAW_SAFE)
     )
 
     # The parsed ASCII bytestrings are our canonical form.
@@ -421,11 +450,11 @@ def percent_encode(char: str) -> str:
     return "".join([f"%{byte:02x}" for byte in char.encode("utf-8")]).upper()
 
 
-def is_safe(string: str, safe: str = "/") -> bool:
+def is_safe(string: str, safe: typing.Set[str]) -> bool:
     """
     Determine if a given string is already quote-safe.
     """
-    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe + "%"
+    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS.union(safe, set("%"))
 
     # All characters must already be non-escaping or '%'
     for char in string:
@@ -436,14 +465,14 @@ def is_safe(string: str, safe: str = "/") -> bool:
     return string.count("%") == len(PERCENT_ENCODED_REGEX.findall(string))
 
 
-def quote(string: str, safe: str = "/") -> str:
+def quote(string: str, safe: typing.Set[str]) -> str:
     """
     Use percent-encoding to quote a string if required.
     """
     if is_safe(string, safe=safe):
         return string
 
-    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe
+    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS.union(safe)
     return "".join(
         [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string]
     )
@@ -464,4 +493,9 @@ def urlencode(items: typing.List[typing.Tuple[str, str]]) -> str:
     - https://github.com/encode/httpx/issues/2721
     - https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
     """
-    return "&".join([quote(k, safe="") + "=" + quote(v, safe="") for k, v in items])
+    return "&".join(
+        [
+            quote(k, safe=QUERY_VAL_SAFE) + "=" + quote(v, safe=QUERY_VAL_SAFE)
+            for k, v in items
+        ]
+    )
diff --git a/tests/models/test_url.py b/tests/models/test_url.py
@@ -360,10 +360,10 @@ def test_url_query_encoding():
     and https://github.com/encode/httpx/discussions/2460
     """
     url = httpx.URL("https://www.example.com/?a=b c&d=e/f")
-    assert url.raw_path == b"/?a=b%20c&d=e%2Ff"
+    assert url.raw_path == b"/?a=b%20c&d=e/f"
 
     url = httpx.URL("https://www.example.com/", params={"a": "b c", "d": "e/f"})
-    assert url.raw_path == b"/?a=b%20c&d=e%2Ff"
+    assert url.raw_path == b"/?a=b%20c&d=e/f"
 
 
 def test_url_with_url_encoded_path():

diff --git a/tests/test_urlparse.py b/tests/test_urlparse.py
@@ -147,7 +147,30 @@ def test_param_does_not_require_encoding():
 
 def test_param_with_existing_escape_requires_encoding():
     url = httpx.URL("http://webservice", params={"u": "http://example.com?q=foo%2Fa"})
-    assert str(url) == "http://webservice?u=http%3A%2F%2Fexample.com%3Fq%3Dfoo%252Fa"
+    assert str(url) == "http://webservice?u=http://example.com?q%3Dfoo%252Fa"
+
+
+def test_param_with_existing_escape():
+    url = httpx.URL("https://webservice/?u=/%3D%26&v=1%202")
+    assert str(url) == "https://webservice/?u=/%3D%26&v=1%202"
+    assert url.params["u"] == "/=&"
+    assert url.params["v"] == "1 2"
+
+
+def test_param_nested_urls_in_query():
+    src = "special;string with:reserved?cha%20raca/ters&d"
+    data1 = str(httpx.URL("http://webservice", params={"u": src}))
+    data2 = str(httpx.URL("http://webservice", params={"u": data1}))
+    data3 = str(httpx.URL("http://webservice", params={"u": data2}))
+    # Peel the nesting off one level at a time; every level must round-trip.
+    url3 = httpx.URL(data3)
+    assert str(url3) == data3
+    url2 = httpx.URL(url3.params["u"])
+    assert str(url2) == data2
+    url1 = httpx.URL(url2.params["u"])
+    assert str(url1) == data1
+    # The innermost parameter must decode back to the original string.
+    assert url1.params["u"] == src
 
 
 # Tests for invalid URLs
@@ -270,9 +293,9 @@ def test_path_percent_encoding():
 def test_query_percent_encoding():
     # Test percent encoding for SUB_DELIMS ALPHA NUM and allowable GEN_DELIMS
     url = httpx.URL("https://example.com/?!$&'()*+,;= abc ABC 123 :/[]@" + "?")
-    assert url.raw_path == b"/?!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?"
+    assert url.raw_path == b"/?!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?"
     assert url.path == "/"
-    assert url.query == b"!$&'()*+,;=%20abc%20ABC%20123%20:%2F[]@?"
+    assert url.query == b"!$&'()*+,;=%20abc%20ABC%20123%20:/[]@?"
     assert url.fragment == ""