Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions httpx/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
)
from ._urls import URL
from ._utils import (
guess_json_utf,
is_known_encoding,
normalize_header_key,
normalize_header_value,
Expand Down Expand Up @@ -759,11 +758,7 @@ def raise_for_status(self) -> "Response":
raise HTTPStatusError(message, request=request, response=self)

def json(self, **kwargs: typing.Any) -> typing.Any:
if self.charset_encoding is None and self.content and len(self.content) > 3:
encoding = guess_json_utf(self.content)
if encoding is not None:
return jsonlib.loads(self.content.decode(encoding), **kwargs)
return jsonlib.loads(self.text, **kwargs)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a random thing I noticed looking through the code, not a problem I've seen in the wild: isn't using self.text like this better than the new version when there's a non-UTF charset?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JSON is specified as always being UTF-encoded.

return jsonlib.loads(self.content, **kwargs)

@property
def cookies(self) -> "Cookies":
Expand Down
35 changes: 0 additions & 35 deletions httpx/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,41 +91,6 @@ def replacer(match: typing.Match[str]) -> str:
return f'{name}="{value}"'.encode()


# Null-byte runs used by guess_json_utf; built once at import time so they
# are not recreated on every call.
_null = b"\x00"
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data: bytes) -> typing.Optional[str]:
    """Guess the Unicode encoding of a JSON payload from its leading bytes.

    Only the first four bytes of ``data`` are inspected. A byte-order mark
    is honoured if present; otherwise the position and number of null bytes
    is used, relying on JSON text starting with two ASCII characters.

    Returns the codec name (``"utf-8"``, ``"utf-8-sig"``, ``"utf-16"``,
    ``"utf-16-le"``, ``"utf-16-be"``, ``"utf-32"``, ``"utf-32-le"`` or
    ``"utf-32-be"``), or ``None`` when no encoding can be determined.
    """
    sample = data[:4]

    # The UTF-32 BOMs must be tested before the UTF-16 ones: the UTF-32-LE
    # BOM begins with the same two bytes as the UTF-16-LE BOM.
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return "utf-32"  # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return "utf-8-sig"  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return "utf-16"  # BOM included

    nullcount = sample.count(_null)
    if nullcount == 0:
        return "utf-8"

    # With nulls present, a full four-byte sample is needed to recognise a
    # UTF-16 or UTF-32 pattern. A shorter sample such as b"\x00{\x00" would
    # otherwise match the slice comparisons below and yield a codec that
    # cannot decode the data.
    if len(sample) < 4:
        return None

    if nullcount == 2:
        if sample[::2] == _null2:  # 1st and 3rd are null
            return "utf-16-be"
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return "utf-16-le"
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return "utf-32-be"
        if sample[1:] == _null3:
            return "utf-32-le"
        # Did not detect a valid UTF-32 ascii-range character
    return None


def get_ca_bundle_from_env() -> typing.Optional[str]:
if "SSL_CERT_FILE" in os.environ:
ssl_file = Path(os.environ["SSL_CERT_FILE"])
Expand Down
17 changes: 11 additions & 6 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import os
import random
Expand All @@ -10,7 +11,6 @@
URLPattern,
get_ca_bundle_from_env,
get_environment_proxies,
guess_json_utf,
is_https_redirect,
obfuscate_sensitive_headers,
parse_header_links,
Expand All @@ -34,12 +34,16 @@
),
)
def test_encoded(encoding):
data = "{}".encode(encoding)
assert guess_json_utf(data) == encoding
content = '{"abc": 123}'.encode(encoding)
response = httpx.Response(200, content=content)
assert response.json() == {"abc": 123}


def test_bad_utf_like_encoding():
assert guess_json_utf(b"\x00\x00\x00\x00") is None
content = b"\x00\x00\x00\x00"
response = httpx.Response(200, content=content)
with pytest.raises(json.decoder.JSONDecodeError):
response.json()


@pytest.mark.parametrize(
Expand All @@ -52,8 +56,9 @@ def test_bad_utf_like_encoding():
),
)
def test_guess_by_bom(encoding, expected):
data = "\ufeff{}".encode(encoding)
assert guess_json_utf(data) == expected
content = '\ufeff{"abc": 123}'.encode(encoding)
response = httpx.Response(200, content=content)
assert response.json() == {"abc": 123}


@pytest.mark.parametrize(
Expand Down