Skip to content

Commit

Permalink
pythongh-116609: Ignore UTF-16 BOM in importlib.resources._functional…
Browse files Browse the repository at this point in the history
… tests

To test the `errors` argument, we read a UTF-16 file as UTF-8
with "backslashreplace" error handling. However, the utf-16
codec adds an endian-specific byte-order mark, so on big-endian
machines the expectation doesn't match the test file (which was
saved on a little-endian machine).

Use endswith to ignore the BOM.
  • Loading branch information
encukou committed Apr 5, 2024
1 parent abfa16b commit da2898f
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions Lib/test/test_importlib/resources/test_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def _gen_resourcetxt_path_parts(self):
with self.subTest(path_parts=path_parts):
yield path_parts

def assertEndsWith(self, string, suffix):
    """Assert that `string` ends with `suffix`.

    Used to ignore an architecture-specific UTF-16 byte-order mark
    at the start of `string`: only the tail is compared.

    An empty `suffix` always passes.  (Slicing with
    ``string[-len(suffix):]`` would yield the whole string when
    ``len(suffix) == 0`` and wrongly fail for any non-empty `string`.)
    """
    if not string.endswith(suffix):
        self.fail(f"{string!r} does not end with {suffix!r}")

def test_read_text(self):
self.assertEqual(
resources.read_text(self.anchor01, 'utf-8.file'),
Expand Down Expand Up @@ -65,12 +71,12 @@ def test_read_text(self):
),
'\x00\x01\x02\x03',
)
self.assertEqual(
self.assertEndsWith( # ignore the BOM
resources.read_text(
self.anchor01, 'utf-16.file',
errors='backslashreplace',
),
'Hello, UTF-16 world!\n'.encode('utf-16').decode(
'Hello, UTF-16 world!\n'.encode('utf-16-le').decode(
errors='backslashreplace',
),
)
Expand Down Expand Up @@ -112,9 +118,9 @@ def test_open_text(self):
self.anchor01, 'utf-16.file',
errors='backslashreplace',
) as f:
self.assertEqual(
self.assertEndsWith( # ignore the BOM
f.read(),
'Hello, UTF-16 world!\n'.encode('utf-16').decode(
'Hello, UTF-16 world!\n'.encode('utf-16-le').decode(
errors='backslashreplace',
),
)
Expand Down

0 comments on commit da2898f

Please sign in to comment.