Skip to content

Commit

Permalink
Merge pull request #89 from jaraco/bugfix/gh-101144
Browse files Browse the repository at this point in the history
fix encoding positional parameter
  • Loading branch information
jaraco committed Jan 27, 2023
2 parents a3c2f1b + c140d67 commit ad532d5
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 12 deletions.
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
v3.12.0
=======

* gh-101144: Honor ``encoding`` as positional parameter
to ``Path.open()`` and ``Path.read_text()``.

v3.11.0
=======

Expand Down
13 changes: 13 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import builtins
import sys


def pytest_configure():
    """Pytest configuration hook: install the compatibility shims."""
    add_future_flags()


def add_future_flags():
    """
    Backfill ``builtins.EncodingWarning`` on interpreters that lack it.

    The test suite refers to ``EncodingWarning`` by its bare name, so a
    stand-in ``Warning`` subclass is installed into ``builtins`` when the
    interpreter does not provide one. Calling this function more than
    once is safe: an existing ``EncodingWarning`` (native or previously
    installed) is never replaced.
    """
    # Feature-detect instead of comparing version tuples: it states the
    # intent directly and keeps the function idempotent, so repeated
    # calls never swap in a new, distinct class object.
    if hasattr(builtins, 'EncodingWarning'):
        return

    builtins.EncodingWarning = type('EncodingWarning', (Warning,), {})
8 changes: 6 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,12 @@
url='https://peps.python.org/pep-{pep_number:0>4}/',
),
dict(
pattern=r'(Python #|bpo-)(?P<python>\d+)',
url='http://bugs.python.org/issue{python}',
pattern=r'(bpo-)(?P<bpo>\d+)',
url='http://bugs.python.org/issue{bpo}',
),
# ``gh-NNN`` references are issue numbers in the CPython GitHub
# tracker, not in the legacy bugs.python.org (bpo) tracker.
dict(
    pattern=r'(gh-)(?P<python_gh>\d+)',
    url='https://github.com/python/cpython/issues/{python_gh}',
),
],
)
Expand Down
72 changes: 67 additions & 5 deletions tests/test_zipp.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import io
import zipfile
import itertools
import contextlib
import pathlib
import unittest
import tempfile
import shutil
import string
import pickle
import itertools
import string
import sys
import unittest
import zipfile

import jaraco.itertools
import func_timeout
Expand Down Expand Up @@ -140,7 +141,66 @@ def test_open(self, alpharep):
a, b, g = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
assert data == "content of a"
self.assertEqual(data, "content of a")
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
data = strm.read()
self.assertEqual(data, "content of a")

def test_open_encoding_utf16(self):
    """
    A UTF-16 member decodes correctly whether ``encoding`` is given
    to ``open`` positionally or by keyword.
    """
    buffer = io.BytesIO()
    archive = zipfile.ZipFile(buffer, "w")
    archive.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
    archive.filename = "test_open_utf16.zip"
    (subdir,) = zipp.Path(archive).iterdir()
    member = subdir.joinpath("16.txt")

    # Positional encoding (gh-101144).
    with member.open('r', "utf-16") as stream:
        text = stream.read()
    assert text == "This was utf-16"

    # Keyword encoding.
    with member.open(encoding="utf-16") as stream:
        text = stream.read()
    assert text == "This was utf-16"

def test_open_encoding_errors(self):
    """
    ``encoding`` and ``errors`` are honored by ``read_text`` and
    ``open`` in positional and keyword form (gh-101144).
    """
    buffer = io.BytesIO()
    archive = zipfile.ZipFile(buffer, "w")
    archive.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
    archive.filename = "test_read_text_encoding_errors.zip"
    (subdir,) = zipp.Path(archive).iterdir()
    member = subdir.joinpath("bad-utf8.bin")

    # Positional encoding combined with a keyword ``errors`` handler.
    text = member.read_text("utf-8", errors="ignore")
    assert text == "invalid utf-8: ."
    with member.open("r", "utf-8", errors="surrogateescape") as stream:
        assert stream.read() == "invalid utf-8: \udcff\udcff."

    # Supplying the encoding both positionally and by keyword is an error.
    with self.assertRaisesRegex(TypeError, "encoding"):
        member.read_text("utf-8", encoding="utf-8")

    # All-keyword form: strict decoding fails on the invalid bytes.
    with member.open("r", encoding="utf-8", errors="strict") as stream:
        with self.assertRaises(UnicodeDecodeError):
            stream.read()

@unittest.skipUnless(
    getattr(sys.flags, 'warn_default_encoding', 0),
    "Requires warn_default_encoding",
)
@pass_alpharep
def test_encoding_warnings(self, alpharep):
    """
    An EncodingWarning raised for a missing ``encoding`` must be
    attributed to this file, i.e. to the code calling ``read_text``
    or ``open``.
    """
    assert sys.flags.warn_default_encoding
    root = zipp.Path(alpharep)

    # read_text() without an explicit encoding.
    with self.assertWarns(EncodingWarning) as caught:
        root.joinpath("a.txt").read_text()
    assert __file__ == caught.filename

    # open() in text mode without an explicit encoding.
    with self.assertWarns(EncodingWarning) as caught:
        root.joinpath("a.txt").open("r").close()
    assert __file__ == caught.filename

def test_open_write(self):
"""
Expand Down Expand Up @@ -182,6 +242,8 @@ def test_read(self, alpharep):
root = zipp.Path(alpharep)
a, b, g = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
# Also check positional encoding arg (gh-101144).
assert a.read_text("utf-8") == "content of a"
assert a.read_bytes() == b"content of a"

@pass_alpharep
Expand Down
15 changes: 10 additions & 5 deletions zipp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ def _name_set(self):
return self.__lookup


def _extract_text_encoding(encoding=None, *args, **kwargs):
    """
    Separate ``encoding`` from the remaining text-mode arguments.

    ``encoding`` is accepted either as the first positional argument or
    as a keyword. Returns a ``(encoding, args, kwargs)`` tuple in which
    ``encoding`` has been passed through ``text_encoding`` and ``args``
    and ``kwargs`` hold whatever was left over.
    """
    # stacklevel=3 so that the caller of this function's caller sees
    # any warning.
    resolved = text_encoding(encoding, 3)
    return resolved, args, kwargs


class Path:
"""
A pathlib-compatible interface for zip files.
Expand Down Expand Up @@ -273,9 +278,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
if args or kwargs:
raise ValueError("encoding args invalid for binary operation")
return stream
else:
kwargs["encoding"] = text_encoding(kwargs.get("encoding"))
return io.TextIOWrapper(stream, *args, **kwargs)
# Text mode:
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
return io.TextIOWrapper(stream, encoding, *args, **kwargs)

@property
def name(self):
Expand All @@ -298,8 +303,8 @@ def filename(self):
return pathlib.Path(self.root.filename).joinpath(self.at)

def read_text(self, *args, **kwargs):
    """
    Read this entry and return its contents decoded as text.

    ``encoding`` may be given as the first positional argument or as a
    keyword; any remaining arguments are forwarded to ``open``.
    Passing ``encoding`` both ways raises ``TypeError``.
    """
    # Resolve the encoding here, not inside open(), so that any
    # warning about a missing encoding is attributed to the code that
    # called read_text(). The encoding is then handed to open()
    # positionally so it cannot collide with a second value in kwargs.
    encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
    with self.open('r', encoding, *args, **kwargs) as strm:
        return strm.read()

def read_bytes(self):
Expand Down

0 comments on commit ad532d5

Please sign in to comment.