From 5bf0ca88c534093bb24182e94c8abe14dc21cce9 Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Mon, 2 Dec 2019 10:25:53 -0600 Subject: [PATCH] Consistently handle text and binary modes in the three FileObject classes. And add tests. Fixes #1441. --- CHANGES.rst | 13 +++ src/gevent/_fileobjectcommon.py | 122 +++++++++++++++++++-------- src/gevent/_fileobjectposix.py | 79 +++++++++++------ src/gevent/tests/test__fileobject.py | 75 ++++++++++++---- 4 files changed, 214 insertions(+), 75 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index c048e9670..7816ef2f9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,19 @@ 3.5.9, 3.6.9, 3.7.5 and 3.8.0 (final). It is also tested with PyPy2 7.2 and PyPy 3.6 7.2 +- The file objects (FileObjectPosix, FileObjectThread) now + consistently text and binary modes. If neither 'b' nor 't' is given + in the mode, they will read and write native strings. If 't' is + given, they will always work with unicode strings, and 'b' will + always work with byte strings. See :issue:`1441`. + +- The file objects accept *encoding*, *errors* and *newline* + arguments. On Python 2, these are only used if 't' is in the mode. + +- The default mode for FileObjectPosix changed from ``rb`` to simply + ``r``, for consistency with the other file objects and the standard + ``open`` and ``io.open`` functions. + 1.5a2 (2019-10-21) ================== diff --git a/src/gevent/_fileobjectcommon.py b/src/gevent/_fileobjectcommon.py index 99404f3ed..7be5f77ed 100644 --- a/src/gevent/_fileobjectcommon.py +++ b/src/gevent/_fileobjectcommon.py @@ -5,8 +5,7 @@ except ImportError: EBADF = 9 -import os -from io import TextIOWrapper +import io import functools import sys @@ -14,6 +13,7 @@ from gevent.hub import _get_hub_noargs as get_hub from gevent._compat import integer_types from gevent._compat import reraise +from gevent._compat import fspath from gevent.lock import Semaphore, DummySemaphore class cancel_wait_ex(IOError): @@ -32,7 +32,8 @@ def __init__(self): class FileObjectBase(object): """ Internal base class to ensure a level of consistency - between FileObjectPosix and FileObjectThread + between :class:`~.FileObjectPosix`, :class:`~.FileObjectThread` + and :class:`~.FileObjectBlock`. """ # List of methods we delegate to the wrapping IO object, if they @@ -60,17 +61,18 @@ class FileObjectBase(object): ) - # Whether we are translating universal newlines or not. + # Whether we should apply a TextWrapper (the names are historical). + # Subclasses should set these before calling our constructor. _translate = False - _translate_encoding = None _translate_errors = None + _translate_newline = None # None means universal - def __init__(self, io, closefd): + def __init__(self, fobj, closefd): """ - :param io: An io.IOBase-like object. + :param fobj: An io.IOBase-like object. """ - self._io = io + self._io = fobj # We don't actually use this property ourself, but we save it (and # pass it along) for compatibility. self._close = closefd @@ -78,7 +80,9 @@ def __init__(self, io, closefd): if self._translate: # This automatically handles delegation by assigning to # self.io - self.translate_newlines(None, self._translate_encoding, self._translate_errors) + self.translate_newlines(None, + self._translate_encoding, + self._translate_errors) else: self._do_delegate_methods() @@ -108,7 +112,7 @@ def _wrap_method(self, method): return method def translate_newlines(self, mode, *text_args, **text_kwargs): - wrapper = TextIOWrapper(self._io, *text_args, **text_kwargs) + wrapper = io.TextIOWrapper(self._io, *text_args, **text_kwargs) if mode: wrapper.mode = mode self.io = wrapper @@ -123,9 +127,9 @@ def close(self): if self._io is None: return - io = self._io + fobj = self._io self._io = None - self._do_close(io, self._close) + self._do_close(fobj, self._close) def _do_close(self, fobj, closefd): raise NotImplementedError() @@ -147,22 +151,71 @@ def __enter__(self): def __exit__(self, *args): self.close() + @classmethod + def _open_raw(cls, fobj, mode='r', buffering=-1, + encoding=None, errors=None, newline=None, closefd=True): + """ + Uses :func:`io.open` on *fobj* and returns the IO object. + + This is a compatibility wrapper for Python 2 and Python 3. It + supports PathLike objects for *fobj* on all versions. + + If the object is already an object with a ``fileno`` method, + it is returned unchanged. + + On Python 2, if the mode only specifies read, write or append, + and encoding and errors are not specified, then + :obj:`io.FileIO` is used to get the IO object. This ensures we + return native strings unless explicitly requested. + + .. versionchanged: 1.5 + Support closefd for Python 2 native string readers. + """ + if hasattr(fobj, 'fileno'): + return fobj + + if not isinstance(fobj, integer_types): + # Not an integer. Support PathLike on Python 2 and Python <= 3.5. + fobj = fspath(fobj) + closefd = True + + if bytes is str and mode in ('r', 'r+', 'w', 'w+', 'a', 'a+') \ + and encoding is None and errors is None: + # Python 2, default open. Return native str type, not unicode, which + # is what would happen with io.open('r'), but we don't want to open the file + # in binary mode since that skips newline conversion. + fobj = io.FileIO(fobj, mode, closefd=closefd) + if '+' in mode: + BufFactory = io.BufferedRandom + elif mode[0] == 'r': + BufFactory = io.BufferedReader + else: + BufFactory = io.BufferedWriter + + if buffering == -1: + fobj = BufFactory(fobj) + elif buffering != 0: + fobj = BufFactory(fobj, buffering) + else: + # Python 3, or we have a mode that Python 2's os.fdopen/open can't handle + # (x) or they explicitly asked for binary or text mode. + + fobj = io.open(fobj, mode, buffering, encoding, errors, newline, closefd) + return fobj + class FileObjectBlock(FileObjectBase): def __init__(self, fobj, *args, **kwargs): - closefd = kwargs.pop('close', True) - if kwargs: - raise TypeError('Unexpected arguments: %r' % kwargs.keys()) - if isinstance(fobj, integer_types): - if not closefd: - # we cannot do this, since fdopen object will close the descriptor - raise TypeError('FileObjectBlock does not support close=False on an fd.') - fobj = os.fdopen(fobj, *args) + closefd = kwargs['closefd'] = kwargs.pop('close', True) + if 'bufsize' in kwargs: # compat with other constructors + kwargs['buffering'] = kwargs.pop('bufsize') + fobj = self._open_raw(fobj, *args, **kwargs) super(FileObjectBlock, self).__init__(fobj, closefd) def _do_close(self, fobj, closefd): fobj.close() + class FileObjectThread(FileObjectBase): """ A file-like object wrapping another file-like object, performing all blocking @@ -176,12 +229,18 @@ class FileObjectThread(FileObjectBase): The file object is closed using the threadpool. Note that whether or not this action is synchronous or asynchronous is not documented. + .. versionchanged:: 1.5 + Accept str and ``PathLike`` objects for *fobj* on all versions of Python. + .. versionchanged:: 1.5 + Add *encoding*, *errors* and *newline* arguments. """ - def __init__(self, fobj, mode=None, bufsize=-1, close=True, threadpool=None, lock=True): + def __init__(self, fobj, mode='r', bufsize=-1, close=True, threadpool=None, lock=True, + encoding=None, errors=None, newline=None): """ - :param fobj: The underlying file-like object to wrap, or an integer fileno - that will be pass to :func:`os.fdopen` along with *mode* and *bufsize*. + :param fobj: The underlying file-like object to wrap, or something + acceptable to :func:`io.open` (along with *mode* and *bufsize*, which is translated + to *buffering*). :keyword bool lock: If True (the default) then all operations will be performed one-by-one. Note that this does not guarantee that, if using this file object from multiple threads/greenlets, operations will be performed @@ -200,16 +259,9 @@ def __init__(self, fobj, mode=None, bufsize=-1, close=True, threadpool=None, loc self.lock = DummySemaphore() if not hasattr(self.lock, '__enter__'): raise TypeError('Expected a Semaphore or boolean, got %r' % type(self.lock)) - if isinstance(fobj, integer_types): - if not closefd: - # we cannot do this, since fdopen object will close the descriptor - raise TypeError('FileObjectThread does not support close=False on an fd.') - if mode is None: - assert bufsize == -1, "If you use the default mode, you can't choose a bufsize" - fobj = os.fdopen(fobj) - else: - fobj = os.fdopen(fobj, mode, bufsize) - + fobj = self._open_raw(fobj, mode, bufsize, + encoding=encoding, errors=errors, newline=newline, + closefd=close) self.__io_holder = [fobj] # signal for _wrap_method super(FileObjectThread, self).__init__(fobj, closefd) @@ -244,8 +296,8 @@ def close(_fobj=fobj): def _do_delegate_methods(self): super(FileObjectThread, self)._do_delegate_methods() - if not hasattr(self, 'read1') and 'r' in getattr(self._io, 'mode', ''): - self.read1 = self.read + # if not hasattr(self, 'read1') and 'r' in getattr(self._io, 'mode', ''): + # self.read1 = self.read self.__io_holder[0] = self._io def _extra_repr(self): diff --git a/src/gevent/_fileobjectposix.py b/src/gevent/_fileobjectposix.py index 699743c86..c2b84736d 100644 --- a/src/gevent/_fileobjectposix.py +++ b/src/gevent/_fileobjectposix.py @@ -10,6 +10,8 @@ from io import UnsupportedOperation from gevent._compat import reraise +from gevent._compat import PY2 +from gevent._compat import PY3 from gevent._fileobjectcommon import cancel_wait_ex from gevent._fileobjectcommon import FileObjectBase from gevent.hub import get_hub @@ -183,6 +185,8 @@ def write(self, b): return ret +_marker = object() + class FileObjectPosix(FileObjectBase): """ A file-like object that operates on non-blocking files but @@ -234,12 +238,21 @@ class FileObjectPosix(FileObjectBase): better file-like semantics (and portability to Python 3). .. versionchanged:: 1.2a1 Document the ``fileio`` attribute for non-blocking reads. + .. versionchanged:: 1.5 + The default value for *mode* was changed from ``rb`` to ``r``. + .. versionchanged:: 1.5 + Support strings and ``PathLike`` objects for ``fobj`` on all versions + of Python. Note that caution above. + .. versionchanged:: 1.5 + Add *encoding*, *errors* and *newline* argument. """ #: platform specific default for the *bufsize* parameter default_bufsize = io.DEFAULT_BUFFER_SIZE - def __init__(self, fobj, mode='rb', bufsize=-1, close=True): + def __init__(self, fobj, mode='r', bufsize=-1, close=True, + encoding=None, errors=None, newline=_marker): + # pylint:disable=too-many-locals """ :param fobj: Either an integer fileno, or an object supporting the usual :meth:`socket.fileno` method. The file *will* be @@ -248,7 +261,7 @@ def __init__(self, fobj, mode='rb', bufsize=-1, close=True): (where the "b" or "U" can be omitted). If "U" is part of the mode, universal newlines will be used. On Python 2, if 't' is not in the mode, this will result in returning byte (native) strings; - putting 't' in the mode will return text strings. This may cause + putting 't' in the mode will return text (unicode) strings. This may cause :exc:`UnicodeDecodeError` to be raised. :keyword int bufsize: If given, the size of the buffer to use. The default value means to use a platform-specific default @@ -274,15 +287,40 @@ def __init__(self, fobj, mode='rb', bufsize=-1, close=True): fileno = fobj fobj = None else: + # The underlying object, if we have to open it, should be + # in binary mode. We'll take care of any coding needed. + raw_mode = mode.replace('t', 'b') + raw_mode = raw_mode + 'b' if 'b' not in raw_mode else raw_mode + new_fobj = self._open_raw(fobj, raw_mode, bufsize, closefd=False) + if new_fobj is not fobj: + close = True + fobj = new_fobj fileno = fobj.fileno() - if not isinstance(fileno, int): - raise TypeError('fileno must be int: %r' % fileno) - orig_mode = mode - mode = (mode or 'rb').replace('b', '') + self.__fobj = fobj + assert isinstance(fileno, int) + + # We want text mode if: + # - We're on Python 3, and no 'b' is in the mode. + # - A 't' is in the mode on any version. + # - We're on Python 2 and no 'b' is in the mode, and an encoding or errors is + # given. + text_mode = ( + 't' in mode + or (PY3 and 'b' not in mode) + or (PY2 and 'b' not in mode and (encoding, errors) != (None, None)) + ) + if text_mode: + self._translate = True + self._translate_newline = os.linesep if newline is _marker else newline + self._translate_encoding = encoding + self._transalate_errors = errors + if 'U' in mode: self._translate = True - if bytes is str and 't' not in mode: + self._translate_newline = None + + if PY2 and not text_mode: # We're going to be producing unicode objects, but # universal newlines doesn't do that in the stdlib, # so fix that to return str objects. The fix is two parts: @@ -301,20 +339,6 @@ def wrapped(*args, **kwargs): return wrapped return m self._wrap_method = wrap_method - mode = mode.replace('U', '') - else: - self._translate = False - - mode = mode.replace('t', '') - - if len(mode) != 1 and mode not in 'rw': # pragma: no cover - # Python 3 builtin `open` raises a ValueError for invalid modes; - # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was - # enabled (which it usually was). Match Python 3 because it makes more sense - # and because __debug__ may not be enabled. - # NOTE: This is preventing a mode like 'rwb' for binary random access; - # that code was never tested and was explicitly marked as "not used" - raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,)) self._orig_bufsize = bufsize @@ -323,10 +347,10 @@ def wrapped(*args, **kwargs): elif bufsize == 0: bufsize = 1 - if mode == 'r': + if mode[0] == 'r': IOFamily = BufferedReader else: - assert mode == 'w' + assert mode[0] == 'w' IOFamily = BufferedWriter if self._orig_bufsize == 0: # We could also simply pass self.fileio as *io*, but this way @@ -335,7 +359,7 @@ def wrapped(*args, **kwargs): IOFamily = FlushingBufferedWriter - self._fobj = fobj + # This attribute is documented as available for non-blocking reads. self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close) @@ -349,8 +373,13 @@ def _do_close(self, fobj, closefd): # self.fileio already knows whether or not to close the # file descriptor self.fileio.close() + if closefd and self.__fobj is not None: + try: + self.__fobj.close() + except IOError: + pass finally: - self._fobj = None + self.__fobj = None self.fileio = None def __iter__(self): diff --git a/src/gevent/tests/test__fileobject.py b/src/gevent/tests/test__fileobject.py index 5cd345f9d..686c7ef5c 100644 --- a/src/gevent/tests/test__fileobject.py +++ b/src/gevent/tests/test__fileobject.py @@ -7,6 +7,7 @@ import gevent from gevent import fileobject +from gevent._compat import PY2 import gevent.testing as greentest from gevent.testing.sysinfo import PY3 @@ -21,7 +22,7 @@ class ResourceWarning(Warning): "Python 2 fallback" -def writer(fobj, line): +def Writer(fobj, line): for character in line: fobj.write(character) fobj.flush() @@ -34,7 +35,7 @@ def close_fd_quietly(fd): except (IOError, OSError): pass -class TestFileObjectBlock(greentest.TestCase): +class TestFileObjectBlock(greentest.TestCase): # serves as a base for the concurrent tests too def _getTargetClass(self): return fileobject.FileObjectBlock @@ -127,6 +128,34 @@ def test_seek(self): self.assertEqual(native_data, s) self.assertEqual(native_data, fileobj_data) + def test_str_default_to_native(self): + # With no 'b' or 't' given, read and write native str. + fileno, path = tempfile.mkstemp('.gevent_test_str_default') + self.addCleanup(os.remove, path) + + os.write(fileno, b'abcdefg') + os.close(fileno) + + with open(path, 'r') as f: + native_data = f.read() + + with self._makeOne(path, 'r') as f: + gevent_data = f.read() + + self.assertEqual(native_data, gevent_data) + + def test_text_encoding(self): + fileno, path = tempfile.mkstemp('.gevent_test_str_default') + self.addCleanup(os.remove, path) + + os.write(fileno, u'\N{SNOWMAN}'.encode('utf-8')) + os.close(fileno) + + with self._makeOne(path, 'r+', bufsize=5, encoding='utf-8') as f: + gevent_data = f.read() + + self.assertEqual(u'\N{SNOWMAN}', gevent_data) + def test_close_pipe(self): # Issue #190, 203 r, w = os.pipe() @@ -140,14 +169,31 @@ class ConcurrentFileObjectMixin(object): # Additional tests for fileobjects that cooperate # and we have full control of the implementation - def test_read1(self): + def test_read1_binary_present(self): # Issue #840 r, w = os.pipe() - x = self._makeOne(r) - y = self._makeOne(w, 'w') - self._close_on_teardown(x) - self._close_on_teardown(y) - self.assertTrue(hasattr(x, 'read1')) + reader = self._makeOne(r, 'rb') + self._close_on_teardown(reader) + writer = self._makeOne(w, 'w') + self._close_on_teardown(writer) + self.assertTrue(hasattr(reader, 'read1'), dir(reader)) + + def test_read1_text_not_present(self): + # Only defined for binary. + r, w = os.pipe() + reader = self._makeOne(r, 'rt') + self._close_on_teardown(reader) + self.addCleanup(os.close, w) + self.assertFalse(hasattr(reader, 'read1'), dir(reader)) + + def test_read1_default(self): + # If just 'r' is given, whether it has one or not + # depends on if we're Python 2 or 3. + r, w = os.pipe() + self.addCleanup(os.close, w) + reader = self._makeOne(r) + self._close_on_teardown(reader) + self.assertEqual(PY2, hasattr(reader, 'read1')) def test_bufsize_0(self): # Issue #840 @@ -168,7 +214,7 @@ def test_newlines(self): import warnings r, w = os.pipe() lines = [b'line1\n', b'line2\r', b'line3\r\n', b'line4\r\nline5', b'\nline6'] - g = gevent.spawn(writer, self._makeOne(w, 'wb'), lines) + g = gevent.spawn(Writer, self._makeOne(w, 'wb'), lines) try: with warnings.catch_warnings(): @@ -188,13 +234,12 @@ class TestFileObjectThread(ConcurrentFileObjectMixin, def _getTargetClass(self): return fileobject.FileObjectThread - # FileObjectThread uses os.fdopen() when passed a file-descriptor, - # which returns an object with a destructor that can't be - # bypassed, so we can't even create one that way def test_del_noclose(self): - with self.assertRaisesRegex(TypeError, - 'FileObjectThread does not support close=False on an fd.'): - self._test_del(close=False) + # In the past, we used os.fdopen() when given a file descriptor, + # and that has a destructor that can't be bypassed, so + # close=false wasn't allowed. Now that we do everything with the + # io module, it is allowed. + self._test_del(close=False) # We don't test this with FileObjectThread. Sometimes the # visibility of the 'close' operation, which happens in a