Skip to content

Commit

Permalink
Consistently handle text and binary modes in the three FileObject cla…
Browse files Browse the repository at this point in the history
…sses.

And add tests.

Fixes #1441.
  • Loading branch information
jamadden committed Dec 2, 2019
1 parent 625c88b commit 5bf0ca8
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 75 deletions.
13 changes: 13 additions & 0 deletions CHANGES.rst
Expand Up @@ -11,6 +11,19 @@
3.5.9, 3.6.9, 3.7.5 and 3.8.0 (final). It is also tested with PyPy2
7.2 and PyPy 3.6 7.2

- The file objects (FileObjectPosix, FileObjectThread) now
consistently text and binary modes. If neither 'b' nor 't' is given

This comment has been minimized.

Copy link
@wiggin15

wiggin15 Dec 2, 2019

Contributor

I am not a native english speaker, but I think the syntax of this sentence is wrong? Should be "now consistently handle text and binary modes"?

in the mode, they will read and write native strings. If 't' is
given, they will always work with unicode strings, and 'b' will
always work with byte strings. See :issue:`1441`.

- The file objects accept *encoding*, *errors* and *newline*
arguments. On Python 2, these are only used if 't' is in the mode.

- The default mode for FileObjectPosix changed from ``rb`` to simply
``r``, for consistency with the other file objects and the standard
``open`` and ``io.open`` functions.


1.5a2 (2019-10-21)
==================
Expand Down
122 changes: 87 additions & 35 deletions src/gevent/_fileobjectcommon.py
Expand Up @@ -5,15 +5,15 @@
except ImportError:
EBADF = 9

import os
from io import TextIOWrapper
import io
import functools
import sys


from gevent.hub import _get_hub_noargs as get_hub
from gevent._compat import integer_types
from gevent._compat import reraise
from gevent._compat import fspath
from gevent.lock import Semaphore, DummySemaphore

class cancel_wait_ex(IOError):
Expand All @@ -32,7 +32,8 @@ def __init__(self):
class FileObjectBase(object):
"""
Internal base class to ensure a level of consistency
between FileObjectPosix and FileObjectThread
between :class:`~.FileObjectPosix`, :class:`~.FileObjectThread`
and :class:`~.FileObjectBlock`.
"""

# List of methods we delegate to the wrapping IO object, if they
Expand Down Expand Up @@ -60,25 +61,28 @@ class FileObjectBase(object):
)


# Whether we are translating universal newlines or not.
# Whether we should apply a TextWrapper (the names are historical).
# Subclasses should set these before calling our constructor.
_translate = False

_translate_encoding = None
_translate_errors = None
_translate_newline = None # None means universal

def __init__(self, io, closefd):
def __init__(self, fobj, closefd):
"""
:param io: An io.IOBase-like object.
:param fobj: An io.IOBase-like object.
"""
self._io = io
self._io = fobj
# We don't actually use this property ourself, but we save it (and
# pass it along) for compatibility.
self._close = closefd

if self._translate:
# This automatically handles delegation by assigning to
# self.io
self.translate_newlines(None, self._translate_encoding, self._translate_errors)
self.translate_newlines(None,
self._translate_encoding,
self._translate_errors)
else:
self._do_delegate_methods()

Expand Down Expand Up @@ -108,7 +112,7 @@ def _wrap_method(self, method):
return method

def translate_newlines(self, mode, *text_args, **text_kwargs):
wrapper = TextIOWrapper(self._io, *text_args, **text_kwargs)
wrapper = io.TextIOWrapper(self._io, *text_args, **text_kwargs)
if mode:
wrapper.mode = mode
self.io = wrapper
Expand All @@ -123,9 +127,9 @@ def close(self):
if self._io is None:
return

io = self._io
fobj = self._io
self._io = None
self._do_close(io, self._close)
self._do_close(fobj, self._close)

def _do_close(self, fobj, closefd):
raise NotImplementedError()
Expand All @@ -147,22 +151,71 @@ def __enter__(self):
def __exit__(self, *args):
self.close()

@classmethod
def _open_raw(cls, fobj, mode='r', buffering=-1,
encoding=None, errors=None, newline=None, closefd=True):
"""
Uses :func:`io.open` on *fobj* and returns the IO object.
This is a compatibility wrapper for Python 2 and Python 3. It
supports PathLike objects for *fobj* on all versions.
If the object is already an object with a ``fileno`` method,
it is returned unchanged.
On Python 2, if the mode only specifies read, write or append,
and encoding and errors are not specified, then
:obj:`io.FileIO` is used to get the IO object. This ensures we
return native strings unless explicitly requested.
.. versionchanged: 1.5
Support closefd for Python 2 native string readers.
"""
if hasattr(fobj, 'fileno'):
return fobj

if not isinstance(fobj, integer_types):
# Not an integer. Support PathLike on Python 2 and Python <= 3.5.
fobj = fspath(fobj)
closefd = True

if bytes is str and mode in ('r', 'r+', 'w', 'w+', 'a', 'a+') \
and encoding is None and errors is None:
# Python 2, default open. Return native str type, not unicode, which
# is what would happen with io.open('r'), but we don't want to open the file
# in binary mode since that skips newline conversion.
fobj = io.FileIO(fobj, mode, closefd=closefd)
if '+' in mode:
BufFactory = io.BufferedRandom
elif mode[0] == 'r':
BufFactory = io.BufferedReader
else:
BufFactory = io.BufferedWriter

if buffering == -1:
fobj = BufFactory(fobj)
elif buffering != 0:
fobj = BufFactory(fobj, buffering)
else:
# Python 3, or we have a mode that Python 2's os.fdopen/open can't handle
# (x) or they explicitly asked for binary or text mode.

fobj = io.open(fobj, mode, buffering, encoding, errors, newline, closefd)
return fobj

class FileObjectBlock(FileObjectBase):

def __init__(self, fobj, *args, **kwargs):
closefd = kwargs.pop('close', True)
if kwargs:
raise TypeError('Unexpected arguments: %r' % kwargs.keys())
if isinstance(fobj, integer_types):
if not closefd:
# we cannot do this, since fdopen object will close the descriptor
raise TypeError('FileObjectBlock does not support close=False on an fd.')
fobj = os.fdopen(fobj, *args)
closefd = kwargs['closefd'] = kwargs.pop('close', True)
if 'bufsize' in kwargs: # compat with other constructors
kwargs['buffering'] = kwargs.pop('bufsize')
fobj = self._open_raw(fobj, *args, **kwargs)
super(FileObjectBlock, self).__init__(fobj, closefd)

def _do_close(self, fobj, closefd):
fobj.close()


class FileObjectThread(FileObjectBase):
"""
A file-like object wrapping another file-like object, performing all blocking
Expand All @@ -176,12 +229,18 @@ class FileObjectThread(FileObjectBase):
The file object is closed using the threadpool. Note that whether or
not this action is synchronous or asynchronous is not documented.
.. versionchanged:: 1.5
Accept str and ``PathLike`` objects for *fobj* on all versions of Python.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* arguments.
"""

def __init__(self, fobj, mode=None, bufsize=-1, close=True, threadpool=None, lock=True):
def __init__(self, fobj, mode='r', bufsize=-1, close=True, threadpool=None, lock=True,
encoding=None, errors=None, newline=None):
"""
:param fobj: The underlying file-like object to wrap, or an integer fileno
that will be pass to :func:`os.fdopen` along with *mode* and *bufsize*.
:param fobj: The underlying file-like object to wrap, or something
acceptable to :func:`io.open` (along with *mode* and *bufsize*, which is translated
to *buffering*).
:keyword bool lock: If True (the default) then all operations will
be performed one-by-one. Note that this does not guarantee that, if using
this file object from multiple threads/greenlets, operations will be performed
Expand All @@ -200,16 +259,9 @@ def __init__(self, fobj, mode=None, bufsize=-1, close=True, threadpool=None, loc
self.lock = DummySemaphore()
if not hasattr(self.lock, '__enter__'):
raise TypeError('Expected a Semaphore or boolean, got %r' % type(self.lock))
if isinstance(fobj, integer_types):
if not closefd:
# we cannot do this, since fdopen object will close the descriptor
raise TypeError('FileObjectThread does not support close=False on an fd.')
if mode is None:
assert bufsize == -1, "If you use the default mode, you can't choose a bufsize"
fobj = os.fdopen(fobj)
else:
fobj = os.fdopen(fobj, mode, bufsize)

fobj = self._open_raw(fobj, mode, bufsize,
encoding=encoding, errors=errors, newline=newline,
closefd=close)
self.__io_holder = [fobj] # signal for _wrap_method
super(FileObjectThread, self).__init__(fobj, closefd)

Expand Down Expand Up @@ -244,8 +296,8 @@ def close(_fobj=fobj):

def _do_delegate_methods(self):
super(FileObjectThread, self)._do_delegate_methods()
if not hasattr(self, 'read1') and 'r' in getattr(self._io, 'mode', ''):
self.read1 = self.read
# if not hasattr(self, 'read1') and 'r' in getattr(self._io, 'mode', ''):
# self.read1 = self.read
self.__io_holder[0] = self._io

def _extra_repr(self):
Expand Down
79 changes: 54 additions & 25 deletions src/gevent/_fileobjectposix.py
Expand Up @@ -10,6 +10,8 @@
from io import UnsupportedOperation

from gevent._compat import reraise
from gevent._compat import PY2
from gevent._compat import PY3
from gevent._fileobjectcommon import cancel_wait_ex
from gevent._fileobjectcommon import FileObjectBase
from gevent.hub import get_hub
Expand Down Expand Up @@ -183,6 +185,8 @@ def write(self, b):
return ret


_marker = object()

class FileObjectPosix(FileObjectBase):
"""
A file-like object that operates on non-blocking files but
Expand Down Expand Up @@ -234,12 +238,21 @@ class FileObjectPosix(FileObjectBase):
better file-like semantics (and portability to Python 3).
.. versionchanged:: 1.2a1
Document the ``fileio`` attribute for non-blocking reads.
.. versionchanged:: 1.5
The default value for *mode* was changed from ``rb`` to ``r``.
.. versionchanged:: 1.5
Support strings and ``PathLike`` objects for ``fobj`` on all versions
of Python. Note that caution above.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* argument.
"""

#: platform specific default for the *bufsize* parameter
default_bufsize = io.DEFAULT_BUFFER_SIZE

def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
def __init__(self, fobj, mode='r', bufsize=-1, close=True,
encoding=None, errors=None, newline=_marker):
# pylint:disable=too-many-locals
"""
:param fobj: Either an integer fileno, or an object supporting the
usual :meth:`socket.fileno` method. The file *will* be
Expand All @@ -248,7 +261,7 @@ def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
(where the "b" or "U" can be omitted).
If "U" is part of the mode, universal newlines will be used. On Python 2,
if 't' is not in the mode, this will result in returning byte (native) strings;
putting 't' in the mode will return text strings. This may cause
putting 't' in the mode will return text (unicode) strings. This may cause
:exc:`UnicodeDecodeError` to be raised.
:keyword int bufsize: If given, the size of the buffer to use. The default
value means to use a platform-specific default
Expand All @@ -274,15 +287,40 @@ def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
fileno = fobj
fobj = None
else:
# The underlying object, if we have to open it, should be
# in binary mode. We'll take care of any coding needed.
raw_mode = mode.replace('t', 'b')
raw_mode = raw_mode + 'b' if 'b' not in raw_mode else raw_mode
new_fobj = self._open_raw(fobj, raw_mode, bufsize, closefd=False)
if new_fobj is not fobj:
close = True
fobj = new_fobj
fileno = fobj.fileno()
if not isinstance(fileno, int):
raise TypeError('fileno must be int: %r' % fileno)

orig_mode = mode
mode = (mode or 'rb').replace('b', '')
self.__fobj = fobj
assert isinstance(fileno, int)

# We want text mode if:
# - We're on Python 3, and no 'b' is in the mode.
# - A 't' is in the mode on any version.
# - We're on Python 2 and no 'b' is in the mode, and an encoding or errors is
# given.
text_mode = (
't' in mode
or (PY3 and 'b' not in mode)
or (PY2 and 'b' not in mode and (encoding, errors) != (None, None))
)
if text_mode:
self._translate = True
self._translate_newline = os.linesep if newline is _marker else newline
self._translate_encoding = encoding
self._transalate_errors = errors

if 'U' in mode:
self._translate = True
if bytes is str and 't' not in mode:
self._translate_newline = None

if PY2 and not text_mode:
# We're going to be producing unicode objects, but
# universal newlines doesn't do that in the stdlib,
# so fix that to return str objects. The fix is two parts:
Expand All @@ -301,20 +339,6 @@ def wrapped(*args, **kwargs):
return wrapped
return m
self._wrap_method = wrap_method
mode = mode.replace('U', '')
else:
self._translate = False

mode = mode.replace('t', '')

if len(mode) != 1 and mode not in 'rw': # pragma: no cover
# Python 3 builtin `open` raises a ValueError for invalid modes;
# Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
# enabled (which it usually was). Match Python 3 because it makes more sense
# and because __debug__ may not be enabled.
# NOTE: This is preventing a mode like 'rwb' for binary random access;
# that code was never tested and was explicitly marked as "not used"
raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))


self._orig_bufsize = bufsize
Expand All @@ -323,10 +347,10 @@ def wrapped(*args, **kwargs):
elif bufsize == 0:
bufsize = 1

if mode == 'r':
if mode[0] == 'r':
IOFamily = BufferedReader
else:
assert mode == 'w'
assert mode[0] == 'w'
IOFamily = BufferedWriter
if self._orig_bufsize == 0:
# We could also simply pass self.fileio as *io*, but this way
Expand All @@ -335,7 +359,7 @@ def wrapped(*args, **kwargs):
IOFamily = FlushingBufferedWriter


self._fobj = fobj

# This attribute is documented as available for non-blocking reads.
self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

Expand All @@ -349,8 +373,13 @@ def _do_close(self, fobj, closefd):
# self.fileio already knows whether or not to close the
# file descriptor
self.fileio.close()
if closefd and self.__fobj is not None:
try:
self.__fobj.close()
except IOError:
pass
finally:
self._fobj = None
self.__fobj = None
self.fileio = None

def __iter__(self):
Expand Down

0 comments on commit 5bf0ca8

Please sign in to comment.