Skip to content

Commit

Permalink
pythongh-91954: Emit EncodingWarning from locale and subprocess (pyth…
Browse files Browse the repository at this point in the history
…onGH-91977)

locale.getpreferredencoding() and subprocess.Popen() emit EncodingWarning
  • Loading branch information
methane committed Apr 30, 2022
1 parent c7b7f12 commit 354ace8
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 14 deletions.
14 changes: 10 additions & 4 deletions Doc/library/subprocess.rst
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ functions.

If *encoding* or *errors* are specified, or *text* is true, the file objects
*stdin*, *stdout* and *stderr* are opened in text mode with the specified
encoding and *errors*, as described above in :ref:`frequently-used-arguments`.
*encoding* and *errors*, as described above in :ref:`frequently-used-arguments`.
The *universal_newlines* argument is equivalent to *text* and is provided
for backwards compatibility. By default, file objects are opened in binary mode.

Expand Down Expand Up @@ -1445,12 +1445,13 @@ This module also provides the following legacy functions from the 2.x
none of the guarantees described above regarding security and exception
handling consistency are valid for these functions.

.. function:: getstatusoutput(cmd)
.. function:: getstatusoutput(cmd, *, encoding=None, errors=None)

Return ``(exitcode, output)`` of executing *cmd* in a shell.

Execute the string *cmd* in a shell with :meth:`Popen.check_output` and
return a 2-tuple ``(exitcode, output)``. The locale encoding is used;
return a 2-tuple ``(exitcode, output)``.
*encoding* and *errors* are used to decode output;
see the notes on :ref:`frequently-used-arguments` for more details.

A trailing newline is stripped from the output.
Expand All @@ -1475,8 +1476,10 @@ handling consistency are valid for these functions.
as it did in Python 3.3.3 and earlier. exitcode has the same value as
:attr:`~Popen.returncode`.

.. versionadded:: 3.11
Added *encoding* and *errors* arguments.

.. function:: getoutput(cmd)
.. function:: getoutput(cmd, *, encoding=None, errors=None)

Return output (stdout and stderr) of executing *cmd* in a shell.

Expand All @@ -1491,6 +1494,9 @@ handling consistency are valid for these functions.
.. versionchanged:: 3.3.4
Windows support added

.. versionadded:: 3.11
Added *encoding* and *errors* arguments.


Notes
-----
Expand Down
11 changes: 11 additions & 0 deletions Lib/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,11 @@ def getencoding():
except NameError:
def getpreferredencoding(do_setlocale=True):
"""Return the charset that the user is likely using."""
if sys.flags.warn_default_encoding:
import warnings
warnings.warn(
"UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
EncodingWarning, 2)
if sys.flags.utf8_mode:
return 'utf-8'
return getencoding()
Expand All @@ -663,6 +668,12 @@ def getpreferredencoding(do_setlocale=True):
def getpreferredencoding(do_setlocale=True):
"""Return the charset that the user is likely using,
according to the system configuration."""

if sys.flags.warn_default_encoding:
import warnings
warnings.warn(
"UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
EncodingWarning, 2)
if sys.flags.utf8_mode:
return 'utf-8'

Expand Down
38 changes: 28 additions & 10 deletions Lib/subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import builtins
import errno
import io
import locale
import os
import time
import signal
Expand Down Expand Up @@ -344,6 +345,26 @@ def _args_from_interpreter_flags():
return args


def _text_encoding():
# Return default text encoding and emit EncodingWarning if
# sys.flags.warn_default_encoding is true.
if sys.flags.warn_default_encoding:
f = sys._getframe()
filename = f.f_code.co_filename
stacklevel = 2
while f := f.f_back:
if f.f_code.co_filename != filename:
break
stacklevel += 1
warnings.warn("'encoding' argument not specified.",
EncodingWarning, stacklevel)

if sys.flags.utf8_mode:
return "utf-8"
else:
return locale.getencoding()


def call(*popenargs, timeout=None, **kwargs):
"""Run command with arguments. Wait for command to complete or
timeout, then return the returncode attribute.
Expand Down Expand Up @@ -610,7 +631,7 @@ def list2cmdline(seq):
# Various tools for executing commands and looking at their output and status.
#

def getstatusoutput(cmd):
def getstatusoutput(cmd, *, encoding=None, errors=None):
"""Return (exitcode, output) of executing cmd in a shell.
Execute the string 'cmd' in a shell with 'check_output' and
Expand All @@ -632,7 +653,8 @@ def getstatusoutput(cmd):
(-15, '')
"""
try:
data = check_output(cmd, shell=True, text=True, stderr=STDOUT)
data = check_output(cmd, shell=True, text=True, stderr=STDOUT,
encoding=encoding, errors=errors)
exitcode = 0
except CalledProcessError as ex:
data = ex.output
Expand All @@ -641,7 +663,7 @@ def getstatusoutput(cmd):
data = data[:-1]
return exitcode, data

def getoutput(cmd):
def getoutput(cmd, *, encoding=None, errors=None):
"""Return output (stdout or stderr) of executing cmd in a shell.
Like getstatusoutput(), except the exit status is ignored and the return
Expand All @@ -651,7 +673,8 @@ def getoutput(cmd):
>>> subprocess.getoutput('ls /bin/ls')
'/bin/ls'
"""
return getstatusoutput(cmd)[1]
return getstatusoutput(cmd, encoding=encoding, errors=errors)[1]



def _use_posix_spawn():
Expand Down Expand Up @@ -858,13 +881,8 @@ def __init__(self, args, bufsize=-1, executable=None,
errread = msvcrt.open_osfhandle(errread.Detach(), 0)

self.text_mode = encoding or errors or text or universal_newlines

# PEP 597: We suppress the EncodingWarning in subprocess module
# for now (at Python 3.10), because we focus on files for now.
# This will be changed to encoding = io.text_encoding(encoding)
# in the future.
if self.text_mode and encoding is None:
self.encoding = encoding = "locale"
self.encoding = encoding = _text_encoding()

# How long to resume waiting on a child after the first ^C.
# There is no right value for this. The purpose is to be polite
Expand Down
14 changes: 14 additions & 0 deletions Lib/test/test_subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -1733,6 +1733,20 @@ def test_run_with_shell_timeout_and_capture_output(self):
msg="TimeoutExpired was delayed! Bad traceback:\n```\n"
f"{stacks}```")

def test_encoding_warning(self):
code = textwrap.dedent("""\
from subprocess import *
args = ["echo", "hello"]
run(args, text=True)
check_output(args, text=True)
""")
cp = subprocess.run([sys.executable, "-Xwarn_default_encoding", "-c", code],
capture_output=True)
lines = cp.stderr.splitlines()
self.assertEqual(len(lines), 2)
self.assertTrue(lines[0].startswith(b"<string>:3: EncodingWarning: "))
self.assertTrue(lines[1].startswith(b"<string>:4: EncodingWarning: "))


def _get_test_grp_name():
for name_group in ('staff', 'nogroup', 'grp', 'nobody', 'nfsnobody'):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add *encoding* and *errors* arguments to :func:`subprocess.getoutput` and
:func:`subprocess.getstatusoutput`.

0 comments on commit 354ace8

Please sign in to comment.