Skip to content

Commit

Permalink
ceph-volume: terminal: encode unicode when writing to stdout
Browse files Browse the repository at this point in the history
Python determines the encoding of stdout and stderr based on the LC_CTYPE
and PYTHONIOENCODING environment variables. By default, python3's sys.stdout
uses 'utf-8' as its encoding, so it is able to write unicode strings even
if stdout is not attached to a tty device. Python2, however, defaults to
ascii if neither of these variables is set.
So, if we are writing unicode using `_Write` in an environment where
LC_CTYPE and/or PYTHONIOENCODING select a non-UTF-8 encoding, it chokes
by raising a `UnicodeEncodeError` exception.

in this change, we add a wrapper around `_Write._writer` so it is able
to write unicode string in such a non-unicode-friendly environment.

For more information about the encoding of stdout and stderr, see
https://docs.python.org/3/using/cmdline.html#envvar-PYTHONIOENCODING .

Signed-off-by: Alfredo Deza <adeza@redhat.com>
Signed-off-by: Kefu Chai <kchai@redhat.com>
  • Loading branch information
tchaikov committed Mar 27, 2019
1 parent cdba92a commit 77912c0
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 1 deletion.
25 changes: 24 additions & 1 deletion src/ceph-volume/ceph_volume/terminal.py
@@ -1,3 +1,4 @@
import codecs
import logging
import sys

Expand Down Expand Up @@ -80,11 +81,33 @@ def make(cls, string):
class _Write(object):

def __init__(self, _writer=None, prefix='', suffix='', flush=False):
self._writer = _writer or sys.stdout
if _writer is None:
_writer = sys.stdout
self._writer = _Write._unicode_output_stream(_writer)
if _writer is sys.stdout:
sys.stdout = self._writer
self.suffix = suffix
self.prefix = prefix
self.flush = flush

@staticmethod
def _unicode_output_stream(stream):
# wrapper for given stream, so it can write unicode without throwing
# exception
# sys.stdout.encoding is None if !isatty
encoding = stream.encoding or ''
if encoding.upper() in ('UTF-8', 'UTF8'):
# already using unicode encoding, nothing to do
return stream
encoding = encoding or 'UTF-8'
if sys.version_info >= (3, 0):
# try to use whatever writer class the stream was
return stream.__class__(stream.buffer, encoding, 'replace',
stream.newlines, stream.line_buffering)
else:
# in python2, stdout is but a "file"
return codecs.getwriter(encoding)(stream, 'replace')

def bold(self, string):
    """Pass ``string`` through the module-level ``bold`` helper and write the result."""
    emphasized = bold(string)
    self.write(emphasized)

Expand Down
52 changes: 52 additions & 0 deletions src/ceph-volume/ceph_volume/tests/test_terminal.py
@@ -1,4 +1,9 @@
# -*- mode:python; tab-width:4; indent-tabs-mode:nil; coding:utf-8 -*-

import codecs
import io
import pytest
import sys
from ceph_volume import terminal


Expand Down Expand Up @@ -66,3 +71,50 @@ def test_subcommand_found_and_dispatched_with_errors(self):
with pytest.raises(SystemExit) as error:
terminal.dispatch({'sub': BadSubCommand}, argv=['sub'])
assert str(error.value) == '100'


@pytest.fixture
def stream():
    """Provide a factory that wraps a bytes buffer in a text stream.

    The factory takes ``(buffer, encoding)`` and returns a writer that
    encodes text into ``buffer`` with ``encoding``, standing in for a
    stdout configured with that encoding.
    """
    def build(buffer, encoding):
        if sys.version_info < (3, 0):
            fake_stdout = codecs.getwriter(encoding)(buffer)
            # StreamWriter does not have encoding attached to it, it will
            # ask the inner buffer for "encoding" attribute in this case
            fake_stdout.encoding = encoding
            return fake_stdout
        # python3: copy the remaining settings from the current sys.stdout
        current = sys.stdout
        return io.TextIOWrapper(buffer,
                                encoding=encoding,
                                errors=current.errors,
                                newline=current.newlines,
                                line_buffering=current.line_buffering)
    return build


class TestWriteUnicode(object):
    """Exercise ``terminal.stdout`` with text that mixes ASCII and non-ASCII."""

    def setup_method(self):
        # pytest's xunit-style per-test hook; the nose-style ``setup`` name
        # is not invoked by current pytest releases, which would leave
        # ``self.message`` unset
        self.octpus_and_squid_en = u'octpus and squid'
        octpus_and_squid_zh = u'章鱼和鱿鱼'
        self.message = self.octpus_and_squid_en + octpus_and_squid_zh

    def test_stdout_writer(self, capsys):
        # should work with whatever stdout is
        terminal.stdout(self.message)
        out, _ = capsys.readouterr()
        # only the ASCII part is checked: the rest may have been replaced
        # depending on the encoding of the captured stdout
        assert self.octpus_and_squid_en in out

    @pytest.mark.parametrize('encoding', ['ascii', 'utf8'])
    def test_writer(self, encoding, stream, monkeypatch, capsys):
        buffer = io.BytesIO()
        # should keep writer alive
        with capsys.disabled():
            # we want to have access to the sys.stdout's attributes in
            # make_stream(), not the ones of pytest.capture.EncodedFile
            writer = stream(buffer, encoding)
            monkeypatch.setattr(sys, 'stdout', writer)
            terminal.stdout(self.message)
            sys.stdout.flush()
            # the ASCII prefix must reach the buffer whatever the encoding
            expected = self.octpus_and_squid_en.encode(encoding)
            assert expected in buffer.getvalue()

1 comment on commit 77912c0

@changchengx
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tchaikov
If I'm still using python2, how to solve the problem?

Please sign in to comment.