diff --git a/fix_encoding.py b/fix_encoding.py deleted file mode 100644 index 3218373660..0000000000 --- a/fix_encoding.py +++ /dev/null @@ -1,322 +0,0 @@ -# Copyright (c) 2011 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -"""Collection of functions and classes to fix various encoding problems on -multiple platforms with python. -""" - -import codecs -import locale -import os -import sys - - -def complain(message): - """If any exception occurs in this file, we'll probably try to print it - on stderr, which makes for frustrating debugging if stderr is directed - to our wrapper. So be paranoid about catching errors and reporting them - to sys.__stderr__, so that the user has a higher chance to see them. - """ - print(isinstance(message, str) and message or repr(message), - file=sys.__stderr__) - - -def fix_default_encoding(): - """Forces utf8 solidly on all platforms. - - By default python execution environment is lazy and defaults to ascii - encoding. - - http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/ - """ - if sys.getdefaultencoding() == 'utf-8': - return False - - # Regenerate setdefaultencoding. - reload(sys) - # Module 'sys' has no 'setdefaultencoding' member - # pylint: disable=no-member - sys.setdefaultencoding('utf-8') - for attr in dir(locale): - if attr[0:3] != 'LC_': - continue - aref = getattr(locale, attr) - try: - locale.setlocale(aref, '') - except locale.Error: - continue - try: - lang, _ = locale.getdefaultlocale() - except (TypeError, ValueError): - continue - if lang: - try: - locale.setlocale(aref, (lang, 'UTF-8')) - except locale.Error: - os.environ[attr] = lang + '.UTF-8' - try: - locale.setlocale(locale.LC_ALL, '') - except locale.Error: - pass - return True - - -############################### -# Windows specific - - -def fix_win_codec(): - """Works around .""" - # - try: - codecs.lookup('cp65001') - return False - except LookupError: - codecs.register( - lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None) - return True - - -class WinUnicodeOutputBase(object): - """Base class to adapt sys.stdout or sys.stderr to behave correctly on - Windows. - - Setting encoding to utf-8 is recommended. - """ - def __init__(self, fileno, name, encoding): - # Corresponding file handle. - self._fileno = fileno - self.encoding = encoding - self.name = name - - self.closed = False - self.softspace = False - self.mode = 'w' - - @staticmethod - def isatty(): - return False - - def close(self): - # Don't really close the handle, that would only cause problems. - self.closed = True - - def fileno(self): - return self._fileno - - def flush(self): - raise NotImplementedError() - - def write(self, text): - raise NotImplementedError() - - def writelines(self, lines): - try: - for line in lines: - self.write(line) - except Exception as e: - complain('%s.writelines: %r' % (self.name, e)) - raise - - -class WinUnicodeConsoleOutput(WinUnicodeOutputBase): - """Output adapter to a Windows Console. - - Understands how to use the win32 console API. - """ - def __init__(self, console_handle, fileno, stream_name, encoding): - super(WinUnicodeConsoleOutput, - self).__init__(fileno, '' % stream_name, - encoding) - # Handle to use for WriteConsoleW - self._console_handle = console_handle - - # Loads the necessary function. - # These types are available on linux but not Mac. - # pylint: disable=no-name-in-module,F0401 - from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE - from ctypes.wintypes import BOOL, DWORD, HANDLE, LPWSTR - from ctypes.wintypes import LPVOID # pylint: disable=no-name-in-module - - self._DWORD = DWORD - self._byref = byref - - # - self._WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, - POINTER(DWORD), - LPVOID)(('WriteConsoleW', - windll.kernel32)) - self._GetLastError = GetLastError - - def flush(self): - # No need to flush the console since it's immediate. - pass - - def write(self, text): - try: - if isinstance(text, bytes): - # Bytestrings need to be decoded to a string before being passed - # to Windows. - text = text.decode(self.encoding, 'replace') - remaining = len(text) - while remaining > 0: - n = self._DWORD(0) - # There is a shorter-than-documented limitation on the length of - # the string passed to WriteConsoleW. See - # . - retval = self._WriteConsoleW(self._console_handle, text, - min(remaining, 10000), - self._byref(n), None) - if retval == 0 or n.value == 0: - raise IOError('WriteConsoleW returned %r, n.value = %r, ' - 'last error = %r' % - (retval, n.value, self._GetLastError())) - remaining -= n.value - if not remaining: - break - text = text[int(n.value):] - except Exception as e: - complain('%s.write: %r' % (self.name, e)) - raise - - -class WinUnicodeOutput(WinUnicodeOutputBase): - """Output adaptor to a file output on Windows. - - If the standard FileWrite function is used, it will be encoded in the - current code page. WriteConsoleW() permits writing any character. - """ - def __init__(self, stream, fileno, encoding): - super(WinUnicodeOutput, - self).__init__(fileno, '' % stream.name, - encoding) - # Output stream - self._stream = stream - - # Flush right now. - self.flush() - - def flush(self): - try: - self._stream.flush() - except Exception as e: - complain('%s.flush: %r from %r' % (self.name, e, self._stream)) - raise - - def write(self, text): - try: - if isinstance(text, bytes): - # Replace characters that cannot be printed instead of failing. - text = text.decode(self.encoding, 'replace') - # When redirecting to a file or process any \n characters will be - # replaced with \r\n. If the text to be printed already has \r\n - # line endings then \r\r\n line endings will be generated, leading - # to double-spacing of some output. Normalizing line endings to \n - # avoids this problem. - text = text.replace('\r\n', '\n') - self._stream.write(text) - except Exception as e: - complain('%s.write: %r' % (self.name, e)) - raise - - -def win_handle_is_a_console(handle): - """Returns True if a Windows file handle is a handle to a console.""" - # These types are available on linux but not Mac. - # pylint: disable=no-name-in-module,F0401 - from ctypes import byref, POINTER, windll, WINFUNCTYPE - from ctypes.wintypes import BOOL, DWORD, HANDLE - - FILE_TYPE_CHAR = 0x0002 - FILE_TYPE_REMOTE = 0x8000 - INVALID_HANDLE_VALUE = DWORD(-1).value - - # - GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))( - ('GetConsoleMode', windll.kernel32)) - # - GetFileType = WINFUNCTYPE(DWORD, DWORD)(('GetFileType', windll.kernel32)) - - # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle. - if handle == INVALID_HANDLE_VALUE or handle is None: - return False - return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) == FILE_TYPE_CHAR - and GetConsoleMode(handle, byref(DWORD()))) - - -def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding): - """Returns a unicode-compatible stream. - - This function will return a direct-Console writing object only if: - - the file number is the expected console file number - - the handle the expected file handle - - the 'real' handle is in fact a handle to a console. - """ - old_fileno = getattr(stream, 'fileno', lambda: None)() - if old_fileno == excepted_fileno: - # These types are available on linux but not Mac. - # pylint: disable=no-name-in-module,F0401 - from ctypes import windll, WINFUNCTYPE - from ctypes.wintypes import DWORD, HANDLE - - # - GetStdHandle = WINFUNCTYPE(HANDLE, - DWORD)(('GetStdHandle', windll.kernel32)) - - real_output_handle = GetStdHandle(DWORD(output_handle)) - if win_handle_is_a_console(real_output_handle): - # It's a console. - return WinUnicodeConsoleOutput(real_output_handle, old_fileno, - stream.name, encoding) - - # It's something else. Create an auto-encoding stream. - return WinUnicodeOutput(stream, old_fileno, encoding) - - -def fix_win_console(encoding): - """Makes Unicode console output work independently of the current code page. - - This also fixes . - Credit to Michael Kaplan - and - TZOmegaTZIOY - . - """ - if (isinstance(sys.stdout, WinUnicodeOutputBase) - or isinstance(sys.stderr, WinUnicodeOutputBase)): - return False - - try: - # SetConsoleCP and SetConsoleOutputCP could be used to change the code - # page but it's not really useful since the code here is using - # WriteConsoleW(). Also, changing the code page is 'permanent' to the - # console and needs to be reverted manually. In practice one needs to - # set the console font to a TTF font to be able to see all the - # characters but it failed for me in practice. In any case, it won't - # throw any exception when printing, which is the important part. -11 - # and -12 are defined in stdio.h - sys.stdout = win_get_unicode_stream(sys.stdout, 1, -11, encoding) - sys.stderr = win_get_unicode_stream(sys.stderr, 2, -12, encoding) - # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the limitation - # is "It doesn't appear to be possible to read Unicode characters in - # UTF-8 mode" and this appears to be a limitation of cmd.exe. - except Exception as e: - complain('exception %r while fixing up sys.stdout and sys.stderr' % e) - return True - - -def fix_encoding(): - """Fixes various encoding problems on all platforms. - - Should be called at the very beginning of the process. - """ - ret = True - if sys.platform == 'win32': - ret &= fix_win_codec() - - ret &= fix_default_encoding() - - if sys.platform == 'win32': - encoding = sys.getdefaultencoding() - ret &= fix_win_console(encoding) - return ret diff --git a/gclient.py b/gclient.py index 07f31b5746..82c3909bcb 100755 --- a/gclient.py +++ b/gclient.py @@ -97,7 +97,6 @@ from collections.abc import Collection, Mapping, Sequence import detect_host_arch -import fix_encoding import git_common import gclient_eval import gclient_paths @@ -4122,7 +4121,6 @@ def main(argv): execute.""" if not can_run_gclient_and_helpers(): return 2 - fix_encoding.fix_encoding() disable_buffering() setup_color.init() dispatcher = subcommand.CommandDispatcher(__name__) diff --git a/gerrit_client.py b/gerrit_client.py index 293a91e7bd..6295b0a799 100755 --- a/gerrit_client.py +++ b/gerrit_client.py @@ -15,7 +15,6 @@ import sys import urllib.parse -import fix_encoding import gerrit_util import setup_color @@ -518,7 +517,6 @@ def main(argv): if __name__ == '__main__': # These affect sys.stdout so do it outside of main() to simplify mocks in # unit testing. - fix_encoding.fix_encoding() setup_color.init() try: sys.exit(main(sys.argv[1:])) diff --git a/git_cl.py b/git_cl.py index 3a89663f43..debc7aa48e 100755 --- a/git_cl.py +++ b/git_cl.py @@ -40,7 +40,6 @@ from typing import Tuple import auth import clang_format -import fix_encoding import gclient_paths import gclient_utils import gerrit_util @@ -6707,7 +6706,6 @@ def main(argv): if __name__ == '__main__': # These affect sys.stdout, so do it outside of main() to simplify mocks in # the unit tests. - fix_encoding.fix_encoding() setup_color.init() with metrics.collector.print_notice_and_exit(): sys.exit(main(sys.argv[1:])) diff --git a/git_migrate_default_branch.py b/git_migrate_default_branch.py index 735f049cc4..642a346a2a 100644 --- a/git_migrate_default_branch.py +++ b/git_migrate_default_branch.py @@ -4,7 +4,6 @@ # found in the LICENSE file. """Migrate local repository onto new default branch.""" -import fix_encoding import gerrit_util import git_common import metrics @@ -91,7 +90,6 @@ def main(): if __name__ == '__main__': - fix_encoding.fix_encoding() logging.basicConfig(level=logging.INFO) with metrics.collector.print_notice_and_exit(): try: diff --git a/my_activity.py b/my_activity.py index 8040abc674..f2f8b0e123 100755 --- a/my_activity.py +++ b/my_activity.py @@ -49,7 +49,6 @@ import re import auth -import fix_encoding import gclient_utils import gerrit_util @@ -1040,7 +1039,6 @@ def main(): if __name__ == '__main__': # Fix encoding to support non-ascii issue titles. - fix_encoding.fix_encoding() try: sys.exit(main()) diff --git a/presubmit_support.py b/presubmit_support.py index 10480854d5..a76ecb5530 100755 --- a/presubmit_support.py +++ b/presubmit_support.py @@ -39,7 +39,6 @@ from warnings import warn # Local imports. -import fix_encoding import gclient_paths # Exposed through the API import gclient_utils import git_footers @@ -2201,7 +2200,6 @@ def main(argv=None): if __name__ == '__main__': - fix_encoding.fix_encoding() try: sys.exit(main()) except KeyboardInterrupt: diff --git a/tests/fix_encoding_test.py b/tests/fix_encoding_test.py deleted file mode 100755 index 4c9c5438d6..0000000000 --- a/tests/fix_encoding_test.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 -# coding=utf-8 -# Copyright (c) 2011 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -"""Unit tests for fix_encoding.py.""" - -import os -import sys -import unittest - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import fix_encoding - - -class FixEncodingTest(unittest.TestCase): - # Nice mix of latin, hebrew, arabic and chinese. Doesn't mean anything. - text = u'Héllô 偉大 سيد' - - def test_code_page(self): - # Make sure printing garbage won't throw. - print(self.text.encode() + b'\xff') - print(self.text.encode() + b'\xff', file=sys.stderr) - - def test_utf8(self): - # Make sure printing utf-8 works. - print(self.text.encode('utf-8')) - print(self.text.encode('utf-8'), file=sys.stderr) - - @unittest.skipIf(os.name == 'nt', 'Does not work on Windows') - def test_unicode(self): - # Make sure printing unicode works. - print(self.text) - print(self.text, file=sys.stderr) - - @unittest.skipIf(os.name == 'nt', 'Does not work on Windows') - def test_default_encoding(self): - self.assertEqual('utf-8', sys.getdefaultencoding()) - - def test_win_console(self): - if sys.platform != 'win32': - return - # This should fail if not redirected, e.g. run directly instead of - # through the presubmit check. Can be checked with: python - # tests\fix_encoding_test.py - self.assertEqual(sys.stdout.__class__, fix_encoding.WinUnicodeOutput) - self.assertEqual(sys.stderr.__class__, fix_encoding.WinUnicodeOutput) - self.assertEqual(sys.stdout.encoding, sys.getdefaultencoding()) - self.assertEqual(sys.stderr.encoding, sys.getdefaultencoding()) - - def test_multiple_calls(self): - # Shouldn't do anything. - self.assertEqual(False, fix_encoding.fix_encoding()) - - -if __name__ == '__main__': - fix_encoding.fix_encoding() - unittest.main()