Replace the duplicated "try UTF-8, then retry with py39.LOCALE_ENCODING" blocks
in develop.py, package_index.py and wheel.py with a single shared helper,
read_utf8_with_fallback(), which this patch adds to setuptools/unicode_utils.py.

NOTE(review): line breaks and indentation were restored from a whitespace-
collapsed copy of this patch; blank context lines follow the @@ hunk counts.
In the excerpt reviewed, the last wheel.py hunk declares 12 old / 7 new lines
but only 11 / 6 are visible -- confirm the remaining context follows.

diff --git a/setuptools/command/develop.py b/setuptools/command/develop.py
index aeb491fe2c..9966681bad 100644
--- a/setuptools/command/develop.py
+++ b/setuptools/command/develop.py
@@ -10,7 +10,7 @@
 from setuptools import namespaces
 import setuptools
 
-from ..compat import py39
+from ..unicode_utils import read_utf8_with_fallback
 
 
 class develop(namespaces.DevelopInstaller, easy_install):
@@ -131,14 +131,10 @@ def uninstall_link(self):
         if os.path.exists(self.egg_link):
             log.info("Removing %s (link to %s)", self.egg_link, self.egg_base)
 
-            try:
-                with open(self.egg_link, encoding="utf-8") as egg_link_file:
-                    contents = [line.rstrip() for line in egg_link_file]
-            except UnicodeDecodeError:  # pragma: no cover
-                with open(
-                    self.egg_link, encoding=py39.LOCALE_ENCODING
-                ) as egg_link_file:
-                    contents = [line.rstrip() for line in egg_link_file]
+            contents = [
+                line.rstrip()
+                for line in read_utf8_with_fallback(self.egg_link).splitlines()
+            ]
 
             if contents not in ([self.egg_path], [self.egg_path, self.setup_path]):
                 log.warn("Link points to %s: uninstall aborted", contents)
@@ -165,14 +161,7 @@ def install_egg_scripts(self, dist):
         for script_name in self.distribution.scripts or []:
             script_path = os.path.abspath(convert_path(script_name))
             script_name = os.path.basename(script_path)
-
-            try:
-                with open(script_path, encoding="utf-8") as strm:
-                    script_text = strm.read()
-            except UnicodeDecodeError:  # pragma: no cover
-                with open(script_path, encoding=py39.LOCALE_ENCODING) as strm:
-                    script_text = strm.read()
-
+            script_text = read_utf8_with_fallback(script_path)
             self.install_script(dist, script_name, script_text, script_path)
 
         return None
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index f835bdcf14..2aa8464162 100644
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -41,6 +41,7 @@
 from setuptools.extern.more_itertools import unique_everseen
 
 from .compat import py39
+from .unicode_utils import read_utf8_with_fallback
 
 
 EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
@@ -1120,12 +1121,7 @@ def local_open(url):
         for f in os.listdir(filename):
             filepath = os.path.join(filename, f)
             if f == 'index.html':
-                try:
-                    with open(filepath, 'r', encoding="utf-8") as fp:
-                        body = fp.read()
-                except UnicodeDecodeError:  # pragma: no cover
-                    with open(filepath, 'r', encoding=py39.LOCALE_ENCODING) as fp:
-                        body = fp.read()
+                body = read_utf8_with_fallback(filepath)
                 break
             elif os.path.isdir(filepath):
                 f += '/'
diff --git a/setuptools/unicode_utils.py b/setuptools/unicode_utils.py
index d43dcc11f9..4bc67feba0 100644
--- a/setuptools/unicode_utils.py
+++ b/setuptools/unicode_utils.py
@@ -1,6 +1,8 @@
 import unicodedata
 import sys
 
+from .compat import py39
+
 
 # HFS Plus uses decomposed UTF-8
 def decompose(path):
@@ -42,3 +44,18 @@ def try_encode(string, enc):
         return string.encode(enc)
     except UnicodeEncodeError:
         return None
+
+
+def read_utf8_with_fallback(file: str, fallback_encoding=py39.LOCALE_ENCODING) -> str:
+    """
+    First try to read the file with UTF-8, if there is an error fallback to a
+    different encoding ("locale" by default). Returns the content of the file.
+    Also useful when reading files that might have been produced by an older version of
+    setuptools.
+    """
+    try:
+        with open(file, "r", encoding="utf-8") as f:
+            return f.read()
+    except UnicodeDecodeError:  # pragma: no cover
+        with open(file, "r", encoding=fallback_encoding) as f:
+            return f.read()
diff --git a/setuptools/wheel.py b/setuptools/wheel.py
index 19f4157423..babd45940f 100644
--- a/setuptools/wheel.py
+++ b/setuptools/wheel.py
@@ -18,7 +18,7 @@
 from setuptools.command.egg_info import write_requirements, _egg_basename
 from setuptools.archive_util import _unpack_zipfile_obj
 
-from .compat import py39
+from .unicode_utils import read_utf8_with_fallback
 
 
 WHEEL_NAME = re.compile(
@@ -224,12 +224,7 @@ def _move_data_entries(destination_eggdir, dist_data):
     def _fix_namespace_packages(egg_info, destination_eggdir):
         namespace_packages = os.path.join(egg_info, 'namespace_packages.txt')
         if os.path.exists(namespace_packages):
-            try:
-                with open(namespace_packages, encoding="utf-8") as fp:
-                    namespace_packages = fp.read().split()
-            except UnicodeDecodeError:  # pragma: no cover
-                with open(namespace_packages, encoding=py39.LOCALE_ENCODING) as fp:
-                    namespace_packages = fp.read().split()
+            namespace_packages = read_utf8_with_fallback(namespace_packages).split()
             for mod in namespace_packages:
                 mod_dir = os.path.join(destination_eggdir, *mod.split('.'))