Skip to content

Commit

Permalink
Refactor some try..excepts into read_utf8_with_fallback
Browse files Browse the repository at this point in the history
Extract common pattern for reading a file with UTF-8 into the
unicode_utils module.
  • Loading branch information
abravalheri committed Apr 17, 2024
1 parent 30e8fe5 commit 8ec554f
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 30 deletions.
23 changes: 6 additions & 17 deletions setuptools/command/develop.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from setuptools import namespaces
import setuptools

from ..compat import py39
from ..unicode_utils import read_utf8_with_fallback


class develop(namespaces.DevelopInstaller, easy_install):
Expand Down Expand Up @@ -131,14 +131,10 @@ def uninstall_link(self):
if os.path.exists(self.egg_link):
log.info("Removing %s (link to %s)", self.egg_link, self.egg_base)

try:
with open(self.egg_link, encoding="utf-8") as egg_link_file:
contents = [line.rstrip() for line in egg_link_file]
except UnicodeDecodeError: # pragma: no cover
with open(
self.egg_link, encoding=py39.LOCALE_ENCODING
) as egg_link_file:
contents = [line.rstrip() for line in egg_link_file]
contents = [
line.rstrip()
for line in read_utf8_with_fallback(self.egg_link).splitlines()
]

if contents not in ([self.egg_path], [self.egg_path, self.setup_path]):
log.warn("Link points to %s: uninstall aborted", contents)
Expand All @@ -165,14 +161,7 @@ def install_egg_scripts(self, dist):
for script_name in self.distribution.scripts or []:
script_path = os.path.abspath(convert_path(script_name))
script_name = os.path.basename(script_path)

try:
with open(script_path, encoding="utf-8") as strm:
script_text = strm.read()
except UnicodeDecodeError: # pragma: no cover
with open(script_path, encoding=py39.LOCALE_ENCODING) as strm:
script_text = strm.read()

script_text = read_utf8_with_fallback(script_path)
self.install_script(dist, script_name, script_text, script_path)

return None
Expand Down
8 changes: 2 additions & 6 deletions setuptools/package_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from setuptools.extern.more_itertools import unique_everseen

from .compat import py39
from .unicode_utils import read_utf8_with_fallback


EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
Expand Down Expand Up @@ -1120,12 +1121,7 @@ def local_open(url):
for f in os.listdir(filename):
filepath = os.path.join(filename, f)
if f == 'index.html':
try:
with open(filepath, 'r', encoding="utf-8") as fp:
body = fp.read()
except UnicodeDecodeError: # pragma: no cover
with open(filepath, 'r', encoding=py39.LOCALE_ENCODING) as fp:
body = fp.read()
body = read_utf8_with_fallback(filepath)
break
elif os.path.isdir(filepath):
f += '/'
Expand Down
17 changes: 17 additions & 0 deletions setuptools/unicode_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import unicodedata
import sys

from .compat import py39


# HFS Plus uses decomposed UTF-8
def decompose(path):
Expand Down Expand Up @@ -42,3 +44,18 @@ def try_encode(string, enc):
return string.encode(enc)
except UnicodeEncodeError:
return None


def read_utf8_with_fallback(file: str, fallback_encoding=py39.LOCALE_ENCODING) -> str:
    """
    Return the full text content of ``file``.

    The file is decoded as UTF-8 first. If that raises ``UnicodeDecodeError``,
    the file is reopened and read again using ``fallback_encoding``
    (``py39.LOCALE_ENCODING`` by default).

    This is handy for files that may have been written by an older version of
    setuptools and therefore might not be UTF-8 encoded.

    Any other error (for example, the file not existing) is not handled here
    and propagates to the caller, as does a decoding error raised by the
    fallback attempt.
    """
    try:
        with open(file, encoding="utf-8") as stream:
            text = stream.read()
    except UnicodeDecodeError:  # pragma: no cover
        # Not valid UTF-8: start over from the beginning with the fallback.
        with open(file, encoding=fallback_encoding) as stream:
            text = stream.read()
    return text
9 changes: 2 additions & 7 deletions setuptools/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from setuptools.command.egg_info import write_requirements, _egg_basename
from setuptools.archive_util import _unpack_zipfile_obj

from .compat import py39
from .unicode_utils import read_utf8_with_fallback


WHEEL_NAME = re.compile(
Expand Down Expand Up @@ -224,12 +224,7 @@ def _move_data_entries(destination_eggdir, dist_data):
def _fix_namespace_packages(egg_info, destination_eggdir):
namespace_packages = os.path.join(egg_info, 'namespace_packages.txt')
if os.path.exists(namespace_packages):
try:
with open(namespace_packages, encoding="utf-8") as fp:
namespace_packages = fp.read().split()
except UnicodeDecodeError: # pragma: no cover
with open(namespace_packages, encoding=py39.LOCALE_ENCODING) as fp:
namespace_packages = fp.read().split()
namespace_packages = read_utf8_with_fallback(namespace_packages).split()

for mod in namespace_packages:
mod_dir = os.path.join(destination_eggdir, *mod.split('.'))
Expand Down

0 comments on commit 8ec554f

Please sign in to comment.