Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed issue #496 - sanitize/truncate bug #1361

Merged
merged 5 commits into from
Jul 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 9 additions & 16 deletions beets/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,26 +790,19 @@ def destination(self, fragment=False, basedir=None, platform=None,
if beets.config['asciify_paths']:
subpath = unidecode(subpath)

# Truncate components and remove forbidden characters.
subpath = util.sanitize_path(subpath, self._db.replacements)

# Encode for the filesystem.
if not fragment:
subpath = bytestring_path(subpath)

# Preserve extension.
_, extension = os.path.splitext(self.path)
if fragment:
# Outputting Unicode.
extension = extension.decode('utf8', 'ignore')
subpath += extension.lower()

# Truncate too-long components.
maxlen = beets.config['max_filename_length'].get(int)
if not maxlen:
# When zero, try to determine from filesystem.
maxlen = util.max_filename_length(self._db.directory)
subpath = util.truncate_path(subpath, maxlen)

subpath, fellback = util.legalize_path(
subpath, self._db.replacements, maxlen,
os.path.splitext(self.path)[1], fragment
)

# Print an error message if legalize fell back to default replacements
if fellback:
log.warning(u'fell back to default replacements when naming file')

if fragment:
return subpath
Expand Down
57 changes: 57 additions & 0 deletions beets/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,63 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH):
return os.path.join(*out)


def _legalize_stage(path, replacements, length, extension, fragment):
""" Performs sanitization of the path, then encodes for the filesystem,
appends the extension and truncates. Returns the path (unicode if fragment
is set, otherwise bytes), and whether truncation was required.
"""
# Perform an initial sanitization including user replacements.
path = sanitize_path(path, replacements)

# Encode for the filesystem.
if not fragment:
path = bytestring_path(path)

# Preserve extension.
path += extension.lower()

# Truncate too-long components.
pre_truncate_path = path
path = truncate_path(path, length)

return path, path != pre_truncate_path


def legalize_path(path, replacements, length, extension, fragment):
""" Perform up to three calls to _legalize_stage, to generate a stable
output path, taking user replacements into consideration if possible. The
limited number of iterations avoids the possibility of an infinite loop of
sanitization and truncation operations, which could be caused by some user
replacements.
"""

if fragment:
# Outputting Unicode.
extension = extension.decode('utf8', 'ignore')

first_stage_path, _ = _legalize_stage(
path, replacements, length, extension, fragment
)

# Convert back to Unicode with extension removed.
first_stage_path = os.path.splitext(displayable_path(first_stage_path))[0]

# Re-sanitize following truncation (including user replacements).
second_stage_path, retruncated = _legalize_stage(
first_stage_path, replacements, length, extension, fragment
)

# If the path was once again truncated, discard user replacements.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested expansion:

This is to avoid a "vicious cycle," in which truncation requires sanitation, then that sanitation requires truncation, ad infinitum.

if retruncated:
second_stage_path, _ = _legalize_stage(
first_stage_path, None, length, extension, fragment
)

return second_stage_path, True
else:
return second_stage_path, False


def str2bool(value):
"""Returns a boolean reflecting a human-entered string."""
return value.lower() in ('yes', '1', 'true', 't', 'y')
Expand Down
5 changes: 5 additions & 0 deletions docs/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ compatibility with Windows-influenced network filesystems like Samba).
Trailing dots and trailing whitespace, which can cause problems on Windows
clients, are also removed.

When replacements other than the defaults are used, it is possible that they
will increase the length of the path. In the scenario where this leads to a
conflict with the maximum filename length, the default replacements will be
used to resolve the conflict and beets will display a warning.

Note that paths might contain special characters such as typographical
quotes (``“”``). With the configuration above, those will not be
replaced as they don't match the typewriter quote (``"``). To also strip these
Expand Down
21 changes: 18 additions & 3 deletions test/test_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,7 @@ def test_destination_with_empty_final_component(self):
self.assertEqual(self.i.destination(),
np('base/one/_.mp3'))

@unittest.skip('unimplemented: #496')
def test_truncation_does_not_conflict_with_replacement(self):
def test_legalize_path_one_for_one_replacement(self):
# Use a replacement that should always replace the last X in any
# path component with a Z.
self.lib.replacements = [
Expand All @@ -466,7 +465,23 @@ def test_truncation_does_not_conflict_with_replacement(self):

# The final path should reflect the replacement.
dest = self.i.destination()
self.assertTrue('XZ' in dest)
self.assertEqual(dest[-2:], 'XZ')

def test_legalize_path_one_for_many_replacement(self):
# Use a replacement that should always replace the last X in any
# path component with four Zs.
self.lib.replacements = [
(re.compile(r'X$'), u'ZZZZ'),
]

# Construct an item whose untruncated path ends with a Y but whose
# truncated version ends with an X.
self.i.title = 'X' * 300 + 'Y'

# The final path should ignore the user replacement and create a path
# of the correct length, containing Xs.
dest = self.i.destination()
self.assertEqual(dest[-2:], 'XX')


class ItemFormattedMappingTest(_common.LibTestCase):
Expand Down