Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed issue #496 - sanitize/truncate bug #1361

Merged
merged 5 commits into from
Jul 8, 2015
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 3 additions & 16 deletions beets/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,26 +790,13 @@ def destination(self, fragment=False, basedir=None, platform=None,
if beets.config['asciify_paths']:
subpath = unidecode(subpath)

# Truncate components and remove forbidden characters.
subpath = util.sanitize_path(subpath, self._db.replacements)

# Encode for the filesystem.
if not fragment:
subpath = bytestring_path(subpath)

# Preserve extension.
_, extension = os.path.splitext(self.path)
if fragment:
# Outputting Unicode.
extension = extension.decode('utf8', 'ignore')
subpath += extension.lower()

# Truncate too-long components.
maxlen = beets.config['max_filename_length'].get(int)
if not maxlen:
# When zero, try to determine from filesystem.
maxlen = util.max_filename_length(self._db.directory)
subpath = util.truncate_path(subpath, maxlen)

subpath = util.legalize_path(subpath, self._db.replacements, maxlen,
os.path.splitext(self.path)[1], fragment)

if fragment:
return subpath
Expand Down
53 changes: 53 additions & 0 deletions beets/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,59 @@ def truncate_path(path, length=MAX_FILENAME_LENGTH):
return os.path.join(*out)


def _legalize_stage(path, replacements, length, extension, fragment):
""" Perform a signle stage of legalization of the path (ie. a single
sanitization and truncation). Returns the path (unicode if fragment is set,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps, to be slightly more specific:

(i.e., it sanitizes the Unicode path, encodes it to bytes, adds the extension, and then truncates it)

otherwise bytes), and whether truncation was required.
"""
# Perform an initial sanitization including user replacements.
path = sanitize_path(path, replacements)

# Encode for the filesystem.
if not fragment:
path = bytestring_path(path)

# Preserve extension.
path += extension.lower()

# Truncate too-long components.
pre_truncate_path = path
path = truncate_path(path, length)

return (path, path != pre_truncate_path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tiny style nit: the parentheses are not needed here.



def legalize_path(path, replacements, length, extension, fragment):
""" Perform up to three calls to _legalize_stage, to generate a stable
output path, taking user replacements into consideration if possible. This
additional complexity is necessary for cases where truncation produces
unclean paths (eg. trailing space).
"""

if fragment:
# Outputting Unicode.
extension = extension.decode('utf8', 'ignore')

first_stage_path =\
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be first_stage_path, _ = ... to ignore the second returned value?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops! Just noticed the [0] below. Using the unpacking syntax can be marginally clearer, though.

_legalize_stage(path, replacements, length, extension, fragment)[0]

# Convert back to Unicode with extension removed.
first_stage_path = os.path.splitext(displayable_path(first_stage_path))[0]

# Re-sanitize following truncation (including user replacements).
second_stage_path, retruncated = _legalize_stage(
first_stage_path, replacements, length, extension, fragment
)

# If the path was once again truncated, discard user replacements.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested expansion:

This is to avoid a "vicious cycle," in which truncation requires sanitation, then that sanitation requires truncation, ad infinitum.

if retruncated:
second_stage_path = _legalize_stage(
first_stage_path, None, length, extension, fragment
)[0]

return second_stage_path


def str2bool(value):
"""Returns a boolean reflecting a human-entered string."""
return value.lower() in ('yes', '1', 'true', 't', 'y')
Expand Down
21 changes: 18 additions & 3 deletions test/test_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,7 @@ def test_destination_with_empty_final_component(self):
self.assertEqual(self.i.destination(),
np('base/one/_.mp3'))

@unittest.skip('unimplemented: #496')
def test_truncation_does_not_conflict_with_replacement(self):
def test_legalize_path_one_for_one_replacement(self):
# Use a replacement that should always replace the last X in any
# path component with a Z.
self.lib.replacements = [
Expand All @@ -466,7 +465,23 @@ def test_truncation_does_not_conflict_with_replacement(self):

# The final path should reflect the replacement.
dest = self.i.destination()
self.assertTrue('XZ' in dest)
self.assertEqual(dest[-2:], 'XZ')

def test_legalize_path_one_for_many_replacement(self):
# Use a replacement that should always replace the last X in any
# path component with four Zs.
self.lib.replacements = [
(re.compile(r'X$'), u'ZZZZ'),
]

# Construct an item whose untruncated path ends with a Y but whose
# truncated version ends with an X.
self.i.title = 'X' * 300 + 'Y'

# The final path should ignore the user replacement and create a path
# of the correct length, containing Xs.
dest = self.i.destination()
self.assertEqual(dest[-2:], 'XX')


class ItemFormattedMappingTest(_common.LibTestCase):
Expand Down