Skip to content

Commit

Permalink
importer: Add ability to verify copy and move operations
Browse files Browse the repository at this point in the history
To avoid corrupting files and worse, remove the original files (move), a
quick check is added to ensure files are still in order.

On failure the crc is printed to aid in discovering if the problem was
with the source (unlikely) or destination file. Using `filecmp` would only
tell us of a miss-match, but making it harder to figure out why/where
things fail.

Since `beets import` can be destructive (move), we want to be delicate
here.

Signed-off-by: Olliver Schinagl <oliver@schinagl.nl>
  • Loading branch information
oliv3r committed May 18, 2024
1 parent c75f07a commit 605d12b
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 10 deletions.
1 change: 1 addition & 0 deletions beets/config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ import:
set_fields: {}
ignored_alias_types: []
singleton_album_disambig: yes
verify: no

# --------------- Paths ---------------

Expand Down
9 changes: 7 additions & 2 deletions beets/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,11 @@ def manipulate_files(self, operation=None, write=False, session=None):
If `write` is `True` metadata is written to the files.
"""

if session is not None and "verify" in session.config:
verify = session.config["verify"]
else:
verify = False

items = self.imported_items()
# Save the original paths of all items for deletion and pruning
# in the next step (finalization).
Expand All @@ -791,14 +796,14 @@ def manipulate_files(self, operation=None, write=False, session=None):
and self.replaced_items[item]
and session.lib.directory in util.ancestry(old_path)
):
item.move()
item.move(verify=verify)
# We moved the item, so remove the
# now-nonexistent file from old_paths.
self.old_paths.remove(old_path)
else:
# A normal import. Just copy files and keep track of
# old paths.
item.move(operation)
item.move(operation, verify=verify)

if write and (self.apply or self.choice_flag == action.RETAG):
item.try_write()
Expand Down
9 changes: 5 additions & 4 deletions beets/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,7 @@ def try_sync(self, write, move, with_album=True):

# Files themselves.

def move_file(self, dest, operation=MoveOperation.MOVE):
def move_file(self, dest, operation=MoveOperation.MOVE, verify=False):
"""Move, copy, link or hardlink the item depending on `operation`,
updating the path value if the move succeeds.
Expand All @@ -906,12 +906,12 @@ def move_file(self, dest, operation=MoveOperation.MOVE):
source=self.path,
destination=dest,
)
util.move(self.path, dest)
util.move(self.path, dest, verify = verify)

Check failure on line 909 in beets/library.py

View workflow job for this annotation

GitHub Actions / lint

E251 unexpected spaces around keyword / parameter equals

Check failure on line 909 in beets/library.py

View workflow job for this annotation

GitHub Actions / lint

E251 unexpected spaces around keyword / parameter equals
plugins.send(
"item_moved", item=self, source=self.path, destination=dest
)
elif operation == MoveOperation.COPY:
util.copy(self.path, dest)
util.copy(self.path, dest, verify = verify)

Check failure on line 914 in beets/library.py

View workflow job for this annotation

GitHub Actions / lint

E251 unexpected spaces around keyword / parameter equals

Check failure on line 914 in beets/library.py

View workflow job for this annotation

GitHub Actions / lint

E251 unexpected spaces around keyword / parameter equals
plugins.send(
"item_copied", item=self, source=self.path, destination=dest
)
Expand Down Expand Up @@ -992,6 +992,7 @@ def move(
basedir=None,
with_album=True,
store=True,
verify=False,
):
"""Move the item to its designated location within the library
directory (provided by destination()).
Expand Down Expand Up @@ -1022,7 +1023,7 @@ def move(

# Perform the move and store the change.
old_path = self.path
self.move_file(dest, operation)
self.move_file(dest, operation, verify)
if store:
self.store()

Expand Down
6 changes: 6 additions & 0 deletions beets/ui/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -1576,6 +1576,12 @@ def import_func(lib, opts, args):
metavar="FIELD=VALUE",
help="set the given fields to the supplied values",
)
import_cmd.parser.add_option(
"--verify",
dest="verify",
action="store_true",
help="Verify copy/move operations before removing any files",
)
import_cmd.func = import_func
default_commands.append(import_cmd)

Expand Down
24 changes: 20 additions & 4 deletions beets/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

"""Miscellaneous utility functions."""

import binascii
import errno
import fnmatch
import functools
Expand Down Expand Up @@ -482,11 +483,11 @@ def remove(path: Optional[bytes], soft: bool = True):
raise FilesystemError(exc, "delete", (path,), traceback.format_exc())


def copy(path: bytes, dest: bytes, replace: bool = False):
def copy(path: bytes, dest: bytes, replace: bool = False, verify: bool = False):
"""Copy a plain file. Permissions are not copied. If `dest` already
exists, raises a FilesystemError unless `replace` is True. Has no
effect if `path` is the same as `dest`. Paths are translated to
system paths before the syscall.
system paths before the syscall. Abort of verification failure.
"""
if samefile(path, dest):
return
Expand All @@ -499,14 +500,21 @@ def copy(path: bytes, dest: bytes, replace: bool = False):
except OSError as exc:
raise FilesystemError(exc, "copy", (path, dest), traceback.format_exc())

if verify:
pathcrc = binascii.crc32(open(path, "rb").read())
destcrc = binascii.crc32(open(dest, "rb").read())
if pathcrc != destcrc:
raise FilesystemError(f"CRC failure ({pathcrc} != {dstcrc})",

Check failure on line 507 in beets/util/__init__.py

View workflow job for this annotation

GitHub Actions / lint

F821 undefined name 'dstcrc'
"copy", (path, dest))

def move(path: bytes, dest: bytes, replace: bool = False):

def move(path: bytes, dest: bytes, replace: bool = False, verify: bool = False):
"""Rename a file. `dest` may not be a directory. If `dest` already
exists, raises an OSError unless `replace` is True. Has no effect if
`path` is the same as `dest`. If the paths are on different
filesystems (or the rename otherwise fails), a copy is attempted
instead, in which case metadata will *not* be preserved. Paths are
translated to system paths.
translated to system paths. Verify (checksum) copy before remove.
"""
if os.path.isdir(syspath(path)):
raise FilesystemError("source is directory", "move", (path, dest))
Expand All @@ -532,10 +540,18 @@ def move(path: bytes, dest: bytes, replace: bool = False):
)
try:
with open(syspath(path), "rb") as f:
if verify:
pathcrc = binascii.crc32(f.read())
shutil.copyfileobj(f, tmp)
finally:
tmp.close()

if verify:
destcrc = binascii.crc32(tmp.read())
if pathcrc != destcrc:
raise FilesystemError(f"CRC failure ({pathcrc} != {dstcrc})",

Check failure on line 552 in beets/util/__init__.py

View workflow job for this annotation

GitHub Actions / lint

F821 undefined name 'dstcrc'
"move", (path, dest))

# Move the copied file into place.
try:
os.replace(tmp.name, syspath(dest))
Expand Down
1 change: 1 addition & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ New features:
* Add support for `barcode` field.
:bug:`3172`
* :doc:`/plugins/smartplaylist`: Add new config option `smartplaylist.fields`.
* :ref:`import-options`: Ability to verify copy/move operations.

Bug fixes:

Expand Down
16 changes: 16 additions & 0 deletions docs/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,22 @@ This option *overrides* ``copy``, so enabling it will always move
(and not copy) files. The ``-c`` switch to the ``beet import`` command,
however, still takes precedence.

.. _config-import-verify:

verify
~~~~~~

Either ``yes`` or ``no``, indicating whether **copy** and **move** should
verify copy and move operations when using ``beet import``.
Defaults to ``no``.

Files are being compared when doing either a copy or move operation and will
fail because of it. Usually this indicates problems with physical storage, but
we wouldn't want to be responsible for corrupting files.

The ``--verify`` switch to the ``beet import`` command overrides the config
file.

.. _link:

link
Expand Down
6 changes: 6 additions & 0 deletions test/test_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,12 @@ def test_empty_directory_singleton_warning(self):
import_dir = displayable_path(import_dir)
self.assertIn(f"No files imported from {import_dir}", logs)

def test_crc_missmatch_on_move(self):
config["import"]["verify"] = True

Check failure on line 593 in test/test_importer.py

View workflow job for this annotation

GitHub Actions / lint

E999 TabError: inconsistent use of tabs and spaces in indentation
self.setup_importer.run()
# Corrupt file? library=/dev/null?
self.assertIn(f"CRC failure", logs);

def test_asis_no_data_source(self):
self.assertIsNone(self.lib.items().get())

Expand Down

0 comments on commit 605d12b

Please sign in to comment.