Skip to content

Commit

Permalink
Merge pull request #7837 from ThomasWaldmann/remove-recreate-recompress
Browse files Browse the repository at this point in the history
Remove recreate --recompress option
  • Loading branch information
ThomasWaldmann committed Sep 24, 2023
2 parents a9e625d + 0fcd3e9 commit 506718e
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 80 deletions.
30 changes: 4 additions & 26 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from .chunker import get_chunker, Chunk
from .cache import ChunkListEntry
from .crypto.key import key_factory, UnsupportedPayloadError
from .compress import Compressor, CompressionSpec
from .compress import CompressionSpec
from .constants import * # NOQA
from .crypto.low_level import IntegrityError as IntegrityErrorBase
from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
Expand Down Expand Up @@ -2349,8 +2349,6 @@ def __init__(
keep_exclude_tags=False,
chunker_params=None,
compression=None,
recompress=False,
always_recompress=False,
dry_run=False,
stats=False,
progress=False,
Expand All @@ -2374,8 +2372,6 @@ def __init__(
if self.rechunkify:
logger.debug("Rechunking archives to %s", chunker_params)
self.chunker_params = chunker_params or CHUNKER_PARAMS
self.recompress = recompress
self.always_recompress = always_recompress
self.compression = compression or CompressionSpec("none")
self.seen_chunks = set()

Expand All @@ -2393,13 +2389,7 @@ def recreate(self, archive_name, comment=None, target_name=None):
target = self.create_target(archive, target_name)
if self.exclude_if_present or self.exclude_caches:
self.matcher_add_tagged_dirs(archive)
if (
self.matcher.empty()
and not self.recompress
and not target.recreate_rechunkify
and comment is None
and target_name is None
):
if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
# nothing to do
return False
self.process_items(archive, target)
Expand Down Expand Up @@ -2432,7 +2422,7 @@ def process_item(self, archive, target, item):
self.print_file_status(status, item.path)

def process_chunks(self, archive, target, item):
if not self.recompress and not target.recreate_rechunkify:
if not target.recreate_rechunkify:
for chunk_id, size in item.chunks:
self.cache.chunk_incref(chunk_id, target.stats)
return item.chunks
Expand All @@ -2444,19 +2434,7 @@ def chunk_processor(self, target, chunk):
chunk_id, data = cached_hash(chunk, self.key.id_hash)
if chunk_id in self.seen_chunks:
return self.cache.chunk_incref(chunk_id, target.stats)
overwrite = self.recompress
if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
# Check if this chunk is already compressed the way we want it
old_meta = self.repo_objs.parse_meta(chunk_id, self.repository.get(chunk_id, read_data=False))
compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
compressor_cls, level = Compressor.detect(compr_hdr)
if (
compressor_cls.name == self.repo_objs.compressor.decide({}, data).name
and level == self.repo_objs.compressor.level
):
# Stored chunk has the same compression method and level as we wanted
overwrite = False
chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, overwrite=overwrite, wait=False)
chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False)
self.cache.repository.async_response(wait=False)
self.seen_chunks.add(chunk_entry.id)
return chunk_entry
Expand Down
32 changes: 2 additions & 30 deletions src/borg/archiver/recreate_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ def do_recreate(self, args, repository, manifest, cache):
matcher = build_matcher(args.patterns, args.paths)
self.output_list = args.output_list
self.output_filter = args.output_filter
recompress = args.recompress != "never"
always_recompress = args.recompress == "always"

recreater = ArchiveRecreater(
manifest,
Expand All @@ -33,8 +31,6 @@ def do_recreate(self, args, repository, manifest, cache):
keep_exclude_tags=args.keep_exclude_tags,
chunker_params=args.chunker_params,
compression=args.compression,
recompress=recompress,
always_recompress=always_recompress,
progress=args.progress,
stats=args.stats,
file_status_printer=self.print_file_status,
Expand Down Expand Up @@ -81,11 +77,6 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
Note that all paths in an archive are relative, therefore absolute patterns/paths
will *not* match (``--exclude``, ``--exclude-from``, PATHs).
``--recompress`` allows one to change the compression of existing data in archives.
Due to how Borg stores compressed size information this might display
incorrect information for archives that were not recreated at the same time.
There is no risk of data loss by this.
``--chunker-params`` will re-chunk all files in the archive, this can be
used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives.
Expand All @@ -101,9 +92,9 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
With ``--target`` the original archive is not replaced, instead a new archive is created.
When rechunking (or recompressing), space usage can be substantial - expect
When rechunking, space usage can be substantial - expect
at least the entire deduplicated size of the archives using the previous
chunker (or compression) params.
chunker params.
If you recently ran borg check --repair and it had to fix lost chunks with all-zero
replacement chunks, please first run another backup for the same data and re-run
Expand Down Expand Up @@ -201,25 +192,6 @@ def build_parser_recreate(self, subparsers, common_parser, mid_common_parser):
action=Highlander,
help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
)
archive_group.add_argument(
"--recompress",
metavar="MODE",
dest="recompress",
nargs="?",
default="never",
const="if-different",
choices=("never", "if-different", "always"),
action=Highlander,
help="recompress data chunks according to `MODE` and ``--compression``. "
"Possible modes are "
"`if-different`: recompress if current compression is with a different "
"compression algorithm or different level; "
"`always`: recompress unconditionally; and "
"`never`: do not recompress (use this option explicitly to prevent "
"recompression). "
"If no MODE is given, `if-different` will be used. "
'Not passing --recompress is equivalent to "--recompress never".',
)
archive_group.add_argument(
"--chunker-params",
metavar="PARAMS",
Expand Down
9 changes: 3 additions & 6 deletions src/borg/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,15 +939,13 @@ def update_compatibility(self):
self.cache_config.ignored_features.update(repo_features - my_features)
self.cache_config.mandatory_features.update(repo_features & my_features)

def add_chunk(
self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None, ctype=None, clevel=None
):
def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ctype=None, clevel=None):
if not self.txn_active:
self.begin_txn()
if size is None and compress:
size = len(data) # data is still uncompressed
refcount = self.seen_chunk(id, size)
if refcount and not overwrite:
if refcount:
return self.chunk_incref(id, stats)
if size is None:
raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also")
Expand Down Expand Up @@ -1115,8 +1113,7 @@ def file_known_and_unchanged(self, hashed_path, path_hash, st):
def memorize_file(self, hashed_path, path_hash, st, ids):
pass

def add_chunk(self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None):
assert not overwrite, "AdHocCache does not permit overwrites — trying to use it for recreate?"
def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None):
if not self._txn_active:
self.begin_txn()
if size is None and compress:
Expand Down
14 changes: 0 additions & 14 deletions src/borg/testsuite/archiver/recreate_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,20 +191,6 @@ def test_recreate_no_rechunkify(archivers, request):
assert num_chunks == num_chunks_after_recreate


def test_recreate_recompress(archivers, request):
    """Check that ``recreate --recompress`` leaves the file's sha256 unchanged.

    An archive is created with ``-C none``, then recreated with ``-C lz4
    --recompress``; the sha256 reported by ``list`` for the test file must be
    identical before and after.
    """
    archiver = request.getfixturevalue(archivers)
    create_regular_file(archiver.input_path, "compressible", size=10000)
    cmd(archiver, "rcreate", RK_ENCRYPTION)
    cmd(archiver, "create", "test", "input", "-C", "none")

    def listed_sha256():
        # The listing is formatted as "<size> <sha256>"; only the hash is compared.
        listing = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
        _size, digest = listing.split(" ")
        return digest

    sha256_before = listed_sha256()
    cmd(archiver, "recreate", "-C", "lz4", "--recompress")
    # The cache is checked right after the recreate run, before listing again.
    check_cache(archiver)
    sha256_after = listed_sha256()
    assert sha256_before == sha256_after


def test_recreate_timestamp(archivers, request):
archiver = request.getfixturevalue(archivers)
create_test_files(archiver.input_path)
Expand Down
4 changes: 0 additions & 4 deletions src/borg/testsuite/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,6 @@ def test_does_not_delete_existing_chunks(self, repository, cache):
cache.chunk_decref(H(1), Statistics())
assert repository.get(H(1)) == b"1234"

def test_does_not_overwrite(self, cache):
    """Expect ``add_chunk(..., overwrite=True)`` on the ``cache`` fixture to raise AssertionError."""
    with pytest.raises(AssertionError):
        # Arguments are built inside the context so any AssertionError is captured as before.
        chunk_id, payload = H(1), b"5678"
        cache.add_chunk(chunk_id, {}, payload, stats=Statistics(), overwrite=True)

def test_seen_chunk_add_chunk_size(self, cache):
assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)

Expand Down

0 comments on commit 506718e

Please sign in to comment.