Skip to content

Commit

Permalink
Add --format option to borg diff, resolve issue #4634 (#7534)
Browse files Browse the repository at this point in the history
diff: add --format option

also: refactoring/improvements of BaseFormatter
  • Loading branch information
RF-Tar-Railt committed Jun 11, 2023
1 parent 8506c05 commit 616d5e7
Show file tree
Hide file tree
Showing 10 changed files with 493 additions and 267 deletions.
71 changes: 34 additions & 37 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from getpass import getuser
from io import BytesIO
from itertools import groupby, zip_longest
from typing import Iterator
from shutil import get_terminal_size

from .platformflags import is_win32
Expand Down Expand Up @@ -297,31 +298,24 @@ def unpack_many(self, ids, *, filter=None, preload=False):
unpacker = msgpack.Unpacker(use_list=False)
for data in self.fetch_many(ids):
unpacker.feed(data)
items = [Item(internal_dict=item) for item in unpacker]
for item in items:
for _item in unpacker:
item = Item(internal_dict=_item)
if "chunks" in item:
item.chunks = [ChunkListEntry(*e) for e in item.chunks]

if filter:
items = [item for item in items if filter(item)]

if preload:
for item in items:
if "chunks" in item:
hlid = item.get("hlid", None)
if hlid is None:
preload_chunks = True
else:
if hlid in hlids_preloaded:
preload_chunks = False
else:
# chunks of this hardlinked inode were not yet preloaded via another hardlink to the same inode
preload_chunks = True
hlids_preloaded.add(hlid)
if preload_chunks:
self.repository.preload([c.id for c in item.chunks])

for item in items:
if filter and not filter(item):
continue
if preload and "chunks" in item:
hlid = item.get("hlid", None)
if hlid is None:
preload_chunks = True
elif hlid in hlids_preloaded:
preload_chunks = False
else:
# chunks of this hardlinked inode were not yet preloaded via another hardlink to the same inode
preload_chunks = True
hlids_preloaded.add(hlid)
if preload_chunks:
self.repository.preload([c.id for c in item.chunks])
yield item

def fetch_many(self, ids, is_preloaded=False):
Expand Down Expand Up @@ -631,10 +625,9 @@ def item_filter(self, item, filter=None):
def iter_items(self, filter=None, preload=False):
# note: when calling this with preload=True, later fetch_many() must be called with
# is_preloaded=True or the RemoteRepository code will leak memory!
for item in self.pipeline.unpack_many(
yield from self.pipeline.unpack_many(
self.metadata.items, preload=preload, filter=lambda item: self.item_filter(item, filter)
):
yield item
)

def add_item(self, item, show_progress=True, stats=None):
if show_progress and self.show_progress:
Expand Down Expand Up @@ -1123,55 +1116,59 @@ def chunk_decref(id, stats):
logger.warning("borg check --repair is required to free all space.")

@staticmethod
def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False):
def compare_archives_iter(
archive1: "Archive", archive2: "Archive", matcher=None, can_compare_chunk_ids=False
) -> Iterator[ItemDiff]:
"""
Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.
Yields an ItemDiff instance describing changes/indicating equality.
:param matcher: PatternMatcher class to restrict results to only matching paths.
:param can_compare_chunk_ids: Whether --chunker-params are the same for both archives.
"""

def compare_items(item1, item2):
def compare_items(path: str, item1: Item, item2: Item):
return ItemDiff(
path,
item1,
item2,
archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]),
archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]),
can_compare_chunk_ids=can_compare_chunk_ids,
content_only=content_only,
)

orphans_archive1 = OrderedDict()
orphans_archive2 = OrderedDict()
orphans_archive1: OrderedDict[str, Item] = OrderedDict()
orphans_archive2: OrderedDict[str, Item] = OrderedDict()

assert matcher is not None, "matcher must be set"

for item1, item2 in zip_longest(
archive1.iter_items(lambda item: matcher.match(item.path)),
archive2.iter_items(lambda item: matcher.match(item.path)),
):
if item1 and item2 and item1.path == item2.path:
yield (item1.path, compare_items(item1, item2))
yield compare_items(item1.path, item1, item2)
continue
if item1:
matching_orphan = orphans_archive2.pop(item1.path, None)
if matching_orphan:
yield (item1.path, compare_items(item1, matching_orphan))
yield compare_items(item1.path, item1, matching_orphan)
else:
orphans_archive1[item1.path] = item1
if item2:
matching_orphan = orphans_archive1.pop(item2.path, None)
if matching_orphan:
yield (matching_orphan.path, compare_items(matching_orphan, item2))
yield compare_items(matching_orphan.path, matching_orphan, item2)
else:
orphans_archive2[item2.path] = item2
# At this point orphans_* contain items that had no matching partner in the other archive
for added in orphans_archive2.values():
path = added.path
deleted_item = Item.create_deleted(path)
yield (path, compare_items(deleted_item, added))
yield compare_items(path, deleted_item, added)
for deleted in orphans_archive1.values():
path = deleted.path
deleted_item = Item.create_deleted(path)
yield (path, compare_items(deleted, deleted_item))
yield compare_items(path, deleted, deleted_item)


class MetadataCollector:
Expand Down
116 changes: 82 additions & 34 deletions src/borg/archiver/diff_cmd.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import argparse
import textwrap
import json
import sys
import os

from ._common import with_repository, with_archive, build_matcher
from ._common import with_repository, with_archive, build_matcher, Highlander
from ..archive import Archive
from ..constants import * # NOQA
from ..helpers import archivename_validator
from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, BorgJsonEncoder
from ..manifest import Manifest
from ..helpers.parseformat import BorgJsonEncoder

from ..logger import create_logger

logger = create_logger()
Expand All @@ -18,14 +19,12 @@ class DiffMixIn:
@with_archive
def do_diff(self, args, repository, manifest, archive):
"""Diff contents of two archives"""

def print_json_output(diff, path):
print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder))

def print_text_output(diff, path):
print("{:<19} {}".format(" ".join([str for j, str in diff]), path))

print_output = print_json_output if args.json_lines else print_text_output
if args.format is not None:
format = args.format
elif args.content_only:
format = "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}"
else:
format = os.environ.get("BORG_DIFF_FORMAT", "{change} {path}{NL}")

archive1 = archive
archive2 = Archive(manifest, args.other_name)
Expand All @@ -43,17 +42,36 @@ def print_text_output(diff, path):

matcher = build_matcher(args.patterns, args.paths)

diffs = Archive.compare_archives_iter(
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only
diffs_iter = Archive.compare_archives_iter(
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids
)
# Filter out equal items before (optionally) sorting, to save memory
diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal)
diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only))

if args.sort:
diffs = sorted(diffs)

for path, diff in diffs:
print_output(diff, path)
diffs = sorted(diffs, key=lambda diff: diff.path)

formatter = DiffFormatter(format, args.content_only)
for diff in diffs:
if args.json_lines:
print(
json.dumps(
{
"path": diff.path,
"changes": [
change.to_dict()
for name, change in diff.changes().items()
if not args.content_only or (name not in DiffFormatter.METADATA)
],
},
sort_keys=True,
cls=BorgJsonEncoder,
)
)
else:
res: str = formatter.format_item(diff)
if res.strip():
sys.stdout.write(res)

for pattern in matcher.get_unmatched_include_patterns():
self.print_warning("Include pattern '%s' never matched.", pattern)
Expand All @@ -64,25 +82,48 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
from ._common import process_epilog
from ._common import define_exclusion_group

diff_epilog = process_epilog(
"""
This command finds differences (file contents, user/group/mode) between archives.
diff_epilog = (
process_epilog(
"""
This command finds differences (file contents, metadata) between ARCHIVE1 and ARCHIVE2.
For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
.. man NOTES
The FORMAT specifier syntax
+++++++++++++++++++++++++++
The ``--format`` option uses python's `format string syntax
<https://docs.python.org/3.9/library/string.html#formatstrings>`_.
A repository location and an archive name must be specified for REPO::ARCHIVE1.
ARCHIVE2 is just another archive name in same repository (no repository location
allowed).
Examples:
::
For archives created with Borg 1.1 or newer diff automatically detects whether
the archives are created with the same chunker params. If so, only chunk IDs
are compared, which is very fast.
$ borg diff --format '{content:30} {path}{NL}' ArchiveFoo ArchiveBar
modified: +4.1 kB -1.0 kB file-diff
...
For archives prior to Borg 1.1 chunk contents are compared by default.
If you did not create the archives with different chunker params,
pass ``--same-chunker-params``.
Note that the chunker params changed from Borg 0.xx to 1.0.
# {VAR:<NUMBER} - pad to NUMBER columns left-aligned.
# {VAR:>NUMBER} - pad to NUMBER columns right-aligned.
$ borg diff --format '{content:>30} {path}{NL}' ArchiveFoo ArchiveBar
modified: +4.1 kB -1.0 kB file-diff
...
For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
"""
The following keys are always available:
"""
)
+ BaseFormatter.keys_help()
+ textwrap.dedent(
"""
Keys available only when showing differences between archives:
"""
)
+ DiffFormatter.keys_help()
)
subparser = subparsers.add_parser(
"diff",
Expand All @@ -107,6 +148,13 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
help="Override check of chunker parameters.",
)
subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
subparser.add_argument(
"--format",
metavar="FORMAT",
dest="format",
action=Highlander,
help='specify format for differences between archives (default: "{change} {path}{NL}")',
)
subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines. ")
subparser.add_argument(
"--content-only",
Expand Down
5 changes: 2 additions & 3 deletions src/borg/archiver/list_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@ def do_list(self, args, repository, manifest):

def _list_inner(cache):
archive = Archive(manifest, args.name, cache=cache)

formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
formatter = ItemFormatter(archive, format)
for item in archive.iter_items(lambda item: matcher.match(item.path)):
sys.stdout.write(formatter.format_item(item))
sys.stdout.write(formatter.format_item(item, args.json_lines, sort=True))

# Only load the cache if it will be used
if ItemFormatter.format_needs_cache(format):
Expand Down
4 changes: 2 additions & 2 deletions src/borg/archiver/prune_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def do_prune(self, args, repository, manifest):
format = "{archive}"
else:
format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]")
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=False, iec=args.iec)
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)

checkpoint_re = r"\.checkpoint(\.\d+)?"
archives_checkpoints = manifest.archives.list(
Expand Down Expand Up @@ -169,7 +169,7 @@ def checkpoint_func():
or (args.list_pruned and archive in to_delete)
or (args.list_kept and archive not in to_delete)
):
list_logger.info(f"{log_message:<40} {formatter.format_item(archive)}")
list_logger.info(f"{log_message:<40} {formatter.format_item(archive, jsonline=False)}")
pi.finish()
if sig_int:
# Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.
Expand Down
6 changes: 3 additions & 3 deletions src/borg/archiver/rlist_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ def do_rlist(self, args, repository, manifest):
format = "{archive}{NL}"
else:
format = os.environ.get("BORG_RLIST_FORMAT", "{archive:<36} {time} [{id}]{NL}")
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=args.json, iec=args.iec)
formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)

output_data = []

for archive_info in manifest.archives.list_considering(args):
if args.json:
output_data.append(formatter.get_item_data(archive_info))
output_data.append(formatter.get_item_data(archive_info, args.json))
else:
sys.stdout.write(formatter.format_item(archive_info))
sys.stdout.write(formatter.format_item(archive_info, args.json))

if args.json:
json_print(basic_json_data(manifest, extra={"archives": output_data}))
Expand Down
2 changes: 1 addition & 1 deletion src/borg/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator
from .parseformat import format_archive, parse_stringified_list, clean_lines
from .parseformat import location_validator, archivename_validator, comment_validator
from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, DiffFormatter, file_status
from .parseformat import swidth_slice, ellipsis_truncate
from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
from .parseformat import Highlander, MakePathSafeAction
Expand Down

0 comments on commit 616d5e7

Please sign in to comment.