diff --git a/src/borg/repoobj.py b/src/borg/repoobj.py index 4d2814b4ef..5032104910 100644 --- a/src/borg/repoobj.py +++ b/src/borg/repoobj.py @@ -1,8 +1,6 @@ from collections import namedtuple from struct import Struct -from xxhash import xxh64 - from .constants import * # NOQA from .helpers import msgpack, workarounds from .helpers.errors import IntegrityError @@ -13,10 +11,9 @@ class RepoObj: - # Object header format includes size information for parsing the object into meta and data, - # as well as hashes to enable checking consistency without having the borg key. - obj_header = Struct(" bytes: @@ -67,9 +64,7 @@ def format( data_encrypted = self.key.encrypt(id, data_compressed) meta_packed = msgpack.packb(meta) meta_encrypted = self.key.encrypt(id, meta_packed) - hdr = self.ObjHeader( - len(meta_encrypted), len(data_encrypted), xxh64(meta_encrypted).digest(), xxh64(data_encrypted).digest() - ) + hdr = self.ObjHeader(len(meta_encrypted), len(data_encrypted)) hdr_packed = self.obj_header.pack(*hdr) return hdr_packed + meta_encrypted + data_encrypted diff --git a/src/borg/repository.py b/src/borg/repository.py index 0a37163955..7fc8c48619 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -2,8 +2,6 @@ import time from pathlib import Path -from xxhash import xxh64 - from borgstore.store import Store from borgstore.store import ObjectNotFound as StoreObjectNotFound from borgstore.backends.errors import BackendError as StoreBackendError @@ -307,13 +305,9 @@ def check_object(obj): meta = obj[hdr_size : hdr_size + hdr.meta_size] if hdr.meta_size != len(meta): log_error("metadata size incorrect.") - elif hdr.meta_hash != xxh64(meta).digest(): - log_error("metadata does not match checksum.") data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size] if hdr.data_size != len(data): log_error("data size incorrect.") - elif hdr.data_hash != xxh64(data).digest(): - log_error("data does not match checksum.") else: log_error("too small.") diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index c392b52410..aeccfd91d2 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -362,50 +362,34 @@ def test_verify_data(archivers, request, init_args): if archiver.get_kind() != "local": pytest.skip("only works locally, patches objects") - # it's tricky to test the cryptographic data verification, because usually already the - # repository-level xxh64 hash fails to verify. So we use a fake one that doesn't. - # note: it only works like tested here for a highly engineered data corruption attack, - # because with accidental corruption, usually already the xxh64 low-level check fails. - def fake_xxh64(data, seed=0): - # xxhash.xxh64.digest() returns -> bytes - class FakeDigest: - def digest(self): - return b"fakefake" - - return FakeDigest() - - import borg.repoobj - import borg.repository - - with patch.object(borg.repoobj, "xxh64", fake_xxh64), patch.object(borg.repository, "xxh64", fake_xxh64): - check_cmd_setup(archiver) - shutil.rmtree(archiver.repository_path) - cmd(archiver, "repo-create", *init_args) - create_src_archive(archiver, "archive1") - archive, repository = open_archive(archiver.repository_path, "archive1") - with repository: - for item in archive.iter_items(): - if item.path.endswith(src_file): - chunk = item.chunks[-1] - data = repository.get(chunk.id) - data = data[0:123] + b"x" + data[123:] - repository.put(chunk.id, data) - break + check_cmd_setup(archiver) + shutil.rmtree(archiver.repository_path) + cmd(archiver, "repo-create", *init_args) + create_src_archive(archiver, "archive1") + archive, repository = open_archive(archiver.repository_path, "archive1") + with repository: + for item in archive.iter_items(): + if item.path.endswith(src_file): + chunk = item.chunks[-1] + data = repository.get(chunk.id) + data = data[0:123] + b"x" + data[123:] + repository.put(chunk.id, data) + break - # the normal archives check does not read file content data. - cmd(archiver, "check", "--archives-only", exit_code=0) - # but with --verify-data, it does and notices the issue. - output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1) - assert f"{bin_to_hex(chunk.id)}, integrity error" in output + # the normal archives check does not read file content data. + cmd(archiver, "check", "--archives-only", exit_code=0) + # but with --verify-data, it does and notices the issue. + output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1) + assert f"{bin_to_hex(chunk.id)}, integrity error" in output - # repair will find the defect chunk and remove it - output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0) - assert f"{bin_to_hex(chunk.id)}, integrity error" in output - assert f"{src_file}: Missing file chunk detected" in output + # repair will find the defect chunk and remove it + output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0) + assert f"{bin_to_hex(chunk.id)}, integrity error" in output + assert f"{src_file}: Missing file chunk detected" in output - # run with --verify-data again, it will notice the missing chunk. - output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1) - assert f"{src_file}: Missing file chunk detected" in output + # run with --verify-data again, it will notice the missing chunk. + output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1) + assert f"{src_file}: Missing file chunk detected" in output @pytest.mark.parametrize("init_args", [["--encryption=repokey-aes-ocb"], ["--encryption", "none"]]) @@ -427,13 +411,13 @@ def test_corrupted_file_chunk(archivers, request, init_args): repository.put(chunk.id, data) break - # the normal check checks all repository objects and the xxh64 checksum fails. - output = cmd(archiver, "check", "--repository-only", exit_code=1) - assert f"{bin_to_hex(chunk.id)} is corrupted: data does not match checksum." in output + # --verify-data decrypts and catches the corruption. + output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1) + assert f"{bin_to_hex(chunk.id)}, integrity error" in output - # repair: the defect chunk will be removed by repair. - output = cmd(archiver, "check", "--repair", exit_code=0) - assert f"{bin_to_hex(chunk.id)} is corrupted: data does not match checksum." in output + # repair: the defect chunk will be removed. + output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0) + assert f"{bin_to_hex(chunk.id)}, integrity error" in output assert f"{src_file}: Missing file chunk detected" in output # run normal check again diff --git a/src/borg/testsuite/legacyrepository_test.py b/src/borg/testsuite/legacyrepository_test.py index e313949739..4887b70b91 100644 --- a/src/borg/testsuite/legacyrepository_test.py +++ b/src/borg/testsuite/legacyrepository_test.py @@ -5,8 +5,6 @@ from unittest.mock import patch import pytest -from xxhash import xxh64 - from ..legacy.hashindex import NSIndex1 from ..helpers import Location from ..helpers import IntegrityError @@ -75,7 +73,7 @@ def get_path(repository): def fchunk(data, meta=b""): # Create a raw chunk that has a valid RepoObj layout but does not use encryption or compression. - hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest()) + hdr = RepoObj.obj_header.pack(len(meta), len(data)) assert isinstance(data, bytes) chunk = hdr + meta + data return chunk @@ -150,7 +148,7 @@ def test_multiple_transactions(repo_fixtures, request): def test_read_data(repo_fixtures, request): with get_repository_from_fixture(repo_fixtures, request) as repository: meta, data = b"meta", b"data" - hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest()) + hdr = RepoObj.obj_header.pack(len(meta), len(data)) chunk_complete = hdr + meta + data repository.put(H(0), chunk_complete) repository.commit(compact=False) diff --git a/src/borg/testsuite/repository_test.py b/src/borg/testsuite/repository_test.py index 112d2094d9..74f4729797 100644 --- a/src/borg/testsuite/repository_test.py +++ b/src/borg/testsuite/repository_test.py @@ -3,8 +3,6 @@ import sys import pytest -from xxhash import xxh64 - from ..helpers import Location from ..helpers import IntegrityError from ..platformflags import is_win32 @@ -57,7 +55,7 @@ def reopen(repository, exclusive: bool | None = True, create=False): def fchunk(data, meta=b""): # Format chunk: create a raw chunk that has a valid RepoObj layout, but does not use encryption or compression. - hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest()) + hdr = RepoObj.obj_header.pack(len(meta), len(data)) assert isinstance(data, bytes) chunk = hdr + meta + data return chunk @@ -99,7 +97,7 @@ def test_basic_operations(repo_fixtures, request): def test_read_data(repo_fixtures, request): with get_repository_from_fixture(repo_fixtures, request) as repository: meta, data = b"meta", b"data" - hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest()) + hdr = RepoObj.obj_header.pack(len(meta), len(data)) chunk_complete = hdr + meta + data chunk_short = hdr + meta repository.put(H(0), chunk_complete)