Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions src/borg/repoobj.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from collections import namedtuple
from struct import Struct

from xxhash import xxh64

from .constants import * # NOQA
from .helpers import msgpack, workarounds
from .helpers.errors import IntegrityError
Expand All @@ -13,10 +11,9 @@


class RepoObj:
# Object header format includes size information for parsing the object into meta and data,
# as well as hashes to enable checking consistency without having the borg key.
obj_header = Struct("<II8s8s") # meta size (32b), data size (32b), meta hash (64b), data hash (64b)
ObjHeader = namedtuple("ObjHeader", "meta_size data_size meta_hash data_hash")
# Object header: sizes of the encrypted meta and data sections.
obj_header = Struct("<II") # meta size (32b), data size (32b)
ObjHeader = namedtuple("ObjHeader", "meta_size data_size")

@classmethod
def extract_crypted_data(cls, data: bytes) -> bytes:
Expand Down Expand Up @@ -67,9 +64,7 @@ def format(
data_encrypted = self.key.encrypt(id, data_compressed)
meta_packed = msgpack.packb(meta)
meta_encrypted = self.key.encrypt(id, meta_packed)
hdr = self.ObjHeader(
len(meta_encrypted), len(data_encrypted), xxh64(meta_encrypted).digest(), xxh64(data_encrypted).digest()
)
hdr = self.ObjHeader(len(meta_encrypted), len(data_encrypted))
hdr_packed = self.obj_header.pack(*hdr)
return hdr_packed + meta_encrypted + data_encrypted

Expand Down
6 changes: 0 additions & 6 deletions src/borg/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import time
from pathlib import Path

from xxhash import xxh64

from borgstore.store import Store
from borgstore.store import ObjectNotFound as StoreObjectNotFound
from borgstore.backends.errors import BackendError as StoreBackendError
Expand Down Expand Up @@ -307,13 +305,9 @@ def check_object(obj):
meta = obj[hdr_size : hdr_size + hdr.meta_size]
if hdr.meta_size != len(meta):
log_error("metadata size incorrect.")
elif hdr.meta_hash != xxh64(meta).digest():
log_error("metadata does not match checksum.")
data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size]
if hdr.data_size != len(data):
log_error("data size incorrect.")
elif hdr.data_hash != xxh64(data).digest():
log_error("data does not match checksum.")
else:
log_error("too small.")

Expand Down
78 changes: 31 additions & 47 deletions src/borg/testsuite/archiver/check_cmd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,50 +362,34 @@ def test_verify_data(archivers, request, init_args):
if archiver.get_kind() != "local":
pytest.skip("only works locally, patches objects")

# It is tricky to test the cryptographic data verification, because the repository-level
# xxh64 hash usually fails to verify first. So we use a fake one that always verifies.
# Note: this only works as tested here for a highly engineered data corruption attack,
# because with accidental corruption, the low-level xxh64 check usually fails first.
def fake_xxh64(data, seed=0):
# xxhash.xxh64.digest() returns -> bytes
class FakeDigest:
def digest(self):
return b"fakefake"

return FakeDigest()

import borg.repoobj
import borg.repository

with patch.object(borg.repoobj, "xxh64", fake_xxh64), patch.object(borg.repository, "xxh64", fake_xxh64):
check_cmd_setup(archiver)
shutil.rmtree(archiver.repository_path)
cmd(archiver, "repo-create", *init_args)
create_src_archive(archiver, "archive1")
archive, repository = open_archive(archiver.repository_path, "archive1")
with repository:
for item in archive.iter_items():
if item.path.endswith(src_file):
chunk = item.chunks[-1]
data = repository.get(chunk.id)
data = data[0:123] + b"x" + data[123:]
repository.put(chunk.id, data)
break
check_cmd_setup(archiver)
shutil.rmtree(archiver.repository_path)
cmd(archiver, "repo-create", *init_args)
create_src_archive(archiver, "archive1")
archive, repository = open_archive(archiver.repository_path, "archive1")
with repository:
for item in archive.iter_items():
if item.path.endswith(src_file):
chunk = item.chunks[-1]
data = repository.get(chunk.id)
data = data[0:123] + b"x" + data[123:]
repository.put(chunk.id, data)
break

# the normal archives check does not read file content data.
cmd(archiver, "check", "--archives-only", exit_code=0)
# but with --verify-data, it does and notices the issue.
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
# the normal archives check does not read file content data.
cmd(archiver, "check", "--archives-only", exit_code=0)
# but with --verify-data, it does and notices the issue.
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
assert f"{bin_to_hex(chunk.id)}, integrity error" in output

# repair will find the defective chunk and remove it
output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0)
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
assert f"{src_file}: Missing file chunk detected" in output
# repair will find the defective chunk and remove it
output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0)
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
assert f"{src_file}: Missing file chunk detected" in output

# run with --verify-data again, it will notice the missing chunk.
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
assert f"{src_file}: Missing file chunk detected" in output
# run with --verify-data again, it will notice the missing chunk.
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
assert f"{src_file}: Missing file chunk detected" in output


@pytest.mark.parametrize("init_args", [["--encryption=repokey-aes-ocb"], ["--encryption", "none"]])
Expand All @@ -427,13 +411,13 @@ def test_corrupted_file_chunk(archivers, request, init_args):
repository.put(chunk.id, data)
break

# the normal check checks all repository objects and the xxh64 checksum fails.
output = cmd(archiver, "check", "--repository-only", exit_code=1)
assert f"{bin_to_hex(chunk.id)} is corrupted: data does not match checksum." in output
# --verify-data decrypts and catches the corruption.
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
assert f"{bin_to_hex(chunk.id)}, integrity error" in output

# repair: the defective chunk will be removed by repair.
output = cmd(archiver, "check", "--repair", exit_code=0)
assert f"{bin_to_hex(chunk.id)} is corrupted: data does not match checksum." in output
# repair: the defective chunk will be removed.
output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0)
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
assert f"{src_file}: Missing file chunk detected" in output

# run normal check again
Expand Down
6 changes: 2 additions & 4 deletions src/borg/testsuite/legacyrepository_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from unittest.mock import patch

import pytest
from xxhash import xxh64

from ..legacy.hashindex import NSIndex1
from ..helpers import Location
from ..helpers import IntegrityError
Expand Down Expand Up @@ -75,7 +73,7 @@ def get_path(repository):

def fchunk(data, meta=b""):
# Create a raw chunk that has a valid RepoObj layout but does not use encryption or compression.
hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
Comment thread
mr-raj12 marked this conversation as resolved.
hdr = RepoObj.obj_header.pack(len(meta), len(data))
assert isinstance(data, bytes)
chunk = hdr + meta + data
return chunk
Expand Down Expand Up @@ -150,7 +148,7 @@ def test_multiple_transactions(repo_fixtures, request):
def test_read_data(repo_fixtures, request):
with get_repository_from_fixture(repo_fixtures, request) as repository:
meta, data = b"meta", b"data"
hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
hdr = RepoObj.obj_header.pack(len(meta), len(data))
chunk_complete = hdr + meta + data
repository.put(H(0), chunk_complete)
repository.commit(compact=False)
Expand Down
6 changes: 2 additions & 4 deletions src/borg/testsuite/repository_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import sys

import pytest
from xxhash import xxh64

from ..helpers import Location
from ..helpers import IntegrityError
from ..platformflags import is_win32
Expand Down Expand Up @@ -57,7 +55,7 @@ def reopen(repository, exclusive: bool | None = True, create=False):

def fchunk(data, meta=b""):
# Format chunk: create a raw chunk that has a valid RepoObj layout, but does not use encryption or compression.
hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
hdr = RepoObj.obj_header.pack(len(meta), len(data))
assert isinstance(data, bytes)
chunk = hdr + meta + data
return chunk
Expand Down Expand Up @@ -99,7 +97,7 @@ def test_basic_operations(repo_fixtures, request):
def test_read_data(repo_fixtures, request):
with get_repository_from_fixture(repo_fixtures, request) as repository:
meta, data = b"meta", b"data"
hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
hdr = RepoObj.obj_header.pack(len(meta), len(data))
chunk_complete = hdr + meta + data
chunk_short = hdr + meta
repository.put(H(0), chunk_complete)
Expand Down
Loading