Skip to content

Commit

Permalink
Merge pull request #7598 from ThomasWaldmann/chunker-params-master
Browse files Browse the repository at this point in the history
tests: check buzhash chunksize distribution, see #7586
  • Loading branch information
ThomasWaldmann committed May 22, 2023
2 parents eeefa55 + 1fb1cc3 commit 5e4202e
Showing 1 changed file with 27 additions and 1 deletion.
28 changes: 27 additions & 1 deletion src/borg/testsuite/chunker_pytest.py
Expand Up @@ -5,7 +5,7 @@
import pytest

from .chunker import cf
from ..chunker import ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing
from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing
from ..constants import * # NOQA

BS = 4096 # fs block size
Expand Down Expand Up @@ -151,3 +151,29 @@ def test_chunker_failing():
assert c1.data == data[:SIZE]
assert c2.data == data[SIZE : 2 * SIZE]
assert c3.data == data[2 * SIZE :]


def test_buzhash_chunksize_distribution():
    """Check the chunk size distribution the buzhash chunker yields on random data.

    1 MiB of random bytes is chunkified and the resulting chunk sizes are
    checked for three properties:

    - the number of chunks is in a plausible range,
    - no chunk is smaller than 2**min_exp or larger than 2**max_exp,
    - only a few chunks have exactly the minimum or maximum size, i.e. most
      cuts were triggered by the hash and not by size clipping.
    """
    min_exp, max_exp, mask = 10, 16, 14  # chunk size target 16kiB, clip at 1kiB and 64kiB
    smallest_allowed = 2**min_exp
    largest_allowed = 2**max_exp

    random_bytes = os.urandom(1048576)
    # NOTE(review): the first and last arguments (0 and 4095) are presumably the
    # seed and the hash window size - confirm against the Chunker signature.
    chunker = Chunker(0, min_exp, max_exp, mask, 4095)
    chunks = cf(chunker.chunkify(BytesIO(random_bytes)))

    sizes = [len(chunk) for chunk in chunks]
    n_chunks = len(chunks)
    smallest_seen = min(sizes)
    largest_seen = max(sizes)
    # chunks whose size is exactly at a clipping boundary
    clipped_at_min = sizes.count(smallest_allowed)
    clipped_at_max = sizes.count(largest_allowed)
    print(
        f"count: {n_chunks} min: {smallest_seen} max: {largest_seen} "
        f"min count: {clipped_at_min} max count: {clipped_at_max}"
    )

    # usually there will be about 64 chunks (1 MiB / 16 kiB target size)
    assert 32 < n_chunks < 128
    # chunks always must be between min and max (clipping must work):
    assert smallest_seen >= smallest_allowed
    assert largest_seen <= largest_allowed
    # most chunks should be cut due to buzhash triggering, not due to clipping at min/max size:
    assert clipped_at_min < 10
    assert clipped_at_max < 10

0 comments on commit 5e4202e

Please sign in to comment.