Skip to content

Commit

Permalink
Use optimal block length to generate deltas
Browse files Browse the repository at this point in the history
Previously, we used a block length hardcoded to 512 bytes. Our
measurements have shown that this value was generally inadequate: it
produced relatively large deltas and took a relatively long time to do
so.

librsync, by default, uses a block length equal to the square root of the
size of the old (basis) file (with a minimum of 256). This value results in
significantly smaller deltas and shorter run times.

In this commit, we do one more optimization and round this value up to
the next power of two. Since librsync-go has a code path optimized
for buffers with sizes that are powers of two, this gives us another
performance gain.
  • Loading branch information
lmbarros committed Jun 12, 2023
1 parent 19a8c64 commit c725494
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 1 deletion.
31 changes: 30 additions & 1 deletion daemon/images/image_delta.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/json"
"io"
"io/ioutil"
"math"
"os"
"time"

Expand Down Expand Up @@ -64,8 +65,9 @@ func (i *ImageService) DeltaCreate(deltaSrc, deltaDest string, options types.Ima
progressReader := progress.NewProgressReader(srcData, progressOutput, srcDataLen, deltaSrc, "Fingerprinting")
defer progressReader.Close()

blockLen := deltaBlockSize(srcDataLen)
sigStart := time.Now()
srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC)
srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, blockLen, 32, librsync.BLAKE2_SIG_MAGIC)
if err != nil {
return err
}
Expand Down Expand Up @@ -285,3 +287,30 @@ func (lock *imglock) unlock(ls layer.Store) {
layer.ReleaseAndLog(ls, l)
}
}

// deltaBlockSize picks the block length used to generate a delta against a
// basis file of basisSize bytes.
//
// The starting point is the size recommended by the librsync devs: the square
// root of the basis size, with a floor of 256. See
// https://github.com/librsync/librsync/pull/109/files#diff-7a3cd9075c1eaa0d219f7c0a516a10679dae11922bd1dd0ff54a10cdaa457f6fR147
//
// That value is then rounded up to a power of two (because librsync-go has an
// optimized code path for power of two blocks) and capped at 2147483648, the
// largest power of two that fits into a uint32.
func deltaBlockSize(basisSize int64) uint32 {
	const (
		minBlockLen uint32 = 256
		maxBlockLen uint32 = 1 << 31
	)

	// Small (and non-positive) sizes all map to the minimum block length; this
	// also keeps negative values away from math.Sqrt.
	if basisSize <= int64(minBlockLen)*int64(minBlockLen) {
		return minBlockLen
	}

	// The conversion to uint32 drops the fractional part of the square root.
	root := uint32(math.Sqrt(float64(basisSize)))

	// Double from the minimum until we reach the root. The second condition
	// stops at the cap, so the shift can never wrap around to zero.
	blockLen := minBlockLen
	for blockLen < root && blockLen < maxBlockLen {
		blockLen <<= 1
	}
	return blockLen
}
39 changes: 39 additions & 0 deletions daemon/images/image_delta_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package images

import (
"fmt"
"math"
"testing"
)

// Test_deltaBlockSize checks deltaBlockSize against a table of basis sizes,
// covering the 256 floor, several rounded-up values in between, and the
// 2147483648 cap at the top of the int64 range.
func Test_deltaBlockSize(t *testing.T) {
	type blockSizeCase struct {
		basisSize int64
		expected  uint32
	}

	cases := []blockSizeCase{
		// At or below 256*256 bytes: always the minimum block length.
		{basisSize: 0, expected: 256},
		{basisSize: 1, expected: 256},
		{basisSize: 100, expected: 256},
		{basisSize: 1_024, expected: 256},
		{basisSize: 33_333, expected: 256},
		{basisSize: 65_536, expected: 256},
		// Square root rounded up to a power of two.
		{basisSize: 88_887, expected: 512},
		{basisSize: 262_144, expected: 512},
		{basisSize: 262_145, expected: 512},
		{basisSize: 777_111, expected: 1024},
		{basisSize: 22_654_123, expected: 8192},
		{basisSize: 1_333_555_888, expected: 65536},
		{basisSize: 35_000_000_000, expected: 262144},
		{basisSize: 123_456_678_901, expected: 524288},
		// Huge sizes: clamped to the largest power of two in a uint32.
		{basisSize: 4_611_686_018_427_387_904, expected: 2147483648},
		{basisSize: 5_000_000_000_000_000_000, expected: 2147483648},
		{basisSize: math.MaxInt64, expected: 2147483648},
	}

	for _, tc := range cases {
		name := fmt.Sprintf("deltaBlockSize(%v)", tc.basisSize)
		t.Run(name, func(t *testing.T) {
			got := deltaBlockSize(tc.basisSize)
			if got == tc.expected {
				return
			}
			t.Errorf("got deltaBlockSize(%v) = %v, want %v", tc.basisSize, got, tc.expected)
		})
	}
}

0 comments on commit c725494

Please sign in to comment.