Skip to content

Commit

Permalink
Use optimal block length to generate deltas
Browse files Browse the repository at this point in the history
Previously, we used a block length hardcoded to 512 bytes. Our
measurements have shown that this value was generally inadequate: it
produced relatively large deltas and took a relatively long time to do
so.

librsync, by default, uses a block length equal to the square root of the
size of the old (basis) file. This value results in significantly smaller
deltas and shorter run times.

In this commit, we add one more optimization and round this value up to
the next power of two. Since librsync-go has a code path optimized
for buffers with sizes that are powers of two, this gives us another
performance gain.
  • Loading branch information
lmbarros committed Jun 24, 2022
1 parent a9348cc commit dc63235
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 1 deletion.
29 changes: 28 additions & 1 deletion daemon/images/image_delta.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/json"
"io"
"io/ioutil"
"math"
"os"
"time"

Expand Down Expand Up @@ -64,8 +65,9 @@ func (i *ImageService) DeltaCreate(deltaSrc, deltaDest string, options types.Ima
progressReader := progress.NewProgressReader(srcData, progressOutput, srcDataLen, deltaSrc, "Fingerprinting")
defer progressReader.Close()

blockLen := deltaBlockSize(srcDataLen)
sigStart := time.Now()
srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC)
srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, blockLen, 32, librsync.BLAKE2_SIG_MAGIC)
if err != nil {
return err
}
Expand Down Expand Up @@ -285,3 +287,28 @@ func (lock *imglock) unlock(ls layer.Store) {
layer.ReleaseAndLog(ls, l)
}
}

// deltaBlockSize returns the block size to use when generating a delta for a
// basis file that is basisSize bytes long.
//
// The result is the truncated square root of basisSize, rounded up to the next
// power of two and clamped to the range [1, 1<<31]. Non-positive values of
// basisSize yield 1.
func deltaBlockSize(basisSize int64) uint32 {
	// The largest power of two that fits a uint32.
	const maxBlockSize = 1 << 31

	// Handle non-positive sizes explicitly: the square root of a negative
	// number is NaN, and converting NaN to an integer type produces an
	// implementation-dependent value in Go.
	if basisSize <= 0 {
		return 1 // always return a positive block size.
	}

	// Start with the "ideal" size recommended by the librsync devs. Since
	// basisSize >= 1 here, the square root is at least 1; and since the square
	// root of the largest int64 is roughly 3.04e9, it always fits a uint32.
	x := uint32(math.Sqrt(float64(basisSize)))

	// Remain within reasonable limits. This check must come before the
	// rounding below, which would wrap around to 0 for any x above 1<<31.
	if x >= maxBlockSize {
		return maxBlockSize
	}

	// Round to the next power of two (because librsync-go has an optimized code
	// path for power of two blocks). This algorithm is from Hacker's Delight,
	// 2nd Edition, p.62: smear the highest set bit of x-1 into every lower bit
	// position, then add one.
	x--
	x |= x >> 1
	x |= x >> 2
	x |= x >> 4
	x |= x >> 8
	x |= x >> 16
	return x + 1
}
41 changes: 41 additions & 0 deletions daemon/images/image_delta_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package images

import (
"fmt"
"math"
"testing"
)

// Test_deltaBlockSize is a table-driven test: every entry pairs a basis size
// with the block size deltaBlockSize is expected to return for it, and each
// entry runs as its own subtest named after the call being checked.
func Test_deltaBlockSize(t *testing.T) {
	type blockSizeCase struct {
		basisSize int64
		expected  uint32
	}

	cases := []blockSizeCase{
		{basisSize: 0, expected: 1},
		{basisSize: 1, expected: 1},
		{basisSize: 100, expected: 16},
		{basisSize: 256, expected: 16},
		{basisSize: 288, expected: 16},
		{basisSize: 289, expected: 32},
		{basisSize: 1_024, expected: 32},
		{basisSize: 33_333, expected: 256},
		{basisSize: 88_887, expected: 512},
		{basisSize: 262_144, expected: 512},
		{basisSize: 262_145, expected: 512},
		{basisSize: 777_111, expected: 1024},
		{basisSize: 22_654_123, expected: 8192},
		{basisSize: 1_333_555_888, expected: 65536},
		{basisSize: 35_000_000_000, expected: 262144},
		{basisSize: 123_456_678_901, expected: 524288},
		{basisSize: 4_611_686_018_427_387_904, expected: 2147483648},
		{basisSize: 5_000_000_000_000_000_000, expected: 2147483648},
		{basisSize: math.MaxInt64, expected: 2147483648},
	}

	for i := range cases {
		// Copy the entry so the subtest closure owns its own value.
		tc := cases[i]
		name := fmt.Sprintf("deltaBlockSize(%v)", tc.basisSize)
		t.Run(name, func(t *testing.T) {
			got := deltaBlockSize(tc.basisSize)
			if got == tc.expected {
				return
			}
			t.Errorf("got deltaBlockSize(%v) = %v, want %v", tc.basisSize, got, tc.expected)
		})
	}
}

0 comments on commit dc63235

Please sign in to comment.