Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions pack/daggen/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package daggen
import (
"bytes"
"context"
"sort"

"github.com/cockroachdb/errors"
"github.com/data-preservation-programs/singularity/model"
Expand Down Expand Up @@ -356,18 +357,35 @@ func UnmarshalToBlocks(in []byte) ([]blocks.Block, error) {
return nil, errors.WithStack(err)
}

// Iterate Reals and Additional in CID-sorted order so the resulting
// CAR layout is deterministic across runs. Go map iteration is
// randomized; without a sort, the same DAG produces a different piece
// CID on every regeneration even though the root CID is stable.
blks := make([]blocks.Block, 0, len(data.Reals)+len(data.Additional))
for c, d := range data.Reals {
blk, _ := blocks.NewBlockWithCid(d, c)
for _, c := range sortedCids(data.Reals) {
blk, _ := blocks.NewBlockWithCid(data.Reals[c], c)
blks = append(blks, blk)
}
for c, d := range data.Additional {
blk, _ := blocks.NewBlockWithCid(d, c)
for _, c := range sortedCids(data.Additional) {
blk, _ := blocks.NewBlockWithCid(data.Additional[c], c)
blks = append(blks, blk)
}
return blks, nil
}

// sortedCids collects every key of m and returns them ordered by a bytewise
// comparison of each CID's Bytes() representation. Map iteration order in Go
// is randomized, so callers use this to walk a cid→bytes map in a stable
// order. The returned slice is freshly allocated; m is not modified.
func sortedCids(m map[cid.Cid][]byte) []cid.Cid {
	ordered := make([]cid.Cid, len(m))
	next := 0
	for key := range m {
		ordered[next] = key
		next++
	}

	// less reports whether the CID at index a sorts strictly before the one at b.
	less := func(a, b int) bool {
		left, right := ordered[a].Bytes(), ordered[b].Bytes()
		return bytes.Compare(left, right) < 0
	}
	sort.Slice(ordered, less)
	return ordered
}

// UnmarshalBinary deserializes binary data into the current DirectoryData object.
// This method:
// 1. Creates a new blockstore and DAG service.
Expand Down
43 changes: 43 additions & 0 deletions pack/daggen/directory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,46 @@ func TestResolveDirectoryTree(t *testing.T) {
require.Equal(t, "name", node.Links()[0].Name)
require.Equal(t, "test", node.Links()[1].Name)
}

// TestUnmarshalToBlocksDeterministic checks that UnmarshalToBlocks yields
// blocks in the same CID order every time it is called on the same marshaled
// input. An order that depended on Go's randomized map iteration would change
// between calls and make this test fail.
func TestUnmarshalToBlocksDeterministic(t *testing.T) {
	const blockCount = 50

	ctx := context.Background()

	// Seed the directory data with blockCount distinct raw blocks by writing
	// straight into its additional map; each CID is derived from the decimal
	// string of the loop index.
	dirData := NewDirectoryData()
	for i := 0; i < blockCount; i++ {
		label := strconv.Itoa(i)
		key := cid.NewCidV1(cid.Raw, util.Hash([]byte(label)))
		dirData.additional[key] = []byte("block-data-" + label)
	}

	marshaled, err := dirData.MarshalBinary(ctx)
	require.NoError(t, err)

	// The first decode establishes the reference ordering that every later
	// decode is compared against.
	baseline, err := UnmarshalToBlocks(marshaled)
	require.NoError(t, err)
	require.GreaterOrEqual(t, len(baseline), blockCount, "expected at least %d blocks", blockCount)

	want := make([]string, 0, len(baseline))
	for _, blk := range baseline {
		want = append(want, blk.Cid().String())
	}

	// Decode repeatedly; any position whose CID differs from the baseline
	// means the ordering is not stable.
	for run := 0; run < 20; run++ {
		got, err := UnmarshalToBlocks(marshaled)
		require.NoError(t, err)
		require.Len(t, got, len(want))
		for pos, blk := range got {
			require.Equal(t, want[pos], blk.Cid().String(),
				"block order must be deterministic across UnmarshalToBlocks calls (run %d, position %d)", run, pos)
		}
	}
}
Loading