-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(testutil): add test data generation utils (extracted from Lassie)
Previously: github.com/filecoin-project/lassie/pkg/internal/itest/unixfs Ref: filecoin-project/lassie#232
- Loading branch information
Showing
3 changed files
with
456 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
package testutil | ||
|
||
import ( | ||
"context" | ||
"crypto/sha256" | ||
"encoding/hex" | ||
"fmt" | ||
"io" | ||
"testing" | ||
|
||
"github.com/ipfs/go-cid" | ||
dagpb "github.com/ipld/go-codec-dagpb" | ||
"github.com/ipld/go-ipld-prime" | ||
"github.com/ipld/go-ipld-prime/datamodel" | ||
"github.com/ipld/go-ipld-prime/linking" | ||
cidlink "github.com/ipld/go-ipld-prime/linking/cid" | ||
"github.com/ipld/go-ipld-prime/node/basicnode" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// DirEntry represents a flattened directory entry, where Path is from the | ||
// root of the directory and Content is the file contents. It is intended | ||
// that a DirEntry slice can be used to represent a full-depth directory without | ||
// needing nesting. | ||
type DirEntry struct { | ||
Path string | ||
Content []byte | ||
Root cid.Cid | ||
SelfCids []cid.Cid | ||
TSize uint64 | ||
Children []DirEntry | ||
} | ||
|
||
func (de DirEntry) Size() (int64, error) { | ||
return int64(de.TSize), nil | ||
} | ||
|
||
func (de DirEntry) Link() ipld.Link { | ||
return cidlink.Link{Cid: de.Root} | ||
} | ||
|
||
// ToDirEntry takes a LinkSystem containing UnixFS data and builds a DirEntry | ||
// tree representing the file and directory structure it finds starting at the | ||
// rootCid. If expectFull is true, it will error if it encounters a UnixFS | ||
// node that it cannot fully load. If expectFull is false, it will ignore | ||
// errors and return nil for any node it cannot load. | ||
func ToDirEntry(t *testing.T, linkSys linking.LinkSystem, rootCid cid.Cid, expectFull bool) DirEntry { | ||
de := toDirEntryRecursive(t, linkSys, rootCid, "", expectFull) | ||
return *de | ||
} | ||
|
||
func toDirEntryRecursive(t *testing.T, linkSys linking.LinkSystem, rootCid cid.Cid, name string, expectFull bool) *DirEntry { | ||
var proto datamodel.NodePrototype = dagpb.Type.PBNode | ||
if rootCid.Prefix().Codec == cid.Raw { | ||
proto = basicnode.Prototype.Any | ||
} | ||
node, err := linkSys.Load(linking.LinkContext{Ctx: context.TODO()}, cidlink.Link{Cid: rootCid}, proto) | ||
if expectFull { | ||
require.NoError(t, err) | ||
} else if err != nil { | ||
if e, ok := err.(interface{ NotFound() bool }); ok && e.NotFound() { | ||
return nil | ||
} | ||
require.NoError(t, err) | ||
} | ||
|
||
if node.Kind() == ipld.Kind_Bytes { // is a file | ||
byts, err := node.AsBytes() | ||
require.NoError(t, err) | ||
return &DirEntry{ | ||
Path: name, | ||
Content: byts, | ||
Root: rootCid, | ||
} | ||
} | ||
// else is a directory | ||
children := make([]DirEntry, 0) | ||
for itr := node.MapIterator(); !itr.Done(); { | ||
k, v, err := itr.Next() | ||
require.NoError(t, err) | ||
childName, err := k.AsString() | ||
require.NoError(t, err) | ||
childLink, err := v.AsLink() | ||
require.NoError(t, err) | ||
child := toDirEntryRecursive(t, linkSys, childLink.(cidlink.Link).Cid, name+"/"+childName, expectFull) | ||
children = append(children, *child) | ||
} | ||
return &DirEntry{ | ||
Path: name, | ||
Root: rootCid, | ||
Children: children, | ||
} | ||
} | ||
|
||
// CompareDirEntries is a safe, recursive comparison between two DirEntry | ||
// values. It doesn't strictly require child ordering to match, but it does | ||
// require that all children exist and match, in some order. | ||
func CompareDirEntries(t *testing.T, a, b DirEntry) { | ||
// t.Log("CompareDirEntries", a.Path, b.Path) // TODO: remove this | ||
require.Equal(t, a.Path, b.Path) | ||
require.Equal(t, a.Root.String(), b.Root.String(), a.Path+" root mismatch") | ||
hashA := sha256.Sum256(a.Content) | ||
hashB := sha256.Sum256(b.Content) | ||
require.Equal(t, hex.EncodeToString(hashA[:]), hex.EncodeToString(hashB[:]), a.Path+"content hash mismatch") | ||
require.Equal(t, len(a.Children), len(b.Children), fmt.Sprintf("%s child length mismatch %d <> %d", a.Path, len(a.Children), len(b.Children))) | ||
for i := range a.Children { | ||
// not necessarily in order | ||
var found bool | ||
for j := range b.Children { | ||
if a.Children[i].Path == b.Children[j].Path { | ||
found = true | ||
CompareDirEntries(t, a.Children[i], b.Children[j]) | ||
} | ||
} | ||
require.True(t, found, fmt.Sprintf("%s child %s not found in b", a.Path, a.Children[i].Path)) | ||
} | ||
} | ||
|
||
// WrapContent embeds the content we want in some random nested content such | ||
// that it's fetchable under the provided path. If exclusive is true, the | ||
// content will be the only thing under the path. If false, there will be | ||
// content before and after the wrapped content at each point in the path. | ||
func WrapContent(t *testing.T, rndReader io.Reader, lsys *ipld.LinkSystem, content DirEntry, wrapPath string, exclusive bool) DirEntry { | ||
want := content | ||
ps := datamodel.ParsePath(wrapPath) | ||
for ps.Len() > 0 { | ||
de := []DirEntry{} | ||
if !exclusive { | ||
before := GenerateDirectory(t, lsys, rndReader, 4<<10, false) | ||
before.Path = "!before" | ||
de = append(de, before) | ||
} | ||
want.Path = ps.Last().String() | ||
de = append(de, want) | ||
if !exclusive { | ||
after := GenerateDirectory(t, lsys, rndReader, 4<<11, true) | ||
after.Path = "~after" | ||
de = append(de, after) | ||
} | ||
want = BuildDirectory(t, lsys, de, false) | ||
ps = ps.Pop() | ||
} | ||
return want | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
package testutil | ||
|
||
import ( | ||
"bytes" | ||
"crypto/rand" | ||
"io" | ||
"math/big" | ||
"sort" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/ipfs/go-cid" | ||
"github.com/ipfs/go-unixfsnode/data/builder" | ||
"github.com/ipfs/go-unixfsnode/testutil/namegen" | ||
dagpb "github.com/ipld/go-codec-dagpb" | ||
"github.com/ipld/go-ipld-prime" | ||
"github.com/ipld/go-ipld-prime/linking" | ||
cidlink "github.com/ipld/go-ipld-prime/linking/cid" | ||
"github.com/multiformats/go-multihash" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
// GenerateFile generates a random unixfs file of the given size, storing the | ||
// blocks in the provided LinkSystem and returns a DirEntry representation of | ||
// the file. | ||
func GenerateFile(t *testing.T, linkSys *linking.LinkSystem, randReader io.Reader, size int) DirEntry { | ||
// a file of `size` random bytes, packaged into unixfs DAGs, stored in the remote blockstore | ||
delimited := io.LimitReader(randReader, int64(size)) | ||
var buf bytes.Buffer | ||
buf.Grow(size) | ||
delimited = io.TeeReader(delimited, &buf) | ||
// "size-256144" sets the chunker, splitting bytes at 256144b boundaries | ||
cids := make([]cid.Cid, 0) | ||
var undo func() | ||
linkSys.StorageWriteOpener, undo = cidCollector(linkSys, &cids) | ||
defer undo() | ||
root, gotSize, err := builder.BuildUnixFSFile(delimited, "size-256144", linkSys) | ||
require.NoError(t, err) | ||
srcData := buf.Bytes() | ||
rootCid := root.(cidlink.Link).Cid | ||
return DirEntry{ | ||
Path: "", | ||
Content: srcData, | ||
Root: rootCid, | ||
SelfCids: cids, | ||
TSize: uint64(gotSize), | ||
} | ||
} | ||
|
||
// GenerateDirectory generates a random UnixFS directory that aims for the | ||
// requested targetSize (in bytes, although it is likely to fall somewhere | ||
// under this number), storing the blocks in the provided LinkSystem and | ||
// returns a DirEntry representation of the directory. If rootSharded is true, | ||
// the root directory will be built as HAMT sharded (with a low "width" to | ||
// maximise the chance of collisions and therefore greater depth for smaller | ||
// number of files). | ||
func GenerateDirectory(t *testing.T, linkSys *linking.LinkSystem, randReader io.Reader, targetSize int, rootSharded bool) DirEntry { | ||
return GenerateDirectoryFrom(t, linkSys, randReader, targetSize, "", rootSharded) | ||
} | ||
|
||
// GenerateDirectoryFrom is the same as GenerateDirectory but allows the caller | ||
// to specify a directory path to start from. This is useful for generating | ||
// nested directories. | ||
func GenerateDirectoryFrom( | ||
t *testing.T, | ||
linkSys *linking.LinkSystem, | ||
randReader io.Reader, | ||
targetSize int, | ||
dir string, | ||
sharded bool, | ||
) DirEntry { | ||
var curSize int | ||
targetFileSize := targetSize / 16 | ||
children := make([]DirEntry, 0) | ||
for curSize < targetSize { | ||
switch rndInt(randReader, 6) { | ||
case 0: // 1 in 6 chance of finishing this directory if not at root | ||
if dir != "" && len(children) > 0 { | ||
curSize = targetSize // not really, but we're done with this directory | ||
} // else at the root we don't get to finish early | ||
case 1: // 1 in 6 chance of making a new directory | ||
if targetSize-curSize <= 1024 { // don't make tiny directories | ||
continue | ||
} | ||
var newDir string | ||
for { | ||
var err error | ||
newDir, err = namegen.RandomDirectoryName(randReader) | ||
require.NoError(t, err) | ||
if !isDupe(children, newDir) { | ||
break | ||
} | ||
} | ||
child := GenerateDirectoryFrom(t, linkSys, randReader, targetSize-curSize, dir+"/"+newDir, false) | ||
children = append(children, child) | ||
curSize += int(child.TSize) | ||
default: // 4 in 6 chance of making a new file | ||
var size int | ||
for size == 0 { // don't make empty files | ||
sizeB, err := rand.Int(randReader, big.NewInt(int64(targetFileSize))) | ||
require.NoError(t, err) | ||
size = int(sizeB.Int64()) | ||
if size > targetSize-curSize { | ||
size = targetSize - curSize | ||
} | ||
} | ||
entry := GenerateFile(t, linkSys, randReader, size) | ||
var name string | ||
for { | ||
var err error | ||
name, err = namegen.RandomFileName(randReader) | ||
require.NoError(t, err) | ||
if !isDupe(children, name) { | ||
break | ||
} | ||
} | ||
entry.Path = dir + "/" + name | ||
curSize += size | ||
children = append(children, entry) | ||
} | ||
} | ||
dirEntry := BuildDirectory(t, linkSys, children, sharded) | ||
dirEntry.Path = dir | ||
return dirEntry | ||
} | ||
|
||
// BuildDirectory builds a directory from the given children, storing the | ||
// blocks in the provided LinkSystem and returns a DirEntry representation of | ||
// the directory. If sharded is true, the root directory will be built as HAMT | ||
// sharded (with a low "width" to maximise the chance of collisions and | ||
// therefore greater depth for smaller number of files). | ||
func BuildDirectory(t *testing.T, linkSys *linking.LinkSystem, children []DirEntry, sharded bool) DirEntry { | ||
// create stable sorted children, which should match the encoded form | ||
// in dag-pb | ||
sort.Slice(children, func(i, j int) bool { | ||
return strings.Compare(children[i].Path, children[j].Path) < 0 | ||
}) | ||
|
||
dirLinks := make([]dagpb.PBLink, 0) | ||
for _, child := range children { | ||
paths := strings.Split(child.Path, "/") | ||
name := paths[len(paths)-1] | ||
lnk, err := builder.BuildUnixFSDirectoryEntry(name, int64(child.TSize), cidlink.Link{Cid: child.Root}) | ||
require.NoError(t, err) | ||
dirLinks = append(dirLinks, lnk) | ||
} | ||
cids := make([]cid.Cid, 0) | ||
var undo func() | ||
linkSys.StorageWriteOpener, undo = cidCollector(linkSys, &cids) | ||
defer undo() | ||
var root ipld.Link | ||
var size uint64 | ||
var err error | ||
if sharded { | ||
// node arity of 16, quite small to increase collision probability so we actually get sharding | ||
const width = 16 | ||
const hasher = multihash.MURMUR3X64_64 | ||
root, size, err = builder.BuildUnixFSShardedDirectory(width, hasher, dirLinks, linkSys) | ||
require.NoError(t, err) | ||
} else { | ||
root, size, err = builder.BuildUnixFSDirectory(dirLinks, linkSys) | ||
require.NoError(t, err) | ||
} | ||
|
||
return DirEntry{ | ||
Path: "", | ||
Root: root.(cidlink.Link).Cid, | ||
SelfCids: cids, | ||
TSize: size, | ||
Children: children, | ||
} | ||
} | ||
|
||
// rndInt returns an integer in [0, max) drawn from randReader. It is
// deliberately best-effort: if max is not positive, or the reader fails, it
// returns 0 rather than reporting an error.
func rndInt(randReader io.Reader, max int) int {
	if max <= 0 {
		// rand.Int panics when its upper bound is <= 0
		return 0
	}
	coin, err := rand.Int(randReader, big.NewInt(int64(max)))
	if err != nil {
		return 0 // eh, whatever
	}
	return int(coin.Int64())
}
|
||
func cidCollector(ls *ipld.LinkSystem, cids *[]cid.Cid) (ipld.BlockWriteOpener, func()) { | ||
swo := ls.StorageWriteOpener | ||
return func(linkCtx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { | ||
w, c, err := swo(linkCtx) | ||
if err != nil { | ||
return nil, nil, err | ||
} | ||
return w, func(lnk ipld.Link) error { | ||
*cids = append(*cids, lnk.(cidlink.Link).Cid) | ||
return c(lnk) | ||
}, nil | ||
}, func() { | ||
// reset | ||
ls.StorageWriteOpener = swo | ||
} | ||
} | ||
|
||
func isDupe(children []DirEntry, name string) bool { | ||
for _, child := range children { | ||
if strings.HasSuffix(child.Path, "/"+name) { | ||
return true | ||
} | ||
} | ||
return false | ||
} |
Oops, something went wrong.