Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(testutil): add test data generation utils (extracted from Lassie) #47

Merged
merged 1 commit into from
May 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions testutil/directory.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package testutil

import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"testing"

"github.com/ipfs/go-cid"
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/datamodel"
"github.com/ipld/go-ipld-prime/linking"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
"github.com/ipld/go-ipld-prime/node/basicnode"
"github.com/stretchr/testify/require"
)

// DirEntry represents a flattened directory entry, where Path is from the
// root of the directory and Content is the file contents. It is intended
// that a DirEntry slice can be used to represent a full-depth directory without
// needing nesting.
type DirEntry struct {
	// Path is the entry's full path from the root of the tree, with "/"
	// separators (the root itself has an empty Path).
	Path string
	// Content holds the raw file bytes; it is left empty for directories.
	Content []byte
	// Root is the CID of the root block of this entry's DAG.
	Root cid.Cid
	// SelfCids lists the CIDs of the blocks written while building this
	// entry itself (not its children).
	SelfCids []cid.Cid
	// TSize is the size reported by the UnixFS builder for this entry's
	// DAG — presumably the total encoded size in bytes; confirm against
	// go-unixfsnode's builder semantics.
	TSize uint64
	// Children holds nested entries when this entry is a directory.
	Children []DirEntry
}

// Size returns the entry's TSize as an int64. The error is always nil; the
// (int64, error) shape appears intended to satisfy a size-reporting
// interface — confirm which one against callers.
func (de DirEntry) Size() (int64, error) {
	return int64(de.TSize), nil
}

// Link returns the entry's root CID wrapped as an ipld.Link.
func (de DirEntry) Link() ipld.Link {
	return cidlink.Link{Cid: de.Root}
}

// ToDirEntry takes a LinkSystem containing UnixFS data and builds a DirEntry
// tree representing the file and directory structure it finds starting at the
// rootCid. If expectFull is true, it will error if it encounters a UnixFS
// node that it cannot fully load. If expectFull is false, it will ignore
// NotFound errors and skip over nodes it cannot load.
func ToDirEntry(t *testing.T, linkSys linking.LinkSystem, rootCid cid.Cid, expectFull bool) DirEntry {
	de := toDirEntryRecursive(t, linkSys, rootCid, "", expectFull)
	// when expectFull is false and the root block itself is absent the
	// recursion returns nil; fail with a clear message rather than
	// panicking on a nil pointer dereference
	require.NotNil(t, de, "root node %s not found", rootCid.String())
	return *de
}

// toDirEntryRecursive loads the node at rootCid and builds a DirEntry for it:
// a leaf entry with Content for byte nodes (files), or an entry with Children
// for map nodes (directories), recursing into each child. name is the path
// accumulated from the root. It returns nil when expectFull is false and the
// block is not found in the LinkSystem.
func toDirEntryRecursive(t *testing.T, linkSys linking.LinkSystem, rootCid cid.Cid, name string, expectFull bool) *DirEntry {
	// raw-codec leaves decode as plain bytes; everything else is dag-pb
	var proto datamodel.NodePrototype = dagpb.Type.PBNode
	if rootCid.Prefix().Codec == cid.Raw {
		proto = basicnode.Prototype.Any
	}
	node, err := linkSys.Load(linking.LinkContext{Ctx: context.TODO()}, cidlink.Link{Cid: rootCid}, proto)
	if expectFull {
		require.NoError(t, err)
	} else if err != nil {
		// tolerate missing blocks when a partial DAG is acceptable
		if e, ok := err.(interface{ NotFound() bool }); ok && e.NotFound() {
			return nil
		}
		require.NoError(t, err)
	}

	if node.Kind() == ipld.Kind_Bytes { // is a file
		byts, err := node.AsBytes()
		require.NoError(t, err)
		return &DirEntry{
			Path:    name,
			Content: byts,
			Root:    rootCid,
		}
	}
	// else is a directory
	children := make([]DirEntry, 0)
	for itr := node.MapIterator(); !itr.Done(); {
		k, v, err := itr.Next()
		require.NoError(t, err)
		childName, err := k.AsString()
		require.NoError(t, err)
		childLink, err := v.AsLink()
		require.NoError(t, err)
		child := toDirEntryRecursive(t, linkSys, childLink.(cidlink.Link).Cid, name+"/"+childName, expectFull)
		// a nil child means the block was not found and expectFull was
		// false; skip it rather than dereferencing a nil pointer
		if child != nil {
			children = append(children, *child)
		}
	}
	return &DirEntry{
		Path:     name,
		Root:     rootCid,
		Children: children,
	}
}

// CompareDirEntries is a safe, recursive comparison between two DirEntry
// values. It doesn't strictly require child ordering to match, but it does
// require that all children exist and match, in some order.
func CompareDirEntries(t *testing.T, a, b DirEntry) {
	require.Equal(t, a.Path, b.Path)
	require.Equal(t, a.Root.String(), b.Root.String(), a.Path+" root mismatch")
	// compare content digests rather than raw bytes so a mismatch doesn't
	// dump whole file contents into the failure output
	hashA := sha256.Sum256(a.Content)
	hashB := sha256.Sum256(b.Content)
	require.Equal(t, hex.EncodeToString(hashA[:]), hex.EncodeToString(hashB[:]), a.Path+" content hash mismatch")
	require.Equal(t, len(a.Children), len(b.Children), fmt.Sprintf("%s child length mismatch %d <> %d", a.Path, len(a.Children), len(b.Children)))
	for i := range a.Children {
		// children are not necessarily in the same order on both sides,
		// so search b for the matching path
		var found bool
		for j := range b.Children {
			if a.Children[i].Path == b.Children[j].Path {
				found = true
				CompareDirEntries(t, a.Children[i], b.Children[j])
				break
			}
		}
		require.True(t, found, fmt.Sprintf("%s child %s not found in b", a.Path, a.Children[i].Path))
	}
}

// WrapContent embeds the content we want in some random nested content such
// that it's fetchable under the provided path. If exclusive is true, the
// content will be the only thing under the path. If false, there will be
// content before and after the wrapped content at each point in the path.
func WrapContent(t *testing.T, rndReader io.Reader, lsys *ipld.LinkSystem, content DirEntry, wrapPath string, exclusive bool) DirEntry {
	wrapped := content
	remaining := datamodel.ParsePath(wrapPath)
	// build the wrapping directories from the innermost segment outward
	for remaining.Len() > 0 {
		siblings := []DirEntry{}
		if !exclusive {
			// "!before" sorts lexically ahead of the wrapped entry
			pre := GenerateDirectory(t, lsys, rndReader, 4<<10, false)
			pre.Path = "!before"
			siblings = append(siblings, pre)
		}
		wrapped.Path = remaining.Last().String()
		siblings = append(siblings, wrapped)
		if !exclusive {
			// "~after" sorts lexically behind the wrapped entry
			post := GenerateDirectory(t, lsys, rndReader, 4<<11, true)
			post.Path = "~after"
			siblings = append(siblings, post)
		}
		wrapped = BuildDirectory(t, lsys, siblings, false)
		remaining = remaining.Pop()
	}
	return wrapped
}
206 changes: 206 additions & 0 deletions testutil/generator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
package testutil

import (
"bytes"
"crypto/rand"
"io"
"math/big"
"sort"
"strings"
"testing"

"github.com/ipfs/go-cid"
"github.com/ipfs/go-unixfsnode/data/builder"
"github.com/ipfs/go-unixfsnode/testutil/namegen"
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/linking"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
"github.com/multiformats/go-multihash"
"github.com/stretchr/testify/require"
)

// GenerateFile generates a random unixfs file of the given size, storing the
// blocks in the provided LinkSystem and returns a DirEntry representation of
// the file.
func GenerateFile(t *testing.T, linkSys *linking.LinkSystem, randReader io.Reader, size int) DirEntry {
	// draw exactly `size` random bytes, keeping a copy of everything that
	// flows into the UnixFS builder so we can report it as Content
	var srcBuf bytes.Buffer
	srcBuf.Grow(size)
	source := io.TeeReader(io.LimitReader(randReader, int64(size)), &srcBuf)

	// record every CID committed while building this file's DAG
	blockCids := make([]cid.Cid, 0)
	var restore func()
	linkSys.StorageWriteOpener, restore = cidCollector(linkSys, &blockCids)
	defer restore()

	// "size-256144" sets the chunker, splitting bytes at 256144b boundaries
	root, gotSize, err := builder.BuildUnixFSFile(source, "size-256144", linkSys)
	require.NoError(t, err)

	return DirEntry{
		Path:     "",
		Content:  srcBuf.Bytes(),
		Root:     root.(cidlink.Link).Cid,
		SelfCids: blockCids,
		TSize:    uint64(gotSize),
	}
}

// GenerateDirectory generates a random UnixFS directory that aims for the
// requested targetSize (in bytes, although it is likely to fall somewhere
// under this number), storing the blocks in the provided LinkSystem and
// returns a DirEntry representation of the directory. If rootSharded is true,
// the root directory will be built as HAMT sharded (with a low "width" to
// maximise the chance of collisions and therefore greater depth for smaller
// number of files).
//
// It delegates to GenerateDirectoryFrom with an empty starting path, so the
// returned DirEntry is rooted at "".
func GenerateDirectory(t *testing.T, linkSys *linking.LinkSystem, randReader io.Reader, targetSize int, rootSharded bool) DirEntry {
	return GenerateDirectoryFrom(t, linkSys, randReader, targetSize, "", rootSharded)
}

// GenerateDirectoryFrom is the same as GenerateDirectory but allows the caller
// to specify a directory path to start from. This is useful for generating
// nested directories.
//
// It repeatedly rolls a six-sided die until the accumulated size reaches
// targetSize: a 1-in-6 chance of finishing the directory early (only below
// the root), a 1-in-6 chance of recursing into a fresh subdirectory, and a
// 4-in-6 chance of adding a new random file. All randomness (die rolls, file
// sizes, names, file contents) is drawn from randReader.
func GenerateDirectoryFrom(
	t *testing.T,
	linkSys *linking.LinkSystem,
	randReader io.Reader,
	targetSize int,
	dir string,
	sharded bool,
) DirEntry {
	var curSize int
	// individual files aim for at most 1/16th of the directory's target size
	targetFileSize := targetSize / 16
	children := make([]DirEntry, 0)
	for curSize < targetSize {
		switch rndInt(randReader, 6) {
		case 0: // 1 in 6 chance of finishing this directory if not at root
			if dir != "" && len(children) > 0 {
				curSize = targetSize // not really, but we're done with this directory
			} // else at the root we don't get to finish early
		case 1: // 1 in 6 chance of making a new directory
			if targetSize-curSize <= 1024 { // don't make tiny directories
				continue
			}
			// keep drawing names until one doesn't collide with a sibling
			var newDir string
			for {
				var err error
				newDir, err = namegen.RandomDirectoryName(randReader)
				require.NoError(t, err)
				if !isDupe(children, newDir) {
					break
				}
			}
			child := GenerateDirectoryFrom(t, linkSys, randReader, targetSize-curSize, dir+"/"+newDir, false)
			children = append(children, child)
			curSize += int(child.TSize)
		default: // 4 in 6 chance of making a new file
			var size int
			for size == 0 { // don't make empty files
				sizeB, err := rand.Int(randReader, big.NewInt(int64(targetFileSize)))
				require.NoError(t, err)
				size = int(sizeB.Int64())
				// clamp so this file doesn't overshoot the directory target
				if size > targetSize-curSize {
					size = targetSize - curSize
				}
			}
			entry := GenerateFile(t, linkSys, randReader, size)
			// keep drawing names until one doesn't collide with a sibling
			var name string
			for {
				var err error
				name, err = namegen.RandomFileName(randReader)
				require.NoError(t, err)
				if !isDupe(children, name) {
					break
				}
			}
			entry.Path = dir + "/" + name
			curSize += size
			children = append(children, entry)
		}
	}
	dirEntry := BuildDirectory(t, linkSys, children, sharded)
	dirEntry.Path = dir
	return dirEntry
}

// BuildDirectory builds a directory from the given children, storing the
// blocks in the provided LinkSystem and returns a DirEntry representation of
// the directory. If sharded is true, the root directory will be built as HAMT
// sharded (with a low "width" to maximise the chance of collisions and
// therefore greater depth for smaller number of files).
//
// Note: the children slice is sorted in place, so the caller's slice is
// mutated.
func BuildDirectory(t *testing.T, linkSys *linking.LinkSystem, children []DirEntry, sharded bool) DirEntry {
	// create stable sorted children, which should match the encoded form
	// in dag-pb
	sort.Slice(children, func(i, j int) bool {
		// direct comparison is the idiomatic form (strings.Compare is only
		// needed for three-way results)
		return children[i].Path < children[j].Path
	})

	dirLinks := make([]dagpb.PBLink, 0, len(children))
	for _, child := range children {
		// the directory entry name is the last path segment
		paths := strings.Split(child.Path, "/")
		name := paths[len(paths)-1]
		lnk, err := builder.BuildUnixFSDirectoryEntry(name, int64(child.TSize), cidlink.Link{Cid: child.Root})
		require.NoError(t, err)
		dirLinks = append(dirLinks, lnk)
	}

	// record every CID committed while building the directory node(s)
	cids := make([]cid.Cid, 0)
	var undo func()
	linkSys.StorageWriteOpener, undo = cidCollector(linkSys, &cids)
	defer undo()

	var root ipld.Link
	var size uint64
	var err error
	if sharded {
		// node arity of 16, quite small to increase collision probability so we actually get sharding
		const width = 16
		const hasher = multihash.MURMUR3X64_64
		root, size, err = builder.BuildUnixFSShardedDirectory(width, hasher, dirLinks, linkSys)
		require.NoError(t, err)
	} else {
		root, size, err = builder.BuildUnixFSDirectory(dirLinks, linkSys)
		require.NoError(t, err)
	}

	return DirEntry{
		Path:     "",
		Root:     root.(cidlink.Link).Cid,
		SelfCids: cids,
		TSize:    size,
		Children: children,
	}
}

func rndInt(randReader io.Reader, max int) int {
coin, err := rand.Int(randReader, big.NewInt(int64(max)))
if err != nil {
return 0 // eh, whatever
}
return int(coin.Int64())
}

// cidCollector wraps the LinkSystem's current StorageWriteOpener so that the
// CID of every block committed through it is appended to cids. It returns the
// wrapping opener and an undo func that restores the original opener; the
// caller is expected to defer the undo.
func cidCollector(ls *ipld.LinkSystem, cids *[]cid.Cid) (ipld.BlockWriteOpener, func()) {
	original := ls.StorageWriteOpener
	opener := func(linkCtx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) {
		w, commit, err := original(linkCtx)
		if err != nil {
			return nil, nil, err
		}
		recordingCommit := func(lnk ipld.Link) error {
			// capture the CID, then hand off to the real committer
			*cids = append(*cids, lnk.(cidlink.Link).Cid)
			return commit(lnk)
		}
		return w, recordingCommit, nil
	}
	restore := func() {
		ls.StorageWriteOpener = original
	}
	return opener, restore
}

// isDupe reports whether any existing child already uses the given name as
// its final path segment.
func isDupe(children []DirEntry, name string) bool {
	suffix := "/" + name
	for _, c := range children {
		if strings.HasSuffix(c.Path, suffix) {
			return true
		}
	}
	return false
}
Loading