diff --git a/go.mod b/go.mod index 368f12e..c31842f 100644 --- a/go.mod +++ b/go.mod @@ -5,11 +5,11 @@ go 1.19 require ( github.com/ipfs/go-bitfield v1.1.0 github.com/ipfs/go-cid v0.3.2 - github.com/ipfs/go-ipfs-chunker v0.0.1 + github.com/ipfs/go-ipfs-chunker v0.0.5 github.com/ipfs/go-ipfs-util v0.0.2 github.com/ipfs/go-ipld-format v0.4.0 github.com/ipfs/go-merkledag v0.10.0 - github.com/ipfs/go-unixfs v0.4.3 + github.com/ipfs/go-unixfs v0.4.4 github.com/ipld/go-car/v2 v2.8.0 github.com/ipld/go-codec-dagpb v1.6.0 github.com/ipld/go-ipld-prime v0.20.0 diff --git a/go.sum b/go.sum index 7a279d2..e193de6 100644 --- a/go.sum +++ b/go.sum @@ -67,8 +67,8 @@ github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46U github.com/ipfs/go-ipfs-blockstore v1.2.0 h1:n3WTeJ4LdICWs/0VSfjHrlqpPpl6MZ+ySd3j8qz0ykw= github.com/ipfs/go-ipfs-blockstore v1.2.0/go.mod h1:eh8eTFLiINYNSNawfZOC7HOxNTxpB1PFuA5E1m/7exE= github.com/ipfs/go-ipfs-blocksutil v0.0.1 h1:Eh/H4pc1hsvhzsQoMEP3Bke/aW5P5rVM1IWFJMcGIPQ= -github.com/ipfs/go-ipfs-chunker v0.0.1 h1:cHUUxKFQ99pozdahi+uSC/3Y6HeRpi9oTeUHbE27SEw= -github.com/ipfs/go-ipfs-chunker v0.0.1/go.mod h1:tWewYK0we3+rMbOh7pPFGDyypCtvGcBFymgY4rSDLAw= +github.com/ipfs/go-ipfs-chunker v0.0.5 h1:ojCf7HV/m+uS2vhUGWcogIIxiO5ubl5O57Q7NapWLY8= +github.com/ipfs/go-ipfs-chunker v0.0.5/go.mod h1:jhgdF8vxRHycr00k13FM8Y0E+6BoalYeobXmUyTreP8= github.com/ipfs/go-ipfs-delay v0.0.0-20181109222059-70721b86a9a8/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-delay v0.0.1 h1:r/UXYyRcddO6thwOnhiznIAiSvxMECGgtv35Xs1IeRQ= github.com/ipfs/go-ipfs-ds-help v1.1.0 h1:yLE2w9RAsl31LtfMt91tRZcrx+e61O5mDxFRR994w4Q= @@ -104,8 +104,8 @@ github.com/ipfs/go-merkledag v0.10.0/go.mod h1:zkVav8KiYlmbzUzNM6kENzkdP5+qR7+2m github.com/ipfs/go-metrics-interface v0.0.1 h1:j+cpbjYvu4R8zbleSs36gvB7jR+wsL2fGD6n0jO4kdg= github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j/b/tL7HTWtJ4VPgWY= github.com/ipfs/go-peertaskqueue v0.8.1 h1:YhxAs1+wxb5jk7RvS0LHdyiILpNmRIRnZVztekOF0pg= -github.com/ipfs/go-unixfs v0.4.3 h1:EdDc1sNZNFDUlo4UrVAvvAofVI5EwTnKu8Nv8mgXkWQ= -github.com/ipfs/go-unixfs v0.4.3/go.mod h1:TSG7G1UuT+l4pNj91raXAPkX0BhJi3jST1FDTfQ5QyM= +github.com/ipfs/go-unixfs v0.4.4 h1:D/dLBOJgny5ZLIur2vIXVQVW0EyDHdOMBDEhgHrt6rY= +github.com/ipfs/go-unixfs v0.4.4/go.mod h1:TSG7G1UuT+l4pNj91raXAPkX0BhJi3jST1FDTfQ5QyM= github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU= github.com/ipld/go-car/v2 v2.8.0 h1:8tUI+VM1mAQ2Qa7ScK++lfyuZYcGQ70bZ6NpGOcJj5o= @@ -138,7 +138,6 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/libp2p/go-buffer-pool v0.0.1/go.mod h1:xtyIz9PMobb13WaxR6Zo1Pd1zXJKYg0a8KiIvDp3TzQ= github.com/libp2p/go-buffer-pool v0.0.2/go.mod h1:MvaB6xw5vOrDl8rYZGLFdKAuk/hRoRZd1Vi32+RXyFM= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg= diff --git a/hamt/fixtures/wikipedia-cryptographic-hash-function.car b/hamt/fixtures/wikipedia-cryptographic-hash-function.car new file mode 100644 index 0000000..c914805 Binary files /dev/null and b/hamt/fixtures/wikipedia-cryptographic-hash-function.car differ diff --git a/hamt/shardeddir.go b/hamt/shardeddir.go index 2f329b9..a273b29 100644 --- a/hamt/shardeddir.go +++ b/hamt/shardeddir.go @@ -197,6 +197,8 @@ type _UnixFSShardedDir__ListItr struct { } func (itr *_UnixFSShardedDir__ListItr) Next() (int64, dagpb.PBLink, error) { + total := itr.total + itr.total++ next, err := itr.next() if err != nil { return -1, nil, err @@ -204,13 +206,10 @@ func (itr *_UnixFSShardedDir__ListItr) Next() (int64, dagpb.PBLink, error) { if next == nil { return -1, nil, nil } - total := itr.total - itr.total++ return total, next, nil } func (itr *_UnixFSShardedDir__ListItr) next() (dagpb.PBLink, error) { - if itr.childIter == nil { if itr._substrate.Done() { return nil, nil @@ -232,15 +231,16 @@ func (itr *_UnixFSShardedDir__ListItr) next() (dagpb.PBLink, error) { nd: child, maxPadLen: maxPadLength(child.data), } - } _, next, err := itr.childIter.Next() - if err != nil { - return nil, err - } if itr.childIter.Done() { + // do this even on error to make sure we don't overrun a shard where the + // end is missing and the user is ignoring NotFound errors itr.childIter = nil } + if err != nil { + return nil, err + } return next, nil } diff --git a/hamt/shardeddir_test.go b/hamt/shardeddir_test.go index dd1f217..fcc7601 100644 --- a/hamt/shardeddir_test.go +++ b/hamt/shardeddir_test.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "math/rand" + "os" "sort" "testing" "time" @@ -16,10 +17,12 @@ import ( ft "github.com/ipfs/go-unixfs" legacy "github.com/ipfs/go-unixfs/hamt" "github.com/ipfs/go-unixfsnode/hamt" + "github.com/ipld/go-car/v2/storage" dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/fluent/qp" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + basicnode "github.com/ipld/go-ipld-prime/node/basic" "github.com/ipld/go-ipld-prime/schema" "github.com/stretchr/testify/require" ) @@ -206,3 +209,83 @@ func TestFindNonExisting(t *testing.T) { require.EqualError(t, err, schema.ErrNoSuchField{Field: ipld.PathSegmentOfString(key)}.Error()) } } + +func TestIncompleteShardedIteration(t *testing.T) { + ctx := context.Background() + req := require.New(t) + + fixture := "./fixtures/wikipedia-cryptographic-hash-function.car" + f, err := os.Open(fixture) + req.NoError(err) + defer f.Close() + carstore, err := storage.OpenReadable(f) + req.NoError(err) + lsys := cidlink.DefaultLinkSystem() + lsys.TrustedStorage = true + lsys.SetReadStorage(carstore) + + // classic recursive go-ipld-prime map iteration, being forgiving about + // NotFound block loads to see what we end up with + + kvs := make(map[string]string) + var iterNotFound int + blockNotFound := make(map[string]struct{}) + + var iter func(string, ipld.Link) + iter = func(dir string, lnk ipld.Link) { + nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any) + if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() { + // got a named link that we can't load + blockNotFound[dir] = struct{}{} + return + } + req.NoError(err) + if nd.Kind() == ipld.Kind_Bytes { + bv, err := nd.AsBytes() + req.NoError(err) + kvs[dir] = string(bv) + return + } + + nb := dagpb.Type.PBNode.NewBuilder() + req.NoError(nb.AssignNode(nd)) + pbn := nb.Build() + hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, pbn, &lsys) + req.NoError(err) + + mi := hamtShard.MapIterator() + for !mi.Done() { + k, v, err := mi.Next() + if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() { + // internal shard link that won't load, we don't know what it might + // point to + iterNotFound++ + continue + } + req.NoError(err) + ks, err := k.AsString() + req.NoError(err) + req.Equal(ipld.Kind_Link, v.Kind()) + lv, err := v.AsLink() + req.NoError(err) + iter(dir+"/"+ks, lv) + } + } + // walk the tree + iter("", cidlink.Link{Cid: carstore.Roots()[0]}) + + req.Len(kvs, 1) + req.Contains(kvs, "/wiki/Cryptographic_hash_function") + req.Contains(kvs["/wiki/Cryptographic_hash_function"], "Cryptographic hash function\n") + req.Equal(iterNotFound, 570) // tried to load 570 blocks that were not in the CAR + req.Len(blockNotFound, 110) // 110 blocks, for named links, were not found in the CAR + // some of the root block links + req.Contains(blockNotFound, "/favicon.ico") + req.Contains(blockNotFound, "/index.html") + req.Contains(blockNotFound, "/zimdump_version") + // some of the shard links + req.Contains(blockNotFound, "/wiki/UK_railway_Signal") + req.Contains(blockNotFound, "/wiki/Australian_House") + req.Contains(blockNotFound, "/wiki/ICloud_Drive") + req.Contains(blockNotFound, "/wiki/Édouard_Bamberger") +}