Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement option to start traversals at a path #358

Merged
merged 3 commits into from Mar 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions traversal/fns.go
Expand Up @@ -37,15 +37,17 @@ type Progress struct {
Path datamodel.Path
Link datamodel.Link
}
Budget *Budget // If present, tracks "budgets" for how many more steps we're willing to take before we should halt.
SeenLinks map[datamodel.Link]struct{} // Set used to remember which links have been visited before, if Cfg.LinkVisitOnlyOnce is true.
PastStartAtPath bool // Indicates whether the traversal has progressed past the StartAtPath in the config -- used to avoid path checks when inside a sub-portion of a DAG that is entirely inside the "not-skipped" portion of a traversal.
Budget *Budget // If present, tracks "budgets" for how many more steps we're willing to take before we should halt.
SeenLinks map[datamodel.Link]struct{} // Set used to remember which links have been visited before, if Cfg.LinkVisitOnlyOnce is true.
}

// Config holds the settings that govern a traversal: the optional context,
// the LinkSystem and node-prototype chooser used for link loading, the
// link-revisit policy, and an optional path at which visiting should begin.
type Config struct {
Ctx context.Context // Context carried through a traversal. Optional; use it if you need cancellation.
LinkSystem linking.LinkSystem // LinkSystem used for automatic link loading, and also any storing if mutation features (e.g. traversal.Transform) are used.
LinkTargetNodePrototypeChooser LinkTargetNodePrototypeChooser // Chooser for Node implementations to produce during automatic link traversal.
LinkVisitOnlyOnce bool // By default, we visit across links wherever we see them again, even if we've visited them before, because the reason for visiting might be different than it was before since we got to it via a different path. If set to true, track links we've seen before in Progress.SeenLinks and do not visit them again. Note that sufficiently complex selectors may require valid revisiting of some links, so setting this to true can change behavior noticeably and should be done with care.
StartAtPath datamodel.Path // If set, causes a traversal to skip forward until passing this path, and only then begins calling visit functions. Block loads will also be skipped wherever possible.
}

type Budget struct {
Expand Down
39 changes: 31 additions & 8 deletions traversal/walk.go
Expand Up @@ -181,14 +181,16 @@ func (prog Progress) walkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitF
n = rn
}

// Decide if this node is matched -- do callbacks as appropriate.
if s.Decide(n) {
if err := fn(prog, n, VisitReason_SelectionMatch); err != nil {
return err
}
} else {
if err := fn(prog, n, VisitReason_SelectionCandidate); err != nil {
return err
if prog.Path.Len() >= prog.Cfg.StartAtPath.Len() || !prog.PastStartAtPath {
// Decide if this node is matched -- do callbacks as appropriate.
if s.Decide(n) {
if err := fn(prog, n, VisitReason_SelectionMatch); err != nil {
return err
}
} else {
if err := fn(prog, n, VisitReason_SelectionCandidate); err != nil {
return err
}
}
}
// If we're handling scalars (e.g. not maps and lists) we can return now.
Expand All @@ -211,11 +213,23 @@ func (prog Progress) walkAdv(n datamodel.Node, s selector.Selector, fn AdvVisitF
}

func (prog Progress) walkAdv_iterateAll(n datamodel.Node, s selector.Selector, fn AdvVisitFn) error {
var reachedStartAtPath bool
for itr := selector.NewSegmentIterator(n); !itr.Done(); {
if reachedStartAtPath {
prog.PastStartAtPath = reachedStartAtPath
}
ps, v, err := itr.Next()
if err != nil {
return err
}
if prog.Path.Len() < prog.Cfg.StartAtPath.Len() && !prog.PastStartAtPath {
if ps.Equals(prog.Cfg.StartAtPath.Segments()[prog.Path.Len()]) {
reachedStartAtPath = true
}
if !reachedStartAtPath {
continue
}
}
sNext, err := s.Explore(n, ps)
if err != nil {
return err
Expand Down Expand Up @@ -252,7 +266,16 @@ func (prog Progress) walkAdv_iterateAll(n datamodel.Node, s selector.Selector, f
}

func (prog Progress) walkAdv_iterateSelective(n datamodel.Node, attn []datamodel.PathSegment, s selector.Selector, fn AdvVisitFn) error {
var reachedStartAtPath bool
for _, ps := range attn {
if prog.Path.Len() < prog.Cfg.StartAtPath.Len() {
if ps.Equals(prog.Cfg.StartAtPath.Segments()[prog.Path.Len()]) {
reachedStartAtPath = true
}
if !reachedStartAtPath {
continue
}
}
v, err := n.LookupBySegment(ps)
if err != nil {
continue
Expand Down
76 changes: 71 additions & 5 deletions traversal/walk_test.go
Expand Up @@ -268,6 +268,46 @@ func TestWalkMatching(t *testing.T) {
qt.Check(t, err, qt.IsNil)
qt.Check(t, order, qt.Equals, 7)
})

t.Run("no visiting of nodes before start path", func(t *testing.T) {
ss := ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) {
efsb.Insert("linkedList", ssb.ExploreAll(ssb.Matcher()))
efsb.Insert("linkedMap", ssb.ExploreRecursive(selector.RecursionLimitDepth(3), ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) {
efsb.Insert("foo", ssb.Matcher())
efsb.Insert("nonlink", ssb.Matcher())
efsb.Insert("alink", ssb.Matcher())
efsb.Insert("nested", ssb.ExploreRecursiveEdge())
})))
})
s, err := ss.Selector()
var order int
lsys := cidlink.DefaultLinkSystem()
lsys.SetReadStorage(&store)
err = traversal.Progress{
Cfg: &traversal.Config{
LinkSystem: lsys,
LinkTargetNodePrototypeChooser: basicnode.Chooser,
StartAtPath: datamodel.ParsePath("linkedMap/nested/nonlink"),
},
}.WalkMatching(rootNode, s, func(prog traversal.Progress, n datamodel.Node) error {
switch order {
case 0:
qt.Check(t, n, nodetests.NodeContentEquals, basicnode.NewString("zoo"))
qt.Check(t, prog.Path.String(), qt.Equals, "linkedMap/nested/nonlink")
qt.Check(t, prog.LastBlock.Path.String(), qt.Equals, "linkedMap")
qt.Check(t, prog.LastBlock.Link.String(), qt.Equals, middleMapNodeLnk.String())
case 1:
qt.Check(t, n, nodetests.NodeContentEquals, basicnode.NewString("alpha"))
qt.Check(t, prog.Path.String(), qt.Equals, "linkedMap/nested/alink")
qt.Check(t, prog.LastBlock.Path.String(), qt.Equals, "linkedMap/nested/alink")
qt.Check(t, prog.LastBlock.Link.String(), qt.Equals, leafAlphaLnk.String())
}
order++
return nil
})
qt.Check(t, err, qt.IsNil)
qt.Check(t, order, qt.Equals, 2)
})
}

func TestWalkBudgets(t *testing.T) {
Expand Down Expand Up @@ -390,6 +430,7 @@ func TestWalkBlockLoadOrder(t *testing.T) {
expected []datamodel.Link,
s datamodel.Node,
linkVisitOnce bool,
startAtPath datamodel.Path,
readFn func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error)) {

var count int
Expand All @@ -407,6 +448,7 @@ func TestWalkBlockLoadOrder(t *testing.T) {
LinkSystem: lsys,
LinkTargetNodePrototypeChooser: basicnode.Chooser,
LinkVisitOnlyOnce: linkVisitOnce,
StartAtPath: startAtPath,
},
}.WalkMatching(newRootNode, sel, func(prog traversal.Progress, n datamodel.Node) error {
return nil
Expand All @@ -417,14 +459,14 @@ func TestWalkBlockLoadOrder(t *testing.T) {

t.Run("CommonSelector_MatchAllRecursively", func(t *testing.T) {
s := selectorparse.CommonSelector_MatchAllRecursively
verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedAllBlocks, s, false, datamodel.NewPath(nil), func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
})

t.Run("CommonSelector_ExploreAllRecursively", func(t *testing.T) {
s := selectorparse.CommonSelector_ExploreAllRecursively
verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedAllBlocks, s, false, datamodel.NewPath(nil), func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
})
Expand All @@ -435,7 +477,7 @@ func TestWalkBlockLoadOrder(t *testing.T) {
s := ssb.ExploreRecursive(selector.RecursionLimitNone(),
ssb.ExploreAll(ssb.ExploreRecursiveEdge())).
Node()
verifySelectorLoads(t, expectedAllBlocks, s, false, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedAllBlocks, s, false, datamodel.NewPath(nil), func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
})
Expand Down Expand Up @@ -464,7 +506,7 @@ func TestWalkBlockLoadOrder(t *testing.T) {

s := selectorparse.CommonSelector_ExploreAllRecursively
visited := make(map[datamodel.Link]bool)
verifySelectorLoads(t, expectedSkipMeBlocks, s, false, func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedSkipMeBlocks, s, false, datamodel.NewPath(nil), func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
log.Printf("load %v [%v]\n", l, visited[l])
if visited[l] {
return nil, traversal.SkipMe{}
Expand All @@ -486,10 +528,34 @@ func TestWalkBlockLoadOrder(t *testing.T) {
middleMapNodeLnk,
}
s := selectorparse.CommonSelector_ExploreAllRecursively
verifySelectorLoads(t, expectedLinkRevisitBlocks, s, true, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
verifySelectorLoads(t, expectedLinkRevisitBlocks, s, true, datamodel.NewPath(nil), func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
})
t.Run("explore-all with duplicate traversal skip via load at path", func(t *testing.T) {
// when using LinkRevisit:false to skip duplicate block loads, our loader
// doesn't even get to see the load attempts (unlike SkipMe, where the
// loader signals the skips)
testPathsToBlocksSkipped := map[string][]datamodel.Link{
// 5th node in load sequence for rootNode
"0/linkedList/2": append([]datamodel.Link{rootNodeLnk, middleListNodeLnk}, expectedAllBlocks[4:]...),
// LinkedMap is the 7th node; foo doesn't affect loading
"0/linkedMap/foo": append([]datamodel.Link{rootNodeLnk}, expectedAllBlocks[6:]...),
// 8th node in load sequence for rootNode
"0/linkedMap/nested/alink": append([]datamodel.Link{rootNodeLnk, middleMapNodeLnk}, expectedAllBlocks[7:]...),
"0/linkedString": append([]datamodel.Link{rootNodeLnk}, expectedAllBlocks[8:]...),
// pass through all nodes of the first root block, then go load middle list block
"1/2": append([]datamodel.Link{middleListNodeLnk}, expectedAllBlocks[len(rootNodeExpectedLinks)+3:]...),
"3/1": append([]datamodel.Link{middleListNodeLnk}, expectedAllBlocks[2*len(rootNodeExpectedLinks)+len(middleListNodeLinks)+2:]...),
}
for path, expectedLinkVisits := range testPathsToBlocksSkipped {
startAtPath := datamodel.ParsePath(path)
s := selectorparse.CommonSelector_ExploreAllRecursively
verifySelectorLoads(t, expectedLinkVisits, s, false, startAtPath, func(lctx linking.LinkContext, lnk datamodel.Link) (io.Reader, error) {
return storage.GetStream(lctx.Ctx, &store, lnk.Binary())
})
}
})
}

func TestWalk_ADLs(t *testing.T) {
Expand Down