From 2a979ca159d87e8843f272c9b5d472fe4615766b Mon Sep 17 00:00:00 2001
From: Josh Newman
Date: Tue, 19 Oct 2021 23:59:26 +0000
Subject: [PATCH] base/file/addfs: per-node transformations

Summary:
Complementing D65999's whole-directory ones.

Test Plan: New unit tests.

Reviewers: jcharumilind

Reviewed By: jcharumilind

Subscribers: smahadevan

Differential Revision: https://phabricator.grailbio.com/D66026

fbshipit-source-id: 1235d5f
---
 file/addfs/per_node.go      | 195 ++++++++++++++++++++++++++++++++++++
 file/addfs/per_node_test.go | 119 ++++++++++++++++++++++
 file/addfs/per_subtree.go   |   7 ++
 3 files changed, 321 insertions(+)
 create mode 100644 file/addfs/per_node.go
 create mode 100644 file/addfs/per_node_test.go
 create mode 100644 file/addfs/per_subtree.go

diff --git a/file/addfs/per_node.go b/file/addfs/per_node.go
new file mode 100644
index 00000000..e8a1dc05
--- /dev/null
+++ b/file/addfs/per_node.go
@@ -0,0 +1,195 @@
+package addfs
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/grailbio/base/file/fsnode"
+	"github.com/grailbio/base/log"
+)
+
+type (
+	// PerNodeFunc computes nodes to add to a directory tree, for example to present alternate views
+	// of raw data, expand archive files, etc. It operates on a single node at a time. If it returns
+	// any "addition" nodes, ApplyPerNodeFuncs will place them under a sibling directory called
+	// "...". For example, suppose we have an input directory:
+	//   parent/
+	//   └─dir1/
+	//     ├─fileA
+	//     ├─fileB
+	//     └─dir2/
+	// and we call ApplyPerNodeFuncs(parent/, ourFns). The resulting directory tree will be
+	//   parent/
+	//   ├─.../
+	//   │ └─dir1/
+	//   │   └─[ nodes returned by PerNodeFunc.Apply(_, dir1/) for all ourFns ]
+	//   └─dir1/
+	//     ├─.../
+	//     │ ├─fileA/
+	//     │ │ └─[ nodes returned by PerNodeFunc.Apply(_, fileA) for all ourFns ]
+	//     │ ├─fileB/
+	//     │ │ └─[ nodes returned by PerNodeFunc.Apply(_, fileB) for all ourFns ]
+	//     │ └─dir2/
+	//     │   └─[ nodes returned by PerNodeFunc.Apply(_, dir2/) for all ourFns ]
+	//     ├─fileA
+	//     ├─fileB
+	//     └─dir2/
+	//       └─.../
+	// Users browsing this resulting tree can work with just the original files and ourFns won't
+	// be invoked. However, they can also navigate into any of the .../s if interested and then
+	// use the additional views generated by ourFns. If they're interested in our_view for
+	// /path/to/a/file, they just need to prepend .../, like /path/to/a/.../file/our_view.
+	// (Perhaps it'd be more intuitive to "append", like /path/to/a/file/our_view, but then the
+	// file name would conflict with the view-containing directory.)
+	//
+	// Funcs that need to list the children of a fsnode.Parent should be careful: they may want to
+	// set an upper limit on number of entries to read, and otherwise default to empty, to avoid
+	// performance problems (resulting in bad UX) for very large directories.
+	//
+	// Funcs that simply look at filenames and declare derived outputs may want to place their
+	// children directly under /.../file/ for convenient access. However, Funcs that are expensive,
+	// for example reading some file contents, etc., may want to separate themselves under their own
+	// subdirectory, like .../file/func_name/. This lets users browsing the tree "opt-in" to seeing
+	// the results of the expensive computation by navigating to .../file/func_name/.
+	//
+	// If the input tree has any "..." that conflict with the added ones, the added ones override.
+	// The originals will simply not be accessible.
+	PerNodeFunc interface {
+		Apply(context.Context, fsnode.T) (adds []fsnode.T, _ error)
+	}
+	perNodeFunc func(context.Context, fsnode.T) (adds []fsnode.T, _ error)
+)
+
+// NewPerNodeFunc wraps fn so that it can be used as a PerNodeFunc.
+func NewPerNodeFunc(fn func(context.Context, fsnode.T) ([]fsnode.T, error)) PerNodeFunc {
+	return perNodeFunc(fn)
+}
+// Apply calls f with ctx and n and returns its results.
+func (f perNodeFunc) Apply(ctx context.Context, n fsnode.T) ([]fsnode.T, error) { return f(ctx, n) }
+
+// addsDirName is the name of the directory under which added nodes are placed.
+const addsDirName = "..."
+
+// perNodeImpl extends the original Parent with the .../ child.
+type perNodeImpl struct {
+	fsnode.Parent
+	fns  []PerNodeFunc
+	adds fsnode.Parent
+}
+
+var (
+	_ fsnode.Parent    = (*perNodeImpl)(nil)
+	_ fsnode.Cacheable = (*perNodeImpl)(nil)
+)
+
+// ApplyPerNodeFuncs returns a new Parent that contains original's nodes plus any added by fns.
+// See PerNodeFunc for more documentation on how this works.
+// Later fns's added nodes will overwrite earlier ones, if any names conflict.
+func ApplyPerNodeFuncs(original fsnode.Parent, fns ...PerNodeFunc) fsnode.Parent {
+	fns = append([]PerNodeFunc{}, fns...)
+	adds := perNodeAdds{
+		fsnode.CopyFileInfo(original).WithName(addsDirName),
+		original, fns}
+	return &perNodeImpl{original, fns, &adds}
+}
+
+// CacheableFor returns fsnode.CacheableFor of the original Parent.
+func (n *perNodeImpl) CacheableFor() time.Duration { return fsnode.CacheableFor(n.Parent) }
+// Child returns the .../ directory when name is addsDirName. Otherwise it looks up name in the
+// original Parent and passes the result through perNodeRecurse.
+func (n *perNodeImpl) Child(ctx context.Context, name string) (fsnode.T, error) {
+	if name == addsDirName {
+		return n.adds, nil
+	}
+	child, err := n.Parent.Child(ctx, name)
+	if err != nil {
+		return nil, err
+	}
+	return perNodeRecurse(child, n.fns), nil
+}
+// Children concatenates an iterator over the .../ directory with the original Parent's children,
+// each passed through perNodeRecurse.
+func (n *perNodeImpl) Children() fsnode.Iterator {
+	return fsnode.NewConcatIterator(
+		// TODO: Consider omitting .../ if the directory has no other children.
+		fsnode.NewIterator(n.adds),
+		// TODO: Filter out any conflicting ... to be consistent with Child.
+		fsnode.MapIterator(n.Parent.Children(), func(_ context.Context, child fsnode.T) (fsnode.T, error) {
+			return perNodeRecurse(child, n.fns), nil
+		}),
+	)
+}
+
+// perNodeAdds is the .../ Parent. It has a child (directory) for each original child (both
+// directories and files). The children contain the PerNodeFunc.Apply outputs.
+type perNodeAdds struct {
+	fsnode.FileInfo
+	original fsnode.Parent
+	fns      []PerNodeFunc
+}
+
+var (
+	_ fsnode.Parent    = (*perNodeAdds)(nil)
+	_ fsnode.Cacheable = (*perNodeAdds)(nil)
+)
+
+// Child looks up name in the original Parent and returns the added-nodes directory for it.
+func (n *perNodeAdds) Child(ctx context.Context, name string) (fsnode.T, error) {
+	child, err := n.original.Child(ctx, name)
+	if err != nil {
+		return nil, err
+	}
+	return n.newAddsForChild(child), nil
+}
+// Children returns an added-nodes directory for each child of the original Parent.
+func (n *perNodeAdds) Children() fsnode.Iterator {
+	// TODO: Filter out any conflicting ... to be consistent with Child.
+	return fsnode.MapIterator(n.original.Children(), func(_ context.Context, child fsnode.T) (fsnode.T, error) {
+		return n.newAddsForChild(child), nil
+	})
+}
+func (n *perNodeAdds) FSNodeT() {}
+
+// newAddsForChild returns the directory that holds the nodes fns add for original. The fns are
+// run inside the fsnode.FuncChildren callback, in order, so later fns overwrite earlier ones.
+func (n *perNodeAdds) newAddsForChild(original fsnode.T) fsnode.Parent {
+	return fsnode.NewParent(
+		fsnode.NewDirInfo(original.Name()).
+			WithModTime(original.ModTime()).
+			// Derived directory must be executable to be usable, even if original file wasn't.
+			WithModePerm(original.Mode().Perm()|0111).
+			WithCacheableFor(fsnode.CacheableFor(original)),
+		fsnode.FuncChildren(func(ctx context.Context) ([]fsnode.T, error) {
+			adds := make(map[string]fsnode.T)
+			for _, fn := range n.fns {
+				fnAdds, err := fn.Apply(ctx, original)
+				if err != nil {
+					return nil, fmt.Errorf("addfs: error running func %v: %w", fn, err)
+				}
+				for _, add := range fnAdds {
+					// Only an already-added name is a conflict; don't log the first node added under each name.
+					if _, exists := adds[add.Name()]; exists {
+						log.Debug.Printf("addfs %s: conflict for added name: %s", n.Name(), add.Name())
+					}
+					// TODO: Consider returning an error here. Or merging the added trees?
+					adds[add.Name()] = add
+				}
+			}
+			wrapped := make([]fsnode.T, 0, len(adds))
+			for _, add := range adds {
+				wrapped = append(wrapped, perNodeRecurse(add, n.fns))
+			}
+			return wrapped, nil
+		}),
+	)
+}
+
+// perNodeRecurse applies fns to node if it is an fsnode.Parent; other nodes are returned as is.
+func perNodeRecurse(node fsnode.T, fns []PerNodeFunc) fsnode.T {
+	parent, ok := node.(fsnode.Parent)
+	if !ok {
+		return node
+	}
+	return ApplyPerNodeFuncs(parent, fns...)
+}
diff --git a/file/addfs/per_node_test.go b/file/addfs/per_node_test.go
new file mode 100644
index 00000000..9ecac84d
--- /dev/null
+++ b/file/addfs/per_node_test.go
@@ -0,0 +1,119 @@
+package addfs
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+	"testing"
+
+	"github.com/grailbio/base/file/fsnode"
+	. "github.com/grailbio/base/file/fsnode/fsnodetesting"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestPerNodeFuncs(t *testing.T) {
+	ctx := context.Background()
+	root := func() Parent {
+		return Parent{
+			"dir0": Parent{},
+			"dir1": Parent{
+				"dir10": Parent{
+					"a": []byte("content dir10/a"),
+					"b": []byte("content dir10/b"),
+				},
+				"a": []byte("content dir1/a"),
+				"b": []byte("content dir1/b"),
+			},
+		}
+	}
+	t.Run("basic", func(t *testing.T) {
+		root := root()
+		n := MakeT(t, "", root).(fsnode.Parent)
+		n = ApplyPerNodeFuncs(n,
+			NewPerNodeFunc(
+				func(ctx context.Context, node fsnode.T) ([]fsnode.T, error) {
+					switch n := node.(type) {
+					case fsnode.Parent:
+						iter := n.Children()
+						defer func() { assert.NoError(t, iter.Close(ctx)) }()
+						children, err := fsnode.IterateAll(ctx, iter)
+						assert.NoError(t, err)
+						var names []string
+						for _, child := range children {
+							names = append(names, child.Name())
+						}
+						sort.Strings(names)
+						return []fsnode.T{
+							fsnode.ConstLeaf(fsnode.NewRegInfo("children names"), []byte(strings.Join(names, ","))),
+						}, nil
+					case fsnode.Leaf:
+						return []fsnode.T{
+							fsnode.ConstLeaf(fsnode.NewRegInfo("copy"), nil), // Will be overwritten.
+						}, nil
+					}
+					require.Failf(t, "invalid node type", "node: %T", node)
+					panic("unreachable")
+				},
+			),
+			NewPerNodeFunc(
+				func(ctx context.Context, node fsnode.T) ([]fsnode.T, error) {
+					switch n := node.(type) {
+					case fsnode.Parent:
+						return nil, nil
+					case fsnode.Leaf:
+						return []fsnode.T{
+							fsnode.ConstLeaf(fsnode.NewRegInfo("copy"), LeafReadAll(ctx, t, n)),
+						}, nil
+					}
+					require.Failf(t, "invalid node type", "node: %T", node)
+					panic("unreachable")
+				},
+			),
+		)
+		got := Walker{}.WalkContents(ctx, t, n)
+		want := Parent{
+			"...": Parent{
+				"dir0": Parent{"children names": []byte("")},
+				"dir1": Parent{"children names": []byte("a,b,dir10")},
+			},
+			"dir0": Parent{
+				"...": Parent{},
+			},
+			"dir1": Parent{
+				"...": Parent{
+					"dir10": Parent{"children names": []byte("a,b")},
+					"a":     Parent{"copy": []byte("content dir1/a")},
+					"b":     Parent{"copy": []byte("content dir1/b")},
+				},
+				"dir10": Parent{
+					"...": Parent{
+						"a": Parent{"copy": []byte("content dir10/a")},
+						"b": Parent{"copy": []byte("content dir10/b")},
+					},
+					"a": []byte("content dir10/a"),
+					"b": []byte("content dir10/b"),
+				},
+				"a": []byte("content dir1/a"),
+				"b": []byte("content dir1/b"),
+			},
+		}
+		assert.Equal(t, want, got)
+	})
+	t.Run("lazy", func(t *testing.T) {
+		root := root()
+		n := MakeT(t, "", root).(fsnode.Parent)
+		n = ApplyPerNodeFuncs(n, NewPerNodeFunc(
+			func(_ context.Context, node fsnode.T) ([]fsnode.T, error) {
+				return nil, fmt.Errorf("func was called: %q", node.Name())
+			},
+		))
+		got := Walker{
+			IgnoredNames: map[string]struct{}{
+				addsDirName: struct{}{},
+			},
+		}.WalkContents(ctx, t, n)
+		assert.Equal(t, root, got)
+	})
+}
diff --git a/file/addfs/per_subtree.go b/file/addfs/per_subtree.go
new file mode 100644
index 00000000..cc904222
--- /dev/null
+++ b/file/addfs/per_subtree.go
@@ -0,0 +1,7 @@
+package addfs
+
+// TODO: Implement PerSubtreeFunc.
+// A PerNodeFunc is applied independently to each node in an entire directory tree. It may be
+// useful to define funcs that are contextual. For example if an fsnode.Parent called base/ has a
+// child called .git, we may want to define git-repository-aware views for each descendent node,
+// like base/file/addfs/.../per_subtree.go/git/log.txt containing history.