Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix directory resolver to consider CWD and root path input correctly #1840

Merged
merged 10 commits into from
May 25, 2023
5 changes: 2 additions & 3 deletions syft/formats/common/spdxhelpers/to_format_model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
)

// TODO: Add ToFormatModel tests
Expand Down Expand Up @@ -505,14 +504,14 @@ func Test_toSPDXID(t *testing.T) {
}{
{
name: "short filename",
it: source.Coordinates{
it: file.Coordinates{
RealPath: "/short/path/file.txt",
},
expected: "File-short-path-file.txt",
},
{
name: "long filename",
it: source.Coordinates{
it: file.Coordinates{
RealPath: "/some/long/path/with/a/lot/of-text/that-contains-a/file.txt",
},
expected: "File-...a-lot-of-text-that-contains-a-file.txt",
Expand Down
11 changes: 3 additions & 8 deletions syft/internal/fileresolver/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathI
if err != nil {
return nil, fmt.Errorf("could not get CWD: %w", err)
}
// we have to account for the root being accessed through a symlink path and always resolve the real path. Otherwise
// we will not be able to normalize given paths that fall under the resolver
cleanCWD, err := filepath.EvalSymlinks(currentWD)
if err != nil {
return nil, fmt.Errorf("could not evaluate CWD symlinks: %w", err)
}

cleanRoot, err := filepath.EvalSymlinks(root)
if err != nil {
Expand All @@ -80,7 +74,7 @@ func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathI

var currentWdRelRoot string
if path.IsAbs(cleanRoot) {
currentWdRelRoot, err = filepath.Rel(cleanCWD, cleanRoot)
currentWdRelRoot, err = filepath.Rel(currentWD, cleanRoot)
if err != nil {
return nil, fmt.Errorf("could not determine given root path to CWD: %w", err)
}
Expand All @@ -91,7 +85,7 @@ func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathI
return &Directory{
path: cleanRoot,
base: cleanBase,
currentWd: cleanCWD,
currentWd: currentWD,
currentWdRelativeToRoot: currentWdRelRoot,
tree: filetree.New(),
index: filetree.NewIndex(),
Expand Down Expand Up @@ -132,6 +126,7 @@ func (r Directory) requestPath(userPath string) (string, error) {
return userPath, nil
}

// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the directory resolver.
func (r Directory) responsePath(path string) string {
// check to see if we need to encode back to Windows from posix
if runtime.GOOS == WindowsOS {
Expand Down
96 changes: 96 additions & 0 deletions syft/internal/fileresolver/directory_indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"path"
"path/filepath"
"runtime"
"strings"

"github.com/wagoodman/go-partybus"
"github.com/wagoodman/go-progress"
Expand Down Expand Up @@ -119,6 +120,22 @@ func (r *directoryIndexer) indexTree(root string, stager *progress.Stage) ([]str
return roots, nil
}

shouldIndexFullTree, err := isRealPath(root)
if err != nil {
return nil, err
}

if !shouldIndexFullTree {
newRoots, err := r.indexBranch(root, stager)
if err != nil {
return nil, fmt.Errorf("unable to index branch=%q: %w", root, err)
}

roots = append(roots, newRoots...)

return roots, nil
}

err = filepath.Walk(root,
func(path string, info os.FileInfo, err error) error {
stager.Current = path
Expand All @@ -143,6 +160,85 @@ func (r *directoryIndexer) indexTree(root string, stager *progress.Stage) ([]str
return roots, nil
}

func isRealPath(root string) (bool, error) {
rootParent := filepath.Clean(filepath.Dir(root))

realRootParent, err := filepath.EvalSymlinks(rootParent)
if err != nil {
return false, err
}

realRootParent = filepath.Clean(realRootParent)

return rootParent == realRootParent, nil
}

func (r *directoryIndexer) indexBranch(root string, stager *progress.Stage) ([]string, error) {
rootRealPath, err := filepath.EvalSymlinks(root)
if err != nil {
return nil, err
}

// there is a symlink within the path to the root, we need to index the real root parent first
// then capture the symlinks to the root path
roots, err := r.indexTree(rootRealPath, stager)
if err != nil {
return nil, fmt.Errorf("unable to index real root=%q: %w", rootRealPath, err)
}

// walk down all ancestor paths and shallow-add non-existing elements to the tree
for idx, p := range allContainedPaths(root) {
var targetPath string
if idx != 0 {
parent := path.Dir(p)
cleanParent, err := filepath.EvalSymlinks(parent)
if err != nil {
return nil, fmt.Errorf("unable to evaluate symlink for contained path parent=%q: %w", parent, err)
}
targetPath = filepath.Join(cleanParent, filepath.Base(p))
} else {
targetPath = p
}

stager.Current = targetPath

lstat, err := os.Lstat(targetPath)
newRoot, err := r.indexPath(targetPath, lstat, err)
if err != nil && !errors.Is(err, ErrSkipPath) && !errors.Is(err, fs.SkipDir) {
return nil, fmt.Errorf("unable to index ancestor path=%q: %w", targetPath, err)
}
if newRoot != "" {
roots = append(roots, newRoot)
}
}

return roots, nil
}

func allContainedPaths(p string) []string {
var all []string
var currentPath string

cleanPath := strings.TrimSpace(p)

if cleanPath == "" {
return nil
}

// iterate through all parts of the path, replacing path elements with link resolutions where possible.
for idx, part := range strings.Split(filepath.Clean(cleanPath), file.DirSeparator) {
if idx == 0 && part == "" {
currentPath = file.DirSeparator
continue
}

// cumulatively gather where we are currently at and provide a rich object
currentPath = path.Join(currentPath, part)
all = append(all, currentPath)
}
return all
}

func (r *directoryIndexer) indexPath(path string, info os.FileInfo, err error) (string, error) {
// ignore any path which a filter function returns true
for _, filterFn := range r.pathIndexVisitors {
Expand Down
69 changes: 63 additions & 6 deletions syft/internal/fileresolver/directory_indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,8 @@ func TestDirectoryIndexer_SkipsAlreadyVisitedLinkDestinations(t *testing.T) {
var observedPaths []string
pathObserver := func(p string, _ os.FileInfo, _ error) error {
fields := strings.Split(p, "test-fixtures/symlinks-prune-indexing")
if len(fields) != 2 {
t.Fatalf("unable to parse path: %s", p)
if len(fields) < 2 {
return nil
}
clean := strings.TrimLeft(fields[1], "/")
if clean != "" {
Expand Down Expand Up @@ -261,9 +261,11 @@ func TestDirectoryIndexer_SkipsAlreadyVisitedLinkDestinations(t *testing.T) {
"path/5/6/7/8/dont-index-me-twice-either.txt",
"path/file.txt",
// everything below is after the original tree is indexed, and we are now indexing additional roots from symlinks
"path", // considered from symlink before-path, but pruned
"before-path/file.txt", // considered from symlink c-file.txt, but pruned
"before-path", // considered from symlink c-path, but pruned
"path", // considered from symlink before-path, but pruned
"path/file.txt", // leaf
"before-path", // considered from symlink c-path, but pruned
"path/file.txt", // leaf
"before-path", // considered from symlink c-path, but pruned
}

assert.Equal(t, expected, observedPaths, "visited paths differ \n %s", cmp.Diff(expected, observedPaths))
Expand All @@ -282,7 +284,7 @@ func TestDirectoryIndexer_IndexesAllTypes(t *testing.T) {
for _, ref := range allRefs {
fields := strings.Split(string(ref.RealPath), "test-fixtures/symlinks-prune-indexing")
if len(fields) != 2 {
t.Fatalf("unable to parse path: %s", ref.RealPath)
continue
}
clean := strings.TrimLeft(fields[1], "/")
if clean == "" {
Expand Down Expand Up @@ -326,3 +328,58 @@ func TestDirectoryIndexer_IndexesAllTypes(t *testing.T) {
}

}

func Test_allContainedPaths(t *testing.T) {

tests := []struct {
name string
path string
want []string
}{
{
name: "empty",
path: "",
want: nil,
},
{
name: "single relative",
path: "a",
want: []string{"a"},
},
{
name: "single absolute",
path: "/a",
want: []string{"/a"},
},
{
name: "multiple relative",
path: "a/b/c",
want: []string{"a", "a/b", "a/b/c"},
},
{
name: "multiple absolute",
path: "/a/b/c",
want: []string{"/a", "/a/b", "/a/b/c"},
},
{
name: "multiple absolute with extra slashs",
path: "///a/b//c/",
want: []string{"/a", "/a/b", "/a/b/c"},
},
{
name: "relative with single dot",
path: "a/./b",
want: []string{"a", "a/b"},
},
{
name: "relative with double single dot",
path: "a/../b",
want: []string{"b"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.want, allContainedPaths(tt.path))
})
}
}