Skip to content

Commit

Permalink
add additional catalog indexes for performance
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
  • Loading branch information
wagoodman committed Jan 24, 2023
1 parent c5ff155 commit 527aaf3
Show file tree
Hide file tree
Showing 9 changed files with 993 additions and 111 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04
github.com/becheran/wildmatch-go v1.0.0
github.com/bmatcuk/doublestar/v4 v4.0.2
github.com/containerd/containerd v1.6.12
github.com/docker/cli v20.10.12+incompatible
Expand All @@ -15,6 +16,7 @@ require (
github.com/docker/docker v20.10.12+incompatible
github.com/gabriel-vasile/mimetype v1.4.0
github.com/go-test/deep v1.0.8
github.com/google/go-cmp v0.5.6
github.com/google/go-containerregistry v0.7.0
github.com/hashicorp/go-multierror v1.1.1
github.com/logrusorgru/aurora v0.0.0-20200102142835-e9ef32dff381
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ github.com/aws/smithy-go v1.6.0 h1:T6puApfBcYiTIsaI+SYWqanjMt5pc3aoyyDrI+0YH54=
github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E=
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04 h1:p2I85zYI9z5/c/3Q0LiO3RtNXcmXHTtJfml/hV16zNg=
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04/go.mod h1:Z+bXnIbhKJYSvxNwsNnwde7pDKxuqlEZCbUBoTwAqf0=
github.com/becheran/wildmatch-go v1.0.0 h1:mE3dGGkTmpKtT4Z+88t8RStG40yN9T+kFEGj2PZFSzA=
github.com/becheran/wildmatch-go v1.0.0/go.mod h1:gbMvj0NtVdJ15Mg/mH9uxk2R1QCistMyU7d9KFzroX4=
github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM=
github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
Expand Down
2 changes: 1 addition & 1 deletion pkg/file/tarutil_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func TestMetadataFromTar(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
f := getTarFixture(t, "fixture-1")
f := getTarFixture(t, test.fixture)
metadata, err := MetadataFromTar(f, test.name)
assert.NoError(t, err)
assert.Equal(t, test.expected, metadata)
Expand Down
39 changes: 38 additions & 1 deletion pkg/image/content_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,46 @@ func fetchFileContentsByPath(ft *filetree.FileTree, fileCatalog *FileCatalog, pa
func fetchFilesByMIMEType(ft *filetree.FileTree, fileCatalog *FileCatalog, mType string) ([]file.Reference, error) {
	fileEntries, err := fileCatalog.GetByMIMEType(mType)
	if err != nil {
		// name the requested MIME type so a failure can be traced back to a specific query
		return nil, fmt.Errorf("unable to fetch file references by MIME type (%q): %w", mType, err)
	}

	// resolve the catalog-wide matches relative to the given tree
	return filterCatalogFilesRelativesToTree(ft, fileEntries)
}

// fetchFilesByExtension looks up every catalog entry indexed under the given file extension and hands the
// matches to filterCatalogFilesRelativesToTree so that they are resolved relative to the given tree. A failed
// catalog lookup is wrapped with the requested extension.
func fetchFilesByExtension(ft *filetree.FileTree, fileCatalog *FileCatalog, extension string) ([]file.Reference, error) {
	matches, lookupErr := fileCatalog.GetByExtension(extension)
	if lookupErr != nil {
		return nil, fmt.Errorf("unable to fetch file references by extension (%q): %w", extension, lookupErr)
	}
	return filterCatalogFilesRelativesToTree(ft, matches)
}

// fetchFilesByBasename looks up every catalog entry indexed under the given file basename from the file catalog
// and hands the matches to filterCatalogFilesRelativesToTree so that they are resolved relative to the given
// tree. A failed catalog lookup is wrapped with the requested basename.
func fetchFilesByBasename(ft *filetree.FileTree, fileCatalog *FileCatalog, basename string) ([]file.Reference, error) {
	matches, lookupErr := fileCatalog.GetByBasename(basename)
	if lookupErr != nil {
		return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", basename, lookupErr)
	}
	return filterCatalogFilesRelativesToTree(ft, matches)
}

// fetchFilesByBasenameGlob looks up every catalog entry whose basename matches the given glob pattern from the
// file catalog and hands the matches to filterCatalogFilesRelativesToTree so that they are resolved relative to
// the given tree. A failed catalog lookup is wrapped with the requested pattern.
func fetchFilesByBasenameGlob(ft *filetree.FileTree, fileCatalog *FileCatalog, basenameGlob string) ([]file.Reference, error) {
	matches, lookupErr := fileCatalog.GetByBasenameGlob(basenameGlob)
	if lookupErr != nil {
		return nil, fmt.Errorf("unable to fetch file references by basename glob (%q): %w", basenameGlob, lookupErr)
	}
	return filterCatalogFilesRelativesToTree(ft, matches)
}

func filterCatalogFilesRelativesToTree(ft *filetree.FileTree, fileEntries []FileCatalogEntry) ([]file.Reference, error) {
var refs []file.Reference
for _, entry := range fileEntries {
_, ref, err := ft.File(entry.File.RealPath, filetree.FollowBasenameLinks)
Expand Down
141 changes: 133 additions & 8 deletions pkg/image/file_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ package image

import (
"fmt"
"github.com/becheran/wildmatch-go"
"io"
"path"
"strings"
"sync"

"github.com/anchore/stereoscope/pkg/file"
Expand All @@ -14,8 +17,11 @@ var ErrFileNotFound = fmt.Errorf("could not find file")
// blobs (i.e. everything except for the image index/manifest/metadata files).
type FileCatalog struct {
	// guards every field below
	sync.RWMutex
	// catalog holds the full entry for each file, keyed by file ID
	catalog map[file.ID]FileCatalogEntry
	// byMIMEType indexes file IDs by MIME type; files with an empty MIME type are not indexed here
	byMIMEType map[string][]file.ID
	// byExtension indexes file IDs by each extension of the file's real path (as produced by fileExtensions)
	byExtension map[string][]file.ID
	// byFilename indexes file IDs by the basename of the file's real path
	byFilename map[string][]file.ID
	// basenames records the basename of every added file in insertion order; a basename shared by several
	// files appears once per file
	basenames []string
}

// FileCatalogEntry represents all stored metadata for a single file reference.
Expand All @@ -29,8 +35,10 @@ type FileCatalogEntry struct {
// NewFileCatalog returns an empty FileCatalog with all lookup indexes initialized and ready for Add. The list
// of basenames is left nil and grows as files are added.
func NewFileCatalog() FileCatalog {
	return FileCatalog{
		catalog:     make(map[file.ID]FileCatalogEntry),
		byMIMEType:  make(map[string][]file.ID),
		byExtension: make(map[string][]file.ID),
		byFilename:  make(map[string][]file.ID),
	}
}

Expand All @@ -39,12 +47,23 @@ func NewFileCatalog() FileCatalog {
func (c *FileCatalog) Add(f file.Reference, m file.Metadata, l *Layer, opener file.Opener) {
c.Lock()
defer c.Unlock()
id := f.ID()

if m.MIMEType != "" {
// an empty MIME type means that we didn't have the contents of the file to determine the MIME type. If we have
// the contents and the MIME type could not be determined then the default value is application/octet-stream.
c.byMIMEType[m.MIMEType] = append(c.byMIMEType[m.MIMEType], f.ID())
c.byMIMEType[m.MIMEType] = append(c.byMIMEType[m.MIMEType], id)
}
c.catalog[f.ID()] = FileCatalogEntry{

basename := path.Base(string(f.RealPath))
c.byFilename[basename] = append(c.byFilename[basename], id)
c.basenames = append(c.basenames, basename)

for _, ext := range fileExtensions(string(f.RealPath)) {
c.byExtension[ext] = append(c.byExtension[ext], id)
}

c.catalog[id] = FileCatalogEntry{
File: f,
Metadata: m,
Layer: l,
Expand Down Expand Up @@ -72,26 +91,108 @@ func (c *FileCatalog) Get(f file.Reference) (FileCatalogEntry, error) {
return value, nil
}

// Basenames returns the basename of every file added to the catalog, in insertion order. A basename shared by
// several files appears once per file. The returned slice is the catalog's own slice, not a copy.
// NOTE(review): callers should treat the result as read-only.
func (c *FileCatalog) Basenames() []string {
	c.RLock()
	names := c.basenames
	c.RUnlock()

	return names
}

// GetByMIMEType returns the catalog entries of all files indexed under the given MIME type. When nothing is
// indexed under that MIME type the result is nil with a nil error. ErrFileNotFound is returned if an indexed
// file ID has no corresponding catalog entry.
func (c *FileCatalog) GetByMIMEType(mType string) ([]FileCatalogEntry, error) {
	c.RLock()
	defer c.RUnlock()

	var matches []FileCatalogEntry
	// ranging over a missing key iterates zero times, which leaves matches nil
	for _, fileID := range c.byMIMEType[mType] {
		found, known := c.catalog[fileID]
		if !known {
			return nil, ErrFileNotFound
		}
		matches = append(matches, found)
	}

	return matches, nil
}

// GetByExtension returns the catalog entries of all files indexed under the given extension. Extensions are
// indexed with their leading dot and multi-part extensions are indexed too (e.g. both ".gz" and ".tar.gz").
// When nothing is indexed under that extension the result is nil with a nil error. ErrFileNotFound is returned
// if an indexed file ID has no corresponding catalog entry.
func (c *FileCatalog) GetByExtension(extension string) ([]FileCatalogEntry, error) {
	c.RLock()
	defer c.RUnlock()

	fileIDs, ok := c.byExtension[extension]
	if !ok {
		return nil, nil
	}

	var entries []FileCatalogEntry
	for _, id := range fileIDs {
		entry, ok := c.catalog[id]
		if !ok {
			// return the same sentinel as the other index lookups so callers can test for it uniformly
			return nil, ErrFileNotFound
		}
		entries = append(entries, entry)
	}

	return entries, nil
}

// GetByBasename returns the catalog entries of all files whose real path has the given basename. An error is
// returned if the basename contains a directory separator. When nothing is indexed under that basename the
// result is nil with a nil error. ErrFileNotFound is returned if an indexed file ID has no corresponding
// catalog entry.
func (c *FileCatalog) GetByBasename(basename string) ([]FileCatalogEntry, error) {
	c.RLock()
	defer c.RUnlock()

	if strings.Contains(basename, "/") {
		return nil, fmt.Errorf("found directory separator in a basename")
	}

	var matches []FileCatalogEntry
	// ranging over a missing key iterates zero times, which leaves matches nil
	for _, fileID := range c.byFilename[basename] {
		found, known := c.catalog[fileID]
		if !known {
			return nil, ErrFileNotFound
		}
		matches = append(matches, found)
	}

	return matches, nil
}

// GetByBasenameGlob returns the catalog entries of all files whose basename matches the given wildcard pattern.
// Patterns containing "**" or a directory separator are rejected with an error. Each matching file is returned
// once, grouped by basename in the order basenames were first added. ErrFileNotFound (wrapped with the
// basename) is returned if an indexed file ID has no corresponding catalog entry.
func (c *FileCatalog) GetByBasenameGlob(glob string) ([]FileCatalogEntry, error) {
	c.RLock()
	defer c.RUnlock()

	if strings.Contains(glob, "**") {
		return nil, fmt.Errorf("basename glob patterns with '**' are not supported")
	}
	if strings.Contains(glob, "/") {
		return nil, fmt.Errorf("found directory separator in a basename")
	}

	patternObj := wildmatch.NewWildMatch(glob)

	// basenames holds one element per added file, so a basename shared by several files shows up repeatedly;
	// track what has been handled so that each file is reported once
	seen := make(map[string]struct{})

	var fileEntries []FileCatalogEntry
	for _, b := range c.basenames {
		if _, done := seen[b]; done {
			continue
		}
		seen[b] = struct{}{}

		if !patternObj.IsMatch(b) {
			continue
		}

		// read the indexes directly instead of calling Basenames/GetByBasename: those acquire the read lock
		// again, and recursive read-locking can deadlock once a writer is waiting. This also means a matched
		// "/" basename (the root path) is returned rather than failing the whole query.
		for _, id := range c.byFilename[b] {
			entry, ok := c.catalog[id]
			if !ok {
				return nil, fmt.Errorf("unable to fetch file references by basename (%q): %w", b, ErrFileNotFound)
			}
			fileEntries = append(fileEntries, entry)
		}
	}

	return fileEntries, nil
}

// FileContents reads the file contents for the given file reference from the underlying image/layer blob. An error
// is returned if there is no file at the given path and layer or the read operation cannot continue.
func (c *FileCatalog) FileContents(f file.Reference) (io.ReadCloser, error) {
c.RLock()
Expand All @@ -107,3 +208,27 @@ func (c *FileCatalog) FileContents(f file.Reference) (io.ReadCloser, error) {

return catalogEntry.Contents(), nil
}

// fileExtensions returns every extension of the base name of p, shortest first, each including its leading
// dot (e.g. "a/b.tar.gz" yields ".gz" then ".tar.gz"). Surrounding whitespace is trimmed first. Paths ending in
// "." or "/" yield nothing, and leading dots of the base name (hidden files) are not treated as extensions.
func fileExtensions(p string) []string {
	p = strings.TrimSpace(p)

	// trailing dots are oddities and trailing slashes denote directories; neither has extensions
	if strings.HasSuffix(p, ".") || strings.HasSuffix(p, "/") {
		return nil
	}

	// drop the leading dots that only mark a hidden file
	name := strings.TrimLeft(path.Base(p), ".")

	var found []string
	// walk the dots from right to left; each one starts a progressively longer extension
	for end := len(name); ; {
		dot := strings.LastIndexByte(name[:end], '.')
		if dot < 0 {
			break
		}
		found = append(found, name[dot:])
		end = dot
	}
	return found
}
Loading

0 comments on commit 527aaf3

Please sign in to comment.