Skip to content

Commit

Permalink
feat: catalog python files for installed-files.txt file metadata (#1217)
Browse files Browse the repository at this point in the history
Co-authored-by: houdini91 <mdstrauss91@gmail.com>
  • Loading branch information
spiffcs and houdini91 committed Sep 19, 2022
1 parent c2005fa commit 04d288b
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 1 deletion.
41 changes: 40 additions & 1 deletion syft/pkg/cataloger/python/package_cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"path/filepath"

"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
Expand Down Expand Up @@ -91,13 +92,44 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad
return p, nil
}

// fetchInstalledFiles finds a corresponding installed-files.txt file for the given python package metadata file
// and returns the set of file records contained, along with the location of the installed-files.txt file.
//
// Outcomes:
//   - no installed-files.txt next to the metadata file: nil files, nil sources, nil error.
//   - the file exists but its contents cannot be opened: nil files, nil sources, and the error.
//   - the file exists but cannot be parsed: a warning is logged, and the location is still returned
//     in sources with no files and a nil error (parsing is best-effort).
func (c *PackageCataloger) fetchInstalledFiles(resolver source.FileResolver, metadataLocation source.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
	// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
	// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
	// to reconcile the installed-files.txt path to the same layer (or the next adjacent lower layer).

	// find the installed-files.txt file relative to the directory where the METADATA file resides (in path AND layer structure)
	installedFilesPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "installed-files.txt")
	installedFilesRef := resolver.RelativeFileByPath(metadataLocation, installedFilesPath)
	if installedFilesRef == nil {
		// nothing to catalog for this package
		return nil, nil, nil
	}

	sources = append(sources, *installedFilesRef)

	installedFilesContents, err := resolver.FileContentsByLocation(*installedFilesRef)
	if err != nil {
		return nil, nil, err
	}
	defer internal.CloseAndLogError(installedFilesContents, installedFilesPath)

	// parse the installed-files contents
	installedFiles, err := parseInstalledFiles(installedFilesContents, metadataLocation.RealPath, sitePackagesRootPath)
	if err != nil {
		// %w is only meaningful to fmt.Errorf; use %v so the error text is rendered correctly in the log line
		log.Warnf("unable to parse installed-files.txt for python package=%+v: %v", metadataLocation.RealPath, err)
		return files, sources, nil
	}

	files = append(files, installedFiles...)
	return files, sources, nil
}

// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
func (c *PackageCataloger) fetchRecordFiles(resolver source.FileResolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).

// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
// find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
recordPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "RECORD")
recordRef := resolver.RelativeFileByPath(metadataLocation, recordPath)

Expand Down Expand Up @@ -206,6 +238,13 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv
if err != nil {
return nil, nil, err
}
if len(r) == 0 {
r, s, err = c.fetchInstalledFiles(resolver, metadataLocation, metadata.SitePackagesRootPath)
if err != nil {
return nil, nil, err
}
}

sources = append(sources, s...)
metadata.Files = r

Expand Down
33 changes: 33 additions & 0 deletions syft/pkg/cataloger/python/parse_wheel_egg_record.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package python

import (
"bufio"
"encoding/csv"
"fmt"
"io"
"path/filepath"
"strings"

"github.com/anchore/syft/internal/log"
Expand Down Expand Up @@ -59,3 +61,34 @@ func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) {

return records, nil
}

// parseInstalledFiles reads an installed-files.txt listing (one path per line) from reader and returns one
// pkg.PythonFileRecord per line, in the order the lines appear.
//
// When both location and sitePackagesRootPath are non-empty, each entry is joined onto the directory that
// contains location and then rewritten relative to sitePackagesRootPath. Otherwise entries are recorded as
// written in the file.
//
// A final line that is not terminated by a newline is still recorded. An error is returned if reading fails
// (for any reason other than reaching the end of the input) or if a path cannot be made relative to
// sitePackagesRootPath.
func parseInstalledFiles(reader io.Reader, location, sitePackagesRootPath string) ([]pkg.PythonFileRecord, error) {
	var installedFiles []pkg.PythonFileRecord
	r := bufio.NewReader(reader)

	for {
		line, err := r.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, fmt.Errorf("unable to read python installed-files file: %w", err)
		}

		// ReadString hands back whatever was read before EOF together with io.EOF, so an unterminated
		// last line must still be processed; only a truly empty remainder ends the loop immediately.
		atEOF := err == io.EOF
		if atEOF && line == "" {
			break
		}

		if location != "" && sitePackagesRootPath != "" {
			joinedPath := filepath.Join(filepath.Dir(location), line)
			line, err = filepath.Rel(sitePackagesRootPath, joinedPath)
			if err != nil {
				return nil, err
			}
		}

		installedFile := pkg.PythonFileRecord{
			Path: strings.ReplaceAll(line, "\n", ""),
		}

		installedFiles = append(installedFiles, installedFile)

		if atEOF {
			break
		}
	}

	return installedFiles, nil
}
37 changes: 37 additions & 0 deletions syft/pkg/cataloger/python/parse_wheel_egg_record_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,42 @@ func TestParseWheelEggRecord(t *testing.T) {
}
})
}
}

// TestParseInstalledFiles checks that parseInstalledFiles turns each line of an installed-files.txt fixture
// into a pkg.PythonFileRecord. The location and site-packages root are passed as empty strings, so the
// expected paths are exactly the lines as written in the fixture.
func TestParseInstalledFiles(t *testing.T) {
	tests := []struct {
		Fixture          string
		ExpectedMetadata []pkg.PythonFileRecord
	}{
		{
			Fixture: "test-fixtures/installed-files/installed-files.txt",
			ExpectedMetadata: []pkg.PythonFileRecord{
				{Path: "../__pycache__/dicttoxml.cpython-36.pyc"},
				{Path: "../dicttoxml.py"},
				{Path: "PKG-INFO"},
				{Path: "SOURCES.txt"},
				{Path: "dependency_links.txt"},
				{Path: "top_level.txt"},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.Fixture, func(t *testing.T) {
			fixture, err := os.Open(test.Fixture)
			if err != nil {
				t.Fatalf("failed to open fixture: %+v", err)
			}
			// release the file handle when the subtest finishes; a close failure on a
			// read-only fixture cannot affect the assertions below, so it is ignored
			defer func() { _ = fixture.Close() }()

			actual, err := parseInstalledFiles(fixture, "", "")
			if err != nil {
				t.Fatalf("failed to parse: %+v", err)
			}

			for _, d := range deep.Equal(actual, test.ExpectedMetadata) {
				t.Errorf("diff: %+v", d)
			}
		})
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
../__pycache__/dicttoxml.cpython-36.pyc
../dicttoxml.py
PKG-INFO
SOURCES.txt
dependency_links.txt
top_level.txt

0 comments on commit 04d288b

Please sign in to comment.