-
Notifications
You must be signed in to change notification settings - Fork 521
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fill in SPDX originator for all supported package types (#2822)
* add failing test + beef up doc comments

  Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* cover more metadata types in spdx originator processing

  Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
- Loading branch information
Showing
6 changed files
with
698 additions
and
169 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
210 changes: 210 additions & 0 deletions
210
syft/format/internal/spdxutil/helpers/originator_supplier.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
package helpers | ||
|
||
import ( | ||
"fmt" | ||
"regexp" | ||
"strings" | ||
|
||
"github.com/anchore/syft/internal" | ||
"github.com/anchore/syft/syft/pkg" | ||
) | ||
|
||
// SPDX entity kinds that may prefix a package originator or supplier value
// (rendered by callers as "<type>: <value>").
const (
	// orgType marks the value as the name of an organization.
	orgType = "Organization"
	// personType marks the value as the name of an individual.
	personType = "Person"
)
|
||
// Originator needs to conform to the SPDX spec here: | ||
// https://spdx.github.io/spdx-spec/v2.2.2/package-information/#76-package-originator-field | ||
// | ||
// Definition: | ||
// | ||
// If the package identified in the SPDX document originated from a different person or | ||
// organization than identified as Package Supplier (see 7.5 above), this field identifies from | ||
// where or whom the package originally came. In some cases, a package may be created and | ||
// originally distributed by a different third party than the Package Supplier of the package. | ||
// For example, the SPDX document identifies the package as glibc and the Package Supplier as | ||
// Red Hat, but the Free Software Foundation is the Package Originator. | ||
// | ||
// Use NOASSERTION if: | ||
// | ||
// - the SPDX document creator has attempted to but cannot reach a reasonable objective determination; | ||
// - the SPDX document creator has made no attempt to determine this field; or | ||
// - the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so). | ||
// | ||
// Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org> | ||
// return values are: <type>, <value> | ||
func Originator(p pkg.Package) (typ string, author string) { // nolint: funlen | ||
if !hasMetadata(p) { | ||
return typ, author | ||
} | ||
|
||
switch metadata := p.Metadata.(type) { | ||
case pkg.ApkDBEntry: | ||
author = metadata.Maintainer | ||
|
||
case pkg.DotnetPortableExecutableEntry: | ||
typ = orgType | ||
author = metadata.CompanyName | ||
|
||
case pkg.DpkgDBEntry: | ||
author = metadata.Maintainer | ||
|
||
case pkg.JavaArchive: | ||
if metadata.Manifest != nil { | ||
author = metadata.Manifest.Main.MustGet("Specification-Vendor") | ||
if author == "" { | ||
author = metadata.Manifest.Main.MustGet("Implementation-Vendor") | ||
} | ||
} | ||
|
||
case pkg.LinuxKernelModule: | ||
author = metadata.Author | ||
|
||
case pkg.PhpComposerLockEntry: | ||
if len(metadata.Authors) > 0 { | ||
entry := metadata.Authors[0] | ||
author = formatPersonOrOrg(entry.Name, entry.Email) | ||
} | ||
|
||
case pkg.PhpComposerInstalledEntry: | ||
if len(metadata.Authors) > 0 { | ||
entry := metadata.Authors[0] | ||
author = formatPersonOrOrg(entry.Name, entry.Email) | ||
} | ||
|
||
case pkg.RDescription: | ||
// this is most likely to have a name and email | ||
author = metadata.Maintainer | ||
|
||
if author == "" { | ||
author = metadata.Author | ||
} | ||
|
||
case pkg.NpmPackage: | ||
author = metadata.Author | ||
|
||
case pkg.PythonPackage: | ||
author = formatPersonOrOrg(metadata.Author, metadata.AuthorEmail) | ||
|
||
case pkg.RubyGemspec: | ||
if len(metadata.Authors) > 0 { | ||
author = metadata.Authors[0] | ||
} | ||
case pkg.RpmDBEntry: | ||
typ = orgType | ||
author = metadata.Vendor | ||
|
||
case pkg.RpmArchive: | ||
typ = orgType | ||
author = metadata.Vendor | ||
|
||
case pkg.WordpressPluginEntry: | ||
// it seems that the vast majority of the time the author is an org, not a person | ||
typ = orgType | ||
author = metadata.Author | ||
} | ||
|
||
if typ == "" && author != "" { | ||
typ = personType | ||
} | ||
|
||
return typ, parseAndFormatPersonOrOrg(author) | ||
} | ||
|
||
// Supplier needs to conform to the SPDX spec here: | ||
// https://spdx.github.io/spdx-spec/v2.2.2/package-information/#75-package-supplier-field | ||
// | ||
// Definition: | ||
// | ||
// Identify the actual distribution source for the package/directory identified in the SPDX document. This might | ||
// or might not be different from the originating distribution source for the package. The name of the Package Supplier | ||
// shall be an organization or recognized author and not a web site. For example, SourceForge is a host website, not a | ||
// supplier, the supplier for https://sourceforge.net/projects/bridge/ is “The Linux Foundation.” | ||
// | ||
// Use NOASSERTION if: | ||
// | ||
// - the SPDX document creator has attempted to but cannot reach a reasonable objective determination; | ||
// - the SPDX document creator has made no attempt to determine this field; or | ||
// - the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so). | ||
// | ||
// Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org> | ||
// return values are: <type>, <value> | ||
func Supplier(p pkg.Package) (typ string, author string) { | ||
if !hasMetadata(p) { | ||
return | ||
} | ||
|
||
if metadata, ok := p.Metadata.(pkg.AlpmDBEntry); ok { | ||
// most indications here are that this is the person that is simply packaging the upstream software. Most | ||
// of the time this is not the original author of the upstream software (which would be the originator). | ||
// Though it is possible for users to be both the packager and the author, this code cannot distinct this | ||
// case and sticks to the semantically correct interpretation of the "packager" (which says nothing about the | ||
// authorship of the upstream software). | ||
author = metadata.Packager | ||
} | ||
|
||
if author == "" { | ||
// TODO: this uses the Originator function for now until a better distinction can be made for supplier | ||
return Originator(p) | ||
} | ||
|
||
if typ == "" && author != "" { | ||
typ = personType | ||
} | ||
|
||
return typ, parseAndFormatPersonOrOrg(author) | ||
} | ||
|
||
// nameEmailURLPattern matches author strings of the form "name <email> (url)", where the " <email>" and " (url)"
// parts are each optional. The pieces are exposed through the named capture groups "name", "email", and "url".
//
// The email domain accepts one or more dot-separated labels that may contain hyphens (e.g. "mail.example.co.uk" or
// "my-host.org"). Since the pattern is anchored, a domain that is rejected fails the match for the whole string,
// which would otherwise discard the name as well.
var nameEmailURLPattern = regexp.MustCompile(`^(?P<name>[^<>()]*)( <(?P<email>[^@]+@[\w-]+(?:\.[\w-]+)+)>)?( \((?P<url>.*)\))?$`)
|
||
func parseAndFormatPersonOrOrg(s string) string { | ||
name, email, _ := parseNameEmailURL(s) | ||
return formatPersonOrOrg(name, email) | ||
} | ||
|
||
func parseNameEmailURL(s string) (name, email, url string) { | ||
fields := internal.MatchNamedCaptureGroups(nameEmailURLPattern, s) | ||
name = strings.TrimSpace(fields["name"]) | ||
email = strings.TrimSpace(fields["email"]) | ||
url = strings.TrimSpace(fields["url"]) | ||
|
||
if email == "" { | ||
if approximatesAsEmail(url) { | ||
email = url | ||
url = "" | ||
} else if approximatesAsEmail(name) { | ||
email = name | ||
name = "" | ||
} | ||
} | ||
return name, email, url | ||
} | ||
|
||
// approximatesAsEmail reports whether s loosely resembles an email address: it must contain an "@" and there
// must be a "." somewhere after the first "@". No further validation is performed.
func approximatesAsEmail(s string) bool {
	at := strings.IndexByte(s, '@')
	return at >= 0 && strings.Contains(s[at:], ".")
}
|
||
// formatPersonOrOrg renders a name and email as a single string after trimming surrounding whitespace from both:
//
//   - both present: "name (email)"
//   - only one present: that value alone
//   - neither present: ""
func formatPersonOrOrg(name, email string) string {
	trimmedName, trimmedEmail := strings.TrimSpace(name), strings.TrimSpace(email)

	switch {
	case trimmedName == "":
		// covers both "email only" and "nothing at all" (where the email is also empty)
		return trimmedEmail
	case trimmedEmail == "":
		return trimmedName
	default:
		return fmt.Sprintf("%s (%s)", trimmedName, trimmedEmail)
	}
}
Oops, something went wrong.