Skip to content

Commit

Permalink
support for scanning license files in golang packages
Browse files Browse the repository at this point in the history
Signed-off-by: Avi Deitcher <avi@deitcher.net>
  • Loading branch information
deitch committed Mar 3, 2023
1 parent 01230aa commit f399e5c
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 0 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ require (
github.com/anchore/stereoscope v0.0.0-20230301191755-abfb374a1122
github.com/docker/docker v23.0.1+incompatible
github.com/google/go-containerregistry v0.13.0
github.com/google/licensecheck v0.3.1
github.com/invopop/jsonschema v0.7.0
github.com/knqyf263/go-rpmdb v0.0.0-20221030135625-4082a22221ce
github.com/opencontainers/go-digest v1.0.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
github.com/google/go-containerregistry v0.13.0 h1:y1C7Z3e149OJbOPDBxLYR8ITPz8dTKqQwjErKVHJC8k=
github.com/google/go-containerregistry v0.13.0/go.mod h1:J9FQ+eSS4a1aC2GNZxvNpbWhgp0487v+cgiilB4FqDo=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/licensecheck v0.3.1 h1:QoxgoDkaeC4nFrtGN1jV7IPmDCHFNIVh54e5hSt6sPs=
github.com/google/licensecheck v0.3.1/go.mod h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
Expand Down
58 changes: 58 additions & 0 deletions internal/licenses/list.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package licenses

// All of these names are taken from https://github.com/golang/pkgsite/blob/8996ff632abee854aef1b764ca0501f262f8f523/internal/licenses/licenses.go#L338
// which unfortunately does not export the list, but fortunately is under a BSD-style license.

// FileNames lists the base names of files that are treated as license files.
// The order of the entries is preserved from the upstream list.
var FileNames = []string{
	// COPYING / LICENCE / LICENSE, with the common documentation extensions
	"COPYING",
	"COPYING.md",
	"COPYING.markdown",
	"COPYING.txt",
	"LICENCE",
	"LICENCE.md",
	"LICENCE.markdown",
	"LICENCE.txt",
	"LICENSE",
	"LICENSE.md",
	"LICENSE.markdown",
	"LICENSE.txt",

	// names carrying a license identifier or scope as a suffix
	"LICENSE-2.0.txt",
	"LICENCE-2.0.txt",
	"LICENSE-APACHE",
	"LICENCE-APACHE",
	"LICENSE-APACHE-2.0.txt",
	"LICENCE-APACHE-2.0.txt",
	"LICENSE-MIT",
	"LICENCE-MIT",
	"LICENSE.MIT",
	"LICENCE.MIT",
	"LICENSE.code",
	"LICENCE.code",
	"LICENSE.docs",
	"LICENCE.docs",
	"LICENSE.rst",
	"LICENCE.rst",

	// names carrying "MIT" as a prefix
	"MIT-LICENSE",
	"MIT-LICENCE",
	"MIT-LICENSE.md",
	"MIT-LICENCE.md",
	"MIT-LICENSE.markdown",
	"MIT-LICENCE.markdown",
	"MIT-LICENSE.txt",
	"MIT-LICENCE.txt",
	"MIT_LICENSE",
	"MIT_LICENCE",

	// the Unlicense
	"UNLICENSE",
	"UNLICENCE",
}

// fileNames is a set view of FileNames, keyed by file name, used for constant-time
// membership checks. It is built once during package initialization.
var fileNames = func() map[string]bool {
	set := make(map[string]bool, len(FileNames))
	for i := range FileNames {
		set[FileNames[i]] = true
	}
	return set
}()
65 changes: 65 additions & 0 deletions internal/licenses/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package licenses

import (
"io"
"io/fs"
"path/filepath"
"strings"

"github.com/google/licensecheck"
)

const (
// coverageThreshold is the percentage that ScanLicenses compares against the
// coverage reported by licensecheck; a file scoring below it additionally
// yields unknownLicenseType.
coverageThreshold = 75
// unknownLicenseType is the identifier ScanLicenses reports for a license file
// whose scan coverage is below coverageThreshold.
unknownLicenseType = "UNKNOWN"
)

// ScanLicenses walks fsys looking for license files and returns the license
// identifiers detected in them.
//
// Only non-directory entries whose base name appears in FileNames are considered,
// and anything located beneath a directory named "vendor" is ignored. The contents
// of each candidate are scanned with github.com/google/licensecheck: every match ID
// is appended to the result, preceded by unknownLicenseType when the overall
// coverage of the file is below coverageThreshold percent.
//
// Scanning is best effort: entries that cannot be walked, opened or read are
// skipped silently. The result is nil when nothing was detected and may contain
// duplicates when several files carry the same license.
func ScanLicenses(fsys fs.FS) []string {
	var licenses []string

	// The walk error is deliberately discarded: the callback never returns one, and
	// a partially readable tree should still contribute whatever could be scanned.
	_ = fs.WalkDir(fsys, ".", func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return nil
		}
		// directories have no contents to scan, even if they are named like a license
		if d.IsDir() {
			return nil
		}
		// ignore any that are not a known license file name
		if !fileNames[filepath.Base(p)] {
			return nil
		}
		// make sure it is not in a vendored path; this is decided per entry so that a
		// vendored file does not suppress license files visited later in the walk
		for _, part := range strings.Split(filepath.Dir(p), string(filepath.Separator)) {
			if part == "vendor" {
				return nil
			}
		}
		// read the file contents
		rc, err := fsys.Open(p)
		if err != nil {
			return nil
		}
		defer rc.Close()
		contents, err := io.ReadAll(rc)
		if err != nil {
			return nil
		}
		cov := licensecheck.Scan(contents)

		// low overall coverage means part of the text is unrecognized; flag that, but
		// still report whatever licenses were matched
		if cov.Percent < float64(coverageThreshold) {
			licenses = append(licenses, unknownLicenseType)
		}
		for _, m := range cov.Match {
			licenses = append(licenses, m.ID)
		}
		return nil
	})
	return licenses
}
57 changes: 57 additions & 0 deletions syft/pkg/cataloger/golang/package.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,29 @@
package golang

import (
"archive/zip"
"bytes"
"fmt"
"io"
"io/fs"
"net/http"
"os"
"path/filepath"
"regexp"
"runtime/debug"
"strings"

"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)

// DefaultGoProxy is the base URL of the Go module proxy that goLicenses passes to
// getModule.
const DefaultGoProxy = "https://proxy.golang.org"

// disableRemotePackage, while true, makes getModule return an error for modules it
// cannot find locally instead of downloading them from the proxy.
// this to be removed when we enable remote retrieval of go modules
const disableRemotePackage = true

func newGoBinaryPackage(dep *debug.Module, mainModule, goVersion, architecture string, buildSettings map[string]string, locations ...source.Location) pkg.Package {
if dep.Replace != nil {
dep = dep.Replace
Expand All @@ -18,6 +32,7 @@ func newGoBinaryPackage(dep *debug.Module, mainModule, goVersion, architecture s
p := pkg.Package{
Name: dep.Path,
Version: dep.Version,
Licenses: goLicenses(dep.Path, dep.Version),
PURL: packageURL(dep.Path, dep.Version),
Language: pkg.Go,
Type: pkg.GoModulePkg,
Expand Down Expand Up @@ -67,3 +82,45 @@ func packageURL(moduleName, moduleVersion string) string {
subpath,
).ToString()
}

// goLicenses returns the license identifiers found in the source tree of the given
// module version, or nil when the module source cannot be obtained.
func goLicenses(moduleName, moduleVersion string) []string {
	moduleFS, err := getModule(moduleName, moduleVersion, DefaultGoProxy)
	if err == nil {
		return licenses.ScanLicenses(moduleFS)
	}
	// the module could not be located; report no licenses rather than failing
	return nil
}

// getModule returns a filesystem containing the source of the given module version.
//
// The local module cache under the first GOPATH entry (<GOPATH>/pkg/mod) is
// consulted first. If the module is not there and remote retrieval is enabled
// (disableRemotePackage is false), the module zip is downloaded from proxy, buffered
// in memory and returned as a zip-backed filesystem.
//
// module and version are given in their canonical form; the case-escaping used by
// both the module cache and the proxy protocol is applied internally.
//
// An error is returned when the module cannot be found locally and remote retrieval
// is disabled, or when the download or the zip parsing fails.
func getModule(module, version, proxy string) (fs.FS, error) {
	// The module cache and the proxy both address modules by their escaped form, in
	// which each uppercase letter is written as '!' plus its lowercase equivalent.
	escapedModule := escapeModuleCase(module)
	escapedVersion := escapeModuleCase(version)

	// first see if we have it locally; GOPATH may be a list, and the module cache
	// lives under its first entry
	if goPaths := filepath.SplitList(os.Getenv("GOPATH")); len(goPaths) > 0 && goPaths[0] != "" {
		modPath := filepath.Join(goPaths[0], "pkg", "mod", fmt.Sprintf("%s@%s", escapedModule, escapedVersion))
		if fi, err := os.Stat(modPath); err == nil && fi.IsDir() {
			return os.DirFS(modPath), nil
		}
	}

	if disableRemotePackage {
		return nil, fmt.Errorf("module %s@%s not found locally", module, version)
	}

	// we could not get it locally, so get it from the proxy, but only if network is enabled

	// get the module zip
	resp, err := http.Get(fmt.Sprintf("%s/%s/@v/%s.zip", proxy, escapedModule, escapedVersion))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
	}
	// read the zip
	b, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	// Size the reader from the bytes actually received: resp.ContentLength is -1 when
	// the server does not announce a length, which zip.NewReader rejects.
	zr, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))
	if err != nil {
		// return an untyped nil so the fs.FS result is really nil on failure
		return nil, err
	}
	return zr, nil
}

// escapeModuleCase rewrites every ASCII uppercase letter in s as '!' followed by its
// lowercase form, leaving all other characters untouched.
func escapeModuleCase(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		if 'A' <= r && r <= 'Z' {
			b.WriteByte('!')
			r += 'a' - 'A'
		}
		b.WriteRune(r)
	}
	return b.String()
}

0 comments on commit f399e5c

Please sign in to comment.