// DetermineVersionEndpoint is the URL for posting determineversion queries to OSV.
const DetermineVersionEndpoint = "https://api.osv.dev/v1experimental/determineversion"

// DetermineVersionHash holds the per file hash and path information for determineversion.
type DetermineVersionHash struct {
	// Path is the file path as sent to the API (relative to the scanned
	// library root — callers strip the root prefix before populating this).
	Path string `json:"path"`
	// Hash is the raw file-content digest; []byte is base64-encoded by
	// encoding/json on the wire.
	Hash []byte `json:"hash"`
}

// DetermineVersionResponse is the response returned by the OSV
// determineversion endpoint: candidate upstream repositories for the
// submitted file hashes, each with a match score.
type DetermineVersionResponse struct {
	Matches []struct {
		// Score is the match confidence reported by OSV; callers compare it
		// against a threshold and treat higher as a better match.
		Score float64 `json:"score"`
		// RepoInfo identifies the candidate upstream repository/version.
		RepoInfo struct {
			Type    string `json:"type"`
			Address string `json:"address"`
			Tag     string `json:"tag"`
			Version string `json:"version"`
			Commit  string `json:"commit"`
		} `json:"repo_info"`
	} `json:"matches"`
}

// determineVersionsRequest is the JSON request body posted to the
// determineversion endpoint.
type determineVersionsRequest struct {
	Name       string                 `json:"name"`
	FileHashes []DetermineVersionHash `json:"file_hashes"`
}
func MakeCommitRequest(commit string) *Query { return &Query{ @@ -302,3 +328,41 @@ func makeRetryRequest(action func() (*http.Response, error)) (*http.Response, er return resp, err } + +func MakeDetermineVersionRequest(name string, hashes []DetermineVersionHash) (*DetermineVersionResponse, error) { + var buf bytes.Buffer + + request := determineVersionsRequest{ + Name: name, + FileHashes: hashes, + } + + if err := json.NewEncoder(&buf).Encode(request); err != nil { + return nil, err + } + + //nolint:noctx + req, err := http.NewRequest(http.MethodPost, DetermineVersionEndpoint, &buf) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", "application/json") + if RequestUserAgent != "" { + req.Header.Set("User-Agent", RequestUserAgent) + } + + client := http.DefaultClient + resp, err := client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var result DetermineVersionResponse + decoder := json.NewDecoder(resp.Body) + if err := decoder.Decode(&result); err != nil { + return nil, err + } + + return &result, nil +} diff --git a/pkg/osvscanner/osvscanner.go b/pkg/osvscanner/osvscanner.go index 5ae43a8fff..fce5b039bd 100644 --- a/pkg/osvscanner/osvscanner.go +++ b/pkg/osvscanner/osvscanner.go @@ -2,8 +2,10 @@ package osvscanner import ( "bufio" + "crypto/md5" //nolint:gosec "errors" "fmt" + "io/fs" "os" "os/exec" "path" @@ -58,12 +60,35 @@ var VulnerabilitiesFoundErr = errors.New("vulnerabilities found") //nolint:errname,stylecheck // Would require version bump to change var OnlyUncalledVulnerabilitiesFoundErr = errors.New("only uncalled vulnerabilities found") +var ( + vendoredLibNames = map[string]struct{}{ + "3rdparty": {}, + "dep": {}, + "deps": {}, + "thirdparty": {}, + "third-party": {}, + "third_party": {}, + "libs": {}, + "external": {}, + "externals": {}, + "vendor": {}, + "vendored": {}, + } +) + +const ( + // This value may need to be tweaked, or be provided as a configurable flag. 
+ determineVersionThreshold = 0.15 + maxDetermineVersionFiles = 10000 +) + // scanDir walks through the given directory to try to find any relevant files // These include: // - Any lockfiles with scanLockfile // - Any SBOM files with scanSBOMFile // - Any git repositories with scanGit -func scanDir(r reporter.Reporter, dir string, skipGit bool, recursive bool, useGitIgnore bool) ([]scannedPackage, error) { + +func scanDir(r reporter.Reporter, dir string, skipGit bool, recursive bool, useGitIgnore bool, compareOffline bool) ([]scannedPackage, error) { var ignoreMatcher *gitIgnoreMatcher if useGitIgnore { var err error @@ -133,6 +158,16 @@ func scanDir(r reporter.Reporter, dir string, skipGit bool, recursive bool, useG scannedPackages = append(scannedPackages, pkgs...) } + if info.IsDir() && !compareOffline { + if _, ok := vendoredLibNames[strings.ToLower(filepath.Base(path))]; ok { + pkgs, err := scanDirWithVendoredLibs(r, path) + if err != nil { + r.PrintText(fmt.Sprintf("scan failed for dir containing vendored libs %s: %v\n", path, err)) + } + scannedPackages = append(scannedPackages, pkgs...) + } + } + if !root && !recursive && info.IsDir() { return filepath.SkipDir } @@ -181,6 +216,90 @@ func parseGitIgnores(path string) (*gitIgnoreMatcher, error) { return &gitIgnoreMatcher{matcher: matcher, repoPath: repopath}, nil } +func queryDetermineVersions(repoDir string) (*osv.DetermineVersionResponse, error) { + fileExts := []string{ + ".hpp", + ".h", + ".hh", + ".cc", + ".c", + ".cpp", + } + + var hashes []osv.DetermineVersionHash + if err := filepath.Walk(repoDir, func(p string, info fs.FileInfo, err error) error { + if info.IsDir() { + if _, err := os.Stat(filepath.Join(p, ".git")); err == nil { + // Found a git repo, stop here as otherwise we may get duplicated + // results with our regular git commit scanning. 
+ return filepath.SkipDir + } + + return nil + } + for _, ext := range fileExts { + if filepath.Ext(p) == ext { + buf, err := os.ReadFile(p) + if err != nil { + return err + } + hash := md5.Sum(buf) //nolint:gosec + hashes = append(hashes, osv.DetermineVersionHash{ + Path: strings.ReplaceAll(p, repoDir, ""), + Hash: hash[:], + }) + if len(hashes) > maxDetermineVersionFiles { + return errors.New("too many files to hash") + } + } + } + + return nil + }); err != nil { + return nil, fmt.Errorf("failed during hashing: %w", err) + } + + result, err := osv.MakeDetermineVersionRequest(filepath.Base(repoDir), hashes) + if err != nil { + return nil, fmt.Errorf("failed to determine versions: %w", err) + } + + return result, nil +} + +func scanDirWithVendoredLibs(r reporter.Reporter, path string) ([]scannedPackage, error) { + r.PrintText(fmt.Sprintf("Scanning directory for vendored libs: %s\n", path)) + entries, err := os.ReadDir(path) + if err != nil { + return nil, err + } + + var packages []scannedPackage + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + libPath := filepath.Join(path, entry.Name()) + + r.PrintText(fmt.Sprintf("Scanning potential vendored dir: %s\n", libPath)) + // TODO: make this a goroutine to parallelise this operation + results, err := queryDetermineVersions(libPath) + if err != nil { + r.PrintText(fmt.Sprintf("Error scanning sub-directory '%s' with error: %v", libPath, err)) + continue + } + + if len(results.Matches) > 0 && results.Matches[0].Score > determineVersionThreshold { + match := results.Matches[0] + r.PrintText(fmt.Sprintf("Identified %s as %s at %s.\n", libPath, match.RepoInfo.Address, match.RepoInfo.Commit)) + packages = append(packages, createCommitQueryPackage(match.RepoInfo.Commit, libPath)) + } + } + + return packages, nil +} + // gitIgnoreMatcher.match will return true if the file/directory matches a gitignore entry // i.e. 
true if it should be ignored func (m *gitIgnoreMatcher) match(absPath string, isDir bool) (bool, error) { @@ -632,7 +751,7 @@ func DoScan(actions ScannerActions, r reporter.Reporter) (models.VulnerabilityRe for _, dir := range actions.DirectoryPaths { r.PrintText(fmt.Sprintf("Scanning dir %s\n", dir)) - pkgs, err := scanDir(r, dir, actions.SkipGit, actions.Recursive, !actions.NoIgnore) + pkgs, err := scanDir(r, dir, actions.SkipGit, actions.Recursive, !actions.NoIgnore, actions.CompareOffline) if err != nil { return models.VulnerabilityResults{}, err }