diff --git a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go index c6e6d865097..1240c215d6a 100644 --- a/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go +++ b/vulnfeeds/cmd/converters/cve/nvd-cve-osv/main.go @@ -3,7 +3,6 @@ package main import ( "encoding/json" - "errors" "flag" "fmt" "log/slog" @@ -29,6 +28,7 @@ var ( outDir = flag.String("out-dir", "", "Path to output results.") outFormat = flag.String("out-format", "OSV", "Format to output {OSV,PackageInfo}") workers = flag.Int("workers", 30, "The number of concurrent workers to use for processing CVEs.") + rejectFailed = flag.Bool("reject-failed", false, "If set, OSV records with a failed conversion outcome will not be generated.") outputMetrics = flag.Bool("output-metrics", true, "If true, output the metrics information about the conversion") cpuProfile = flag.String("cpuprofile", "", "Path to write cpu profile to file (default = no output)") ) @@ -131,30 +131,15 @@ func processCVE(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache repos := nvd.FindRepos(cve, vpRepoCache, repoTagsCache, metrics, http.DefaultClient) metrics.Repos = repos - var err error + var outcome models.ConversionOutcome switch *outFormat { case "OSV": - err = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics) + outcome = nvd.CVEToOSV(cve, repos, repoTagsCache, *outDir, metrics, *rejectFailed, *outputMetrics) case "PackageInfo": - err = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) + outcome = nvd.CVEToPackageInfo(cve, repos, repoTagsCache, *outDir, metrics) } - // Parse this error to determine which failure mode it was - if err != nil { - if errors.Is(err, nvd.ErrNoRanges) { - metrics.Outcome = models.NoRanges - return models.NoRanges - } - if errors.Is(err, nvd.ErrUnresolvedFix) { - metrics.Outcome = models.FixUnresolvable - return models.FixUnresolvable - } - metrics.Outcome = models.ConversionUnknown - - return models.ConversionUnknown - } - metrics.Outcome = models.Successful - return models.Successful + return outcome } func worker(wg *sync.WaitGroup, jobs <-chan models.NVDCVE, _ string, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache) { diff --git a/vulnfeeds/conversion/common.go b/vulnfeeds/conversion/common.go index c14265b482c..c5906bc917b 100644 --- a/vulnfeeds/conversion/common.go +++ b/vulnfeeds/conversion/common.go @@ -5,6 +5,7 @@ package conversion import ( "encoding/csv" "encoding/json" + "errors" "fmt" "io/fs" "log/slog" @@ -171,7 +172,8 @@ func WriteMetricsFile(metrics *models.ConversionMetrics, metricsFile *os.File) e return nil } -// Examines repos and tries to convert versions to commits by treating them as Git tags. +// GitVersionsToCommits examines repos and tries to convert versions to commits by treating them as Git tags. +// Returns the resolved ranges, unresolved ranges, and successful repos involved. func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache) ([]*osvschema.Range, []*osvschema.Range, []string) { var newVersionRanges []*osvschema.Range unresolvedRanges := versionRanges @@ -181,7 +183,9 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr if len(unresolvedRanges) == 0 { break // All ranges have been resolved. } - + if cache.IsInvalid(repo) { + continue + } normalizedTags, err := git.NormalizeRepoTags(repo, cache) if err != nil { metrics.AddNote("Failed to normalize tags - %s", repo) @@ -207,10 +211,19 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr if introduced == "0" { introducedCommit = "0" } else { - introducedCommit = resolveVersionToCommit(introduced, normalizedTags) + introducedCommit, err = git.VersionToCommit(introduced, normalizedTags) + if err != nil { + metrics.AddNote("error resolving version to commit - %s - %s", introduced, err) + } + } + fixedCommit, err := git.VersionToCommit(fixed, normalizedTags) + if err != nil { + metrics.AddNote("error resolving version to commit - %s - %s", fixed, err) + } + lastAffectedCommit, err := git.VersionToCommit(lastAffected, normalizedTags) + if err != nil { + metrics.AddNote("error resolving version to commit - %s - %s", lastAffected, err) } - fixedCommit := resolveVersionToCommit(fixed, normalizedTags) - lastAffectedCommit := resolveVersionToCommit(lastAffected, normalizedTags) if introducedCommit != "" && (fixedCommit != "" || lastAffectedCommit != "") { var newVR *osvschema.Range @@ -240,18 +253,6 @@ func GitVersionsToCommits(versionRanges []*osvschema.Range, repos []string, metr unresolvedRanges = stillUnresolvedRanges } - if len(newVersionRanges) > 0 { - metrics.ResolvedRangesCount += len(newVersionRanges) - metrics.Outcome = models.Successful - } - - if len(unresolvedRanges) > 0 { - metrics.UnresolvedRangesCount += len(unresolvedRanges) - if len(newVersionRanges) == 0 { - metrics.Outcome = models.NoCommitRanges - } - } - return newVersionRanges, unresolvedRanges, successfulRepos } @@ -280,16 +281,117 @@ func BuildVersionRange(intro string, lastAff string, fixed string) *osvschema.Ra return &versionRange } -// resolveVersionToCommit is a helper to convert a version string to a commit hash. -// It logs the outcome of the conversion attempt and returns an empty string on failure. -func resolveVersionToCommit(version string, normalizedTags map[string]git.NormalizedTag) string { - if version == "" { - return "" +// MergeTwoRanges combines two osvschema.Range objects into a single range. +// It merges the events and the DatabaseSpecific fields. If the ranges are +// not for the same repository or are of different types, it returns an error. +// When merging DatabaseSpecific fields, it handles lists, maps, and simple +// strings. If there are mismatching types for the same key, it returns an error. +func MergeTwoRanges(range1, range2 *osvschema.Range) (*osvschema.Range, error) { + // check if the ranges are the same + if range1.GetRepo() != range2.GetRepo() || range1.GetType() != range2.GetType() { + // return an error if not the case + return nil, errors.New("ranges are not the same repo or type") } - commit, err := git.VersionToCommit(version, normalizedTags) - if err != nil { - return "" + + mergedRange := &osvschema.Range{ + Repo: range1.GetRepo(), + Type: range1.GetType(), + Events: append(range1.Events, range2.GetEvents()...), + } + + db1 := range1.GetDatabaseSpecific() + db2 := range2.GetDatabaseSpecific() + + if db1 == nil && db2 == nil { + return mergedRange, nil } - return commit + mergedMap := make(map[string]any) + + if db1 != nil { + for k, v := range db1.GetFields() { + mergedMap[k] = v.AsInterface() + } + } + + if db2 != nil { + for k, v := range db2.GetFields() { + val2 := v.AsInterface() + if existing, ok := mergedMap[k]; ok { + mergedVal, err := mergeDatabaseSpecificValues(existing, val2) + if err != nil { + logger.Info("Failed to merge database specific key", "key", k, "err", err) + } + mergedMap[k] = mergedVal + } else { + mergedMap[k] = val2 + } + } + } + + if len(mergedMap) > 0 { + if ds, err := utility.NewStructpbFromMap(mergedMap); err == nil { + mergedRange.DatabaseSpecific = ds + } else { + logger.Warn("Failed to create DatabaseSpecific for merged range: %v", err) + } + } + + return mergedRange, nil +} + +// mergeDatabaseSpecificValues is a helper function that recursively merges two +// values from a DatabaseSpecific field. It handles lists (by appending), maps +// (by recursively merging keys), and simple strings (by creating a list if they +// differ). It returns an error if the types of the two values do not match. +func mergeDatabaseSpecificValues(val1, val2 any) (any, error) { + switch v1 := val1.(type) { + case []any: + if v2, ok := val2.([]any); ok { + return append(v1, v2...), nil + } + + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) + case map[string]any: + if v2, ok := val2.(map[string]any); ok { + merged := make(map[string]any) + for k, v := range v1 { + merged[k] = v + } + for k, v := range v2 { + if existing, ok := merged[k]; ok { + mergedVal, err := mergeDatabaseSpecificValues(existing, v) + if err != nil { + return nil, err + } + merged[k] = mergedVal + } else { + merged[k] = v + } + } + + return merged, nil + } + + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) + case string: + if v2, ok := val2.(string); ok { + if v1 == v2 { + return v1, nil + } + + return []any{v1, v2}, nil + } + + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) + default: + if fmt.Sprintf("%T", val1) != fmt.Sprintf("%T", val2) { + return nil, fmt.Errorf("mismatching types: %T and %T", val1, val2) + } + if val1 == val2 { + return val1, nil + } + + return []any{val1, val2}, nil + } } diff --git a/vulnfeeds/conversion/common_test.go b/vulnfeeds/conversion/common_test.go index 0641db47c6d..84ca6444c61 100644 --- a/vulnfeeds/conversion/common_test.go +++ b/vulnfeeds/conversion/common_test.go @@ -6,6 +6,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/ossf/osv-schema/bindings/go/osvschema" "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/structpb" ) func TestBuildVersionRange(t *testing.T) { @@ -66,3 +67,251 @@ func TestBuildVersionRange(t *testing.T) { }) } } + +func TestMergeTwoRanges(t *testing.T) { + tests := []struct { + name string + range1 *osvschema.Range + range2 *osvschema.Range + want *osvschema.Range + wantErr bool + }{ + { + name: "Merge identical ranges", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + Events: []*osvschema.Event{ + {Introduced: "0"}, + }, + }, + range2: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + Events: []*osvschema.Event{ + {Fixed: "1.0.0"}, + }, + }, + want: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + Events: []*osvschema.Event{ + {Introduced: "0"}, + {Fixed: "1.0.0"}, + }, + }, + }, + { + name: "Different repos should return nil and error", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo1", + }, + range2: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo2", + }, + want: nil, + wantErr: true, + }, + { + name: "Different types should return nil and error", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + }, + range2: &osvschema.Range{ + Type: osvschema.Range_ECOSYSTEM, + Repo: "https://github.com/example/repo", + }, + want: nil, + wantErr: true, + }, + { + name: "Merge with DatabaseSpecific", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "key1": structpb.NewStringValue("value1"), + }, + }, + }, + range2: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "key2": structpb.NewStringValue("value2"), + }, + }, + }, + want: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "key1": structpb.NewStringValue("value1"), + "key2": structpb.NewStringValue("value2"), + }, + }, + }, + }, + { + name: "Merge DatabaseSpecific lists", + range1: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "list": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{structpb.NewStringValue("item1")}, + }), + }, + }, + }, + range2: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "list": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{structpb.NewStringValue("item2")}, + }), + }, + }, + }, + want: &osvschema.Range{ + Type: osvschema.Range_GIT, + Repo: "https://github.com/example/repo", + DatabaseSpecific: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "list": structpb.NewListValue(&structpb.ListValue{ + Values: []*structpb.Value{ + structpb.NewStringValue("item1"), + structpb.NewStringValue("item2"), + }, + }), + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := MergeTwoRanges(tt.range1, tt.range2) + if (err != nil) != tt.wantErr { + t.Errorf("MergeTwoRanges() error = %v, wantErr %v", err, tt.wantErr) + return + } + if diff := cmp.Diff(tt.want, got, protocmp.Transform()); diff != "" { + t.Errorf("mergeTwoRanges() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestMergeDatabaseSpecificValues(t *testing.T) { + tests := []struct { + name string + val1 any + val2 any + want any + wantErr bool + }{ + { + name: "Merge lists", + val1: []any{"a", "b"}, + val2: []any{"c", "d"}, + want: []any{"a", "b", "c", "d"}, + }, + { + name: "List and string mismatch", + val1: []any{"a", "b"}, + val2: "c", + wantErr: true, + }, + { + name: "Merge maps", + val1: map[string]any{"key1": "value1"}, + val2: map[string]any{"key2": "value2"}, + want: map[string]any{"key1": "value1", "key2": "value2"}, + }, + { + name: "Merge nested maps", + val1: map[string]any{ + "nested": map[string]any{ + "key1": "value1", + }, + }, + val2: map[string]any{ + "nested": map[string]any{ + "key2": "value2", + }, + }, + want: map[string]any{ + "nested": map[string]any{ + "key1": "value1", + "key2": "value2", + }, + }, + }, + { + name: "Map and string mismatch", + val1: map[string]any{"key1": "value1"}, + val2: "string", + wantErr: true, + }, + { + name: "Merge same strings", + val1: "value1", + val2: "value1", + want: "value1", + }, + { + name: "Merge different strings", + val1: "value1", + val2: "value2", + want: []any{"value1", "value2"}, + }, + { + name: "String and int mismatch", + val1: "value1", + val2: 123, + wantErr: true, + }, + { + name: "Merge same ints", + val1: 123, + val2: 123, + want: 123, + }, + { + name: "Merge different ints", + val1: 123, + val2: 456, + want: []any{123, 456}, + }, + { + name: "Int and float64 mismatch", + val1: 123, + val2: 456.0, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := mergeDatabaseSpecificValues(tt.val1, tt.val2) + if (err != nil) != tt.wantErr { + t.Errorf("mergeDatabaseSpecificValues() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr && !cmp.Equal(got, tt.want) { + t.Errorf("mergeDatabaseSpecificValues() mismatch (-want +got):\n%s", cmp.Diff(tt.want, got)) + } + }) + } +} diff --git a/vulnfeeds/conversion/nvd/converter.go b/vulnfeeds/conversion/nvd/converter.go index 12a36232838..0c1e176b14e 100644 --- a/vulnfeeds/conversion/nvd/converter.go +++ b/vulnfeeds/conversion/nvd/converter.go @@ -4,8 +4,8 @@ package nvd import ( "encoding/json" "errors" - "fmt" "log/slog" + "maps" "net/http" "os" "path/filepath" @@ -15,8 +15,10 @@ import ( "github.com/google/osv/vulnfeeds/cves" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" + "github.com/google/osv/vulnfeeds/utility" "github.com/google/osv/vulnfeeds/utility/logger" "github.com/google/osv/vulnfeeds/vulns" + "github.com/ossf/osv-schema/bindings/go/osvschema" ) var ErrNoRanges = errors.New("no ranges") @@ -24,7 +26,7 @@ var ErrNoRanges = errors.New("no ranges") var ErrUnresolvedFix = errors.New("fixes not resolved to commits") // CVEToOSV Takes an NVD CVE record and outputs an OSV file in the specified directory. -func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) error { +func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) models.ConversionOutcome { CPEs := cves.CPEs(cve) metrics.CPEs = CPEs // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. @@ -36,86 +38,98 @@ func CVEToOSV(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, direc maybeVendorName = CPE.Vendor maybeProductName = CPE.Product if err != nil { - return errors.New("can't generate an OSV record without valid CPE data") + metrics.AddNote("Can't generate an OSV record without valid CPE data") + return models.ConversionUnknown } } + // Create basic OSV record v := vulns.FromNVDCVE(cve.ID, cve) - versions := cves.ExtractVersionInfo(cve, nil, http.DefaultClient, metrics) - - if len(versions.AffectedVersions) != 0 { - // There are some AffectedVersions to try and resolve to AffectedCommits. - if len(repos) == 0 { - metrics.AddNote("No affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) - return fmt.Errorf("no affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) - } - metrics.AddNote("Trying to convert version tags to commits: %v with repos: %v", versions, repos) - versions = cves.VersionInfoToCommits(versions, repos, cache, metrics) - hasAnyFixedCommits := false - for _, ac := range versions.AffectedCommits { - if ac.Fixed != "" { - hasAnyFixedCommits = true - break - } - } - if versions.HasFixedVersions() && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert fixed version tags to commits: %+v %w", versions, ErrUnresolvedFix) - } + // At the bare minimum, we want to attempt to extract the raw version information + // from CPEs, whether or not they can resolve to commits. + cpeRanges := cves.ExtractVersionsFromCPEs(cve, nil, metrics) - hasAnyLastAffectedCommits := false - for _, ac := range versions.AffectedCommits { - if ac.LastAffected != "" { - hasAnyLastAffectedCommits = true - break - } - } + // If there are no repos, there are no commits from the refs either + if len(cpeRanges) == 0 && len(repos) == 0 { + metrics.SetOutcome(models.NoRepos) + outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) - if versions.HasLastAffectedVersions() && !hasAnyLastAffectedCommits && !hasAnyFixedCommits { - metrics.AddNote("Failed to convert last_affected version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert last_affected version tags to commits: %+v %w", versions, ErrUnresolvedFix) - } + return models.NoRepos } - versions.AffectedCommits = cves.DeduplicateAffectedCommits(versions.AffectedCommits) + successfulRepos := make(map[string]bool) + var resolvedRanges, unresolvedRanges []*osvschema.Range - vulns.AttachExtractedVersionInfo(v, versions) + // Exit early if there are no repositories + if len(repos) == 0 { + metrics.SetOutcome(models.NoRepos) + metrics.UnresolvedRangesCount += len(cpeRanges) + affected := MergeRangesAndCreateAffected(resolvedRanges, cpeRanges, nil, nil, metrics) + v.Affected = append(v.Affected, affected) + // Exit early + outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) - if len(v.Affected) == 0 { - metrics.AddNote("No affected ranges detected for %q", maybeProductName) - return fmt.Errorf("no affected ranges detected for %q %w", maybeProductName, ErrNoRanges) + return models.NoRepos } - vulnDir := filepath.Join(directory, maybeVendorName, maybeProductName) + // If we have ranges, try to resolve them + r, un, sR := processRanges(cpeRanges, repos, metrics, cache, models.VersionSourceCPE) + resolvedRanges = append(resolvedRanges, r...) + unresolvedRanges = append(unresolvedRanges, un...) + for _, s := range sR { + successfulRepos[s] = true + } - if err := os.MkdirAll(vulnDir, 0755); err != nil { - logger.Info("Failed to create directory "+vulnDir, slog.String("cve", string(cve.ID)), slog.String("path", vulnDir), slog.Any("err", err)) + // Extract Commits + commits, err := cves.ExtractCommitsFromRefs(cve.References, http.DefaultClient) + if err != nil { + metrics.AddNote("Failed to extract commits from refs: %#v", err) } - osvFile, errCVE := conversion.CreateOSVFile(cve.ID, vulnDir) - metricsFile, errMetrics := conversion.CreateMetricsFile(cve.ID, vulnDir) - if errCVE != nil || errMetrics != nil { - logger.Fatal("File failed to be created for CVE", slog.String("cve", string(cve.ID))) + if len(commits) > 0 { + metrics.AddNote("Extracted commits from refs: %v", commits) + for _, commit := range commits { + successfulRepos[commit.Repo] = true + } + metrics.SetOutcome(models.Successful) + metrics.VersionSources = append(metrics.VersionSources, models.VersionSourceRefs) } - err := v.ToJSON(osvFile) - if err != nil { - logger.Info("Failed to write", slog.Any("err", err)) - return err + // Extract Versions From Text if no CPE versions found + if len(resolvedRanges) == 0 { + textRanges := cves.ExtractVersionsFromText(nil, models.EnglishDescription(cve.Descriptions), metrics) + if len(textRanges) > 0 { + metrics.AddNote("Extracted versions from description: %v", textRanges) + } + r, un, sR := processRanges(textRanges, repos, metrics, cache, models.VersionSourceDescription) + resolvedRanges = append(resolvedRanges, r...) + unresolvedRanges = append(unresolvedRanges, un...) + for _, s := range sR { + successfulRepos[s] = true + } } - osvFile.Close() + if len(resolvedRanges) == 0 && len(commits) == 0 { + metrics.AddNote("No ranges detected for %q", maybeProductName) + metrics.SetOutcome(models.NoRanges) + } - err = conversion.WriteMetricsFile(metrics, metricsFile) - if err != nil { - return err + // Use the successful repos for more efficient merging. + keys := slices.Collect(maps.Keys(successfulRepos)) + affected := MergeRangesAndCreateAffected(resolvedRanges, unresolvedRanges, commits, keys, metrics) + v.Affected = append(v.Affected, affected) + + if !outputMetrics && rejectFailed && metrics.Outcome != models.Successful { + return metrics.Outcome } - return nil + outputFiles(v, directory, maybeVendorName, maybeProductName, metrics, rejectFailed, outputMetrics) + + return metrics.Outcome } // CVEToPackageInfo takes an NVD CVE record and outputs a PackageInfo struct in a file in the specified directory. -func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) error { +func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCache, directory string, metrics *models.ConversionMetrics) models.ConversionOutcome { CPEs := cves.CPEs(cve) // The vendor name and product name are used to construct the output `vulnDir` below, so need to be set to *something* to keep the output tidy. maybeVendorName := "ENOCPE" @@ -126,7 +140,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach maybeVendorName = CPE.Vendor maybeProductName = CPE.Product if err != nil { - return errors.New("can't generate an OSV record without valid CPE data") + return models.NoRanges } } @@ -137,10 +151,10 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach // There are some AffectedVersions to try and resolve to AffectedCommits. if len(repos) == 0 { metrics.AddNote("No affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) - return fmt.Errorf("no affected ranges for %q, and no repos to try and convert %+v to tags with", maybeProductName, versions.AffectedVersions) + return models.NoRepos } logger.Info("Trying to convert version tags to commits", slog.String("cve", string(cve.ID)), slog.Any("versions", versions), slog.Any("repos", repos)) - versions = cves.VersionInfoToCommits(versions, repos, cache, metrics) + cves.VersionInfoToCommits(&versions, repos, cache, metrics) } hasAnyFixedCommits := false @@ -152,7 +166,7 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach if versions.HasFixedVersions() && !hasAnyFixedCommits { metrics.AddNote("Failed to convert fixed version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert fixed version tags to commits: %+v %w", versions, ErrUnresolvedFix) + return models.NoCommitRanges } hasAnyLastAffectedCommits := false @@ -164,12 +178,12 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach if versions.HasLastAffectedVersions() && !hasAnyLastAffectedCommits && !hasAnyFixedCommits { metrics.AddNote("Failed to convert last_affected version tags to commits: %+v", versions) - return fmt.Errorf("failed to convert last_affected version tags to commits: %+v %w", versions, ErrUnresolvedFix) + return models.NoCommitRanges } if len(versions.AffectedCommits) == 0 { metrics.AddNote("No affected commit ranges determined for %q", maybeProductName) - return fmt.Errorf("no affected commit ranges determined for %q %w", maybeProductName, ErrNoRanges) + return models.NoCommitRanges } versions.AffectedVersions = nil // these have served their purpose and are not required in the resulting output. @@ -184,14 +198,12 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach err := os.MkdirAll(vulnDir, 0755) if err != nil { logger.Warn("Failed to create dir", slog.Any("err", err)) - return fmt.Errorf("failed to create dir: %w", err) } outputFile := filepath.Join(vulnDir, string(cve.ID)+".nvd"+models.Extension) f, err := os.Create(outputFile) if err != nil { logger.Warn("Failed to open for writing", slog.String("path", outputFile), slog.Any("err", err)) - return fmt.Errorf("failed to open %s for writing: %w", outputFile, err) } defer f.Close() @@ -201,27 +213,27 @@ func CVEToPackageInfo(cve models.NVDCVE, repos []string, cache *git.RepoTagsCach if err != nil { logger.Warn("Failed to encode PackageInfo", slog.String("path", outputFile), slog.Any("err", err)) - return fmt.Errorf("failed to encode PackageInfo to %s: %w", outputFile, err) } logger.Info("Generated PackageInfo record", slog.String("cve", string(cve.ID)), slog.String("product", maybeProductName)) metricsFile, err := conversion.CreateMetricsFile(cve.ID, vulnDir) if err != nil { - return err + logger.Warn("Failed to create metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) } err = conversion.WriteMetricsFile(metrics, metricsFile) if err != nil { - return err + logger.Warn("Failed to write metrics file", slog.String("path", metricsFile.Name()), slog.Any("err", err)) } - return nil + return metrics.Outcome } // FindRepos attempts to find the source code repositories for a given CVE. func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics, httpClient *http.Client) []string { // Find repos refs := cve.References + conversion.DeduplicateRefs(refs) CPEs := cves.CPEs(cve) CVEID := cve.ID var reposForCVE []string @@ -229,7 +241,7 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * if len(refs) == 0 && len(CPEs) == 0 { metrics.AddNote("Skipping due to lack of CPEs and lack of references") // 100% of these in 2022 were rejected CVEs - metrics.Outcome = models.Rejected + metrics.SetOutcome(models.Rejected) return nil } @@ -243,7 +255,6 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * metrics.AddNote("Derived repos for CVE with no CPEs: %v", repos) reposForCVE = repos } - vendorProductCombinations := make(map[cves.VendorProduct]bool) for _, CPEstr := range CPEs { CPE, err := cves.ParseCPE(CPEstr) @@ -251,6 +262,9 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * metrics.AddNote("Failed to parse CPE: %v", CPEstr) continue } + if CPE.Part != "a" { // only care about application CPEs + continue + } vendorProductCombinations[cves.VendorProduct{Vendor: CPE.Vendor, Product: CPE.Product}] = true } @@ -285,7 +299,6 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * if len(reposForCVE) == 0 { // We have nothing useful to work with, so we'll assume it's out of scope metrics.AddNote("Passing due to lack of viable repository") - metrics.Outcome = models.NoRepos return nil } @@ -294,3 +307,194 @@ func FindRepos(cve models.NVDCVE, vpRepoCache *cves.VPRepoCache, repoTagsCache * return reposForCVE } + +// MergeRangesAndCreateAffected combines resolved and unresolved ranges with commits to create an OSV Affected object. +// It merges ranges for the same repository and adds commit events to the appropriate ranges at the end. +// +// Arguments: +// - resolvedRanges: A slice of resolved OSV ranges to be merged. +// - unresolvedRanges: A slice of unresolved OSV ranges to be included in the database specific field. +// - commits: A slice of affected commits to be converted into events and added to ranges. +// - successfulRepos: A slice of repository URLs that were successfully processed. +// - metrics: A pointer to ConversionMetrics to track the outcome and notes. +func MergeRangesAndCreateAffected(resolvedRanges []*osvschema.Range, unresolvedRanges []*osvschema.Range, commits []models.AffectedCommit, successfulRepos []string, metrics *models.ConversionMetrics) *osvschema.Affected { + var newResolvedRanges []*osvschema.Range + // Combine the ranges appropriately + if len(resolvedRanges) > 0 { + slices.Sort(successfulRepos) + successfulRepos = slices.Compact(successfulRepos) + for _, repo := range successfulRepos { + var mergedRange *osvschema.Range + for _, vr := range resolvedRanges { + if vr.GetRepo() == repo { + if mergedRange == nil { + mergedRange = vr + } else { + var err error + mergedRange, err = conversion.MergeTwoRanges(mergedRange, vr) + if err != nil { + metrics.AddNote("Failed to merge ranges: %v", err) + } + } + } + } + if len(commits) > 0 { + for _, commit := range commits { + if commit.Repo == repo { + if mergedRange == nil { + mergedRange = conversion.BuildVersionRange(commit.Introduced, commit.LastAffected, commit.Fixed) + mergedRange.Repo = repo + } else { + event := convertCommitToEvent(commit) + if event != nil { + addEventToRange(mergedRange, event) + } + } + } + } + } + if mergedRange != nil { + newResolvedRanges = append(newResolvedRanges, mergedRange) + } + } + } + + // if there are no resolved version but there are commits, we should create a range for each commit + if len(resolvedRanges) == 0 && len(commits) > 0 { + for _, commit := range commits { + newResolvedRanges = append(newResolvedRanges, conversion.BuildVersionRange(commit.Introduced, commit.LastAffected, commit.Fixed)) + metrics.ResolvedRangesCount++ + } + } + + newAffected := &osvschema.Affected{ + Ranges: newResolvedRanges, + } + + if len(unresolvedRanges) > 0 { + databaseSpecific, err := utility.NewStructpbFromMap(map[string]any{"unresolved_ranges": unresolvedRanges}) + if err != nil { + metrics.AddNote("failed to make database specific: %v", err) + } + newAffected.DatabaseSpecific = databaseSpecific + } + + return newAffected +} + +// addEventToRange adds an event to a version range, avoiding duplicates. +// Introduced events are prepended to the events list, while others are appended. +// +// Arguments: +// - versionRange: The OSV range to which the event will be added. +// - event: The OSV event (Introduced, Fixed, or LastAffected) to add. +func addEventToRange(versionRange *osvschema.Range, event *osvschema.Event) { + // Handle duplicate events being added + for _, e := range versionRange.GetEvents() { + if e.GetIntroduced() != "" && e.GetIntroduced() == event.GetIntroduced() { + return + } + if e.GetFixed() != "" && e.GetFixed() == event.GetFixed() { + return + } + if e.GetLastAffected() != "" && e.GetLastAffected() == event.GetLastAffected() { + return + } + } + //TODO: maybe handle if the fixed event appears as an introduced event or similar. + + if event.GetIntroduced() != "" { + versionRange.Events = append([]*osvschema.Event{{ + Introduced: event.GetIntroduced()}}, versionRange.GetEvents()...) + } else { + versionRange.Events = append(versionRange.Events, event) + } +} + +// convertCommitToEvent creates an OSV Event from an AffectedCommit. +// It returns an event with the Introduced, Fixed, or LastAffected value from the commit. +func convertCommitToEvent(commit models.AffectedCommit) *osvschema.Event { + if commit.Introduced != "" { + return &osvschema.Event{ + Introduced: commit.Introduced, + } + } + if commit.Fixed != "" { + return &osvschema.Event{ + Fixed: commit.Fixed, + } + } + if commit.LastAffected != "" { + return &osvschema.Event{ + LastAffected: commit.LastAffected, + } + } + + return nil +} + +// outputFiles writes the OSV vulnerability record and conversion metrics to files in the specified directory. +// It creates the necessary subdirectories based on the vendor and product names and handles whether or not +// the files should be written based on the rejectFailed and outputMetrics flags. +// +// Arguments: +// - v: The OSV Vulnerability object to be written to a file. +// - dir: The base directory where the output files should be created. +// - vendor: The vendor name used to create the subdirectory. +// - product: The product name used to create the subdirectory. +// - metrics: A pointer to ConversionMetrics to be written to a metrics file. +// - rejectFailed: A boolean indicating whether to skip writing the OSV file if the conversion was not successful. +// - outputMetrics: A boolean indicating whether to write the metrics file. +func outputFiles(v *vulns.Vulnerability, dir string, vendor string, product string, metrics *models.ConversionMetrics, rejectFailed bool, outputMetrics bool) { + cveID := v.Id + vulnDir := filepath.Join(dir, vendor, product) + + if err := os.MkdirAll(vulnDir, 0755); err != nil { + logger.Info("Failed to create directory "+vulnDir, slog.String("cve", cveID), slog.String("path", vulnDir), slog.Any("err", err)) + } + + if !rejectFailed || metrics.Outcome == models.Successful { + osvFile, errCVE := conversion.CreateOSVFile(models.CVEID(cveID), vulnDir) + if errCVE != nil { + logger.Fatal("File failed to be created for CVE", slog.String("cve", cveID)) + } + if err := v.ToJSON(osvFile); err != nil { + logger.Error("Failed to write", slog.Any("err", err)) + } + osvFile.Close() + } + if outputMetrics { + metricsFile, errMetrics := conversion.CreateMetricsFile(models.CVEID(cveID), vulnDir) + if errMetrics != nil { + logger.Fatal("File failed to be created for CVE", slog.String("cve", cveID)) + } + if err := conversion.WriteMetricsFile(metrics, metricsFile); err != nil { + logger.Error("Failed to write metrics", slog.Any("err", err)) + } + metricsFile.Close() + } +} + +// processRanges attempts to resolve the given ranges to commits and updates the metrics accordingly. +func processRanges(ranges []*osvschema.Range, repos []string, metrics *models.ConversionMetrics, cache *git.RepoTagsCache, source models.VersionSource) ([]*osvschema.Range, []*osvschema.Range, []string) { + if len(ranges) == 0 { + return nil, nil, nil + } + + r, un, sR := conversion.GitVersionsToCommits(ranges, repos, metrics, cache) + if len(r) > 0 { + metrics.ResolvedRangesCount += len(r) + metrics.SetOutcome(models.Successful) + } + + if len(un) > 0 { + metrics.UnresolvedRangesCount += len(un) + if len(r) == 0 { + metrics.SetOutcome(models.NoCommitRanges) + } + } + + metrics.VersionSources = append(metrics.VersionSources, source) + + return r, un, sR +} diff --git a/vulnfeeds/cves/versions.go b/vulnfeeds/cves/versions.go index 38ed3ef0377..fcac925deeb 100644 --- a/vulnfeeds/cves/versions.go +++ b/vulnfeeds/cves/versions.go @@ -29,8 +29,10 @@ import ( "time" "github.com/knqyf263/go-cpe/naming" + "github.com/ossf/osv-schema/bindings/go/osvschema" "github.com/sethvargo/go-retry" + "github.com/google/osv/vulnfeeds/conversion" "github.com/google/osv/vulnfeeds/git" "github.com/google/osv/vulnfeeds/models" ) @@ -59,6 +61,8 @@ var VendorProductDenyList = []VendorProduct{ // [CVE-2021-28957]: Incorrectly associates with github.com/lxml/lxml {"oracle", "zfs_storage_appliance_kit"}, {"gradle", "enterprise"}, // The OSS repo gets mis-attributed via CVE-2020-15767 + {"qualcomm", ""}, // firmware out of scope + {"linux", "linux_kernel"}, } type VendorProduct struct { @@ -545,11 +549,27 @@ func ValidateAndCanonicalizeLink(link string, httpClient *http.Client) (canonica return canonicalLink, nil } +// For URLs referencing commits in supported Git repository hosts, return a cloneable AffectedCommit. +func ExtractCommitsFromRefs(references []models.Reference, httpClient *http.Client) ([]models.AffectedCommit, error) { + var commits []models.AffectedCommit //nolint:prealloc + + for _, ref := range references { + // (Potentially faulty) Assumption: All viable Git commit reference links are fix commits. + ac, err := extractGitAffectedCommit(ref.URL, models.Fixed, httpClient) + if err != nil { + continue + } + + commits = append(commits, ac) + } + + return commits, nil +} + // For URLs referencing commits in supported Git repository hosts, return a cloneable AffectedCommit. func extractGitAffectedCommit(link string, commitType models.CommitType, httpClient *http.Client) (models.AffectedCommit, error) { var ac models.AffectedCommit c, r, err := ExtractGitCommit(link, httpClient, 0) - if err != nil { return ac, err } @@ -637,7 +657,7 @@ func processExtractedVersion(version string) string { return version } -func ExtractVersionsFromText(validVersions []string, text string, metrics *models.ConversionMetrics) []models.AffectedVersion { +func ExtractVersionsFromText(validVersions []string, text string, metrics *models.ConversionMetrics) []*osvschema.Range { // Match: // - x.x.x before x.x.x // - x.x.x through x.x.x @@ -650,7 +670,7 @@ func ExtractVersionsFromText(validVersions []string, text string, metrics *model return nil } - versions := make([]models.AffectedVersion, 0, len(matches)) + versions := make([]*osvschema.Range, 0, len(matches)) for _, match := range matches { // Trim periods that are part of sentences. @@ -688,11 +708,8 @@ func ExtractVersionsFromText(validVersions []string, text string, metrics *model lastaffected = "" } - versions = append(versions, models.AffectedVersion{ - Introduced: introduced, - Fixed: fixed, - LastAffected: lastaffected, - }) + vr := conversion.BuildVersionRange(introduced, lastaffected, fixed) + versions = append(versions, vr) } return versions @@ -718,9 +735,8 @@ func DeduplicateAffectedCommits(commits []models.AffectedCommit) []models.Affect return uniqueCommits } -func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics *models.ConversionMetrics) []models.AffectedVersion { - versions := []models.AffectedVersion{} - seen := make(map[models.AffectedVersion]bool) +func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics *models.ConversionMetrics) []*osvschema.Range { + versions := []*osvschema.Range{} for _, config := range cve.Configurations { for _, node := range config.Nodes { @@ -767,8 +783,7 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics if err != nil { continue } - if CPE.Part != "a" { - // Skip operating system CPEs. + if CPE.Part != "a" && CPE.Part != "o" { continue } if slices.Contains([]string{"NA", "ANY"}, CPE.Version) { @@ -781,8 +796,11 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics } } - if introduced == "" && fixed == "" && lastaffected == "" { - continue + if introduced == "" { + if fixed == "" && lastaffected == "" { + continue + } + introduced = "0" } if introduced != "" && !HasVersion(validVersions, introduced) { @@ -796,45 +814,114 @@ func ExtractVersionsFromCPEs(cve models.NVDCVE, validVersions []string, metrics if fixed != "" && !HasVersion(validVersions, fixed) { metrics.AddNote("Warning: %s is not a valid fixed version", fixed) } - - possibleNewAffectedVersion := models.AffectedVersion{ - Introduced: introduced, - Fixed: fixed, - LastAffected: lastaffected, - } - - if seen[possibleNewAffectedVersion] { - continue - } - seen[possibleNewAffectedVersion] = true - versions = append(versions, possibleNewAffectedVersion) - metrics.AddNote("Extracted version %+v", possibleNewAffectedVersion) + vr := conversion.BuildVersionRange(introduced, lastaffected, fixed) + versions = append(versions, vr) } } } + if len(versions) > 0 { + metrics.AddNote("Extracted versions from CPEs: %v", versions) + } return versions } +// ExtractVersionInfo extracts version information from a CVE and saves to a VersionInfo struct. +// This is mostly deprecated, but is still used by the Alpine, Debian, and PyPi converters. func ExtractVersionInfo(cve models.NVDCVE, validVersions []string, httpClient *http.Client, metrics *models.ConversionMetrics) (v models.VersionInfo) { - for _, reference := range cve.References { - // (Potentially faulty) Assumption: All viable Git commit reference links are fix commits. - if commit, err := extractGitAffectedCommit(reference.URL, models.Fixed, httpClient); err == nil { - v.AffectedCommits = append(v.AffectedCommits, commit) - } + if commit, err := ExtractCommitsFromRefs(cve.References, httpClient); err == nil { + v.AffectedCommits = append(v.AffectedCommits, commit...) } - if len(v.AffectedCommits) > 0 { + + if v.AffectedCommits != nil { v.AffectedCommits = DeduplicateAffectedCommits(v.AffectedCommits) metrics.AddNote("Extracted %d commits", len(v.AffectedCommits)) } - v.AffectedVersions = ExtractVersionsFromCPEs(cve, validVersions, metrics) - if len(v.AffectedVersions) > 0 { - metrics.AddNote("Extracted versions from CPEs: %v", v.AffectedVersions) - } else { - v.AffectedVersions = ExtractVersionsFromText(validVersions, models.EnglishDescription(cve.Descriptions), metrics) - if len(v.AffectedVersions) > 0 { - metrics.AddNote("Extracted versions from description: %v", v.AffectedVersions) + // Extract versions from CPEs. + for _, config := range cve.Configurations { + for _, node := range config.Nodes { + if node.Operator != "OR" { + continue + } + + for _, match := range node.CPEMatch { + if !match.Vulnerable { + continue + } + + introduced := "" + fixed := "" + lastaffected := "" + if match.VersionStartIncluding != nil { + introduced = cleanVersion(*match.VersionStartIncluding) + } else if match.VersionStartExcluding != nil { + var err error + introduced, err = nextVersion(validVersions, cleanVersion(*match.VersionStartExcluding)) + if err != nil { + metrics.AddNote("%v", err.Error()) + } + } + + if match.VersionEndExcluding != nil { + fixed = cleanVersion(*match.VersionEndExcluding) + } else if match.VersionEndIncluding != nil { + var err error + // Infer the fixed version from the next version after. + fixed, err = nextVersion(validVersions, cleanVersion(*match.VersionEndIncluding)) + if err != nil { + metrics.AddNote("%v", err.Error()) + // if that inference failed, we know this version was definitely still vulnerable. + lastaffected = cleanVersion(*match.VersionEndIncluding) + metrics.AddNote("Using %s as last_affected version instead", cleanVersion(*match.VersionEndIncluding)) + } + } + + if introduced == "" && fixed == "" && lastaffected == "" { + // See if a last affected version is inferable from the CPE string. + // In this situation there is no known introduced version. + CPE, err := ParseCPE(match.Criteria) + if err != nil { + continue + } + if CPE.Part != "a" { + // Skip operating system CPEs. + continue + } + if slices.Contains([]string{"NA", "ANY"}, CPE.Version) { + // These are meaningless converting to commits. + continue + } + lastaffected = CPE.Version + if CPE.Update != "ANY" { + lastaffected += "-" + CPE.Update + } + } + + if introduced == "" && fixed == "" && lastaffected == "" { + continue + } + + if introduced != "" && !HasVersion(validVersions, introduced) { + metrics.AddNote("Warning: %s is not a valid introduced version", introduced) + } + + if fixed != "" && !HasVersion(validVersions, fixed) { + metrics.AddNote("Warning: %s is not a valid fixed version", fixed) + } + + // gotVersions = true + possibleNewAffectedVersion := models.AffectedVersion{ + Introduced: introduced, + Fixed: fixed, + LastAffected: lastaffected, + } + if slices.Contains(v.AffectedVersions, possibleNewAffectedVersion) { + // Avoid appending duplicates + continue + } + v.AffectedVersions = append(v.AffectedVersions, possibleNewAffectedVersion) + } } } @@ -945,6 +1032,7 @@ func RefAcceptable(ref models.Reference, tagDenyList []string) bool { } // Adds the repo to the cache for the Vendor/Product combination if not already present. +// *** Does external calls to verify repos *** func (c *VPRepoCache) MaybeUpdate(vp *VendorProduct, repo string) { if vp == nil { return @@ -1001,20 +1089,16 @@ func (c *VPRepoCache) Initialize(vpMap VendorProductToRepoMap) { // Takes a CVE ID string (for logging), VersionInfo with AffectedVersions and // typically no AffectedCommits and attempts to add AffectedCommits (including Fixed commits) where there aren't any. // Refuses to add the same commit to AffectedCommits more than once. -func VersionInfoToCommits(versions models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) (v models.VersionInfo) { +func VersionInfoToCommits(v *models.VersionInfo, repos []string, cache *git.RepoTagsCache, metrics *models.ConversionMetrics) { // versions is a VersionInfo with AffectedVersions and typically no AffectedCommits // v is a VersionInfo with AffectedCommits (containing Fixed commits) included - v = versions for _, repo := range repos { - if cache.IsInvalid(repo) { - continue - } normalizedTags, err := git.NormalizeRepoTags(repo, cache) if err != nil { metrics.AddNote("Failed to normalize tags %s %s", repo, err) continue } - for _, av := range versions.AffectedVersions { + for _, av := range v.AffectedVersions { metrics.AddNote("Attempting version resolution for %s in %s", av, repo) introducedEquivalentCommit := "" if av.Introduced != "" && av.Introduced != "0" { @@ -1034,7 +1118,7 @@ func VersionInfoToCommits(versions models.VersionInfo, repos []string, cache *gi // AffectedCommits (with Fixed commits) when the CVE has appropriate references, and assuming these references are indeed // Fixed commits, they're also assumed to be more precise than what may be derived from tag to commit mapping. fixedEquivalentCommit := "" - if v.HasFixedCommits(repo) && av.Fixed != "" && len(versions.AffectedVersions) == 1 { + if v.HasFixedCommits(repo) && av.Fixed != "" && len(v.AffectedVersions) == 1 { fixedEquivalentCommit = v.FixedCommits(repo)[0] metrics.AddNote("Using preassumed fixed commits instead of deriving from fixed version %s", av.Fixed) } else if av.Fixed != "" { @@ -1088,12 +1172,10 @@ func VersionInfoToCommits(versions models.VersionInfo, repos []string, cache *gi v.AffectedCommits = append(v.AffectedCommits, ac) } } - - return v } // Examines the CVE references for a CVE and derives repos for it, optionally caching it. -// TODO (jesslowe): refactor with below +// *** Does external calls to verify repos *** func ReposFromReferences(cache *VPRepoCache, vp *VendorProduct, refs []models.Reference, tagDenyList []string, repoTagsCache *git.RepoTagsCache, metrics *models.ConversionMetrics, httpClient *http.Client) (repos []string) { for _, ref := range refs { // If any of the denylist tags are in the ref's tag set, it's out of consideration. diff --git a/vulnfeeds/cves/versions_test.go b/vulnfeeds/cves/versions_test.go index 281b0d72783..a1e293d0a60 100644 --- a/vulnfeeds/cves/versions_test.go +++ b/vulnfeeds/cves/versions_test.go @@ -749,7 +749,7 @@ func TestExtractVersionInfo(t *testing.T) { AffectedCommits: []models.AffectedCommit(nil), AffectedVersions: []models.AffectedVersion{ { - Introduced: "0", + Introduced: "", Fixed: "14.4.5", LastAffected: "", }, @@ -775,7 +775,7 @@ func TestExtractVersionInfo(t *testing.T) { AffectedCommits: []models.AffectedCommit(nil), AffectedVersions: []models.AffectedVersion{ { - Introduced: "0", + Introduced: "", Fixed: "", LastAffected: "2.4.0", }, @@ -839,7 +839,7 @@ func TestExtractVersionInfo(t *testing.T) { }, AffectedVersions: []models.AffectedVersion{ { - Introduced: "0", + Introduced: "", Fixed: "2.6.4", LastAffected: "", }, @@ -873,7 +873,7 @@ func TestExtractVersionInfo(t *testing.T) { inputValidVersions: []string{}, expectedVersionInfo: models.VersionInfo{ AffectedCommits: []models.AffectedCommit{{Repo: "https://gitlab.freedesktop.org/xorg/lib/libxpm", Introduced: "0", Fixed: "a3a7c6dcc3b629d7650148"}}, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", Fixed: "3.5.15"}}, + AffectedVersions: []models.AffectedVersion{{Introduced: "", Fixed: "3.5.15"}}, }, expectedNotes: []string{}, }, @@ -882,7 +882,7 @@ func TestExtractVersionInfo(t *testing.T) { inputCVEItem: loadTestData2("CVE-2021-28429"), expectedVersionInfo: models.VersionInfo{ AffectedCommits: []models.AffectedCommit{{Repo: "https://git.ffmpeg.org/ffmpeg.git", Introduced: "0", Fixed: "c94875471e3ba3dc396c6919ff3ec9b14539cd71"}}, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", LastAffected: "4.3.2"}}, + AffectedVersions: []models.AffectedVersion{{Introduced: "", LastAffected: "4.3.2"}}, }, }, { @@ -909,7 +909,7 @@ func TestExtractVersionInfo(t *testing.T) { }, }, - AffectedVersions: []models.AffectedVersion{{Introduced: "0", Fixed: "1.2.5"}}, + AffectedVersions: []models.AffectedVersion{{Introduced: "", Fixed: "1.2.5"}}, }, }, } diff --git a/vulnfeeds/git/repository.go b/vulnfeeds/git/repository.go index 5322e8c505c..776d630b5a7 100644 --- a/vulnfeeds/git/repository.go +++ b/vulnfeeds/git/repository.go @@ -18,6 +18,7 @@ package git import ( "context" "errors" + "log/slog" "maps" "net/url" "path" @@ -33,6 +34,7 @@ import ( "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/transport" "github.com/go-git/go-git/v5/storage/memory" + "github.com/google/osv/vulnfeeds/utility/logger" "github.com/sethvargo/go-retry" ) @@ -133,12 +135,14 @@ func RemoteRepoRefsWithRetry(repoURL string, retries uint64) (refs []*plumbing.R if errors.Is(err, context.DeadlineExceeded) { return retry.RetryableError(err) } + logger.Warn("Error: "+err.Error(), slog.Any("repo", repo)) return err } return nil }); err != nil { + logger.Warn("Error: "+err.Error(), slog.Any("repo", repo)) return refs, err } @@ -159,6 +163,7 @@ func RepoName(repoURL string) (name string, e error) { // RepoTags returns an array of Tag being the (unpeeled, if annotated) tags and associated commits in repoURL. // An optional repoTagsCache can be supplied to reduce repeated remote connections to the same repo. +// *** Does external calls to verify repos *** func RepoTags(repoURL string, repoTagsCache *RepoTagsCache) (tags Tags, e error) { if repoTagsCache != nil { tagsRepoMap, ok := repoTagsCache.Get(repoURL) @@ -300,6 +305,7 @@ func RefBranches(refs []*plumbing.Reference) (branches []*plumbing.Reference) { } // Validate the repo by attempting to query it's references. +// *** Does external calls to verify repos *** func ValidRepo(repoURL string) (valid bool) { _, err := RemoteRepoRefsWithRetry(repoURL, 3) if err != nil && errors.Is(err, transport.ErrAuthenticationRequired) { @@ -314,6 +320,7 @@ func ValidRepo(repoURL string) (valid bool) { } // Otherwise functional repos that don't have any tags are not valid. +// *** Does external calls to verify repos *** func ValidRepoAndHasUsableRefs(repoURL string) (valid bool) { refs, err := RemoteRepoRefsWithRetry(repoURL, 3) if err != nil && errors.Is(err, transport.ErrAuthenticationRequired) { diff --git a/vulnfeeds/git/versions.go b/vulnfeeds/git/versions.go index ed8a0d32673..3983cc651c1 100644 --- a/vulnfeeds/git/versions.go +++ b/vulnfeeds/git/versions.go @@ -15,6 +15,7 @@ package git import ( + "errors" "fmt" "regexp" "slices" @@ -82,6 +83,9 @@ func VersionToAffectedCommit(version string, repo string, commitType models.Comm // Take an unnormalized version string, the pre-normalized mapping of tags to commits and return a commit hash. func VersionToCommit(version string, normalizedTags map[string]NormalizedTag) (string, error) { + if version == "" { + return "", errors.New("version cannot be empty") + } // TODO: try unnormalized version first. normalizedVersion, err := NormalizeVersion(version) if err != nil { diff --git a/vulnfeeds/models/metrics.go b/vulnfeeds/models/metrics.go index 85d7c1db4cd..0fb45656b72 100644 --- a/vulnfeeds/models/metrics.go +++ b/vulnfeeds/models/metrics.go @@ -59,6 +59,14 @@ func (m *ConversionMetrics) AddNote(format string, a ...any) { logger.Debug(fmt.Sprintf(format, a...), slog.String("cna", m.CNA), slog.String("cve", string(m.CVEID))) } +// SetOutcome sets the outcome of the conversion only if it's not already set, or has become successful. +func (m *ConversionMetrics) SetOutcome(outcome ConversionOutcome) { + if m.Outcome != ConversionUnknown || m.Outcome == Successful { + return + } + m.Outcome = outcome +} + // AddSource appends a source to the ConversionMetrics func (m *ConversionMetrics) AddSource(source VersionSource) { m.VersionSources = append(m.VersionSources, source) @@ -73,21 +81,22 @@ const ( VersionSourceGit VersionSource = "GITVERS" VersionSourceCPE VersionSource = "CPEVERS" VersionSourceDescription VersionSource = "DESCRVERS" + VersionSourceRefs VersionSource = "REFS" ) func DetermineOutcome(metrics *ConversionMetrics) { // check if we have affected ranges/versions. if len(metrics.Repos) == 0 { // Fix unlikely, as no repos to resolve - metrics.Outcome = NoRepos + metrics.SetOutcome(NoRepos) return } if metrics.ResolvedRangesCount > 0 { - metrics.Outcome = Successful + metrics.SetOutcome(Successful) } else if metrics.UnresolvedRangesCount > 0 { - metrics.Outcome = NoCommitRanges + metrics.SetOutcome(NoCommitRanges) } else { - metrics.Outcome = NoRanges + metrics.SetOutcome(NoRanges) } }