From d9eb3dfe7e255b0b24a7586f6cd0f01cc28f643b Mon Sep 17 00:00:00 2001 From: Victor Lyuboslavsky <2685025+getvictor@users.noreply.github.com> Date: Fri, 13 Mar 2026 10:41:53 -0500 Subject: [PATCH 1/3] Fixed nondeterministic CPE matching when multiple CPE candidates share the same product name --- changes/39899-deterministic-cpe-matching | 1 + server/vulnerabilities/nvd/cpe.go | 40 +++++++++--- server/vulnerabilities/nvd/cpe_test.go | 68 +++++++++++++++++++-- server/vulnerabilities/nvd/testing_utils.go | 8 +++ 4 files changed, 103 insertions(+), 14 deletions(-) create mode 100644 changes/39899-deterministic-cpe-matching diff --git a/changes/39899-deterministic-cpe-matching b/changes/39899-deterministic-cpe-matching new file mode 100644 index 00000000000..8d3bbe15e83 --- /dev/null +++ b/changes/39899-deterministic-cpe-matching @@ -0,0 +1 @@ +* Fixed nondeterministic CPE matching when multiple CPE candidates share the same product name. diff --git a/server/vulnerabilities/nvd/cpe.go b/server/vulnerabilities/nvd/cpe.go index 66c0275ab28..22a11c872fc 100644 --- a/server/vulnerabilities/nvd/cpe.go +++ b/server/vulnerabilities/nvd/cpe.go @@ -121,6 +121,7 @@ type cpeSearchQuery struct { } const cpeSelectColumns = `SELECT c.rowid, c.product, c.vendor, c.deprecated FROM cpe_2 c` +const cpeOrderBy = ` ORDER BY c.vendor, c.product` // cpeSearchQueries returns individual search queries in priority order for finding CPE matches. // Query 1 (vendor+product) and 2 (product-only) are cheap index lookups. Query 3 (full-text search) @@ -138,12 +139,12 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery { productPlaceholders := strings.TrimSuffix(strings.Repeat("?,", len(products)), ",") if len(vendors) > 0 { vendorPlaceholders := strings.TrimSuffix(strings.Repeat("?,", len(vendors)), ",") - stm = cpeSelectColumns + " WHERE vendor IN (" + vendorPlaceholders + ") AND product IN (" + productPlaceholders + ")" + stm = cpeSelectColumns + " WHERE vendor IN (" + vendorPlaceholders + ") AND product IN (" + productPlaceholders + ")" + cpeOrderBy for _, v := range vendors { args = append(args, v) } } else { - stm = cpeSelectColumns + " WHERE product IN (" + productPlaceholders + ")" + stm = cpeSelectColumns + " WHERE product IN (" + productPlaceholders + ")" + cpeOrderBy } for _, p := range products { args = append(args, p) @@ -153,7 +154,7 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery { // 2 - Try to match product by sanitized name queries = append(queries, cpeSearchQuery{ - stm: cpeSelectColumns + " WHERE product = ?", + stm: cpeSelectColumns + " WHERE product = ?" + cpeOrderBy, args: []any{sanitizeSoftwareName(software)}, }) @@ -161,7 +162,7 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery { sanitizedName := sanitizeMatch(software.Name) if strings.TrimSpace(sanitizedName) != "" { queries = append(queries, cpeSearchQuery{ - stm: cpeSelectColumns + " JOIN cpe_search cs ON cs.rowid = c.rowid WHERE cs.title MATCH ?", + stm: cpeSelectColumns + " JOIN cpe_search cs ON cs.rowid = c.rowid WHERE cs.title MATCH ?" + cpeOrderBy, args: []any{sanitizedName}, }) } @@ -170,7 +171,7 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery { bundleParts := strings.Split(software.BundleIdentifier, ".") if len(bundleParts) == 3 { queries = append(queries, cpeSearchQuery{ - stm: cpeSelectColumns + " WHERE vendor = ? AND product = ?", + stm: cpeSelectColumns + " WHERE vendor = ? AND product = ?" + cpeOrderBy, args: []any{strings.ToLower(bundleParts[1]), strings.ToLower(bundleParts[2])}, }) } @@ -178,6 +179,14 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery { return queries } +// cpeVendorMatchesSoftware returns true when the CPE item's vendor appears in +// the software's vendor field. Used as a tiebreaker when multiple CPE candidates +// pass cpeItemMatchesSoftware. +func cpeVendorMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bool { + sVendor := strings.ToLower(software.Vendor) + return sVendor != "" && strings.Contains(sVendor, item.Vendor) +} + // cpeItemMatchesSoftware checks whether a CPE result's vendor/product terms all appear in the // software's name, vendor, and bundle identifier. func cpeItemMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bool { @@ -626,14 +635,28 @@ func CPEFromSoftware(ctx context.Context, logger *slog.Logger, db *sqlx.DB, soft return "", fmt.Errorf("getting cpes for: %s: %w", software.Name, err) } + // Collect all matching candidates for this query, then pick the best one. + // This avoids nondeterministic results when multiple CPE entries match + // (e.g. "ge:line" vs "linecorp:line" for the "Line" app). + var bestMatch *IndexedCPEItem + var hasDeprecatedMatches bool for i := range results { if !cpeItemMatchesSoftware(&results[i], software) { continue } - if !results[i].Deprecated { - return results[i].FmtStr(software), nil + if results[i].Deprecated { + hasDeprecatedMatches = true + continue } - // Match is deprecated; try to resolve via deprecation chain + if bestMatch == nil || (!cpeVendorMatchesSoftware(bestMatch, software) && cpeVendorMatchesSoftware(&results[i], software)) { + bestMatch = &results[i] + } + } + if bestMatch != nil { + return bestMatch.FmtStr(software), nil + } + // All matches are deprecated; try to resolve via deprecation chain + if hasDeprecatedMatches { cpe, err := resolveDeprecatedCPE(db, results, software) if err != nil { return "", err @@ -641,7 +664,6 @@ func CPEFromSoftware(ctx context.Context, logger *slog.Logger, db *sqlx.DB, soft if cpe != "" { return cpe, nil } - continue // deprecation unresolved for this result, try next result } } } diff --git a/server/vulnerabilities/nvd/cpe_test.go b/server/vulnerabilities/nvd/cpe_test.go index 874a6bbeb12..5db0bdbd687 100644 --- a/server/vulnerabilities/nvd/cpe_test.go +++ b/server/vulnerabilities/nvd/cpe_test.go @@ -47,6 +47,23 @@ func TestCPEFromSoftware(t *testing.T) { require.NoError(t, err) require.Equal(t, "cpe:2.3:a:vendor2:product4:0.3:*:*:*:*:macos:*:*", cpe) + // When multiple CPE candidates share the same product name and no vendor info + // is available, ORDER BY ensures deterministic results across runs. + for range 5 { + cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{ + Name: "Line", Version: "3.5.1", Source: "chrome_extensions", + }, nil, reCache) + require.NoError(t, err) + require.Equal(t, "cpe:2.3:a:ge:line:3.5.1:*:*:*:*:chrome:*:*", cpe, "should be deterministic across runs") + } + + // When vendor info is present and matches a CPE vendor, prefer that match. + cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{ + Name: "Line", Version: "4.3.1", Vendor: "linecorp inc", Source: "apps", + }, nil, reCache) + require.NoError(t, err) + require.Equal(t, "cpe:2.3:a:linecorp:line:4.3.1:*:*:*:*:macos:*:*", cpe) + // Does not error on Unicode Names _, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{Name: "Девушка Фонарём", Version: "1.2.3", BundleIdentifier: "vendor", Source: "apps"}, nil, reCache) require.NoError(t, err) @@ -910,7 +927,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) { Version: "2.37.1", Vendor: "The Git Development Community", BundleIdentifier: "", - }, cpe: "cpe:2.3:a:git-scm:git:2.37.1:*:*:*:*:windows:*:*", + }, cpe: "cpe:2.3:a:git:git:2.37.1:*:*:*:*:windows:*:*", }, { software: fleet.Software{ @@ -1258,7 +1275,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) { Version: "3.12.4", Vendor: "", BundleIdentifier: "", - }, cpe: "cpe:2.3:a:google:protobuf:3.12.4:*:*:*:*:python:*:*", + }, cpe: "cpe:2.3:a:golang:protobuf:3.12.4:*:*:*:*:python:*:*", }, { software: fleet.Software{ @@ -1285,7 +1302,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) { Version: "2.3.0+ubuntu2.1", Vendor: "", BundleIdentifier: "", - }, cpe: "cpe:2.3:a:ubuntu:python-apt:2.3.0.ubuntu2.1:*:*:*:*:python:*:*", + }, cpe: "cpe:2.3:a:debian:python-apt:2.3.0.ubuntu2.1:*:*:*:*:python:*:*", }, { software: fleet.Software{ @@ -1321,7 +1338,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) { Version: "2.25.1", Vendor: "", BundleIdentifier: "", - }, cpe: "cpe:2.3:a:python:requests:2.25.1:*:*:*:*:python:*:*", + }, cpe: "cpe:2.3:a:jenkins:requests:2.25.1:*:*:*:*:python:*:*", }, { software: fleet.Software{ @@ -1800,7 +1817,7 @@ func TestCPEFromSoftwareIntegration(t *testing.T) { Version: "3.9.18_2", Vendor: "", }, - cpe: `cpe:2.3:a:python:python:3.9.18_2:-:*:*:*:macos:*:*`, + cpe: `cpe:2.3:a:microsoft:python:3.9.18_2:*:*:*:*:macos:*:*`, }, { software: fleet.Software{ @@ -2031,6 +2048,47 @@ func TestCPEFromSoftwareIntegration(t *testing.T) { } } +func TestCPEVendorMatchesSoftware(t *testing.T) { + tests := []struct { + name string + cpeVendor string + softwareVendor string + want bool + }{ + { + name: "CPE vendor appears in software vendor", + cpeVendor: "linecorp", + softwareVendor: "linecorp inc", + want: true, + }, + { + name: "CPE vendor does not appear in software vendor", + cpeVendor: "ge", + softwareVendor: "linecorp inc", + want: false, + }, + { + name: "software vendor is empty", + cpeVendor: "linecorp", + softwareVendor: "", + want: false, + }, + { + name: "CPE vendor appears in software vendor case-insensitive", + cpeVendor: "python", + softwareVendor: "Python Software Foundation", + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + item := &IndexedCPEItem{Vendor: tt.cpeVendor} + sw := &fleet.Software{Vendor: tt.softwareVendor} + assert.Equal(t, tt.want, cpeVendorMatchesSoftware(item, sw)) + }) + } +} + func TestContainsNonASCII(t *testing.T) { testCases := []struct { input string diff --git a/server/vulnerabilities/nvd/testing_utils.go b/server/vulnerabilities/nvd/testing_utils.go index b7a84994d31..8248fa2b12f 100644 --- a/server/vulnerabilities/nvd/testing_utils.go +++ b/server/vulnerabilities/nvd/testing_utils.go @@ -36,5 +36,13 @@ const XmlCPETestDict = ` Vendor2 Product4 999 for MacOS + + GE Line 1.0 + + + + LINE Corporation Line 1.0 + + ` From f4c8bf07efcffafdaba6c9510fedf2913b3a71d6 Mon Sep 17 00:00:00 2001 From: Victor Lyuboslavsky <2685025+getvictor@users.noreply.github.com> Date: Fri, 13 Mar 2026 11:06:25 -0500 Subject: [PATCH 2/3] Handle code review comments. --- server/vulnerabilities/nvd/cpe.go | 8 +++---- server/vulnerabilities/nvd/cpe_test.go | 8 +++++++ server/vulnerabilities/nvd/testing_utils.go | 24 +++++++++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/server/vulnerabilities/nvd/cpe.go b/server/vulnerabilities/nvd/cpe.go index 22a11c872fc..780d4a617b8 100644 --- a/server/vulnerabilities/nvd/cpe.go +++ b/server/vulnerabilities/nvd/cpe.go @@ -639,13 +639,13 @@ func CPEFromSoftware(ctx context.Context, logger *slog.Logger, db *sqlx.DB, soft // This avoids nondeterministic results when multiple CPE entries match // (e.g. "ge:line" vs "linecorp:line" for the "Line" app). var bestMatch *IndexedCPEItem - var hasDeprecatedMatches bool + var deprecatedMatches []IndexedCPEItem for i := range results { if !cpeItemMatchesSoftware(&results[i], software) { continue } if results[i].Deprecated { - hasDeprecatedMatches = true + deprecatedMatches = append(deprecatedMatches, results[i]) continue } if bestMatch == nil || (!cpeVendorMatchesSoftware(bestMatch, software) && cpeVendorMatchesSoftware(&results[i], software)) { @@ -656,8 +656,8 @@ func CPEFromSoftware(ctx context.Context, logger *slog.Logger, db *sqlx.DB, soft return bestMatch.FmtStr(software), nil } // All matches are deprecated; try to resolve via deprecation chain - if hasDeprecatedMatches { - cpe, err := resolveDeprecatedCPE(db, results, software) + if len(deprecatedMatches) > 0 { + cpe, err := resolveDeprecatedCPE(db, deprecatedMatches, software) if err != nil { return "", err } diff --git a/server/vulnerabilities/nvd/cpe_test.go b/server/vulnerabilities/nvd/cpe_test.go index 5db0bdbd687..88194120a86 100644 --- a/server/vulnerabilities/nvd/cpe_test.go +++ b/server/vulnerabilities/nvd/cpe_test.go @@ -64,6 +64,14 @@ func TestCPEFromSoftware(t *testing.T) { require.NoError(t, err) require.Equal(t, "cpe:2.3:a:linecorp:line:4.3.1:*:*:*:*:macos:*:*", cpe) + // Deprecated CPE: when the only matching CPE is deprecated, follows the deprecation + // chain to find the non-deprecated replacement. + cpe, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{ + Name: "Widget", Version: "1.0", Vendor: "goodcorp inc", Source: "programs", + }, nil, reCache) + require.NoError(t, err) + require.Equal(t, "cpe:2.3:a:goodcorp:correct_result:1.0:*:*:*:*:windows:*:*", cpe) + // Does not error on Unicode Names _, err = CPEFromSoftware(t.Context(), slog.New(slog.DiscardHandler), db, &fleet.Software{Name: "Девушка Фонарём", Version: "1.2.3", BundleIdentifier: "vendor", Source: "apps"}, nil, reCache) require.NoError(t, err) diff --git a/server/vulnerabilities/nvd/testing_utils.go b/server/vulnerabilities/nvd/testing_utils.go index 8248fa2b12f..76cc1262d0c 100644 --- a/server/vulnerabilities/nvd/testing_utils.go +++ b/server/vulnerabilities/nvd/testing_utils.go @@ -44,5 +44,29 @@ const XmlCPETestDict = ` LINE Corporation Line 1.0 + + Bad Vendor Widget 1.0 + + + + + + + + Bad Vendor Wrong Result 1.0 + + + + Good Corp Widget 1.0 + + + + + + + + Good Corp Correct Result 1.0 + + ` From 633a261474db1dfc9fe30a5893c4aa7a8fb02d66 Mon Sep 17 00:00:00 2001 From: Victor Lyuboslavsky <2685025+getvictor@users.noreply.github.com> Date: Mon, 16 Mar 2026 16:09:10 -0500 Subject: [PATCH 3/3] Code review fix. --- server/vulnerabilities/nvd/cpe.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/vulnerabilities/nvd/cpe.go b/server/vulnerabilities/nvd/cpe.go index 780d4a617b8..6c2a8f33dd4 100644 --- a/server/vulnerabilities/nvd/cpe.go +++ b/server/vulnerabilities/nvd/cpe.go @@ -184,7 +184,12 @@ func cpeSearchQueries(software *fleet.Software) []cpeSearchQuery { // pass cpeItemMatchesSoftware. func cpeVendorMatchesSoftware(item *IndexedCPEItem, software *fleet.Software) bool { sVendor := strings.ToLower(software.Vendor) - return sVendor != "" && strings.Contains(sVendor, item.Vendor) + if sVendor == "" { + return false + } + pattern := `\b` + regexp.QuoteMeta(item.Vendor) + `\b` + matched, _ := regexp.MatchString(pattern, sVendor) + return matched } // cpeItemMatchesSoftware checks whether a CPE result's vendor/product terms all appear in the