From e90f70d5b7c601cb6cbeb5144bb9d69de8dcb62a Mon Sep 17 00:00:00 2001 From: mastersans Date: Mon, 19 Feb 2024 18:35:17 +0530 Subject: [PATCH 1/5] feat: heuristic splitting --- cve_bin_tool/sbom_manager/__init__.py | 52 +++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/cve_bin_tool/sbom_manager/__init__.py b/cve_bin_tool/sbom_manager/__init__.py index 7b0b588d25..eef39b9cb2 100644 --- a/cve_bin_tool/sbom_manager/__init__.py +++ b/cve_bin_tool/sbom_manager/__init__.py @@ -22,6 +22,11 @@ class SBOMManager: + """ + SBOMManager is a class that manages the Software Bill of Materials (SBOM) data. + It provides methods for scanning SBOM files, parsing them, and retrieving vendor information. + """ + SBOMtype = ["spdx", "cyclonedx", "swid"] sbom_data: defaultdict[ProductInfo, TriageData] @@ -72,6 +77,53 @@ def scan_file(self) -> dict[ProductInfo, TriageData]: if version != "": # Now add vendor to create product record.... vendor_set = self.get_vendor(product) + if ( + len(vendor_set) == 1 + and vendor_set[0] == "UNKNOWN" + and "-" in product + ): + # if the product have '-' in name try splitting it and try common prefixes. + found_flag = False + common_prefix = ( + "perl-", + "golang-", + "rubygem-", + "python-", + "py3-", + "python3-", + "python2-", + "rust-", + "nodejs-", + ) + for prefix in common_prefix: + if product.startswith(prefix): + temp_product = product[len(prefix) :] + temp_vendor_set = self.get_vendor(temp_product) + if len(temp_vendor_set) > 1 or ( + len(temp_vendor_set) == 1 + and temp_vendor_set[0] != "UNKNOWN" + ): + found_flag = True + product = temp_product + break + if not found_flag: + # if vendor not found after removing common prefix try splitting it + LOGGER.warning( + f"No Vendor found for {product}, trying splitted product. " + "Some results may be inaccurate due to vendor identification limitations." 
+ ) + splitted_product = product.split("-") + for pro in splitted_product: + temp = self.get_vendor(pro) + if len(temp) > 1 or ( + len(temp) == 1 and temp[0] != "UNKNOWN" + ): + for vendor in temp: + # if vendor is not None: + parsed_data.append( + ProductInfo(vendor, pro, version) + ) + else: for vendor in vendor_set: # if vendor is not None: parsed_data.append(ProductInfo(vendor, product, version)) From 1a2411869ebf1ce19701a80a7a317f3c0896fea8 Mon Sep 17 00:00:00 2001 From: mastersans Date: Mon, 19 Feb 2024 18:42:39 +0530 Subject: [PATCH 2/5] fix: indentation --- cve_bin_tool/sbom_manager/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cve_bin_tool/sbom_manager/__init__.py b/cve_bin_tool/sbom_manager/__init__.py index eef39b9cb2..d3854ff2f2 100644 --- a/cve_bin_tool/sbom_manager/__init__.py +++ b/cve_bin_tool/sbom_manager/__init__.py @@ -123,10 +123,10 @@ def scan_file(self) -> dict[ProductInfo, TriageData]: parsed_data.append( ProductInfo(vendor, pro, version) ) - else: - for vendor in vendor_set: - # if vendor is not None: - parsed_data.append(ProductInfo(vendor, product, version)) + else: + for vendor in vendor_set: + # if vendor is not None: + parsed_data.append(ProductInfo(vendor, product, version)) for row in parsed_data: self.sbom_data[row]["default"] = { From 71dd3d9b5ab5542014271e46a51cf5e58c3a3f55 Mon Sep 17 00:00:00 2001 From: mastersans Date: Mon, 19 Feb 2024 19:15:59 +0530 Subject: [PATCH 3/5] fix: appending prefix product --- cve_bin_tool/sbom_manager/__init__.py | 31 ++++++++++++++++----------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/cve_bin_tool/sbom_manager/__init__.py b/cve_bin_tool/sbom_manager/__init__.py index d3854ff2f2..d87b1987b9 100644 --- a/cve_bin_tool/sbom_manager/__init__.py +++ b/cve_bin_tool/sbom_manager/__init__.py @@ -83,7 +83,7 @@ def scan_file(self) -> dict[ProductInfo, TriageData]: and "-" in product ): # if the product have '-' in name try splitting it and try common 
prefixes. - found_flag = False + found_common_prefix = False common_prefix = ( "perl-", "golang-", @@ -97,31 +97,38 @@ def scan_file(self) -> dict[ProductInfo, TriageData]: ) for prefix in common_prefix: if product.startswith(prefix): - temp_product = product[len(prefix) :] - temp_vendor_set = self.get_vendor(temp_product) - if len(temp_vendor_set) > 1 or ( - len(temp_vendor_set) == 1 - and temp_vendor_set[0] != "UNKNOWN" + common_prefix_product = product[len(prefix) :] + common_prefix_vendor = self.get_vendor( + common_prefix_product + ) + if len(common_prefix_vendor) > 1 or ( + len(common_prefix_vendor) == 1 + and common_prefix_vendor[0] != "UNKNOWN" ): - found_flag = True - product = temp_product + found_common_prefix = True + for vendor in common_prefix_vendor: + parsed_data.append( + ProductInfo( + vendor, common_prefix_product, version + ) + ) break - if not found_flag: + if not found_common_prefix: # if vendor not found after removing common prefix try splitting it LOGGER.warning( f"No Vendor found for {product}, trying splitted product. " "Some results may be inaccurate due to vendor identification limitations." 
) splitted_product = product.split("-") - for pro in splitted_product: - temp = self.get_vendor(pro) + for sp in splitted_product: + temp = self.get_vendor(sp) if len(temp) > 1 or ( len(temp) == 1 and temp[0] != "UNKNOWN" ): for vendor in temp: # if vendor is not None: parsed_data.append( - ProductInfo(vendor, pro, version) + ProductInfo(vendor, sp, version) ) else: for vendor in vendor_set: From 223d1e2e14b0c8d13cfa6c2c11f249a800b3ee27 Mon Sep 17 00:00:00 2001 From: mastersans Date: Fri, 22 Mar 2024 09:44:55 +0530 Subject: [PATCH 4/5] feat: common_prefix_split function and unit test --- cve_bin_tool/sbom_manager/__init__.py | 105 ++++++++++++-------------- test/test_sbom.py | 19 +++++ 2 files changed, 67 insertions(+), 57 deletions(-) diff --git a/cve_bin_tool/sbom_manager/__init__.py b/cve_bin_tool/sbom_manager/__init__.py index d87b1987b9..6947a0992b 100644 --- a/cve_bin_tool/sbom_manager/__init__.py +++ b/cve_bin_tool/sbom_manager/__init__.py @@ -49,6 +49,51 @@ def __init__( # Connect to the database self.cvedb = CVEDB(version_check=False) + def common_prefix_split(self, product, version): + """If the product have '-' in name try splitting it and try common prefixes. 
+ currently not being used, proposed to be used in future""" + parsed_data: list[ProductInfo] = [] + found_common_prefix = False + common_prefix = ( + "perl-", + "golang-", + "rubygem-", + "python-", + "py3-", + "python3-", + "python2-", + "rust-", + "nodejs-", + ) + for prefix in common_prefix: + if product.startswith(prefix): + common_prefix_product = product[len(prefix) :] + common_prefix_vendor = self.get_vendor(common_prefix_product) + if len(common_prefix_vendor) > 1 or ( + len(common_prefix_vendor) == 1 + and common_prefix_vendor[0] != "UNKNOWN" + ): + found_common_prefix = True + for vendor in common_prefix_vendor: + parsed_data.append( + ProductInfo(vendor, common_prefix_product, version) + ) + break + if not found_common_prefix: + # if vendor not found after removing common prefix try splitting it + LOGGER.debug( + f"No Vendor found for {product}, trying splitted product. " + "Some results may be inaccurate due to vendor identification limitations." + ) + splitted_product = product.split("-") + for sp in splitted_product: + temp = self.get_vendor(sp) + if len(temp) > 1 or (len(temp) == 1 and temp[0] != "UNKNOWN"): + for vendor in temp: + # if vendor is not None: + parsed_data.append(ProductInfo(vendor, sp, version)) + return parsed_data + def scan_file(self) -> dict[ProductInfo, TriageData]: self.logger.debug( f"Processing SBOM {self.filename} of type {self.type.upper()}" @@ -77,63 +122,9 @@ def scan_file(self) -> dict[ProductInfo, TriageData]: if version != "": # Now add vendor to create product record.... vendor_set = self.get_vendor(product) - if ( - len(vendor_set) == 1 - and vendor_set[0] == "UNKNOWN" - and "-" in product - ): - # if the product have '-' in name try splitting it and try common prefixes. 
- found_common_prefix = False - common_prefix = ( - "perl-", - "golang-", - "rubygem-", - "python-", - "py3-", - "python3-", - "python2-", - "rust-", - "nodejs-", - ) - for prefix in common_prefix: - if product.startswith(prefix): - common_prefix_product = product[len(prefix) :] - common_prefix_vendor = self.get_vendor( - common_prefix_product - ) - if len(common_prefix_vendor) > 1 or ( - len(common_prefix_vendor) == 1 - and common_prefix_vendor[0] != "UNKNOWN" - ): - found_common_prefix = True - for vendor in common_prefix_vendor: - parsed_data.append( - ProductInfo( - vendor, common_prefix_product, version - ) - ) - break - if not found_common_prefix: - # if vendor not found after removing common prefix try splitting it - LOGGER.warning( - f"No Vendor found for {product}, trying splitted product. " - "Some results may be inaccurate due to vendor identification limitations." - ) - splitted_product = product.split("-") - for sp in splitted_product: - temp = self.get_vendor(sp) - if len(temp) > 1 or ( - len(temp) == 1 and temp[0] != "UNKNOWN" - ): - for vendor in temp: - # if vendor is not None: - parsed_data.append( - ProductInfo(vendor, sp, version) - ) - else: - for vendor in vendor_set: - # if vendor is not None: - parsed_data.append(ProductInfo(vendor, product, version)) + for vendor in vendor_set: + # if vendor is not None: + parsed_data.append(ProductInfo(vendor, product, version)) for row in parsed_data: self.sbom_data[row]["default"] = { diff --git a/test/test_sbom.py b/test/test_sbom.py index 4020ed0a41..b42d1adae1 100644 --- a/test/test_sbom.py +++ b/test/test_sbom.py @@ -37,6 +37,10 @@ class TestSBOM: "paths": {""}, }, } + SPLIT_DATA = [ + ProductInfo(vendor="openzeppelin", product="contracts", version="4.8.1"), + ProductInfo(vendor="downline_goldmine", product="builder", version="3.2.4"), + ] @pytest.mark.parametrize( "filepath", @@ -118,6 +122,21 @@ def test_valid_swid_file( for p in swid_parsed_data: assert p in scan_result + @pytest.mark.parametrize( 
+ "product, version, productinfo, no_existent_file", + [ + ("openzeppelin-contracts", "4.8.1", SPLIT_DATA[0], "no_existent_file"), + ("rubygem-builder", "3.2.4", SPLIT_DATA[1], "no_existent_file"), + ], + ) + def test_common_prefix_split(self, product, version, productinfo, no_existent_file): + """Unit test for common_prefix_split, which tries to split on hyphens if no vendors + are found and the product name has a hyphen; a no_existent_file is used here + with the sole purpose of creating an SBOMManager instance""" + sbom_engine = SBOMManager(no_existent_file) + scanned_list = sbom_engine.common_prefix_split(product, version) + assert productinfo in scanned_list + @pytest.mark.parametrize( "filename, sbom_type, validate", ( From 12d02ca028ea24ed2474e8f90576775766525599 Mon Sep 17 00:00:00 2001 From: mastersans Date: Fri, 22 Mar 2024 10:18:48 +0530 Subject: [PATCH 5/5] feat: type hint --- cve_bin_tool/sbom_manager/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cve_bin_tool/sbom_manager/__init__.py b/cve_bin_tool/sbom_manager/__init__.py index 6947a0992b..809caac273 100644 --- a/cve_bin_tool/sbom_manager/__init__.py +++ b/cve_bin_tool/sbom_manager/__init__.py @@ -49,7 +49,7 @@ def __init__( # Connect to the database self.cvedb = CVEDB(version_check=False) - def common_prefix_split(self, product, version): + def common_prefix_split(self, product, version) -> list[ProductInfo]: """If the product have '-' in name try splitting it and try common prefixes. currently not being used, proposed to be used in future""" parsed_data: list[ProductInfo] = []