From 22bd88390482de78d56b1e7c5dea5971cbbf8bdf Mon Sep 17 00:00:00 2001 From: Ravi Mishra <145095127+ravi-mishra-10@users.noreply.github.com> Date: Thu, 1 Aug 2024 20:49:58 +0530 Subject: [PATCH] Update spdx_parser.py so that the SPDX file parsing logic generates the correct key/value pairs in the packages dictionary. The additional regex logic helps to find the right delta between two SPDX files. Without this fix, the ProductName is sometimes recorded incorrectly, e.g. as "v2" or without the full path of the software; this enhancement helps to detect the same product name between two SPDX files. --- sbomdiff/spdx_parser.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sbomdiff/spdx_parser.py b/sbomdiff/spdx_parser.py index e46f4de..daff3f1 100644 --- a/sbomdiff/spdx_parser.py +++ b/sbomdiff/spdx_parser.py @@ -34,16 +34,31 @@ def parse_spdx_tag(self, sbom_file): packages = {} package = "" version = None + githubStr = "pkg.go.dev/" for line in lines: line_elements = line.split(":") if line_elements[0] == "PackageName": + isProductNameWithOnlyVersionNumber = False package = line_elements[1].strip().rstrip("\n") + productNameWithOnlyVersionNumber = re.compile(r'(/)') + if bool(productNameWithOnlyVersionNumber.search(package)) != True: + isProductNameWithOnlyVersionNumber = True version = None license = None if line_elements[0] == "PackageVersion": version = line[16:].strip().rstrip("\n") if line_elements[0] == "PackageLicenseConcluded": license = line_elements[1].strip().rstrip("\n") + if line_elements[0] == "PackageHomePage": + packageHomePage = line_elements[1].strip().rstrip("\n") + packageHomePageRemaining = "" + if len(line_elements) > 2 : + packageHomePageRemaining = line_elements[2].strip().rstrip("\n") + packageHomePage = packageHomePage + packageHomePageRemaining + if isProductNameWithOnlyVersionNumber: + tempArry = packageHomePage.split(githubStr) + if len(tempArry) == 2: + package = tempArry[1] if package not in packages and version is not None and license is not None: 
packages[package] = [version, license]