In [2]:
import json

def get_sbom_components(path: str) -> dict[str, dict]:
    def was_found_by_cachi2(component):
        for property in component["properties"]:
            if property["value"] == "cachi2":
                return True

        return False

    with open(f"{path}/merged.bom.json") as file:
        sbom = json.load(file)

    cachi2_components = {}
    syft_components = {}

    # We'll use only the name to try to find duplicates,
    # since Syft reports versions for Pip and NPM
    # HTTPS/VCS dependencies very differently from Cachi2
    for component in sbom["components"]:
        if was_found_by_cachi2(component):
            cachi2_components[component["name"]] = component
        else:
            syft_components[component["name"]] = component

    return cachi2_components, syft_components

In [3]:
# https://github.com/cachito-testing/cachito-npm-with-deps/commit/565aba4c7f210c6196c1b522e2279f853f77d6d2
cachi2_components, syft_components = get_sbom_components("npm/cachito-npm-with-deps")

for name in syft_components:
    if name in cachi2_components:
        print(name)

# no duplicates found

In [4]:
# https://github.com/brunoapimentel/sample-nodejs-app/commit/94cec5c1ef002efb0edd341a14f408843a105465
cachi2_components, syft_components = get_sbom_components("npm/sample-nodejs-app")

for name in syft_components:
    if name in cachi2_components:
        print(name)

# no duplicates found

In [5]:
# https://github.com/brunoapimentel/pip-e2e-test/commit/294df352deed835cf703ae8a799926418ae5fd3b
cachi2_components, syft_components = get_sbom_components("pip/pip-e2e-test")

# Remove components that Syft reports twice (probably a bug)
filtered_syft_components = filter(
    lambda c: "%20\\" not in c.get("purl", ""),
    syft_components.values()
)

syft_components = {component["name"]: component for component in filtered_syft_components}

print("Duplicates found by name: ")
for name in syft_components:
    if name in cachi2_components:
        print(name)
        print("syft version: " + syft_components[name]["version"])
        print("cachi2 version: " + cachi2_components[name]["version"])

# Syft reports two distinct versions of urllib3
# So the package is not actually duplicated

Duplicates found by name: 
urllib3
syft version: 1.26.5
cachi2 version: 1.21.1


In [6]:
# https://github.com/containerbuildsystem/cachi2/commit/fc0d6079c2dc9b2a491c0848e550ad3509986110
cachi2_components, syft_components = get_sbom_components("pip/cachi2")

# Remove components that Syft reports twice (probably a bug)
filtered_syft_components = filter(
    lambda c: "%20\\" not in c.get("purl", ""),
    syft_components.values()
)

syft_components = {component["name"]: component for component in filtered_syft_components}

print("Duplicates found by name: ")
for name in syft_components:
    if name in cachi2_components:
        print(name)

print()

# Syft creates a pseudo-version for the main package
print("cachi2 version")
print("reported by syft: " + syft_components["cachi2"]["version"])
print("reported by cachi2: " + cachi2_components["cachi2"]["version"])

print()

# Cachi2 uses a dash instead of an underscore in the purl
print("typing_extensions purl")
print("reported by syft: " + syft_components["typing_extensions"]["purl"])
print("reported by cachi2: " + cachi2_components["typing_extensions"]["purl"])

Duplicates found by name: 
cachi2
typing_extensions

cachi2 version
reported by syft: 0.0.post1+gdfd2180.d20230704
reported by cachi2: 0.0.1

typing_extensions purl
reported by syft: pkg:pypi/typing_extensions@4.7.1
reported by cachi2: pkg:pypi/typing-extensions@4.7.1


In [7]:
# https://github.com/cachito-testing/gomod-pandemonium/commit/0c6890c3280a00271891f4bd04705a56151428f0
cachi2_components, syft_components = get_sbom_components("golang/build-service")

# since there are no specific handling for file/vcs dependencies
# we'll also consider the version for finding duplicates
print("Duplicates found by name: ")
for name in syft_components:
    if name in cachi2_components \
        and cachi2_components[name]["version"] == syft_components[name]["version"]:
        print(name)

print()

# Syft duplicates the name go.opencensus.io in the purl
print("go.opencensus.io purl")
print("reported by syft: " + syft_components["go.opencensus.io"]["purl"])
print("reported by cachi2: " + cachi2_components["go.opencensus.io"]["purl"])

Duplicates found by name: 
go.opencensus.io

go.opencensus.io purl
reported by syft: pkg:golang/go.opencensus.io/go.opencensus.io@v0.24.0
reported by cachi2: pkg:golang/go.opencensus.io@v0.24.0?type=package


In [8]:
# https://github.com/redhat-appstudio/build-service/commit/d1a9e858489d1515621398fb02942da068f1c956
cachi2_components, syft_components = get_sbom_components("golang/gomod-pandemonium")

# since there are no specific handling for file/vcs dependencies
# we'll also consider the version for finding duplicates
for name in syft_components:
    if name in cachi2_components \
        and cachi2_components[name]["version"] == syft_components[name]["version"]:
        print(name)

# no duplicates found

In [9]:
# https://github.com/redhat-appstudio/build-service/commit/d1a9e858489d1515621398fb02942da068f1c956
cachi2_components, syft_components = get_sbom_components("/home/bpimente/Downloads")

# since there are no specific handling for file/vcs dependencies
# we'll also consider the version for finding duplicates
for name in syft_components:
    if name in cachi2_components \
        and cachi2_components[name]["version"] == syft_components[name]["version"]:
        print(name)

# no duplicates found

FileNotFoundError: [Errno 2] No such file or directory: '~/Downloads/merged.bom.json'