Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
cyclonedx.xsd
spdx.xsd
credentials
5 changes: 5 additions & 0 deletions scripts/cyclonedx-wrapper.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<schema elementFormDefault="qualified" xmlns="http://www.w3.org/2001/XMLSchema">
<import namespace="http://cyclonedx.org/schema/spdx" schemaLocation="spdx.xsd"/>
<import namespace="http://cyclonedx.org/schema/bom/1.2" schemaLocation="cyclonedx.xsd"/>
</schema>
245 changes: 210 additions & 35 deletions scripts/sbom_scraper.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
#
# Scrape a docker image and upload as public or private SBOM file
# Scrape a docker image and upload as public (default) or private SBOM file
#
# Preparation:
#
Expand All @@ -13,39 +13,47 @@
# and note down the CLIENT_ID and SECRET.
#
# Copy the SECRET generated to the file specified by ${CLIENTSECRET_FILE} below. This
# file should reside in a subdirectory with 0600 permissions.
# file should reside in a subdirectory with 0700 permissions.
#
# Use the CLIENT_ID as the first fixed argument to this script.
#

SCRIPTNAME=$(basename "$0")

SYFT=$(which syft)
if [ -z "${SYFT}" ]
then
echo "syft command not found"
exit 10
fi
JQ=$(which jq)
if [ -z "${JQ}" ]
then
JQ="cat"
else
JQ="jq ."
fi
# Every external tool used further down must be installed; stop early and name the
# first one that is missing so the message can be acted on without guessing.
for TOOL in syft jq xq xmllint python3
do
    if ! type "$TOOL" > /dev/null 2>&1
    then
        echo >&2 "please make sure '$TOOL' is on your PATH"
        exit 10
    fi
done

set -e
set -u

LOGTAG=$$
# Write a progress message to stdout, prefixed with ${LOGTAG} and a timestamp.
# All arguments are joined into one message via $*.
# NOTE(review): two echo statements are present, so every call prints the message
# twice - once with an --rfc-3339=seconds timestamp and a trailing " ...", once with
# the default date format. Presumably only one of them is intended - confirm.
log() {
echo "${LOGTAG}:$(date --rfc-3339=seconds):$* ..."
echo "${LOGTAG}:$(date ):$*"
}

GIT_STATUS=$(git status --porcelain)

# defaults
FORMAT=cyclonedx
AUTHOR_NAME="$(git config user.name)"
AUTHOR_EMAIL="$(git config user.email)"
COMPONENT_AUTHOR_NAME="$AUTHOR_NAME"
SUPPLIER_NAME=dockerhub
SUPPLIER_URL=https://hub.docker.com
TOOL_NAME="$(git config --get remote.origin.url) $(git ls-files --full-name "$SCRIPTNAME")"
TOOL_VERSION=$(git describe --tags)${GIT_STATUS:++}
TOOL_VENDOR="Jitsuin Inc"
TOOL_HASH_ALG=SHA-256
# shellcheck disable=SC2002
TOOL_HASH_CONTENT=$(cat "$0" | openssl dgst -sha256)

# credentials directory has 0600 permissions
# credentials directory should have 0700 permissions
CLIENTSECRET_FILE=credentials/client_secret
SBOM=false
PRIVACY=PUBLIC
Expand All @@ -55,37 +63,44 @@ URL=https://app.rkvst.io
# Print the help text on stderr and terminate the script with exit status 1.
# The here-document delimiter is unquoted, so $SCRIPTNAME, $0 and the current values
# of the option defaults are expanded at the moment the text is printed.
# NOTE(review): the synopsis labels -A as AUTHOR_NAME while the option list describes
# it as COMPONENT_AUTHOR - presumably the synopsis should say COMPONENT_AUTHOR_NAME.
usage() {
cat >&2 <<EOF

Scrape an SBOM from a docker image and upload to abom archivist
Create a Cyclone DX 1.2 XML SBOM from a docker image and upload to RKVST SBOM Hub

Usage: $SCRIPTNAME [-p] [-c clientsecretfile] [-o output format] [-s sbomFile ] [-u url] client_id [docker-image|sbom file]
Usage: $SCRIPTNAME [-a AUTHOR_NAME] [-A AUTHOR_NAME] [-c CLIENT_SECRET_FILE] [-e AUTHOR_EMAIL] [-s] [-p] [-u URL] CLIENT_ID [docker-image:tag|sbom file]

-c clientsecretfile containing client secret (default ${CLIENTSECRET_FILE})
-o FORMAT default ($FORMAT) [cyclonedx]
-s default ($SBOM) if specified the second argument is an sbom file
and -o is ignored.
-p upload private SBOM
-u URL URL Default ($URL)
-a AUTHOR name of the author of the SBOM. Default ($AUTHOR_NAME)
-A COMPONENT_AUTHOR name of the author of the docker image. Default ($COMPONENT_AUTHOR_NAME)
-c CLIENT_SECRET_FILE containing client secret (default ${CLIENTSECRET_FILE})
-e AUTHOR_EMAIL email address of the author of the SBOM. Default ($AUTHOR_EMAIL)
-s if specified the second argument is an sbom file.
Default ($SBOM)
-p upload private SBOM
-u URL URL of archivist SBOM hub. Default ($URL)

Example:
Examples:

$0 29b48af4-45ca-465b-b136-206674f8aa9b ubuntu:21.10
$0 -s 29b48af4-45ca-465b-b136-206674f8aa9b ./my-sbom.xml

EOF

exit 1
}

while getopts "c:ho:psu:" o; do
while getopts "a:A:c:e:hpsu:" o; do
case "${o}" in
a) AUTHOR_NAME="${OPTARG}"
;;
A) COMPONENT_AUTHOR_NAME="${OPTARG}"
;;
c) CLIENTSECRET_FILE="${OPTARG}"
;;
o) FORMAT=${OPTARG}
e) AUTHOR_EMAIL="${OPTARG}"
;;
p) PRIVACY=PRIVATE
;;
s) SBOM=true
;;
u) URL=$OPTARG
u) URL="$OPTARG"
;;
*)
usage
Expand Down Expand Up @@ -133,19 +148,179 @@ SECRET=$(cat "${CLIENTSECRET_FILE}")
# ----------------------------------------------------------------------------
if [ "${SBOM}" = "false" ]
then
log "Scrape ${FORMAT} SBOM from ${DOCKER_IMAGE} to ${OUTFILE}..."
log "Scrape ${FORMAT} SBOM from ${DOCKER_IMAGE} to ${OUTFILE} ..."
OUTPUT="${TEMPDIR}/${OUTFILE}"
${SYFT} -q packages -o "${FORMAT}" "${DOCKER_IMAGE}"> "${OUTPUT}"
syft -q packages -o "${FORMAT}" "${DOCKER_IMAGE}"> "${OUTPUT}"
else
OUTPUT="${DOCKER_IMAGE}"
fi

# ----------------------------------------------------------------------------
# Update SBOM including NTIA minimum elements
# ----------------------------------------------------------------------------
# Read the component name and version recorded in the SBOM.
ORIG_COMPONENT_NAME=$(xq -r .bom.metadata.component.name "$OUTPUT")
ORIG_COMPONENT_VERSION=$(xq -r .bom.metadata.component.version "$OUTPUT")

# The recorded name is split at its colons: the text before the first colon becomes
# the component name, the text after the last colon becomes the component version.
COMPONENT_NAME=${ORIG_COMPONENT_NAME%%:*}
COMPONENT_VERSION=${ORIG_COMPONENT_NAME##*:}

# The recorded version is split the same way into a hash algorithm and a hash value.
HASH_ALG="${ORIG_COMPONENT_VERSION%%:*}"
case "${HASH_ALG^^}" in
    SHA256)
        COMPONENT_HASH_ALG="SHA-256"
        ;;
    *)
        # Stop here: carrying on would leave COMPONENT_HASH_ALG unset and the script
        # would die further down with an unrelated "unbound variable" error.
        echo >&2 "Unknown hash algorithm $HASH_ALG"
        exit 1
        ;;
esac
COMPONENT_HASH_CONTENT="${ORIG_COMPONENT_VERSION##*:}"

# Show the values that are about to be merged into the SBOM, laid out in the same
# order as the XML elements they end up in. One printf argument per output line.
printf '%s\n' \
    "metadata:" \
    " tools:" \
    " tool:" \
    " vendor: $TOOL_VENDOR" \
    " name: $TOOL_NAME" \
    " version: $TOOL_VERSION" \
    " hashes:" \
    " hash:" \
    " alg: $TOOL_HASH_ALG" \
    " content: $TOOL_HASH_CONTENT" \
    " authors:" \
    " author:" \
    " name: $AUTHOR_NAME" \
    " email: $AUTHOR_EMAIL" \
    " component:" \
    " supplier:" \
    " name: $SUPPLIER_NAME" \
    " url: $SUPPLIER_URL" \
    " author: $COMPONENT_AUTHOR_NAME" \
    " name: $ORIG_COMPONENT_NAME -> $COMPONENT_NAME" \
    " version: $ORIG_COMPONENT_VERSION -> $COMPONENT_VERSION" \
    " hashes:" \
    " hash:" \
    " alg: $COMPONENT_HASH_ALG" \
    " content: $COMPONENT_HASH_CONTENT"

# Refuse to continue when any required value is empty. Each entry pairs a variable
# name with the wording used in its error message; the entries are checked in order
# and the first empty one ends the script with exit status 1.
REQUIRED_FIELDS=(
    "TOOL_VENDOR=SBOM tool vendor"
    "TOOL_NAME=SBOM tool name"
    "TOOL_VERSION=SBOM tool version"
    "TOOL_HASH_ALG=SBOM tool hash algorithm"
    "TOOL_HASH_CONTENT=SBOM tool hash content"
    "AUTHOR_NAME=SBOM author name"
    "AUTHOR_EMAIL=SBOM author email"
    "SUPPLIER_NAME=component supplier name"
    "SUPPLIER_URL=component supplier url"
    "COMPONENT_AUTHOR_NAME=component author name"
    "COMPONENT_NAME=component name"
    "COMPONENT_VERSION=component version"
    "COMPONENT_HASH_ALG=component hash algorithm"
    "COMPONENT_HASH_CONTENT=component hash content"
)
for REQUIRED_FIELD in "${REQUIRED_FIELDS[@]}"
do
    REQUIRED_VARIABLE="${REQUIRED_FIELD%%=*}"
    # ${!name} expands to the value of the variable whose name is stored in name
    if [ -z "${!REQUIRED_VARIABLE}" ]
    then
        echo >&2 "Unable to determine ${REQUIRED_FIELD#*=}"
        exit 1
    fi
done

PATCHED_OUTPUT="${OUTPUT}.patched"

# Add tool, author, supplier, component name/version and hash details to a
# CycloneDX 1.2 XML SBOM.
#
# Arguments: $1 - SBOM file to read
#            $2 - file the patched SBOM is written to
# Globals:   TOOL_VENDOR TOOL_NAME TOOL_VERSION TOOL_HASH_ALG TOOL_HASH_CONTENT
#            AUTHOR_NAME AUTHOR_EMAIL COMPONENT_AUTHOR_NAME COMPONENT_NAME
#            COMPONENT_VERSION COMPONENT_HASH_ALG COMPONENT_HASH_CONTENT
#            SUPPLIER_NAME SUPPLIER_URL (all read)
# Returns:   non-zero when the input cannot be read or python3 fails
#
# The values reach python through its environment and the program text is a quoted
# here-document, so nothing is spliced into the python source: a quote or backslash
# in a name ends up as XML text instead of breaking or altering the program.
patch_sbom() {
    local sbom_in="$1"
    local sbom_out="$2"

    if [ ! -r "$sbom_in" ]
    then
        echo >&2 "Unable to read SBOM file '$sbom_in'"
        return 1
    fi

    TOOL_VENDOR="$TOOL_VENDOR" \
    TOOL_NAME="$TOOL_NAME" \
    TOOL_VERSION="$TOOL_VERSION" \
    TOOL_HASH_ALG="$TOOL_HASH_ALG" \
    TOOL_HASH_CONTENT="$TOOL_HASH_CONTENT" \
    AUTHOR_NAME="$AUTHOR_NAME" \
    AUTHOR_EMAIL="$AUTHOR_EMAIL" \
    COMPONENT_AUTHOR_NAME="$COMPONENT_AUTHOR_NAME" \
    COMPONENT_NAME="$COMPONENT_NAME" \
    COMPONENT_VERSION="$COMPONENT_VERSION" \
    COMPONENT_HASH_ALG="$COMPONENT_HASH_ALG" \
    COMPONENT_HASH_CONTENT="$COMPONENT_HASH_CONTENT" \
    SUPPLIER_NAME="$SUPPLIER_NAME" \
    SUPPLIER_URL="$SUPPLIER_URL" \
    python3 <(cat <<'END'
import os
import sys
import xml.etree.ElementTree as ET

NAMESPACE = 'http://cyclonedx.org/schema/bom/1.2'
ns = {'': NAMESPACE}
env = os.environ


def indent(elem, level=0):
    """Add newlines and leading spaces so the written XML is readable."""
    i = "\n" + level*" "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for child in elem:
            indent(child, level+1)
        # the tail of the last child precedes the closing tag of its parent,
        # so it is set back to the indentation of the parent
        if not child.tail or not child.tail.strip():
            child.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i


def find_or_create(parent, tag, index=None):
    """Return the child of parent called tag, creating it when it is missing.

    The lookup result is compared with None on purpose: an Element without
    children is falsy, so a plain truth test treats an existing leaf element
    as missing and a duplicate gets added.
    A new child is inserted at index, or appended when index is None.
    """
    child = parent.find(tag, ns)
    if child is None:
        child = ET.Element(tag)
        if index is None:
            parent.append(child)
        else:
            parent.insert(index, child)
    return child


ET.register_namespace('', NAMESPACE)

# Open original file
et = ET.parse(sys.stdin)
root = et.getroot()

metadata = root.find('metadata', ns)
if metadata is None:
    sys.exit('SBOM has no metadata element')

# Add this tool
tools = find_or_create(metadata, 'tools')
tool = ET.SubElement(tools, 'tool')
ET.SubElement(tool, 'vendor').text = env['TOOL_VENDOR']
ET.SubElement(tool, 'name').text = env['TOOL_NAME']
ET.SubElement(tool, 'version').text = env['TOOL_VERSION']
tool_hashes = ET.SubElement(tool, 'hashes')
tool_hash = ET.SubElement(tool_hashes, 'hash', alg=env['TOOL_HASH_ALG'])
tool_hash.text = env['TOOL_HASH_CONTENT']

# Add sbom authors elements
authors = find_or_create(metadata, 'authors', 2)
author = ET.SubElement(authors, 'author')
ET.SubElement(author, 'name').text = env['AUTHOR_NAME']
ET.SubElement(author, 'email').text = env['AUTHOR_EMAIL']

component = metadata.find('component', ns)
if component is None:
    sys.exit('SBOM metadata has no component element')

# Update component author
find_or_create(component, 'author', 0).text = env['COMPONENT_AUTHOR_NAME']

# Update component name and version
component.find('name', ns).text = env['COMPONENT_NAME']
component.find('version', ns).text = env['COMPONENT_VERSION']

# Update component hash
component_hashes = find_or_create(component, 'hashes')
component_hash = ET.SubElement(component_hashes, 'hash', alg=env['COMPONENT_HASH_ALG'])
component_hash.text = env['COMPONENT_HASH_CONTENT']

# Add component supplier, placed in front of the component author
supplier = find_or_create(component, 'supplier', 0)
find_or_create(supplier, 'name').text = env['SUPPLIER_NAME']
find_or_create(supplier, 'url').text = env['SUPPLIER_URL']

# Add supplier (it appears twice in the schema)
supplier = find_or_create(metadata, 'supplier')
find_or_create(supplier, 'name').text = env['SUPPLIER_NAME']
find_or_create(supplier, 'url').text = env['SUPPLIER_URL']

indent(root)

et.write(sys.stdout, encoding='unicode', xml_declaration=True, default_namespace='')
END
    ) < "$sbom_in" > "$sbom_out"
}

patch_sbom "$OUTPUT" "$PATCHED_OUTPUT"

# ----------------------------------------------------------------------------
# Check that the patched SBOM is valid against the cyclonedx schema
# ----------------------------------------------------------------------------
# Download the schemas on first use; later runs reuse the local copies.
[ -f spdx.xsd ] || curl -fsS -o spdx.xsd https://cyclonedx.org/schema/spdx
[ -f cyclonedx.xsd ] || curl -fsS -o cyclonedx.xsd https://cyclonedx.org/schema/bom/1.2

# xmllint complains about a double import of the spdx schema, but we have to import via
# the wrapper to set the schema location to a local file, as xmllint fails to download
# them from the internet as they are https
#
# grep exits 1 when it filters out every line, which must not abort the script under
# "set -e", hence the "|| true" inside the group.
xmllint "$PATCHED_OUTPUT" --schema cyclonedx-wrapper.xsd --noout 2>&1 \
    | { grep -Fv "Skipping import of schema located at 'http://cyclonedx.org/schema/spdx' for the namespace 'http://cyclonedx.org/schema/spdx'" || true; }
# PIPESTATUS is overwritten by every command that follows the pipeline, "[" included,
# so the xmllint status has to be saved before anything else runs.
XMLLINT_STATUS=${PIPESTATUS[0]}
if [ "${XMLLINT_STATUS}" -ne 0 ]
then
    exit "${XMLLINT_STATUS}"
fi

# ----------------------------------------------------------------------------
# Handle client id and secrets for SBOM scraper via App registrations
# ----------------------------------------------------------------------------
HTTP_STATUS=""
# get token
log "Get token"
log "Get token ..."
HTTP_STATUS=$(curl -sS -w "%{http_code}" \
-o "${TEMPDIR}/access_token" \
--data-urlencode "grant_type=client_credentials" \
Expand All @@ -169,13 +344,13 @@ EOF
# ----------------------------------------------------------------------------
# Upload SBOM
# ----------------------------------------------------------------------------
log "Upload ${PRIVACY} ${OUTPUT}"
log "Upload ${PRIVACY} ${OUTPUT} ..."

HTTP_STATUS=$(curl -s -w "%{http_code}" -X POST \
-o "${TEMPDIR}/upload" \
-H "@${BEARER_TOKEN_FILE}" \
-H "content_type=text/xml" \
-F "sbom=@${OUTPUT}" \
-F "sbom=@${PATCHED_OUTPUT}" \
"${URL}/archivist/v1/sboms?privacy=${PRIVACY}")

if [ "${HTTP_STATUS}" != "200" ]
Expand All @@ -184,5 +359,5 @@ then
exit 4
fi
log "Upload success: "
${JQ} "${TEMPDIR}/upload"
jq . "${TEMPDIR}/upload"
exit 0