diff --git a/scripts/.gitignore b/scripts/.gitignore
new file mode 100644
index 0000000..466d190
--- /dev/null
+++ b/scripts/.gitignore
@@ -0,0 +1,3 @@
+cyclonedx.xsd
+spdx.xsd
+credentials
diff --git a/scripts/cyclonedx-wrapper.xsd b/scripts/cyclonedx-wrapper.xsd
new file mode 100644
index 0000000..ab0ceba
--- /dev/null
+++ b/scripts/cyclonedx-wrapper.xsd
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/scripts/sbom_scraper.sh b/scripts/sbom_scraper.sh
index 106f12a..a3e59ef 100755
--- a/scripts/sbom_scraper.sh
+++ b/scripts/sbom_scraper.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
#
-# Scrape a docker image and upload as public or private SBOM file
+# Scrape a docker image and upload as public (default) or private SBOM file
#
# Preparation:
#
@@ -13,39 +13,47 @@
# and note down the CLIENT_ID and SECRET.
#
# Copy the SECRET generated to the file specified by ${CLIENTSECRET_FILE} below. This
-# file should reside in a subdirectory with 0600 permissions.
+# file should reside in a subdirectory with 0700 permissions.
#
# Use the CLIENT_ID as the first fixed argument to this script.
#
SCRIPTNAME=$(basename "$0")
-SYFT=$(which syft)
-if [ -z "${SYFT}" ]
-then
- echo "syft command not found"
- exit 10
-fi
-JQ=$(which jq)
-if [ -z "${JQ}" ]
-then
- JQ="cat"
-else
- JQ="jq ."
-fi
+# Verify that every external command this script relies on is available before
+# doing any work. `type` itself reports the missing command on stderr, which is
+# why the follow-up message only refers to "this tool".
+# git, openssl and curl are included because the script invokes them further
+# down (tool metadata, script digest, schema download and uploads).
+for TOOL in syft jq xq xmllint python3 git openssl curl
+do
+    if ! type "$TOOL" > /dev/null
+    then
+        echo >&2 "please make sure this tool is on your PATH"
+        exit 10
+    fi
+done
set -e
set -u
LOGTAG=$$
log() {
- echo "${LOGTAG}:$(date --rfc-3339=seconds):$* ..."
+ echo "${LOGTAG}:$(date ):$*"
}
+GIT_STATUS=$(git status --porcelain)
+
# defaults
FORMAT=cyclonedx
+AUTHOR_NAME="$(git config user.name)"
+AUTHOR_EMAIL="$(git config user.email)"
+COMPONENT_AUTHOR_NAME="$AUTHOR_NAME"
+SUPPLIER_NAME=dockerhub
+SUPPLIER_URL=https://hub.docker.com
+TOOL_NAME="$(git config --get remote.origin.url) $(git ls-files --full-name "$SCRIPTNAME")"
+TOOL_VERSION=$(git describe --tags)${GIT_STATUS:++}
+TOOL_VENDOR="Jitsuin Inc"
+TOOL_HASH_ALG=SHA-256
+# SHA-256 digest of this script. "openssl dgst" labels what it prints when it
+# reads stdin (e.g. "(stdin)= <hex>" or "SHA2-256(stdin)= <hex>", depending on
+# the OpenSSL release), so everything up to the last space is dropped to leave
+# only the hex digest. Output without a label is left untouched.
+TOOL_HASH_CONTENT=$(openssl dgst -sha256 < "$0")
+TOOL_HASH_CONTENT=${TOOL_HASH_CONTENT##* }
-# credentials directory has 0600 permissions
+# credentials directory should have 0700 permissions
CLIENTSECRET_FILE=credentials/client_secret
SBOM=false
PRIVACY=PUBLIC
@@ -55,37 +63,44 @@ URL=https://app.rkvst.io
usage() {
cat >&2 < "${OUTPUT}"
+ syft -q packages -o "${FORMAT}" "${DOCKER_IMAGE}"> "${OUTPUT}"
else
OUTPUT="${DOCKER_IMAGE}"
fi
+# ----------------------------------------------------------------------------
+# Update SBOM including NTIA minimum elements
+# ----------------------------------------------------------------------------
+ORIG_COMPONENT_NAME=$(xq -r .bom.metadata.component.name "$OUTPUT")
+ORIG_COMPONENT_VERSION=$(xq -r .bom.metadata.component.version "$OUTPUT")
+COMPONENT_NAME=${ORIG_COMPONENT_NAME%%:*}
+COMPONENT_VERSION=${ORIG_COMPONENT_NAME##*:}
+# The original component version has the form "<algorithm>:<digest>"; map the
+# algorithm prefix onto the spelling used for the SBOM hash "alg" value.
+HASH_ALG="${ORIG_COMPONENT_VERSION%%:*}"
+# Start from an empty value: with `set -u` active, leaving the variable unset for
+# an unrecognised algorithm would abort the script with an "unbound variable"
+# error before the explicit "Unable to determine component hash algorithm"
+# check further down can report the problem.
+COMPONENT_HASH_ALG=""
+case "${HASH_ALG^^}" in
+    SHA256) COMPONENT_HASH_ALG="SHA-256"
+        ;;
+    *) echo >&2 "Unknown hash algorithm $HASH_ALG"
+        ;;
+esac
+COMPONENT_HASH_CONTENT="${ORIG_COMPONENT_VERSION##*:}"
+
+echo "metadata:"
+echo " tools:"
+echo " tool:"
+echo " vendor: $TOOL_VENDOR"
+echo " name: $TOOL_NAME"
+echo " version: $TOOL_VERSION"
+echo " hashes:"
+echo " hash:"
+echo " alg: $TOOL_HASH_ALG"
+echo " content: $TOOL_HASH_CONTENT"
+echo " authors:"
+echo " author:"
+echo " name: $AUTHOR_NAME"
+echo " email: $AUTHOR_EMAIL"
+echo " component:"
+echo " supplier:"
+echo " name: $SUPPLIER_NAME"
+echo " url: $SUPPLIER_URL"
+echo " author: $COMPONENT_AUTHOR_NAME"
+echo " name: $ORIG_COMPONENT_NAME -> $COMPONENT_NAME"
+echo " version: $ORIG_COMPONENT_VERSION -> $COMPONENT_VERSION"
+echo " hashes:"
+echo " hash:"
+echo " alg: $COMPONENT_HASH_ALG"
+echo " content: $COMPONENT_HASH_CONTENT"
+
+# Every field below is mandatory for the patched SBOM. require_field aborts the
+# script with exit status 1 when the value ($1) is empty, reporting on stderr
+# which piece of information ($2) could not be determined.
+require_field() {
+    if [ -z "$1" ]
+    then
+        echo >&2 "Unable to determine $2"
+        exit 1
+    fi
+}
+require_field "$TOOL_VENDOR" "SBOM tool vendor"
+require_field "$TOOL_NAME" "SBOM tool name"
+require_field "$TOOL_VERSION" "SBOM tool version"
+require_field "$TOOL_HASH_ALG" "SBOM tool hash algorithm"
+require_field "$TOOL_HASH_CONTENT" "SBOM tool hash content"
+require_field "$AUTHOR_NAME" "SBOM author name"
+require_field "$AUTHOR_EMAIL" "SBOM author email"
+require_field "$SUPPLIER_NAME" "component supplier name"
+require_field "$SUPPLIER_URL" "component supplier url"
+require_field "$COMPONENT_AUTHOR_NAME" "component author name"
+require_field "$COMPONENT_NAME" "component name"
+require_field "$COMPONENT_VERSION" "component version"
+require_field "$COMPONENT_HASH_ALG" "component hash algorithm"
+require_field "$COMPONENT_HASH_CONTENT" "component hash content"
+
+PATCHED_OUTPUT="${OUTPUT}.patched"
+
+python3 <(cat < "$PATCHED_OUTPUT"
+
+# ----------------------------------------------------------------------------
+# Check that the patched SBOM is valid against the cyclonedx schema
+# ----------------------------------------------------------------------------
+[ -f spdx.xsd ] || curl -fsS -o spdx.xsd https://cyclonedx.org/schema/spdx
+[ -f cyclonedx.xsd ] || curl -fsS -o cyclonedx.xsd https://cyclonedx.org/schema/bom/1.2
+
+# xmllint complains about a double import of the spdx schema, but we have to import via
+# the wrapper to set the schema location to a local file, as xmllint fails to download
+# them from the internet as they are https
+# Run the validation and remember xmllint's own exit status. It is captured
+# straight from the command substitution because PIPESTATUS is overwritten by
+# every subsequent command (including a `[ ... ]` test), which would turn a
+# validation failure into `exit 0`.
+XMLLINT_STATUS=0
+XMLLINT_OUTPUT=$(xmllint "$PATCHED_OUTPUT" --schema cyclonedx-wrapper.xsd --noout 2>&1) || XMLLINT_STATUS=$?
+if [ -n "$XMLLINT_OUTPUT" ]
+then
+    # Hide the expected double-import warning. grep -v exits 1 when every line
+    # was filtered out; that is not an error, so it must not trip `set -e`.
+    grep -Fv "Skipping import of schema located at 'http://cyclonedx.org/schema/spdx' for the namespace 'http://cyclonedx.org/schema/spdx'" <<< "$XMLLINT_OUTPUT" || true
+fi
+if [ "$XMLLINT_STATUS" -ne 0 ]
+then
+    exit "$XMLLINT_STATUS"
+fi
+
# ----------------------------------------------------------------------------
# Handle client id and secrets for SBOM scraper via App registrations
# ----------------------------------------------------------------------------
HTTP_STATUS=""
# get token
-log "Get token"
+log "Get token ..."
HTTP_STATUS=$(curl -sS -w "%{http_code}" \
-o "${TEMPDIR}/access_token" \
--data-urlencode "grant_type=client_credentials" \
@@ -169,13 +344,13 @@ EOF
# ----------------------------------------------------------------------------
# Upload SBOM
# ----------------------------------------------------------------------------
-log "Upload ${PRIVACY} ${OUTPUT}"
+log "Upload ${PRIVACY} ${OUTPUT} ..."
HTTP_STATUS=$(curl -s -w "%{http_code}" -X POST \
-o "${TEMPDIR}/upload" \
-H "@${BEARER_TOKEN_FILE}" \
-H "content_type=text/xml" \
- -F "sbom=@${OUTPUT}" \
+ -F "sbom=@${PATCHED_OUTPUT}" \
"${URL}/archivist/v1/sboms?privacy=${PRIVACY}")
if [ "${HTTP_STATUS}" != "200" ]
@@ -184,5 +359,5 @@ then
exit 4
fi
log "Upload success: "
-${JQ} "${TEMPDIR}/upload"
+jq . "${TEMPDIR}/upload"
exit 0