diff --git a/scripts/.gitignore b/scripts/.gitignore new file mode 100644 index 0000000..466d190 --- /dev/null +++ b/scripts/.gitignore @@ -0,0 +1,3 @@ +cyclonedx.xsd +spdx.xsd +credentials diff --git a/scripts/cyclonedx-wrapper.xsd b/scripts/cyclonedx-wrapper.xsd new file mode 100644 index 0000000..ab0ceba --- /dev/null +++ b/scripts/cyclonedx-wrapper.xsd @@ -0,0 +1,5 @@ + + + + + diff --git a/scripts/sbom_scraper.sh b/scripts/sbom_scraper.sh index 106f12a..a3e59ef 100755 --- a/scripts/sbom_scraper.sh +++ b/scripts/sbom_scraper.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Scrape a docker image and upload as public or private SBOM file +# Scrape a docker image and upload as public (default) or private SBOM file # # Preparation: # @@ -13,39 +13,47 @@ # and note down the CLIENT_ID and SECRET. # # Copy the SECRET generated to the file specified by ${CLIENTSECRET_FILE} below. This -# file should reside in a subdirectory with 0600 permissions. +# file should reside in a subdirectory with 0700 permissions. # # Use the CLIENT_ID as the first fixed argument to this script. # SCRIPTNAME=$(basename "$0") -SYFT=$(which syft) -if [ -z "${SYFT}" ] -then - echo "syft command not found" - exit 10 -fi -JQ=$(which jq) -if [ -z "${JQ}" ] -then - JQ="cat" -else - JQ="jq ." -fi +for TOOL in syft jq xq xmllint python3 +do + if ! type $TOOL > /dev/null + then + echo >&2 "please make sure this tool is on your PATH" + exit 10 + fi +done set -e set -u LOGTAG=$$ log() { - echo "${LOGTAG}:$(date --rfc-3339=seconds):$* ..." + echo "${LOGTAG}:$(date ):$*" } +GIT_STATUS=$(git status --porcelain) + # defaults FORMAT=cyclonedx +AUTHOR_NAME="$(git config user.name)" +AUTHOR_EMAIL="$(git config user.email)" +COMPONENT_AUTHOR_NAME="$AUTHOR_NAME" +SUPPLIER_NAME=dockerhub +SUPPLIER_URL=https://hub.docker.com +TOOL_NAME="$(git config --get remote.origin.url) $(git ls-files --full-name "$SCRIPTNAME")" +TOOL_VERSION=$(git describe --tags)${GIT_STATUS:++} +TOOL_VENDOR="Jitsuin Inc" +TOOL_HASH_ALG=SHA-256 +# shellcheck disable=SC2002 +TOOL_HASH_CONTENT=$(cat "$0" | openssl dgst -sha256) -# credentials directory has 0600 permissions +# credentials directory should have 0700 permissions CLIENTSECRET_FILE=credentials/client_secret SBOM=false PRIVACY=PUBLIC @@ -55,37 +63,44 @@ URL=https://app.rkvst.io usage() { cat >&2 < "${OUTPUT}" + syft -q packages -o "${FORMAT}" "${DOCKER_IMAGE}"> "${OUTPUT}" else OUTPUT="${DOCKER_IMAGE}" fi +# ---------------------------------------------------------------------------- +# Update SBOM including NTIA minimum elments +# ---------------------------------------------------------------------------- +ORIG_COMPONENT_NAME=$(xq -r .bom.metadata.component.name "$OUTPUT") +ORIG_COMPONENT_VERSION=$(xq -r .bom.metadata.component.version "$OUTPUT") +COMPONENT_NAME=${ORIG_COMPONENT_NAME%%:*} +COMPONENT_VERSION=${ORIG_COMPONENT_NAME##*:} +HASH_ALG="${ORIG_COMPONENT_VERSION%%:*}" +case ${HASH_ALG^^} in + SHA256) COMPONENT_HASH_ALG="SHA-256" + ;; + *) echo >&2 "Unknonwn hash algorithm $HASH_ALG" +esac +COMPONENT_HASH_CONTENT="${ORIG_COMPONENT_VERSION##*:}" + +echo "metadata:" +echo " tools:" +echo " tool:" +echo " vendor: $TOOL_VENDOR" +echo " name: $TOOL_NAME" +echo " version: $TOOL_VERSION" +echo " hashes:" +echo " hash:" +echo " alg: $TOOL_HASH_ALG" +echo " content: $TOOL_HASH_CONTENT" +echo " authors:" +echo " author:" +echo " name: $AUTHOR_NAME" +echo " email: $AUTHOR_EMAIL" +echo " component:" +echo " supplier:" +echo " name: $SUPPLIER_NAME" +echo " url: $SUPPLIER_URL" +echo " author: $COMPONENT_AUTHOR_NAME" +echo " name: $ORIG_COMPONENT_NAME -> $COMPONENT_NAME" +echo " version: $ORIG_COMPONENT_VERSION -> $COMPONENT_VERSION" +echo " hashes:" +echo " hash:" +echo " alg: $COMPONENT_HASH_ALG" +echo " content: $COMPONENT_HASH_CONTENT" + +[ -z "$TOOL_VENDOR" ] && echo >&2 "Unable to determine SBOM tool vendor" && exit 1 +[ -z "$TOOL_NAME" ] && echo >&2 "Unable to determine SBOM tool name" && exit 1 +[ -z "$TOOL_VERSION" ] && echo >&2 "Unable to determine SBOM tool version" && exit 1 +[ -z "$TOOL_HASH_ALG" ] && echo >&2 "Unable to determine SBOM tool hash algorithm" && exit 1 +[ -z "$TOOL_HASH_CONTENT" ] && echo >&2 "Unable to determine SBOM tool hash content" && exit 1 +[ -z "$AUTHOR_NAME" ] && echo >&2 "Unable to determine SBOM author name" && exit 1 +[ -z "$AUTHOR_EMAIL" ] && echo >&2 "Unable to determine SBOM author email" && exit 1 +[ -z "$SUPPLIER_NAME" ] && echo >&2 "Unable to determine component supplier name" && exit 1 +[ -z "$SUPPLIER_URL" ] && echo >&2 "Unable to determine component supplier url" && exit 1 +[ -z "$COMPONENT_AUTHOR_NAME" ] && echo >&2 "Unable to determine component author name" && exit 1 +[ -z "$COMPONENT_NAME" ] && echo >&2 "Unable to determine component name" && exit 1 +[ -z "$COMPONENT_VERSION" ] && echo >&2 "Unable to determine component version" && exit 1 +[ -z "$COMPONENT_HASH_ALG" ] && echo >&2 "Unable to determine component hash algorithm" && exit 1 +[ -z "$COMPONENT_HASH_CONTENT" ] && echo >&2 "Unable to determine component hash content" && exit 1 + +PATCHED_OUTPUT="${OUTPUT}.patched" + +python3 <(cat < "$PATCHED_OUTPUT" + +# ---------------------------------------------------------------------------- +# Check that the patched SBOM is valid against the cyclonedx schema +# ---------------------------------------------------------------------------- +[ -f spdx.xsd ] || curl -fsS -o spdx.xsd https://cyclonedx.org/schema/spdx +[ -f cyclonedx.xsd ] || curl -fsS -o cyclonedx.xsd https://cyclonedx.org/schema/bom/1.2 + +# xmllint complains about a double import of the spdx schema, but we have to import via +# the wrapper to set the schema location to a local file, as xmllint fails to download +# them from the internet as they are https +xmllint "$PATCHED_OUTPUT" --schema cyclonedx-wrapper.xsd --noout 2>&1 | grep -Fv "Skipping import of schema located at 'http://cyclonedx.org/schema/spdx' for the namespace 'http://cyclonedx.org/schema/spdx'" +[ "${PIPESTATUS[0]}" -ne 0 ] && exit "${PIPESTATUS[0]}" + # ---------------------------------------------------------------------------- # Handle client id and secrets for SBOM scraper via App registrations # ---------------------------------------------------------------------------- HTTP_STATUS="" # get token -log "Get token" +log "Get token ..." HTTP_STATUS=$(curl -sS -w "%{http_code}" \ -o "${TEMPDIR}/access_token" \ --data-urlencode "grant_type=client_credentials" \ @@ -169,13 +344,13 @@ EOF # ---------------------------------------------------------------------------- # Upload SBOM # ---------------------------------------------------------------------------- -log "Upload ${PRIVACY} ${OUTPUT}" +log "Upload ${PRIVACY} ${OUTPUT} ..." HTTP_STATUS=$(curl -s -w "%{http_code}" -X POST \ -o "${TEMPDIR}/upload" \ -H "@${BEARER_TOKEN_FILE}" \ -H "content_type=text/xml" \ - -F "sbom=@${OUTPUT}" \ + -F "sbom=@${PATCHED_OUTPUT}" \ "${URL}/archivist/v1/sboms?privacy=${PRIVACY}") if [ "${HTTP_STATUS}" != "200" ] @@ -184,5 +359,5 @@ then exit 4 fi log "Upload success: " -${JQ} "${TEMPDIR}/upload" +jq . "${TEMPDIR}/upload" exit 0