diff --git a/Dockerfile-scraper b/Dockerfile-scraper
new file mode 100644
index 0000000..4849e41
--- /dev/null
+++ b/Dockerfile-scraper
@@ -0,0 +1,42 @@
+FROM ubuntu:jammy
+
+RUN apt-get update \
+ && apt-get upgrade -y --no-install-recommends \
+ && apt-get install -y \
+ curl \
+ default-jdk \
+ jq \
+ libdigest-sha-perl \
+ openssl \
+ python3-pip \
+ && apt-get autoremove \
+ && apt-get autoclean \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
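+# the yq python package supplies the xq tool required by sbom_scraper.sh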
+RUN python3 -m pip install yq
+
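+# pinned releases satisfying the version checks in sbom_scraper.sh (cdx >= 0.24, syft >= 0.60)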
+RUN curl -fsSOL https://github.com/CycloneDX/cyclonedx-cli/releases/download/v0.24.2/cyclonedx-linux-x64 \
+ && mv cyclonedx-linux-x64 /usr/local/bin/cdx \
+ && chmod +x /usr/local/bin/cdx \
+ && curl -fsSOL https://github.com/anchore/syft/releases/download/v0.60.3/syft_0.60.3_linux_amd64.tar.gz \
+ && tar xvzf syft_0.60.3_linux_amd64.tar.gz syft \
+ && mv syft /usr/local/bin \
+ && chmod +x /usr/local/bin/syft \
+ && rm syft_0.60.3_linux_amd64.tar.gz
+
+RUN which cdx \
+ && which curl \
+ && which jar \
+ && which jdeps \
+ && which jq \
+ && which openssl \
+ && which python3 \
+ && which shasum \
+ && which syft \
+ && which xq
+
+COPY scripts/sbom_scraper.sh /usr/local/bin/sbom_scraper.sh
+RUN chmod +x /usr/local/bin/sbom_scraper.sh
+
+ENTRYPOINT ["/usr/local/bin/sbom_scraper.sh"]
diff --git a/README.md b/README.md
index 4271539..e85c554 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,20 @@ Make a change to the code and validate the changes:
task check
```
+Then test the changes with a working set of options:
+
+```bash
+task build-scraper
+task scrape -- -h
+task scrape -- -a "RKVST, Inc" \
+ -e support@rkvst.com \
+ -A Docker \
+ -c credentials/client_secret \
+ -u https://app.rkvst.io \
+ 8f8f2467-01fe-48fb-891a-5c0be643cec1 \
+ aerospike:ce-6.0.0.5
+```
+
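+The options set the SBOM author and credentials; the two positional arguments
+are the app registration CLIENT_ID followed by the artefact to scan (here a
+Docker image).
+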
### Seeking a review
#### Synchronizing the upstream
diff --git a/Taskfile.yml b/Taskfile.yml
index 58497c8..f3a838e 100644
--- a/Taskfile.yml
+++ b/Taskfile.yml
@@ -2,6 +2,11 @@ version: '3'
 tasks:
+  build-scraper:
+    desc: Build scraper image
+    cmds:
+      - docker build --no-cache -f Dockerfile-scraper -t archivist-shell-scraper .
+
   check:
     desc: Standard linting of shell scripts
     cmds:
@@ -11,3 +16,17 @@ tasks:
     desc: Clean git repo
     cmds:
       - git clean -fdX
+
+  scrape:
+    desc: Run the scraper in a docker container
+    cmds:
+      - |
+        # mount the repo at its host path and run as the invoking user so that
+        # credentials/ resolves and output files are owned by the caller
+        docker run \
+          --rm -it \
+          -v $(pwd):$(pwd) \
+          -w $(pwd) \
+          -u $(id -u):$(id -g) \
+          -e USER \
+          archivist-shell-scraper \
+          {{.CLI_ARGS}}
+
diff --git a/scripts/cyclonedx-wrapper.xsd b/scripts/cyclonedx-wrapper.xsd
deleted file mode 100644
index 90b4cf0..0000000
--- a/scripts/cyclonedx-wrapper.xsd
+++ /dev/null
@@ -1,5 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
-    <xs:import namespace="http://cyclonedx.org/schema/spdx" schemaLocation="spdx.xsd"/>
-    <xs:import namespace="http://cyclonedx.org/schema/bom/1.3" schemaLocation="cyclonedx.xsd"/>
-</xs:schema>
diff --git a/scripts/sbom_scraper.sh b/scripts/sbom_scraper.sh
index 9f9baec..6d3ac1f 100755
--- a/scripts/sbom_scraper.sh
+++ b/scripts/sbom_scraper.sh
@@ -16,22 +16,18 @@
# file should reside in a subdirectory with 0700 permissions.
#
# Use the CLIENT_ID as the first fixed argument to this script.
-#
-
-SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-SCRIPTNAME=$(basename "$0")
#
-# cdx - https://github.com/CycloneDX/cyclonedx-cli/releases/tag/v0.22.0
-# jar, jdeps - sudo apt install default-jre
-# syft - https://github.com/anchore/syft/releases/tag/v0.37.10
+# cdx - https://github.com/CycloneDX/cyclonedx-cli/releases/tag/v0.24.2
+# curl - sudo apt install curl
+# jar, jdeps - sudo apt install default-jdk
# jq - sudo apt install jq
-# xq - python3 -m pip install --user yq
-# xmllint - sudo apt install libxml2-utils
-# python3 - should come with distro
# openssl - sudo apt install openssl
-# curl - sudo apt install curl
+# python3 - should come with distro
+# shasum - sudo apt install libdigest-sha-perl
+# syft - https://github.com/anchore/syft/releases/tag/v0.60.3
+# xq - python3 -m pip install --user yq
NOTFOUND=0
-for TOOL in cdx jar jdeps syft jq xq xmllint python3 openssl curl shasum
+for TOOL in cdx jar jdeps syft jq xq python3 openssl curl shasum
do
if ! type $TOOL > /dev/null
then
@@ -44,6 +40,7 @@ then
echo >&2 "Some tools not found"
exit 10
fi
+CDX_VERSION=$(cdx --version)
SYFT_VERSION=$(syft version | grep '^Version' | tr -s ' ' | cut -d' ' -f2)
compare_version() {
local x=$1
@@ -51,13 +48,14 @@ compare_version() {
     last=${x##*.}        # Delete up to last dot.
     mid=${x##"$first".}  # Delete first number and dot.
     mid=${mid%%."$last"} # Delete dot and last number.
-    if [ "$mid" -lt 34 ]
+    if [ "$mid" -lt "$3" ]
     then
-        echo >&2 "syft must be at least version 0.34.0"
+        echo >&2 "$2 must be at least version 0.$3.0"
         exit 10
     fi
 }
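+# e.g. SYFT_VERSION=0.60.3 parses as first=0, mid=60, last=3; only the middle
+# number is compared against the required minimum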
-compare_version "${SYFT_VERSION}"
+compare_version "${SYFT_VERSION}" syft 60
+compare_version "${CDX_VERSION}" cdx 24
set -e
set -u
@@ -70,10 +68,10 @@ log() {
# ----------------------------------------------------------------------------
# Option parsing
# ----------------------------------------------------------------------------
-TOOL_NAME="https://github.com/jitsuin-inc/archivist-shell $SCRIPTNAME"
+TOOL_NAME="https://github.com/jitsuin-inc/archivist-shell sbom_scraper.sh"
#
-# Set this value just before release
-TOOL_VERSION="v0.5.1"
+# Set this value and merge the change just before release
+TOOL_VERSION="v0.6.0"
TOOL_VENDOR="RKVST Inc"
TOOL_HASH_ALG=SHA-256
TOOL_HASH_CONTENT=$(shasum -a 256 "$0" | cut -d' ' -f1)
@@ -87,7 +85,7 @@ COMPONENT_AUTHOR_NAME="$DEFAULT_AUTHOR_NAME"
SBOM_UPLOAD_TIMEOUT=10
# shellcheck disable=SC2002
# credentials directory should have 0700 permissions
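+# the path is relative: the scrape task mounts the repo at the same path
+# inside the container, so credentials/client_secret resolves there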
-CLIENTSECRET_FILE=$SCRIPTDIR/../credentials/client_secret
+CLIENTSECRET_FILE=credentials/client_secret
SBOM=false
PRIVACY=PUBLIC
JARFILE=false
@@ -98,9 +96,9 @@ URL=https://app.rkvst.io
 usage() {
+    cat >&2 <<EOF
[...]
+    echo "Downloaded xml file is in sbom.xml"
+fi
+
 if [ "$UPLOAD" = "true" ]
 then
+# Order is important....
[...]
+)
 else
@@ -395,11 +401,11 @@ def indent(elem, level=0):
         if level and (not elem.tail or not elem.tail.strip()):
             elem.tail = i
-ET.register_namespace('', 'http://cyclonedx.org/schema/bom/1.3')
-ns = {'': 'http://cyclonedx.org/schema/bom/1.3'}
+ET.register_namespace('', 'http://cyclonedx.org/schema/bom/1.4')
+ns = {'': 'http://cyclonedx.org/schema/bom/1.4'}
# Open original file
-et = ET.parse(sys.stdin)
+et = ET.parse("$OUTPUT")
root = et.getroot()
metadata = root.find('metadata', ns)
@@ -427,82 +433,116 @@ if not authors:
     author = ET.SubElement(authors, 'author')
     ET.SubElement(author, 'name').text = '$AUTHOR_NAME'
     ET.SubElement(author, 'email').text = '$AUTHOR_EMAIL'
+
+# Update component - each of the selected fields is checked for existence;
+# if it already exists it is removed and replaced with a new instance
+# carrying the correct value.
+#
+# CAVEAT: only the (supplier, author, publisher, name, version, hashes) fields
+# are altered. Removing and re-appending an element moves it to the end of the
+# component, and XML schema validation requires fields in a defined order (a
+# sequence), so the remaining fields are re-appended in schema order below.
+
+# Full list of fields in order from schema 1.4 is
+# (supplier, author, publisher, group, name, version, description, scope, hashes,
+# licenses, copyright, cpe, purl, swid, modified, pedigree, externalReferences,
+# properties, components, evidence, releaseNotes, ##other)
+#
+# plus (type, mime-type, bom-ref, ##any) in any order.
+#
+# Note that ##other is a wildcard of an attribute from another namespace and ##any
+# is a wildcard of user-defined attributes.
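+#
+# Note that ElementTree's append() does not detach an element that is already
+# a child, so each move below is an explicit remove() followed by append();
+# a bare append() would leave the element serialized twice.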
component = metadata.find('component', ns)
-if not component:
+if component is None:
     component = ET.SubElement(metadata, 'component')
-# Update component publisher and author
-publisher = component.find('publisher', ns)
-if not publisher:
-    publisher = ET.Element('publisher')
-    component.insert(0, publisher)
-publisher.text = '$COMPONENT_AUTHOR_NAME'
+# Add component supplier
+supplier = component.find('supplier', ns)
+if supplier is not None:
+    component.remove(supplier)
+supplier = ET.Element('supplier')
+ET.SubElement(supplier, 'name').text = '$SUPPLIER_NAME'
+ET.SubElement(supplier, 'url').text = '$SUPPLIER_URL'
+component.append(supplier)
+
+# Update component author
author = component.find('author', ns)
-if not author:
-    author = ET.Element('author')
-    component.insert(1, author)
+if author is not None:
+    component.remove(author)
+author = ET.Element('author')
author.text = '$COMPONENT_AUTHOR_NAME'
+component.append(author)
-# Update component name and version
-name = component.find('name', ns)
-if not name:
-    name = ET.SubElement(component, 'name')
+# Update component publisher
+publisher = component.find('publisher', ns)
+if publisher is not None:
+    component.remove(publisher)
+publisher = ET.Element('publisher')
+publisher.text = '$COMPONENT_AUTHOR_NAME'
+component.append(publisher)
+
+group = component.find('group', ns)
+if group is not None:
+    # move into schema position: remove first, a bare append() would duplicate
+    component.remove(group)
+    component.append(group)
+# Update component name
+name = component.find('name', ns)
+if name is not None:
+    component.remove(name)
+name = ET.Element('name')
name.text = '$COMPONENT_NAME'
+component.append(name)
+
+
+# Update component version
+component_version = '$COMPONENT_VERSION'
+version = component.find('version', ns)
+if version is not None:
+    # detach now; replaced or re-appended below in schema position
+    component.remove(version)
 if component_version:
-    version = component.find('version', ns)
-    if not version:
-        version = ET.SubElement(component, 'version')
+    version = ET.Element('version')
     version.text = component_version
+if version is not None:
+    component.append(version)
+description = component.find('description', ns)
+if description is not None:
+    component.remove(description)
+    component.append(description)
+
+scope = component.find('scope', ns)
+if scope is not None:
+    component.remove(scope)
+    component.append(scope)
+
# Update component hash
component_hash_alg = '${COMPONENT_HASH_ALG}'
+hashes = component.find('hashes', ns)
+if hashes is not None:
+    # detach now; replaced or re-appended below in schema position
+    component.remove(hashes)
 if component_hash_alg:
-    hashes = component.find('hashes', ns)
-    if not hashes:
-        hashes = ET.SubElement(component, 'hashes')
+    hashes = ET.Element('hashes')
     hash = ET.SubElement(hashes, 'hash', alg=component_hash_alg)
     hash.text = '$COMPONENT_HASH_CONTENT'
-# Add component supplier
-supplier = component.find('supplier', ns)
-if not supplier:
-    supplier = ET.Element('supplier')
-    component.insert(4, supplier)
-ET.SubElement(supplier, 'name').text = '$SUPPLIER_NAME'
-ET.SubElement(supplier, 'url').text = '$SUPPLIER_URL'
+if hashes is not None:
+    component.append(hashes)
-# Add supplier (it appears twice in the schema)
-supplier = metadata.find('supplier', ns)
-if not supplier:
-    supplier = ET.SubElement(metadata, 'supplier')
-ET.SubElement(supplier, 'name').text = '$SUPPLIER_NAME'
-ET.SubElement(supplier, 'url').text = '$SUPPLIER_URL'
+# re-append any remaining optional fields in schema order
+for f in ('licenses', 'copyright', 'cpe', 'purl', 'swid', 'modified', 'pedigree',
+          'externalReferences', 'properties', 'components', 'evidence', 'releaseNotes'):
+    val = component.find(f, ns)
+    if val is not None:
+        component.remove(val)
+        component.append(val)
indent(root)
-et.write(sys.stdout, encoding='unicode', xml_declaration=True, default_namespace='')
+et.write("$PATCHED_OUTPUT", encoding='unicode', xml_declaration=True, default_namespace='')
END
-) < "$OUTPUT" > "$PATCHED_OUTPUT"
+)
fi
-# ----------------------------------------------------------------------------
-# Check that the patched SBOM is valid against the cyclonedx schema
-# ----------------------------------------------------------------------------
-[ -f "$SCRIPTDIR"/spdx.xsd ] || curl -fsS -o "$SCRIPTDIR"/spdx.xsd https://cyclonedx.org/schema/spdx
-[ -f "$SCRIPTDIR"/cyclonedx.xsd ] || curl -fsS -o "$SCRIPTDIR"/cyclonedx.xsd https://cyclonedx.org/schema/bom/1.3
-
-# xmllint complains about a double import of the spdx schema, but we have to import via
-# the wrapper to set the schema location to a local file, as xmllint fails to download
-# them from the internet as they are https
-_=$(xmllint "$PATCHED_OUTPUT" --schema "$SCRIPTDIR"/cyclonedx-wrapper.xsd --noout 2>&1 | grep -Fv "Skipping import of schema located at 'http://cyclonedx.org/schema/spdx' for the namespace 'http://cyclonedx.org/schema/spdx'")
-[ "${PIPESTATUS[0]}" -ne 0 ] && cat "${PATCHED_OUTPUT}" && exit "${PIPESTATUS[0]}"
-
if [ "${UPLOAD}" = "false" ]
then
# not uploading - just output the xml
- cat "${PATCHED_OUTPUT}"
+ cat "${PATCHED_OUTPUT}" > sbom-patched.xml
+ echo "Patched xml is in sbom-patched.xml"
else
# ----------------------------------------------------------------------------
# Handle client id and secrets for SBOM scraper via App registrations
@@ -550,6 +590,7 @@ EOF
then
log "Upload failure: Timeout"
exit 3
+
# all other non-zero return codes
elif [ ${RETURN_CODE} -gt 0 ]
then