Skip to content

Commit

Permalink
verify digest for cachito archive
Browse files Browse the repository at this point in the history
* STONEBLD-509

Signed-off-by: Robert Cerven <rcerven@redhat.com>
  • Loading branch information
rcerven committed May 16, 2023
1 parent 81f3444 commit 6d63200
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 11 deletions.
5 changes: 5 additions & 0 deletions atomic_reactor/constants.py
Expand Up @@ -218,3 +218,8 @@

# https://raw.githubusercontent.com/CycloneDX/specification/1.4/schema/bom-1.4.schema.json
SBOM_SCHEMA_PATH = 'schemas/sbom-1.4.schema.json'

# hashlib algorithm name used to hash the downloaded cachito archive
CACHITO_HASH_ALG = 'sha256'
# algorithm label expected as the prefix of the cachito response's "Digest"
# header value (compared against '<CACHITO_ALG_STR>=<base64 digest>')
CACHITO_ALG_STR = 'sha-256'
26 changes: 25 additions & 1 deletion atomic_reactor/download.py
Expand Up @@ -5,6 +5,7 @@
This software may be modified and distributed under the terms
of the BSD license. See the LICENSE file for details.
"""
import base64
import hashlib
import logging
import os
Expand All @@ -17,14 +18,16 @@
DEFAULT_DOWNLOAD_BLOCK_SIZE,
HTTP_BACKOFF_FACTOR,
HTTP_MAX_RETRIES,
CACHITO_HASH_ALG,
CACHITO_ALG_STR,
)


logger = logging.getLogger(__name__)


def download_url(url, dest_dir, insecure=False, session=None, dest_filename=None,
expected_checksums=None):
expected_checksums=None, verify_cachito_digest=False):
"""Download file from URL, handling retries
To download to a temporary directory, use:
Expand All @@ -37,6 +40,7 @@ def download_url(url, dest_dir, insecure=False, session=None, dest_filename=None
:param dest_filename: optional filename for downloaded file
:param expected_checksums: optional dictionary of checksum_type and
checksum to verify downloaded files
:param verify_cachito_digest: bool, verify sha digest for cachito archive
:return: str, path of downloaded file
"""

Expand All @@ -52,6 +56,7 @@ def download_url(url, dest_dir, insecure=False, session=None, dest_filename=None
logger.debug('downloading %s', url)

checksums = {algo: hashlib.new(algo) for algo in expected_checksums}
cachito_hasher = hashlib.new(CACHITO_HASH_ALG)

for attempt in range(HTTP_MAX_RETRIES + 1):
response = session.get(url, stream=True, verify=not insecure)
Expand All @@ -62,11 +67,30 @@ def download_url(url, dest_dir, insecure=False, session=None, dest_filename=None
f.write(chunk)
for checksum in checksums.values():
checksum.update(chunk)

if verify_cachito_digest:
cachito_hasher.update(chunk)

for algo, checksum in checksums.items():
if checksum.hexdigest() != expected_checksums[algo]:
raise ValueError(
'Computed {} checksum, {}, does not match expected checksum, {}'
.format(algo, checksum.hexdigest(), expected_checksums[algo]))

if verify_cachito_digest:
logger.info('will verify cachito digest')
if 'Digest' in response.headers:
logger.info('digest is in cachito response header')

digest = base64.b64encode(cachito_hasher.digest()).decode("utf-8")
digest_str = f'{CACHITO_ALG_STR}={digest}'
if digest_str != response.headers['Digest']:
raise ValueError(
'Cachito archive digest "{}" does not match expected digest "{}"'
.format(digest_str, response.headers['Digest']))
else:
logger.info('digest for cachito archive is correct')

break
except requests.exceptions.RequestException:
if attempt < HTTP_MAX_RETRIES:
Expand Down
2 changes: 1 addition & 1 deletion atomic_reactor/utils/cachito.py
Expand Up @@ -183,7 +183,7 @@ def download_sources(self, request, dest_dir='.', dest_filename=REMOTE_SOURCE_TA
url = self.assemble_download_url(request_id)
dest_path = download_url(
url, dest_dir=dest_dir, insecure=not self.session.verify, session=self.session,
dest_filename=dest_filename)
dest_filename=dest_filename, verify_cachito_digest=True)
logger.debug('Sources bundle for request %d downloaded to %s', request_id, dest_path)
return dest_path

Expand Down
31 changes: 31 additions & 0 deletions tests/test_download.py
Expand Up @@ -18,6 +18,7 @@

from atomic_reactor.util import get_retrying_requests_session
from atomic_reactor.download import download_url
from atomic_reactor.constants import CACHITO_ALG_STR


class TestDownloadUrl(object):
Expand All @@ -34,6 +35,36 @@ def test_happy_path(self):
with open(result, 'rb') as f:
assert f.read() == content

@responses.activate
def test_cachito_download_digest_matches(self):
    """A Digest header that agrees with the downloaded bytes must not raise."""
    dest_dir = tempfile.mkdtemp()
    payload = b'abc'
    # base64-encoded sha256 of b'abc'
    expected_digest = 'ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0='
    url = 'https://example.com/path/file'

    # stream the body one byte at a time to exercise chunked hashing
    body = BufferedReader(BytesIO(payload), buffer_size=1)
    responses.add(
        responses.GET,
        url,
        body=body,
        headers={'Digest': f'{CACHITO_ALG_STR}={expected_digest}'},
    )

    result = download_url(url, dest_dir, verify_cachito_digest=True)

    assert os.path.basename(result) == 'file'
    with open(result, 'rb') as f:
        assert f.read() == payload

@responses.activate
def test_cachito_download_digest_mismatches(self):
    """A Digest header that disagrees with the content must raise ValueError."""
    dest_dir = tempfile.mkdtemp()
    payload = b'abc'
    url = 'https://example.com/path/file'

    # stream the body one byte at a time to exercise chunked hashing
    body = BufferedReader(BytesIO(payload), buffer_size=1)
    responses.add(
        responses.GET,
        url,
        body=body,
        headers={'Digest': f'{CACHITO_ALG_STR}=wrong'},
    )

    with pytest.raises(ValueError, match='does not match expected digest'):
        download_url(url, dest_dir, verify_cachito_digest=True)

def test_connection_failure(self):
url = 'https://example.com/path/file'
dest_dir = tempfile.mkdtemp()
Expand Down
29 changes: 20 additions & 9 deletions tests/utils/test_cachito.py
Expand Up @@ -20,6 +20,7 @@
from datetime import datetime
from textwrap import dedent

from atomic_reactor.constants import CACHITO_ALG_STR

CACHITO_URL = 'http://cachito.example.com'
CACHITO_REQUEST_ID = 123
Expand Down Expand Up @@ -278,24 +279,34 @@ def test_wait_for_request_bad_request_type():


@responses.activate
@pytest.mark.parametrize(('cachito_request', 'digest_match'), (
    (CACHITO_REQUEST_ID, True),
    ({'id': CACHITO_REQUEST_ID}, False)
))
def test_download_sources(tmpdir, cachito_request, digest_match):
    """download_sources succeeds on a matching Digest header, raises otherwise."""
    blob = 'glop-glop-I\'m-a-blob'
    # base64-encoded sha256 of blob when matching, junk value otherwise
    digest = 'XrN1l765qbGhErVrxe8Cj6+zCfwhqZoldJxOSYrpUlo=' if digest_match else 'wrong'

    responses.add(
        responses.GET,
        '{}/api/v1/requests/{}/download'.format(CACHITO_URL, CACHITO_REQUEST_ID),
        body=blob,
        headers={'Digest': f'{CACHITO_ALG_STR}={digest}'},
    )

    api = CachitoAPI(CACHITO_URL)

    if not digest_match:
        with pytest.raises(ValueError, match='does not match expected digest'):
            api.download_sources(cachito_request, str(tmpdir))
        return

    dest_path = api.download_sources(cachito_request, str(tmpdir))

    assert dest_path == os.path.join(str(tmpdir), 'remote-source.tar.gz')
    with open(dest_path) as f:
        assert f.read() == blob


def test_download_sources_bad_request_type(tmpdir):
Expand Down

0 comments on commit 6d63200

Please sign in to comment.