diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d52e329..bf6f05e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,24 @@ Changelog ========= +v32.0.0 +-------- + +This is a minor release with bug fixes and an output change. + +- We no longer support Python 3.6, only 3.7 and up. + +- "utils.extract_tar" function now behaves correctly with links and returns + either a list of error message strings (the previous default, but with updated + messages) or a list of ExtractEvent to better track extraction errors and warnings. + The behaviour is driven by the "as_events" argument. + +- In all places where extract is callable (Image, Layer) there is a new + "skip_symlinks" argument defaulting to True. If True, we skip symlinks and links. + The same applies to the "as_events" argument available in these places, as these + functions now return a list (rather than nothing before). + + v31.1.0 -------- diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ece17ca..8640559 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -11,7 +11,7 @@ jobs: parameters: job_name: ubuntu18_cpython image_name: ubuntu-18.04 - python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10'] + python_versions: ['3.7', '3.8', '3.9', '3.10'] test_suites: all: venv/bin/pytest -n 2 -vvs @@ -19,7 +19,7 @@ jobs: parameters: job_name: ubuntu20_cpython image_name: ubuntu-20.04 - python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10'] + python_versions: ['3.7', '3.8', '3.9', '3.10'] test_suites: all: venv/bin/pytest -n 2 -vvs @@ -27,7 +27,7 @@ jobs: parameters: job_name: macos1015_cpython image_name: macos-10.15 - python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10'] + python_versions: ['3.7', '3.8', '3.9', '3.10'] test_suites: all: venv/bin/pytest -n 2 -vvs @@ -43,7 +43,7 @@ jobs: # parameters: # job_name: win2019_cpython # image_name: windows-2019 -# python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10'] +# python_versions: ['3.7', '3.8', '3.9', '3.10'] # test_suites: # all: venv\Scripts\pytest -n 2 
-vvs # diff --git a/requirements-dev.txt b/requirements-dev.txt index fe92ed8..bd08a7b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,24 +1,31 @@ -aboutcode-toolkit==7.0.1 -bleach==4.1.0 +aboutcode-toolkit==7.0.2 +black==22.6.0 +bleach==5.0.1 build==0.7.0 commonmark==0.9.1 -docutils==0.18.1 +docutils==0.19 et-xmlfile==1.1.0 execnet==1.9.0 iniconfig==1.1.1 -jeepney==0.7.1 -keyring==23.4.1 -openpyxl==3.0.9 +isort==5.10.1 +jeepney==0.8.0 +keyring==23.7.0 +mypy-extensions==0.4.3 +openpyxl==3.0.10 +pathspec==0.9.0 pep517==0.12.0 -pkginfo==1.8.2 +pkginfo==1.8.3 +platformdirs==2.5.2 py==1.11.0 -pytest==7.0.1 +pytest==7.1.2 pytest-forked==1.4.0 pytest-xdist==2.5.0 -readme-renderer==34.0 +readme-renderer==35.0 requests-toolbelt==0.9.1 -rfc3986==1.5.0 -rich==12.3.0 +rfc3986==2.0.0 +rich==12.5.1 secretstorage==3.3.2 -tomli==1.2.3 -twine==3.8.0 +tomli==2.0.1 +tqdm==4.64.0 +twine==4.0.1 +typing_extensions==4.3.0 diff --git a/requirements.txt b/requirements.txt index 8762462..0d6b14a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,53 +2,53 @@ attrs==21.4.0 banal==1.0.6 beautifulsoup4==4.11.1 binaryornot==0.4.4 -boolean.py==3.8 -certifi==2021.10.8 -cffi==1.15.0 -chardet==4.0.0 -charset-normalizer==2.0.12 -click==8.0.4 -colorama==0.4.4 -commoncode==30.2.0 +boolean.py==4.0 +certifi==2022.6.15 +cffi==1.15.1 +chardet==5.0.0 +charset-normalizer==2.1.0 +click==8.1.3 +colorama==0.4.5 +commoncode==31.0.0b4 construct==2.10.68 -cryptography==36.0.2 -debian-inspector==30.0.0 +cryptography==37.0.4 +debian-inspector==31.0.0b1 dockerfile-parse==1.2.0 dparse2==0.6.1 -extractcode==30.0.0 +extractcode==31.0.0 extractcode-7z==16.5.210531 extractcode-libarchive==3.5.1.210531 fasteners==0.17.3 fingerprints==1.0.3 -ftfy==6.0.3 +ftfy==6.1.1 future==0.18.2 gemfileparser==0.8.0 html5lib==1.1 idna==3.3 -importlib-metadata==4.8.3 +importlib-metadata==4.12.0 inflection==0.5.1 intbitset==3.0.1 isodate==0.6.1 -jaraco.functools==3.4.0 +jaraco.functools==3.5.1 
javaproperties==0.8.1 -Jinja2==3.0.3 +Jinja2==3.1.2 jsonstreams==0.6.0 -license-expression==21.6.14 -lxml==4.8.0 -MarkupSafe==2.0.1 +libfwsi-python==20220123 +license-expression==30.0.0 +lxml==4.9.1 +MarkupSafe==2.1.1 more-itertools==8.13.0 normality==2.3.3 packagedcode-msitools==0.101.210706 -packageurl-python==0.9.9 +packageurl-python==0.10.0 packaging==21.3 parameter-expansion-patched==0.3.1 -patch==1.16 -pdfminer.six==20220506 -pefile==2021.9.3 +pdfminer.six==20220524 +pefile==2022.5.30 pip-requirements-parser==31.2.0 pkginfo2==30.0.0 pluggy==1.0.0 -plugincode==21.1.21 +plugincode==31.0.0b1 ply==3.11 publicsuffix2==2.20191221 pyahocorasick==2.0.0b1 @@ -56,24 +56,24 @@ pycparser==2.21 pygmars==0.7.0 Pygments==2.12.0 pymaven-patch==0.3.0 -pyparsing==3.0.8 +pyparsing==3.0.9 pytz==2022.1 PyYAML==6.0 -rdflib==5.0.0 -regipy==2.2.2 -requests==2.27.1 +rdflib==6.2.0 +regipy==3.0.2 +requests==2.28.1 rpm-inspector-rpm==4.16.1.3.210404 saneyaml==0.5.2 six==1.16.0 -soupsieve==2.3.1 +soupsieve==2.3.2.post1 spdx-tools==0.7.0a3 text-unidecode==1.3 toml==0.10.2 -typecode==21.6.1 +typecode==30.0.0 typecode-libmagic==5.39.210531 -urllib3==1.26.9 +urllib3==1.26.11 urlpy==0.5 wcwidth==0.2.5 webencodings==0.5.1 -xmltodict==0.12.0 -zipp==3.6.0 +xmltodict==0.13.0 +zipp==3.8.1 diff --git a/setup.cfg b/setup.cfg index 87d040a..e32f34e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,7 +54,7 @@ zip_safe = false setup_requires = setuptools_scm[toml] >= 4 -python_requires = >=3.6.* +python_requires = >=3.7.* install_requires = click >= 6.7, !=7.0, !=8.0.3 diff --git a/src/container_inspector/image.py b/src/container_inspector/image.py index a60b50d..c6e4796 100755 --- a/src/container_inspector/image.py +++ b/src/container_inspector/image.py @@ -356,14 +356,23 @@ def bottom_layer(self): """ return self.layers[0] - def extract_layers(self, extracted_location): + def extract_layers(self, extracted_location, as_events=False, skip_symlinks=True): """ Extract all layer archives to the 
`extracted_location` directory. Each layer is extracted to its own directory named after its `layer_id`. + Skip symlinks and links if ``skip_symlinks`` is True. + Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise. """ + all_events = [] for layer in self.layers: exloc = os.path.join(extracted_location, layer.layer_id) - layer.extract(extracted_location=exloc) + events = layer.extract( + extracted_location=exloc, + skip_symlinks=skip_symlinks, + as_events=as_events, + ) + all_events.extend(events) + return events def get_layers_resources(self, with_dir=False): """ @@ -450,15 +459,18 @@ def get_installed_packages(self, packages_getter): yield purl, package, layer @staticmethod - def extract(archive_location, extracted_location, skip_symlinks=False): + def extract(archive_location, extracted_location, as_events=False, skip_symlinks=False): """ Extract the image archive tarball at ``archive_location`` to - ``extracted_location``. Skip symlinks and links if ``skip_symlinks`` is True. + ``extracted_location``. + Skip symlinks and links if ``skip_symlinks`` is True. + Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise. """ - utils.extract_tar( + return utils.extract_tar( location=archive_location, target_dir=extracted_location, skip_symlinks=skip_symlinks, + as_events=as_events, ) @staticmethod @@ -466,25 +478,34 @@ def get_images_from_tarball( archive_location, extracted_location, verify=True, + skip_symlinks=False, ): """ - Return a list of Images found in the tarball at `archive_location` that - will be extracted to `extracted_location`. The tarball must be in the + Return a list of Images found in the tarball at ``archive_location`` that + will be extracted to ``extracted_location``. The tarball must be in the format of a "docker save" command tarball. 
- If `verify` is True, perform extra checks on the config data and layers + If ``verify`` is True, perform extra checks on the config data and layers checksums. + Skip symlinks and links if ``skip_symlinks`` is True. + Ignore the extract events from extraction. """ if TRACE: logger.debug( - f'get_images_from_tarball: {archive_location} , ' + f'get_images_from_tarball: {archive_location} ' f'extracting to: {extracted_location}' ) - Image.extract( + # TODO: do not ignore extract events + _events = Image.extract( archive_location=archive_location, extracted_location=extracted_location, + skip_symlinks=skip_symlinks, ) + if TRACE: + logger.debug(f'get_images_from_tarball: events') + for e in _events: + logger.debug(str(e)) return Image.get_images_from_dir( extracted_location=extracted_location, @@ -1071,16 +1092,19 @@ def __attrs_post_init__(self, *args, **kwargs): if not self.size: self.size = os.path.getsize(self.archive_location) - def extract(self, extracted_location, skip_symlinks=True): + def extract(self, extracted_location, as_events=False, skip_symlinks=False): """ Extract this layer archive in the `extracted_location` directory and set this Layer ``extracted_location`` attribute to ``extracted_location``. + Skip symlinks and links if ``skip_symlinks`` is True. + Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise. 
""" self.extracted_location = extracted_location - utils.extract_tar( + return utils.extract_tar( location=self.archive_location, target_dir=extracted_location, skip_symlinks=skip_symlinks, + as_events=as_events, ) def get_resources(self, with_dir=False, walker=os.walk): diff --git a/src/container_inspector/rootfs.py b/src/container_inspector/rootfs.py index 6fdbb55..81d4980 100755 --- a/src/container_inspector/rootfs.py +++ b/src/container_inspector/rootfs.py @@ -31,7 +31,7 @@ class InconsistentLayersError(Exception): pass -def rebuild_rootfs(img, target_dir): +def rebuild_rootfs(img, target_dir, skip_symlinks=True): """ Extract and merge or "squash" all layers of the `image` Image in a single rootfs in `target_dir`. Extraction is done in sequence from the bottom (root @@ -39,6 +39,8 @@ def rebuild_rootfs(img, target_dir): unionfs/overlayfs procedure is applied at each step as per the OCI spec: https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + Skip symlinks and links if ``skip_symlinks`` is True. + Return a list of deleted "whiteout" files. Raise an Exception on errrors. @@ -73,8 +75,15 @@ def rebuild_rootfs(img, target_dir): # 1. extract a layer to temp. # Note that we are not preserving any special file and any file permission extracted_loc = tempfile.mkdtemp('container_inspector-docker') - layer.extract(extracted_location=extracted_loc) - if TRACE: logger.debug(f' Extracted layer to: {extracted_loc}') + # TODO: do not ignore extract events + _events = layer.extract( + extracted_location=extracted_loc, + skip_symlinks=skip_symlinks, + ) + if TRACE: + logger.debug(f' Extracted layer to: {extracted_loc} with skip_symlinks: {skip_symlinks}') + for ev in _events: + logger.debug(f' {ev}') # 2. find whiteouts in that layer. 
whiteouts = list(find_whiteouts(extracted_loc)) diff --git a/src/container_inspector/utils.py b/src/container_inspector/utils.py index 740d0ae..c382cb3 100755 --- a/src/container_inspector/utils.py +++ b/src/container_inspector/utils.py @@ -10,10 +10,13 @@ import logging import hashlib import os +import traceback +from typing import NamedTuple from commoncode import fileutils TRACE = False + logger = logging.getLogger(__name__) if TRACE: import sys @@ -80,67 +83,93 @@ def get_labels(config, container_config): return dict(sorted(labels.items())) -def extract_tar(location, target_dir, skip_symlinks=True): +class ExtractEvent(NamedTuple): + """ + Represent an extraction event of interest. These are returned when running + extract_tar + """ + + INFO = "info" + WARNING = "warning" + ERROR = "error" + # type of event: one of error, warning or info + type: str + # source path in the archive + source: str + # event message + message: str + + def to_string(self): + return f"{self.type}: {self.message}" + + +def extract_tar(location, target_dir, as_events=False, skip_symlinks=True, trace=TRACE): """ - Extract a tar archive at `location` in the `target_dir` directory. - Ignore special device files. Skip symlinks and hardlinks if skip_symlinks is True. + Extract a tar archive at ``location`` in the ``target_dir`` directory. + Return a list of ExtractEvent if ``as_events`` is True, or a list of message + strings otherwise. This list can be empty. Skip symlinks and hardlinks if + skip_symlinks is True. + + Ignore special device files. Do not preserve the permissions and owners. - Raise exceptions on possible problematic relative paths. - Issue a warning if skip_symlinks is True and links target are missing. 
""" import tarfile - tarfile.TarInfo - if TRACE: logger.debug(f'_extract_tar: {location} to {target_dir} skip_symlinks: {skip_symlinks}') + if trace: + logger.debug(f'_extract_tar: {location} to {target_dir} skip_symlinks: {skip_symlinks}') fileutils.create_dir(target_dir) + events = [] with tarfile.open(location) as tarball: - # never extract character device, block and fifo files: - # we extract dirs, files and links only - error_messages = [] + for tarinfo in tarball: - if TRACE: logger.debug(f'_extract_tar: {tarinfo}') + if trace: + logger.debug(f'extract_tar: {location!r}: {tarinfo}') if tarinfo.isdev() or tarinfo.ischr() or tarinfo.isblk() or tarinfo.isfifo() or tarinfo.sparse: - msg = f'_extract_tar: skipping unsupported {tarinfo} file type: block, chr, dev or sparse file' - error_messages.append(msg) - if TRACE: - logger.debug(msg) + msg = f'skipping unsupported {tarinfo.name} file type: block, chr, dev or sparse file' + events.append(ExtractEvent(type=ExtractEvent.INFO, source=tarinfo.name, message=msg)) + if trace: + logger.debug(f'extract_tar: {msg}') continue if '..' in tarinfo.name: - msg = f'_extract_tar: skipping unsupported {tarinfo} with relative path' - error_messages.append(msg) - if TRACE: - logger.debug(msg) + msg = f'{location}: skipping unsupported {tarinfo.name} with relative path.' 
+ events.append(ExtractEvent(type=ExtractEvent.WARNING, source=tarinfo.name, message=msg)) + if trace: + logger.debug(f'extract_tar: {msg}') continue - if tarinfo.islnk() or tarinfo.issym(): - try: - target = tarball._find_link_target(tarinfo) - if not target: - msg = f'_extract_tar: skipping link with missing target: {tarinfo}' - error_messages.append(msg) - if TRACE: - logger.debug(msg) - continue - - except Exception: - import traceback - msg = f'_extract_tar: skipping link with missing target: {tarinfo}: {traceback.format_exc()}' - error_messages.append(msg) - if TRACE: - logger.debug(msg) - continue + if skip_symlinks and (tarinfo.islnk() or tarinfo.issym()): + msg = f'{location}: skipping link with skip_symlinks: {skip_symlinks}: {tarinfo.name} -> {tarinfo.linkname}' + if trace: + logger.debug(f'extract_tar: {msg}') + continue + + if tarinfo.name.startswith('/'): + msg = f'{location}: absolute path name: {tarinfo.name} transformed in relative path.' + events.append(ExtractEvent(type=ExtractEvent.WARNING, source=tarinfo.name, message=msg)) + tarinfo.name = tarinfo.name.lstrip('/') + if trace: + logger.debug(f'extract_tar: {msg}') + # finally extract proper tarinfo.mode = 0o755 - tarinfo.name = tarinfo.name.lstrip('/') - tarball.extract(member=tarinfo, path=target_dir, set_attrs=False,) - return error_messages + + try: + tarball.extract(member=tarinfo, path=target_dir, set_attrs=False,) + except Exception: + msg = f'{location}: failed to extract: {tarinfo.name}: {traceback.format_exc()}' + events.append(ExtractEvent(type=ExtractEvent.ERROR, source=tarinfo.name, message=msg)) + if trace: + logger.debug(f'extract_tar: {msg}') + if not as_events: + events = [e.to_string() for e in events] + return events -def extract_tar_with_symlinks(location, target_dir): - return extract_tar(location, target_dir, skip_symlinks=False) +def extract_tar_with_symlinks(location, target_dir, as_events=False): + return extract_tar(location=location, target_dir=target_dir, 
as_events=as_events, skip_symlinks=False,) def lower_keys(mapping): diff --git a/tests/data/distro/os-release/fedora/fedora-26-modular.txt b/tests/data/distro/os-release/fedora/fedora-26-modular.txt new file mode 100644 index 0000000..024f43d --- /dev/null +++ b/tests/data/distro/os-release/fedora/fedora-26-modular.txt @@ -0,0 +1,15 @@ +NAME="Fedora Modular" +VERSION="26 (Twenty Six)" +ID=fedora-modular +ID_LIKE=fedora +VERSION_ID=26 +PRETTY_NAME="Fedora Modular 26 (Twenty Six)" +ANSI_COLOR="0;34" +CPE_NAME="cpe:/o:fedoraproject:fedora-modular:26" +HOME_URL="https://fedoraproject.org/" +BUG_REPORT_URL="https://bugzilla.redhat.com/" +REDHAT_BUGZILLA_PRODUCT="Fedora" +REDHAT_BUGZILLA_PRODUCT_VERSION=26 +REDHAT_SUPPORT_PRODUCT="Fedora" +REDHAT_SUPPORT_PRODUCT_VERSION=26 +PRIVACY_POLICY_URL=https://fedoraproject.org/wiki/Legal:PrivacyPolicy \ No newline at end of file diff --git a/tests/data/distro/os-release/fedora/fedora-26-modular.txt-distro-expected.json b/tests/data/distro/os-release/fedora/fedora-26-modular.txt-distro-expected.json new file mode 100644 index 0000000..37970a6 --- /dev/null +++ b/tests/data/distro/os-release/fedora/fedora-26-modular.txt-distro-expected.json @@ -0,0 +1,27 @@ +{ + "os": "linux", + "architecture": null, + "name": "Fedora Modular", + "version": "26 (Twenty Six)", + "identifier": "fedora-modular", + "id_like": "fedora", + "version_codename": null, + "version_id": "26", + "pretty_name": "Fedora Modular 26 (Twenty Six)", + "cpe_name": "cpe:/o:fedoraproject:fedora-modular:26", + "home_url": "https://fedoraproject.org/", + "documentation_url": null, + "support_url": null, + "bug_report_url": "https://bugzilla.redhat.com/", + "privacy_policy_url": "https://fedoraproject.org/wiki/Legal:PrivacyPolicy", + "build_id": null, + "variant": null, + "variant_id": null, + "logo": null, + "extra_data": { + "REDHAT_BUGZILLA_PRODUCT": "Fedora", + "REDHAT_BUGZILLA_PRODUCT_VERSION": "26", + "REDHAT_SUPPORT_PRODUCT": "Fedora", + 
"REDHAT_SUPPORT_PRODUCT_VERSION": "26" + } +} \ No newline at end of file diff --git a/tests/data/distro/os-release/fedora/fedora-26-modular.txt-expected.json b/tests/data/distro/os-release/fedora/fedora-26-modular.txt-expected.json new file mode 100644 index 0000000..6094dfe --- /dev/null +++ b/tests/data/distro/os-release/fedora/fedora-26-modular.txt-expected.json @@ -0,0 +1,17 @@ +{ + "NAME": "Fedora Modular", + "VERSION": "26 (Twenty Six)", + "ID": "fedora-modular", + "ID_LIKE": "fedora", + "VERSION_ID": "26", + "PRETTY_NAME": "Fedora Modular 26 (Twenty Six)", + "ANSI_COLOR": "0;34", + "CPE_NAME": "cpe:/o:fedoraproject:fedora-modular:26", + "HOME_URL": "https://fedoraproject.org/", + "BUG_REPORT_URL": "https://bugzilla.redhat.com/", + "REDHAT_BUGZILLA_PRODUCT": "Fedora", + "REDHAT_BUGZILLA_PRODUCT_VERSION": "26", + "REDHAT_SUPPORT_PRODUCT": "Fedora", + "REDHAT_SUPPORT_PRODUCT_VERSION": "26", + "PRIVACY_POLICY_URL": "https://fedoraproject.org/wiki/Legal:PrivacyPolicy" +} \ No newline at end of file diff --git a/tests/data/utils/layer_with_links.tar b/tests/data/utils/layer_with_links.tar new file mode 100644 index 0000000..1c90400 Binary files /dev/null and b/tests/data/utils/layer_with_links.tar differ diff --git a/tests/data/utils/layer_with_links.tar.expected-events-skipping.json b/tests/data/utils/layer_with_links.tar.expected-events-skipping.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/tests/data/utils/layer_with_links.tar.expected-events-skipping.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/tests/data/utils/layer_with_links.tar.expected-events.json b/tests/data/utils/layer_with_links.tar.expected-events.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/tests/data/utils/layer_with_links.tar.expected-events.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/tests/data/utils/layer_with_links.tar.expected-skipping.json b/tests/data/utils/layer_with_links.tar.expected-skipping.json new 
file mode 100644 index 0000000..99f79ea --- /dev/null +++ b/tests/data/utils/layer_with_links.tar.expected-skipping.json @@ -0,0 +1,12 @@ +[ + "/lib", + "/lib/libcrypto.so.1.0.0", + "/lib/libssl.so.1.0.0", + "/usr", + "/usr/lib", + "/usr/lib/icu", + "/usr/lib/icu/60.2", + "/usr/lib/icu/60.2/Makefile.inc", + "/usr/lib/icu/60.2/pkgdata.inc", + "/usr/lib/libffi.so.6.0.4" +] \ No newline at end of file diff --git a/tests/data/utils/layer_with_links.tar.expected.json b/tests/data/utils/layer_with_links.tar.expected.json new file mode 100644 index 0000000..bca3024 --- /dev/null +++ b/tests/data/utils/layer_with_links.tar.expected.json @@ -0,0 +1,20 @@ +[ + "/lib", + "/lib/libcrypto.so.1.0.0", + "/lib/libssl.so.1.0.0", + "/usr", + "/usr/lib", + "/usr/lib/icu", + "/usr/lib/icu/60.2", + "/usr/lib/icu/60.2/Makefile.inc", + "/usr/lib/icu/60.2/pkgdata.inc", + "/usr/lib/icu/Makefile.inc", + "/usr/lib/icu/current", + "/usr/lib/icu/current/Makefile.inc", + "/usr/lib/icu/current/pkgdata.inc", + "/usr/lib/icu/pkgdata.inc", + "/usr/lib/libcrypto.so.1.0.0", + "/usr/lib/libffi.so.6", + "/usr/lib/libffi.so.6.0.4", + "/usr/lib/libssl.so.1.0.0" +] \ No newline at end of file diff --git a/tests/data/utils/layer_with_links_missing_targets.tar b/tests/data/utils/layer_with_links_missing_targets.tar new file mode 100644 index 0000000..0919579 Binary files /dev/null and b/tests/data/utils/layer_with_links_missing_targets.tar differ diff --git a/tests/data/utils/layer_with_links_missing_targets.tar.expected-broken.json b/tests/data/utils/layer_with_links_missing_targets.tar.expected-broken.json new file mode 100644 index 0000000..e4c2a47 --- /dev/null +++ b/tests/data/utils/layer_with_links_missing_targets.tar.expected-broken.json @@ -0,0 +1,15 @@ +[ + "/lib", + "/lib/libcrypto.so.1.0.0", + "/lib/libssl.so.1.0.0", + "/usr", + "/usr/lib", + "/usr/lib/icu", + "/usr/lib/icu/60.2", + "/usr/lib/icu/60.2/pkgdata.inc", + "/usr/lib/icu/current", + "/usr/lib/icu/current/pkgdata.inc", + 
"/usr/lib/icu/pkgdata.inc", + "/usr/lib/libcrypto.so.1.0.0", + "/usr/lib/libssl.so.1.0.0" +] \ No newline at end of file diff --git a/tests/data/utils/layer_with_links_missing_targets.tar.expected-events-broken.json b/tests/data/utils/layer_with_links_missing_targets.tar.expected-events-broken.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/tests/data/utils/layer_with_links_missing_targets.tar.expected-events-broken.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/tests/data/utils/layer_with_links_missing_targets.tar.expected-events.json b/tests/data/utils/layer_with_links_missing_targets.tar.expected-events.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/tests/data/utils/layer_with_links_missing_targets.tar.expected-events.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/tests/data/utils/layer_with_links_missing_targets.tar.expected.json b/tests/data/utils/layer_with_links_missing_targets.tar.expected.json new file mode 100644 index 0000000..ea51323 --- /dev/null +++ b/tests/data/utils/layer_with_links_missing_targets.tar.expected.json @@ -0,0 +1,10 @@ +[ + "/lib", + "/lib/libcrypto.so.1.0.0", + "/lib/libssl.so.1.0.0", + "/usr", + "/usr/lib", + "/usr/lib/icu", + "/usr/lib/icu/60.2", + "/usr/lib/icu/60.2/pkgdata.inc" +] \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py index 7446744..25b3ed3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,6 +13,8 @@ from container_inspector import utils +from utilities import check_expected + def check_files(target_dir, expected): """ @@ -42,26 +44,73 @@ def check_files(target_dir, expected): class TestUtils(testcase.FileBasedTesting): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + def clean_events(self, extract_dir, events): + """ + Return a list of events mapping cleaned from absolute paths + """ + events_results = [] + for e in events: + ne = e._replace( + 
source=e.source.replace(extract_dir, ''), + message=e.message.replace(self.test_data_dir, ''), + ) + events_results.append(ne._asdict()) + + return events_results + + def clean_paths(self, extract_dir): + return sorted([p.replace(extract_dir, '') for p in + fileutils.resource_iter( + location=extract_dir, + with_dirs=True, + follow_symlinks=True)] + ) + def test_extract_tree_with_colon_in_filenames(self): expected = ( 'colon/libc6:amd64.list', ) test_dir = self.get_test_loc('tar/colon.tar.xz') - temp_dir = self.get_temp_dir() - errors = utils.extract_tar(location=test_dir, target_dir=temp_dir) - check_files(temp_dir, expected) - assert not errors + extract_dir = self.get_temp_dir() + events = utils.extract_tar(location=test_dir, target_dir=extract_dir) + check_files(target_dir=extract_dir, expected=expected) + assert not events def test_extract_tar_relative(self): expected = () test_dir = self.get_test_loc('tar/tar_relative.tar') - temp_dir = self.get_temp_dir() - errors = utils.extract_tar(location=test_dir, target_dir=temp_dir) - check_files(temp_dir, expected) - assert errors - for error in errors: - assert 'skipping unsupported' in error - assert 'with relative path' in error + extract_dir = self.get_temp_dir() + events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True) + check_files(target_dir=extract_dir, expected=expected) + events = self.clean_events(extract_dir, events) + expected_events = [ + {'message': '/tar/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.', + 'source': '../a_parent_folder.txt', + 'type': 'warning'}, + {'message': '/tar/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.', + 'source': '../../another_folder/b_two_root.txt', + 'type': 'warning'}, + {'message': '/tar/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.', + 'source': '../folder/subfolder/b_subfolder.txt', + 'type': 'warning'}, + ] 
+ + assert events == expected_events + + def test_extract_tar_relative_as_strings(self): + expected = () + test_dir = self.get_test_loc('tar/tar_relative.tar') + extract_dir = self.get_temp_dir() + events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=False) + check_files(target_dir=extract_dir, expected=expected) + + events = [e.replace(self.test_data_dir, '') for e in events] + expected_events = [ + 'warning: /tar/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.', + 'warning: /tar/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.', + 'warning: /tar/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.', + ] + assert events == expected_events def test_extract_tar_absolute(self): expected = ( @@ -69,7 +118,92 @@ def test_extract_tar_absolute(self): 'tmp/subdir/b.txt', ) test_dir = self.get_test_loc('tar/absolute_path.tar') - temp_dir = self.get_temp_dir() - errors = utils.extract_tar(location=test_dir, target_dir=temp_dir) - check_files(temp_dir, expected) - assert not errors + extract_dir = self.get_temp_dir() + events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True) + check_files(target_dir=extract_dir, expected=expected) + + events = self.clean_events(extract_dir, events) + expected_events = [ + {'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir transformed in relative path.', + 'source': '/tmp/subdir', + 'type': 'warning'}, + {'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir/a.txt transformed in relative path.', + 'source': '/tmp/subdir/a.txt', + 'type': 'warning'}, + {'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir/b.txt transformed in relative path.', + 'source': '/tmp/subdir/b.txt', + 'type': 'warning'}, + ] + + assert events == expected_events + + def test_extract_tar_not_skipping_links(self): + test_tarball = 
self.get_test_loc('utils/layer_with_links.tar') + extract_dir = self.get_temp_dir() + + events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, as_events=True, skip_symlinks=False) + + results = self.clean_paths(extract_dir) + expected_results = self.get_test_loc('utils/layer_with_links.tar.expected.json', must_exist=False) + check_expected(results, expected_results, regen=False) + + events_results = self.clean_events(extract_dir, events) + expected_events = self.get_test_loc('utils/layer_with_links.tar.expected-events.json', must_exist=False) + check_expected(events_results, expected_events, regen=False) + + def test_extract_tar_skipping_links(self): + test_tarball = self.get_test_loc('utils/layer_with_links.tar') + extract_dir = self.get_temp_dir() + + events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, as_events=True, skip_symlinks=True) + + results = self.clean_paths(extract_dir) + expected_results = self.get_test_loc('utils/layer_with_links.tar.expected-skipping.json', must_exist=False) + check_expected(results, expected_results, regen=False) + + events_results = self.clean_events(extract_dir, events) + expected_events = self.get_test_loc('utils/layer_with_links.tar.expected-events-skipping.json', must_exist=False) + check_expected(events_results, expected_events, regen=False) + + def test_extract_tar_with_symlinks(self): + test_tarball = self.get_test_loc('utils/layer_with_links.tar') + extract_dir = self.get_temp_dir() + + events = utils.extract_tar_with_symlinks(location=test_tarball, as_events=True, target_dir=extract_dir) + + results = self.clean_paths(extract_dir) + expected_results = self.get_test_loc('utils/layer_with_links.tar.expected.json', must_exist=False) + check_expected(results, expected_results, regen=False) + + events_results = self.clean_events(extract_dir, events) + expected_events = self.get_test_loc('utils/layer_with_links.tar.expected-events.json', must_exist=False) + check_expected(events_results, 
expected_events, regen=False) + + def test_extract_tar_with_broken_links_skipping_links(self): + test_tarball = self.get_test_loc('utils/layer_with_links_missing_targets.tar') + extract_dir = self.get_temp_dir() + + events = utils.extract_tar(location=test_tarball, target_dir=extract_dir, as_events=True, skip_symlinks=True) + + results = self.clean_paths(extract_dir) + expected_results = self.get_test_loc('utils/layer_with_links_missing_targets.tar.expected.json', must_exist=False) + check_expected(results, expected_results, regen=False) + + events_results = self.clean_events(extract_dir, events) + expected_events = self.get_test_loc('utils/layer_with_links_missing_targets.tar.expected-events.json', must_exist=False) + check_expected(events_results, expected_events, regen=False) + + def test_extract_tar_with_symlinks_with_broken_links(self): + test_tarball = self.get_test_loc('utils/layer_with_links_missing_targets.tar') + extract_dir = self.get_temp_dir() + + events = utils.extract_tar_with_symlinks(location=test_tarball, target_dir=extract_dir) + + results = self.clean_paths(extract_dir) + expected_results = self.get_test_loc('utils/layer_with_links_missing_targets.tar.expected-broken.json', must_exist=False) + check_expected(results, expected_results, regen=False) + + events_results = self.clean_events(extract_dir, events) + expected_events = self.get_test_loc('utils/layer_with_links_missing_targets.tar.expected-events-broken.json', must_exist=False) + check_expected(events_results, expected_events, regen=False) +