diff --git a/.travis.yml b/.travis.yml index 1b52eb2..02a1161 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,9 +13,10 @@ python: - "3.6" - "3.7" - "3.8" + - "3.9" # Scripts to run at install stage install: ./configure --dev # Scripts to run at script stage -script: tmp/bin/pytest +script: tmp/bin/pytest -vvs -n2 diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ad1571b..97c5ccd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,19 @@ Changelog ========= +v21.6.4 +-------- + +This is a minor release with bug fixes and minor API changes. + +API changes +~~~~~~~~~~~ + +The Distro.from_rootfs() now works as expected. It can handle empty location +and works correctly with a base_distro. When a base_distro is provided it +will raise an Exception if the found Distro.os does not match the base Distro.os + + v21.5.25 -------- diff --git a/NOTICE b/NOTICE index 65936b2..12de513 100644 --- a/NOTICE +++ b/NOTICE @@ -2,7 +2,8 @@ # Copyright (c) nexB Inc. and others. # SPDX-License-Identifier: Apache-2.0 # -# Visit https://aboutcode.org and https://github.com/nexB/ for support and download. +# Visit https://aboutcode.org and https://github.com/nexB/container-inspector +# for support and download. # ScanCode is a trademark of nexB Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/README.rst b/README.rst index f13fc70..685bcd1 100644 --- a/README.rst +++ b/README.rst @@ -77,4 +77,5 @@ Related tools ------------- - Fetching Image from remote registry is available in ScanCode.io - Extracting VM Image filesystems as archives is available in ExtractCode + - Scanning for application and system packages is available in ScanCode Toolkit diff --git a/pyproject.toml b/pyproject.toml index 8eebe91..852f0fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [build-system] -requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 4"] +requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] # this is used populated when creating a git archive # and when there is .git dir and/or there is no git installed -fallback_version = "v9999.$Format:%h-%cs$" +fallback_version = "9999.$Format:%h-%cs$" [tool.pytest.ini_options] norecursedirs = [ diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index fa87180..0000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,4 +0,0 @@ -pytest -# used for its tests classes -commoncode --e . diff --git a/setup.cfg b/setup.cfg index 75ef426..40e7104 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -license_files = +license_files = apache-2.0.LICENSE NOTICE README.rst @@ -10,7 +10,7 @@ author_email = info@aboutcode.org license = Apache-2.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 -description = Docker, containers, rootfs and virtual machinesrelated utilities. +description = Docker, containers, rootfs and virtual machine related software composition analysis (SCA) utilities. 
long_description = file:README.rst url = https://github.com/nexB/container-inspector classifiers = @@ -55,6 +55,9 @@ testing = # upstream pytest >= 6 pytest-xdist >= 2 + twine + restview + docs= Sphinx>=3.3.1 sphinx-rtd-theme>=0.5.0 @@ -62,4 +65,4 @@ docs= [aliases] -release = register clean --all sdist bdist_wheel \ No newline at end of file +release = clean --all sdist bdist_wheel \ No newline at end of file diff --git a/src/container_inspector/__init__.py b/src/container_inspector/__init__.py index deffe27..632f0d3 100644 --- a/src/container_inspector/__init__.py +++ b/src/container_inspector/__init__.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import re diff --git a/src/container_inspector/cli.py b/src/container_inspector/cli.py index 9385485..56e2c5a 100755 --- a/src/container_inspector/cli.py +++ b/src/container_inspector/cli.py @@ -1,23 +1,18 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. 
-# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. -import csv as csv_module -import json as json_module import logging import os -from os import path import sys import tempfile +import csv as csv_module +import json as json_module +from os import path import click @@ -25,10 +20,11 @@ from container_inspector import dockerfile from container_inspector import rootfs +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +if TRACE: + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) @click.command() diff --git a/src/container_inspector/distro.py b/src/container_inspector/distro.py index 4d38a66..a5e0ee9 100755 --- a/src/container_inspector/distro.py +++ b/src/container_inspector/distro.py @@ -1,30 +1,29 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. 
-# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import logging -from os import path +import os import shlex +from os import path import attr +from container_inspector import rootfs + +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) -def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) + +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) """ Utilities to detect the "distro" of a root filesystem (be it a VM or rootfs @@ -46,19 +45,21 @@ class Distro(object): """ os = attr.attrib( - default='linux', + default=None, metadata=dict( - doc='Operating system, default to linux. ' + doc='Operating system. 
' 'One of: {}'.format(', '.join(os_choices))) ) architecture = attr.attrib( default=None, - metadata=dict(doc='Processor architecture such as x86, x86_64, arm or amd64.') + metadata=dict( + doc='Processor architecture such as x86, x86_64, arm or amd64.' + ) ) name = attr.attrib( - default='linux', + default=None, metadata=dict(doc='''Based on os-release: https://www.freedesktop.org/software/systemd/man/os-release.html NAME= A string identifying the operating system, without a version @@ -82,7 +83,7 @@ class Distro(object): ) identifier = attr.attrib( - default='linux', + default=None, metadata=dict(doc='''Based on os-release: https://www.freedesktop.org/software/systemd/man/os-release.html ID= A lower-case string (no spaces or other characters outside of @@ -291,7 +292,14 @@ def to_dict(self): def from_os_release_file(cls, location): """ Return a Distro built from a Linux os-release file. + Return None if ``location`` is empty or missing. + Raise an Exception if the os-release file is invalid and cannot be + parsed """ + if not location or not os.path.exists(location): + if TRACE: logger.debug(f'from_os_release_file: {location!r} does not exists') + return + data = parse_os_release(location) or {} new_data = dict( # This idiom looks a tad wierd but we want to always get a linux as @@ -326,6 +334,8 @@ def from_os_release_file(cls, location): if data: new_data['extra_data'] = data + if TRACE: logger.debug(f'from_os_release_file: new_data: {new_data!r}') + return cls(**new_data) from_file = from_os_release_file @@ -333,13 +343,26 @@ def from_os_release_file(cls, location): @classmethod def from_rootfs(cls, location, base_distro=None): """ - Return a Distro discovered from the rootfs at `location`. - Return None if no OS was found. + Return a Distro discovered from the rootfs at ``location``. Return None + if no OS is found or if ``location`` is empty or missing. Use the optional ``base_distro`` Distro object attributes as a base and - to guide discovery. 
If provided ``base_distro`` may be returned as-is - if no extra OS details were found. + to guide discovery. + + Raise an Exception if the ``base_distro`` OS does not match the found + distro. + + Providing a ``base_distro`` Distro is useful when the distro information + is already known ahead of time (for instance from a Docker image + manifest) and may be missing from the rootfs proper (for instance if an + /etc/os-release is missing in the rootfs for a Linux-based image). """ + if TRACE: logger.debug(f'from_rootfs: {location!r} base_distro: {base_distro!r}') + + if not location or not os.path.exists(location): + if TRACE: logger.debug(f'from_rootfs: {location!r} does not exists') + return + finders = { 'linux': cls.find_linux_details, 'windows': cls.find_windows_details, @@ -347,21 +370,33 @@ def from_rootfs(cls, location, base_distro=None): } for finder_os, finder in finders.items(): - if base_distro and base_distro.os != finder_os: - continue + if TRACE: logger.debug(f'from_rootfs: trying finder_os: {finder_os!r}') found = finder(location) + if TRACE: logger.debug(f'from_rootfs: trying found: {found!r}') if found: - return base_distro.merge(found) + if base_distro: + if base_distro.os != finder_os: + raise Exception( + f'Inconsistent base distro OS: {base_distro.os} ' + f'and found distro OS : {found.os}' + ) + + merged = base_distro.merge(found) + if TRACE: logger.debug(f'from_rootfs: returning merged: {merged!r}') + return merged - if base_distro: - return base_distro + else: + if TRACE: logger.debug(f'from_rootfs: returning found: {found!r}') + return found @classmethod def find_linux_details(cls, location): """ - Find a linux distro details using the os-release file and return a - Distro object or None. + Find a linux distro details using the os-release file at ``location`` + and return a Distro object or None. + + Raise an Exception if an os-release file is found that cannot be parsed. """ # note: /etc/os-release has precedence over /usr/lib/os-release. 
for candidate_path in ('etc/os-release', 'usr/lib/os-release',): @@ -374,10 +409,12 @@ def find_windows_details(cls, location): """ Find a Windows installation details and return a Distro object or None. """ - return cls( - os='windows', - identifier='identifier', - ) + if rootfs.find_root( + location, + max_depth=3, + root_paths=rootfs.WINDOWS_PATHS, + ): + return cls(os='windows', identifier='windows',) @classmethod def find_freebsd_details(cls, location): @@ -420,10 +457,19 @@ def merge(self, other_distro): Return a new distro based on this Distro data updated with non-empty values from the ``other_distro`` Distro object. """ + if TRACE: logger.debug(f'merge: {self!r} with: {other_distro!r}') + existing = self.to_dict() if other_distro: - other_non_empty = {k: v for k, v in other_distro.to_dict().items() if v} + other_non_empty = { + k: v for k, v in other_distro.to_dict().items() + if v + } existing.update(other_non_empty) + if TRACE: logger.debug(f'merge: updated data: {existing!r}') + + if TRACE: logger.debug(f'merge: merged data: {existing!r}') + return type(self)(**existing) @@ -450,8 +496,14 @@ def parse_os_release(location): """ with open(location) as osrl: lines = (line.strip() for line in osrl) - lines = (line.partition('=') for line in lines if line and not line.startswith('#')) - return {key.strip(): ''.join(shlex.split(value)) for key, _, value in lines} + lines = ( + line.partition('=') for line in lines + if line and not line.startswith('#') + ) + return { + key.strip(): ''.join(shlex.split(value)) + for key, _, value in lines + } def get_debian_details(): diff --git a/src/container_inspector/dockerfile.py b/src/container_inspector/dockerfile.py index 26260d6..7b6a851 100755 --- a/src/container_inspector/dockerfile.py +++ b/src/container_inspector/dockerfile.py @@ -1,27 +1,24 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. 
-# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import logging import operator +import os from os import path import dockerfile_parse -import os +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) """ Analysis helper for Docker Dockerfiles. 
@@ -37,7 +34,7 @@ def get_dockerfile(location): if not 'Dockerfile' in fn: return {} - logger.debug('Found Dockerfile at: %(location)r' % locals()) + if TRACE: logger.debug('Found Dockerfile at: %(location)r' % locals()) try: # TODO: keep comments instead of ignoring them: @@ -57,7 +54,7 @@ def get_dockerfile(location): df_data['instructions'].append(entry) return {location: df_data} except: - logger.debug('Error parsing Dockerfile at: %(location)r' % locals()) + if TRACE: logger.debug('Error parsing Dockerfile at: %(location)r' % locals()) return {} @@ -85,7 +82,7 @@ def collect_dockerfiles(location): for top, dirs, files in os.walk(location): for f in files: dfiles.update(get_dockerfile(path.join(top, f))) - logger.debug('collect_dockerfiles: %(dfiles)r' % locals()) + if TRACE: logger.debug('collect_dockerfiles: %(dfiles)r' % locals()) return dfiles diff --git a/src/container_inspector/image.py b/src/container_inspector/image.py index 2f9c973..9be77eb 100755 --- a/src/container_inspector/image.py +++ b/src/container_inspector/image.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations under the License. import logging import os @@ -26,14 +21,12 @@ from container_inspector.utils import load_json from container_inspector.utils import sha256_digest +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) - - -def logger_debug(*args): - return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) """ Objects to handle Docker and OCI images and Layers. @@ -230,6 +223,7 @@ class Image(ArchiveMixin, ConfigMixin, ToDictMixin): Image objects can be created from these inputs: - an image tarball in docker format (e.g. "docker save"). - a directory that contains an extracted image tarball in these layouts. + OCI format is not yet supported. """ @@ -422,6 +416,8 @@ def get_images_from_tarball( If `verify` is True, perform extra checks on the config data and layers checksums. """ + if TRACE: logger.debug(f'get_images_from_tarball: {archive_location} , extracting to: {extracted_location}') + Image.extract( archive_location=archive_location, extracted_location=extracted_location, @@ -446,10 +442,15 @@ def get_images_from_dir( If `verify` is True, perform extra checks on the config data and layers checksums. 
""" + if TRACE: logger.debug(f'get_images_from_dir: from {extracted_location} and archive_location: {archive_location}') + if not os.path.isdir(extracted_location): raise Exception(f'Not a directory: {extracted_location}') image_format = Image.find_format(extracted_location) + + if TRACE: logger.debug(f'get_images_from_dir: image_format: {image_format}') + if image_format == 'docker': return Image.get_docker_images_from_dir( extracted_location=extracted_location, @@ -457,7 +458,7 @@ def get_images_from_dir( verify=verify, ) - if image_format == 'docker': + if image_format == 'oci': return Image.get_oci_images_from_dir( extracted_location=extracted_location, archive_location=archive_location, @@ -506,6 +507,8 @@ def get_docker_images_from_dir( .... ] """ + if TRACE: logger.debug(f'get_docker_images_from_dir: {extracted_location}') + if not os.path.isdir(extracted_location): raise Exception(f'Not a directory: {extracted_location}') @@ -517,15 +520,21 @@ def get_docker_images_from_dir( manifest = load_json(manifest_loc) + if TRACE: logger.debug(f'get_docker_images_from_dir: manifest: {manifest}') + images = [] for manifest_config in manifest: - images.append( - Image.from_docker_manifest_config( - extracted_location=extracted_location, - archive_location=archive_location, - manifest_config=manifest_config, - verify=verify, - )) + if TRACE: logger.debug(f'get_docker_images_from_dir: manifest_config: {manifest_config}') + img = Image.from_docker_manifest_config( + extracted_location=extracted_location, + archive_location=archive_location, + manifest_config=manifest_config, + verify=verify, + + ) + if TRACE: logger.debug(f'get_docker_images_from_dir: img: {img!r}') + + images.append(img) return images @@ -600,6 +609,8 @@ def from_docker_manifest_config( } } """ + if TRACE: logger.debug(f'from_docker_manifest_config: manifest_config: {manifest_config!r}') + manifest_config = utils.lower_keys(manifest_config) config_file = manifest_config.get('config') or '' @@ -627,7 
+638,9 @@ def from_docker_manifest_config( layer_paths = manifest_config.get('layers') or [] layers_archive_locs = [ - os.path.join(extracted_location, lp) for lp in layer_paths] + os.path.join(extracted_location, lp) + for lp in layer_paths + ] tags = manifest_config.get('repotags') or [] diff --git a/src/container_inspector/rootfs.py b/src/container_inspector/rootfs.py index 593d75e..6fdbb55 100755 --- a/src/container_inspector/rootfs.py +++ b/src/container_inspector/rootfs.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
import logging import os @@ -17,12 +12,14 @@ from commoncode.fileutils import copytree from commoncode.fileutils import delete +from commoncode.paths import split +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -# import sys -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) """ Utilities to handle image and layer archives and recreate proper rootfs @@ -68,7 +65,7 @@ def rebuild_rootfs(img, target_dir): deletions = [] for layer_num, layer in enumerate(img.layers): - logger.debug( + if TRACE: logger.debug( f'Extracting layer {layer_num} - {layer.layer_id} ' f'tarball: {layer.archive_location}' ) @@ -77,16 +74,16 @@ def rebuild_rootfs(img, target_dir): # Note that we are not preserving any special file and any file permission extracted_loc = tempfile.mkdtemp('container_inspector-docker') layer.extract(extracted_location=extracted_loc) - logger.debug(f' Extracted layer to: {extracted_loc}') + if TRACE: logger.debug(f' Extracted layer to: {extracted_loc}') # 2. find whiteouts in that layer. whiteouts = list(find_whiteouts(extracted_loc)) - logger.debug(' Merging extracted layers and applying unionfs whiteouts') - logger.debug(' Whiteouts:\n' + ' \n'.join(map(repr, whiteouts))) + if TRACE: logger.debug(' Merging extracted layers and applying unionfs whiteouts') + if TRACE: logger.debug(' Whiteouts:\n' + ' \n'.join(map(repr, whiteouts))) # 3. remove whiteouts in the previous layer stack (e.g. 
the WIP rootfs) for whiteout_marker_loc, whiteable_path in whiteouts: - logger.debug(f' Deleting dir or file with whiteout marker: {whiteout_marker_loc}') + if TRACE: logger.debug(f' Deleting dir or file with whiteout marker: {whiteout_marker_loc}') whiteable_loc = os.path.join(target_dir, whiteable_path) delete(whiteable_loc) # also delete the whiteout marker file @@ -94,9 +91,9 @@ def rebuild_rootfs(img, target_dir): deletions.append(whiteable_loc) # 4. finall copy/overwrite the extracted layer over the WIP rootfs - logger.debug(f' Moving extracted layer from: {extracted_loc} to: {target_dir}') + if TRACE: logger.debug(f' Moving extracted layer from: {extracted_loc} to: {target_dir}') copytree(extracted_loc, target_dir) - logger.debug(f' Moved layer to: {target_dir}') + if TRACE: logger.debug(f' Moved layer to: {target_dir}') delete(extracted_loc) return deletions @@ -193,6 +190,24 @@ def find_whiteouts(root_location, walker=os.walk): ]) +def compute_path_depth(root_path, dir_path): + """ + Compute the depth of ``dir_path`` below ``root_path`` as the number of path + segments that extend below the root. + """ + if not dir_path: + return 0 + dir_path = dir_path.strip('/') + + if not root_path: + return len(split(dir_path)) + + root_path = root_path.strip('/') + + suffix = dir_path[len(root_path):] + return len(split(suffix)) + + def find_root( location, max_depth=3, @@ -202,18 +217,38 @@ def find_root( ): """ Return the first likely location of the root of a filesystem found in the - `location` directory and looking down up to `max_depth` directory levels - deep below the location directory. If `max_depth` == 0, look at full depth. - Search for well known directories listed in the `root_paths` set. A root - directory is return as found if at least `min_paths` exists as filenames or - directories under it. + ``location`` directory and below, up to and including ``max_depth`` directory + levels deep below the ``location`` root directory. 
- `walker` is a callable that behaves the same as `os.walk() and is used - for testing` + If ``max_depth`` == 0, look at full depth. + + Search for well known directories listed in the ``root_paths`` set. A root + directory is returned as found if at least ``min_paths`` exists as filenames + or directories under it. + + ``walker`` is a callable behaving like ``os.walk()`` and is used for testing. """ - for depth, (top, dirs, files) in enumerate(walker(location), 1): + if TRACE: logger.debug( + f'find_root: location={location!r}, max_depth={max_depth!r}, ' + f'root_paths={root_paths!r}, min_paths={min_paths!r}' + ) + depth = 0 + for top, dirs, files in walker(location): + if TRACE: logger.debug(f' find_root: top={top!r}, dirs={dirs!r}, files={files!r}') + if max_depth: + depth = compute_path_depth(location, top) + if TRACE: logger.debug(f' find_root: top depth={depth!r}') + if depth > max_depth: + if TRACE: logger.debug( + f' find_root: max_depth={max_depth!r}, ' + f'depth={depth!r} returning None') + return + matches = len(set(dirs + files) & root_paths) + if TRACE: logger.debug(f' find_root: top={top!r}, matches={matches!r}') + if matches >= min_paths: + if TRACE: logger.debug(f' find_root: matches >= min_paths: returning {top!r}') return top - if max_depth and depth == max_depth: - return + + if TRACE: logger.debug(f'find_root: noting found: returning None') diff --git a/src/container_inspector/utils.py b/src/container_inspector/utils.py index f496175..f54dd71 100755 --- a/src/container_inspector/utils.py +++ b/src/container_inspector/utils.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
+# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import json import logging @@ -20,10 +15,12 @@ from extractcode.extract import extract_file +TRACE = False logger = logging.getLogger(__name__) -# un-comment these lines to enable logging -# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) -# logger.setLevel(logging.DEBUG) +if TRACE: + import sys + logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) + logger.setLevel(logging.DEBUG) def load_json(location): @@ -109,14 +106,19 @@ def extract_tar_keeping_symlinks(location, target_dir): Do not preserve the permissions and owners. Raise exceptions on possible problematic relative paths. 
""" - fileutils.create_dir(target_dir) import tarfile + if TRACE: logger.debug(f'extract_tar_keeping_symlinks: {location} to {target_dir}') + + fileutils.create_dir(target_dir) + + with tarfile.open(location) as tarball: # never extract character device, block and fifo files: # we extract dirs, files and links only for tinfo in tarball: if tinfo.isdev(): continue + if TRACE: logger.debug(f'extract_tar_keeping_symlinks: {tinfo}') tarball.extract( member=tinfo, path=target_dir, diff --git a/tests/data/distro/windows-container-rootfs.tar b/tests/data/distro/windows-container-rootfs.tar new file mode 100644 index 0000000..a3b02f0 Binary files /dev/null and b/tests/data/distro/windows-container-rootfs.tar differ diff --git a/tests/data/image/windows-mini-image.tar.gz.expected.json b/tests/data/image/windows-mini-image.tar.gz.expected.json index 50afebe..df10647 100644 --- a/tests/data/image/windows-mini-image.tar.gz.expected.json +++ b/tests/data/image/windows-mini-image.tar.gz.expected.json @@ -18,9 +18,9 @@ "distro": { "os": "windows", "architecture": "amd64", - "name": "linux", + "name": null, "version": "10.0.19042.985", - "identifier": "identifier", + "identifier": "windows", "id_like": [], "version_codename": null, "version_id": null, diff --git a/tests/test_cli.py b/tests/test_cli.py index 4e52b1c..237f38f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. 
-# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import os import json diff --git a/tests/test_distro.py b/tests/test_distro.py index 5bfc351..d512f81 100644 --- a/tests/test_distro.py +++ b/tests/test_distro.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
import os @@ -44,3 +39,62 @@ def test_distro_from_os_release_file(self): expected = test_file + '-distro-expected.json' result = Distro.from_os_release_file(test_file).to_dict() check_expected(result, expected, regen=False) + + def test_distro_from_os_release_returns_None_on_empty_or_missing_location(self): + assert Distro.from_os_release_file('') is None + assert Distro.from_os_release_file(None) is None + assert Distro.from_os_release_file('THIS/dir/does/exists') is None + try: + assert Distro.from_os_release_file(__file__) is None + self.fail('An exception should be raised.') + except: + pass + + def test_distro_from_rootfs_returns_None_on_empty_or_missing_location(self): + assert Distro.from_rootfs('') is None + assert Distro.from_rootfs(None) is None + assert Distro.from_rootfs('THIS/dir/does/exists') is None + + def test_distro_from_rootfs_returns_a_distro_even_if_not_found(self): + not_a_rootfs = os.path.dirname(__file__) + distro = Distro.from_rootfs(not_a_rootfs) + # all distro attributes should be empty + assert not distro + + def test_distro_from_rootfs_return_None_if_base_distro_not_found(self): + base = Distro(os='freebsd', architecture='amd64') + not_a_rootfs = os.path.dirname(__file__) + distro = Distro.from_rootfs(not_a_rootfs, base_distro=base) + assert distro is None + + def test_distro_does_not_default_to_linux(self): + # we want to ensure that no attributes values contains linux by default + distro = repr(Distro().to_dict().values()).lower() + assert 'linux' not in distro + + def test_distro_from_rootfs_detects_windows(self): + test_dir = self.extract_test_tar('distro/windows-container-rootfs.tar') + distro = Distro.from_rootfs(test_dir) + expected = {'identifier': 'windows', 'os': 'windows'} + results = {k: v for k, v in sorted(distro.to_dict().items()) if v} + assert results == expected + + def test_distro_from_rootfs_has_base_distro_merged(self): + base = Distro(os='windows', architecture='amd64') + test_dir = 
self.extract_test_tar('distro/windows-container-rootfs.tar') + distro = Distro.from_rootfs(test_dir, base_distro=base) + expected = { + 'architecture': 'amd64', + 'identifier': 'windows', + 'os': 'windows', + } + results = {k: v for k, v in sorted(distro.to_dict().items()) if v} + assert results == expected + + def test_distro_from_rootfs_raise_exception_if_different_base_distro_os(self): + base = Distro(os='freebsd') + test_dir = self.extract_test_tar('distro/windows-container-rootfs.tar') + try: + Distro.from_rootfs(test_dir, base_distro=base) + except Exception as e: + assert str(e) == 'Inconsistent base distro OS: freebsd and found distro OS : windows' diff --git a/tests/test_dockerfile.py b/tests/test_dockerfile.py index 644d30b..e5937c9 100644 --- a/tests/test_dockerfile.py +++ b/tests/test_dockerfile.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
import os diff --git a/tests/test_image.py b/tests/test_image.py index 0af5663..f355dbe 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. from os import path @@ -65,6 +60,7 @@ def test_Image_get_images_from_tarball_windows(self): extracted_location=extract_dir, verify=False, )[0] + layer_extracted_location = self.get_temp_dir() image.extract_layers(extracted_location=layer_extracted_location) image.get_and_set_distro() diff --git a/tests/test_rootfs.py b/tests/test_rootfs.py index d3a7a5d..ee8e6af 100644 --- a/tests/test_rootfs.py +++ b/tests/test_rootfs.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
+# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import os @@ -210,7 +205,7 @@ def test_rootfs_does_respects_max_depth(self): assert not rootfs.find_root(test_dir, max_depth=1) assert not rootfs.find_root(test_dir, max_depth=2) assert not rootfs.find_root(test_dir, max_depth=3) - assert not rootfs.find_root(test_dir, max_depth=4) + assert rootfs.find_root(test_dir, max_depth=4).endswith('level1/level2/level3') expected = '/find_root/level1/level2/level3' found = rootfs.find_root(test_dir, max_depth=5) @@ -221,3 +216,16 @@ def test_rootfs_does_respects_max_depth(self): found = rootfs.find_root(os.path.join(test_dir, 'find_root'), max_depth=4) assert found.replace(test_dir, '') == expected + + def test_rootfs_compute_path_depth(self): + assert rootfs.compute_path_depth(None, None) == 0 + assert rootfs.compute_path_depth('', '') == 0 + assert rootfs.compute_path_depth(None, 'foo') == 1 + assert rootfs.compute_path_depth('foo', None) == 0 + assert rootfs.compute_path_depth('/root', '/root/find_root') == 1 + assert rootfs.compute_path_depth('/root', '/root/one/2/') == 2 + assert rootfs.compute_path_depth('/root/', '/root/one/2/') == 2 + assert rootfs.compute_path_depth('root/', '/root/one/2') == 2 + assert rootfs.compute_path_depth('root/', '/root/') == 0 + assert rootfs.compute_path_depth('root/', '/root/') == 0 + assert rootfs.compute_path_depth('root/', '/root/1/2/3/4') == 4 diff --git 
a/tests/utilities.py b/tests/utilities.py index 534197d..24a20f8 100644 --- a/tests/utilities.py +++ b/tests/utilities.py @@ -1,15 +1,10 @@ -# Copyright (c) nexB Inc. and others. All rights reserved. -# http://nexb.com and https://github.com/nexB/container-inspector/ # -# This software is licensed under the Apache License version 2.0.# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. # -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: -# http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. import json import os