diff --git a/src/container_inspector/utils.py b/src/container_inspector/utils.py index 7c0edbe..740d0ae 100755 --- a/src/container_inspector/utils.py +++ b/src/container_inspector/utils.py @@ -97,36 +97,46 @@ def extract_tar(location, target_dir, skip_symlinks=True): with tarfile.open(location) as tarball: # never extract character device, block and fifo files: # we extract dirs, files and links only - to_extract = [] + error_messages = [] for tarinfo in tarball: if TRACE: logger.debug(f'_extract_tar: {tarinfo}') if tarinfo.isdev() or tarinfo.ischr() or tarinfo.isblk() or tarinfo.isfifo() or tarinfo.sparse: + msg = f'_extract_tar: skipping unsupported {tarinfo} file type: block, chr, dev or sparse file' + error_messages.append(msg) if TRACE: - logger.debug(f'_extract_tar: skipping unsupported {tarinfo} file type: block, chr, dev or sparse file') + logger.debug(msg) continue if '..' in tarinfo.name: - if TRACE: logger.debug(f'_extract_tar: skipping unsupported {tarinfo} with relative path') + msg = f'_extract_tar: skipping unsupported {tarinfo} with relative path' + error_messages.append(msg) + if TRACE: + logger.debug(msg) continue if tarinfo.islnk() or tarinfo.issym(): try: target = tarball._find_link_target(tarinfo) if not target: + msg = f'_extract_tar: skipping link with missing target: {tarinfo}' + error_messages.append(msg) if TRACE: - logger.debug(f'_extract_tar: skipping link with missing target: {tarinfo}') + logger.debug(msg) continue except Exception: import traceback + msg = f'_extract_tar: skipping link with missing target: {tarinfo}: {traceback.format_exc()}' + error_messages.append(msg) if TRACE: - logger.debug(f'_extract_tar: skipping link with missing target: {tarinfo}: {traceback.format_exc()}') + logger.debug(msg) continue tarinfo.mode = 0o755 tarinfo.name = tarinfo.name.lstrip('/') tarball.extract(member=tarinfo, path=target_dir, set_attrs=False,) + return error_messages def extract_tar_with_symlinks(location, target_dir): diff --git 
a/tests/data/tar/absolute_path.tar b/tests/data/tar/absolute_path.tar new file mode 100644 index 0000000..0b168da Binary files /dev/null and b/tests/data/tar/absolute_path.tar differ diff --git a/tests/data/tar/colon.tar.xz b/tests/data/tar/colon.tar.xz new file mode 100644 index 0000000..e8481c9 Binary files /dev/null and b/tests/data/tar/colon.tar.xz differ diff --git a/tests/data/tar/tar_relative.tar b/tests/data/tar/tar_relative.tar new file mode 100644 index 0000000..0caff86 Binary files /dev/null and b/tests/data/tar/tar_relative.tar differ diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..7446744 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,75 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/container-inspector for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from commoncode import fileutils +from commoncode import testcase + +from container_inspector import utils + + +def check_files(target_dir, expected): + """ + Walk target_dir. + Collect the path of every file found, relative to target_dir, + as a posix path without leading or trailing slashes. + Assert that the sorted list of these paths is equal to the + sorted `expected` paths: a missing or extra file fails the check. + Directories are walked but are not part of the comparison. 
+ """ + result = [] + + test_dir_path = fileutils.as_posixpath(target_dir) + for top, _, files in os.walk(target_dir): + for f in files: + location = os.path.join(top, f) + path = fileutils.as_posixpath(location) + path = path.replace(test_dir_path, '').strip('/') + result.append(path) + + expected_content = sorted(expected) + result = sorted(result) + + assert result == expected_content + + +class TestUtils(testcase.FileBasedTesting): + test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + + def test_extract_tree_with_colon_in_filenames(self): + expected = ( + 'colon/libc6:amd64.list', + ) + test_dir = self.get_test_loc('tar/colon.tar.xz') + temp_dir = self.get_temp_dir() + errors = utils.extract_tar(location=test_dir, target_dir=temp_dir) + check_files(temp_dir, expected) + assert not errors + + def test_extract_tar_relative(self): + expected = () + test_dir = self.get_test_loc('tar/tar_relative.tar') + temp_dir = self.get_temp_dir() + errors = utils.extract_tar(location=test_dir, target_dir=temp_dir) + check_files(temp_dir, expected) + assert errors + for error in errors: + assert 'skipping unsupported' in error + assert 'with relative path' in error + + def test_extract_tar_absolute(self): + expected = ( + 'tmp/subdir/a.txt', + 'tmp/subdir/b.txt', + ) + test_dir = self.get_test_loc('tar/absolute_path.tar') + temp_dir = self.get_temp_dir() + errors = utils.extract_tar(location=test_dir, target_dir=temp_dir) + check_files(temp_dir, expected) + assert not errors