Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions src/container_inspector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,36 +97,46 @@ def extract_tar(location, target_dir, skip_symlinks=True):
with tarfile.open(location) as tarball:
# never extract character device, block and fifo files:
# we extract dirs, files and links only
to_extract = []
error_messages = []
for tarinfo in tarball:
if TRACE: logger.debug(f'_extract_tar: {tarinfo}')

if tarinfo.isdev() or tarinfo.ischr() or tarinfo.isblk() or tarinfo.isfifo() or tarinfo.sparse:
msg = f'_extract_tar: skipping unsupported {tarinfo} file type: block, chr, dev or sparse file'
error_messages.append(msg)
if TRACE:
logger.debug(f'_extract_tar: skipping unsupported {tarinfo} file type: block, chr, dev or sparse file')
logger.debug(msg)
continue

if '..' in tarinfo.name:
if TRACE: logger.debug(f'_extract_tar: skipping unsupported {tarinfo} with relative path')
msg = f'_extract_tar: skipping unsupported {tarinfo} with relative path'
error_messages.append(msg)
if TRACE:
logger.debug(msg)
continue

if tarinfo.islnk() or tarinfo.issym():
try:
target = tarball._find_link_target(tarinfo)
if not target:
msg = f'_extract_tar: skipping link with missing target: {tarinfo}'
error_messages.append(msg)
if TRACE:
logger.debug(f'_extract_tar: skipping link with missing target: {tarinfo}')
logger.debug(msg)
continue

except Exception:
import traceback
msg = f'_extract_tar: skipping link with missing target: {tarinfo}: {traceback.format_exc()}'
error_messages.append(msg)
if TRACE:
logger.debug(f'_extract_tar: skipping link with missing target: {tarinfo}: {traceback.format_exc()}')
logger.debug(msg)
continue

tarinfo.mode = 0o755
tarinfo.name = tarinfo.name.lstrip('/')
tarball.extract(member=tarinfo, path=target_dir, set_attrs=False,)
return error_messages


def extract_tar_with_symlinks(location, target_dir):
Expand Down
Binary file added tests/data/tar/absolute_path.tar
Binary file not shown.
Binary file added tests/data/tar/colon.tar.xz
Binary file not shown.
Binary file added tests/data/tar/tar_relative.tar
Binary file not shown.
75 changes: 75 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/container-inspector for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os

from commoncode import fileutils
from commoncode import testcase

from container_inspector import utils


def check_files(target_dir, expected):
    """
    Walk ``target_dir`` recursively and assert that the files found there are
    exactly the ``expected`` files.

    ``expected`` is an iterable of POSIX-style paths (using "/" separators)
    relative to ``target_dir``. Only the entries that ``os.walk`` reports as
    files are compared: directories are not listed, so empty directories are
    ignored. The comparison does not depend on ordering.

    Raise an AssertionError if the files on disk and ``expected`` differ.
    """
    found = []
    for top, _, files in os.walk(target_dir):
        for name in files:
            # Compute the path relative to ``target_dir`` rather than blanking
            # out the base directory string: a plain ``str.replace`` would also
            # remove any later occurrence of that string inside the path.
            relative = os.path.relpath(os.path.join(top, name), target_dir)
            # Normalize to "/" separators so expected paths are portable.
            found.append(relative.replace(os.sep, '/'))

    assert sorted(found) == sorted(expected)


class TestUtils(testcase.FileBasedTesting):
    """
    Tests for ``utils.extract_tar`` using the 'tar/...' test archives.
    """
    test_data_dir = os.path.join(os.path.dirname(__file__), 'data')

    def _extract_test_archive(self, test_path):
        """
        Run ``utils.extract_tar`` on the test file ``test_path`` and return an
        (extraction directory, errors) tuple. The archive location comes from
        ``self.get_test_loc`` and the extraction directory from
        ``self.get_temp_dir``.
        """
        archive_loc = self.get_test_loc(test_path)
        extract_dir = self.get_temp_dir()
        errors = utils.extract_tar(location=archive_loc, target_dir=extract_dir)
        return extract_dir, errors

    def test_extract_tree_with_colon_in_filenames(self):
        # A file name containing a colon is extracted and no error is reported.
        extract_dir, errors = self._extract_test_archive('tar/colon.tar.xz')
        check_files(extract_dir, ('colon/libc6:amd64.list',))
        assert not errors

    def test_extract_tar_relative(self):
        # Nothing is extracted, and every reported error is about a skipped
        # entry with a relative path.
        extract_dir, errors = self._extract_test_archive('tar/tar_relative.tar')
        nothing_extracted = ()
        check_files(extract_dir, nothing_extracted)
        assert errors
        for message in errors:
            assert 'skipping unsupported' in message
            assert 'with relative path' in message

    def test_extract_tar_absolute(self):
        # The files are found under the extraction directory at 'tmp/subdir/'
        # and no error is reported.
        extract_dir, errors = self._extract_test_archive('tar/absolute_path.tar')
        extracted_paths = (
            'tmp/subdir/a.txt',
            'tmp/subdir/b.txt',
        )
        check_files(extract_dir, extracted_paths)
        assert not errors