diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bf6f05e..9cd70d3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,17 @@ Changelog ========= +v32.0.1 +-------- + +This is a minor release with bug fixes. + +- We now correctly process the opaque whiteouts seen in container image layers + tarballs. + +Thank you to AJ Arena @sig-aarena + + v32.0.0 -------- diff --git a/src/container_inspector/rootfs.py b/src/container_inspector/rootfs.py index 81d4980..2efbeb1 100755 --- a/src/container_inspector/rootfs.py +++ b/src/container_inspector/rootfs.py @@ -108,13 +108,14 @@ def rebuild_rootfs(img, target_dir, skip_symlinks=True): return deletions -WHITEOUT_EXPLICIT_PREFIX = '.wh.' -WHITEOUT_OPAQUE_PREFIX = '.wh..wh.opq' +WHITEOUT_PREFIX = '.wh.' +WHITEOUT_SPECIAL_PREFIX = '.wh..wh' +WHITEOUT_OPAQUE_PREFIX = '.wh..wh..opq' -def is_whiteout_marker(path): +def is_whiteout_marker(file_name): """ - Return True if the ``path`` is a whiteout marker file. + Return True if the ``file_name`` is a whiteout marker file. For example:: >>> is_whiteout_marker('.wh.somepath') @@ -123,32 +124,80 @@ def is_whiteout_marker(path): True >>> is_whiteout_marker('somepath.wh.') False - >>> is_whiteout_marker('somepath/.wh.foo') + >>> is_whiteout_marker('.wh.foo') True - >>> is_whiteout_marker('somepath/.wh.foo/') + """ + return file_name and file_name.startswith(WHITEOUT_PREFIX) + + +def is_whiteout_opaque_marker(file_name): + """ + Return True if the ``file_name`` is an opaque whiteout marker file. + + For example:: + >>> is_whiteout_opaque_marker('.wh.somepath') + False + >>> is_whiteout_opaque_marker('.wh..wh.opq') + False + >>> is_whiteout_opaque_marker('.wh..wh..opq') True + >>> is_whiteout_opaque_marker('somepath..wh..wh..opq') + False + >>> is_whiteout_opaque_marker('.wh..wh.plnk') + False + >>> is_whiteout_opaque_marker('.wh..wh..opq.foo') + False + >>> is_whiteout_opaque_marker('somepath/.wh..wh..opq/') + False + """ + return file_name and file_name == WHITEOUT_OPAQUE_PREFIX + + +def is_whiteout_special_marker(file_name): + """ + Return True if the ``file_name`` is an opaque whiteout marker file. + + For example:: + >>> is_whiteout_special_marker('.wh.somepath') + False + >>> is_whiteout_special_marker('.wh..wh.opq') + True + >>> is_whiteout_special_marker('.wh..wh..opq') + True + >>> is_whiteout_special_marker('.wh..wh.plnk') + True + >>> is_whiteout_special_marker('somepath..wh..wh..opq') + False + >>> is_whiteout_special_marker('.wh..wh..opq.foo') + True + >>> is_whiteout_special_marker('somepath/.wh..wh..opq/') + False """ - file_name = path and os.path.basename(path.strip('/')) or '' - return file_name.startswith(WHITEOUT_EXPLICIT_PREFIX) + return file_name and file_name.startswith(WHITEOUT_SPECIAL_PREFIX) def get_whiteable_path(path): """ Return the whiteable path for ``path`` or None if this not a whiteable path. - TODO: Handle OSses with case-insensitive FS (e.g. Windows) """ + # FIXME: Handle OSses with case-insensitive FS (e.g. Windows) file_name = os.path.basename(path) parent_dir = os.path.dirname(path) - if file_name == WHITEOUT_OPAQUE_PREFIX: + if is_whiteout_special_marker(file_name): # Opaque whiteouts means the whole parent directory should be removed # https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts + # note as a simplification we treat all of these as opaque, and log these that are not + if not is_whiteout_opaque_marker(file_name): + # This is the case for legacy AUFS '.wh..wh.plnk' and '.wh..wh.aufs' + # only seen in legacy Docker + logger.error(f'ERROR: unsupported whiteout filename: {file_name}') return parent_dir - if file_name.startswith(WHITEOUT_EXPLICIT_PREFIX): + elif is_whiteout_marker(file_name): # Explicit, file-only whiteout # https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts - _, _, real_file_name = file_name.rpartition(WHITEOUT_EXPLICIT_PREFIX) + _, _, real_file_name = file_name.rpartition(WHITEOUT_PREFIX) return os.path.join(parent_dir, real_file_name) diff --git a/src/container_inspector/utils.py b/src/container_inspector/utils.py index c382cb3..45037fe 100755 --- a/src/container_inspector/utils.py +++ b/src/container_inspector/utils.py @@ -103,6 +103,23 @@ def to_string(self): return f"{self.type}: {self.message}" +def is_relative_path(path): + """ + Return True if ``path`` is a relative path. + >>> is_relative_path('.wh..wh..opq') + False + >>> is_relative_path('.wh/../wh..opq') + True + >>> is_relative_path('..foor') + False + >>> is_relative_path('../foor') + True + >>> is_relative_path('.//.foor//..') + True + """ + return any(name == '..' for name in path.split('/')) + + def extract_tar(location, target_dir, as_events=False, skip_symlinks=True, trace=TRACE): """ Extract a tar archive at ``location`` in the ``target_dir`` directory. @@ -133,7 +150,7 @@ def extract_tar(location, target_dir, as_events=False, skip_symlinks=True, trace logger.debug(f'extract_tar: {msg}') continue - if '..' in tarinfo.name: + if is_relative_path(tarinfo.name): msg = f'{location}: skipping unsupported {tarinfo.name} with relative path.' events.append(ExtractEvent(type=ExtractEvent.WARNING, source=tarinfo.name, message=msg)) if trace: diff --git a/tests/data/tar/absolute_path.tar b/tests/data/utils/absolute_path.tar similarity index 100% rename from tests/data/tar/absolute_path.tar rename to tests/data/utils/absolute_path.tar diff --git a/tests/data/tar/colon.tar.xz b/tests/data/utils/colon.tar.xz similarity index 100% rename from tests/data/tar/colon.tar.xz rename to tests/data/utils/colon.tar.xz diff --git a/tests/data/utils/tar_relative-with-whiteouts.tar b/tests/data/utils/tar_relative-with-whiteouts.tar new file mode 100644 index 0000000..09d8466 Binary files /dev/null and b/tests/data/utils/tar_relative-with-whiteouts.tar differ diff --git a/tests/data/tar/tar_relative.tar b/tests/data/utils/tar_relative.tar similarity index 100% rename from tests/data/tar/tar_relative.tar rename to tests/data/utils/tar_relative.tar diff --git a/tests/test_utils.py b/tests/test_utils.py index 25b3ed3..cd0aa4c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -54,9 +54,9 @@ def clean_events(self, extract_dir, events): source=e.source.replace(extract_dir, ''), message=e.message.replace(self.test_data_dir, ''), ) - events_results.append(ne._asdict()) - - return events_results + events_results.append(ne) + events_results = sorted(events_results, key=lambda x: x.source) + return [dict(ne._asdict()) for ne in events_results] def clean_paths(self, extract_dir): return sorted([p.replace(extract_dir, '') for p in @@ -70,7 +70,7 @@ def test_extract_tree_with_colon_in_filenames(self): expected = ( 'colon/libc6:amd64.list', ) - test_dir = self.get_test_loc('tar/colon.tar.xz') + test_dir = self.get_test_loc('utils/colon.tar.xz') extract_dir = self.get_temp_dir() events = utils.extract_tar(location=test_dir, target_dir=extract_dir) check_files(target_dir=extract_dir, expected=expected) @@ -78,37 +78,62 @@ def test_extract_tree_with_colon_in_filenames(self): def test_extract_tar_relative(self): expected = () - test_dir = self.get_test_loc('tar/tar_relative.tar') + test_dir = self.get_test_loc('utils/tar_relative.tar') extract_dir = self.get_temp_dir() events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True) check_files(target_dir=extract_dir, expected=expected) events = self.clean_events(extract_dir, events) expected_events = [ - {'message': '/tar/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.', - 'source': '../a_parent_folder.txt', - 'type': 'warning'}, - {'message': '/tar/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.', + {'message': '/utils/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.', 'source': '../../another_folder/b_two_root.txt', 'type': 'warning'}, - {'message': '/tar/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.', + {'message': '/utils/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.', + 'source': '../a_parent_folder.txt', + 'type': 'warning'}, + {'message': '/utils/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.', 'source': '../folder/subfolder/b_subfolder.txt', 'type': 'warning'}, ] assert events == expected_events + def test_extract_tar_relative_with_whiteouts(self): + expected = ( + '.wh..wh..opq', + '.wh..wh..plnk', + '.wh.foo.txt' + ) + test_dir = self.get_test_loc('utils/tar_relative-with-whiteouts.tar') + extract_dir = self.get_temp_dir() + events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True) + check_files(target_dir=extract_dir, expected=expected) + events = self.clean_events(extract_dir, events) + expected_events = [ + {'message': '/utils/tar_relative-with-whiteouts.tar: skipping unsupported ../../another_folder/.wh..wh..opq with relative path.', + 'source': '../../another_folder/.wh..wh..opq', + 'type': 'warning'}, + {'message': '/utils/tar_relative-with-whiteouts.tar: skipping unsupported ../.wh..wh..opq with relative path.', + 'source': '../.wh..wh..opq', + 'type': 'warning'}, + {'message': '/utils/tar_relative-with-whiteouts.tar: skipping unsupported ../folder/subfolder/.wh..wh..opq with relative path.', + 'source': '../folder/subfolder/.wh..wh..opq', + 'type': 'warning'}, + ] + + assert events == expected_events + def test_extract_tar_relative_as_strings(self): expected = () - test_dir = self.get_test_loc('tar/tar_relative.tar') + test_dir = self.get_test_loc('utils/tar_relative.tar') extract_dir = self.get_temp_dir() events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=False) check_files(target_dir=extract_dir, expected=expected) events = [e.replace(self.test_data_dir, '') for e in events] expected_events = [ - 'warning: /tar/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.', - 'warning: /tar/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.', - 'warning: /tar/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.', + 'warning: /utils/tar_relative.tar: skipping unsupported ../a_parent_folder.txt with relative path.', + 'warning: /utils/tar_relative.tar: skipping unsupported ../../another_folder/b_two_root.txt with relative path.', + 'warning: /utils/tar_relative.tar: skipping unsupported ../folder/subfolder/b_subfolder.txt with relative path.', ] assert events == expected_events @@ -117,20 +142,20 @@ def test_extract_tar_absolute(self): 'tmp/subdir/a.txt', 'tmp/subdir/b.txt', ) - test_dir = self.get_test_loc('tar/absolute_path.tar') + test_dir = self.get_test_loc('utils/absolute_path.tar') extract_dir = self.get_temp_dir() events = utils.extract_tar(location=test_dir, target_dir=extract_dir, as_events=True) check_files(target_dir=extract_dir, expected=expected) events = self.clean_events(extract_dir, events) expected_events = [ - {'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir transformed in relative path.', + {'message': '/utils/absolute_path.tar: absolute path name: /tmp/subdir transformed in relative path.', 'source': '/tmp/subdir', 'type': 'warning'}, - {'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir/a.txt transformed in relative path.', + {'message': '/utils/absolute_path.tar: absolute path name: /tmp/subdir/a.txt transformed in relative path.', 'source': '/tmp/subdir/a.txt', 'type': 'warning'}, - {'message': '/tar/absolute_path.tar: absolute path name: /tmp/subdir/b.txt transformed in relative path.', + {'message': '/utils/absolute_path.tar: absolute path name: /tmp/subdir/b.txt transformed in relative path.', 'source': '/tmp/subdir/b.txt', 'type': 'warning'}, ]