From e2eb4a3158ece4e9a1122a85accf95e23f6630dc Mon Sep 17 00:00:00 2001 From: Brandon Bodnar Date: Fri, 28 Oct 2016 00:06:22 -0400 Subject: [PATCH 1/2] Prevent traversing excluded directories with no possible dockerignore exceptions Fixes an issue where all files in a rather large excluded folder are traversed and examined when creating the build context for potential exceptions to the exclusion, even though the exclusion rule is for a completely unrelated folder. Signed-off-by: Brandon Bodnar --- docker/utils/utils.py | 51 +++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/docker/utils/utils.py b/docker/utils/utils.py index b107f22e97..87401642a0 100644 --- a/docker/utils/utils.py +++ b/docker/utils/utils.py @@ -214,6 +214,31 @@ def should_include(path, exclude_patterns, include_patterns): return True +def should_check_directory(directory_path, exclude_patterns, include_patterns): + """ + Given a directory path, a list of exclude patterns, and a list of inclusion + patterns: + + 1. Returns True if the directory path should be included according to + should_include. + 2. Returns True if the directory path is the prefix for an inclusion + pattern + 3. Returns False otherwise + """ + + # To account for exception rules, check directories if their path is a + # prefix to an inclusion pattern. 
This logic conforms with the current + # docker logic (2016-10-27): + # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671 + + path_with_slash = directory_path + os.sep + possible_child_patterns = [pattern for pattern in include_patterns if + (pattern + os.sep).startswith(path_with_slash)] + directory_included = should_include(directory_path, exclude_patterns, + include_patterns) + return directory_included or len(possible_child_patterns) > 0 + + def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False): paths = [] @@ -222,25 +247,13 @@ def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False): if parent == '.': parent = '' - # If exception rules exist, we can't skip recursing into ignored - # directories, as we need to look for exceptions in them. - # - # It may be possible to optimize this further for exception patterns - # that *couldn't* match within ignored directores. - # - # This matches the current docker logic (as of 2015-11-24): - # https://github.com/docker/docker/blob/37ba67bf636b34dc5c0c0265d62a089d0492088f/pkg/archive/archive.go#L555-L557 - - if not has_exceptions: - - # Remove excluded patterns from the list of directories to traverse - # by mutating the dirs we're iterating over. - # This looks strange, but is considered the correct way to skip - # traversal. See https://docs.python.org/2/library/os.html#os.walk - - dirs[:] = [d for d in dirs if - should_include(os.path.join(parent, d), - exclude_patterns, include_patterns)] + # Remove excluded patterns from the list of directories to traverse + # by mutating the dirs we're iterating over. + # This looks strange, but is considered the correct way to skip + # traversal. 
See https://docs.python.org/2/library/os.html#os.walk + dirs[:] = [d for d in dirs if + should_check_directory(os.path.join(parent, d), + exclude_patterns, include_patterns)] for path in dirs: if should_include(os.path.join(parent, path), From 9fc8b3a730293b8cbee2feab04c355e753b20cdf Mon Sep 17 00:00:00 2001 From: Brandon Bodnar Date: Mon, 31 Oct 2016 19:56:02 -0400 Subject: [PATCH 2/2] Add unit tests for should_check_directory. Signed-off-by: Brandon Bodnar --- tests/unit/utils_test.py | 76 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py index 57aa226d84..45f5914b76 100644 --- a/tests/unit/utils_test.py +++ b/tests/unit/utils_test.py @@ -25,7 +25,9 @@ ) from docker.utils.ports import build_port_bindings, split_port -from docker.utils.utils import create_endpoint_config, format_environment +from docker.utils.utils import ( + create_endpoint_config, format_environment, should_check_directory +) from ..helpers import make_tree @@ -1092,6 +1094,78 @@ def test_tar_with_directory_symlinks(self): ) +class ShouldCheckDirectoryTest(unittest.TestCase): + exclude_patterns = [ + 'exclude_rather_large_directory', + 'dir/with/subdir_excluded', + 'dir/with/exceptions' + ] + + include_patterns = [ + 'dir/with/exceptions/like_this_one', + 'dir/with/exceptions/in/descendents' + ] + + def test_should_check_directory_not_excluded(self): + self.assertTrue( + should_check_directory('not_excluded', self.exclude_patterns, + self.include_patterns) + ) + + self.assertTrue( + should_check_directory('dir/with', self.exclude_patterns, + self.include_patterns) + ) + + def test_shoud_check_parent_directories_of_excluded(self): + self.assertTrue( + should_check_directory('dir', self.exclude_patterns, + self.include_patterns) + ) + self.assertTrue( + should_check_directory('dir/with', self.exclude_patterns, + self.include_patterns) + ) + + def 
test_should_not_check_excluded_directories_with_no_exceptions(self): + self.assertFalse( + should_check_directory('exclude_rather_large_directory', + self.exclude_patterns, self.include_patterns + ) + ) + self.assertFalse( + should_check_directory('dir/with/subdir_excluded', + self.exclude_patterns, self.include_patterns + ) + ) + + def test_should_check_excluded_directory_with_exceptions(self): + self.assertTrue( + should_check_directory('dir/with/exceptions', + self.exclude_patterns, self.include_patterns + ) + ) + self.assertTrue( + should_check_directory('dir/with/exceptions/in', + self.exclude_patterns, self.include_patterns + ) + ) + + def test_should_not_check_siblings_of_exceptions(self): + self.assertFalse( + should_check_directory('dir/with/exceptions/but_not_here', + self.exclude_patterns, self.include_patterns + ) + ) + + def test_should_check_subdirectories_of_exceptions(self): + self.assertTrue( + should_check_directory('dir/with/exceptions/like_this_one/subdir', + self.exclude_patterns, self.include_patterns + ) + ) + + class FormatEnvironmentTest(unittest.TestCase): def test_format_env_binary_unicode_value(self): env_dict = {