From 41fac851fb62b4224e0400be46a3884708d185c7 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 12 Dec 2023 19:02:14 -0500 Subject: [PATCH 1/2] Avoid mktemp in tests, in straightforward cases The tempfile.mktemp function is deprecated, because of a race condition where the file may be concurrently created between when its name is generated and when it is opened. Other faciliies in the tempfile module overcome this by generating a name, attempting to create the file or directory in a way that guarantees failure if it already existed, and, in the occasional case that it did already exist, generating another name and trying again (stopping after a predefined limit). For further information on mktemp deprecation: - https://docs.python.org/3/library/tempfile.html#tempfile.mktemp - https://github.com/gitpython-developers/smmap/issues/41 The security risk of calls to mktemp in this project's test suite is low. However, it is still best to avoid using it, because it is deprecated, because it is (at least slightly) brittle, and because any use of mktemp looks like a potential security risk and thereby imposes a burden on working with the code (which could potentially be addressed with detailed comments analyzing why it is believed safe in particular cases, but this would typically be more verbose, and at least as challenging to add, as replacing mktemp with a better alternative). This commit replaces *some* uses of mktemp in the test suite: those where it is readily clear how to do so in a way that preserves the code's intent: - Where a name for a temporary directory is generated with mktemp and os.mkdir is called immediately, mkdtemp is now used. - Where a name for a temporary file that is not customized (such as with a prefix) is generated with mktemp, such that the code under test never uses the filename but only the already-open file-like object, TemporaryFile is now used. As the name isn't customized, the test code in these cases does not express an intent to allow the developer to inspect the file after a test failure, so even if the file wasn't guaranteed to be deleted with a finally block or context manager, it is fine to do so. TemporaryFile supports this use case well on all systems including Windows, and automatically deletes the file. - Where a name for a temporary file that *is* customized (such as with a prefix) to reflect the way the test uses it is generated with mktemp, and the test code does not attempt deterministic deletion of the file when an exception would make the test fail, NamedTemporaryFile with delete=False is now used. The original code to remove the file when the test succeeds is modified accordingly to do the same job, and also commented to explain that it is being handled this way to allow the file to be kept and examined when a test failure occurs. Other cases in the test suite should also be feasible to replace, but are left alone for now. Some of them are ambiguous in their intent, with respect to whether the file should be retained after a test failure. Others appear deliberately to avoid creating a file or directory where the code under test should do so, possibly to check that this is done properly. (One way to preserve that latter behavior, while avoiding the weakness of using mktemp and also avoiding inadverently reproducing that weakness by other means, could be to use a path in a temporary directory made for the test.) This commit also doesn't address the one use of mktemp in the code under test (i.e., outside the test suite, inside the git module). --- test/lib/helper.py | 3 +- test/performance/lib.py | 3 +- test/test_base.py | 5 ++-- test/test_reflog.py | 7 ++--- test/test_repo.py | 5 ++-- test/test_util.py | 64 ++++++++++++++++++++++------------------- 6 files changed, 42 insertions(+), 45 deletions(-) diff --git a/test/lib/helper.py b/test/lib/helper.py index 58d96534a..d662b632d 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -89,8 +89,7 @@ def with_rw_directory(func): @wraps(func) def wrapper(self): - path = tempfile.mktemp(prefix=func.__name__) - os.mkdir(path) + path = tempfile.mkdtemp(prefix=func.__name__) keep = False try: return func(self, path) diff --git a/test/performance/lib.py b/test/performance/lib.py index ceee6c2a1..d08e1027f 100644 --- a/test/performance/lib.py +++ b/test/performance/lib.py @@ -65,8 +65,7 @@ class TestBigRepoRW(TestBigRepoR): def setUp(self): self.gitrwrepo = None super().setUp() - dirname = tempfile.mktemp() - os.mkdir(dirname) + dirname = tempfile.mkdtemp() self.gitrwrepo = self.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB) self.puregitrwrepo = Repo(dirname, odbt=GitDB) diff --git a/test/test_base.py b/test/test_base.py index cdf82b74d..74f342071 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -68,12 +68,11 @@ def test_base_object(self): data = data_stream.read() assert data - tmpfilename = tempfile.mktemp(suffix="test-stream") - with open(tmpfilename, "wb+") as tmpfile: + with tempfile.NamedTemporaryFile(suffix="test-stream", delete=False) as tmpfile: self.assertEqual(item, item.stream_data(tmpfile)) tmpfile.seek(0) self.assertEqual(tmpfile.read(), data) - os.remove(tmpfilename) + os.remove(tmpfile.name) # Do it this way so we can inspect the file on failure. # END for each object type to create # Each has a unique sha. diff --git a/test/test_reflog.py b/test/test_reflog.py index 625466d40..1bd2e5dab 100644 --- a/test/test_reflog.py +++ b/test/test_reflog.py @@ -1,7 +1,7 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -import os +import os.path as osp import tempfile from git.objects import IndexObject @@ -9,8 +9,6 @@ from test.lib import TestBase, fixture_path from git.util import Actor, rmtree, hex_to_bin -import os.path as osp - class TestRefLog(TestBase): def test_reflogentry(self): @@ -35,8 +33,7 @@ def test_reflogentry(self): def test_base(self): rlp_head = fixture_path("reflog_HEAD") rlp_master = fixture_path("reflog_master") - tdir = tempfile.mktemp(suffix="test_reflogs") - os.mkdir(tdir) + tdir = tempfile.mkdtemp(suffix="test_reflogs") rlp_master_ro = RefLog.path(self.rorepo.head) assert osp.isfile(rlp_master_ro) diff --git a/test/test_repo.py b/test/test_repo.py index 007b8ecb0..465bb2574 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -667,11 +667,10 @@ def test_tag_to_full_tag_path(self): self.assertEqual(value_errors, []) def test_archive(self): - tmpfile = tempfile.mktemp(suffix="archive-test") - with open(tmpfile, "wb") as stream: + with tempfile.NamedTemporaryFile("wb", suffix="archive-test", delete=False) as stream: self.rorepo.archive(stream, "0.1.6", path="doc") assert stream.tell() - os.remove(tmpfile) + os.remove(stream.name) # Do it this way so we can inspect the file on failure. @mock.patch.object(Git, "_call_process") def test_should_display_blame_information(self, git): diff --git a/test/test_util.py b/test/test_util.py index 1cb255cfe..6616e1067 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -359,48 +359,52 @@ def test_it_should_dashify(self): self.assertEqual("foo", dashify("foo")) def test_lock_file(self): - my_file = tempfile.mktemp() - lock_file = LockFile(my_file) - assert not lock_file._has_lock() - # Release lock we don't have - fine. - lock_file._release_lock() + with tempfile.TemporaryDirectory() as tdir: + my_file = os.path.join(tdir, "my-lock-file") + lock_file = LockFile(my_file) + assert not lock_file._has_lock() + # Release lock we don't have - fine. + lock_file._release_lock() - # Get lock. - lock_file._obtain_lock_or_raise() - assert lock_file._has_lock() + # Get lock. + lock_file._obtain_lock_or_raise() + assert lock_file._has_lock() - # Concurrent access. - other_lock_file = LockFile(my_file) - assert not other_lock_file._has_lock() - self.assertRaises(IOError, other_lock_file._obtain_lock_or_raise) + # Concurrent access. + other_lock_file = LockFile(my_file) + assert not other_lock_file._has_lock() + self.assertRaises(IOError, other_lock_file._obtain_lock_or_raise) - lock_file._release_lock() - assert not lock_file._has_lock() + lock_file._release_lock() + assert not lock_file._has_lock() - other_lock_file._obtain_lock_or_raise() - self.assertRaises(IOError, lock_file._obtain_lock_or_raise) + other_lock_file._obtain_lock_or_raise() + self.assertRaises(IOError, lock_file._obtain_lock_or_raise) - # Auto-release on destruction. - del other_lock_file - lock_file._obtain_lock_or_raise() - lock_file._release_lock() + # Auto-release on destruction. + del other_lock_file + lock_file._obtain_lock_or_raise() + lock_file._release_lock() def test_blocking_lock_file(self): - my_file = tempfile.mktemp() - lock_file = BlockingLockFile(my_file) - lock_file._obtain_lock() - - # Next one waits for the lock. - start = time.time() - wait_time = 0.1 - wait_lock = BlockingLockFile(my_file, 0.05, wait_time) - self.assertRaises(IOError, wait_lock._obtain_lock) - elapsed = time.time() - start + with tempfile.TemporaryDirectory() as tdir: + my_file = os.path.join(tdir, "my-lock-file") + lock_file = BlockingLockFile(my_file) + lock_file._obtain_lock() + + # Next one waits for the lock. + start = time.time() + wait_time = 0.1 + wait_lock = BlockingLockFile(my_file, 0.05, wait_time) + self.assertRaises(IOError, wait_lock._obtain_lock) + elapsed = time.time() - start + extra_time = 0.02 if os.name == "nt" or sys.platform == "cygwin": extra_time *= 6 # Without this, we get indeterministic failures on Windows. elif sys.platform == "darwin": extra_time *= 18 # The situation on macOS is similar, but with more delay. + self.assertLess(elapsed, wait_time + extra_time) def test_user_id(self): From 9e86053c885a27360d9d4699fafaef5e995d6ec5 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 12 Dec 2023 22:30:51 -0500 Subject: [PATCH 2/2] Replace the one use of mktemp in the git module This makes two related changes to git.index.util.TemporaryFileSwap: - Replace mktemp with mkstemp and then immediately closing the file. This avoids two possible name clashes: the usual name clash where the file may be created by another process between when mktemp generates the name and when the file is created, and the problem that mktemp does not check for files that contain the generated name as a part. The latter is specific to the use here, where a string generated by mktemp was manually concatenated to the base filename. This addresses that by passing the filename as the prefix for mkstemp to use. - Replace os.remove with os.replace and simplify. This is made necessary on Windows by the file already existing, due to mkstemp creating it. Deleting the file and allowing it to be recreated would reproduce the mktemp race condition in full (obscured and with extra steps). But os.replace supports an existing target file on all platforms. It is now also used in __exit__, where it allows the Windows check for the file to be removed, and (in any OS) better expresses the intent of the code to human readers. Furthermore, because one of the "look before you leap" checks in __exit__ is removed, the minor race condition in cleaning up the file is somewhat decreased. A small amount of related refactoring is included. The interface is not changed, even where it could be simplified (by letting __exit__ return None), and resource acquisition remains done on construction rather than in __enter__, because changing those aspects of the design could break some existing uses. Although the use of mktemp replaced here was in the git module and not the test suite, its use was to generate filenames for use in a directory that would ordinarily be under the user's control, such that the ability to carry out typical mktemp-related attacks would already require the ability to achieve the same goals more easily and reliably. Although TemporaryFileSwap is a public class that may be used directly by applications, it is only useful for making a temporary file in the same directory as an existing file. Thus the intended benefits of this change are mainly to code quality and a slight improvement in robustness. --- git/index/util.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/git/index/util.py b/git/index/util.py index b1aaa58fd..1c3b1c4ad 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -3,6 +3,7 @@ """Index utilities.""" +import contextlib from functools import wraps import os import os.path as osp @@ -40,12 +41,10 @@ class TemporaryFileSwap: def __init__(self, file_path: PathLike) -> None: self.file_path = file_path - self.tmp_file_path = str(self.file_path) + tempfile.mktemp("", "", "") - # It may be that the source does not exist. - try: - os.rename(self.file_path, self.tmp_file_path) - except OSError: - pass + fd, self.tmp_file_path = tempfile.mkstemp(prefix=self.file_path, dir="") + os.close(fd) + with contextlib.suppress(OSError): # It may be that the source does not exist. + os.replace(self.file_path, self.tmp_file_path) def __enter__(self) -> "TemporaryFileSwap": return self @@ -57,10 +56,7 @@ def __exit__( exc_tb: Optional[TracebackType], ) -> bool: if osp.isfile(self.tmp_file_path): - if os.name == "nt" and osp.exists(self.file_path): - os.remove(self.file_path) - os.rename(self.tmp_file_path, self.file_path) - + os.replace(self.tmp_file_path, self.file_path) return False