From 5506683deaab4c1475800e9b4cd2c06b8de4de97 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:02 +0800 Subject: [PATCH 001/303] t7002: add tests for moving from in-cone to out-of-cone Add corresponding tests to test that user can move an in-cone <source> to out-of-cone <destination> when --sparse is supplied. Such <source> can be either clean or dirty, and moving it results in different behaviors: A clean move should move <source> to <destination> in the index (do *not* create <destination> in the worktree), then delete <source> from the worktree. A dirty move should move the <source> to the <destination>, both in the working tree and the index, but should *not* remove the resulting path from the working tree and should *not* turn on its CE_SKIP_WORKTREE bit. Also make sure that if <destination> exists in the index (existing check for if <destination> is in the worktree is not enough in in-to-out moves), warn user against the overwrite. And Git should force the overwrite when supplied with -f or --force. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- t/t7002-mv-sparse-checkout.sh | 198 ++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 71fe29690fd122..1ac78edde6c2f6 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -290,4 +290,202 @@ test_expect_success 'move sparse file to existing destination with --force and - test_cmp expect sub/file1 ' +test_expect_failure 'move clean path from in-cone to out-of-cone' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + + test_must_fail git mv sub/d folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/d" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + git mv --sparse sub/d folder1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing sub/d && + test_path_is_missing folder1/d && + git ls-files -t >actual && + ! 
grep "^H sub/d\$" actual && + grep "S folder1/d" actual +' + +test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + echo "sub/file1 overwrite" >sub/file1 && + git add sub/file1 && + + test_must_fail git mv sub/file1 folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/file1" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + test_must_fail git mv --sparse sub/file1 folder1 2>stderr && + echo "fatal: destination exists in the index, source=sub/file1, destination=folder1/file1" \ + >expect && + test_cmp expect stderr && + + git mv --sparse -f sub/file1 folder1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing sub/file1 && + test_path_is_missing folder1/file1 && + git ls-files -t >actual && + ! grep "H sub/file1" actual && + grep "S folder1/file1" actual && + + # compare file content before move and after move + echo "sub/file1 overwrite" >expect && + git ls-files -s -- folder1/file1 | awk "{print \$2}" >oid && + git cat-file blob $(cat oid) >actual && + test_cmp expect actual +' + +# This test is testing the same behavior as the +# "move clean path from in-cone to out-of-cone overwrite" above. 
+# The only difference is the changes from "folder1" to "folder1/file1" +test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + echo "sub/file1 overwrite" >sub/file1 && + git add sub/file1 && + + test_must_fail git mv sub/file1 folder1/file1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/file1" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + test_must_fail git mv --sparse sub/file1 folder1/file1 2>stderr && + echo "fatal: destination exists in the index, source=sub/file1, destination=folder1/file1" \ + >expect && + test_cmp expect stderr && + + git mv --sparse -f sub/file1 folder1/file1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing sub/file1 && + test_path_is_missing folder1/file1 && + git ls-files -t >actual && + ! grep "H sub/file1" actual && + grep "S folder1/file1" actual && + + # compare file content before move and after move + echo "sub/file1 overwrite" >expect && + git ls-files -s -- folder1/file1 | awk "{print \$2}" >oid && + git cat-file blob $(cat oid) >actual && + test_cmp expect actual +' + +test_expect_failure 'move directory with one of the files overwrite' ' + test_when_finished "cleanup_sparse_checkout" && + mkdir -p folder1/dir && + touch folder1/dir/file1 && + git add folder1 && + git sparse-checkout set --cone sub && + + echo test >sub/dir/file1 && + git add sub/dir/file1 && + + test_must_fail git mv sub/dir folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/dir/e" >>expect && + echo "folder1/dir/file1" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + test_must_fail git mv --sparse sub/dir folder1 2>stderr && + echo "fatal: destination exists in the index, source=sub/dir/file1, destination=folder1/dir/file1" \ + >expect && + test_cmp expect stderr && + + git mv --sparse -f sub/dir folder1 2>stderr && + test_must_be_empty stderr 
&& + + test_path_is_missing sub/dir/file1 && + test_path_is_missing sub/dir/e && + test_path_is_missing folder1/file1 && + git ls-files -t >actual && + ! grep "H sub/dir/file1" actual && + ! grep "H sub/dir/e" actual && + grep "S folder1/dir/file1" actual && + + # compare file content before move and after move + echo test >expect && + git ls-files -s -- folder1/dir/file1 | awk "{print \$2}" >oid && + git cat-file blob $(cat oid) >actual && + test_cmp expect actual +' + +test_expect_failure 'move dirty path from in-cone to out-of-cone' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + echo "modified" >>sub/d && + + test_must_fail git mv sub/d folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/d" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + git mv --sparse sub/d folder1 2>stderr && + + test_path_is_missing sub/d && + test_path_is_file folder1/d && + git ls-files -t >actual && + ! grep "^H sub/d\$" actual && + grep "H folder1/d" actual +' + +test_expect_failure 'move dir from in-cone to out-of-cone' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + + test_must_fail git mv sub/dir folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/dir/e" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + git mv --sparse sub/dir folder1 2>stderr && + test_must_be_empty stderr && + + test_path_is_missing folder1 && + git ls-files -t >actual && + ! 
grep "H sub/dir/e" actual && + grep "S folder1/dir/e" actual +' + +test_expect_failure 'move partially-dirty dir from in-cone to out-of-cone' ' + test_when_finished "cleanup_sparse_checkout" && + setup_sparse_checkout && + touch sub/dir/e2 sub/dir/e3 && + git add sub/dir/e2 sub/dir/e3 && + echo "modified" >>sub/dir/e2 && + echo "modified" >>sub/dir/e3 && + + test_must_fail git mv sub/dir folder1 2>stderr && + cat sparse_error_header >expect && + echo "folder1/dir/e" >>expect && + echo "folder1/dir/e2" >>expect && + echo "folder1/dir/e3" >>expect && + cat sparse_hint >>expect && + test_cmp expect stderr && + + git mv --sparse sub/dir folder1 2>stderr && + + test_path_is_missing folder1/dir/e && + test_path_is_file folder1/dir/e2 && + test_path_is_file folder1/dir/e3 && + git ls-files -t >actual && + ! grep "H sub/dir/e" actual && + ! grep "H sub/dir/e2" actual && + ! grep "H sub/dir/e3" actual && + grep "S folder1/dir/e" actual && + grep "H folder1/dir/e2" actual && + grep "H folder1/dir/e3" actual +' + test_done From 72e59ba19e776abbff7dccbf093c73e43527a339 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:03 +0800 Subject: [PATCH 002/303] mv: rename check_dir_in_index() to empty_dir_has_sparse_contents() Method check_dir_in_index() introduced in b91a2b6594 (mv: add check_dir_in_index() and solve general dir check issue, 2022-06-30) does not describe its intent and behavior well. Change its name to empty_dir_has_sparse_contents(), which more precisely describes its purpose. Reverse the return values, check_dir_in_index() return 0 for success and 1 for failure; reverse the values so empty_dir_has_sparse_contents() return 1 for success and 0 for failure. These values are more intuitive because 1 usually means "has" and 0 means "not found". Also modify the documentation to better align with the method's intent and behavior. 
Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index 2a38e2af4664dc..969f464f7d789b 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -125,15 +125,13 @@ static int index_range_of_same_dir(const char *src, int length, } /* - * Check if an out-of-cone directory should be in the index. Imagine this case - * that all the files under a directory are marked with 'CE_SKIP_WORKTREE' bit - * and thus the directory is sparsified. - * - * Return 0 if such directory exist (i.e. with any of its contained files not - * marked with CE_SKIP_WORKTREE, the directory would be present in working tree). - * Return 1 otherwise. + * Given the path of a directory that does not exist on-disk, check whether the + * directory contains any entries in the index with the SKIP_WORKTREE flag + * enabled. + * Return 1 if such index entries exist. + * Return 0 otherwise. 
*/ -static int check_dir_in_index(const char *name) +static int empty_dir_has_sparse_contents(const char *name) { const char *with_slash = add_slash(name); int length = strlen(with_slash); @@ -144,14 +142,14 @@ static int check_dir_in_index(const char *name) if (pos < 0) { pos = -pos - 1; if (pos >= the_index.cache_nr) - return 1; + return 0; ce = active_cache[pos]; if (strncmp(with_slash, ce->name, length)) - return 1; - if (ce_skip_worktree(ce)) return 0; + if (ce_skip_worktree(ce)) + return 1; } - return 1; + return 0; } int cmd_mv(int argc, const char **argv, const char *prefix) @@ -231,7 +229,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) if (pos < 0) { const char *src_w_slash = add_slash(src); if (!path_in_sparse_checkout(src_w_slash, &the_index) && - !check_dir_in_index(src)) { + empty_dir_has_sparse_contents(src)) { modes[i] |= SKIP_WORKTREE_DIR; goto dir_check; } From d57690a9c82c8888be6bb8ae17be231a2b2802e6 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:04 +0800 Subject: [PATCH 003/303] mv: free the with_slash in check_dir_in_index() with_slash may be a malloc'd pointer, and when it is, free it. 
Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index 969f464f7d789b..c17df2a12f6b32 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -133,6 +133,7 @@ static int index_range_of_same_dir(const char *src, int length, */ static int empty_dir_has_sparse_contents(const char *name) { + int ret = 0; const char *with_slash = add_slash(name); int length = strlen(with_slash); @@ -142,14 +143,18 @@ static int empty_dir_has_sparse_contents(const char *name) if (pos < 0) { pos = -pos - 1; if (pos >= the_index.cache_nr) - return 0; + goto free_return; ce = active_cache[pos]; if (strncmp(with_slash, ce->name, length)) - return 0; + goto free_return; if (ce_skip_worktree(ce)) - return 1; + ret = 1; } - return 0; + +free_return: + if (with_slash != name) + free((char *)with_slash); + return ret; } int cmd_mv(int argc, const char **argv, const char *prefix) From c08830de41f15a8ee85cf7926266e1db732ec773 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:05 +0800 Subject: [PATCH 004/303] mv: check if <destination> is a SKIP_WORKTREE_DIR Originally, <destination> is assumed to be in the working tree. If it is not found as a directory, then it is determined to be either a regular file path, or error out if used under the second form (move into a directory) of 'git-mv'. Such behavior is not ideal, mainly because Git does not look into the index for <destination>, which could potentially be a SKIP_WORKTREE_DIR, which we need to determine for the later "moving from in-cone to out-of-cone" patch. Change the logic so that Git first checks if <destination> is a directory with all its contents sparsified (a SKIP_WORKTREE_DIR). If <destination> is such a sparse directory, then we should modify the index the same way as we would if this were a non-sparse directory. We must be careful to ensure that the <destination> is marked with SKIP_WORKTREE_DIR. 
Also add a `dst_w_slash` to reuse the result from `add_slash()`, which was everywhere and can be simplified. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index c17df2a12f6b32..11aea7b4db564b 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -171,6 +171,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) OPT_END(), }; const char **source, **destination, **dest_path, **submodule_gitfile; + const char *dst_w_slash; enum update_mode *modes; struct stat st; struct string_list src_for_dst = STRING_LIST_INIT_NODUP; @@ -200,6 +201,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) if (argc == 1 && is_directory(argv[0]) && !is_directory(argv[1])) flags = 0; dest_path = internal_prefix_pathspec(prefix, argv + argc, 1, flags); + dst_w_slash = add_slash(dest_path[0]); submodule_gitfile = xcalloc(argc, sizeof(char *)); if (dest_path[0][0] == '\0') @@ -207,12 +209,20 @@ int cmd_mv(int argc, const char **argv, const char *prefix) destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME); else if (!lstat(dest_path[0], &st) && S_ISDIR(st.st_mode)) { - dest_path[0] = add_slash(dest_path[0]); - destination = internal_prefix_pathspec(dest_path[0], argv, argc, DUP_BASENAME); + destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME); } else { - if (argc != 1) + if (!path_in_sparse_checkout(dst_w_slash, &the_index) && + empty_dir_has_sparse_contents(dst_w_slash)) { + destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME); + } else if (argc != 1) { die(_("destination '%s' is not a directory"), dest_path[0]); - destination = dest_path; + } else { + destination = dest_path; + } + } + if (dst_w_slash != dest_path[0]) { + free((char *)dst_w_slash); + dst_w_slash = NULL; } /* Checking */ From 
9284c3ce266fcf9abb0afbc59645c62dd58026dd Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:06 +0800 Subject: [PATCH 005/303] mv: remove BOTH from enum update_mode Since BOTH is not used anywhere in the code and its meaning is unclear, remove it. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/builtin/mv.c b/builtin/mv.c index 11aea7b4db564b..7ac653be23c34b 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -21,7 +21,6 @@ static const char * const builtin_mv_usage[] = { }; enum update_mode { - BOTH = 0, WORKING_DIRECTORY = (1 << 1), INDEX = (1 << 2), SPARSE = (1 << 3), From 5784db1b22feceafc72454dccd9637d19fdd422c Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:07 +0800 Subject: [PATCH 006/303] mv: from in-cone to out-of-cone Originally, moving an in-cone <source> to an out-of-cone <destination> was not possible, mainly because such <destination> is a directory that is not present in the working tree. Change the behavior so that we can move an in-cone <source> to out-of-cone <destination> when --sparse is supplied. Notice that <destination> can also be an out-of-cone file path, rather than a directory. Such <source> can be either clean or dirty, and moving it results in different behaviors: A clean move should move <source> to <destination> in the index (do *not* create <destination> in the worktree), then delete <source> from the worktree. A dirty move should move the <source> to the <destination>, both in the working tree and the index, but should *not* remove the resulting path from the working tree and should *not* turn on its CE_SKIP_WORKTREE bit. Optional reading ================ We are strict about cone mode when <destination> is a file path. The reason is that some of the previous tests that use no-cone mode in t7002 keep breaking, mainly because of the `dst_mode = SPARSE;` line added in this patch. Most features developed in both "from-out-to-in" and "from-in-to-out" only care about the cone mode situation, as no-cone mode is becoming irrelevant. 
And because assigning `SPARSE` to `dst_mode` when the repo is in no-cone mode causes miscellaneous bugs, we should just leave this new functionality to be exclusive cone mode and save some time. Helped-by: Derrick Stolee Helped-by: Victoria Dye Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 71 ++++++++++++++++++++++++++++++----- t/t7002-mv-sparse-checkout.sh | 8 ++-- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index 7ac653be23c34b..b64c28bd5b22d8 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -171,12 +171,13 @@ int cmd_mv(int argc, const char **argv, const char *prefix) }; const char **source, **destination, **dest_path, **submodule_gitfile; const char *dst_w_slash; - enum update_mode *modes; + enum update_mode *modes, dst_mode = 0; struct stat st; struct string_list src_for_dst = STRING_LIST_INIT_NODUP; struct lock_file lock_file = LOCK_INIT; struct cache_entry *ce; struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP; + struct string_list dirty_paths = STRING_LIST_INIT_NODUP; git_config(git_default_config, NULL); @@ -213,10 +214,21 @@ int cmd_mv(int argc, const char **argv, const char *prefix) if (!path_in_sparse_checkout(dst_w_slash, &the_index) && empty_dir_has_sparse_contents(dst_w_slash)) { destination = internal_prefix_pathspec(dst_w_slash, argv, argc, DUP_BASENAME); + dst_mode = SKIP_WORKTREE_DIR; } else if (argc != 1) { die(_("destination '%s' is not a directory"), dest_path[0]); } else { destination = dest_path; + /* + * is a file outside of sparse-checkout + * cone. Insist on cone mode here for backward + * compatibility. We don't want dst_mode to be assigned + * for a file when the repo is using no-cone mode (which + * is deprecated at this point) sparse-checkout. As + * SPARSE here is only considering cone-mode situation. 
+ */ + if (!path_in_cone_mode_sparse_checkout(destination[0], &the_index)) + dst_mode = SPARSE; } } if (dst_w_slash != dest_path[0]) { @@ -410,6 +422,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) const char *src = source[i], *dst = destination[i]; enum update_mode mode = modes[i]; int pos; + int sparse_and_dirty = 0; struct checkout state = CHECKOUT_INIT; state.istate = &the_index; @@ -420,6 +433,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) if (show_only) continue; if (!(mode & (INDEX | SPARSE | SKIP_WORKTREE_DIR)) && + !(dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) && rename(src, dst) < 0) { if (ignore_errors) continue; @@ -439,17 +453,55 @@ int cmd_mv(int argc, const char **argv, const char *prefix) pos = cache_name_pos(src, strlen(src)); assert(pos >= 0); + if (!(mode & SPARSE) && !lstat(src, &st)) + sparse_and_dirty = ce_modified(active_cache[pos], &st, 0); rename_cache_entry_at(pos, dst); - if ((mode & SPARSE) && - (path_in_sparse_checkout(dst, &the_index))) { - int dst_pos; + if (ignore_sparse && + core_apply_sparse_checkout && + core_sparse_checkout_cone) { + /* + * NEEDSWORK: we are *not* paying attention to + * "out-to-out" move ( is out-of-cone and + * is out-of-cone) at this point. It + * should be added in a future patch. 
+ */ + if ((mode & SPARSE) && + path_in_sparse_checkout(dst, &the_index)) { + /* from out-of-cone to in-cone */ + int dst_pos = cache_name_pos(dst, strlen(dst)); + struct cache_entry *dst_ce = active_cache[dst_pos]; + + dst_ce->ce_flags &= ~CE_SKIP_WORKTREE; + + if (checkout_entry(dst_ce, &state, NULL, NULL)) + die(_("cannot checkout %s"), dst_ce->name); + } else if ((dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) && + !(mode & SPARSE) && + !path_in_sparse_checkout(dst, &the_index)) { + /* from in-cone to out-of-cone */ + int dst_pos = cache_name_pos(dst, strlen(dst)); + struct cache_entry *dst_ce = active_cache[dst_pos]; - dst_pos = cache_name_pos(dst, strlen(dst)); - active_cache[dst_pos]->ce_flags &= ~CE_SKIP_WORKTREE; - - if (checkout_entry(active_cache[dst_pos], &state, NULL, NULL)) - die(_("cannot checkout %s"), active_cache[dst_pos]->name); + /* + * if src is clean, it will suffice to remove it + */ + if (!sparse_and_dirty) { + dst_ce->ce_flags |= CE_SKIP_WORKTREE; + unlink_or_warn(src); + } else { + /* + * if src is dirty, move it to the + * destination and create leading + * dirs if necessary + */ + char *dst_dup = xstrdup(dst); + string_list_append(&dirty_paths, dst); + safe_create_leading_directories(dst_dup); + FREE_AND_NULL(dst_dup); + rename(src, dst); + } + } } } @@ -461,6 +513,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) die(_("Unable to write new index file")); string_list_clear(&src_for_dst, 0); + string_list_clear(&dirty_paths, 0); UNLEAK(source); UNLEAK(dest_path); free(submodule_gitfile); diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 1ac78edde6c2f6..d875f492dd1ce6 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -290,7 +290,7 @@ test_expect_success 'move sparse file to existing destination with --force and - test_cmp expect sub/file1 ' -test_expect_failure 'move clean path from in-cone to out-of-cone' ' +test_expect_success 'move clean path from in-cone to 
out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && @@ -419,7 +419,7 @@ test_expect_failure 'move directory with one of the files overwrite' ' test_cmp expect actual ' -test_expect_failure 'move dirty path from in-cone to out-of-cone' ' +test_expect_success 'move dirty path from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && echo "modified" >>sub/d && @@ -439,7 +439,7 @@ test_expect_failure 'move dirty path from in-cone to out-of-cone' ' grep "H folder1/d" actual ' -test_expect_failure 'move dir from in-cone to out-of-cone' ' +test_expect_success 'move dir from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && @@ -458,7 +458,7 @@ test_expect_failure 'move dir from in-cone to out-of-cone' ' grep "S folder1/dir/e" actual ' -test_expect_failure 'move partially-dirty dir from in-cone to out-of-cone' ' +test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && touch sub/dir/e2 sub/dir/e3 && From b6f51e3db978ae2a72c290a10bd205f9e1d6818e Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:08 +0800 Subject: [PATCH 007/303] mv: cleanup empty WORKING_DIRECTORY Originally, moving from-in-to-out may leave an empty directory on-disk (this kind of directory is marked as WORKING_DIRECTORY). Clean up such directories if they are empty (don't have any entries under them). Modify two tests that take <source> as WORKING_DIRECTORY to test this behavior. 
Suggested-by: Derrick Stolee Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 27 +++++++++++++++++++++++++++ t/t7002-mv-sparse-checkout.sh | 4 ++++ 2 files changed, 31 insertions(+) diff --git a/builtin/mv.c b/builtin/mv.c index b64c28bd5b22d8..f4961c0ffde3c2 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -171,6 +171,9 @@ int cmd_mv(int argc, const char **argv, const char *prefix) }; const char **source, **destination, **dest_path, **submodule_gitfile; const char *dst_w_slash; + const char **src_dir = NULL; + int src_dir_nr = 0, src_dir_alloc = 0; + struct strbuf a_src_dir = STRBUF_INIT; enum update_mode *modes, dst_mode = 0; struct stat st; struct string_list src_for_dst = STRING_LIST_INIT_NODUP; @@ -313,6 +316,10 @@ int cmd_mv(int argc, const char **argv, const char *prefix) /* last - first >= 1 */ modes[i] |= WORKING_DIRECTORY; + + ALLOC_GROW(src_dir, src_dir_nr + 1, src_dir_alloc); + src_dir[src_dir_nr++] = src; + n = argc + last - first; REALLOC_ARRAY(source, n); REALLOC_ARRAY(destination, n); @@ -505,6 +512,26 @@ int cmd_mv(int argc, const char **argv, const char *prefix) } } + /* + * cleanup the empty src_dirs + */ + for (i = 0; i < src_dir_nr; i++) { + int dummy; + strbuf_addstr(&a_src_dir, src_dir[i]); + /* + * if entries under a_src_dir are all moved away, + * recursively remove a_src_dir to cleanup + */ + if (index_range_of_same_dir(a_src_dir.buf, a_src_dir.len, + &dummy, &dummy) < 1) { + remove_dir_recursively(&a_src_dir, 0); + } + strbuf_reset(&a_src_dir); + } + + strbuf_release(&a_src_dir); + free(src_dir); + if (gitmodules_modified) stage_updated_gitmodules(&the_index); diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index d875f492dd1ce6..df8c0fa5723aaa 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -442,6 +442,7 @@ test_expect_success 'move dirty path from in-cone to out-of-cone' ' test_expect_success 'move dir from in-cone to out-of-cone' ' 
test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && + mkdir sub/dir/deep && test_must_fail git mv sub/dir folder1 2>stderr && cat sparse_error_header >expect && @@ -452,6 +453,7 @@ test_expect_success 'move dir from in-cone to out-of-cone' ' git mv --sparse sub/dir folder1 2>stderr && test_must_be_empty stderr && + test_path_is_missing sub/dir && test_path_is_missing folder1 && git ls-files -t >actual && ! grep "H sub/dir/e" actual && @@ -461,6 +463,7 @@ test_expect_success 'move dir from in-cone to out-of-cone' ' test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && + mkdir sub/dir/deep && touch sub/dir/e2 sub/dir/e3 && git add sub/dir/e2 sub/dir/e3 && echo "modified" >>sub/dir/e2 && @@ -476,6 +479,7 @@ test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' ' git mv --sparse sub/dir folder1 2>stderr && + test_path_is_missing sub/dir && test_path_is_missing folder1/dir/e && test_path_is_file folder1/dir/e2 && test_path_is_file folder1/dir/e3 && From 5efd533ed8896592740afe22ac07271497d6db36 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:09 +0800 Subject: [PATCH 008/303] advice.h: add advise_on_moving_dirty_path() Add an advice. When the user uses `git mv --sparse <dirty-path> <destination>`, Git will warn the user to use `git add --sparse <paths>` then use `git sparse-checkout reapply` to apply the sparsity rules. Add a few lines to previous "move dirty path" tests so we can test this new advice is working. 
Suggested-by: Derrick Stolee Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- advice.c | 19 +++++++++++++++++++ advice.h | 1 + builtin/mv.c | 3 +++ t/t7002-mv-sparse-checkout.sh | 24 +++++++++++++++++++++++- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/advice.c b/advice.c index 6fda9edbc2474f..fd189689437c75 100644 --- a/advice.c +++ b/advice.c @@ -261,3 +261,22 @@ void detach_advice(const char *new_name) fprintf(stderr, fmt, new_name); } + +void advise_on_moving_dirty_path(struct string_list *pathspec_list) +{ + struct string_list_item *item; + + if (!pathspec_list->nr) + return; + + fprintf(stderr, _("The following paths have been moved outside the\n" + "sparse-checkout definition but are not sparse due to local\n" + "modifications.\n")); + for_each_string_list_item(item, pathspec_list) + fprintf(stderr, "%s\n", item->string); + + advise_if_enabled(ADVICE_UPDATE_SPARSE_PATH, + _("To correct the sparsity of these paths, do the following:\n" + "* Use \"git add --sparse \" to update the index\n" + "* Use \"git sparse-checkout reapply\" to apply the sparsity rules")); +} diff --git a/advice.h b/advice.h index 7ddc6cbc1ac8d7..07e0f76833e780 100644 --- a/advice.h +++ b/advice.h @@ -74,5 +74,6 @@ void NORETURN die_conclude_merge(void); void NORETURN die_ff_impossible(void); void advise_on_updating_sparse_paths(struct string_list *pathspec_list); void detach_advice(const char *new_name); +void advise_on_moving_dirty_path(struct string_list *pathspec_list); #endif /* ADVICE_H */ diff --git a/builtin/mv.c b/builtin/mv.c index f4961c0ffde3c2..d80adf8de5305f 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -532,6 +532,9 @@ int cmd_mv(int argc, const char **argv, const char *prefix) strbuf_release(&a_src_dir); free(src_dir); + if (dirty_paths.nr) + advise_on_moving_dirty_path(&dirty_paths); + if (gitmodules_modified) stage_updated_gitmodules(&the_index); diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 
df8c0fa5723aaa..5e5eb70e7a9c4f 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -28,12 +28,25 @@ test_expect_success 'setup' " updated in the index: EOF - cat >sparse_hint <<-EOF + cat >sparse_hint <<-EOF && hint: If you intend to update such entries, try one of the following: hint: * Use the --sparse option. hint: * Disable or modify the sparsity rules. hint: Disable this message with \"git config advice.updateSparsePath false\" EOF + + cat >dirty_error_header <<-EOF && + The following paths have been moved outside the + sparse-checkout definition but are not sparse due to local + modifications. + EOF + + cat >dirty_hint <<-EOF + hint: To correct the sparsity of these paths, do the following: + hint: * Use \"git add --sparse \" to update the index + hint: * Use \"git sparse-checkout reapply\" to apply the sparsity rules + hint: Disable this message with \"git config advice.updateSparsePath false\" + EOF " test_expect_success 'mv refuses to move sparse-to-sparse' ' @@ -431,6 +444,10 @@ test_expect_success 'move dirty path from in-cone to out-of-cone' ' test_cmp expect stderr && git mv --sparse sub/d folder1 2>stderr && + cat dirty_error_header >expect && + echo "folder1/d" >>expect && + cat dirty_hint >>expect && + test_cmp expect stderr && test_path_is_missing sub/d && test_path_is_file folder1/d && @@ -478,6 +495,11 @@ test_expect_success 'move partially-dirty dir from in-cone to out-of-cone' ' test_cmp expect stderr && git mv --sparse sub/dir folder1 2>stderr && + cat dirty_error_header >expect && + echo "folder1/dir/e2" >>expect && + echo "folder1/dir/e3" >>expect && + cat dirty_hint >>expect && + test_cmp expect stderr && test_path_is_missing sub/dir && test_path_is_missing folder1/dir/e && From da6fe05b3d624ad5b40472eebfe0499c15ecc93d Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Tue, 9 Aug 2022 20:09:10 +0800 Subject: [PATCH 009/303] mv: check overwrite for in-to-out move Add checking logic for overwriting when moving 
from in-cone to out-of-cone. It is the index version of the original overwrite logic. Helped-by: Derrick Stolee Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 12 ++++++++++++ t/t7002-mv-sparse-checkout.sh | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/builtin/mv.c b/builtin/mv.c index d80adf8de5305f..4b67bd096a95d5 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -376,6 +376,18 @@ int cmd_mv(int argc, const char **argv, const char *prefix) goto act_on_entry; } + if (ignore_sparse && + (dst_mode & (SKIP_WORKTREE_DIR | SPARSE)) && + index_entry_exists(&the_index, dst, strlen(dst))) { + bad = _("destination exists in the index"); + if (force) { + if (verbose) + warning(_("overwriting '%s'"), dst); + bad = NULL; + } else { + goto act_on_entry; + } + } /* * We check if the paths are in the sparse-checkout * definition as a very final check, since that diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 5e5eb70e7a9c4f..26582ae4e5fb5e 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -323,7 +323,7 @@ test_expect_success 'move clean path from in-cone to out-of-cone' ' grep "S folder1/d" actual ' -test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' ' +test_expect_success 'move clean path from in-cone to out-of-cone overwrite' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && echo "sub/file1 overwrite" >sub/file1 && @@ -359,7 +359,7 @@ test_expect_failure 'move clean path from in-cone to out-of-cone overwrite' ' # This test is testing the same behavior as the # "move clean path from in-cone to out-of-cone overwrite" above. 
# The only difference is the changes from "folder1" to "folder1/file1" -test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite' ' +test_expect_success 'move clean path from in-cone to out-of-cone file overwrite' ' test_when_finished "cleanup_sparse_checkout" && setup_sparse_checkout && echo "sub/file1 overwrite" >sub/file1 && @@ -392,7 +392,7 @@ test_expect_failure 'move clean path from in-cone to out-of-cone file overwrite' test_cmp expect actual ' -test_expect_failure 'move directory with one of the files overwrite' ' +test_expect_success 'move directory with one of the files overwrite' ' test_when_finished "cleanup_sparse_checkout" && mkdir -p folder1/dir && touch folder1/dir/file1 && From b4f25b07c74fc294cab6c12d09faa2021c67f25a Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:39 +0000 Subject: [PATCH 010/303] t: add skeleton chainlint.pl Although chainlint.sed usefully identifies broken &&-chains in tests, it has several shortcomings which include: * only detects &&-chain breakage in subshells (one-level deep) * does not check for broken top-level &&-chains; that task is left to the "magic exit code 117" checker built into test-lib.sh, however, that detection does not extend to `{...}` blocks, `$(...)` expressions, or compound statements such as `if...fi`, `while...done`, `case...esac` * uses heuristics, which makes it (potentially) fallible and difficult to tweak to handle additional real-world cases * written in `sed` and employs advanced `sed` operators which are probably not well-known to many programmers, thus the pool of people who can maintain it is likely small * manually simulates recursion into subshells which makes it much more difficult to reason about than, say, a traditional top-down parser * checks each test as the test is run, which can get expensive for tests which are run repeatedly by functions or loops since their bodies will be checked over and over (tens or hundreds of times) unnecessarily 
To address these shortcomings, begin implementing a more functional and precise test linter which understands shell syntax and semantics rather than employing heuristics, thus is able to recognize structural problems with tests beyond broken &&-chains. The new linter is written in Perl, thus should be more accessible to a wider audience, and is structured as a traditional top-down parser which makes it much easier to reason about, and allows it to inspect compound statements within test bodies to any depth. Furthermore, it can check all test definitions in the entire project in a single invocation rather than having to be invoked once per test, and each test definition is checked only once no matter how many times the test is actually run. At this stage, the new linter is just a skeleton containing boilerplate which handles command-line options, collects and reports statistics, and feeds its arguments -- paths of test scripts -- to a (presently) do-nothing script parser for validation. Subsequent changes will flesh out the functionality. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 115 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100755 t/chainlint.pl diff --git a/t/chainlint.pl b/t/chainlint.pl new file mode 100755 index 00000000000000..e8ab95c7858e95 --- /dev/null +++ b/t/chainlint.pl @@ -0,0 +1,115 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2021-2022 Eric Sunshine +# +# This tool scans shell scripts for test definitions and checks those tests for +# problems, such as broken &&-chains, which might hide bugs in the tests +# themselves or in behaviors being exercised by the tests. +# +# Input arguments are pathnames of shell scripts containing test definitions, +# or globs referencing a collection of scripts. 
For each problem discovered, +# the pathname of the script containing the test is printed along with the test +# name and the test body with a `?!FOO?!` annotation at the location of each +# detected problem, where "FOO" is a tag such as "AMP" which indicates a broken +# &&-chain. Returns zero if no problems are discovered, otherwise non-zero. + +use warnings; +use strict; +use File::Glob; +use Getopt::Long; + +my $show_stats; +my $emit_all; + +package ScriptParser; + +sub new { + my $class = shift @_; + my $self = bless {} => $class; + $self->{output} = []; + $self->{ntests} = 0; + return $self; +} + +sub parse_cmd { + return undef; +} + +# main contains high-level functionality for processing command-line switches, +# feeding input test scripts to ScriptParser, and reporting results. +package main; + +my $getnow = sub { return time(); }; +my $interval = sub { return time() - shift; }; +if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { + $getnow = sub { return [Time::HiRes::gettimeofday()]; }; + $interval = sub { return Time::HiRes::tv_interval(shift); }; +} + +sub show_stats { + my ($start_time, $stats) = @_; + my $walltime = $interval->($start_time); + my ($usertime) = times(); + my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); + for (@$stats) { + my ($worker, $nscripts, $ntests, $nerrs) = @$_; + print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); + $total_workers++; + $total_scripts += $nscripts; + $total_tests += $ntests; + $total_errs += $nerrs; + } + printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); +} + +sub check_script { + my ($id, $next_script, $emit) = @_; + my ($nscripts, $ntests, $nerrs) = (0, 0, 0); + while (my $path = $next_script->()) { + $nscripts++; + my $fh; + unless (open($fh, "<", $path)) { + $emit->("?!ERR?! 
$path: $!\n"); + next; + } + my $s = do { local $/; <$fh> }; + close($fh); + my $parser = ScriptParser->new(\$s); + 1 while $parser->parse_cmd(); + if (@{$parser->{output}}) { + my $s = join('', @{$parser->{output}}); + $emit->("# chainlint: $path\n" . $s); + $nerrs += () = $s =~ /\?![^?]+\?!/g; + } + $ntests += $parser->{ntests}; + } + return [$id, $nscripts, $ntests, $nerrs]; +} + +sub exit_code { + my $stats = shift @_; + for (@$stats) { + my ($worker, $nscripts, $ntests, $nerrs) = @$_; + return 1 if $nerrs; + } + return 0; +} + +Getopt::Long::Configure(qw{bundling}); +GetOptions( + "emit-all!" => \$emit_all, + "stats|show-stats!" => \$show_stats) or die("option error\n"); + +my $start_time = $getnow->(); +my @stats; + +my @scripts; +push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV); +unless (@scripts) { + show_stats($start_time, \@stats) if $show_stats; + exit; +} + +push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); +show_stats($start_time, \@stats) if $show_stats; +exit(exit_code(\@stats)); From 7d4804731ed642b92b516908fb93397b08e986bf Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:40 +0000 Subject: [PATCH 011/303] chainlint.pl: add POSIX shell lexical analyzer Begin fleshing out chainlint.pl by adding a lexical analyzer for the POSIX shell command language. The sole entry point Lexer::scan_token() returns the next token from the input. It will be called by the upcoming shell language parser. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 177 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) diff --git a/t/chainlint.pl b/t/chainlint.pl index e8ab95c7858e95..81ffbf28bf3dc5 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -21,6 +21,183 @@ my $show_stats; my $emit_all; +# Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3 +# "Token Recognition" of POSIX chapter 2 "Shell Command Language". 
Although +# similar to lexical analyzers for other languages, this one differs in a few +# substantial ways due to quirks of the shell command language. +# +# For instance, in many languages, newline is just whitespace like space or +# TAB, but in shell a newline is a command separator, thus a distinct lexical +# token. A newline is significant and returned as a distinct token even at the +# end of a shell comment. +# +# In other languages, `1+2` would typically be scanned as three tokens +# (`1`, `+`, and `2`), but in shell it is a single token. However, the similar +# `1 + 2`, which embeds whitespace, is scanned as three tokens in shell, as well. +# In shell, several characters with special meaning lose that meaning when not +# surrounded by whitespace. For instance, the negation operator `!` is special +# when standing alone surrounded by whitespace; whereas in `foo!uucp` it is +# just a plain character in the longer token "foo!uucp". In many other +# languages, `"string"/foo:'string'` might be scanned as five tokens ("string", +# `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token. +# +# The lexical analyzer for the shell command language is also somewhat unusual +# in that it recursively invokes the parser to handle the body of `$(...)` +# expressions which can contain arbitrary shell code. Such expressions may be +# encountered both inside and outside of double-quoted strings. +# +# The lexical analyzer is responsible for consuming shell here-doc bodies which +# extend from the line following a `< $parser, + buff => $s, + heretags => [] + } => $class; +} + +sub scan_heredoc_tag { + my $self = shift @_; + ${$self->{buff}} =~ /\G(-?)/gc; + my $indented = $1; + my $tag = $self->scan_token(); + $tag =~ s/['"\\]//g; + push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag"); + return "<<$indented$tag"; +} + +sub scan_op { + my ($self, $c) = @_; + my $b = $self->{buff}; + return $c unless $$b =~ /\G(.)/sgc; + my $cc = $c .
$1; + return scan_heredoc_tag($self) if $cc eq '<<'; + return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/; + pos($$b)--; + return $c; +} + +sub scan_sqstring { + my $self = shift @_; + ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc; + return "'" . $1; +} + +sub scan_dqstring { + my $self = shift @_; + my $b = $self->{buff}; + my $s = '"'; + while (1) { + # slurp up non-special characters + $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc; + # handle special characters + last unless $$b =~ /\G(.)/sgc; + my $c = $1; + $s .= '"', last if $c eq '"'; + $s .= '$' . $self->scan_dollar(), next if $c eq '$'; + if ($c eq '\\') { + $s .= '\\', last unless $$b =~ /\G(.)/sgc; + $c = $1; + next if $c eq "\n"; # line splice + # backslash escapes only $, `, ", \ in dq-string + $s .= '\\' unless $c =~ /^[\$`"\\]$/; + $s .= $c; + next; + } + die("internal error scanning dq-string '$c'\n"); + } + return $s; +} + +sub scan_balanced { + my ($self, $c1, $c2) = @_; + my $b = $self->{buff}; + my $depth = 1; + my $s = $c1; + while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) { + $s .= $1; + $depth++, next if $s =~ /\Q$c1\E$/; + $depth--; + last if $depth == 0; + } + return $s; +} + +sub scan_subst { + my $self = shift @_; + my @tokens = $self->{parser}->parse(qr/^\)$/); + $self->{parser}->next_token(); # closing ")" + return @tokens; +} + +sub scan_dollar { + my $self = shift @_; + my $b = $self->{buff}; + return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...)) + return '(' . join(' ', $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...) + return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...} + return $1 if $$b =~ /\G(\w+)/gc; # $var + return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc. + return ''; +} + +sub swallow_heredocs { + my $self = shift @_; + my $b = $self->{buff}; + my $tags = $self->{heretags}; + while (my $tag = shift @$tags) { + my $indent = $tag =~ s/^\t// ? 
'\\s*' : ''; + $$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc; + } +} + +sub scan_token { + my $self = shift @_; + my $b = $self->{buff}; + my $token = ''; +RESTART: + $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline) + return "\n" if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment + while (1) { + # slurp up non-special characters + $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc; + # handle special characters + last unless $$b =~ /\G(.)/sgc; + my $c = $1; + last if $c =~ /^[ \t]$/; # whitespace ends token + pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/; + $token .= $self->scan_sqstring(), next if $c eq "'"; + $token .= $self->scan_dqstring(), next if $c eq '"'; + $token .= $c . $self->scan_dollar(), next if $c eq '$'; + $self->swallow_heredocs(), $token = $c, last if $c eq "\n"; + $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/; + $token = $c, last if $c =~ /^[(){}]$/; + if ($c eq '\\') { + $token .= '\\', last unless $$b =~ /\G(.)/sgc; + $c = $1; + next if $c eq "\n" && length($token); # line splice + goto RESTART if $c eq "\n"; # line splice + $token .= '\\' . $c; + next; + } + die("internal error scanning character '$c'\n"); + } + return length($token) ? $token : undef; +} + package ScriptParser; sub new { From 6594554119811a01888b44112a7daec6fa0312b2 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:41 +0000 Subject: [PATCH 012/303] chainlint.pl: add POSIX shell parser Continue fleshing out chainlint.pl by adding a general purpose recursive descent parser for the POSIX shell command language. Although never invoked directly, upcoming parser subclasses will extend its functionality for specific purposes, such as plucking test definitions from input scripts and applying domain-specific knowledge to perform test validation. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 243 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) diff --git a/t/chainlint.pl b/t/chainlint.pl index 81ffbf28bf3dc5..cdf136896be077 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -198,6 +198,249 @@ sub scan_token { return length($token) ? $token : undef; } +# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It +# is a recursive descent parser very roughly modeled after section 2.10 "Shell +# Grammar" of POSIX chapter 2 "Shell Command Language". +package ShellParser; + +sub new { + my ($class, $s) = @_; + my $self = bless { + buff => [], + stop => [], + output => [] + } => $class; + $self->{lexer} = Lexer->new($self, $s); + return $self; +} + +sub next_token { + my $self = shift @_; + return pop(@{$self->{buff}}) if @{$self->{buff}}; + return $self->{lexer}->scan_token(); +} + +sub untoken { + my $self = shift @_; + push(@{$self->{buff}}, @_); +} + +sub peek { + my $self = shift @_; + my $token = $self->next_token(); + return undef unless defined($token); + $self->untoken($token); + return $token; +} + +sub stop_at { + my ($self, $token) = @_; + return 1 unless defined($token); + my $stop = ${$self->{stop}}[-1] if @{$self->{stop}}; + return defined($stop) && $token =~ $stop; +} + +sub expect { + my ($self, $expect) = @_; + my $token = $self->next_token(); + return $token if defined($token) && $token eq $expect; + push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "") . 
"'\n"); + $self->untoken($token) if defined($token); + return (); +} + +sub optional_newlines { + my $self = shift @_; + my @tokens; + while (my $token = $self->peek()) { + last unless $token eq "\n"; + push(@tokens, $self->next_token()); + } + return @tokens; +} + +sub parse_group { + my $self = shift @_; + return ($self->parse(qr/^}$/), + $self->expect('}')); +} + +sub parse_subshell { + my $self = shift @_; + return ($self->parse(qr/^\)$/), + $self->expect(')')); +} + +sub parse_case_pattern { + my $self = shift @_; + my @tokens; + while (defined(my $token = $self->next_token())) { + push(@tokens, $token); + last if $token eq ')'; + } + return @tokens; +} + +sub parse_case { + my $self = shift @_; + my @tokens; + push(@tokens, + $self->next_token(), # subject + $self->optional_newlines(), + $self->expect('in'), + $self->optional_newlines()); + while (1) { + my $token = $self->peek(); + last unless defined($token) && $token ne 'esac'; + push(@tokens, + $self->parse_case_pattern(), + $self->optional_newlines(), + $self->parse(qr/^(?:;;|esac)$/)); # item body + $token = $self->peek(); + last unless defined($token) && $token ne 'esac'; + push(@tokens, + $self->expect(';;'), + $self->optional_newlines()); + } + push(@tokens, $self->expect('esac')); + return @tokens; +} + +sub parse_for { + my $self = shift @_; + my @tokens; + push(@tokens, + $self->next_token(), # variable + $self->optional_newlines()); + my $token = $self->peek(); + if (defined($token) && $token eq 'in') { + push(@tokens, + $self->expect('in'), + $self->optional_newlines()); + } + push(@tokens, + $self->parse(qr/^do$/), # items + $self->expect('do'), + $self->optional_newlines(), + $self->parse_loop_body(), + $self->expect('done')); + return @tokens; +} + +sub parse_if { + my $self = shift @_; + my @tokens; + while (1) { + push(@tokens, + $self->parse(qr/^then$/), # if/elif condition + $self->expect('then'), + $self->optional_newlines(), + $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body + my 
$token = $self->peek(); + last unless defined($token) && $token eq 'elif'; + push(@tokens, $self->expect('elif')); + } + my $token = $self->peek(); + if (defined($token) && $token eq 'else') { + push(@tokens, + $self->expect('else'), + $self->optional_newlines(), + $self->parse(qr/^fi$/)); # else body + } + push(@tokens, $self->expect('fi')); + return @tokens; +} + +sub parse_loop_body { + my $self = shift @_; + return $self->parse(qr/^done$/); +} + +sub parse_loop { + my $self = shift @_; + return ($self->parse(qr/^do$/), # condition + $self->expect('do'), + $self->optional_newlines(), + $self->parse_loop_body(), + $self->expect('done')); +} + +sub parse_func { + my $self = shift @_; + return ($self->expect('('), + $self->expect(')'), + $self->optional_newlines(), + $self->parse_cmd()); # body +} + +sub parse_bash_array_assignment { + my $self = shift @_; + my @tokens = $self->expect('('); + while (defined(my $token = $self->next_token())) { + push(@tokens, $token); + last if $token eq ')'; + } + return @tokens; +} + +my %compound = ( + '{' => \&parse_group, + '(' => \&parse_subshell, + 'case' => \&parse_case, + 'for' => \&parse_for, + 'if' => \&parse_if, + 'until' => \&parse_loop, + 'while' => \&parse_loop); + +sub parse_cmd { + my $self = shift @_; + my $cmd = $self->next_token(); + return () unless defined($cmd); + return $cmd if $cmd eq "\n"; + + my $token; + my @tokens = $cmd; + if ($cmd eq '!') { + push(@tokens, $self->parse_cmd()); + return @tokens; + } elsif (my $f = $compound{$cmd}) { + push(@tokens, $self->$f()); + } elsif (defined($token = $self->peek()) && $token eq '(') { + if ($cmd !~ /\w=$/) { + push(@tokens, $self->parse_func()); + return @tokens; + } + $tokens[-1] .= join(' ', $self->parse_bash_array_assignment()); + } + + while (defined(my $token = $self->next_token())) { + $self->untoken($token), last if $self->stop_at($token); + push(@tokens, $token); + last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/; + } + push(@tokens, $self->next_token()) if 
$tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n"; + return @tokens; +} + +sub accumulate { + my ($self, $tokens, $cmd) = @_; + push(@$tokens, @$cmd); +} + +sub parse { + my ($self, $stop) = @_; + push(@{$self->{stop}}, $stop); + goto DONE if $self->stop_at($self->peek()); + my @tokens; + while (my @cmd = $self->parse_cmd()) { + $self->accumulate(\@tokens, \@cmd); + last if $self->stop_at($self->peek()); + } +DONE: + pop(@{$self->{stop}}); + return @tokens; +} + package ScriptParser; sub new { From 6d932e92fcb49b59b780bc018fe550d867bb3d84 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:42 +0000 Subject: [PATCH 013/303] chainlint.pl: add parser to validate tests Continue fleshing out chainlint.pl by adding TestParser, a parser with special knowledge about how Git tests should be written; for instance, it knows that commands within a test body should be chained together with `&&`. An upcoming parser which plucks test definitions from test scripts will invoke TestParser for each test body it encounters. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/t/chainlint.pl b/t/chainlint.pl index cdf136896be077..ad257106e56826 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -441,6 +441,52 @@ sub parse { return @tokens; } +# TestParser is a subclass of ShellParser which, beyond parsing shell script +# code, is also imbued with semantic knowledge of test construction, and checks +# tests for common problems (such as broken &&-chains) which might hide bugs in +# the tests themselves or in behaviors being exercised by the tests. As such, +# TestParser is only called upon to parse test bodies, not the top-level +# scripts in which the tests are defined. 
+package TestParser; + +use base 'ShellParser'; + +sub find_non_nl { + my $tokens = shift @_; + my $n = shift @_; + $n = $#$tokens if !defined($n); + $n-- while $n >= 0 && $$tokens[$n] eq "\n"; + return $n; +} + +sub ends_with { + my ($tokens, $needles) = @_; + my $n = find_non_nl($tokens); + for my $needle (reverse(@$needles)) { + return undef if $n < 0; + $n = find_non_nl($tokens, $n), next if $needle eq "\n"; + return undef if $$tokens[$n] !~ $needle; + $n--; + } + return 1; +} + +sub accumulate { + my ($self, $tokens, $cmd) = @_; + goto DONE unless @$tokens; + goto DONE if @$cmd == 1 && $$cmd[0] eq "\n"; + + # did previous command end with "&&", "||", "|"? + goto DONE if ends_with($tokens, [qr/^(?:&&|\|\||\|)$/]); + + # flag missing "&&" at end of previous command + my $n = find_non_nl($tokens); + splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; + +DONE: + $self->SUPER::accumulate($tokens, $cmd); +} + package ScriptParser; sub new { From d99ebd6d2e57baa3ec45b939d40cf939b85301a3 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:43 +0000 Subject: [PATCH 014/303] chainlint.pl: add parser to identify test definitions Finish fleshing out chainlint.pl by adding ScriptParser, a parser which scans shell scripts for tests defined by test_expect_success() and test_expect_failure(), plucks the test body from each definition, and passes it to TestParser for validation. It recognizes test definitions not only at the top-level of test scripts but also tests synthesized within compound commands such as loops and functions.
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 63 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/t/chainlint.pl b/t/chainlint.pl index ad257106e56826..d526723ac00317 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -487,18 +487,75 @@ sub accumulate { $self->SUPER::accumulate($tokens, $cmd); } +# ScriptParser is a subclass of ShellParser which identifies individual test +# definitions within test scripts, and passes each test body through TestParser +# to identify possible problems. ScriptParser detects test definitions not only +# at the top-level of test scripts but also within compound commands such as +# loops and function definitions. package ScriptParser; +use base 'ShellParser'; + sub new { my $class = shift @_; - my $self = bless {} => $class; - $self->{output} = []; + my $self = $class->SUPER::new(@_); $self->{ntests} = 0; return $self; } +# extract the raw content of a token, which may be a single string or a +# composition of multiple strings and non-string character runs; for instance, +# `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` +sub unwrap { + my $token = @_ ?
shift @_ : $_; + # simple case: 'sqstring' or "dqstring" + return $token if $token =~ s/^'([^']*)'$/$1/; + return $token if $token =~ s/^"([^"]*)"$/$1/; + + # composite case + my ($s, $q, $escaped); + while (1) { + # slurp up non-special characters + $s .= $1 if $token =~ /\G([^\\'"]*)/gc; + # handle special characters + last unless $token =~ /\G(.)/sgc; + my $c = $1; + $q = undef, next if defined($q) && $c eq $q; + $q = $c, next if !defined($q) && $c =~ /^['"]$/; + if ($c eq '\\') { + last unless $token =~ /\G(.)/sgc; + $c = $1; + $s .= '\\' if $c eq "\n"; # preserve line splice + } + $s .= $c; + } + return $s +} + +sub check_test { + my $self = shift @_; + my ($title, $body) = map(unwrap, @_); + $self->{ntests}++; + my $parser = TestParser->new(\$body); + my @tokens = $parser->parse(); + return unless $emit_all || grep(/\?![^?]+\?!/, @tokens); + my $checked = join(' ', @tokens); + $checked =~ s/^\n//; + $checked =~ s/^ //mg; + $checked =~ s/ $//mg; + $checked .= "\n" unless $checked =~ /\n$/; + push(@{$self->{output}}, "# chainlint: $title\n$checked"); +} + sub parse_cmd { - return undef; + my $self = shift @_; + my @tokens = $self->SUPER::parse_cmd(); + return @tokens unless @tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/; + my $n = $#tokens; + $n-- while $n >= 0 && $tokens[$n] =~ /^(?:[;&\n|]|&&|\|\|)$/; + $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body + $self->check_test($tokens[2], $tokens[3]) if $n > 2; # prereq title body + return @tokens; } # main contains high-level functionality for processing command-line switches, From 29fb2ec384a867ca577335a12f4b45c184e7b642 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:44 +0000 Subject: [PATCH 015/303] chainlint.pl: validate test scripts in parallel Although chainlint.pl has undergone a good deal of optimization during its development -- increasing in speed significantly -- parsing and validating 1050+ scripts and 16500+ tests via Perl is not exactly 
instantaneous. However, perceived performance can be improved by taking advantage of the fact that there is no interdependence between test scripts or test definitions, thus parsing and validating can be done in parallel. The number of available cores is determined automatically but can be overridden via the --jobs option. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/t/chainlint.pl b/t/chainlint.pl index d526723ac00317..898573a910030b 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -15,9 +15,11 @@ use warnings; use strict; +use Config; use File::Glob; use Getopt::Long; +my $jobs = -1; my $show_stats; my $emit_all; @@ -569,6 +571,16 @@ package main; $interval = sub { return Time::HiRes::tv_interval(shift); }; } +sub ncores { + # Windows + return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS}); + # Linux / MSYS2 / Cygwin / WSL + do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor\s*:/, <>)); } if -r '/proc/cpuinfo'; + # macOS & BSD + return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/; + return 1; +} + sub show_stats { my ($start_time, $stats) = @_; my $walltime = $interval->($start_time); @@ -621,7 +633,9 @@ sub exit_code { Getopt::Long::Configure(qw{bundling}); GetOptions( "emit-all!" => \$emit_all, + "jobs|j=i" => \$jobs, "stats|show-stats!" 
=> \$show_stats) or die("option error\n"); +$jobs = ncores() if $jobs < 1; my $start_time = $getnow->(); my @stats; @@ -633,6 +647,40 @@ sub exit_code { exit; } -push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); +unless ($Config{useithreads} && eval { + require threads; threads->import(); + require Thread::Queue; Thread::Queue->import(); + 1; + }) { + push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); + show_stats($start_time, \@stats) if $show_stats; + exit(exit_code(\@stats)); +} + +my $script_queue = Thread::Queue->new(); +my $output_queue = Thread::Queue->new(); + +sub next_script { return $script_queue->dequeue(); } +sub emit { $output_queue->enqueue(@_); } + +sub monitor { + while (my $s = $output_queue->dequeue()) { + print($s); + } +} + +my $mon = threads->create({'context' => 'void'}, \&monitor); +threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs; + +$script_queue->enqueue(@scripts); +$script_queue->end(); + +for (threads->list()) { + push(@stats, $_->join()) unless $_ == $mon; +} + +$output_queue->end(); +$mon->join(); + show_stats($start_time, \@stats) if $show_stats; exit(exit_code(\@stats)); From 35ebb1e37b25b9d799d1064d36a2ce668ad20264 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:45 +0000 Subject: [PATCH 016/303] chainlint.pl: don't require `return|exit|continue` to end with `&&` In order to check for &&-chain breakage, each time TestParser encounters a new command, it checks whether the previous command ends with `&&`, and -- with a couple exceptions -- signals breakage if it does not. The first exception is that a command may validly end with `||`, which is commonly employed as `command || return 1` at the very end of a loop body to terminate the loop early. 
The second is that piping one command's output with `|` to another command does not constitute a &&-chain break (the exit status of the pipe is the exit status of the final command in the pipe). However, it turns out that there are a few additional cases found in the wild in which it is likely safe for `&&` to be missing even when other commands follow. For instance: while {condition-1} do test {condition-2} || return 1 # or `exit 1` within a subshell more-commands done while {condition-1} do test {condition-2} || continue more-commands done Such cases indicate deliberate thought about failure modes by the test author, thus flagging them as breaking the &&-chain is not helpful. Therefore, take these special cases into consideration when checking for &&-chain breakage. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 20 ++++++++++++++++++-- t/chainlint/chain-break-continue.expect | 12 ++++++++++++ t/chainlint/chain-break-continue.test | 13 +++++++++++++ t/chainlint/chain-break-return-exit.expect | 4 ++++ t/chainlint/chain-break-return-exit.test | 5 +++++ t/chainlint/return-loop.expect | 5 +++++ t/chainlint/return-loop.test | 6 ++++++ 7 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 t/chainlint/chain-break-continue.expect create mode 100644 t/chainlint/chain-break-continue.test create mode 100644 t/chainlint/chain-break-return-exit.expect create mode 100644 t/chainlint/chain-break-return-exit.test create mode 100644 t/chainlint/return-loop.expect create mode 100644 t/chainlint/return-loop.test diff --git a/t/chainlint.pl b/t/chainlint.pl index 898573a910030b..31c444067cee8c 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -473,13 +473,29 @@ sub ends_with { return 1; } +sub match_ending { + my ($tokens, $endings) = @_; + for my $needles (@$endings) { + next if @$tokens < scalar(grep {$_ ne "\n"} @$needles); + return 1 if ends_with($tokens, $needles); + } + return undef; +} + +my @safe_endings = ( + 
[qr/^(?:&&|\|\||\|)$/], + [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], + [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], + [qr/^(?:exit|return|continue)$/], + [qr/^(?:exit|return|continue)$/, qr/^;$/]); + sub accumulate { my ($self, $tokens, $cmd) = @_; goto DONE unless @$tokens; goto DONE if @$cmd == 1 && $$cmd[0] eq "\n"; - # did previous command end with "&&", "||", "|"? - goto DONE if ends_with($tokens, [qr/^(?:&&|\|\||\|)$/]); + # did previous command end with "&&", "|", "|| return" or similar? + goto DONE if match_ending($tokens, \@safe_endings); # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); diff --git a/t/chainlint/chain-break-continue.expect b/t/chainlint/chain-break-continue.expect new file mode 100644 index 00000000000000..47a34577100024 --- /dev/null +++ b/t/chainlint/chain-break-continue.expect @@ -0,0 +1,12 @@ +git ls-tree --name-only -r refs/notes/many_notes | +while read path +do + test "$path" = "foobar/non-note.txt" && continue + test "$path" = "deadbeef" && continue + test "$path" = "de/adbeef" && continue + + if test $(expr length "$path") -ne $hexsz + then + return 1 + fi +done diff --git a/t/chainlint/chain-break-continue.test b/t/chainlint/chain-break-continue.test new file mode 100644 index 00000000000000..f0af71d8bd914a --- /dev/null +++ b/t/chainlint/chain-break-continue.test @@ -0,0 +1,13 @@ +git ls-tree --name-only -r refs/notes/many_notes | +while read path +do +# LINT: broken &&-chain okay if explicit "continue" + test "$path" = "foobar/non-note.txt" && continue + test "$path" = "deadbeef" && continue + test "$path" = "de/adbeef" && continue + + if test $(expr length "$path") -ne $hexsz + then + return 1 + fi +done diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect new file mode 100644 index 00000000000000..dba292ee89b695 --- /dev/null +++ b/t/chainlint/chain-break-return-exit.expect @@ -0,0 +1,4 @@ +for i in 1 2 3 4 ; do + git checkout main -b $i || 
return $? + test_commit $i $i $i tag$i || return $? +done diff --git a/t/chainlint/chain-break-return-exit.test b/t/chainlint/chain-break-return-exit.test new file mode 100644 index 00000000000000..e2b059933aa683 --- /dev/null +++ b/t/chainlint/chain-break-return-exit.test @@ -0,0 +1,5 @@ +for i in 1 2 3 4 ; do +# LINT: broken &&-chain okay if explicit "return $?" signals failure + git checkout main -b $i || return $? + test_commit $i $i $i tag$i || return $? +done diff --git a/t/chainlint/return-loop.expect b/t/chainlint/return-loop.expect new file mode 100644 index 00000000000000..cfc0549befe3ff --- /dev/null +++ b/t/chainlint/return-loop.expect @@ -0,0 +1,5 @@ +while test $i -lt $((num - 5)) +do + git notes add -m "notes for commit$i" HEAD~$i || return 1 + i=$((i + 1)) +done diff --git a/t/chainlint/return-loop.test b/t/chainlint/return-loop.test new file mode 100644 index 00000000000000..f90b1713005b0d --- /dev/null +++ b/t/chainlint/return-loop.test @@ -0,0 +1,6 @@ +while test $i -lt $((num - 5)) +do +# LINT: "|| return {n}" valid loop escape outside subshell; no "&&" needed + git notes add -m "notes for commit$i" HEAD~$i || return 1 + i=$((i + 1)) +done From d00113ec3474a1652a73c11695c7e7b5182d80a7 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:46 +0000 Subject: [PATCH 017/303] t/Makefile: apply chainlint.pl to existing self-tests Now that chainlint.pl is functional, take advantage of the existing chainlint self-tests to validate its operation. (While at it, stop validating chainlint.sed against the self-tests since it will soon be retired.) Due to chainlint.sed implementation limitations leaking into the self-test "expect" files, a few of them require minor adjustment to make them compatible with chainlint.pl which does not share those limitations. 
First, because `sed` does not provide any sort of real recursion, chainlint.sed only emulates recursion into subshells, and each level of recursion leads to a multiplicative increase in complexity of the `sed` rules. To avoid substantial complexity, chainlint.sed, therefore, only emulates subshell recursion one level deep. Any subshell deeper than that is passed through as-is, which means that &&-chains are not checked in deeper subshells. chainlint.pl, on the other hand, employs a proper recursive descent parser, thus checks subshells to any depth and correctly flags broken &&-chains in deep subshells. Second, due to sed's line-oriented nature, chainlint.sed, by necessity, folds multi-line quoted strings into a single line. chainlint.pl, on the other hand, employs a proper lexical analyzer which preserves quoted strings as-is, including embedded newlines. Furthermore, the output of chainlint.sed and chainlint.pl do not match precisely in terms of whitespace. However, since the purpose of the self-checks is to verify that the ?!AMP?! annotations are being correctly added, minor whitespace differences are immaterial. For this reason, rather than adjusting whitespace in all existing self-test "expect" files to match the new linter's output, the `check-chainlint` target ignores whitespace differences. Since `diff -w` is not POSIX, `check-chainlint` attempts to employ `git diff -w`, and only falls back to non-POSIX `diff -w` (and `-u`) if `git diff` is not available. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/Makefile | 29 +++++++++++++++---- t/chainlint/block.expect | 2 +- t/chainlint/here-doc-multi-line-string.expect | 3 +- t/chainlint/multi-line-string.expect | 11 +++++-- t/chainlint/nested-subshell.expect | 2 +- t/chainlint/t7900-subtree.expect | 13 +++++++-- 6 files changed, 46 insertions(+), 14 deletions(-) diff --git a/t/Makefile b/t/Makefile index 1c80c0c79a05d8..11f276774ea286 100644 --- a/t/Makefile +++ b/t/Makefile @@ -38,7 +38,7 @@ T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)) THELPERS = $(sort $(filter-out $(T),$(wildcard *.sh))) TPERF = $(sort $(wildcard perf/p[0-9][0-9][0-9][0-9]-*.sh)) CHAINLINTTESTS = $(sort $(patsubst chainlint/%.test,%,$(wildcard chainlint/*.test))) -CHAINLINT = sed -f chainlint.sed +CHAINLINT = '$(PERL_PATH_SQ)' chainlint.pl all: $(DEFAULT_TEST_TARGET) @@ -73,10 +73,29 @@ clean-chainlint: check-chainlint: @mkdir -p '$(CHAINLINTTMP_SQ)' && \ - sed -e '/^# LINT: /d' $(patsubst %,chainlint/%.test,$(CHAINLINTTESTS)) >'$(CHAINLINTTMP_SQ)'/tests && \ - sed -e '/^[ ]*$$/d' $(patsubst %,chainlint/%.expect,$(CHAINLINTTESTS)) >'$(CHAINLINTTMP_SQ)'/expect && \ - $(CHAINLINT) '$(CHAINLINTTMP_SQ)'/tests | grep -v '^[ ]*$$' >'$(CHAINLINTTMP_SQ)'/actual && \ - diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual + for i in $(CHAINLINTTESTS); do \ + echo "test_expect_success '$$i' '" && \ + sed -e '/^# LINT: /d' chainlint/$$i.test && \ + echo "'"; \ + done >'$(CHAINLINTTMP_SQ)'/tests && \ + { \ + echo "# chainlint: $(CHAINLINTTMP_SQ)/tests" && \ + for i in $(CHAINLINTTESTS); do \ + echo "# chainlint: $$i" && \ + sed -e '/^[ ]*$$/d' chainlint/$$i.expect; \ + done \ + } >'$(CHAINLINTTMP_SQ)'/expect && \ + $(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \ + grep -v '^[ ]*$$' >'$(CHAINLINTTMP_SQ)'/actual && \ + if test -f ../GIT-BUILD-OPTIONS; then \ + . 
../GIT-BUILD-OPTIONS; \ + fi && \ + if test -x ../git$$X; then \ + DIFFW="../git$$X --no-pager diff -w --no-index"; \ + else \ + DIFFW="diff -w -u"; \ + fi && \ + $$DIFFW '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \ test-lint-filenames diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect index da60257ebc4e1d..37dbf7d95fa7f9 100644 --- a/t/chainlint/block.expect +++ b/t/chainlint/block.expect @@ -1,7 +1,7 @@ ( foo && { - echo a + echo a ?!AMP?! echo b } && bar && diff --git a/t/chainlint/here-doc-multi-line-string.expect b/t/chainlint/here-doc-multi-line-string.expect index 2578191ca8a809..be64b26869ada1 100644 --- a/t/chainlint/here-doc-multi-line-string.expect +++ b/t/chainlint/here-doc-multi-line-string.expect @@ -1,4 +1,5 @@ ( - cat <<-TXT && echo "multi-line string" ?!AMP?! + cat <<-TXT && echo "multi-line + string" ?!AMP?! bap ) diff --git a/t/chainlint/multi-line-string.expect b/t/chainlint/multi-line-string.expect index ab0dadf748e859..27ff95218e7f64 100644 --- a/t/chainlint/multi-line-string.expect +++ b/t/chainlint/multi-line-string.expect @@ -1,9 +1,14 @@ ( - x="line 1 line 2 line 3" && - y="line 1 line2" ?!AMP?! + x="line 1 + line 2 + line 3" && + y="line 1 + line2" ?!AMP?! foobar ) && ( - echo "xyz" "abc def ghi" && + echo "xyz" "abc + def + ghi" && barfoo ) diff --git a/t/chainlint/nested-subshell.expect b/t/chainlint/nested-subshell.expect index 41a48adaa2b8ff..02e0a9f1bb5f30 100644 --- a/t/chainlint/nested-subshell.expect +++ b/t/chainlint/nested-subshell.expect @@ -6,7 +6,7 @@ ) >file && cd foo && ( - echo a + echo a ?!AMP?! 
echo b ) >file ) diff --git a/t/chainlint/t7900-subtree.expect b/t/chainlint/t7900-subtree.expect index 1cccc7bf7e1a47..69167da2f27a30 100644 --- a/t/chainlint/t7900-subtree.expect +++ b/t/chainlint/t7900-subtree.expect @@ -1,10 +1,17 @@ ( - chks="sub1sub2sub3sub4" && + chks="sub1 +sub2 +sub3 +sub4" && chks_sub=$(cat < Date: Thu, 1 Sep 2022 00:29:47 +0000 Subject: [PATCH 018/303] chainlint.pl: don't require `&` background command to end with `&&` The exit status of the `&` asynchronous operator which starts a command in the background is unconditionally zero, and the few places in the test scripts which launch commands asynchronously are not interested in the exit status of the `&` operator (though they often capture the background command's PID). As such, there is little value in complaining about broken &&-chain for a command launched in the background, and doing so would only make busy-work for test authors. Therefore, take this special case into account when checking for &&-chain breakage. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 2 +- t/chainlint/chain-break-background.expect | 9 +++++++++ t/chainlint/chain-break-background.test | 10 ++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 t/chainlint/chain-break-background.expect create mode 100644 t/chainlint/chain-break-background.test diff --git a/t/chainlint.pl b/t/chainlint.pl index 31c444067cee8c..ba3fcb0c8e613b 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -483,7 +483,7 @@ sub match_ending { } my @safe_endings = ( - [qr/^(?:&&|\|\||\|)$/], + [qr/^(?:&&|\|\||\||&)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], [qr/^(?:exit|return|continue)$/], diff --git a/t/chainlint/chain-break-background.expect b/t/chainlint/chain-break-background.expect new file mode 100644 index 00000000000000..28f9114f42de6b --- /dev/null +++ b/t/chainlint/chain-break-background.expect @@ -0,0 +1,9 @@ +JGIT_DAEMON_PID= && +git init --bare empty.git && +> empty.git/git-daemon-export-ok && +mkfifo jgit_daemon_output && +{ + jgit daemon --port="$JGIT_DAEMON_PORT" . > jgit_daemon_output & + JGIT_DAEMON_PID=$! +} && +test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git diff --git a/t/chainlint/chain-break-background.test b/t/chainlint/chain-break-background.test new file mode 100644 index 00000000000000..e10f656b0556d7 --- /dev/null +++ b/t/chainlint/chain-break-background.test @@ -0,0 +1,10 @@ +JGIT_DAEMON_PID= && +git init --bare empty.git && +>empty.git/git-daemon-export-ok && +mkfifo jgit_daemon_output && +{ +# LINT: exit status of "&" is always 0 so &&-chaining immaterial + jgit daemon --port="$JGIT_DAEMON_PORT" . >jgit_daemon_output & + JGIT_DAEMON_PID=$! 
+} && +test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git From a8f30ee0502b89ecb660af36784f653a8c3fb20d Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:48 +0000 Subject: [PATCH 019/303] chainlint.pl: don't flag broken &&-chain if `$?` handled explicitly There are cases in which tests capture and check a command's exit code explicitly without employing test_expect_code(). They do so by intentionally breaking the &&-chain since it would be impossible to capture "$?" in the failing case if the `status=$?` assignment was part of the &&-chain. Since such constructs are manually checking the exit code, their &&-chain breakage is legitimate and safe, thus should not be flagged. Therefore, stop flagging &&-chain breakage in such cases. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 6 ++++++ t/chainlint/chain-break-status.expect | 9 +++++++++ t/chainlint/chain-break-status.test | 11 +++++++++++ 3 files changed, 26 insertions(+) create mode 100644 t/chainlint/chain-break-status.expect create mode 100644 t/chainlint/chain-break-status.test diff --git a/t/chainlint.pl b/t/chainlint.pl index ba3fcb0c8e613b..14e1db3519a2d5 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -497,6 +497,12 @@ sub accumulate { # did previous command end with "&&", "|", "|| return" or similar? goto DONE if match_ending($tokens, \@safe_endings); + # if this command handles "$?" specially, then okay for previous + # command to be missing "&&" + for my $token (@$cmd) { + goto DONE if $token =~ /\$\?/; + } + # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; diff --git a/t/chainlint/chain-break-status.expect b/t/chainlint/chain-break-status.expect new file mode 100644 index 00000000000000..f4bada946322a6 --- /dev/null +++ b/t/chainlint/chain-break-status.expect @@ -0,0 +1,9 @@ +OUT=$(( ( large_git ; echo $? 
1 >& 3 ) | : ) 3 >& 1) && +test_match_signal 13 "$OUT" && + +{ test-tool sigchain > actual ; ret=$? ; } && +{ + test_match_signal 15 "$ret" || + test "$ret" = 3 +} && +test_cmp expect actual diff --git a/t/chainlint/chain-break-status.test b/t/chainlint/chain-break-status.test new file mode 100644 index 00000000000000..a6602a7b99cfc7 --- /dev/null +++ b/t/chainlint/chain-break-status.test @@ -0,0 +1,11 @@ +# LINT: broken &&-chain okay if next command handles "$?" explicitly +OUT=$( ((large_git; echo $? 1>&3) | :) 3>&1 ) && +test_match_signal 13 "$OUT" && + +# LINT: broken &&-chain okay if next command handles "$?" explicitly +{ test-tool sigchain >actual; ret=$?; } && +{ + test_match_signal 15 "$ret" || + test "$ret" = 3 +} && +test_cmp expect actual From 832c68b3c210267c93e1dcb2f2763372339ca36c Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:49 +0000 Subject: [PATCH 020/303] chainlint.pl: don't flag broken &&-chain if failure indicated explicitly There are quite a few tests which print an error message and then explicitly signal failure with `false`, `return 1`, or `exit 1` as the final command in an `if` branch. In these cases, the tests don't bother maintaining the &&-chain between `echo` and the explicit "test failed" indicator. Since such constructs are manually signaling failure, their &&-chain breakage is legitimate and safe -- both for the command immediately preceding `false`, `return`, or `exit`, as well as for all preceding commands in the `if` branch. Therefore, stop flagging &&-chain breakage in these sorts of cases.
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 8 ++++++++ t/chainlint/chain-break-false.expect | 9 +++++++++ t/chainlint/chain-break-false.test | 10 ++++++++++ t/chainlint/chain-break-return-exit.expect | 15 +++++++++++++++ t/chainlint/chain-break-return-exit.test | 18 ++++++++++++++++++ t/chainlint/if-in-loop.expect | 2 +- t/chainlint/if-in-loop.test | 2 +- 7 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 t/chainlint/chain-break-false.expect create mode 100644 t/chainlint/chain-break-false.test diff --git a/t/chainlint.pl b/t/chainlint.pl index 14e1db3519a2d5..a76a09ecf5e294 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -503,6 +503,14 @@ sub accumulate { goto DONE if $token =~ /\$\?/; } + # if this command is "false", "return 1", or "exit 1" (which signal + # failure explicitly), then okay for all preceding commands to be + # missing "&&" + if ($$cmd[0] =~ /^(?:false|return|exit)$/) { + @$tokens = grep(!/^\?!AMP\?!$/, @$tokens); + goto DONE; + } + # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; diff --git a/t/chainlint/chain-break-false.expect b/t/chainlint/chain-break-false.expect new file mode 100644 index 00000000000000..989766fb856446 --- /dev/null +++ b/t/chainlint/chain-break-false.expect @@ -0,0 +1,9 @@ +if condition not satisified +then + echo it did not work... + echo failed! + false +else + echo it went okay ?!AMP?! + congratulate user +fi diff --git a/t/chainlint/chain-break-false.test b/t/chainlint/chain-break-false.test new file mode 100644 index 00000000000000..a5aaff8c8a4290 --- /dev/null +++ b/t/chainlint/chain-break-false.test @@ -0,0 +1,10 @@ +# LINT: broken &&-chain okay if explicit "false" signals failure +if condition not satisified +then + echo it did not work... + echo failed! 
+ false +else + echo it went okay + congratulate user +fi diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect index dba292ee89b695..1732d221c32e98 100644 --- a/t/chainlint/chain-break-return-exit.expect +++ b/t/chainlint/chain-break-return-exit.expect @@ -1,3 +1,18 @@ +case "$(git ls-files)" in +one ) echo pass one ;; +* ) echo bad one ; return 1 ;; +esac && +( + case "$(git ls-files)" in + two ) echo pass two ;; + * ) echo bad two ; exit 1 ;; +esac +) && +case "$(git ls-files)" in +dir/two"$LF"one ) echo pass both ;; +* ) echo bad ; return 1 ;; +esac && + for i in 1 2 3 4 ; do git checkout main -b $i || return $? test_commit $i $i $i tag$i || return $? diff --git a/t/chainlint/chain-break-return-exit.test b/t/chainlint/chain-break-return-exit.test index e2b059933aa683..46542edf881916 100644 --- a/t/chainlint/chain-break-return-exit.test +++ b/t/chainlint/chain-break-return-exit.test @@ -1,3 +1,21 @@ +case "$(git ls-files)" in +one) echo pass one ;; +# LINT: broken &&-chain okay if explicit "return 1" signals failure +*) echo bad one; return 1 ;; +esac && +( + case "$(git ls-files)" in + two) echo pass two ;; +# LINT: broken &&-chain okay if explicit "exit 1" signals failure + *) echo bad two; exit 1 ;; + esac +) && +case "$(git ls-files)" in +dir/two"$LF"one) echo pass both ;; +# LINT: broken &&-chain okay if explicit "return 1" signals failure +*) echo bad; return 1 ;; +esac && + for i in 1 2 3 4 ; do # LINT: broken &&-chain okay if explicit "return $?" signals failure git checkout main -b $i || return $? diff --git a/t/chainlint/if-in-loop.expect b/t/chainlint/if-in-loop.expect index 03b82a3e58c21e..d6514ae74927ff 100644 --- a/t/chainlint/if-in-loop.expect +++ b/t/chainlint/if-in-loop.expect @@ -3,7 +3,7 @@ do if false then - echo "err" ?!AMP?! + echo "err" exit 1 fi ?!AMP?!
foo diff --git a/t/chainlint/if-in-loop.test b/t/chainlint/if-in-loop.test index f0cf19cfadac8c..90c23976feccdd 100644 --- a/t/chainlint/if-in-loop.test +++ b/t/chainlint/if-in-loop.test @@ -3,7 +3,7 @@ do if false then -# LINT: missing "&&" on "echo" +# LINT: missing "&&" on "echo" okay since "exit 1" signals error explicitly echo "err" exit 1 # LINT: missing "&&" on "fi" From fd4094c3cad7c62adb0b7080e0dca37f66bf0c6e Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:50 +0000 Subject: [PATCH 021/303] chainlint.pl: complain about loops lacking explicit failure handling Shell `for` and `while` loops do not terminate automatically just because a command fails within the loop body. Instead, the loop continues to iterate and eventually returns the exit status of the final command of the final iteration, which may not be the command which failed, thus it is possible for failures to go undetected. Consequently, it is important for test authors to explicitly handle failure within the loop body by terminating the loop manually upon failure. This can be done by returning a non-zero exit code from within the loop body (i.e. `|| return 1`) or exiting (i.e. `|| exit 1`) if the loop is within a subshell, or by manually checking `$?` and taking some appropriate action. Therefore, add logic to detect and complain about loops which lack explicit `return` or `exit`, or `$?` check. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 11 ++++++ t/chainlint/complex-if-in-cuddled-loop.expect | 2 +- t/chainlint/for-loop.expect | 4 +-- t/chainlint/loop-detect-failure.expect | 15 ++++++++ t/chainlint/loop-detect-failure.test | 17 +++++++++ t/chainlint/loop-detect-status.expect | 18 ++++++++++ t/chainlint/loop-detect-status.test | 19 ++++++++++ t/chainlint/loop-in-if.expect | 2 +- t/chainlint/nested-loop-detect-failure.expect | 31 ++++++++++++++++ t/chainlint/nested-loop-detect-failure.test | 35 +++++++++++++++++++ t/chainlint/semicolon.expect | 2 +- t/chainlint/while-loop.expect | 4 +-- 12 files changed, 153 insertions(+), 7 deletions(-) create mode 100644 t/chainlint/loop-detect-failure.expect create mode 100644 t/chainlint/loop-detect-failure.test create mode 100644 t/chainlint/loop-detect-status.expect create mode 100644 t/chainlint/loop-detect-status.test create mode 100644 t/chainlint/nested-loop-detect-failure.expect create mode 100644 t/chainlint/nested-loop-detect-failure.test diff --git a/t/chainlint.pl b/t/chainlint.pl index a76a09ecf5e294..674b3ddf69633e 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -482,6 +482,17 @@ sub match_ending { return undef; } +sub parse_loop_body { + my $self = shift @_; + my @tokens = $self->SUPER::parse_loop_body(@_); + # did loop signal failure via "|| return" or "|| exit"? 
+ return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens); + # flag missing "return/exit" handling explicit failure in loop body + my $n = find_non_nl(\@tokens); + splice(@tokens, $n + 1, 0, '?!LOOP?!'); + return @tokens; +} + my @safe_endings = ( [qr/^(?:&&|\|\||\||&)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], diff --git a/t/chainlint/complex-if-in-cuddled-loop.expect b/t/chainlint/complex-if-in-cuddled-loop.expect index 2fca1834095817..dac2d0fd1d9037 100644 --- a/t/chainlint/complex-if-in-cuddled-loop.expect +++ b/t/chainlint/complex-if-in-cuddled-loop.expect @@ -4,6 +4,6 @@ : else echo >file - fi + fi ?!LOOP?! done) && test ! -f file diff --git a/t/chainlint/for-loop.expect b/t/chainlint/for-loop.expect index 6671b8cd842de1..a5810c9bddd835 100644 --- a/t/chainlint/for-loop.expect +++ b/t/chainlint/for-loop.expect @@ -2,10 +2,10 @@ for i in a b c do echo $i ?!AMP?! - cat <<-EOF + cat <<-EOF ?!LOOP?! done ?!AMP?! for i in a b c; do echo $i && - cat $i + cat $i ?!LOOP?! done ) diff --git a/t/chainlint/loop-detect-failure.expect b/t/chainlint/loop-detect-failure.expect new file mode 100644 index 00000000000000..a66025c39d4fca --- /dev/null +++ b/t/chainlint/loop-detect-failure.expect @@ -0,0 +1,15 @@ +git init r1 && +for n in 1 2 3 4 5 +do + echo "This is file: $n" > r1/file.$n && + git -C r1 add file.$n && + git -C r1 commit -m "$n" || return 1 +done && + +git init r2 && +for n in 1000 10000 +do + printf "%"$n"s" X > r2/large.$n && + git -C r2 add large.$n && + git -C r2 commit -m "$n" ?!LOOP?! 
+done diff --git a/t/chainlint/loop-detect-failure.test b/t/chainlint/loop-detect-failure.test new file mode 100644 index 00000000000000..b9791cc802e661 --- /dev/null +++ b/t/chainlint/loop-detect-failure.test @@ -0,0 +1,17 @@ +git init r1 && +# LINT: loop handles failure explicitly with "|| return 1" +for n in 1 2 3 4 5 +do + echo "This is file: $n" > r1/file.$n && + git -C r1 add file.$n && + git -C r1 commit -m "$n" || return 1 +done && + +git init r2 && +# LINT: loop fails to handle failure explicitly with "|| return 1" +for n in 1000 10000 +do + printf "%"$n"s" X > r2/large.$n && + git -C r2 add large.$n && + git -C r2 commit -m "$n" +done diff --git a/t/chainlint/loop-detect-status.expect b/t/chainlint/loop-detect-status.expect new file mode 100644 index 00000000000000..0ad23bb35e4fb1 --- /dev/null +++ b/t/chainlint/loop-detect-status.expect @@ -0,0 +1,18 @@ +( while test $i -le $blobcount +do + printf "Generating blob $i/$blobcount\r" >& 2 && + printf "blob\nmark :$i\ndata $blobsize\n" && + + printf "%-${blobsize}s" $i && + echo "M 100644 :$i $i" >> commit && + i=$(($i+1)) || + echo $? > exit-status +done && +echo "commit refs/heads/main" && +echo "author A U Thor 123456789 +0000" && +echo "committer C O Mitter 123456789 +0000" && +echo "data 5" && +echo ">2gb" && +cat commit ) | +git fast-import --big-file-threshold=2 && +test ! -f exit-status diff --git a/t/chainlint/loop-detect-status.test b/t/chainlint/loop-detect-status.test new file mode 100644 index 00000000000000..1c6c23cfc9ebae --- /dev/null +++ b/t/chainlint/loop-detect-status.test @@ -0,0 +1,19 @@ +# LINT: "$?" handled explicitly within loop body +(while test $i -le $blobcount + do + printf "Generating blob $i/$blobcount\r" >&2 && + printf "blob\nmark :$i\ndata $blobsize\n" && + #test-tool genrandom $i $blobsize && + printf "%-${blobsize}s" $i && + echo "M 100644 :$i $i" >> commit && + i=$(($i+1)) || + echo $? 
> exit-status + done && + echo "commit refs/heads/main" && + echo "author A U Thor 123456789 +0000" && + echo "committer C O Mitter 123456789 +0000" && + echo "data 5" && + echo ">2gb" && + cat commit) | +git fast-import --big-file-threshold=2 && +test ! -f exit-status diff --git a/t/chainlint/loop-in-if.expect b/t/chainlint/loop-in-if.expect index e1be42376c5ef4..6c5d6e5b2438ef 100644 --- a/t/chainlint/loop-in-if.expect +++ b/t/chainlint/loop-in-if.expect @@ -4,7 +4,7 @@ while true do echo "pop" ?!AMP?! - echo "glup" + echo "glup" ?!LOOP?! done ?!AMP?! foo fi ?!AMP?! diff --git a/t/chainlint/nested-loop-detect-failure.expect b/t/chainlint/nested-loop-detect-failure.expect new file mode 100644 index 00000000000000..4793a0e8e12aeb --- /dev/null +++ b/t/chainlint/nested-loop-detect-failure.expect @@ -0,0 +1,31 @@ +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" ?!LOOP?! + done ?!LOOP?! +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" || return 1 + done +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" ?!LOOP?! 
+ done || return 1 +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" || return 1 + done || return 1 +done diff --git a/t/chainlint/nested-loop-detect-failure.test b/t/chainlint/nested-loop-detect-failure.test new file mode 100644 index 00000000000000..e6f0c1acfb8b26 --- /dev/null +++ b/t/chainlint/nested-loop-detect-failure.test @@ -0,0 +1,35 @@ +# LINT: neither loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" + done +done && + +# LINT: inner loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" || return 1 + done +done && + +# LINT: outer loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" + done || return 1 +done && + +# LINT: inner & outer loops handle failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" || return 1 + done || return 1 +done diff --git a/t/chainlint/semicolon.expect b/t/chainlint/semicolon.expect index ed0b3707ae9013..3aa2259f36c172 100644 --- a/t/chainlint/semicolon.expect +++ b/t/chainlint/semicolon.expect @@ -15,5 +15,5 @@ ) && (cd foo && for i in a b c; do - echo; + echo; ?!LOOP?! done) diff --git a/t/chainlint/while-loop.expect b/t/chainlint/while-loop.expect index 0d3a9b3d128940..f272aa21fee195 100644 --- a/t/chainlint/while-loop.expect +++ b/t/chainlint/while-loop.expect @@ -2,10 +2,10 @@ while true do echo foo ?!AMP?! - cat <<-EOF + cat <<-EOF ?!LOOP?! done ?!AMP?! while true; do echo foo && - cat bar + cat bar ?!LOOP?!
done ) From ae0c55abf8217bb06422f9eafcd7a30b2c8f9e8b Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:51 +0000 Subject: [PATCH 022/303] chainlint.pl: allow `|| echo` to signal failure upstream of a pipe The use of `|| return` (or `|| exit`) to signal failure within a loop isn't effective when the loop is upstream of a pipe since the pipe swallows all upstream exit codes and returns only the exit code of the final command in the pipeline. To work around this limitation, tests may adopt an alternative strategy of signaling failure by emitting text which would never be emitted in the non-failing case. For instance: while condition do command1 && command2 || echo "impossible text" done | sort >actual && Such usage indicates deliberate thought about failure cases by the test author, thus flagging them as missing `|| return` (or `|| exit`) is not helpful. Therefore, take this case into consideration when checking for explicit loop termination. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 3 +++ t/chainlint/loop-upstream-pipe.expect | 10 ++++++++++ t/chainlint/loop-upstream-pipe.test | 11 +++++++++++ 3 files changed, 24 insertions(+) create mode 100644 t/chainlint/loop-upstream-pipe.expect create mode 100644 t/chainlint/loop-upstream-pipe.test diff --git a/t/chainlint.pl b/t/chainlint.pl index 674b3ddf69633e..386999ce65d617 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -487,6 +487,9 @@ sub parse_loop_body { my @tokens = $self->SUPER::parse_loop_body(@_); # did loop signal failure via "|| return" or "|| exit"? return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens); + # did loop upstream of a pipe signal failure via "|| echo 'impossible + # text'" as the final command in the loop body? 
+ return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]); # flag missing "return/exit" handling explicit failure in loop body my $n = find_non_nl(\@tokens); splice(@tokens, $n + 1, 0, '?!LOOP?!'); diff --git a/t/chainlint/loop-upstream-pipe.expect b/t/chainlint/loop-upstream-pipe.expect new file mode 100644 index 00000000000000..0b82ecc4b96fee --- /dev/null +++ b/t/chainlint/loop-upstream-pipe.expect @@ -0,0 +1,10 @@ +( + git rev-list --objects --no-object-names base..loose | + while read oid + do + path="$objdir/$(test_oid_to_path "$oid")" && + printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" || + echo "object list generation failed for $oid" + done | + sort -k1 +) >expect && diff --git a/t/chainlint/loop-upstream-pipe.test b/t/chainlint/loop-upstream-pipe.test new file mode 100644 index 00000000000000..efb77da897c6f1 --- /dev/null +++ b/t/chainlint/loop-upstream-pipe.test @@ -0,0 +1,11 @@ +( + git rev-list --objects --no-object-names base..loose | + while read oid + do +# LINT: "|| echo" signals failure in loop upstream of a pipe + path="$objdir/$(test_oid_to_path "$oid")" && + printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" || + echo "object list generation failed for $oid" + done | + sort -k1 +) >expect && From 56066523ed3ebd16b455e99ce954ec19b6ac5ada Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:52 +0000 Subject: [PATCH 023/303] t/chainlint: add more chainlint.pl self-tests During the development of chainlint.pl, numerous new self-tests were created to verify correct functioning beyond the checks already represented by the existing self-tests. 
The new checks fall into several categories: * behavior of the lexical analyzer for complex cases, such as line splicing, token pasting, entering and exiting string contexts inside and outside of test script bodies; for instance: test_expect_success 'title' ' x=$(echo "something" | sed -e '\''s/\\/\\\\/g'\'' -e '\''s/[[/.*^$]/\\&/g'\'' ' * behavior of the parser for all compound grammatical constructs, such as `if...fi`, `case...esac`, `while...done`, `{...}`, etc., and for other legal shell grammatical constructs not covered by existing chainlint.sed self-tests, as well as complex cases, such as: OUT=$( ((large_git 1>&3) | :) 3>&1 ) && * detection of problems, such as &&-chain breakage, from top-level to any depth since the existing self-tests do not cover any top-level context and only cover subshells one level deep due to limitations of chainlint.sed * address blind spots in chainlint.sed (such as not detecting a broken &&-chain on a one-line for-loop in a subshell[1]) which chainlint.pl correctly detects * real-world cases which tripped up chainlint.pl during its development [1]: https://lore.kernel.org/git/dce35a47012fecc6edc11c68e91dbb485c5bc36f.1661663880.git.gitgitgadget@gmail.com/ Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint/blank-line-before-esac.expect | 18 +++++++++++ t/chainlint/blank-line-before-esac.test | 19 +++++++++++ t/chainlint/block.expect | 13 +++++++- t/chainlint/block.test | 15 ++++++++- t/chainlint/chained-block.expect | 9 ++++++ t/chainlint/chained-block.test | 11 +++++++ t/chainlint/chained-subshell.expect | 10 ++++++ t/chainlint/chained-subshell.test | 13 ++++++++ .../command-substitution-subsubshell.expect | 2 ++ .../command-substitution-subsubshell.test | 3 ++ t/chainlint/double-here-doc.expect | 2 ++ t/chainlint/double-here-doc.test | 12 +++++++ t/chainlint/dqstring-line-splice.expect | 3 ++ t/chainlint/dqstring-line-splice.test | 7 ++++ t/chainlint/dqstring-no-interpolate.expect | 11 +++++++ 
t/chainlint/dqstring-no-interpolate.test | 15 +++++++++ t/chainlint/empty-here-doc.expect | 3 ++ t/chainlint/empty-here-doc.test | 5 +++ t/chainlint/exclamation.expect | 4 +++ t/chainlint/exclamation.test | 8 +++++ t/chainlint/for-loop-abbreviated.expect | 5 +++ t/chainlint/for-loop-abbreviated.test | 6 ++++ t/chainlint/function.expect | 11 +++++++ t/chainlint/function.test | 13 ++++++++ t/chainlint/here-doc-indent-operator.expect | 5 +++ t/chainlint/here-doc-indent-operator.test | 13 ++++++++ t/chainlint/if-condition-split.expect | 7 ++++ t/chainlint/if-condition-split.test | 8 +++++ t/chainlint/one-liner-for-loop.expect | 9 ++++++ t/chainlint/one-liner-for-loop.test | 10 ++++++ t/chainlint/sqstring-in-sqstring.expect | 4 +++ t/chainlint/sqstring-in-sqstring.test | 5 +++ t/chainlint/token-pasting.expect | 27 ++++++++++++++++ t/chainlint/token-pasting.test | 32 +++++++++++++++++++ 34 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 t/chainlint/blank-line-before-esac.expect create mode 100644 t/chainlint/blank-line-before-esac.test create mode 100644 t/chainlint/chained-block.expect create mode 100644 t/chainlint/chained-block.test create mode 100644 t/chainlint/chained-subshell.expect create mode 100644 t/chainlint/chained-subshell.test create mode 100644 t/chainlint/command-substitution-subsubshell.expect create mode 100644 t/chainlint/command-substitution-subsubshell.test create mode 100644 t/chainlint/double-here-doc.expect create mode 100644 t/chainlint/double-here-doc.test create mode 100644 t/chainlint/dqstring-line-splice.expect create mode 100644 t/chainlint/dqstring-line-splice.test create mode 100644 t/chainlint/dqstring-no-interpolate.expect create mode 100644 t/chainlint/dqstring-no-interpolate.test create mode 100644 t/chainlint/empty-here-doc.expect create mode 100644 t/chainlint/empty-here-doc.test create mode 100644 t/chainlint/exclamation.expect create mode 100644 t/chainlint/exclamation.test create mode 100644 
t/chainlint/for-loop-abbreviated.expect create mode 100644 t/chainlint/for-loop-abbreviated.test create mode 100644 t/chainlint/function.expect create mode 100644 t/chainlint/function.test create mode 100644 t/chainlint/here-doc-indent-operator.expect create mode 100644 t/chainlint/here-doc-indent-operator.test create mode 100644 t/chainlint/if-condition-split.expect create mode 100644 t/chainlint/if-condition-split.test create mode 100644 t/chainlint/one-liner-for-loop.expect create mode 100644 t/chainlint/one-liner-for-loop.test create mode 100644 t/chainlint/sqstring-in-sqstring.expect create mode 100644 t/chainlint/sqstring-in-sqstring.test create mode 100644 t/chainlint/token-pasting.expect create mode 100644 t/chainlint/token-pasting.test diff --git a/t/chainlint/blank-line-before-esac.expect b/t/chainlint/blank-line-before-esac.expect new file mode 100644 index 00000000000000..48ed4eb1246efc --- /dev/null +++ b/t/chainlint/blank-line-before-esac.expect @@ -0,0 +1,18 @@ +test_done ( ) { + case "$test_failure" in + 0 ) + test_at_end_hook_ + + exit 0 ;; + + * ) + if test $test_external_has_tap -eq 0 + then + say_color error "# failed $test_failure among $msg" + say "1..$test_count" + fi + + exit 1 ;; + + esac +} diff --git a/t/chainlint/blank-line-before-esac.test b/t/chainlint/blank-line-before-esac.test new file mode 100644 index 00000000000000..cecccad19f5a9b --- /dev/null +++ b/t/chainlint/blank-line-before-esac.test @@ -0,0 +1,19 @@ +# LINT: blank line before "esac" +test_done () { + case "$test_failure" in + 0) + test_at_end_hook_ + + exit 0 ;; + + *) + if test $test_external_has_tap -eq 0 + then + say_color error "# failed $test_failure among $msg" + say "1..$test_count" + fi + + exit 1 ;; + + esac +} diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect index 37dbf7d95fa7f9..a3bcea492a915f 100644 --- a/t/chainlint/block.expect +++ b/t/chainlint/block.expect @@ -9,4 +9,15 @@ echo c } ?!AMP?! baz -) +) && + +{ + echo a ; ?!AMP?! 
echo b +} && +{ echo a ; ?!AMP?! echo b ; } && + +{ + echo "${var}9" && + echo "done" +} && +finis diff --git a/t/chainlint/block.test b/t/chainlint/block.test index 0a82fd579f6751..4ab69a4afc4038 100644 --- a/t/chainlint/block.test +++ b/t/chainlint/block.test @@ -11,4 +11,17 @@ echo c } baz -) +) && + +# LINT: ";" not allowed in place of "&&" +{ + echo a; echo b +} && +{ echo a; echo b; } && + +# LINT: "}" inside string not mistaken as end of block +{ + echo "${var}9" && + echo "done" +} && +finis diff --git a/t/chainlint/chained-block.expect b/t/chainlint/chained-block.expect new file mode 100644 index 00000000000000..574cdceb071804 --- /dev/null +++ b/t/chainlint/chained-block.expect @@ -0,0 +1,9 @@ +echo nobody home && { + test the doohicky ?!AMP?! + right now +} && + +GIT_EXTERNAL_DIFF=echo git diff | { + read path oldfile oldhex oldmode newfile newhex newmode && + test "z$oh" = "z$oldhex" +} diff --git a/t/chainlint/chained-block.test b/t/chainlint/chained-block.test new file mode 100644 index 00000000000000..86f81ece63903f --- /dev/null +++ b/t/chainlint/chained-block.test @@ -0,0 +1,11 @@ +# LINT: start of block chained to preceding command +echo nobody home && { + test the doohicky + right now +} && + +# LINT: preceding command pipes to block on same line +GIT_EXTERNAL_DIFF=echo git diff | { + read path oldfile oldhex oldmode newfile newhex newmode && + test "z$oh" = "z$oldhex" +} diff --git a/t/chainlint/chained-subshell.expect b/t/chainlint/chained-subshell.expect new file mode 100644 index 00000000000000..af0369d3285b17 --- /dev/null +++ b/t/chainlint/chained-subshell.expect @@ -0,0 +1,10 @@ +mkdir sub && ( + cd sub && + foo the bar ?!AMP?! + nuff said +) && + +cut "-d " -f actual | ( read s1 s2 s3 && +test -f $s1 ?!AMP?! 
+test $(cat $s2) = tree2path1 && +test $(cat $s3) = tree3path1 ) diff --git a/t/chainlint/chained-subshell.test b/t/chainlint/chained-subshell.test new file mode 100644 index 00000000000000..4ff6ddd8cbd078 --- /dev/null +++ b/t/chainlint/chained-subshell.test @@ -0,0 +1,13 @@ +# LINT: start of subshell chained to preceding command +mkdir sub && ( + cd sub && + foo the bar + nuff said +) && + +# LINT: preceding command pipes to subshell on same line +cut "-d " -f actual | (read s1 s2 s3 && +test -f $s1 +test $(cat $s2) = tree2path1 && +# LINT: closing subshell ")" correctly detected on same line as "$(...)" +test $(cat $s3) = tree3path1) diff --git a/t/chainlint/command-substitution-subsubshell.expect b/t/chainlint/command-substitution-subsubshell.expect new file mode 100644 index 00000000000000..ab2f79e8457037 --- /dev/null +++ b/t/chainlint/command-substitution-subsubshell.expect @@ -0,0 +1,2 @@ +OUT=$(( ( large_git 1 >& 3 ) | : ) 3 >& 1) && +test_match_signal 13 "$OUT" diff --git a/t/chainlint/command-substitution-subsubshell.test b/t/chainlint/command-substitution-subsubshell.test new file mode 100644 index 00000000000000..321de2951ce13f --- /dev/null +++ b/t/chainlint/command-substitution-subsubshell.test @@ -0,0 +1,3 @@ +# LINT: subshell nested in subshell nested in command substitution +OUT=$( ((large_git 1>&3) | :) 3>&1 ) && +test_match_signal 13 "$OUT" diff --git a/t/chainlint/double-here-doc.expect b/t/chainlint/double-here-doc.expect new file mode 100644 index 00000000000000..75477bb1add492 --- /dev/null +++ b/t/chainlint/double-here-doc.expect @@ -0,0 +1,2 @@ +run_sub_test_lib_test_err run-inv-range-start "--run invalid range start" --run="a-5" <<-EOF && +check_sub_test_lib_test_err run-inv-range-start <<-EOF_OUT 3 <<-EOF_ERR diff --git a/t/chainlint/double-here-doc.test b/t/chainlint/double-here-doc.test new file mode 100644 index 00000000000000..cd584a43573004 --- /dev/null +++ b/t/chainlint/double-here-doc.test @@ -0,0 +1,12 @@ 
+run_sub_test_lib_test_err run-inv-range-start \ + "--run invalid range start" \ + --run="a-5" <<-\EOF && +test_expect_success "passing test #1" "true" +test_done +EOF +check_sub_test_lib_test_err run-inv-range-start \ + <<-\EOF_OUT 3<<-EOF_ERR +> FATAL: Unexpected exit with code 1 +EOF_OUT +> error: --run: invalid non-numeric in range start: ${SQ}a-5${SQ} +EOF_ERR diff --git a/t/chainlint/dqstring-line-splice.expect b/t/chainlint/dqstring-line-splice.expect new file mode 100644 index 00000000000000..bf9ced60d4c4a3 --- /dev/null +++ b/t/chainlint/dqstring-line-splice.expect @@ -0,0 +1,3 @@ +echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' > expect && +test_must_fail git commit --fixup=reword:HEAD~ $1 2 > actual && +test_cmp expect actual diff --git a/t/chainlint/dqstring-line-splice.test b/t/chainlint/dqstring-line-splice.test new file mode 100644 index 00000000000000..b40714439f6fc8 --- /dev/null +++ b/t/chainlint/dqstring-line-splice.test @@ -0,0 +1,7 @@ +# LINT: line-splice within DQ-string +'" +echo 'fatal: reword option of --fixup is mutually exclusive with'\ + '--patch/--interactive/--all/--include/--only' >expect && +test_must_fail git commit --fixup=reword:HEAD~ $1 2>actual && +test_cmp expect actual +"' diff --git a/t/chainlint/dqstring-no-interpolate.expect b/t/chainlint/dqstring-no-interpolate.expect new file mode 100644 index 00000000000000..10724987a5fbb6 --- /dev/null +++ b/t/chainlint/dqstring-no-interpolate.expect @@ -0,0 +1,11 @@ +grep "^ ! [rejected][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out && + +grep "^\.git$" output.txt && + + +( + cd client$version && + GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. 
$(cat ../input) +) > output && + cut -d ' ' -f 2 < output | sort > actual && + test_cmp expect actual diff --git a/t/chainlint/dqstring-no-interpolate.test b/t/chainlint/dqstring-no-interpolate.test new file mode 100644 index 00000000000000..d2f4219cbbbcd7 --- /dev/null +++ b/t/chainlint/dqstring-no-interpolate.test @@ -0,0 +1,15 @@ +# LINT: regex dollar-sign eol anchor in double-quoted string not special +grep "^ ! \[rejected\][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out && + +# LINT: escaped "$" not mistaken for variable expansion +grep "^\\.git\$" output.txt && + +'" +( + cd client$version && +# LINT: escaped dollar-sign in double-quoted test body + GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. \$(cat ../input) +) >output && + cut -d ' ' -f 2 actual && + test_cmp expect actual +"' diff --git a/t/chainlint/empty-here-doc.expect b/t/chainlint/empty-here-doc.expect new file mode 100644 index 00000000000000..f42f2d41ba8c68 --- /dev/null +++ b/t/chainlint/empty-here-doc.expect @@ -0,0 +1,3 @@ +git ls-tree $tree path > current && +cat > expected <current && +# LINT: empty here-doc +cat >expected <<\EOF && +EOF +test_output diff --git a/t/chainlint/exclamation.expect b/t/chainlint/exclamation.expect new file mode 100644 index 00000000000000..2d961a58c6676c --- /dev/null +++ b/t/chainlint/exclamation.expect @@ -0,0 +1,4 @@ +if ! condition ; then echo nope ; else yep ; fi && +test_prerequisite !MINGW && +mail uucp!address && +echo !whatever! diff --git a/t/chainlint/exclamation.test b/t/chainlint/exclamation.test new file mode 100644 index 00000000000000..323595b5bd8635 --- /dev/null +++ b/t/chainlint/exclamation.test @@ -0,0 +1,8 @@ +# LINT: "! word" is two tokens +if ! condition; then echo nope; else yep; fi && +# LINT: "!word" is single token, not two tokens "!" and "word" +test_prerequisite !MINGW && +# LINT: "word!word" is single token, not three tokens "word", "!", and "word" +mail uucp!address && +# LINT: "!word!" 
is single token, not three tokens "!", "word", and "!" +echo !whatever! diff --git a/t/chainlint/for-loop-abbreviated.expect b/t/chainlint/for-loop-abbreviated.expect new file mode 100644 index 00000000000000..a21007a63f171c --- /dev/null +++ b/t/chainlint/for-loop-abbreviated.expect @@ -0,0 +1,5 @@ +for it +do + path=$(expr "$it" : ( [^:]*) ) && + git update-index --add "$path" || exit +done diff --git a/t/chainlint/for-loop-abbreviated.test b/t/chainlint/for-loop-abbreviated.test new file mode 100644 index 00000000000000..1084eccb89c1eb --- /dev/null +++ b/t/chainlint/for-loop-abbreviated.test @@ -0,0 +1,6 @@ +# LINT: for-loop lacking optional "in [word...]" before "do" +for it +do + path=$(expr "$it" : '\([^:]*\)') && + git update-index --add "$path" || exit +done diff --git a/t/chainlint/function.expect b/t/chainlint/function.expect new file mode 100644 index 00000000000000..a14388e6b9faeb --- /dev/null +++ b/t/chainlint/function.expect @@ -0,0 +1,11 @@ +sha1_file ( ) { + echo "$*" | sed "s#..#.git/objects/&/#" +} && + +remove_object ( ) { + file=$(sha1_file "$*") && + test -e "$file" ?!AMP?! + rm -f "$file" +} ?!AMP?! + +sha1_file arg && remove_object arg diff --git a/t/chainlint/function.test b/t/chainlint/function.test new file mode 100644 index 00000000000000..5ee59562c93f36 --- /dev/null +++ b/t/chainlint/function.test @@ -0,0 +1,13 @@ +# LINT: "()" in function definition not mistaken for subshell +sha1_file() { + echo "$*" | sed "s#..#.git/objects/&/#" +} && + +# LINT: broken &&-chain in function and after function +remove_object() { + file=$(sha1_file "$*") && + test -e "$file" + rm -f "$file" +} + +sha1_file arg && remove_object arg diff --git a/t/chainlint/here-doc-indent-operator.expect b/t/chainlint/here-doc-indent-operator.expect new file mode 100644 index 00000000000000..fb6cf7285d0264 --- /dev/null +++ b/t/chainlint/here-doc-indent-operator.expect @@ -0,0 +1,5 @@ +cat > expect <<-EOF && + +cat > expect <<-EOF ?!AMP?! 
+ +cleanup diff --git a/t/chainlint/here-doc-indent-operator.test b/t/chainlint/here-doc-indent-operator.test new file mode 100644 index 00000000000000..c8a6f18eb4540f --- /dev/null +++ b/t/chainlint/here-doc-indent-operator.test @@ -0,0 +1,13 @@ +# LINT: whitespace between operator "<<-" and tag legal +cat >expect <<- EOF && +header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0 +num_commits: $1 +chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data +EOF + +# LINT: not an indented here-doc; just a plain here-doc with tag named "-EOF" +cat >expect << -EOF +this is not indented +-EOF + +cleanup diff --git a/t/chainlint/if-condition-split.expect b/t/chainlint/if-condition-split.expect new file mode 100644 index 00000000000000..ee745ef8d7fcc5 --- /dev/null +++ b/t/chainlint/if-condition-split.expect @@ -0,0 +1,7 @@ +if bob && + marcia || + kevin +then + echo "nomads" ?!AMP?! + echo "for sure" +fi diff --git a/t/chainlint/if-condition-split.test b/t/chainlint/if-condition-split.test new file mode 100644 index 00000000000000..240daa9fd5d67d --- /dev/null +++ b/t/chainlint/if-condition-split.test @@ -0,0 +1,8 @@ +# LINT: "if" condition split across multiple lines at "&&" or "||" +if bob && + marcia || + kevin +then + echo "nomads" + echo "for sure" +fi diff --git a/t/chainlint/one-liner-for-loop.expect b/t/chainlint/one-liner-for-loop.expect new file mode 100644 index 00000000000000..51a3dc7c5448e2 --- /dev/null +++ b/t/chainlint/one-liner-for-loop.expect @@ -0,0 +1,9 @@ +git init dir-rename-and-content && +( + cd dir-rename-and-content && + test_write_lines 1 2 3 4 5 >foo && + mkdir olddir && + for i in a b c; do echo $i >olddir/$i; ?!LOOP?! done ?!AMP?! 
+ git add foo olddir && + git commit -m "original" && +) diff --git a/t/chainlint/one-liner-for-loop.test b/t/chainlint/one-liner-for-loop.test new file mode 100644 index 00000000000000..4bd8c066c79963 --- /dev/null +++ b/t/chainlint/one-liner-for-loop.test @@ -0,0 +1,10 @@ +git init dir-rename-and-content && +( + cd dir-rename-and-content && + test_write_lines 1 2 3 4 5 >foo && + mkdir olddir && +# LINT: one-liner for-loop missing "|| exit"; also broken &&-chain + for i in a b c; do echo $i >olddir/$i; done + git add foo olddir && + git commit -m "original" && +) diff --git a/t/chainlint/sqstring-in-sqstring.expect b/t/chainlint/sqstring-in-sqstring.expect new file mode 100644 index 00000000000000..cf0b591cf7d8e3 --- /dev/null +++ b/t/chainlint/sqstring-in-sqstring.expect @@ -0,0 +1,4 @@ +perl -e ' + defined($_ = -s $_) or die for @ARGV; + exit 1 if $ARGV[0] <= $ARGV[1]; +' test-2-$packname_2.pack test-3-$packname_3.pack diff --git a/t/chainlint/sqstring-in-sqstring.test b/t/chainlint/sqstring-in-sqstring.test new file mode 100644 index 00000000000000..77a425e0c795c5 --- /dev/null +++ b/t/chainlint/sqstring-in-sqstring.test @@ -0,0 +1,5 @@ +# LINT: SQ-string Perl code fragment within SQ-string +perl -e '\'' + defined($_ = -s $_) or die for @ARGV; + exit 1 if $ARGV[0] <= $ARGV[1]; +'\'' test-2-$packname_2.pack test-3-$packname_3.pack diff --git a/t/chainlint/token-pasting.expect b/t/chainlint/token-pasting.expect new file mode 100644 index 00000000000000..342360bcd05941 --- /dev/null +++ b/t/chainlint/token-pasting.expect @@ -0,0 +1,27 @@ +git config filter.rot13.smudge ./rot13.sh && +git config filter.rot13.clean ./rot13.sh && + +{ + echo "*.t filter=rot13" ?!AMP?! + echo "*.i ident" +} > .gitattributes && + +{ + echo a b c d e f g h i j k l m ?!AMP?! + echo n o p q r s t u v w x y z ?!AMP?! 
+ echo '$Id$' +} > test && +cat test > test.t && +cat test > test.o && +cat test > test.i && +git add test test.t test.i && +rm -f test test.t test.i && +git checkout -- test test.t test.i && + +echo "content-test2" > test2.o && +echo "content-test3 - filename with special characters" > "test3 'sq',$x=.o" ?!AMP?! + +downstream_url_for_sed=$( + printf "%sn" "$downstream_url" | + sed -e 's/\/\\/g' -e 's/[[/.*^$]/\&/g' +) diff --git a/t/chainlint/token-pasting.test b/t/chainlint/token-pasting.test new file mode 100644 index 00000000000000..b4610ce815af71 --- /dev/null +++ b/t/chainlint/token-pasting.test @@ -0,0 +1,32 @@ +# LINT: single token; composite of multiple strings +git config filter.rot13.smudge ./rot13.sh && +git config filter.rot13.clean ./rot13.sh && + +{ + echo "*.t filter=rot13" + echo "*.i ident" +} >.gitattributes && + +{ + echo a b c d e f g h i j k l m + echo n o p q r s t u v w x y z +# LINT: exit/enter string context and escaped-quote outside of string + echo '\''$Id$'\'' +} >test && +cat test >test.t && +cat test >test.o && +cat test >test.i && +git add test test.t test.i && +rm -f test test.t test.i && +git checkout -- test test.t test.i && + +echo "content-test2" >test2.o && +# LINT: exit/enter string context and escaped-quote outside of string +echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x=.o" + +# LINT: single token; composite of multiple strings +downstream_url_for_sed=$( + printf "%s\n" "$downstream_url" | +# LINT: exit/enter string context; "&" inside string not command terminator + sed -e '\''s/\\/\\\\/g'\'' -e '\''s/[[/.*^$]/\\&/g'\'' +) From 9fd911237f94680e0d1985e1f2fba751b16f5a94 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:53 +0000 Subject: [PATCH 024/303] test-lib: retire "lint harder" optimization hack `test_run_` in test-lib.sh "lints" the body of a test by sending it down a `sed chainlint.sed | grep` pipeline; this happens once for each test run by a test script. 
Although this pipeline may seem relatively cheap in isolation, it can become expensive when invoked 26800+ times by `make test`, once for each test run, despite the existence of only 16500+ test definitions across all test scripts. This difference in the number of tests defined in the scripts (16500+) and the number of tests actually run by `make test` (26800+) is explained by the fact that some test scripts run a very large number of small tests, all driven by a series of functions/loops which fill in the test bodies. This means that certain test definitions are being linted repeatedly (tens or hundreds of times) unnecessarily. To avoid such unnecessary work, 2d86a96220 (t: avoid sed-based chain-linting in some expensive cases, 2021-05-13) added an optimization hack which allows individual scripts to manually suppress the unnecessary repeated linting of the same test definition. However, unlike chainlint.sed which checks a test body as the test is run, chainlint.pl checks each test definition just once, no matter how many times the test is run, thus the sort of optimization hack introduced by 2d86a96220 is no longer needed and can be retired. Therefore, revert 2d86a96220. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/README | 5 ----- t/t0027-auto-crlf.sh | 7 +------ t/t3070-wildmatch.sh | 5 ----- t/test-lib.sh | 7 ++----- 4 files changed, 3 insertions(+), 21 deletions(-) diff --git a/t/README b/t/README index 2f439f96589f82..979b2d4833d983 100644 --- a/t/README +++ b/t/README @@ -196,11 +196,6 @@ appropriately before running "make". Short options can be bundled, i.e. this feature by setting the GIT_TEST_CHAIN_LINT environment variable to "1" or "0", respectively. - A few test scripts disable some of the more advanced - chain-linting detection in the name of efficiency. You can - override this by setting the GIT_TEST_CHAIN_LINT_HARDER - environment variable to "1". 
- --stress:: Run the test script repeatedly in multiple parallel jobs until one of them fails. Useful for reproducing rare failures in diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index a22e0e1382c42f..a94ac1eae377c0 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -387,9 +387,7 @@ test_expect_success 'setup main' ' test_tick ' -# Disable extra chain-linting for the next set of tests. There are many -# auto-generated ones that are not worth checking over and over. -GIT_TEST_CHAIN_LINT_HARDER_DEFAULT=0 + warn_LF_CRLF="LF will be replaced by CRLF" warn_CRLF_LF="CRLF will be replaced by LF" @@ -606,9 +604,6 @@ do checkout_files "" "$id" "crlf" true "" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul done -# The rest of the tests are unique; do the usual linting. -unset GIT_TEST_CHAIN_LINT_HARDER_DEFAULT - # Should be the last test case: remove some files from the worktree test_expect_success 'ls-files --eol -d -z' ' rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes && diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh index f9539968e4c16a..5d871fde960a03 100755 --- a/t/t3070-wildmatch.sh +++ b/t/t3070-wildmatch.sh @@ -5,11 +5,6 @@ test_description='wildmatch tests' TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh -# Disable expensive chain-lint tests; all of the tests in this script -# are variants of a few trivial test-tool invocations, and there are a lot of -# them. -GIT_TEST_CHAIN_LINT_HARDER_DEFAULT=0 - should_create_test_file() { file=$1 diff --git a/t/test-lib.sh b/t/test-lib.sh index 377cc1c1203d6f..dc0d0591095a9e 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1091,11 +1091,8 @@ test_run_ () { trace= # 117 is magic because it is unlikely to match the exit # code of other programs - if test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 
3>&1)" || - { - test "${GIT_TEST_CHAIN_LINT_HARDER:-${GIT_TEST_CHAIN_LINT_HARDER_DEFAULT:-1}}" != 0 && - $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') - } + if $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') || + test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" then BUG "broken &&-chain or run-away HERE-DOC: $1" fi From 23a14f301662df6d003b5bf4dc598f02311c6b30 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:54 +0000 Subject: [PATCH 025/303] test-lib: replace chainlint.sed with chainlint.pl By automatically invoking chainlint.sed upon each test it runs, `test_run_` in test-lib.sh ensures that broken &&-chains will be detected early as tests are modified or new tests are created since it is typical to run a test script manually (i.e. `./t1234-test-script.sh`) during test development. Now that the implementation of chainlint.pl is complete, modify test-lib.sh to invoke it automatically instead of chainlint.sed each time a test script is run. This change reduces the number of "linter" invocations from 26800+ (once per test run) down to 1050+ (once per test script), however, a subsequent change will drop the number of invocations to 1 per `make test`, thus fully realizing the benefit of the new linter. Note that the "magic exit code 117" &&-chain checker added by bb79af9d09 (t/test-lib: introduce --chain-lint option, 2015-03-20) which is built into t/test-lib.sh is retained since it has near zero-cost and (theoretically) may catch a broken &&-chain not caught by chainlint.pl. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/buildsystems/CMakeLists.txt | 2 +- t/test-lib.sh | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 2237109b57fd12..ca358a21a5fe02 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -1076,7 +1076,7 @@ if(NOT ${CMAKE_BINARY_DIR}/CMakeCache.txt STREQUAL ${CACHE_PATH}) "string(REPLACE \"\${GIT_BUILD_DIR_REPL}\" \"GIT_BUILD_DIR=\\\"$TEST_DIRECTORY/../${BUILD_DIR_RELATIVE}\\\"\" content \"\${content}\")\n" "file(WRITE ${CMAKE_SOURCE_DIR}/t/test-lib.sh \${content})") #misc copies - file(COPY ${CMAKE_SOURCE_DIR}/t/chainlint.sed DESTINATION ${CMAKE_BINARY_DIR}/t/) + file(COPY ${CMAKE_SOURCE_DIR}/t/chainlint.pl DESTINATION ${CMAKE_BINARY_DIR}/t/) file(COPY ${CMAKE_SOURCE_DIR}/po/is.po DESTINATION ${CMAKE_BINARY_DIR}/po/) file(COPY ${CMAKE_SOURCE_DIR}/mergetools/tkdiff DESTINATION ${CMAKE_BINARY_DIR}/mergetools/) file(COPY ${CMAKE_SOURCE_DIR}/contrib/completion/git-prompt.sh DESTINATION ${CMAKE_BINARY_DIR}/contrib/completion/) diff --git a/t/test-lib.sh b/t/test-lib.sh index dc0d0591095a9e..a65df2fd220465 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1091,8 +1091,7 @@ test_run_ () { trace= # 117 is magic because it is unlikely to match the exit # code of other programs - if $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') || - test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" + if test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" then BUG "broken &&-chain or run-away HERE-DOC: $1" fi @@ -1588,6 +1587,12 @@ then BAIL_OUT_ENV_NEEDS_SANITIZE_LEAK "GIT_TEST_SANITIZE_LEAK_LOG=true" fi +if test "${GIT_TEST_CHAIN_LINT:-1}" != 0 +then + "$PERL_PATH" "$TEST_DIRECTORY/chainlint.pl" "$0" || + BUG "lint error (see '?!...!? 
annotations above)" +fi + # Last-minute variable setup USER_HOME="$HOME" HOME="$TRASH_DIRECTORY" From 69b9924b875079babb1d3f665bdc719c4871ba73 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:55 +0000 Subject: [PATCH 026/303] t/Makefile: teach `make test` and `make prove` to run chainlint.pl Unlike chainlint.sed which "lints" a single test body at a time, thus is invoked once per test, chainlint.pl can check all test bodies in all test scripts with a single invocation. As such, it is akin to other bulk "linters" run by the Makefile, such as `test-lint-shell-syntax`, `test-lint-duplicates`, etc. Therefore, teach `make test` and `make prove` to invoke chainlint.pl along with the other bulk linters. Also, since the single chainlint.pl invocation by `make test` or `make prove` has already checked all tests in all scripts, instruct the individual test scripts not to run chainlint.pl on themselves unnecessarily. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/Makefile | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/t/Makefile b/t/Makefile index 11f276774ea286..3db48c0cb64825 100644 --- a/t/Makefile +++ b/t/Makefile @@ -36,14 +36,21 @@ CHAINLINTTMP_SQ = $(subst ','\'',$(CHAINLINTTMP)) T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)) THELPERS = $(sort $(filter-out $(T),$(wildcard *.sh))) +TLIBS = $(sort $(wildcard lib-*.sh)) annotate-tests.sh TPERF = $(sort $(wildcard perf/p[0-9][0-9][0-9][0-9]-*.sh)) +TINTEROP = $(sort $(wildcard interop/i[0-9][0-9][0-9][0-9]-*.sh)) CHAINLINTTESTS = $(sort $(patsubst chainlint/%.test,%,$(wildcard chainlint/*.test))) CHAINLINT = '$(PERL_PATH_SQ)' chainlint.pl +# `test-chainlint` (which is a dependency of `test-lint`, `test` and `prove`) +# checks all tests in all scripts via a single invocation, so tell individual +# scripts not to "chainlint" themselves +CHAINLINTSUPPRESS = GIT_TEST_CHAIN_LINT=0 && export GIT_TEST_CHAIN_LINT && + all: $(DEFAULT_TEST_TARGET) 
test: pre-clean check-chainlint $(TEST_LINT) - $(MAKE) aggregate-results-and-cleanup + $(CHAINLINTSUPPRESS) $(MAKE) aggregate-results-and-cleanup failed: @failed=$$(cd '$(TEST_RESULTS_DIRECTORY_SQ)' && \ @@ -52,7 +59,7 @@ failed: test -z "$$failed" || $(MAKE) $$failed prove: pre-clean check-chainlint $(TEST_LINT) - @echo "*** prove ***"; $(PROVE) --exec '$(TEST_SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS) + @echo "*** prove ***"; $(CHAINLINTSUPPRESS) $(PROVE) --exec '$(TEST_SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS) $(MAKE) clean-except-prove-cache $(T): @@ -99,6 +106,9 @@ check-chainlint: test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \ test-lint-filenames +ifneq ($(GIT_TEST_CHAIN_LINT),0) +test-lint: test-chainlint +endif test-lint-duplicates: @dups=`echo $(T) $(TPERF) | tr ' ' '\n' | sed 's/-.*//' | sort | uniq -d` && \ @@ -121,6 +131,9 @@ test-lint-filenames: test -z "$$bad" || { \ echo >&2 "non-portable file name(s): $$bad"; exit 1; } +test-chainlint: + @$(CHAINLINT) $(T) $(TLIBS) $(TPERF) $(TINTEROP) + aggregate-results-and-cleanup: $(T) $(MAKE) aggregate-results $(MAKE) clean @@ -136,4 +149,5 @@ valgrind: perf: $(MAKE) -C perf/ all -.PHONY: pre-clean $(T) aggregate-results clean valgrind perf check-chainlint clean-chainlint +.PHONY: pre-clean $(T) aggregate-results clean valgrind perf \ + check-chainlint clean-chainlint test-chainlint From fb41727b7ed7f62d121cd846f826fb1c62d1bc6a Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:56 +0000 Subject: [PATCH 027/303] t: retire unused chainlint.sed Retire chainlint.sed since it has been replaced by a more accurate and functional &&-chain "linter", thus is no longer used. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.sed | 399 ------------------------------------------------ 1 file changed, 399 deletions(-) delete mode 100644 t/chainlint.sed diff --git a/t/chainlint.sed b/t/chainlint.sed deleted file mode 100644 index dc4ce37cb5188a..00000000000000 --- a/t/chainlint.sed +++ /dev/null @@ -1,399 +0,0 @@ -#------------------------------------------------------------------------------ -# Detect broken &&-chains in tests. -# -# At present, only &&-chains in subshells are examined by this linter; -# top-level &&-chains are instead checked directly by the test framework. Like -# the top-level &&-chain linter, the subshell linter (intentionally) does not -# check &&-chains within {...} blocks. -# -# Checking for &&-chain breakage is done line-by-line by pure textual -# inspection. -# -# Incomplete lines (those ending with "\") are stitched together with following -# lines to simplify processing, particularly of "one-liner" statements. -# Top-level here-docs are swallowed to avoid false positives within the -# here-doc body, although the statement to which the here-doc is attached is -# retained. -# -# Heuristics are used to detect end-of-subshell when the closing ")" is cuddled -# with the final subshell statement on the same line: -# -# (cd foo && -# bar) -# -# in order to avoid misinterpreting the ")" in constructs such as "x=$(...)" -# and "case $x in *)" as ending the subshell. -# -# Lines missing a final "&&" are flagged with "?!AMP?!", as are lines which -# chain commands with ";" internally rather than "&&". A line may be flagged -# for both violations. -# -# Detection of a missing &&-link in a multi-line subshell is complicated by the -# fact that the last statement before the closing ")" must not end with "&&". -# Since processing is line-by-line, it is not known whether a missing "&&" is -# legitimate or not until the _next_ line is seen. 
To accommodate this, within -# multi-line subshells, each line is stored in sed's "hold" area until after -# the next line is seen and processed. If the next line is a stand-alone ")", -# then a missing "&&" on the previous line is legitimate; otherwise a missing -# "&&" is a break in the &&-chain. -# -# ( -# cd foo && -# bar -# ) -# -# In practical terms, when "bar" is encountered, it is flagged with "?!AMP?!", -# but when the stand-alone ")" line is seen which closes the subshell, the -# "?!AMP?!" violation is removed from the "bar" line (retrieved from the "hold" -# area) since the final statement of a subshell must not end with "&&". The -# final line of a subshell may still break the &&-chain by using ";" internally -# to chain commands together rather than "&&", but an internal "?!AMP?!" is -# never removed from a line even though a line-ending "?!AMP?!" might be. -# -# Care is taken to recognize the last _statement_ of a multi-line subshell, not -# necessarily the last textual _line_ within the subshell, since &&-chaining -# applies to statements, not to lines. Consequently, blank lines, comment -# lines, and here-docs are swallowed (but not the command to which the here-doc -# is attached), leaving the last statement in the "hold" area, not the last -# line, thus simplifying &&-link checking. -# -# The final statement before "done" in for- and while-loops, and before "elif", -# "else", and "fi" in if-then-else likewise must not end with "&&", thus -# receives similar treatment. -# -# Swallowing here-docs with arbitrary tags requires a bit of finesse. When a -# line such as "cat <cat <\n\1$/ is attempted to see if -# the content inside "<...>" matches the entirety of the newly-read line. For -# instance, if the next line read is "some data", when concatenated with the -# target line, it becomes "cat <cat <" does match the text following the -# newline, thus the closing here-doc tag has been found. 
The closing tag line -# and the "<...>" prefix on the target line are then discarded, leaving just -# the target line "cat <\1\2/ - :hered - N - /^<\([^>]*\)>.*\n[ ]*\1[ ]*$/!{ - s/\n.*$// - bhered - } - s/^<[^>]*>// - s/\n.*$// -} -:notdoc - -# one-liner "(...) &&" -/^[ ]*!*[ ]*(..*)[ ]*&&[ ]*$/boneline - -# same as above but without trailing "&&" -/^[ ]*!*[ ]*(..*)[ ]*$/boneline - -# one-liner "(...) >x" (or "2>x" or "|&]/boneline - -# multi-line "(...\n...)" -/^[ ]*(/bsubsh - -# innocuous line -- print it and advance to next line -b - -# found one-liner "(...)" -- mark suspect if it uses ";" internally rather than -# "&&" (but not ";" in a string) -:oneline -/;/{ - /"[^"]*;[^"]*"/!s/;/; ?!AMP?!/ -} -b - -:subsh -# bare "(" line? -- stash for later printing -/^[ ]*([ ]*$/ { - h - bnextln -} -# "(..." line -- "(" opening subshell cuddled with command; temporarily replace -# "(" with sentinel "^" and process the line as if "(" had been seen solo on -# the preceding line; this temporary replacement prevents several rules from -# accidentally thinking "(" introduces a nested subshell; "^" is changed back -# to "(" at output time -x -s/.*// -x -s/(/^/ -bslurp - -:nextln -N -s/.*\n// - -:slurp -# incomplete line "...\" -/\\$/bicmplte -# multi-line quoted string "...\n..."? -/"/bdqstr -# multi-line quoted string '...\n...'? 
(but not contraction in string "it's") -/'/{ - /"[^'"]*'[^'"]*"/!bsqstr -} -:folded -# here-doc -- swallow it (but not "<<" in a string) -/<<-*[ ]*[\\'"]*[A-Za-z0-9_]/{ - /"[^"]*<<[^"]*"/!bheredoc -} -# comment or empty line -- discard since final non-comment, non-empty line -# before closing ")", "done", "elsif", "else", or "fi" will need to be -# re-visited to drop "suspect" marking since final line of those constructs -# legitimately lacks "&&", so "suspect" mark must be removed -/^[ ]*#/bnextln -/^[ ]*$/bnextln -# in-line comment -- strip it (but not "#" in a string, Bash ${#...} array -# length, or Perforce "//depot/path#42" revision in filespec) -/[ ]#/{ - /"[^"]*#[^"]*"/!s/[ ]#.*$// -} -# one-liner "case ... esac" -/^[ ^]*case[ ]*..*esac/bchkchn -# multi-line "case ... esac" -/^[ ^]*case[ ]..*[ ]in/bcase -# multi-line "for ... done" or "while ... done" -/^[ ^]*for[ ]..*[ ]in/bcont -/^[ ^]*while[ ]/bcont -/^[ ]*do[ ]/bcont -/^[ ]*do[ ]*$/bcont -/;[ ]*do/bcont -/^[ ]*done[ ]*&&[ ]*$/bdone -/^[ ]*done[ ]*$/bdone -/^[ ]*done[ ]*[<>|]/bdone -/^[ ]*done[ ]*)/bdone -/||[ ]*exit[ ]/bcont -/||[ ]*exit[ ]*$/bcont -# multi-line "if...elsif...else...fi" -/^[ ^]*if[ ]/bcont -/^[ ]*then[ ]/bcont -/^[ ]*then[ ]*$/bcont -/;[ ]*then/bcont -/^[ ]*elif[ ]/belse -/^[ ]*elif[ ]*$/belse -/^[ ]*else[ ]/belse -/^[ ]*else[ ]*$/belse -/^[ ]*fi[ ]*&&[ ]*$/bdone -/^[ ]*fi[ ]*$/bdone -/^[ ]*fi[ ]*[<>|]/bdone -/^[ ]*fi[ ]*)/bdone -# nested one-liner "(...) &&" -/^[ ^]*(.*)[ ]*&&[ ]*$/bchkchn -# nested one-liner "(...)" -/^[ ^]*(.*)[ ]*$/bchkchn -# nested one-liner "(...) 
>x" (or "2>x" or "|]/bchkchn -# nested multi-line "(...\n...)" -/^[ ^]*(/bnest -# multi-line "{...\n...}" -/^[ ^]*{/bblock -# closing ")" on own line -- exit subshell -/^[ ]*)/bclssolo -# "$((...))" -- arithmetic expansion; not closing ")" -/\$(([^)][^)]*))[^)]*$/bchkchn -# "$(...)" -- command substitution; not closing ")" -/\$([^)][^)]*)[^)]*$/bchkchn -# multi-line "$(...\n...)" -- command substitution; treat as nested subshell -/\$([^)]*$/bnest -# "=(...)" -- Bash array assignment; not closing ")" -/=(/bchkchn -# closing "...) &&" -/)[ ]*&&[ ]*$/bclose -# closing "...)" -/)[ ]*$/bclose -# closing "...) >x" (or "2>x" or "|]/bclose -:chkchn -# mark suspect if line uses ";" internally rather than "&&" (but not ";" in a -# string and not ";;" in one-liner "case...esac") -/;/{ - /;;/!{ - /"[^"]*;[^"]*"/!s/;/; ?!AMP?!/ - } -} -# line ends with pipe "...|" -- valid; not missing "&&" -/|[ ]*$/bcont -# missing end-of-line "&&" -- mark suspect -/&&[ ]*$/!s/$/ ?!AMP?!/ -:cont -# retrieve and print previous line -x -s/^\([ ]*\)^/\1(/ -s/?!HERE?!/<\1?!HERE?!\2\3/ -:hdocsub -N -/^<\([^>]*\)>.*\n[ ]*\1[ ]*$/!{ - s/\n.*$// - bhdocsub -} -s/^<[^>]*>// -s/\n.*$// -bfolded - -# found "case ... in" -- pass through untouched -:case -x -s/^\([ ]*\)^/\1(/ -s/?!HERE?!/< Date: Fri, 2 Sep 2022 15:56:42 +0000 Subject: [PATCH 028/303] scalar: fix command documentation section header Rename the last section header in 'contrib/scalar/scalar.txt' from "Scalar" to "GIT". The linting rules of the 'documentation' CI build enforce the existence of a "GIT" section in command documentation. Although 'scalar.txt' is not yet checked, it will be in a future patch. Here, changing the header name is more appropriate than making a Scalar-specific exception to the linting rule. The existing "Scalar" section contains only a link back to the main Git documentation, essentially the same as the "GIT" section in builtin documentation. 
Changing the section name further clarifies the Scalar-Git association and maintains consistency with the rest of Git. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- contrib/scalar/scalar.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt index 1a12dc450774c2..f33436c7f65ff9 100644 --- a/contrib/scalar/scalar.txt +++ b/contrib/scalar/scalar.txt @@ -161,6 +161,6 @@ SEE ALSO -------- linkgit:git-clone[1], linkgit:git-maintenance[1]. -Scalar +GIT --- -Associated with the linkgit:git[1] suite +Part of the linkgit:git[1] suite From 7b5c93c6c6847b4b6037e38418bc8bbb8c2eada8 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:43 +0000 Subject: [PATCH 029/303] scalar: include in standard Git build & installation Move 'scalar' out of 'contrib/' and into the root of the Git tree. The goal of this change is to build 'scalar' as part of the standard Git build & install processes. This patch includes both the physical move of Scalar's files out of 'contrib/' ('scalar.c', 'scalar.txt', and 't9xxx-scalar.sh'), and the changes to the build definitions in 'Makefile' and 'CMakeLists.txt' to accommodate the new program. At a high level, Scalar is built so that: - there is a 'scalar-objs' target (similar to those created in 029bac01a8 (Makefile: add {program,xdiff,test,git,fuzz}-objs & objects targets, 2021-02-23)) for debugging purposes. - it appears in the root of the install directory (rather than the gitexecdir). - it is included in the 'bin-wrappers/' directory for use in tests. - it receives a platform-specific executable suffix (e.g., '.exe'), if applicable. - 'scalar.txt' is installed as 'man1' documentation. - the 'clean' target removes the 'scalar' executable. Additionally, update the root level '.gitignore' file to ignore the Scalar executable.
Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- .gitignore | 1 + Documentation/Makefile | 1 + {contrib/scalar => Documentation}/scalar.txt | 0 Makefile | 31 ++++--- contrib/buildsystems/CMakeLists.txt | 9 ++- contrib/scalar/.gitignore | 2 - contrib/scalar/Makefile | 35 -------- contrib/scalar/t/Makefile | 81 ------------------- contrib/scalar/scalar.c => scalar.c | 0 .../t/t9099-scalar.sh => t/t9210-scalar.sh | 10 +-- 10 files changed, 28 insertions(+), 142 deletions(-) rename {contrib/scalar => Documentation}/scalar.txt (100%) delete mode 100644 contrib/scalar/.gitignore delete mode 100644 contrib/scalar/Makefile delete mode 100644 contrib/scalar/t/Makefile rename contrib/scalar/scalar.c => scalar.c (100%) rename contrib/scalar/t/t9099-scalar.sh => t/t9210-scalar.sh (96%) diff --git a/.gitignore b/.gitignore index 80b530bbed2c80..3d1b880101e5ea 100644 --- a/.gitignore +++ b/.gitignore @@ -185,6 +185,7 @@ /git-whatchanged /git-worktree /git-write-tree +/scalar /git-core-*/?* /git.res /gitweb/GITWEB-BUILD-OPTIONS diff --git a/Documentation/Makefile b/Documentation/Makefile index bd6b6fcb93085d..16c9e062390661 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -21,6 +21,7 @@ MAN1_TXT += $(filter-out \ MAN1_TXT += git.txt MAN1_TXT += gitk.txt MAN1_TXT += gitweb.txt +MAN1_TXT += scalar.txt # man5 / man7 guides (note: new guides should also be added to command-list.txt) MAN5_TXT += gitattributes.txt diff --git a/contrib/scalar/scalar.txt b/Documentation/scalar.txt similarity index 100% rename from contrib/scalar/scalar.txt rename to Documentation/scalar.txt diff --git a/Makefile b/Makefile index eac30126e29fe6..e03f32ec1e707f 100644 --- a/Makefile +++ b/Makefile @@ -608,7 +608,9 @@ FUZZ_OBJS = FUZZ_PROGRAMS = GIT_OBJS = LIB_OBJS = +SCALAR_OBJS = OBJECTS = +OTHER_PROGRAMS = PROGRAM_OBJS = PROGRAMS = EXCLUDED_PROGRAMS = @@ -821,10 +823,12 @@ BUILT_INS += git-switch$X BUILT_INS += git-whatchanged$X # what 'all' will build but not install in 
gitexecdir -OTHER_PROGRAMS = git$X +OTHER_PROGRAMS += git$X +OTHER_PROGRAMS += scalar$X # what test wrappers are needed and 'install' will install, in bindir BINDIR_PROGRAMS_NEED_X += git +BINDIR_PROGRAMS_NEED_X += scalar BINDIR_PROGRAMS_NEED_X += git-receive-pack BINDIR_PROGRAMS_NEED_X += git-shell BINDIR_PROGRAMS_NEED_X += git-upload-archive @@ -2222,7 +2226,7 @@ profile-fast: profile-clean all:: $(ALL_COMMANDS_TO_INSTALL) $(SCRIPT_LIB) $(OTHER_PROGRAMS) GIT-BUILD-OPTIONS ifneq (,$X) - $(QUIET_BUILT_IN)$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) git$X)), test -d '$p' -o '$p' -ef '$p$X' || $(RM) '$p';) + $(QUIET_BUILT_IN)$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) $(OTHER_PROGRAMS))), test -d '$p' -o '$p' -ef '$p$X' || $(RM) '$p';) endif all:: @@ -2545,7 +2549,12 @@ GIT_OBJS += git.o .PHONY: git-objs git-objs: $(GIT_OBJS) +SCALAR_OBJS += scalar.o +.PHONY: scalar-objs +scalar-objs: $(SCALAR_OBJS) + OBJECTS += $(GIT_OBJS) +OBJECTS += $(SCALAR_OBJS) OBJECTS += $(PROGRAM_OBJS) OBJECTS += $(TEST_OBJS) OBJECTS += $(XDIFF_OBJS) @@ -2556,10 +2565,6 @@ ifndef NO_CURL OBJECTS += http.o http-walker.o remote-curl.o endif -SCALAR_SOURCES := contrib/scalar/scalar.c -SCALAR_OBJECTS := $(SCALAR_SOURCES:c=o) -OBJECTS += $(SCALAR_OBJECTS) - .PHONY: objects objects: $(OBJECTS) @@ -2691,7 +2696,7 @@ $(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o GIT-LDFLAGS $(GITLIBS $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ $(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS) -contrib/scalar/scalar$X: $(SCALAR_OBJECTS) GIT-LDFLAGS $(GITLIBS) +scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \ $(filter %.o,$^) $(LIBS) @@ -2747,8 +2752,7 @@ XGETTEXT_FLAGS_SH = $(XGETTEXT_FLAGS) --language=Shell \ XGETTEXT_FLAGS_PERL = $(XGETTEXT_FLAGS) --language=Perl \ --keyword=__ --keyword=N__ --keyword="__n:1,2" MSGMERGE_FLAGS = --add-location --backup=off --update -LOCALIZED_C = $(sort 
$(FOUND_C_SOURCES) $(FOUND_H_SOURCES) $(SCALAR_SOURCES) \ - $(GENERATED_H)) +LOCALIZED_C = $(sort $(FOUND_C_SOURCES) $(FOUND_H_SOURCES) $(GENERATED_H)) LOCALIZED_SH = $(sort $(SCRIPT_SH) git-sh-setup.sh) LOCALIZED_PERL = $(sort $(SCRIPT_PERL)) @@ -3062,7 +3066,7 @@ bin-wrappers/%: wrap-for-bin.sh $(call mkdir_p_parent_template) $(QUIET_GEN)sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ -e 's|@@BUILD_DIR@@|$(shell pwd)|' \ - -e 's|@@PROG@@|$(patsubst test-%,t/helper/test-%$(X),$(@F))$(patsubst git%,$(X),$(filter $(@F),$(BINDIR_PROGRAMS_NEED_X)))|' < $< > $@ && \ + -e 's|@@PROG@@|$(patsubst test-%,t/helper/test-%,$(@F))$(if $(filter-out $(BINDIR_PROGRAMS_NO_X),$(@F)),$(X),)|' < $< > $@ && \ chmod +x $@ # GNU make supports exporting all variables by "export" without parameters. @@ -3276,14 +3280,14 @@ ifndef NO_TCLTK $(MAKE) -C git-gui gitexecdir='$(gitexec_instdir_SQ)' install endif ifneq (,$X) - $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) git$X)), test '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p' -ef '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p$X' || $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';) + $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_COMMANDS_TO_INSTALL) $(OTHER_PROGRAMS))), test '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p' -ef '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p$X' || $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';) endif bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \ execdir=$$(cd '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' && pwd) && \ destdir_from_execdir_SQ=$$(echo '$(gitexecdir_relative_SQ)' | sed -e 's|[^/][^/]*|..|g') && \ { test "$$bindir/" = "$$execdir/" || \ - for p in git$X $(filter $(install_bindir_programs),$(ALL_PROGRAMS)); do \ + for p in $(OTHER_PROGRAMS) $(filter $(install_bindir_programs),$(ALL_PROGRAMS)); do \ $(RM) "$$execdir/$$p" && \ test -n "$(INSTALL_SYMLINKS)" && \ ln -s "$$destdir_from_execdir_SQ/$(bindir_relative_SQ)/$$p" "$$execdir/$$p" || \ @@ -3458,7 +3462,7 @@ clean: profile-clean coverage-clean cocciclean $(RM) 
git.res $(RM) $(OBJECTS) $(RM) $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(REFTABLE_TEST_LIB) - $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X + $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) $(OTHER_PROGRAMS) $(RM) $(TEST_PROGRAMS) $(RM) $(FUZZ_PROGRAMS) $(RM) $(SP_OBJ) @@ -3509,6 +3513,7 @@ ALL_COMMANDS += git-citool ALL_COMMANDS += git-gui ALL_COMMANDS += gitk ALL_COMMANDS += gitweb +ALL_COMMANDS += scalar .PHONY: check-docs check-docs:: diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 2237109b57fd12..bae203c1fb9c57 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -610,7 +610,7 @@ unset(CMAKE_REQUIRED_INCLUDES) #programs set(PROGRAMS_BUILT git git-daemon git-http-backend git-sh-i18n--envsubst - git-shell) + git-shell scalar) if(NOT CURL_FOUND) list(APPEND excluded_progs git-http-fetch git-http-push) @@ -757,6 +757,9 @@ target_link_libraries(git-sh-i18n--envsubst common-main) add_executable(git-shell ${CMAKE_SOURCE_DIR}/shell.c) target_link_libraries(git-shell common-main) +add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c) +target_link_libraries(scalar common-main) + if(CURL_FOUND) add_library(http_obj OBJECT ${CMAKE_SOURCE_DIR}/http.c) @@ -903,7 +906,7 @@ list(TRANSFORM git_perl_scripts PREPEND "${CMAKE_BINARY_DIR}/") #install foreach(program ${PROGRAMS_BUILT}) -if(program STREQUAL "git" OR program STREQUAL "git-shell") +if(program MATCHES "^(git|git-shell|scalar)$") install(TARGETS ${program} RUNTIME DESTINATION bin) else() @@ -977,7 +980,7 @@ endif() #wrapper scripts set(wrapper_scripts - git git-upload-pack git-receive-pack git-upload-archive git-shell git-remote-ext) + git git-upload-pack git-receive-pack git-upload-archive git-shell git-remote-ext scalar) set(wrapper_test_scripts test-fake-ssh test-tool) diff --git a/contrib/scalar/.gitignore b/contrib/scalar/.gitignore deleted file mode 100644 index ff3d47e84d0436..00000000000000 --- 
a/contrib/scalar/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/*.exe -/scalar diff --git a/contrib/scalar/Makefile b/contrib/scalar/Makefile deleted file mode 100644 index 37f283f35d74d6..00000000000000 --- a/contrib/scalar/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -# The default target of this Makefile is... -all:: - -# Import tree-wide shared Makefile behavior and libraries -include ../../shared.mak - -include ../../config.mak.uname --include ../../config.mak.autogen --include ../../config.mak - -TARGETS = scalar$(X) scalar.o -GITLIBS = ../../common-main.o ../../libgit.a ../../xdiff/lib.a - -all:: scalar$(X) ../../bin-wrappers/scalar - -$(GITLIBS): - $(QUIET_SUBDIR0)../.. $(QUIET_SUBDIR1) $(subst ../../,,$@) - -$(TARGETS): $(GITLIBS) scalar.c - $(QUIET_SUBDIR0)../.. $(QUIET_SUBDIR1) $(patsubst %,contrib/scalar/%,$@) - -clean: - $(RM) $(TARGETS) ../../bin-wrappers/scalar - -../../bin-wrappers/scalar: ../../wrap-for-bin.sh Makefile - @mkdir -p ../../bin-wrappers - $(QUIET_GEN)sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ - -e 's|@@BUILD_DIR@@|$(shell cd ../.. 
&& pwd)|' \ - -e 's|@@PROG@@|contrib/scalar/scalar$(X)|' < $< > $@ && \ - chmod +x $@ - -test: all - $(MAKE) -C t - -.PHONY: $(GITLIBS) all clean test FORCE diff --git a/contrib/scalar/t/Makefile b/contrib/scalar/t/Makefile deleted file mode 100644 index 1ed174a8cf38e3..00000000000000 --- a/contrib/scalar/t/Makefile +++ /dev/null @@ -1,81 +0,0 @@ -# Import tree-wide shared Makefile behavior and libraries -include ../../../shared.mak - -# Run scalar tests -# -# Copyright (c) 2005,2021 Junio C Hamano, Johannes Schindelin -# - --include ../../../config.mak.autogen --include ../../../config.mak - -SHELL_PATH ?= $(SHELL) -PERL_PATH ?= /usr/bin/perl -RM ?= rm -f -PROVE ?= prove -DEFAULT_TEST_TARGET ?= test -TEST_LINT ?= test-lint - -ifdef TEST_OUTPUT_DIRECTORY -TEST_RESULTS_DIRECTORY = $(TEST_OUTPUT_DIRECTORY)/test-results -else -TEST_RESULTS_DIRECTORY = ../../../t/test-results -endif - -# Shell quote; -SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) -PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) -TEST_RESULTS_DIRECTORY_SQ = $(subst ','\'',$(TEST_RESULTS_DIRECTORY)) - -T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)) - -all: $(DEFAULT_TEST_TARGET) - -test: $(TEST_LINT) - $(MAKE) aggregate-results-and-cleanup - -prove: $(TEST_LINT) - @echo "*** prove ***"; GIT_CONFIG=.git/config $(PROVE) --exec '$(SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS) - $(MAKE) clean-except-prove-cache - -$(T): - @echo "*** $@ ***"; GIT_CONFIG=.git/config '$(SHELL_PATH_SQ)' $@ $(GIT_TEST_OPTS) - -clean-except-prove-cache: - $(RM) -r 'trash directory'.* - $(RM) -r valgrind/bin - -clean: clean-except-prove-cache - $(RM) .prove - -test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax - -test-lint-duplicates: - @dups=`echo $(T) | tr ' ' '\n' | sed 's/-.*//' | sort | uniq -d` && \ - test -z "$$dups" || { \ - echo >&2 "duplicate test numbers:" $$dups; exit 1; } - -test-lint-executable: - @bad=`for i in $(T); do test -x "$$i" || echo $$i; done` && \ - test -z "$$bad" || { 
\ - echo >&2 "non-executable tests:" $$bad; exit 1; } - -test-lint-shell-syntax: - @'$(PERL_PATH_SQ)' ../../../t/check-non-portable-shell.pl $(T) - -aggregate-results-and-cleanup: $(T) - $(MAKE) aggregate-results - $(MAKE) clean - -aggregate-results: - for f in '$(TEST_RESULTS_DIRECTORY_SQ)'/t*-*.counts; do \ - echo "$$f"; \ - done | '$(SHELL_PATH_SQ)' ../../../t/aggregate-results.sh - -valgrind: - $(MAKE) GIT_TEST_OPTS="$(GIT_TEST_OPTS) --valgrind" - -test-results: - mkdir -p test-results - -.PHONY: $(T) aggregate-results clean valgrind diff --git a/contrib/scalar/scalar.c b/scalar.c similarity index 100% rename from contrib/scalar/scalar.c rename to scalar.c diff --git a/contrib/scalar/t/t9099-scalar.sh b/t/t9210-scalar.sh similarity index 96% rename from contrib/scalar/t/t9099-scalar.sh rename to t/t9210-scalar.sh index dfb949f52eed04..14ca575a214f3b 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/t/t9210-scalar.sh @@ -2,15 +2,9 @@ test_description='test the `scalar` command' -TEST_DIRECTORY=$PWD/../../../t -export TEST_DIRECTORY +. ./test-lib.sh -# Make it work with --no-bin-wrappers -PATH=$PWD/..:$PATH - -. ../../../t/test-lib.sh - -GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab ../cron.txt,launchctl:true,schtasks:true" +GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true" export GIT_TEST_MAINT_SCHEDULER test_expect_success 'scalar shows a usage' ' From dd9603e22822fab19c0557dad0cf1825de325403 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 2 Sep 2022 15:56:44 +0000 Subject: [PATCH 030/303] git help: special-case `scalar` With this commit, `git help scalar` will open the appropriate manual or HTML page (instead of looking for `gitscalar`). 
Signed-off-by: Johannes Schindelin Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- builtin/help.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/help.c b/builtin/help.c index 09ac4289f13065..6f2796f211e24c 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -440,6 +440,8 @@ static const char *cmd_to_page(const char *git_cmd) return git_cmd; else if (is_git_command(git_cmd)) return xstrfmt("git-%s", git_cmd); + else if (!strcmp("scalar", git_cmd)) + return xstrdup(git_cmd); else return xstrfmt("git%s", git_cmd); } From 951759d3a5cb20ffeff4836aa0c4b793fa142b51 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 2 Sep 2022 15:56:45 +0000 Subject: [PATCH 031/303] scalar: implement the `help` subcommand It is merely handing off to `git help scalar`. Signed-off-by: Johannes Schindelin Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- scalar.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/scalar.c b/scalar.c index 642d16124eb20c..c5c1ce68919908 100644 --- a/scalar.c +++ b/scalar.c @@ -819,6 +819,25 @@ static int cmd_delete(int argc, const char **argv) return res; } +static int cmd_help(int argc, const char **argv) +{ + struct option options[] = { + OPT_END(), + }; + const char * const usage[] = { + "scalar help", + NULL + }; + + argc = parse_options(argc, argv, NULL, options, + usage, 0); + + if (argc != 0) + usage_with_options(usage, options); + + return run_git("help", "scalar", NULL); +} + static int cmd_version(int argc, const char **argv) { int verbose = 0, build_options = 0; @@ -858,6 +877,7 @@ static struct { { "run", cmd_run }, { "reconfigure", cmd_reconfigure }, { "delete", cmd_delete }, + { "help", cmd_help }, { "version", cmd_version }, { "diagnose", cmd_diagnose }, { NULL, NULL}, From cc75e556a9de5d62c1ca52db900b729fc830f378 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:46 +0000 Subject: [PATCH 032/303] scalar: add to 'git help -a' command list MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 'scalar' as a 'mainporcelain' command in the Git command list. Update the regex in 'cmd-list.perl' used to match the first line of command documentation to find 'scalar(1)'. Helped-by: Ævar Arnfjörð Bjarmason Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- Documentation/cmd-list.perl | 2 +- command-list.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/cmd-list.perl b/Documentation/cmd-list.perl index af5da45d2878e0..9515a499a316db 100755 --- a/Documentation/cmd-list.perl +++ b/Documentation/cmd-list.perl @@ -10,7 +10,7 @@ sub format_one { $state = 0; open I, '<', "$name.txt" or die "No such file $name.txt"; while () { - if (/^git[a-z0-9-]*\(([0-9])\)$/) { + if (/^(git|scalar)[a-z0-9-]*\(([0-9])\)$/) { $mansection = $1; next; } diff --git a/command-list.txt b/command-list.txt index f96bdabd7d95d8..93f94e42ab70f5 100644 --- a/command-list.txt +++ b/command-list.txt @@ -235,3 +235,4 @@ gittutorial guide gittutorial-2 guide gitweb ancillaryinterrogators gitworkflows guide +scalar mainporcelain From 14b4e7e5a455b771f9dee74a5b4eb9a10f2a1cd4 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:47 +0000 Subject: [PATCH 033/303] scalar-clone: add test coverage Create a new test file ('t9211-scalar-clone.sh') to exercise the options and behavior of the 'scalar clone' command. Each test clones to a unique target location and cleans up the cloned repo only when the test passes. This ensures that failed tests' artifacts are captured in CI artifacts for further debugging. 
Helped-by: Johannes Schindelin Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/t9211-scalar-clone.sh | 151 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100755 t/t9211-scalar-clone.sh diff --git a/t/t9211-scalar-clone.sh b/t/t9211-scalar-clone.sh new file mode 100755 index 00000000000000..dd33d87e9be165 --- /dev/null +++ b/t/t9211-scalar-clone.sh @@ -0,0 +1,151 @@ +#!/bin/sh + +test_description='test the `scalar clone` subcommand' + +. ./test-lib.sh + +GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true" +export GIT_TEST_MAINT_SCHEDULER + +test_expect_success 'set up repository to clone' ' + rm -rf .git && + git init to-clone && + ( + cd to-clone && + git branch -m base && + + test_commit first && + test_commit second && + test_commit third && + + git switch -c parallel first && + mkdir -p 1/2 && + test_commit 1/2/3 && + + git switch base && + + # By default, permit + git config uploadpack.allowfilter true && + git config uploadpack.allowanysha1inwant true + ) +' + +cleanup_clone () { + rm -rf "$1" +} + +test_expect_success 'creates content in enlistment root' ' + enlistment=cloned && + + scalar clone "file://$(pwd)/to-clone" $enlistment && + ls -A $enlistment >enlistment-root && + test_line_count = 1 enlistment-root && + test_path_is_dir $enlistment/src && + test_path_is_dir $enlistment/src/.git && + + cleanup_clone $enlistment +' + +test_expect_success 'with spaces' ' + enlistment="cloned with space" && + + scalar clone "file://$(pwd)/to-clone" "$enlistment" && + test_path_is_dir "$enlistment" && + test_path_is_dir "$enlistment/src" && + test_path_is_dir "$enlistment/src/.git" && + + cleanup_clone "$enlistment" +' + +test_expect_success 'partial clone if supported by server' ' + enlistment=partial-clone && + + scalar clone "file://$(pwd)/to-clone" $enlistment && + + ( + cd $enlistment/src && + + # Two promisor packs: one for refs, the other for blobs + ls 
.git/objects/pack/pack-*.promisor >promisorlist && + test_line_count = 2 promisorlist + ) && + + cleanup_clone $enlistment +' + +test_expect_success 'fall back on full clone if partial unsupported' ' + enlistment=no-partial-support && + + test_config -C to-clone uploadpack.allowfilter false && + test_config -C to-clone uploadpack.allowanysha1inwant false && + + scalar clone "file://$(pwd)/to-clone" $enlistment 2>err && + grep "filtering not recognized by server, ignoring" err && + + ( + cd $enlistment/src && + + # Still get a refs promisor file, but none for blobs + ls .git/objects/pack/pack-*.promisor >promisorlist && + test_line_count = 1 promisorlist + ) && + + cleanup_clone $enlistment +' + +test_expect_success 'initializes sparse-checkout by default' ' + enlistment=sparse && + + scalar clone "file://$(pwd)/to-clone" $enlistment && + ( + cd $enlistment/src && + test_cmp_config true core.sparseCheckout && + test_cmp_config true core.sparseCheckoutCone + ) && + + cleanup_clone $enlistment +' + +test_expect_success '--full-clone does not create sparse-checkout' ' + enlistment=full-clone && + + scalar clone --full-clone "file://$(pwd)/to-clone" $enlistment && + ( + cd $enlistment/src && + test_cmp_config "" --default "" core.sparseCheckout && + test_cmp_config "" --default "" core.sparseCheckoutCone + ) && + + cleanup_clone $enlistment +' + +test_expect_success '--single-branch clones HEAD only' ' + enlistment=single-branch && + + scalar clone --single-branch "file://$(pwd)/to-clone" $enlistment && + ( + cd $enlistment/src && + git for-each-ref refs/remotes/origin >out && + test_line_count = 1 out && + grep "refs/remotes/origin/base" out + ) && + + cleanup_clone $enlistment +' + +test_expect_success '--no-single-branch clones all branches' ' + enlistment=no-single-branch && + + scalar clone --no-single-branch "file://$(pwd)/to-clone" $enlistment && + ( + cd $enlistment/src && + git for-each-ref refs/remotes/origin >out && + test_line_count = 2 out && + grep 
"refs/remotes/origin/base" out && + grep "refs/remotes/origin/parallel" out + ) && + + cleanup_clone $enlistment +' + +test_done From e2809233d19e0faacedf59f229ec292cb9e7c7ef Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:48 +0000 Subject: [PATCH 034/303] t/perf: add Scalar performance tests Create 'p9210-scalar.sh' for testing Scalar performance and comparing performance of Git operations in Scalar registrations and standard repositories. Example results: Test this tree ------------------------------------------------------------------------ 9210.2: scalar clone 14.82(18.00+3.63) 9210.3: git clone 26.15(36.67+6.90) 9210.4: git status (scalar) 0.04(0.01+0.01) 9210.5: git status (non-scalar) 0.10(0.02+0.11) 9210.6: test_commit --append --no-tag A (scalar) 0.08(0.02+0.03) 9210.7: test_commit --append --no-tag A (non-scalar) 0.13(0.03+0.11) Helped-by: Johannes Schindelin Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/perf/p9210-scalar.sh | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 t/perf/p9210-scalar.sh diff --git a/t/perf/p9210-scalar.sh b/t/perf/p9210-scalar.sh new file mode 100755 index 00000000000000..265f7cd1fe2470 --- /dev/null +++ b/t/perf/p9210-scalar.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='test scalar performance' +. 
./perf-lib.sh + +test_perf_large_repo "$TRASH_DIRECTORY/to-clone" + +test_expect_success 'enable server-side partial clone' ' + git -C to-clone config uploadpack.allowFilter true && + git -C to-clone config uploadpack.allowAnySHA1InWant true +' + +test_perf 'scalar clone' ' + rm -rf scalar-clone && + scalar clone "file://$(pwd)/to-clone" scalar-clone +' + +test_perf 'git clone' ' + rm -rf git-clone && + git clone "file://$(pwd)/to-clone" git-clone +' + +test_compare_perf () { + command=$1 + shift + args=$* + test_perf "$command $args (scalar)" " + $command -C scalar-clone/src $args + " + + test_perf "$command $args (non-scalar)" " + $command -C git-clone $args + " +} + +test_compare_perf git status +test_compare_perf test_commit --append --no-tag A + +test_done From ba1b117eec2f73c84f73f827e6f3ac8b82b35585 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:49 +0000 Subject: [PATCH 035/303] t/perf: add 'GIT_PERF_USE_SCALAR' run option Add a 'GIT_PERF_USE_SCALAR' environment variable (and corresponding perf config 'useScalar') to register a repository created with any of: * test_perf_fresh_repo * test_perf_default_repo * test_perf_large_repo as a Scalar enlistment. This is intended to allow a developer to test the impact of Scalar on already-defined performance scenarios. Suggested-by: Derrick Stolee Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- t/perf/README | 4 ++++ t/perf/perf-lib.sh | 13 ++++++++++++- t/perf/run | 3 +++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/t/perf/README b/t/perf/README index fb9127a66f7561..8f217d7be7d557 100644 --- a/t/perf/README +++ b/t/perf/README @@ -95,6 +95,10 @@ You can set the following variables (also in your config.mak): Git (e.g., performance of index-pack as the number of threads changes). These can be enabled with GIT_PERF_EXTRA. + GIT_PERF_USE_SCALAR + Boolean indicating whether to register test repo(s) with Scalar + before executing tests. 
+ You can also pass the options taken by ordinary git tests; the most useful one is: diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh index 27c28017921a0b..e7786775a90161 100644 --- a/t/perf/perf-lib.sh +++ b/t/perf/perf-lib.sh @@ -49,6 +49,9 @@ export TEST_DIRECTORY TRASH_DIRECTORY GIT_BUILD_DIR GIT_TEST_CMP MODERN_GIT=$GIT_BUILD_DIR/bin-wrappers/git export MODERN_GIT +MODERN_SCALAR=$GIT_BUILD_DIR/bin-wrappers/scalar +export MODERN_SCALAR + perf_results_dir=$TEST_RESULTS_DIR test -n "$GIT_PERF_SUBSECTION" && perf_results_dir="$perf_results_dir/$GIT_PERF_SUBSECTION" mkdir -p "$perf_results_dir" @@ -120,6 +123,10 @@ test_perf_create_repo_from () { # status" due to a locked index. Since we have # a copy it's fine to remove the lock. rm .git/index.lock + fi && + if test_bool_env GIT_PERF_USE_SCALAR false + then + "$MODERN_SCALAR" register fi ) || error "failed to copy repository '$source' to '$repo'" } @@ -130,7 +137,11 @@ test_perf_fresh_repo () { "$MODERN_GIT" init -q "$repo" && ( cd "$repo" && - test_perf_do_repo_symlink_config_ + test_perf_do_repo_symlink_config_ && + if test_bool_env GIT_PERF_USE_SCALAR false + then + "$MODERN_SCALAR" register + fi ) } diff --git a/t/perf/run b/t/perf/run index 55219aa4056383..33da4d2aba2587 100755 --- a/t/perf/run +++ b/t/perf/run @@ -171,6 +171,9 @@ run_subsection () { get_var_from_env_or_config "GIT_PERF_MAKE_COMMAND" "perf" "makeCommand" get_var_from_env_or_config "GIT_PERF_MAKE_OPTS" "perf" "makeOpts" + get_var_from_env_or_config "GIT_PERF_USE_SCALAR" "perf" "useScalar" "--bool" + export GIT_PERF_USE_SCALAR + get_var_from_env_or_config "GIT_PERF_REPO_NAME" "perf" "repoName" export GIT_PERF_REPO_NAME From 9eb7a73158bdc91892a6b9a0b43b8f954b1e39e2 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Fri, 2 Sep 2022 15:56:50 +0000 Subject: [PATCH 036/303] Documentation/technical: include Scalar technical doc Include 'Documentation/technical/scalar.txt' alongside the other HTML technical docs when installing them. 
Now that the document is intended as a widely-accessible reference, remove the internal work-in-progress roadmap from the document. Those details should no longer be needed to guide Scalar's development and, if they were left, they could fall out-of-date and be misleading to readers. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- Documentation/Makefile | 1 + Documentation/technical/scalar.txt | 61 ------------------------------ 2 files changed, 1 insertion(+), 61 deletions(-) diff --git a/Documentation/Makefile b/Documentation/Makefile index 16c9e062390661..9ec53afdf186cf 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -116,6 +116,7 @@ TECH_DOCS += technical/parallel-checkout TECH_DOCS += technical/partial-clone TECH_DOCS += technical/racy-git TECH_DOCS += technical/reftable +TECH_DOCS += technical/scalar TECH_DOCS += technical/send-pack-pipeline TECH_DOCS += technical/shallow TECH_DOCS += technical/trivial-merge diff --git a/Documentation/technical/scalar.txt b/Documentation/technical/scalar.txt index 0600150b3ad8f4..921cb104c3c4ed 100644 --- a/Documentation/technical/scalar.txt +++ b/Documentation/technical/scalar.txt @@ -64,64 +64,3 @@ some "global" `git` options (e.g., `-c` and `-C`). Because `scalar` is not invoked as a Git subcommand (like `git scalar`), it is built and installed as its own executable in the `bin/` directory, alongside `git`, `git-gui`, etc. - -Roadmap -------- - -NOTE: this section will be removed once the remaining tasks outlined in this -roadmap are complete. - -Scalar is a large enough project that it is being upstreamed incrementally, -living in `contrib/` until it is feature-complete. So far, the following patch -series have been accepted: - -- `scalar-the-beginning`: The initial patch series which sets up - `contrib/scalar/` and populates it with a minimal `scalar` command that - demonstrates the fundamental ideas. 
- -- `scalar-c-and-C`: The `scalar` command learns about two options that can be - specified before the command, `-c =` and `-C `. - -- `scalar-diagnose`: The `scalar` command is taught the `diagnose` subcommand. - -- `scalar-generalize-diagnose`: Move the functionality of `scalar diagnose` - into `git diagnose` and `git bugreport --diagnose`. - -- 'scalar-add-fsmonitor: Enable the built-in FSMonitor in Scalar - enlistments. At the end of this series, Scalar should be feature-complete - from the perspective of a user. - -Roughly speaking (and subject to change), the following series are needed to -"finish" this initial version of Scalar: - -- Move Scalar to toplevel: Move Scalar out of `contrib/` and into the root of - `git`. This includes a variety of related updates, including: - - building & installing Scalar in the Git root-level 'make [install]'. - - builing & testing Scalar as part of CI. - - moving and expanding test coverage of Scalar (including perf tests). - - implementing 'scalar help'/'git help scalar' to display scalar - documentation. - -Finally, there are two additional patch series that exist in Microsoft's fork of -Git, but there is no current plan to upstream them. There are some interesting -ideas there, but the implementation is too specific to Azure Repos and/or VFS -for Git to be of much help in general. - -These still exist mainly because the GVFS protocol is what Azure Repos has -instead of partial clone, while Git is focused on improving partial clone: - -- `scalar-with-gvfs`: The primary purpose of this patch series is to support - existing Scalar users whose repositories are hosted in Azure Repos (which does - not support Git's partial clones, but supports its predecessor, the GVFS - protocol, which is used by Scalar to emulate the partial clone). - - Since the GVFS protocol will never be supported by core Git, this patch series - will remain in Microsoft's fork of Git. 
- -- `run-scalar-functional-tests`: The Scalar project developed a quite - comprehensive set of integration tests (or, "Functional Tests"). They are the - sole remaining part of the original C#-based Scalar project, and this patch - adds a GitHub workflow that runs them all. - - Since the tests partially depend on features that are only provided in the - `scalar-with-gvfs` patch series, this patch cannot be upstreamed. From 7ead46810b507828c1481eaea6d64b9ed635b8b7 Mon Sep 17 00:00:00 2001 From: Shaoxuan Yuan Date: Fri, 9 Sep 2022 15:27:36 -0700 Subject: [PATCH 037/303] builtin/mv.c: fix possible segfault in add_slash() A possible segfault was introduced in c08830de41 (mv: check if is a SKIP_WORKTREE_DIR, 2022-08-09). When running t7001 with SANITIZE=address, problem appears when running: git mv path1/path2/ . or git mv directory ../ or any that makes dest_path[0] an empty string. The add_slash() call could segfault when path argument to it is an empty string, because it makes an out-of-bounds read to decide if an extra slash '/' needs to be appended to it. As add_slash() is used to make sure that a valid pathname to a file in the given directory can be made by appending a filename after the value returned from it, if path is an empty string, we want to return it as-is. The path to a file "F" in the top-level of the working tree (i.e. path=="") is formed by appending "F" after "" (i.e. path) without any slash in between. So, just like the case where a non-empty path already ends with a slash, return an empty path as-is. 
Reported-by: Jeff King Helped-by: Jeff King Helped-by: Junio C Hamano Helped-by: Derrick Stolee Signed-off-by: Shaoxuan Yuan Signed-off-by: Junio C Hamano --- builtin/mv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/mv.c b/builtin/mv.c index 4b67bd096a95d5..35bf1e9c71dbdd 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -71,7 +71,7 @@ static const char **internal_prefix_pathspec(const char *prefix, static const char *add_slash(const char *path) { size_t len = strlen(path); - if (path[len - 1] != '/') { + if (len && path[len - 1] != '/') { char *with_slash = xmalloc(st_add(len, 2)); memcpy(with_slash, path, len); with_slash[len++] = '/'; From e40d906449950c140ba1e081b647c708d6d2979e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 11 Sep 2022 00:58:09 -0400 Subject: [PATCH 038/303] list-objects-filter: don't memset after releasing filter struct If we see an error while parsing a "combine" filter, we call list_objects_filter_release() to free any allocated memory, and then use memset() to return the struct to a known state. But the release function already does that reinitializing. Doing it again is pointless. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 6cc4eb8e1c4b60..ea989db260d388 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -187,10 +187,8 @@ static int parse_combine_filter( cleanup: strbuf_list_free(subspecs); - if (result) { + if (result) list_objects_filter_release(filter_options); - memset(filter_options, 0, sizeof(*filter_options)); - } return result; } From aff4bfcf0a51a26d069ccc3a29e643a112867b27 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 11 Sep 2022 01:00:45 -0400 Subject: [PATCH 039/303] list-objects-filter: handle null default filter spec When we have a remote.*.promisor config variable, we know that we're in a partial clone. Usually there's a matching remote.*.partialclonefilter option, which tells us which filter to use with the remote. If that option is missing, we skip setting up the filter at all. But something funny happens: we stick a NULL entry into the string_list storing the text filter spec. This is a weird state, and could possibly segfault if anybody called list_objects_filter_spec(), etc. In practice, nobody does, because filter->choice will still be LOFC_DISABLED, so code generally realizes there's no filter to use. And the string_list itself is OK, because it starts in non-dup mode until we actually parse a filter spec. So it blindly stores the NULL without even looking at it. But it's probably worth avoiding this confused state. It's an accident waiting to happen, and it will be a problem if we replace the lazy initialization from 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec strings, 2022-09-08) with a real initialization function.
The history is a little interesting here, as the bug was introduced during the merge resolution in 627b826834 (Merge branch 'md/list-objects-filter-combo', 2019-09-18). The original logic comes from cac1137dc4 (list-objects: check if filter is NULL before using, 2018-06-11), where we had a single string via core.partialCloneFilter, and a simple NULL check was sufficient. And it even added a test in t0410 that covers this situation. Later, that was expanded to allow per-remote filters in fa3d1b63e8 (promisor-remote: parse remote.*.partialclonefilter, 2019-06-25). After that commit, we get a promisor struct with a partial_clone_filter string, which could be NULL. The commit checks only that the struct pointer is non-NULL, which is enough. It may pass NULL to gently_parse_list_objects_filter(), but that function is smart enough to consider it a noop. But in parallel, cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list, 2019-06-27) added a new line of code: before we call gently_parse_list_objects_filter(), we append the filter spec to the string_list. By itself that was OK, since we'd have returned early if the string was NULL. When the two were merged in 627b826834, the result is that we return early only if the struct is NULL, but not the string. And we append to the string_list, meaning we may append NULL. The solution is to return early if either is NULL, as it would mean we don't have a configured filter. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index ea989db260d388..18c51001dc41e4 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -399,7 +399,7 @@ void partial_clone_get_default_filter_spec( /* * Parse default value, but silently ignore it if it is invalid.
*/ - if (!promisor) + if (!promisor || !promisor->partial_clone_filter) return; string_list_append(&filter_options->filter_spec, From 2a01bdedf87d7cbfc4411ff5059cfe406e1637db Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 11 Sep 2022 01:03:07 -0400 Subject: [PATCH 040/303] list-objects-filter: add and use initializers In 7e2619d8ff (list_objects_filter_options: plug leak of filter_spec strings, 2022-09-08), we noted that the filter_spec string_list was inconsistent in how it handled memory ownership of strings stored in the list. The fix there was a bit of a band-aid to set the "strdup_strings" variable right before adding anything. That works OK, and it lets the users of the API continue to zero-initialize the struct. But it makes the code a bit hard to follow and accident-prone, as any other spots appending the filter_spec need to think about whether to set the strdup_strings value, too (there's one such spot in partial_clone_get_default_filter_spec(), which is probably a possible memory leak). So let's do that full cleanup now. We'll introduce a LIST_OBJECTS_FILTER_INIT macro and matching function, and use them as appropriate (though it is for the "_options" struct, this matches the corresponding list_objects_filter_release() function). This is harder than it seems! Many other structs, like git_transport_data, embed the filter struct. So they need to initialize it themselves even if the rest of the enclosing struct is OK with zero-initialization. I found all of the relevant spots by grepping manually for declarations of list_objects_filter_options. And then doing so recursively for structs which embed it, and ones which embed those, and so on. I'm pretty sure I got everything, but there's no change that would alert the compiler if any topics in flight added new declarations. To catch this case, we now double-check in the parsing function that things were initialized as expected and BUG() if appropriate. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/fetch-pack.c | 1 + builtin/fetch.c | 2 +- builtin/submodule--helper.c | 8 ++++---- bundle.h | 1 + list-objects-filter-options.c | 20 +++++++++++--------- list-objects-filter-options.h | 3 +++ revision.c | 1 + transport-helper.c | 2 ++ transport.c | 1 + upload-pack.c | 1 + 11 files changed, 27 insertions(+), 15 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 9e0b2b45cae912..78fb80eab6b323 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -72,7 +72,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP; static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; -static struct list_objects_filter_options filter_options; +static struct list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; static int option_filter_submodules = -1; /* unspecified */ static int config_filter_submodules = -1; /* unspecified */ static struct string_list server_options = STRING_LIST_INIT_NODUP; diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index f045bbbe946dcb..afe679368deec2 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -62,6 +62,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) packet_trace_identity("fetch-pack"); memset(&args, 0, sizeof(args)); + list_objects_filter_init(&args.filter_options); args.uploadpack = "git-upload-pack"; for (i = 1; i < argc && *argv[i] == '-'; i++) { diff --git a/builtin/fetch.c b/builtin/fetch.c index ac29c2b1ae34df..0e6238d2837e28 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -80,7 +80,7 @@ static int recurse_submodules_cli = RECURSE_SUBMODULES_DEFAULT; static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND; static int shown_url = 0; static struct refspec refmap = REFSPEC_INIT_FETCH; -static struct list_objects_filter_options filter_options; +static struct 
list_objects_filter_options filter_options = LIST_OBJECTS_FILTER_INIT; static struct string_list server_options = STRING_LIST_INIT_DUP; static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP; static int fetch_write_commit_graph = -1; diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index c597df7528ee56..f5dc63fab4580a 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1754,7 +1754,8 @@ static int module_clone(int argc, const char **argv, const char *prefix) { int dissociate = 0, quiet = 0, progress = 0, require_init = 0; struct module_clone_data clone_data = MODULE_CLONE_DATA_INIT; - struct list_objects_filter_options filter_options; + struct list_objects_filter_options filter_options = + LIST_OBJECTS_FILTER_INIT; struct option module_clone_options[] = { OPT_STRING(0, "prefix", &clone_data.prefix, @@ -1796,7 +1797,6 @@ static int module_clone(int argc, const char **argv, const char *prefix) NULL }; - memset(&filter_options, 0, sizeof(filter_options)); argc = parse_options(argc, argv, prefix, module_clone_options, git_submodule_helper_usage, 0); @@ -2581,7 +2581,8 @@ static int module_update(int argc, const char **argv, const char *prefix) { struct pathspec pathspec; struct update_data opt = UPDATE_DATA_INIT; - struct list_objects_filter_options filter_options; + struct list_objects_filter_options filter_options = + LIST_OBJECTS_FILTER_INIT; int ret; struct option module_update_options[] = { @@ -2639,7 +2640,6 @@ static int module_update(int argc, const char **argv, const char *prefix) update_clone_config_from_gitmodules(&opt.max_jobs); git_config(git_update_clone_config, &opt.max_jobs); - memset(&filter_options, 0, sizeof(filter_options)); argc = parse_options(argc, argv, prefix, module_update_options, git_submodule_helper_usage, 0); diff --git a/bundle.h b/bundle.h index 0c052f54964f11..68ff39a0a74085 100644 --- a/bundle.h +++ b/bundle.h @@ -18,6 +18,7 @@ struct bundle_header { { \ .prerequisites = 
STRING_LIST_INIT_DUP, \ .references = STRING_LIST_INIT_DUP, \ + .filter = LIST_OBJECTS_FILTER_INIT, \ } void bundle_header_init(struct bundle_header *header); void bundle_header_release(struct bundle_header *header); diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 18c51001dc41e4..56a1933a50d6e5 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -108,7 +108,7 @@ int gently_parse_list_objects_filter( strbuf_addf(errbuf, _("invalid filter-spec '%s'"), arg); - memset(filter_options, 0, sizeof(*filter_options)); + list_objects_filter_init(filter_options); return 1; } @@ -223,8 +223,7 @@ static void transform_to_combine_type( struct list_objects_filter_options *sub_array = xcalloc(initial_sub_alloc, sizeof(*sub_array)); sub_array[0] = *filter_options; - memset(filter_options, 0, sizeof(*filter_options)); - string_list_init_dup(&filter_options->filter_spec); + list_objects_filter_init(filter_options); filter_options->sub = sub_array; filter_options->sub_alloc = initial_sub_alloc; } @@ -255,11 +254,8 @@ void parse_list_objects_filter( struct strbuf errbuf = STRBUF_INIT; int parse_error; - if (!filter_options->filter_spec.strdup_strings) { - if (filter_options->filter_spec.nr) - BUG("unexpected non-allocated string in filter_spec"); - filter_options->filter_spec.strdup_strings = 1; - } + if (!filter_options->filter_spec.strdup_strings) + BUG("filter_options not properly initialized"); if (!filter_options->choice) { string_list_append(&filter_options->filter_spec, arg); @@ -346,7 +342,7 @@ void list_objects_filter_release( for (sub = 0; sub < filter_options->sub_nr; sub++) list_objects_filter_release(&filter_options->sub[sub]); free(filter_options->sub); - memset(filter_options, 0, sizeof(*filter_options)); + list_objects_filter_init(filter_options); } void partial_clone_register( @@ -429,3 +425,9 @@ void list_objects_filter_copy( for (i = 0; i < src->sub_nr; i++) list_objects_filter_copy(&dest->sub[i], 
&src->sub[i]); } + +void list_objects_filter_init(struct list_objects_filter_options *filter_options) +{ + struct list_objects_filter_options blank = LIST_OBJECTS_FILTER_INIT; + memcpy(filter_options, &blank, sizeof(*filter_options)); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index ffc02d77e76000..2720f7dba87c6a 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -69,6 +69,9 @@ struct list_objects_filter_options { */ }; +#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRING_LIST_INIT_DUP } +void list_objects_filter_init(struct list_objects_filter_options *filter_options); + /* * Parse value of the argument to the "filter" keyword. * On the command line this looks like: diff --git a/revision.c b/revision.c index 0c6e26cd9c8ff8..fbd89ef8e758af 100644 --- a/revision.c +++ b/revision.c @@ -1900,6 +1900,7 @@ void repo_init_revisions(struct repository *r, } init_display_notes(&revs->notes_opt); + list_objects_filter_init(&revs->filter); } static void add_pending_commit_list(struct rev_info *revs, diff --git a/transport-helper.c b/transport-helper.c index 322c7224782fbe..e95267a4ab54dc 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -1286,6 +1286,8 @@ int transport_helper_init(struct transport *transport, const char *name) if (getenv("GIT_TRANSPORT_HELPER_DEBUG")) debug = 1; + list_objects_filter_init(&data->transport_options.filter_options); + transport->data = data; transport->vtable = &vtable; transport->smart_options = &(data->transport_options); diff --git a/transport.c b/transport.c index 6ec6130852cec5..a14179684b4fbf 100644 --- a/transport.c +++ b/transport.c @@ -1113,6 +1113,7 @@ struct transport *transport_get(struct remote *remote, const char *url) * will be checked individually in git_connect. 
*/ struct git_transport_data *data = xcalloc(1, sizeof(*data)); + list_objects_filter_init(&data->options.filter_options); ret->data = data; ret->vtable = &builtin_smart_vtable; ret->smart_options = &(data->options); diff --git a/upload-pack.c b/upload-pack.c index 3a851b360663a5..cbd373f2e5df22 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -141,6 +141,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) data->allow_filter_fallback = 1; data->tree_filter_max_depth = ULONG_MAX; packet_writer_init(&data->writer, 1); + list_objects_filter_init(&data->filter_options); data->keepalive = 5; data->advertise_sid = 0; From c54980ab83661e8e290003fbd5ab44b12e4e77b1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 11 Sep 2022 01:03:31 -0400 Subject: [PATCH 041/303] list-objects-filter: convert filter_spec to a strbuf Originally, the filter_spec field was just a string pointer. In cf9ceb5a12 (list-objects-filter-options: make filter_spec a string_list, 2019-06-27) it became a string_list, but that commit notes: A strbuf would seem to be a more natural choice for this object, but it unfortunately requires initialization besides just zero'ing out the memory. This results in all container structs, and all containers of those structs, etc., to also require initialization. Initializing them all would be more cumbersome that simply using a string_list, which behaves properly when its contents are zero'd. Now that we've changed the struct to require non-zero initialization anyway (ironically, because string_list also needed non-zero initialization to avoid leaks), we can now convert to that more natural type. This makes the list_objects_filter_spec() function much less awkward, as it had to collapse the string_list to a single-entry list on the fly. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 51 +++++++++++++---------------------- list-objects-filter-options.h | 4 +-- 2 files changed, 20 insertions(+), 35 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 56a1933a50d6e5..d46ce4acc411c5 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -202,10 +202,10 @@ static int allow_unencoded(char ch) static void filter_spec_append_urlencode( struct list_objects_filter_options *filter, const char *raw) { - struct strbuf buf = STRBUF_INIT; - strbuf_addstr_urlencode(&buf, raw, allow_unencoded); - trace_printf("Add to combine filter-spec: %s\n", buf.buf); - string_list_append_nodup(&filter->filter_spec, strbuf_detach(&buf, NULL)); + size_t orig_len = filter->filter_spec.len; + strbuf_addstr_urlencode(&filter->filter_spec, raw, allow_unencoded); + trace_printf("Add to combine filter-spec: %s\n", + filter->filter_spec.buf + orig_len); } /* @@ -229,7 +229,7 @@ static void transform_to_combine_type( } filter_options->sub_nr = 1; filter_options->choice = LOFC_COMBINE; - string_list_append(&filter_options->filter_spec, "combine:"); + strbuf_addstr(&filter_options->filter_spec, "combine:"); filter_spec_append_urlencode( filter_options, list_objects_filter_spec(&filter_options->sub[0])); @@ -237,7 +237,7 @@ static void transform_to_combine_type( * We don't need the filter_spec strings for subfilter specs, only the * top level. 
*/ - string_list_clear(&filter_options->sub[0].filter_spec, /*free_util=*/0); + strbuf_release(&filter_options->sub[0].filter_spec); } void list_objects_filter_die_if_populated( @@ -254,11 +254,11 @@ void parse_list_objects_filter( struct strbuf errbuf = STRBUF_INIT; int parse_error; - if (!filter_options->filter_spec.strdup_strings) + if (!filter_options->filter_spec.buf) BUG("filter_options not properly initialized"); if (!filter_options->choice) { - string_list_append(&filter_options->filter_spec, arg); + strbuf_addstr(&filter_options->filter_spec, arg); parse_error = gently_parse_list_objects_filter( filter_options, arg, &errbuf); @@ -269,7 +269,7 @@ void parse_list_objects_filter( */ transform_to_combine_type(filter_options); - string_list_append(&filter_options->filter_spec, "+"); + strbuf_addch(&filter_options->filter_spec, '+'); filter_spec_append_urlencode(filter_options, arg); ALLOC_GROW_BY(filter_options->sub, filter_options->sub_nr, 1, filter_options->sub_alloc); @@ -300,31 +300,18 @@ int opt_parse_list_objects_filter(const struct option *opt, const char *list_objects_filter_spec(struct list_objects_filter_options *filter) { - if (!filter->filter_spec.nr) + if (!filter->filter_spec.len) BUG("no filter_spec available for this filter"); - if (filter->filter_spec.nr != 1) { - struct strbuf concatted = STRBUF_INIT; - strbuf_add_separated_string_list( - &concatted, "", &filter->filter_spec); - string_list_clear(&filter->filter_spec, /*free_util=*/0); - string_list_append_nodup( - &filter->filter_spec, strbuf_detach(&concatted, NULL)); - } - - return filter->filter_spec.items[0].string; + return filter->filter_spec.buf; } const char *expand_list_objects_filter_spec( struct list_objects_filter_options *filter) { if (filter->choice == LOFC_BLOB_LIMIT) { - struct strbuf expanded_spec = STRBUF_INIT; - strbuf_addf(&expanded_spec, "blob:limit=%lu", + strbuf_release(&filter->filter_spec); + strbuf_addf(&filter->filter_spec, "blob:limit=%lu", 
filter->blob_limit_value); - string_list_clear(&filter->filter_spec, /*free_util=*/0); - string_list_append_nodup( - &filter->filter_spec, - strbuf_detach(&expanded_spec, NULL)); } return list_objects_filter_spec(filter); @@ -337,7 +324,7 @@ void list_objects_filter_release( if (!filter_options) return; - string_list_clear(&filter_options->filter_spec, /*free_util=*/0); + strbuf_release(&filter_options->filter_spec); free(filter_options->sparse_oid_name); for (sub = 0; sub < filter_options->sub_nr; sub++) list_objects_filter_release(&filter_options->sub[sub]); @@ -398,8 +385,8 @@ void partial_clone_get_default_filter_spec( if (!promisor || !promisor->partial_clone_filter) return; - string_list_append(&filter_options->filter_spec, - promisor->partial_clone_filter); + strbuf_addstr(&filter_options->filter_spec, + promisor->partial_clone_filter); gently_parse_list_objects_filter(filter_options, promisor->partial_clone_filter, &errbuf); @@ -411,14 +398,12 @@ void list_objects_filter_copy( const struct list_objects_filter_options *src) { int i; - struct string_list_item *item; /* Copy everything. We will overwrite the pointers shortly. */ memcpy(dest, src, sizeof(struct list_objects_filter_options)); - string_list_init_dup(&dest->filter_spec); - for_each_string_list_item(item, &src->filter_spec) - string_list_append(&dest->filter_spec, item->string); + strbuf_init(&dest->filter_spec, 0); + strbuf_addbuf(&dest->filter_spec, &src->filter_spec); dest->sparse_oid_name = xstrdup_or_null(src->sparse_oid_name); ALLOC_ARRAY(dest->sub, dest->sub_alloc); diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 2720f7dba87c6a..7eeadab2dd0551 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -35,7 +35,7 @@ struct list_objects_filter_options { * To get the raw filter spec given by the user, use the result of * list_objects_filter_spec(). 
- struct string_list filter_spec; + struct strbuf filter_spec; /* * 'choice' is determined by parsing the filter-spec. This indicates @@ -69,7 +69,7 @@ struct list_objects_filter_options { */ }; -#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRING_LIST_INIT_DUP } +#define LIST_OBJECTS_FILTER_INIT { .filter_spec = STRBUF_INIT } void list_objects_filter_init(struct list_objects_filter_options *filter_options); /* From 746aae3dd1a8e8dba31797ac237916d9533e4254 Mon Sep 17 00:00:00 2001 From: ZheNing Hu Date: Sun, 11 Sep 2022 14:03:17 +0000 Subject: [PATCH 042/303] ls-files: fix blank space in error message ce74de9 (ls-files: introduce "--format" option) omitted a space between two words, which leads to wrong l10n messages. Fix it by adding a space at the end of the first half of the error message. Signed-off-by: ZheNing Hu Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 779dc18e59d56b..4cf8a2364835c9 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -257,7 +257,7 @@ static size_t expand_show_index(struct strbuf *sb, const char *start, end = strchr(start + 1, ')'); if (!end) - die(_("bad ls-files format: element '%s'" + die(_("bad ls-files format: element '%s' " "does not end in ')'"), start); len = end - start + 1; From 7c04aa73906b9186c9d46010227d4437fd534d93 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Tue, 13 Sep 2022 04:01:47 +0000 Subject: [PATCH 043/303] chainlint: colorize problem annotations and test delimiters When `chainlint.pl` detects problems in a test definition, it emits the test definition with "?!FOO?!" annotations highlighting the problems it discovered.
For instance, given this problematic test: test_expect_success 'discombobulate frobnitz' ' git frob babble && (echo balderdash; echo gnabgib) >expect && for i in three two one do git nitfol $i done >actual test_cmp expect actual ' chainlint.pl will output: # chainlint: t1234-confusing.sh # chainlint: discombobulate frobnitz git frob babble && (echo balderdash ; ?!AMP?! echo gnabgib) >expect && for i in three two one do git nitfol $i ?!LOOP?! done >actual ?!AMP?! test_cmp expect actual in which it may be difficult to spot the "?!FOO?!" annotations. The problem is compounded when multiple tests, possibly in multiple scripts, fail "linting", in which case it may be difficult to spot the "# chainlint:" lines which delimit one problematic test from another. To ameliorate this potential problem, colorize the "?!FOO?!" annotations in order to quickly draw the test author's attention to the problem spots, and colorize the "# chainlint:" lines to help the author identify the name of each script and each problematic test. Colorization is disabled automatically if output is not directed to a terminal or if NO_COLOR environment variable is set. The implementation is specific to Unix (it employs `tput` if available) but works equally well in the Git for Windows development environment which emulates Unix sufficiently. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- t/chainlint.pl | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/t/chainlint.pl b/t/chainlint.pl index 386999ce65d617..976db4b8a01b80 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -585,12 +585,14 @@ sub check_test { my $parser = TestParser->new(\$body); my @tokens = $parser->parse(); return unless $emit_all || grep(/\?![^?]+\?!/, @tokens); + my $c = main::fd_colors(1); my $checked = join(' ', @tokens); $checked =~ s/^\n//; $checked =~ s/^ //mg; $checked =~ s/ $//mg; + $checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg; $checked .= "\n" unless $checked =~ /\n$/; - push(@{$self->{output}}, "# chainlint: $title\n$checked"); + push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked"); } sub parse_cmd { @@ -615,6 +617,41 @@ package main; $interval = sub { return Time::HiRes::tv_interval(shift); }; } +# Restore TERM if test framework set it to "dumb" so 'tput' will work; do this +# outside of get_colors() since under 'ithreads' all threads use %ENV of main +# thread and ignore %ENV changes in subthreads. +$ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM}; + +my @NOCOLORS = (bold => '', rev => '', reset => '', blue => '', green => '', red => ''); +my %COLORS = (); +sub get_colors { + return \%COLORS if %COLORS; + if (exists($ENV{NO_COLOR}) || + system("tput sgr0 >/dev/null 2>&1") != 0 || + system("tput bold >/dev/null 2>&1") != 0 || + system("tput rev >/dev/null 2>&1") != 0 || + system("tput setaf 1 >/dev/null 2>&1") != 0) { + %COLORS = @NOCOLORS; + return \%COLORS; + } + %COLORS = (bold => `tput bold`, + rev => `tput rev`, + reset => `tput sgr0`, + blue => `tput setaf 4`, + green => `tput setaf 2`, + red => `tput setaf 1`); + chomp(%COLORS); + return \%COLORS; +} + +my %FD_COLORS = (); +sub fd_colors { + my $fd = shift; + return $FD_COLORS{$fd} if exists($FD_COLORS{$fd}); + $FD_COLORS{$fd} = -t $fd ? 
get_colors() : {@NOCOLORS}; + return $FD_COLORS{$fd}; +} + sub ncores { # Windows return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS}); @@ -630,6 +667,8 @@ sub show_stats { my $walltime = $interval->($start_time); my ($usertime) = times(); my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); + my $c = fd_colors(2); + print(STDERR $c->{green}); for (@$stats) { my ($worker, $nscripts, $ntests, $nerrs) = @$_; print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); @@ -638,7 +677,7 @@ sub show_stats { $total_tests += $ntests; $total_errs += $nerrs; } - printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); + printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); } sub check_script { @@ -656,8 +695,9 @@ sub check_script { my $parser = ScriptParser->new(\$s); 1 while $parser->parse_cmd(); if (@{$parser->{output}}) { + my $c = fd_colors(1); my $s = join('', @{$parser->{output}}); - $emit->("# chainlint: $path\n" . $s); + $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s); $nerrs += () = $s =~ /\?![^?]+\?!/g; } $ntests += $parser->{ntests}; From 255a6f91ae4600ee2d257670477caf97b7986470 Mon Sep 17 00:00:00 2001 From: Adam Dinwoodie Date: Thu, 15 Sep 2022 08:57:17 +0100 Subject: [PATCH 044/303] t1800: correct test to handle Cygwin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Cygwin, when failing to spawn a process using start_command, Git outputs the same error as on Linux systems, rather than using the GIT_WINDOWS_NATIVE-specific error output. 
The WINDOWS test prerequisite is set in both Cygwin and native Windows environments, which means it's not appropriate to use to anticipate the error output from start_command. Instead, use the MINGW test prerequisite, which is only set for Git in native Windows environments, and not for Cygwin. Signed-off-by: Adam Dinwoodie Helped-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- t/t1800-hook.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 64096adac7e108..43fcb7c0bfc85e 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -157,9 +157,9 @@ test_expect_success 'git hook run a hook with a bad shebang' ' write_script bad-hooks/test-hook "/bad/path/no/spaces" expect <<-\EOF fatal: cannot run bad-hooks/test-hook: ... From c18eecbe5c44be4c23f978a3f1c74b802d26c277 Mon Sep 17 00:00:00 2001 From: Elijah Conners Date: Wed, 14 Sep 2022 20:37:34 -0700 Subject: [PATCH 045/303] reftable: use a pointer for pq_entry param The speed of the merged_iter_pqueue_add() can be improved by using a pointer to the pq_entry struct, which is 96 bytes. Since the pq_entry param is worked directly on the stack and does not currently have a pointer to it, the merged_iter_pqueue_add() function is slightly slower. References to pq_entry in reftable have typically included pointers, such as both of the params for pq_less(). Since we are working with pointers in the pq_entry param, as keenly pointed out, the pq_entry param has also been made into a const since the contents of the pq_entry param are copied and not manipulated. 
Signed-off-by: Elijah Conners Signed-off-by: Junio C Hamano --- reftable/merged.c | 4 ++-- reftable/pq.c | 4 ++-- reftable/pq.h | 2 +- reftable/pq_test.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/reftable/merged.c b/reftable/merged.c index 2a6efa110d5dfa..5ded470c086c92 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -36,7 +36,7 @@ static int merged_iter_init(struct merged_iter *mi) .rec = rec, .index = i, }; - merged_iter_pqueue_add(&mi->pq, e); + merged_iter_pqueue_add(&mi->pq, &e); } } @@ -71,7 +71,7 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, return 0; } - merged_iter_pqueue_add(&mi->pq, e); + merged_iter_pqueue_add(&mi->pq, &e); return 0; } diff --git a/reftable/pq.c b/reftable/pq.c index 96ca6dd37b3ac0..dcefeb793a9051 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -71,7 +71,7 @@ struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) return e; } -void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, struct pq_entry e) +void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e) { int i = 0; @@ -81,7 +81,7 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, struct pq_entry e) pq->cap * sizeof(struct pq_entry)); } - pq->heap[pq->len++] = e; + pq->heap[pq->len++] = *e; i = pq->len - 1; while (i > 0) { int j = (i - 1) / 2; diff --git a/reftable/pq.h b/reftable/pq.h index 56fc1b6d873e6b..e85bac9b52e003 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -26,7 +26,7 @@ struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq); int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq); void merged_iter_pqueue_check(struct merged_iter_pqueue pq); struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq); -void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, struct pq_entry e); +void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); void merged_iter_pqueue_release(struct 
merged_iter_pqueue *pq); int pq_less(struct pq_entry *a, struct pq_entry *b); diff --git a/reftable/pq_test.c b/reftable/pq_test.c index 7de5e886f35236..011b5c75028a9d 100644 --- a/reftable/pq_test.c +++ b/reftable/pq_test.c @@ -46,7 +46,7 @@ static void test_pq(void) .u.ref = { .refname = names[i], } } }; - merged_iter_pqueue_add(&pq, e); + merged_iter_pqueue_add(&pq, &e); merged_iter_pqueue_check(pq); i = (i * 7) % N; } while (i != 1); From 086eaab8da7da69907cfc1461d912aea2827406b Mon Sep 17 00:00:00 2001 From: Todd Zullinger Date: Fri, 16 Sep 2022 02:23:02 -0400 Subject: [PATCH 046/303] docs: fix a few recently broken links Some links were broken in the recent move of various technical docs c0f6dd49f1 (Merge branch 'ab/tech-docs-to-help', 2022-08-14). Fix them. Signed-off-by: Todd Zullinger Signed-off-by: Junio C Hamano --- Documentation/gitprotocol-capabilities.txt | 4 ++-- Documentation/gitprotocol-v2.txt | 4 ++-- Documentation/technical/bundle-uri.txt | 3 +-- Documentation/user-manual.txt | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Documentation/gitprotocol-capabilities.txt b/Documentation/gitprotocol-capabilities.txt index c6dcc7d565d7f0..0fb5ea0c1ca754 100644 --- a/Documentation/gitprotocol-capabilities.txt +++ b/Documentation/gitprotocol-capabilities.txt @@ -388,8 +388,8 @@ the server as well. Session IDs should be unique to a given process. They must fit within a packet-line, and must not contain non-printable or whitespace characters. The current implementation uses trace2 session IDs (see -link:api-trace2.html[api-trace2] for details), but this may change and users of -the session ID should not rely on this fact. +link:technical/api-trace2.html[api-trace2] for details), but this may change +and users of the session ID should not rely on this fact. 
GIT --- diff --git a/Documentation/gitprotocol-v2.txt b/Documentation/gitprotocol-v2.txt index c9c0f9160b22e4..59bf41cefb9b95 100644 --- a/Documentation/gitprotocol-v2.txt +++ b/Documentation/gitprotocol-v2.txt @@ -544,8 +544,8 @@ the server as well. Session IDs should be unique to a given process. They must fit within a packet-line, and must not contain non-printable or whitespace characters. The current implementation uses trace2 session IDs (see -link:api-trace2.html[api-trace2] for details), but this may change and users of -the session ID should not rely on this fact. +link:technical/api-trace2.html[api-trace2] for details), but this may change +and users of the session ID should not rely on this fact. object-info ~~~~~~~~~~~ diff --git a/Documentation/technical/bundle-uri.txt b/Documentation/technical/bundle-uri.txt index c25c42378abe1f..85c6a7fc7c55c6 100644 --- a/Documentation/technical/bundle-uri.txt +++ b/Documentation/technical/bundle-uri.txt @@ -3,8 +3,7 @@ Bundle URIs Git bundles are files that store a pack-file along with some extra metadata, including a set of refs and a (possibly empty) set of necessary commits. See -linkgit:git-bundle[1] and link:bundle-format.txt[the bundle format] for more -information. +linkgit:git-bundle[1] and linkgit:gitformat-bundle[5] for more information. Bundle URIs are locations where Git can download one or more bundles in order to bootstrap the object database in advance of fetching the remaining diff --git a/Documentation/user-manual.txt b/Documentation/user-manual.txt index ca9decdd952f88..dc9c6a663a97e6 100644 --- a/Documentation/user-manual.txt +++ b/Documentation/user-manual.txt @@ -3133,7 +3133,7 @@ those "loose" objects. You can save space and make Git faster by moving these loose objects in to a "pack file", which stores a group of objects in an efficient compressed format; the details of how pack files are formatted can be -found in link:gitformat-pack[5]. +found in linkgit:gitformat-pack[5]. 
To put the loose objects into a pack, just run git repack: From 4945f046c7f5ef6e84f06a5f4abb1bbd18c1eb85 Mon Sep 17 00:00:00 2001 From: Todd Zullinger Date: Fri, 16 Sep 2022 02:23:03 -0400 Subject: [PATCH 047/303] api docs: link to html version of api-trace2 In f6d25d7878 (api docs: document that BUG() emits a trace2 error event, 2021-04-13), a link to the plain text version of api-trace2 was added in `technical/api-error-handling.txt`. All of our other `link:`s point to the html versions. Do the same here. Signed-off-by: Todd Zullinger Signed-off-by: Junio C Hamano --- Documentation/technical/api-error-handling.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/technical/api-error-handling.txt b/Documentation/technical/api-error-handling.txt index 70bf1d3e522fea..665c4960b440ba 100644 --- a/Documentation/technical/api-error-handling.txt +++ b/Documentation/technical/api-error-handling.txt @@ -46,7 +46,7 @@ parse-options.c. returns -1 after reporting the situation to the caller. These reports will be logged via the trace2 facility. See the "error" -event in link:api-trace2.txt[trace2 API]. +event in link:api-trace2.html[trace2 API]. 
Customizable error handlers --------------------------- From 225e815ef238d6033c7f78160274b96de7b197f9 Mon Sep 17 00:00:00 2001 From: Fangyi Zhou Date: Fri, 16 Sep 2022 13:05:29 +0000 Subject: [PATCH 048/303] help: fix doubled words in explanation for developer interfaces Signed-off-by: Fangyi Zhou Signed-off-by: Junio C Hamano --- help.c | 2 +- t/t0012-help.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/help.c b/help.c index ec670d5f68bd6a..d04542d8261dd4 100644 --- a/help.c +++ b/help.c @@ -39,7 +39,7 @@ static struct category_description main_categories[] = { { CAT_synchingrepositories, N_("Low-level Commands / Syncing Repositories") }, { CAT_purehelpers, N_("Low-level Commands / Internal Helpers") }, { CAT_userinterfaces, N_("User-facing repository, command and file interfaces") }, - { CAT_developerinterfaces, N_("Developer-facing file file formats, protocols and interfaces") }, + { CAT_developerinterfaces, N_("Developer-facing file formats, protocols and other interfaces") }, { 0, NULL } }; diff --git a/t/t0012-help.sh b/t/t0012-help.sh index 4ed2f242eb246b..dbfc5c826764bc 100755 --- a/t/t0012-help.sh +++ b/t/t0012-help.sh @@ -231,7 +231,7 @@ test_expect_success "'git help -a' section spacing" ' User-facing repository, command and file interfaces - Developer-facing file file formats, protocols and interfaces + Developer-facing file formats, protocols and other interfaces EOF test_cmp expect actual ' From cb98e1d50a7a4a84b76f72dad694d49d2276eef3 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Sat, 17 Sep 2022 18:16:55 +0000 Subject: [PATCH 049/303] diagnose.c: refactor to safely use 'd_type' Refactor usage of the 'd_type' property of 'struct dirent' in 'diagnose.c' to instead utilize the compatibility macro 'DTYPE()'. On systems where 'd_type' is not present in 'struct dirent', this macro will always return 'DT_UNKNOWN'. 
In that case, instead fall back on using the 'stat.st_mode' to determine whether the dirent points to a dir, file, or link. Additionally, add a test to 't0092-diagnose.sh' to verify that files (e.g., loose objects) are counted properly. Note that the new function 'get_dtype()' is based on 'resolve_dtype()' in 'dir.c' (which itself was refactored from a prior 'get_dtype()' in ad6f2157f9 (dir: restructure in a way to avoid passing around a struct dirent, 2020-01-16)), but differs in that it is meant for use on arbitrary files, such as those inside the '.git' dir. Because of this, it does not search the index for a matching entry to derive the 'd_type'. Reported-by: Randall S. Becker Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- diagnose.c | 70 ++++++++++++++++++++++++++++++++++++--------- t/t0092-diagnose.sh | 12 ++++++++ 2 files changed, 68 insertions(+), 14 deletions(-) diff --git a/diagnose.c b/diagnose.c index beb0a8741ba3ce..8f2656989666b6 100644 --- a/diagnose.c +++ b/diagnose.c @@ -66,17 +66,53 @@ static int dir_file_stats(struct object_directory *object_dir, void *data) return 0; } -static int count_files(char *path) +/* + * Get the d_type of a dirent. If the d_type is unknown, derive it from + * stat.st_mode. + * + * Note that 'path' is assumed to have a trailing slash. It is also modified + * in-place during the execution of the function, but is then reverted to its + * original value before returning. 
+ */ +static unsigned char get_dtype(struct dirent *e, struct strbuf *path) { - DIR *dir = opendir(path); + struct stat st; + unsigned char dtype = DTYPE(e); + size_t base_path_len; + + if (dtype != DT_UNKNOWN) + return dtype; + + /* d_type unknown in dirent, try to fall back on lstat results */ + base_path_len = path->len; + strbuf_addstr(path, e->d_name); + if (lstat(path->buf, &st)) + goto cleanup; + + /* determine d_type from st_mode */ + if (S_ISREG(st.st_mode)) + dtype = DT_REG; + else if (S_ISDIR(st.st_mode)) + dtype = DT_DIR; + else if (S_ISLNK(st.st_mode)) + dtype = DT_LNK; + +cleanup: + strbuf_setlen(path, base_path_len); + return dtype; +} + +static int count_files(struct strbuf *path) +{ + DIR *dir = opendir(path->buf); struct dirent *e; int count = 0; if (!dir) return 0; - while ((e = readdir(dir)) != NULL) - if (!is_dot_or_dotdot(e->d_name) && e->d_type == DT_REG) + while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL) + if (get_dtype(e, path) == DT_REG) count++; closedir(dir); @@ -104,13 +140,13 @@ static void loose_objs_stats(struct strbuf *buf, const char *path) strbuf_addch(&count_path, '/'); base_path_len = count_path.len; - while ((e = readdir(dir)) != NULL) - if (!is_dot_or_dotdot(e->d_name) && - e->d_type == DT_DIR && strlen(e->d_name) == 2 && + while ((e = readdir_skip_dot_and_dotdot(dir)) != NULL) + if (get_dtype(e, &count_path) == DT_DIR && + strlen(e->d_name) == 2 && !hex_to_bytes(&c, e->d_name, 1)) { strbuf_setlen(&count_path, base_path_len); - strbuf_addstr(&count_path, e->d_name); - total += (count = count_files(count_path.buf)); + strbuf_addf(&count_path, "%s/", e->d_name); + total += (count = count_files(&count_path)); strbuf_addf(buf, "%s : %7d files\n", e->d_name, count); } @@ -144,22 +180,28 @@ static int add_directory_to_archiver(struct strvec *archiver_args, len = buf.len; strvec_pushf(archiver_args, "--prefix=%s", buf.buf); - while (!res && (e = readdir(dir))) { - if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name)) - 
continue; + while (!res && (e = readdir_skip_dot_and_dotdot(dir))) { + struct strbuf abspath = STRBUF_INIT; + unsigned char dtype; + + strbuf_add_absolute_path(&abspath, at_root ? "." : path); + strbuf_addch(&abspath, '/'); + dtype = get_dtype(e, &abspath); strbuf_setlen(&buf, len); strbuf_addstr(&buf, e->d_name); - if (e->d_type == DT_REG) + if (dtype == DT_REG) strvec_pushf(archiver_args, "--add-file=%s", buf.buf); - else if (e->d_type != DT_DIR) + else if (dtype != DT_DIR) warning(_("skipping '%s', which is neither file nor " "directory"), buf.buf); else if (recurse && add_directory_to_archiver(archiver_args, buf.buf, recurse) < 0) res = -1; + + strbuf_release(&abspath); } closedir(dir); diff --git a/t/t0092-diagnose.sh b/t/t0092-diagnose.sh index fca9b58489cf54..133e5747d613e5 100755 --- a/t/t0092-diagnose.sh +++ b/t/t0092-diagnose.sh @@ -28,12 +28,23 @@ test_expect_success UNZIP 'creates diagnostics zip archive' ' ! "$GIT_UNZIP" -l "$zip_path" | grep ".git/" ' +test_expect_success UNZIP 'counts loose objects' ' + test_commit A && + + # After committing, should have non-zero loose objects + git diagnose -o test-count -s 1 >out && + zip_path=test-count/git-diagnostics-1.zip && + "$GIT_UNZIP" -p "$zip_path" objects-local.txt >out && + grep "^Total: [1-9][0-9]* loose objects" out +' + test_expect_success UNZIP '--mode=stats excludes .git dir contents' ' test_when_finished rm -rf report && git diagnose -o report -s test --mode=stats >out && # Includes pack quantity/size info + zip_path=report/git-diagnostics-test.zip && "$GIT_UNZIP" -p "$zip_path" packs-local.txt >out && grep ".git/objects" out && @@ -47,6 +58,7 @@ test_expect_success UNZIP '--mode=all includes .git dir contents' ' git diagnose -o report -s test --mode=all >out && # Includes pack quantity/size info + zip_path=report/git-diagnostics-test.zip && "$GIT_UNZIP" -p "$zip_path" packs-local.txt >out && grep ".git/objects" out && From 12f1ae53243d3ff06a956da1846dde6f32498342 Mon Sep 17 00:00:00 2001 From: 
Miaoqian Lin Date: Mon, 19 Sep 2022 18:14:40 +0400 Subject: [PATCH 050/303] commit-graph: Fix missing closedir in expire_commit_graphs The function calls opendir() but is missing the corresponding closedir() before exiting the function. Add the missing closedir() to fix it. Signed-off-by: Miaoqian Lin Reviewed-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index 2b528187316e28..0d44cd0fa485dc 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2269,6 +2269,8 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) } out: + if(dir) + closedir(dir); strbuf_release(&path); } From dda7228a83e2e9ff584bf6adbf55910565b41e14 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 19 Sep 2022 12:55:59 -0700 Subject: [PATCH 051/303] A bit more of remaining topics before -rc1 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.38.0.txt | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Documentation/RelNotes/2.38.0.txt b/Documentation/RelNotes/2.38.0.txt index 01617baa98d5a3..5d9bd8c2950d0c 100644 --- a/Documentation/RelNotes/2.38.0.txt +++ b/Documentation/RelNotes/2.38.0.txt @@ -6,7 +6,7 @@ UI, Workflows & Features * "git remote show [-n] frotz" now pays attention to negative pathspec. - * "git push" sometimes perform poorly when reachability bitmaps are + * "git push" sometimes performs poorly when reachability bitmaps are used, even in a repository where other operations are helped by bitmaps. The push.useBitmaps configuration variable is introduced to allow disabling use of reachability bitmaps only for "git push". @@ -27,7 +27,7 @@ UI, Workflows & Features what locale they are in by sending Accept-Language HTTP header, but this was done only for some requests but not others. 
- * Introduce a discovery.barerepository configuration variable that + * Introduce a safe.barerepository configuration variable that allows users to forbid discovery of bare repositories. * Various messages that come from the pack-bitmap codepaths have been @@ -79,12 +79,15 @@ UI, Workflows & Features * "git format-patch --from=" can be told to add an in-body "From:" line even for commits that are authored by the given - with "--force-in-body-from"option. + with "--force-in-body-from" option. * The built-in fsmonitor refuses to work on a network mounted repositories; a configuration knob for users to override this has been introduced. + * The "scalar" addition from Microsoft is now part of the core Git + installation. + Performance, Internal Implementation, Development Support etc. @@ -127,7 +130,7 @@ Performance, Internal Implementation, Development Support etc. * The way "git multi-pack" uses parse-options API has been improved. - * A coccinelle rule (in contrib/) to encourage use of COPY_ARRAY + * A Coccinelle rule (in contrib/) to encourage use of COPY_ARRAY macro has been improved. * API tweak to make it easier to run fuzz testing on commit-graph parser. @@ -172,6 +175,12 @@ Performance, Internal Implementation, Development Support etc. * Share the text used to explain configuration variables used by "git " in "git help " with the text from "git help config". + * "git mv A B" in a sparsely populated working tree can be asked to + move a path from a directory that is "in cone" to another directory + that is "out of cone". Handling of such a case has been improved. + + * The chainlint script for our tests has been revamped. + Fixes since v2.37 ----------------- @@ -297,7 +306,7 @@ Fixes since v2.37 * "git fsck" reads mode from tree objects but canonicalizes the mode before passing it to the logic to check object sanity, which has hid broken tree objects from the checking logic. 
This has been - corrected, but to help exiting projects with broken tree objects + corrected, but to help existing projects with broken tree objects that they cannot fix retroactively, the severity of anomalies this code detects has been demoted to "info" for now. @@ -306,12 +315,10 @@ Fixes since v2.37 * An earlier optimization discarded a tree-object buffer that is still in use, which has been corrected. - (merge 1490d7d82d jk/is-promisor-object-keep-tree-in-use later to maint). * Fix deadlocks between main Git process and subprocess spawned via the pipe_command() API, that can kill "git add -p" that was reimplemented in C recently. - (merge 716c1f649e jk/pipe-command-nonblock later to maint). * The sequencer machinery translated messages left in the reflog by mistake, which has been corrected. @@ -319,20 +326,16 @@ Fixes since v2.37 * xcalloc(), imitating calloc(), takes "number of elements of the array", and "size of a single element", in this order. A call that does not follow this ordering has been corrected. - (merge c4bbd9bb8f sg/xcalloc-cocci-fix later to maint). * The preload-index codepath made copies of pathspec to give to multiple threads, which were left leaked. - (merge 23578904da ad/preload-plug-memleak later to maint). * Update the version of Ubuntu used for GitHub Actions CI from 18.04 to 22.04. - (merge ef46584831 ds/github-actions-use-newer-ubuntu later to maint). * The auto-stashed local changes created by "git merge --autostash" was mixed into a conflicted state left in the working tree, which has been corrected. - (merge d3a9295ada en/merge-unstash-only-on-clean-merge later to maint). * Multi-pack index got corrupted when preferred pack changed from one pack to another in a certain way, which has been corrected. 
From e01b851923d43cbd3c5b7055f689cc18283591b9 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 19 Sep 2022 19:12:46 +0000 Subject: [PATCH 052/303] Documentation: add ReviewingGuidelines Add a reviewing guidelines document including advice and common terminology used in Git mailing list reviews. The document is included in the 'TECH_DOCS' list in order to include it in Git's published documentation. Helped-by: Johannes Schindelin Helped-by: Derrick Stolee Helped-by: Junio C Hamano Helped-by: Josh Steadmon Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- Documentation/Makefile | 1 + Documentation/ReviewingGuidelines.txt | 162 ++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 Documentation/ReviewingGuidelines.txt diff --git a/Documentation/Makefile b/Documentation/Makefile index 4f801f4e4c9470..d4c389324e6530 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -90,6 +90,7 @@ SP_ARTICLES += howto/coordinate-embargoed-releases API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt))) SP_ARTICLES += $(API_DOCS) +TECH_DOCS += ReviewingGuidelines TECH_DOCS += MyFirstContribution TECH_DOCS += MyFirstObjectWalk TECH_DOCS += SubmittingPatches diff --git a/Documentation/ReviewingGuidelines.txt b/Documentation/ReviewingGuidelines.txt new file mode 100644 index 00000000000000..0e323d54779a7c --- /dev/null +++ b/Documentation/ReviewingGuidelines.txt @@ -0,0 +1,162 @@ +Reviewing Patches in the Git Project +==================================== + +Introduction +------------ +The Git development community is a widely distributed, diverse, ever-changing +group of individuals. Asynchronous communication via the Git mailing list poses +unique challenges when reviewing or discussing patches. 
This document contains +some guiding principles and helpful tools you can use to make your reviews both +more efficient for yourself and more effective for other contributors. + +Note that none of the recommendations here are binding or in any way a +requirement of participation in the Git community. They are provided as a +resource to supplement your skills as a contributor. + +Principles +---------- + +Selecting patch(es) to review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you are looking for a patch series in need of review, start by checking +latest "What's cooking in git.git" email +(https://lore.kernel.org/git/xmqqilm1yp3m.fsf@gitster.g/[example]). The "What's +cooking" emails & replies can be found using the query `s:"What's cooking"` on +the https://lore.kernel.org/git/[`lore.kernel.org` mailing list archive]; +alternatively, you can find the contents of the "What's cooking" email tracked +in `whats-cooking.txt` on the `todo` branch of Git. Topics tagged with "Needs +review" and those in the "[New Topics]" section are typically those that would +benefit the most from additional review. + +Patches can also be searched manually in the mailing list archive using a query +like `s:"PATCH" -s:"Re:"`. You can browse these results for topics relevant to +your expertise or interest. + +If you've already contributed to Git, you may also be CC'd in another +contributor's patch series. These are topics where the author feels that your +attention is warranted. This may be because their patch changes something you +wrote previously (making you a good judge of whether the new approach does or +doesn't work), or because you have the expertise to provide an exceptionally +helpful review. There is no requirement to review these patches but, in the +spirit of open source collaboration, you should strongly consider doing so. 
+ +Reviewing patches +~~~~~~~~~~~~~~~~~ +While every contributor takes their own approach to reviewing patches, here are +some general pieces of advice to make your reviews as clear and helpful as +possible. The advice is broken into two rough categories: high-level reviewing +guidance, and concrete tips for interacting with patches on the mailing list. + +==== High-level guidance +- Remember to review the content of commit messages for correctness and clarity, + in addition to the code change in the patch's diff. The commit message of a + patch should accurately and fully explain the code change being made in the + diff. + +- Reviewing test coverage is an important - but easy to overlook - component of + reviews. A patch's changes may be covered by existing tests, or new tests may + be introduced to exercise new behavior. Checking out a patch or series locally + allows you to manually mutate lines of new & existing tests to verify expected + pass/fail behavior. You can use this information to verify proper coverage or + to suggest additional tests the author could add. + +- When providing a recommendation, be as clear as possible about whether you + consider it "blocking" (the code would be broken or otherwise made worse if an + issue isn't fixed) or "non-blocking" (the patch could be made better by taking + the recommendation, but acceptance of the series does not require it). + Non-blocking recommendations can be particularly ambiguous when they are + related to - but outside the scope of - a series ("nice-to-have"s), or when + they represent only stylistic differences between the author and reviewer. + +- When commenting on an issue, try to include suggestions for how the author + could fix it. This not only helps the author to understand and fix the issue, + it also deepens and improves your understanding of the topic. + +- Reviews do not need to exclusively point out problems. 
Feel free to "think out + loud" in your review: describe how you read & understood a complex section of + a patch, ask a question about something that confused you, point out something + you found exceptionally well-written, etc. In particular, uplifting feedback + goes a long way towards encouraging contributors to participate more actively + in the Git community. + +==== Performing your review +- Provide your review comments per-patch in a plaintext "Reply-All" email to the + relevant patch. Comments should be made inline, immediately below the relevant + section(s). + +- You may find that the limited context provided in the patch diff is sometimes + insufficient for a thorough review. In such cases, you can review patches in + your local tree by either applying patches with linkgit:git-am[1] or checking + out the associated branch from https://github.com/gitster/git once the series + is tracked there. + +- Large, complicated patch diffs are sometimes unavoidable, such as when they + refactor existing code. If you find such a patch difficult to parse, try + reviewing the diff produced with the `--color-moved` and/or + `--ignore-space-change` options. + +- If a patch is long, you are encouraged to delete parts of it that are + unrelated to your review from the email reply. Make sure to leave enough + context for readers to understand your comments! + +- If you cannot complete a full review of a series all at once, consider letting + the author know (on- or off-list) if/when you plan to review the rest of the + series. + +Completing a review +~~~~~~~~~~~~~~~~~~~ +Once each patch of a series is reviewed, the author (and/or other contributors) +may discuss the review(s). This may result in no changes being applied, or the +author will send a new version of their patch(es). + +After a series is rerolled in response to your or others' review, make sure to +re-review the updates. 
If you are happy with the state of the patch series, +explicitly indicate your approval (typically with a reply to the latest +version's cover letter). Optionally, you can let the author know that they can +add a "Reviewed-by: <you>" trailer if they resubmit the reviewed patch verbatim +in a later iteration of the series. + +Finally, subsequent "What's cooking" emails may explicitly ask whether a +reviewed topic is ready for merging to the `next` branch (typically phrased +"Will merge to \'next\'?"). You can help the maintainer and author by responding +with a short description of the state of your (and others', if applicable) +review, including the links to the relevant thread(s). + +Terminology +----------- +nit: :: + Denotes a small issue that should be fixed, such as a typographical error + or mis-alignment of conditions in an `if()` statement. + +aside: :: +optional: :: +non-blocking: :: + Indicates to the reader that the following comment should not block the + acceptance of the patch or series. These are typically recommendations + related to code organization & style, or musings about topics related to + the patch in question, but beyond its scope. + +s/<before>/<after>/:: + Shorthand for "you wrote <before>, but I think you meant <after>," usually + for misspellings or other typographical errors. The syntax is a reference + to "substitute" command commonly found in Unix tools such as `ed`, `sed`, + `vim`, and `perl`. + +cover letter:: + The "Patch 0" of a multi-patch series. This email describes the + high-level intent and structure of the patch series to readers on the + Git mailing list. It is also where the changelog notes and range-diff of + subsequent versions are provided by the author. ++ +On single-patch submissions, cover letter content is typically not sent as a +separate email. Instead, it is inserted between the end of the patch's commit +message (after the `---`) and the beginning of the diff. 
+ +#leftoverbits:: + Used by either an author or a reviewer to describe features or suggested + changes that are out-of-scope of a given patch or series, but are relevant + to the topic for the sake of discussion. + +See Also +-------- +link:MyFirstContribution.html[MyFirstContribution] From 89c8048855e7193988a7991ad01af0c6d8cf9226 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 20 Sep 2022 00:19:54 +0000 Subject: [PATCH 053/303] diagnose: add to command-list.txt Add 'git diagnose' as an "ancilliaryinterrogator" (like 'git bugreport') to 'command-list.txt' in order to have it show up in 'git help -a' and avoid the "no link" warning message from the 'check-docs' Makefile target. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- command-list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/command-list.txt b/command-list.txt index 93f94e42ab70f5..bb2e0a9214b486 100644 --- a/command-list.txt +++ b/command-list.txt @@ -91,6 +91,7 @@ git-cvsimport foreignscminterface git-cvsserver foreignscminterface git-daemon synchingrepositories git-describe mainporcelain +git-diagnose ancillaryinterrogators git-diff mainporcelain info git-diff-files plumbinginterrogators git-diff-index plumbinginterrogators From 9b1dc1c9d879368b6cfccb183f5fc19facb1d9df Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Tue, 20 Sep 2022 00:19:55 +0000 Subject: [PATCH 054/303] version: fix builtin linking & documentation Like most builtins, 'version' is documented in a corresponding 'Documentation/git-version.txt' and can be invoked with 'git version'. However, the 'check-docs' Makefile target showed that it was "removed but documented: git-version." This was cause by the fact that it is not built as a standalone 'git-version' executable, therefore appearing "removed" to 'check-docs'. 
Without a precedent for documented builtins that aren't built into an executable *or* any clear reason why a standalone 'git-version' shouldn't exist, the 'check-docs' error appears to correctly identify an issue. To correct that mismatch, add 'git-version' to the 'BUILT_INS' list in the root Makefile (indicating that the 'cmd_version()' function appears in a file that is *not* 'builtin/version.c'). Additionally, to avoid the "no link" message in 'check-docs', list 'git-version' as an "ancilliaryinterrogator" (like 'git help') in 'command-list.txt'. Signed-off-by: Victoria Dye Signed-off-by: Junio C Hamano --- .gitignore | 1 + Makefile | 1 + command-list.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3d1b880101e5ea..b3dcafcb3310e9 100644 --- a/.gitignore +++ b/.gitignore @@ -181,6 +181,7 @@ /git-verify-commit /git-verify-pack /git-verify-tag +/git-version /git-web--browse /git-whatchanged /git-worktree diff --git a/Makefile b/Makefile index 35a07f80c52160..cac3452edb90b4 100644 --- a/Makefile +++ b/Makefile @@ -818,6 +818,7 @@ BUILT_INS += git-show$X BUILT_INS += git-stage$X BUILT_INS += git-status$X BUILT_INS += git-switch$X +BUILT_INS += git-version$X BUILT_INS += git-whatchanged$X # what 'all' will build but not install in gitexecdir diff --git a/command-list.txt b/command-list.txt index bb2e0a9214b486..54b2a50f5f1dfe 100644 --- a/command-list.txt +++ b/command-list.txt @@ -199,6 +199,7 @@ git-var plumbinginterrogators git-verify-commit ancillaryinterrogators git-verify-pack plumbinginterrogators git-verify-tag ancillaryinterrogators +git-version ancillaryinterrogators git-whatchanged ancillaryinterrogators complete git-worktree mainporcelain git-write-tree plumbingmanipulators From 72991ff558585490aa4284c0b8ca1f13e86f0f18 Mon Sep 17 00:00:00 2001 From: Jacob Stopak Date: Mon, 19 Sep 2022 19:45:56 -0700 Subject: [PATCH 055/303] Documentation: clean up a few misspelled word typos Used GNU "aspell check " to review various 
documentation files with the default aspell dictionary. Ignored false-positives between american and british english. Signed-off-by: Jacob Stopak Reviewed-by: Eric Sunshine Signed-off-by: Junio C Hamano --- Documentation/MyFirstContribution.txt | 2 +- Documentation/MyFirstObjectWalk.txt | 2 +- Documentation/git.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/MyFirstContribution.txt b/Documentation/MyFirstContribution.txt index 1da15d9ad44615..1a4be8ee0adde5 100644 --- a/Documentation/MyFirstContribution.txt +++ b/Documentation/MyFirstContribution.txt @@ -1160,7 +1160,7 @@ all named like `v2-000n-my-commit-subject.patch`. `-v2` will also format your patches by prefixing them with "[PATCH v2]" instead of "[PATCH]", and your range-diff will be prefaced with "Range-diff against v1". -Afer you run this command, `format-patch` will output the patches to the `psuh/` +After you run this command, `format-patch` will output the patches to the `psuh/` directory, alongside the v1 patches. Using a single directory makes it easy to refer to the old v1 patches while proofreading the v2 patches, but you will need to be careful to send out only the v2 patches. We will use a pattern like diff --git a/Documentation/MyFirstObjectWalk.txt b/Documentation/MyFirstObjectWalk.txt index 8d9e85566e642e..eee513e86f4d64 100644 --- a/Documentation/MyFirstObjectWalk.txt +++ b/Documentation/MyFirstObjectWalk.txt @@ -534,7 +534,7 @@ the arguments to `traverse_commit_list()`. - `void *show_data`: A context buffer which is passed in turn to `show_commit` and `show_object`. -In addition, `traverse_commit_list_filtered()` has an additional paramter: +In addition, `traverse_commit_list_filtered()` has an additional parameter: - `struct oidset *omitted`: A linked-list of object IDs which the provided filter caused to be omitted. 
diff --git a/Documentation/git.txt b/Documentation/git.txt index 0ef7f5e4ecebf5..0c15ef3a8e6a51 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -344,7 +344,7 @@ Repository, command and file interfaces This documentation discusses repository and command interfaces which users are expected to interact with directly. See `--user-formats` in -linkgit:git-help[1] for more details on the critera. +linkgit:git-help[1] for more details on the criteria. include::cmds-userinterfaces.txt[] From bbb0c357b81d86dfd0b843cabe6c8fe29ced9ebd Mon Sep 17 00:00:00 2001 From: Jacob Stopak Date: Mon, 19 Sep 2022 19:45:57 -0700 Subject: [PATCH 056/303] Documentation: clean up various typos in technical docs Used GNU "aspell check " to review various technical documentation files with the default aspell dictionary. Ignored false-positives between american and british english. Signed-off-by: Jacob Stopak Reviewed-by: Eric Sunshine Signed-off-by: Junio C Hamano --- Documentation/technical/api-parse-options.txt | 2 +- Documentation/technical/bundle-uri.txt | 6 +++--- Documentation/technical/commit-graph.txt | 4 ++-- Documentation/technical/remembering-renames.txt | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/technical/api-parse-options.txt b/Documentation/technical/api-parse-options.txt index c2a5e429149417..61fa6ee167833a 100644 --- a/Documentation/technical/api-parse-options.txt +++ b/Documentation/technical/api-parse-options.txt @@ -60,7 +60,7 @@ Subcommands are special in a couple of ways: * All arguments following the subcommand are considered to be arguments of the subcommand, and, conversely, arguments meant for the subcommand may - not preceed the subcommand. + not precede the subcommand. 
Therefore, if the options array contains at least one subcommand and `parse_options()` encounters the first dashless argument, it will either: diff --git a/Documentation/technical/bundle-uri.txt b/Documentation/technical/bundle-uri.txt index c25c42378abe1f..8939655fc0aa68 100644 --- a/Documentation/technical/bundle-uri.txt +++ b/Documentation/technical/bundle-uri.txt @@ -290,7 +290,7 @@ expect that the process will end when all prerequisite commit OIDs in a thin bundle are already in the object database. When using the `creationToken` heuristic, the client can avoid downloading -any bundles if their creation tokenss are not larger than the stored +any bundles if their creation tokens are not larger than the stored creation token. After fetching new bundles, Git updates this local creation token. @@ -319,7 +319,7 @@ Here are a few example error conditions: Git's other HTTP protocols in terms of handling specific 400-level errors. -* The server reports any other failure reponse. +* The server reports any other failure response. * The client receives data that is not parsable as a bundle or bundle list. @@ -447,7 +447,7 @@ created every hour, and then once a day those "hourly" bundles could be merged into a "daily" bundle. The daily bundles are merged into the oldest bundle after 30 days. -It is recommened that this bundle strategy is repeated with the `blob:none` +It is recommended that this bundle strategy is repeated with the `blob:none` filter if clients of this repository are expecting to use blobless partial clones. This list of blobless bundles stays in the same list as the full bundles, but uses the `bundle..filter` key to separate the two groups. diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index f05e7bda1a9d66..90c9760c230555 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -40,7 +40,7 @@ Values 1-4 satisfy the requirements of parse_commit_gently(). 
There are two definitions of generation number: 1. Corrected committer dates (generation number v2) -2. Topological levels (generation nummber v1) +2. Topological levels (generation number v1) Define "corrected committer date" of a commit recursively as follows: @@ -48,7 +48,7 @@ Define "corrected committer date" of a commit recursively as follows: equal to its committer date. * A commit with at least one parent has corrected committer date equal to - the maximum of its commiter date and one more than the largest corrected + the maximum of its committer date and one more than the largest corrected committer date among its parents. * As a special case, a root commit with timestamp zero has corrected commit diff --git a/Documentation/technical/remembering-renames.txt b/Documentation/technical/remembering-renames.txt index af091a7556a4ef..1e34d913901e59 100644 --- a/Documentation/technical/remembering-renames.txt +++ b/Documentation/technical/remembering-renames.txt @@ -407,7 +407,7 @@ considered to be "irrelevant". See for example the following commits: no longer relevant", 2021-03-13) Relevance is always determined by what the _other_ side of history has -done, in terms of modifing a file that our side renamed, or adding a +done, in terms of modifying a file that our side renamed, or adding a file to a directory which our side renamed. This means that a path that is "irrelevant" when picking the first commit of a series in a rebase or cherry-pick, may suddenly become "relevant" when picking the From 8b74492135481fe0fdf4b2e023c55d4146a6d209 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Mon, 19 Sep 2022 23:07:25 -0600 Subject: [PATCH 057/303] gc: don't translate literal commands The command you type is still "git maintenance" even in other languages. 
Signed-off-by: Alex Henrie Signed-off-by: Junio C Hamano --- builtin/gc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 84549888f59d89..01ab0716eea94f 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1466,7 +1466,7 @@ static char *get_maintpath(void) } static char const * const builtin_maintenance_register_usage[] = { - N_("git maintenance register"), + "git maintenance register", NULL }; @@ -1524,7 +1524,7 @@ static int maintenance_register(int argc, const char **argv, const char *prefix) } static char const * const builtin_maintenance_unregister_usage[] = { - N_("git maintenance unregister"), + "git maintenance unregister", NULL }; @@ -2540,7 +2540,7 @@ static int maintenance_start(int argc, const char **argv, const char *prefix) } static const char *const builtin_maintenance_stop_usage[] = { - N_("git maintenance stop"), + "git maintenance stop", NULL }; From d11b875197c7b8150136f94788330567dc5902d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Tue, 20 Sep 2022 22:16:19 +0200 Subject: [PATCH 058/303] t/Makefile: remove 'test-results' on 'make clean' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 't/test-results' directory and its contents are by-products of the test process, so 'make clean' should remove them, but, alas, this has been broken since fee65b194d (t/Makefile: don't remove test-results in "clean-except-prove-cache", 2022-07-28). The 'clean' target in 't/Makefile' was not directly responsible for removing the 'test-results' directory, but relied on its dependency 'clean-except-prove-cache' to do that [1]. ee65b194d broke this, because it only removed the 'rm -r test-results' command from the 'clean-except-prove-cache' target instead of moving it to the 'clean' target, resulting in stray 't/test-results' directories. Add that missing cleanup command to 't/Makefile', and to all sub-Makefiles touched by that commit as well. 
[1] 60f26f6348 (t/Makefile: retain cache t/.prove across prove runs, 2012-05-02) Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano --- contrib/subtree/t/Makefile | 1 + t/Makefile | 1 + 2 files changed, 2 insertions(+) diff --git a/contrib/subtree/t/Makefile b/contrib/subtree/t/Makefile index 3d278bb0edbc36..4655e0987b32a8 100644 --- a/contrib/subtree/t/Makefile +++ b/contrib/subtree/t/Makefile @@ -51,6 +51,7 @@ clean-except-prove-cache: $(RM) -r valgrind/bin clean: clean-except-prove-cache + $(RM) -r '$(TEST_RESULTS_DIRECTORY_SQ)' $(RM) .prove test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax diff --git a/t/Makefile b/t/Makefile index 3db48c0cb64825..882782a519c97b 100644 --- a/t/Makefile +++ b/t/Makefile @@ -73,6 +73,7 @@ clean-except-prove-cache: clean-chainlint $(RM) -r valgrind/bin clean: clean-except-prove-cache + $(RM) -r '$(TEST_RESULTS_DIRECTORY_SQ)' $(RM) .prove clean-chainlint: From d956fa8082e1f8fb0fb26493113c1b98fee19fe2 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Mon, 19 Sep 2022 23:06:32 -0600 Subject: [PATCH 059/303] builtin/diagnose.c: don't translate the two mode values These strings are not translatable in the diagnose_options array in diagnose.c. Don't translate them in builtin/diagnose.c either. 
Signed-off-by: Alex Henrie Signed-off-by: Junio C Hamano --- builtin/diagnose.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/diagnose.c b/builtin/diagnose.c index cd260c20155e27..576e0e8e385c97 100644 --- a/builtin/diagnose.c +++ b/builtin/diagnose.c @@ -22,7 +22,7 @@ int cmd_diagnose(int argc, const char **argv, const char *prefix) N_("specify a destination for the diagnostics archive")), OPT_STRING('s', "suffix", &option_suffix, N_("format"), N_("specify a strftime format suffix for the filename")), - OPT_CALLBACK_F(0, "mode", &mode, N_("(stats|all)"), + OPT_CALLBACK_F(0, "mode", &mode, "(stats|all)", N_("specify the content of the diagnostic archive"), PARSE_OPT_NONEG, option_parse_diagnose), OPT_END() From 370d3a06a3ecb6950e10158c77ac32eb01ae0a88 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 21 Sep 2022 13:50:47 -0700 Subject: [PATCH 060/303] Final batch before -rc1 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.38.0.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/RelNotes/2.38.0.txt b/Documentation/RelNotes/2.38.0.txt index 5d9bd8c2950d0c..d16d19dcddc620 100644 --- a/Documentation/RelNotes/2.38.0.txt +++ b/Documentation/RelNotes/2.38.0.txt @@ -390,6 +390,10 @@ Fixes since v2.37 been corrected. (merge 49ca2fba39 jk/proto-v2-ref-prefix-fix later to maint). + * A result from opendir() was leaking in the commit-graph expiration + codepath, which has been plugged. + (merge 12f1ae5324 ml/commit-graph-expire-dir-leak-fix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 77b9e85c0f vd/fix-perf-tests later to maint). (merge 0682bc43f5 jk/test-crontab-fixes later to maint). 
From 1b3d6e17fe83eb6f79ffbac2f2c61bbf1eaef5f8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 21 Sep 2022 15:26:39 -0700 Subject: [PATCH 061/303] Git 2.38-rc1 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.38.0.txt | 4 ++++ GIT-VERSION-GEN | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/RelNotes/2.38.0.txt b/Documentation/RelNotes/2.38.0.txt index d16d19dcddc620..870581fc57ecff 100644 --- a/Documentation/RelNotes/2.38.0.txt +++ b/Documentation/RelNotes/2.38.0.txt @@ -394,6 +394,10 @@ Fixes since v2.37 codepath, which has been plugged. (merge 12f1ae5324 ml/commit-graph-expire-dir-leak-fix later to maint). + * Just like we have coding guidelines, we now have guidelines for + reviewers. + (merge e01b851923 vd/doc-reviewing-guidelines later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 77b9e85c0f vd/fix-perf-tests later to maint). (merge 0682bc43f5 jk/test-crontab-fixes later to maint). diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index ecd94fd3f28130..a6d1044e8db683 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.38.0-rc0 +DEF_VER=v2.38.0-rc1 LF=' ' From e94519109dab48245402dd16b3faa9d45a87fdc8 Mon Sep 17 00:00:00 2001 From: Sverre Rabbelier Date: Sun, 24 Jul 2011 15:54:04 +0200 Subject: [PATCH 062/303] t9350: point out that refs are not updated correctly This happens only when the corresponding commits are not exported in the current fast-export run. This can happen either when the relevant commit is already marked, or when the commit is explicitly marked as UNINTERESTING with a negative ref by another argument. This breaks fast-export based remote helpers. 
Signed-off-by: Sverre Rabbelier --- t/t9350-fast-export.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh index fc99703fc51810..2f203d41f0b22f 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -789,4 +789,15 @@ test_expect_success 'fast-export --first-parent outputs all revisions output by ) ' +cat > expected << EOF +reset refs/heads/master +from $(git rev-parse master) + +EOF + +test_expect_failure 'refs are updated even if no commits need to be exported' ' + git fast-export master..master > actual && + test_cmp expected actual +' + test_done From 751fa7112afce116f9e3621fbba080a875dcfea5 Mon Sep 17 00:00:00 2001 From: Sverre Rabbelier Date: Sat, 28 Aug 2010 20:49:01 -0500 Subject: [PATCH 063/303] transport-helper: add trailing -- [PT: ensure we add an additional element to the argv array] Signed-off-by: Sverre Rabbelier Signed-off-by: Johannes Schindelin --- transport-helper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/transport-helper.c b/transport-helper.c index e95267a4ab54dc..6afd7f8eb35521 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -478,6 +478,8 @@ static int get_exporter(struct transport *transport, for (i = 0; i < revlist_args->nr; i++) strvec_push(&fastexport->args, revlist_args->items[i].string); + strvec_push(&fastexport->args, "--"); + fastexport->git_cmd = 1; return start_command(fastexport); } From aca341d8b49f3fceb82547bfb877f304a8939777 Mon Sep 17 00:00:00 2001 From: Sverre Rabbelier Date: Sun, 24 Jul 2011 00:06:00 +0200 Subject: [PATCH 064/303] remote-helper: check helper status after import/export Signed-off-by: Johannes Schindelin Signed-off-by: Sverre Rabbelier --- t/t5801-remote-helpers.sh | 2 +- transport-helper.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/t/t5801-remote-helpers.sh b/t/t5801-remote-helpers.sh index d386076dbd3f8d..b5d69b0c50e060 100755 --- a/t/t5801-remote-helpers.sh +++ 
b/t/t5801-remote-helpers.sh @@ -239,7 +239,7 @@ test_expect_success 'push update refs failure' ' echo "update fail" >>file && git commit -a -m "update fail" && git rev-parse --verify testgit/origin/heads/update >expect && - test_expect_code 1 env GIT_REMOTE_TESTGIT_FAILURE="non-fast forward" \ + test_must_fail env GIT_REMOTE_TESTGIT_FAILURE="non-fast forward" \ git push origin update && git rev-parse --verify testgit/origin/heads/update >actual && test_cmp expect actual diff --git a/transport-helper.c b/transport-helper.c index 6afd7f8eb35521..432964d070cd02 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -484,6 +484,19 @@ static int get_exporter(struct transport *transport, return start_command(fastexport); } +static void check_helper_status(struct helper_data *data) +{ + int pid, status; + + pid = waitpid(data->helper->pid, &status, WNOHANG); + if (pid < 0) + die("Could not retrieve status of remote helper '%s'", + data->name); + if (pid > 0 && WIFEXITED(status)) + die("Remote helper '%s' died with %d", + data->name, WEXITSTATUS(status)); +} + static int fetch_with_import(struct transport *transport, int nr_heads, struct ref **to_fetch) { @@ -520,6 +533,7 @@ static int fetch_with_import(struct transport *transport, if (finish_command(&fastimport)) die(_("error while running fast-import")); + check_helper_status(data); /* * The fast-import stream of a remote helper that advertises @@ -1124,6 +1138,7 @@ static int push_refs_with_export(struct transport *transport, if (finish_command(&exporter)) die(_("error while running fast-export")); + check_helper_status(data); if (push_update_refs_status(data, remote_refs, flags)) return 1; From b8dddd89bd2a4fff05ddecb747d9fe2339716587 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 18 Apr 2017 12:09:08 +0200 Subject: [PATCH 065/303] mingw: demonstrate a problem with certain absolute paths On Windows, there are several categories of absolute paths. 
One such category starts with a backslash and is implicitly relative to the drive associated with the current working directory. Example: c: git clone https://github.com/git-for-windows/git \G4W should clone into C:\G4W. There is currently a problem with that, in that mingw_mktemp() does not expect the _wmktemp() function to prefix the absolute path with the drive prefix, and as a consequence, the resulting path does not fit into the originally-passed string buffer. The symptom is a "Result too large" error. Reported by Juan Carlos Arevalo Baeza. Signed-off-by: Johannes Schindelin --- t/t5580-unc-paths.sh | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/t/t5580-unc-paths.sh b/t/t5580-unc-paths.sh index cd7604fff93a55..116204b59d919e 100755 --- a/t/t5580-unc-paths.sh +++ b/t/t5580-unc-paths.sh @@ -21,14 +21,11 @@ fi UNCPATH="$(winpwd)" case "$UNCPATH" in [A-Z]:*) + WITHOUTDRIVE="${UNCPATH#?:}" # Use administrative share e.g. \\localhost\C$\git-sdk-64\usr\src\git # (we use forward slashes here because MSYS2 and Git accept them, and # they are easier on the eyes) - UNCPATH="//localhost/${UNCPATH%%:*}\$/${UNCPATH#?:}" - test -d "$UNCPATH" || { - skip_all='could not access administrative share; skipping' - test_done - } + UNCPATH="//localhost/${UNCPATH%%:*}\$$WITHOUTDRIVE" ;; *) skip_all='skipping UNC path tests, cannot determine current path as UNC' @@ -36,6 +33,18 @@ case "$UNCPATH" in ;; esac +test_expect_failure 'clone into absolute path lacking a drive prefix' ' + USINGBACKSLASHES="$(echo "$WITHOUTDRIVE"/without-drive-prefix | + tr / \\\\)" && + git clone . 
"$USINGBACKSLASHES" && + test -f without-drive-prefix/.git/HEAD +' + +test -d "$UNCPATH" || { + skip_all='could not access administrative share; skipping' + test_done +} + test_expect_success setup ' test_commit initial ' From 6a38c9fea66ce98b01301bbf2350f4950e54eff3 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 25 Apr 2019 12:05:29 -0400 Subject: [PATCH 066/303] gitk: prevent overly long command lines To avoid running into command line limitations, some of Git's commands support the `--stdin` option. Let's use exactly this option in the three rev-list/log invocations in gitk that would otherwise possibly run the danger of trying to invoke a too-long command line. While it is easy to redirect either stdin or stdout in Tcl/Tk scripts, what we need here is both. We need to capture the output, yet we also need to pipe in the revs/files arguments via stdin (because stdin does not have any limit, unlike the command line). To help this, we use the neat Tcl feature where you can capture stdout and at the same time feed a fixed string as stdin to the spawned process. One non-obvious aspect about this change is that the `--stdin` option allows to specify revs, the double-dash, and files, but *no* other options such as `--not`. This is addressed by prefixing the "negative" revs with `^` explicitly rather than relying on the `--not` option (thanks for coming up with that idea, Max!). 
This fixes https://github.com/git-for-windows/git/issues/1987 Analysis-and-initial-patch-by: Max Kirillov Signed-off-by: Johannes Schindelin --- gitk-git/gitk | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/gitk-git/gitk b/gitk-git/gitk index 0ae7d685904b85..92375ca6a2a96d 100755 --- a/gitk-git/gitk +++ b/gitk-git/gitk @@ -405,14 +405,16 @@ proc start_rev_list {view} { if {$revs eq {}} { return 0 } - set args [concat $vflags($view) $revs] + set args $vflags($view) } else { + set revs {} set args $vorigargs($view) } if {[catch { set fd [open [concat | git log --no-color -z --pretty=raw $show_notes \ - --parents --boundary $args "--" $files] r] + --parents --boundary $args --stdin \ + "<<[join [concat $revs "--" $files] "\\n"]"] r] } err]} { error_popup "[mc "Error executing git log:"] $err" return 0 @@ -554,13 +556,19 @@ proc updatecommits {} { set revs $newrevs set vposids($view) [lsort -unique [concat $oldpos $vposids($view)]] } - set args [concat $vflags($view) $revs --not $oldpos] + set args $vflags($view) + foreach r $oldpos { + lappend revs "^$r" + } } else { + set revs {} set args $vorigargs($view) } if {[catch { set fd [open [concat | git log --no-color -z --pretty=raw $show_notes \ - --parents --boundary $args "--" $vfilelimit($view)] r] + --parents --boundary $args --stdin \ + "<<[join [concat $revs "--" \ + $vfilelimit($view)] "\\n"]"] r] } err]} { error_popup "[mc "Error executing git log:"] $err" return @@ -10231,10 +10239,16 @@ proc getallcommits {} { foreach id $seeds { lappend ids "^$id" } + lappend ids "--" } } if {$ids ne {}} { - set fd [open [concat $cmd $ids] r] + if {$ids eq "--all"} { + set cmd [concat $cmd "--all"] + } else { + set cmd [concat $cmd --stdin "<<[join $ids "\\n"]"] + } + set fd [open $cmd r] fconfigure $fd -blocking 0 incr allcommits nowbusy allcommits From cae39813a2216950bff644dff1b23ae0def4489c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 7 Dec 2018 13:39:30 +0100 
Subject: [PATCH 067/303] clean: do not traverse mount points It seems to be not exactly rare on Windows to install NTFS junction points (the equivalent of "bind mounts" on Linux/Unix) in worktrees, e.g. to map some development tools into a subdirectory. In such a scenario, it is pretty horrible if `git clean -dfx` traverses into the mapped directory and starts to "clean up". Let's just not do that. Let's make sure before we traverse into a directory that it is not a mount point (or junction). This addresses https://github.com/git-for-windows/git/issues/607 Signed-off-by: Johannes Schindelin --- builtin/clean.c | 14 ++++++++++++++ cache.h | 1 + compat/mingw.c | 22 ++++++++++++++++++++++ compat/mingw.h | 3 +++ git-compat-util.h | 4 ++++ path.c | 39 +++++++++++++++++++++++++++++++++++++++ t/t7300-clean.sh | 9 +++++++++ 7 files changed, 92 insertions(+) diff --git a/builtin/clean.c b/builtin/clean.c index 5466636e66604e..34d56426d79a74 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -34,6 +34,8 @@ static const char *msg_remove = N_("Removing %s\n"); static const char *msg_would_remove = N_("Would remove %s\n"); static const char *msg_skip_git_dir = N_("Skipping repository %s\n"); static const char *msg_would_skip_git_dir = N_("Would skip repository %s\n"); +static const char *msg_skip_mount_point = N_("Skipping mount point %s\n"); +static const char *msg_would_skip_mount_point = N_("Would skip mount point %s\n"); static const char *msg_warn_remove_failed = N_("failed to remove %s"); static const char *msg_warn_lstat_failed = N_("could not lstat %s\n"); static const char *msg_skip_cwd = N_("Refusing to remove current working directory\n"); @@ -175,6 +177,18 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, goto out; } + if (is_mount_point(path)) { + if (!quiet) { + quote_path(path->buf, prefix, &quoted, 0); + printf(dry_run ? 
+ _(msg_would_skip_mount_point) : + _(msg_skip_mount_point), quoted.buf); + } + *dir_gone = 0; + + goto out; + } + dir = opendir(path->buf); if (!dir) { /* an empty dir could be removed even if it is unreadble */ diff --git a/cache.h b/cache.h index 26ed03bd6de626..e7344004f5c7e0 100644 --- a/cache.h +++ b/cache.h @@ -1313,6 +1313,7 @@ int normalize_path_copy_len(char *dst, const char *src, int *prefix_len); int normalize_path_copy(char *dst, const char *src); int longest_ancestor_length(const char *path, struct string_list *prefixes); char *strip_path_suffix(const char *path, const char *suffix); +int is_mount_point_via_stat(struct strbuf *path); int daemon_avoid_alias(const char *path); /* diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..5e8e7d584e020f 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2506,6 +2506,28 @@ pid_t waitpid(pid_t pid, int *status, int options) return -1; } +int mingw_is_mount_point(struct strbuf *path) +{ + WIN32_FIND_DATAW findbuf = { 0 }; + HANDLE handle; + wchar_t wfilename[MAX_PATH]; + int wlen = xutftowcs_path(wfilename, path->buf); + if (wlen < 0) + die(_("could not get long path for '%s'"), path->buf); + + /* remove trailing slash, if any */ + if (wlen > 0 && wfilename[wlen - 1] == L'/') + wfilename[--wlen] = L'\0'; + + handle = FindFirstFileW(wfilename, &findbuf); + if (handle == INVALID_HANDLE_VALUE) + return 0; + FindClose(handle); + + return (findbuf.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) && + (findbuf.dwReserved0 == IO_REPARSE_TAG_MOUNT_POINT); +} + int xutftowcsn(wchar_t *wcs, const char *utfs, size_t wcslen, int utflen) { int upos = 0, wpos = 0; diff --git a/compat/mingw.h b/compat/mingw.h index 209cf7cebadd17..33d8269db05103 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -449,6 +449,9 @@ static inline void convert_slashes(char *path) if (*path == '\\') *path = '/'; } +struct strbuf; +int mingw_is_mount_point(struct strbuf *path); +#define is_mount_point mingw_is_mount_point #define 
PATH_SEP ';' char *mingw_query_user_email(void); #define query_user_email mingw_query_user_email diff --git a/git-compat-util.h b/git-compat-util.h index b90b64718eb610..456a3f7b0e87be 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -540,6 +540,10 @@ static inline int git_has_dir_sep(const char *path) #define has_dir_sep(path) git_has_dir_sep(path) #endif +#ifndef is_mount_point +#define is_mount_point is_mount_point_via_stat +#endif + #ifndef query_user_email #define query_user_email() NULL #endif diff --git a/path.c b/path.c index a3cfcd8a6e95b3..b41c645c89b4c1 100644 --- a/path.c +++ b/path.c @@ -1300,6 +1300,45 @@ char *strip_path_suffix(const char *path, const char *suffix) return offset == -1 ? NULL : xstrndup(path, offset); } +int is_mount_point_via_stat(struct strbuf *path) +{ + size_t len = path->len; + unsigned int current_dev; + struct stat st; + + if (!strcmp("/", path->buf)) + return 1; + + strbuf_addstr(path, "/."); + if (lstat(path->buf, &st)) { + /* + * If we cannot access the current directory, we cannot say + * that it is a bind mount. + */ + strbuf_setlen(path, len); + return 0; + } + current_dev = st.st_dev; + + /* Now look at the parent directory */ + strbuf_addch(path, '.'); + if (lstat(path->buf, &st)) { + /* + * If we cannot access the parent directory, we cannot say + * that it is a bind mount. + */ + strbuf_setlen(path, len); + return 0; + } + strbuf_setlen(path, len); + + /* + * If the device ID differs between current and parent directory, + * then it is a bind mount. 
+ */ + return current_dev != st.st_dev; +} + int daemon_avoid_alias(const char *p) { int sl, ndot; diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 0399701e6276d6..6439e0f723db7c 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -788,4 +788,13 @@ test_expect_success 'traverse into directories that may have ignored entries' ' ) ' +test_expect_success MINGW 'clean does not traverse mount points' ' + mkdir target && + >target/dont-clean-me && + git init with-mountpoint && + cmd //c "mklink /j with-mountpoint\\mountpoint target" && + git -C with-mountpoint clean -dfx && + test_path_is_file target/dont-clean-me +' + test_done From a19f843c61806fad9e736e002a45055d27b55870 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 9 Apr 2012 13:04:35 -0500 Subject: [PATCH 068/303] Always auto-gc after calling a fast-import transport After importing anything with fast-import, we should always let the garbage collector do its job, since the objects are written to disk inefficiently. This brings down an initial import of http://selenic.com/hg from about 230 megabytes to about 14. In the future, we may want to make this configurable on a per-remote basis, or maybe teach fast-import about it in the first place. Signed-off-by: Johannes Schindelin --- transport-helper.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/transport-helper.c b/transport-helper.c index 432964d070cd02..eada58c8ef0391 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -16,6 +16,8 @@ #include "protocol.h" static int debug; +/* TODO: put somewhere sensible, e.g. git_transport_options? 
*/ +static int auto_gc = 1; struct helper_data { const char *name; @@ -567,6 +569,12 @@ static int fetch_with_import(struct transport *transport, } } strbuf_release(&buf); + if (auto_gc) { + const char *argv_gc_auto[] = { + "gc", "--auto", "--quiet", NULL, + }; + run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); + } return 0; } From e479420675ac42e60459b5107b5dadee2b41fdaa Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 18 Apr 2017 12:38:30 +0200 Subject: [PATCH 069/303] mingw: allow absolute paths without drive prefix When specifying an absolute path without a drive prefix, we convert that path internally. Let's make sure that we handle that case properly, too ;-) This fixes the command git clone https://github.com/git-for-windows/git \G4W Signed-off-by: Johannes Schindelin --- compat/mingw.c | 10 +++++++++- t/t5580-unc-paths.sh | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..4f2f96272d74b6 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1049,11 +1049,19 @@ unsigned int sleep (unsigned int seconds) char *mingw_mktemp(char *template) { wchar_t wtemplate[MAX_PATH]; + int offset = 0; + if (xutftowcs_path(wtemplate, template) < 0) return NULL; + + if (is_dir_sep(template[0]) && !is_dir_sep(template[1]) && + iswalpha(wtemplate[0]) && wtemplate[1] == L':') { + /* We have an absolute path missing the drive prefix */ + offset = 2; + } if (!_wmktemp(wtemplate)) return NULL; - if (xwcstoutf(template, wtemplate, strlen(template) + 1) < 0) + if (xwcstoutf(template, wtemplate + offset, strlen(template) + 1) < 0) return NULL; return template; } diff --git a/t/t5580-unc-paths.sh b/t/t5580-unc-paths.sh index 116204b59d919e..2c5b410048952d 100755 --- a/t/t5580-unc-paths.sh +++ b/t/t5580-unc-paths.sh @@ -33,7 +33,7 @@ case "$UNCPATH" in ;; esac -test_expect_failure 'clone into absolute path lacking a drive prefix' ' +test_expect_success 'clone into absolute path lacking a drive prefix' ' 
USINGBACKSLASHES="$(echo "$WITHOUTDRIVE"/without-drive-prefix | tr / \\\\)" && git clone . "$USINGBACKSLASHES" && From 991a895d10799080538a0dbfa6529dfccfe9f131 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 4 Sep 2017 11:59:45 +0200 Subject: [PATCH 070/303] mingw: change core.fsyncObjectFiles = 1 by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the documentation of said setting: This boolean will enable fsync() when writing object files. This is a total waste of time and effort on a filesystem that orders data writes properly, but can be useful for filesystems that do not use journalling (traditional UNIX filesystems) or that only journal metadata and not file contents (OS X’s HFS+, or Linux ext3 with "data=writeback"). The most common file system on Windows (NTFS) does not guarantee that order, therefore a sudden loss of power (or any other event causing an unclean shutdown) would cause corrupt files (i.e. files filled with NULs). Therefore we need to change the default. Note that the documentation makes it sound as if this causes really bad performance. In reality, writing loose objects is something that is done only rarely, and only a handful of files at a time. 
Signed-off-by: Johannes Schindelin --- compat/mingw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..13e90dea00a04b 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -3028,6 +3028,7 @@ int wmain(int argc, const wchar_t **wargv) #endif maybe_redirect_std_handles(); + fsync_object_files = 1; /* determine size of argv and environ conversion buffer */ maxlen = wcslen(wargv[0]); From b3b2ae8e0d718f12ffeffdce5527591dda88565b Mon Sep 17 00:00:00 2001 From: Nico Rieck Date: Mon, 19 Aug 2019 20:46:12 +0200 Subject: [PATCH 071/303] gitk: Escape file paths before piping to git log Fixes https://github.com/git-for-windows/git/issues/2293 Signed-off-by: Nico Rieck --- gitk-git/gitk | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/gitk-git/gitk b/gitk-git/gitk index 92375ca6a2a96d..df3ba2ea99b310 100755 --- a/gitk-git/gitk +++ b/gitk-git/gitk @@ -353,6 +353,16 @@ proc parseviewrevs {view revs} { return $ret } +# Escapes a list of filter paths to be passed to git log via stdin. Note that +# paths must not be quoted. 
+proc escape_filter_paths {paths} { + set escaped [list] + foreach path $paths { + lappend escaped [string map {\\ \\\\ "\ " "\\\ "} $path] + } + return $escaped +} + # Start off a git log process and arrange to read its output proc start_rev_list {view} { global startmsecs commitidx viewcomplete curview @@ -414,7 +424,8 @@ proc start_rev_list {view} { if {[catch { set fd [open [concat | git log --no-color -z --pretty=raw $show_notes \ --parents --boundary $args --stdin \ - "<<[join [concat $revs "--" $files] "\\n"]"] r] + "<<[join [concat $revs "--" \ + [escape_filter_paths $files]] "\\n"]"] r] } err]} { error_popup "[mc "Error executing git log:"] $err" return 0 @@ -568,7 +579,8 @@ proc updatecommits {} { set fd [open [concat | git log --no-color -z --pretty=raw $show_notes \ --parents --boundary $args --stdin \ "<<[join [concat $revs "--" \ - $vfilelimit($view)] "\\n"]"] r] + [escape_filter_paths \ + $vfilelimit($view)]] "\\n"]"] r] } err]} { error_popup "[mc "Error executing git log:"] $err" return From 31df13d441e3f778e78dd2333fd1fd3cc589bbde Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 11 Dec 2018 12:55:26 +0100 Subject: [PATCH 072/303] clean: remove mount points when possible Windows' equivalent to "bind mounts", NTFS junction points, can be unlinked without affecting the mount target. This is clearly what users expect to happen when they call `git clean -dfx` in a worktree that contains NTFS junction points: the junction should be removed, and the target directory of said junction should be left alone (unless it is inside the worktree). 
Signed-off-by: Johannes Schindelin --- builtin/clean.c | 13 +++++++++++++ compat/mingw.h | 1 + t/t7300-clean.sh | 1 + 3 files changed, 15 insertions(+) diff --git a/builtin/clean.c b/builtin/clean.c index 34d56426d79a74..cb55e8956b0747 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -34,8 +34,10 @@ static const char *msg_remove = N_("Removing %s\n"); static const char *msg_would_remove = N_("Would remove %s\n"); static const char *msg_skip_git_dir = N_("Skipping repository %s\n"); static const char *msg_would_skip_git_dir = N_("Would skip repository %s\n"); +#ifndef CAN_UNLINK_MOUNT_POINTS static const char *msg_skip_mount_point = N_("Skipping mount point %s\n"); static const char *msg_would_skip_mount_point = N_("Would skip mount point %s\n"); +#endif static const char *msg_warn_remove_failed = N_("failed to remove %s"); static const char *msg_warn_lstat_failed = N_("could not lstat %s\n"); static const char *msg_skip_cwd = N_("Refusing to remove current working directory\n"); @@ -178,6 +180,7 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, } if (is_mount_point(path)) { +#ifndef CAN_UNLINK_MOUNT_POINTS if (!quiet) { quote_path(path->buf, prefix, "ed, 0); printf(dry_run ? 
@@ -185,6 +188,16 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, _(msg_skip_mount_point), quoted.buf); } *dir_gone = 0; +#else + if (!dry_run && unlink(path->buf)) { + int saved_errno = errno; + quote_path(path->buf, prefix, "ed, 0); + errno = saved_errno; + warning_errno(_(msg_warn_remove_failed), quoted.buf); + *dir_gone = 0; + ret = -1; + } +#endif goto out; } diff --git a/compat/mingw.h b/compat/mingw.h index 33d8269db05103..941dfc5c8ff2a9 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -452,6 +452,7 @@ static inline void convert_slashes(char *path) struct strbuf; int mingw_is_mount_point(struct strbuf *path); #define is_mount_point mingw_is_mount_point +#define CAN_UNLINK_MOUNT_POINTS 1 #define PATH_SEP ';' char *mingw_query_user_email(void); #define query_user_email mingw_query_user_email diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 6439e0f723db7c..336f41c7426c87 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -794,6 +794,7 @@ test_expect_success MINGW 'clean does not traverse mount points' ' git init with-mountpoint && cmd //c "mklink /j with-mountpoint\\mountpoint target" && git -C with-mountpoint clean -dfx && + test_path_is_missing with-mountpoint/mountpoint && test_path_is_file target/dont-clean-me ' From 033c693de320e9a8174482a2ee4b13c654e0e803 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 16 Feb 2015 14:06:59 +0100 Subject: [PATCH 073/303] mingw: include the Python parts in the build While Git for Windows does not _ship_ Python (in order to save on bandwidth), MSYS2 provides very fine Python interpreters that users can easily take advantage of, by using Git for Windows within its SDK. 
Signed-off-by: Johannes Schindelin --- config.mak.uname | 1 + 1 file changed, 1 insertion(+) diff --git a/config.mak.uname b/config.mak.uname index d63629fe807f59..bf875867385aab 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -720,6 +720,7 @@ else USE_GETTEXT_SCHEME = fallthrough USE_LIBPCRE = YesPlease USE_NED_ALLOCATOR = YesPlease + NO_PYTHON = ifeq (/mingw64,$(subst 32,64,$(prefix))) # Move system config into top-level /etc/ ETC_GITCONFIG = ../etc/gitconfig From 3b060e516c0aaf0480f208de072bda58f0d581df Mon Sep 17 00:00:00 2001 From: Thomas Braun Date: Thu, 8 May 2014 21:43:24 +0200 Subject: [PATCH 074/303] Config option to disable side-band-64k for transport Since commit 0c499ea60f the send-pack builtin uses the side-band-64k capability if advertised by the server. Unfortunately this breaks pushing over the dump git protocol if used over a network connection. The detailed reasons for this breakage are (by courtesy of Jeff Preshing, quoted from https://groups.google.com/d/msg/msysgit/at8D7J-h7mw/eaLujILGUWoJ): ---------------------------------------------------------------------------- MinGW wraps Windows sockets in CRT file descriptors in order to mimic the functionality of POSIX sockets. This causes msvcrt.dll to treat sockets as Installable File System (IFS) handles, calling ReadFile, WriteFile, DuplicateHandle and CloseHandle on them. This approach works well in simple cases on recent versions of Windows, but does not support all usage patterns. In particular, using this approach, any attempt to read & write concurrently on the same socket (from one or more processes) will deadlock in a scenario where the read waits for a response from the server which is only invoked after the write. This is what send_pack currently attempts to do in the use_sideband codepath.
---------------------------------------------------------------------------- The new config option "sendpack.sideband" allows to override the side-band-64k capability of the server, and thus makes the dump git protocol work. Other transportation methods like ssh and http/https still benefit from the sideband channel, therefore the default value of "sendpack.sideband" is still true. [jes: split out the documentation into Documentation/config/] Signed-off-by: Thomas Braun Signed-off-by: Johannes Schindelin Signed-off-by: Oliver Schneider --- Documentation/config.txt | 2 ++ Documentation/config/sendpack.txt | 5 +++++ send-pack.c | 14 +++++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 Documentation/config/sendpack.txt diff --git a/Documentation/config.txt b/Documentation/config.txt index 5b5b9765699933..d9e87c1ba5a583 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -503,6 +503,8 @@ include::config/safe.txt[] include::config/sendemail.txt[] +include::config/sendpack.txt[] + include::config/sequencer.txt[] include::config/showbranch.txt[] diff --git a/Documentation/config/sendpack.txt b/Documentation/config/sendpack.txt new file mode 100644 index 00000000000000..e306f657fba7dd --- /dev/null +++ b/Documentation/config/sendpack.txt @@ -0,0 +1,5 @@ +sendpack.sideband:: + Allows to disable the side-band-64k capability for send-pack even + when it is advertised by the server. Makes it possible to work + around a limitation in the git for windows implementation together + with the dump git protocol. Defaults to true. 
diff --git a/send-pack.c b/send-pack.c index f2e19838c9c342..03356faae66710 100644 --- a/send-pack.c +++ b/send-pack.c @@ -39,6 +39,16 @@ int option_parse_push_signed(const struct option *opt, die("bad %s argument: %s", opt->long_name, arg); } +static int config_use_sideband = 1; + +static int send_pack_config(const char *var, const char *value, void *unused) +{ + if (!strcmp("sendpack.sideband", var)) + config_use_sideband = git_config_bool(var, value); + + return 0; +} + static void feed_object(const struct object_id *oid, FILE *fh, int negative) { if (negative && @@ -497,6 +507,8 @@ int send_pack(struct send_pack_args *args, return 0; } + git_config(send_pack_config, NULL); + git_config_get_bool("push.negotiate", &push_negotiate); if (push_negotiate) get_commons_through_negotiation(args->url, remote_refs, &commons); @@ -515,7 +527,7 @@ int send_pack(struct send_pack_args *args, allow_deleting_refs = 1; if (server_supports("ofs-delta")) args->use_ofs_delta = 1; - if (server_supports("side-band-64k")) + if (config_use_sideband && server_supports("side-band-64k")) use_sideband = 1; if (server_supports("quiet")) quiet_supported = 1; From 65532192f0e1225a7991cb17d52b4c0a288fa4ce Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 14 Nov 2019 20:09:23 +0100 Subject: [PATCH 075/303] mingw: make sure `errno` is set correctly when socket operations fail The winsock2 library provides functions that work on different data types than file descriptors, therefore we wrap them. But that is not the only difference: they also do not set `errno` but expect the callers to enquire about errors via `WSAGetLastError()`. Let's translate that into appropriate `errno` values whenever the socket operations fail so that Git's code base does not have to change its expectations. 
This closes https://github.com/git-for-windows/git/issues/2404 Helped-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/mingw.c | 157 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 147 insertions(+), 10 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..efeb5b1694592a 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2018,18 +2018,150 @@ static void ensure_socket_initialization(void) initialized = 1; } +static int winsock_error_to_errno(DWORD err) +{ + switch (err) { + case WSAEINTR: return EINTR; + case WSAEBADF: return EBADF; + case WSAEACCES: return EACCES; + case WSAEFAULT: return EFAULT; + case WSAEINVAL: return EINVAL; + case WSAEMFILE: return EMFILE; + case WSAEWOULDBLOCK: return EWOULDBLOCK; + case WSAEINPROGRESS: return EINPROGRESS; + case WSAEALREADY: return EALREADY; + case WSAENOTSOCK: return ENOTSOCK; + case WSAEDESTADDRREQ: return EDESTADDRREQ; + case WSAEMSGSIZE: return EMSGSIZE; + case WSAEPROTOTYPE: return EPROTOTYPE; + case WSAENOPROTOOPT: return ENOPROTOOPT; + case WSAEPROTONOSUPPORT: return EPROTONOSUPPORT; + case WSAEOPNOTSUPP: return EOPNOTSUPP; + case WSAEAFNOSUPPORT: return EAFNOSUPPORT; + case WSAEADDRINUSE: return EADDRINUSE; + case WSAEADDRNOTAVAIL: return EADDRNOTAVAIL; + case WSAENETDOWN: return ENETDOWN; + case WSAENETUNREACH: return ENETUNREACH; + case WSAENETRESET: return ENETRESET; + case WSAECONNABORTED: return ECONNABORTED; + case WSAECONNRESET: return ECONNRESET; + case WSAENOBUFS: return ENOBUFS; + case WSAEISCONN: return EISCONN; + case WSAENOTCONN: return ENOTCONN; + case WSAETIMEDOUT: return ETIMEDOUT; + case WSAECONNREFUSED: return ECONNREFUSED; + case WSAELOOP: return ELOOP; + case WSAENAMETOOLONG: return ENAMETOOLONG; + case WSAEHOSTUNREACH: return EHOSTUNREACH; + case WSAENOTEMPTY: return ENOTEMPTY; + /* No errno equivalent; default to EIO */ + case WSAESOCKTNOSUPPORT: + case WSAEPFNOSUPPORT: + case WSAESHUTDOWN: + case WSAETOOMANYREFS: + case WSAEHOSTDOWN: 
+ case WSAEPROCLIM: + case WSAEUSERS: + case WSAEDQUOT: + case WSAESTALE: + case WSAEREMOTE: + case WSASYSNOTREADY: + case WSAVERNOTSUPPORTED: + case WSANOTINITIALISED: + case WSAEDISCON: + case WSAENOMORE: + case WSAECANCELLED: + case WSAEINVALIDPROCTABLE: + case WSAEINVALIDPROVIDER: + case WSAEPROVIDERFAILEDINIT: + case WSASYSCALLFAILURE: + case WSASERVICE_NOT_FOUND: + case WSATYPE_NOT_FOUND: + case WSA_E_NO_MORE: + case WSA_E_CANCELLED: + case WSAEREFUSED: + case WSAHOST_NOT_FOUND: + case WSATRY_AGAIN: + case WSANO_RECOVERY: + case WSANO_DATA: + case WSA_QOS_RECEIVERS: + case WSA_QOS_SENDERS: + case WSA_QOS_NO_SENDERS: + case WSA_QOS_NO_RECEIVERS: + case WSA_QOS_REQUEST_CONFIRMED: + case WSA_QOS_ADMISSION_FAILURE: + case WSA_QOS_POLICY_FAILURE: + case WSA_QOS_BAD_STYLE: + case WSA_QOS_BAD_OBJECT: + case WSA_QOS_TRAFFIC_CTRL_ERROR: + case WSA_QOS_GENERIC_ERROR: + case WSA_QOS_ESERVICETYPE: + case WSA_QOS_EFLOWSPEC: + case WSA_QOS_EPROVSPECBUF: + case WSA_QOS_EFILTERSTYLE: + case WSA_QOS_EFILTERTYPE: + case WSA_QOS_EFILTERCOUNT: + case WSA_QOS_EOBJLENGTH: + case WSA_QOS_EFLOWCOUNT: +#ifndef _MSC_VER + case WSA_QOS_EUNKNOWNPSOBJ: +#endif + case WSA_QOS_EPOLICYOBJ: + case WSA_QOS_EFLOWDESC: + case WSA_QOS_EPSFLOWSPEC: + case WSA_QOS_EPSFILTERSPEC: + case WSA_QOS_ESDMODEOBJ: + case WSA_QOS_ESHAPERATEOBJ: + case WSA_QOS_RESERVED_PETYPE: + default: return EIO; + } +} + +/* + * On Windows, `errno` is a global macro to a function call. + * This makes it difficult to debug and single-step our mappings. 
+ */ +static inline void set_wsa_errno(void) +{ + DWORD wsa = WSAGetLastError(); + int e = winsock_error_to_errno(wsa); + errno = e; + +#ifdef DEBUG_WSA_ERRNO + fprintf(stderr, "winsock error: %d -> %d\n", wsa, e); + fflush(stderr); +#endif +} + +static inline int winsock_return(int ret) +{ + if (ret < 0) + set_wsa_errno(); + + return ret; +} + +#define WINSOCK_RETURN(x) do { return winsock_return(x); } while (0) + #undef gethostname int mingw_gethostname(char *name, int namelen) { - ensure_socket_initialization(); - return gethostname(name, namelen); + ensure_socket_initialization(); + WINSOCK_RETURN(gethostname(name, namelen)); } #undef gethostbyname struct hostent *mingw_gethostbyname(const char *host) { + struct hostent *ret; + ensure_socket_initialization(); - return gethostbyname(host); + + ret = gethostbyname(host); + if (!ret) + set_wsa_errno(); + + return ret; } #undef getaddrinfo @@ -2037,7 +2169,7 @@ int mingw_getaddrinfo(const char *node, const char *service, const struct addrinfo *hints, struct addrinfo **res) { ensure_socket_initialization(); - return getaddrinfo(node, service, hints, res); + WINSOCK_RETURN(getaddrinfo(node, service, hints, res)); } int mingw_socket(int domain, int type, int protocol) @@ -2057,7 +2189,7 @@ int mingw_socket(int domain, int type, int protocol) * in errno so that _if_ someone looks up the code somewhere, * then it is at least the number that are usually listed. 
*/ - errno = WSAGetLastError(); + set_wsa_errno(); return -1; } /* convert into a file descriptor */ @@ -2073,35 +2205,35 @@ int mingw_socket(int domain, int type, int protocol) int mingw_connect(int sockfd, struct sockaddr *sa, size_t sz) { SOCKET s = (SOCKET)_get_osfhandle(sockfd); - return connect(s, sa, sz); + WINSOCK_RETURN(connect(s, sa, sz)); } #undef bind int mingw_bind(int sockfd, struct sockaddr *sa, size_t sz) { SOCKET s = (SOCKET)_get_osfhandle(sockfd); - return bind(s, sa, sz); + WINSOCK_RETURN(bind(s, sa, sz)); } #undef setsockopt int mingw_setsockopt(int sockfd, int lvl, int optname, void *optval, int optlen) { SOCKET s = (SOCKET)_get_osfhandle(sockfd); - return setsockopt(s, lvl, optname, (const char*)optval, optlen); + WINSOCK_RETURN(setsockopt(s, lvl, optname, (const char*)optval, optlen)); } #undef shutdown int mingw_shutdown(int sockfd, int how) { SOCKET s = (SOCKET)_get_osfhandle(sockfd); - return shutdown(s, how); + WINSOCK_RETURN(shutdown(s, how)); } #undef listen int mingw_listen(int sockfd, int backlog) { SOCKET s = (SOCKET)_get_osfhandle(sockfd); - return listen(s, backlog); + WINSOCK_RETURN(listen(s, backlog)); } #undef accept @@ -2112,6 +2244,11 @@ int mingw_accept(int sockfd1, struct sockaddr *sa, socklen_t *sz) SOCKET s1 = (SOCKET)_get_osfhandle(sockfd1); SOCKET s2 = accept(s1, sa, sz); + if (s2 == INVALID_SOCKET) { + set_wsa_errno(); + return -1; + } + /* convert into a file descriptor */ if ((sockfd2 = _open_osfhandle(s2, O_RDWR|O_BINARY)) < 0) { int err = errno; From 511d9f8dcd722232524e2eaf99a3a754c212e418 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 1 Jan 2020 21:07:22 +0100 Subject: [PATCH 076/303] mingw: do resolve symlinks in `getcwd()` As pointed out in https://github.com/git-for-windows/git/issues/1676, the `git rev-parse --is-inside-work-tree` command currently fails when the current directory's path contains symbolic links. 
The underlying reason for this bug is that `getcwd()` is supposed to resolve symbolic links, but our `mingw_getcwd()` implementation did not. We do have all the building blocks for that, though: the `GetFinalPathNameByHandleW()` function will resolve symbolic links. However, we only called that function if `GetLongPathNameW()` failed, for historical reasons: the latter function was supported for a long time, but the former API function was introduced only with Windows Vista, and we used to also support Windows XP. With that support having been dropped, we are free to call the symbolic link-resolving function right away. Signed-off-by: Johannes Schindelin --- compat/mingw.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..84704034866e4d 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1119,18 +1119,16 @@ char *mingw_getcwd(char *pointer, int len) { wchar_t cwd[MAX_PATH], wpointer[MAX_PATH]; DWORD ret = GetCurrentDirectoryW(ARRAY_SIZE(cwd), cwd); + HANDLE hnd; if (!ret || ret >= ARRAY_SIZE(cwd)) { errno = ret ?
ENAMETOOLONG : err_win_to_posix(GetLastError()); return NULL; } - ret = GetLongPathNameW(cwd, wpointer, ARRAY_SIZE(wpointer)); - if (!ret && GetLastError() == ERROR_ACCESS_DENIED) { - HANDLE hnd = CreateFileW(cwd, 0, - FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, - OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); - if (hnd == INVALID_HANDLE_VALUE) - return NULL; + hnd = CreateFileW(cwd, 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + if (hnd != INVALID_HANDLE_VALUE) { ret = GetFinalPathNameByHandleW(hnd, wpointer, ARRAY_SIZE(wpointer), 0); CloseHandle(hnd); if (!ret || ret >= ARRAY_SIZE(wpointer)) @@ -1139,13 +1137,11 @@ char *mingw_getcwd(char *pointer, int len) return NULL; return pointer; } - if (!ret || ret >= ARRAY_SIZE(wpointer)) - return NULL; - if (GetFileAttributesW(wpointer) == INVALID_FILE_ATTRIBUTES) { + if (GetFileAttributesW(cwd) == INVALID_FILE_ATTRIBUTES) { errno = ENOENT; return NULL; } - if (xwcstoutf(pointer, wpointer, len) < 0) + if (xwcstoutf(pointer, cwd, len) < 0) return NULL; convert_slashes(pointer); return pointer; From 26c1e40c80387e9d829ced2561c6d9372567b64c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 31 Jan 2020 12:02:47 +0100 Subject: [PATCH 077/303] mingw: demonstrate a `git add` issue with NTFS junctions NTFS junctions are somewhat similar in spirit to Unix bind mounts: they point to a different directory and are resolved by the filesystem driver. As such, they appear to `lstat()` as if they are directories, not as if they are symbolic links. _Any_ user can create junctions, while symbolic links can only be created by non-administrators in Developer Mode on Windows 10. Hence NTFS junctions are much more common "in the wild" than NTFS symbolic links. 
It was reported in https://github.com/git-for-windows/git/issues/2481 that adding files via an absolute path that traverses an NTFS junction fails: since 1e64d18 (mingw: do resolve symlinks in `getcwd()`), we resolve not only symbolic links but also NTFS junctions when determining the absolute path of the current directory. The same is not true for `git add <file>`, where symbolic links are resolved in `<file>`, but not NTFS junctions. Signed-off-by: Johannes Schindelin --- t/t3700-add.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t3700-add.sh b/t/t3700-add.sh index 8689b48589c0d5..c2e4cfe1003b93 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -498,4 +498,15 @@ test_expect_success CASE_INSENSITIVE_FS 'path is case-insensitive' ' git add "$downcased" ' +test_expect_failure MINGW 'can add files via NTFS junctions' ' + test_when_finished "cmd //c rmdir junction && rm -rf target" && + test_create_repo target && + cmd //c "mklink /j junction target" && + >target/via-junction && + git -C junction add "$(pwd)/junction/via-junction" && + echo via-junction >expect && + git -C target diff --cached --name-only >actual && + test_cmp expect actual +' + test_done From 7aded36cfee9e5e92567773628b0844d9ad44b08 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 21 Feb 2017 13:28:58 +0100 Subject: [PATCH 078/303] mingw: ensure valid CTYPE A change between versions 2.4.1 and 2.6.0 of the MSYS2 runtime modified how Cygwin's runtime (and hence Git for Windows' MSYS2 runtime derivative) handles locales: d16a56306d (Consolidate wctomb/mbtowc calls for POSIX-1.2008, 2016-07-20). An unintended side-effect is that "cold-calling" into the POSIX emulation will start with a locale based on the current code page, something that Git for Windows is very ill-prepared for, as it expects to be able to pass a command-line containing non-ASCII characters to the shell without having those characters munged.
One symptom of this behavior: when `git clone` or `git fetch` shell out to call `git-upload-pack` with a path that contains non-ASCII characters, the shell tried to interpret the entire command-line (including command-line parameters) as executable path, which obviously must fail. This fixes https://github.com/git-for-windows/git/issues/1036 Signed-off-by: Johannes Schindelin --- compat/mingw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..01a84b11c6c736 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2642,6 +2642,9 @@ static void setup_windows_environment(void) if (!tmp && (tmp = getenv("USERPROFILE"))) setenv("HOME", tmp, 1); } + + if (!getenv("LC_ALL") && !getenv("LC_CTYPE") && !getenv("LANG")) + setenv("LC_CTYPE", "C.UTF-8", 1); } static PSID get_current_user_sid(void) From 026942a1bd93c46911c1cb07235d3f76aa39d8d1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 9 May 2020 14:08:36 +0200 Subject: [PATCH 079/303] vcxproj: unclash project directories with build outputs It already caused problems with the test suite that the directory containing `git.vcxproj` is called the same as the Git executable without its file extension: `./git` is ambiguous, it could refer both to the directory `git/` as well as to `git.exe`. Now there is one more problem: when our GitHub workflow runs on the `vs/master` branch, it fails in all but the Windows builds, as they want to write the file `git` but there is already a directory in the way. Let's just go ahead and append `.proj` to all of those directories, e.g. `git.proj/` instead of `git/`. 
Signed-off-by: Johannes Schindelin --- config.mak.uname | 8 ++++---- contrib/buildsystems/Generators/Vcxproj.pm | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/config.mak.uname b/config.mak.uname index d63629fe807f59..ec75abf3227683 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -755,7 +755,7 @@ vcxproj: # Make .vcxproj files and add them perl contrib/buildsystems/generate -g Vcxproj - git add -f git.sln {*,*/lib,t/helper/*}/*.vcxproj + git add -f git.sln {*,*/lib.proj,t/helper/*}/*.vcxproj # Generate the LinkOrCopyBuiltins.targets and LinkOrCopyRemoteHttp.targets file (echo '' && \ @@ -765,7 +765,7 @@ vcxproj: echo ' '; \ done && \ echo ' ' && \ - echo '') >git/LinkOrCopyBuiltins.targets + echo '') >git.proj/LinkOrCopyBuiltins.targets (echo '' && \ echo ' ' && \ for name in $(REMOTE_CURL_ALIASES); \ @@ -773,8 +773,8 @@ vcxproj: echo ' '; \ done && \ echo ' ' && \ - echo '') >git-remote-http/LinkOrCopyRemoteHttp.targets - git add -f git/LinkOrCopyBuiltins.targets git-remote-http/LinkOrCopyRemoteHttp.targets + echo '') >git-remote-http.proj/LinkOrCopyRemoteHttp.targets + git add -f git.proj/LinkOrCopyBuiltins.targets git-remote-http.proj/LinkOrCopyRemoteHttp.targets # Add generated headers $(MAKE) MSVC=1 SKIP_VCPKG=1 prefix=/mingw64 $(GENERATED_H) diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index 1a25789d28513b..81ee2f5a4a41a5 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -58,8 +58,8 @@ sub createProject { my $uuid = generate_guid($name); $$build_structure{"$prefix${target}_GUID"} = $uuid; my $vcxproj = $target; - $vcxproj =~ s/(.*\/)?(.*)/$&\/$2.vcxproj/; - $vcxproj =~ s/([^\/]*)(\/lib)\/(lib.vcxproj)/$1$2\/$1_$3/; + $vcxproj =~ s/(.*\/)?(.*)/$&.proj\/$2.vcxproj/; + $vcxproj =~ s/([^\/]*)(\/lib\.proj)\/(lib.vcxproj)/$1$2\/$1_$3/; $$build_structure{"$prefix${target}_VCXPROJ"} = $vcxproj; my @srcs = 
sort(map("$rel_dir\\$_", @{$$build_structure{"$prefix${name}_SOURCES"}})); @@ -89,7 +89,9 @@ sub createProject { $defines =~ s/>/>/g; $defines =~ s/\'//g; - die "Could not create the directory $target for $label project!\n" unless (-d "$target" || mkdir "$target"); + my $dir = $vcxproj; + $dir =~ s/\/[^\/]*$//; + die "Could not create the directory $dir for $label project!\n" unless (-d "$dir" || mkdir "$dir"); open F, ">$vcxproj" or die "Could not open $vcxproj for writing!\n"; binmode F, ":crlf :utf8"; @@ -237,7 +239,7 @@ EOM print F << "EOM"; - + $uuid_libgit false @@ -252,7 +254,7 @@ EOM } if (!($name =~ 'xdiff')) { print F << "EOM"; - + $uuid_xdiff_lib false @@ -261,7 +263,7 @@ EOM if ($name =~ /(test-(line-buffer|svn-fe)|^git-remote-testsvn)\.exe$/) { my $uuid_vcs_svn_lib = $$build_structure{"LIBS_vcs-svn/lib_GUID"}; print F << "EOM"; - + $uuid_vcs_svn_lib false @@ -338,7 +340,7 @@ sub createGlueProject { my $vcxproj = $build_structure{"APPS_${appname}_VCXPROJ"}; $vcxproj =~ s/\//\\/g; $appname =~ s/.*\///; - print F "\"${appname}\", \"${vcxproj}\", \"${uuid}\""; + print F "\"${appname}.proj\", \"${vcxproj}\", \"${uuid}\""; print F "$SLN_POST"; } foreach (@libs) { @@ -348,7 +350,7 @@ sub createGlueProject { my $vcxproj = $build_structure{"LIBS_${libname}_VCXPROJ"}; $vcxproj =~ s/\//\\/g; $libname =~ s/\//_/g; - print F "\"${libname}\", \"${vcxproj}\", \"${uuid}\""; + print F "\"${libname}.proj\", \"${vcxproj}\", \"${uuid}\""; print F "$SLN_POST"; } From 07f2f5741efb72e7df742fdf80574cd808d5eb42 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 31 Jan 2020 11:44:31 +0100 Subject: [PATCH 080/303] strbuf_realpath(): use platform-dependent API if available Some platforms (e.g. Windows) provide API functions to resolve paths much quicker. Let's offer a way to short-cut `strbuf_realpath()` on those platforms. 
Signed-off-by: Johannes Schindelin --- abspath.c | 3 +++ git-compat-util.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/abspath.c b/abspath.c index 39e06b58486e3e..7a5f114382f182 100644 --- a/abspath.c +++ b/abspath.c @@ -91,6 +91,9 @@ static char *strbuf_realpath_1(struct strbuf *resolved, const char *path, goto error_out; } + if (platform_strbuf_realpath(resolved, path)) + return resolved->buf; + strbuf_addstr(&remaining, path); get_root_part(resolved, &remaining); diff --git a/git-compat-util.h b/git-compat-util.h index b90b64718eb610..437e7008d99c28 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -544,6 +544,10 @@ static inline int git_has_dir_sep(const char *path) #define query_user_email() NULL #endif +#ifndef platform_strbuf_realpath +#define platform_strbuf_realpath(resolved, path) NULL +#endif + #ifdef __TANDEM #include #include From ed7747c744a35304db6abc17bb7ce0b54531f86b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 1 Feb 2020 00:31:16 +0100 Subject: [PATCH 081/303] mingw: allow `git.exe` to be used instead of the "Git wrapper" Git for Windows wants to add `git.exe` to the users' `PATH`, without cluttering the latter with unnecessary executables such as `wish.exe`. To that end, it invented the concept of its "Git wrapper", i.e. a tiny executable located in `C:\Program Files\Git\cmd\git.exe` (originally a CMD script) whose sole purpose is to set up a couple of environment variables and then spawn the _actual_ `git.exe` (which nowadays lives in `C:\Program Files\Git\mingw64\bin\git.exe` for 64-bit, and the obvious equivalent for 32-bit installations). 
Currently, the following environment variables are set unless already initialized: - `MSYSTEM`, to make sure that the MSYS2 Bash and the MSYS2 Perl interpreter behave as expected, and - `PLINK_PROTOCOL`, to force PuTTY's `plink.exe` to use the SSH protocol instead of Telnet, - `PATH`, to make sure that the `bin` folder in the user's home directory, as well as the `/mingw64/bin` and the `/usr/bin` directories are included. The trick here is that the `/mingw64/bin/` and `/usr/bin/` directories are relative to the top-level installation directory of Git for Windows (which the included Bash interprets as `/`, i.e. as the MSYS pseudo root directory). Using the absence of `MSYSTEM` as a tell-tale, we can detect in `git.exe` whether these environment variables have been initialized properly. Therefore we can call `C:\Program Files\Git\mingw64\bin\git` in-place after this change, without having to call Git through the Git wrapper. Obviously, above-mentioned directories must be _prepended_ to the `PATH` variable, otherwise we risk picking up executables from unrelated Git installations. We do that by constructing the new `PATH` value from scratch, appending `$HOME/bin` (if `HOME` is set), then the MSYS2 system directories, and then appending the original `PATH`. Side note: this modification of the `PATH` variable is independent of the modification necessary to reach the executables and scripts in `/mingw64/libexec/git-core/`, i.e. the `GIT_EXEC_PATH`. That modification is still performed by Git, elsewhere, long after making the changes described above. While we _still_ cannot simply hard-link `mingw64\bin\git.exe` to `cmd` (because the former depends on a couple of `.dll` files that are only in `mingw64\bin`, i.e. 
calling `...\cmd\git.exe` would fail to load due to missing dependencies), at least we can now avoid that extra process of running the Git wrapper (which then has to wait for the spawned `git.exe` to finish) by calling `...\mingw64\bin\git.exe` directly, via its absolute path. Testing this in Git's test suite is tricky: we set up a "new" MSYS pseudo-root and copy the `git.exe` file into the appropriate location, then verify that `MSYSTEM` is set properly, and also that the `PATH` is modified so that scripts can be found in `$HOME/bin`, `/mingw64/bin/` and `/usr/bin/`. This addresses https://github.com/git-for-windows/git/issues/2283 Signed-off-by: Johannes Schindelin --- compat/mingw.c | 69 +++++++++++++++++++++++++++++++++++++++++++ config.mak.uname | 4 +-- t/t0060-path-utils.sh | 23 ++++++++++++++- 3 files changed, 93 insertions(+), 3 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 01a84b11c6c736..7f9150b443742f 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2591,6 +2591,47 @@ int xwcstoutf(char *utf, const wchar_t *wcs, size_t utflen) return -1; } +#ifdef ENSURE_MSYSTEM_IS_SET +static size_t append_system_bin_dirs(char *path, size_t size) +{ +#if !defined(RUNTIME_PREFIX) || !defined(HAVE_WPGMPTR) + return 0; +#else + char prefix[32768]; + const char *slash; + size_t len = xwcstoutf(prefix, _wpgmptr, sizeof(prefix)), off = 0; + + if (len == 0 || len >= sizeof(prefix) || + !(slash = find_last_dir_sep(prefix))) + return 0; + /* strip trailing `git.exe` */ + len = slash - prefix; + + /* strip trailing `cmd` or `mingw64\bin` or `mingw32\bin` or `bin` or `libexec\git-core` */ + if (strip_suffix_mem(prefix, &len, "\\mingw64\\libexec\\git-core") || + strip_suffix_mem(prefix, &len, "\\mingw64\\bin")) + off += xsnprintf(path + off, size - off, + "%.*s\\mingw64\\bin;", (int)len, prefix); + else if (strip_suffix_mem(prefix, &len, "\\mingw32\\libexec\\git-core") || + strip_suffix_mem(prefix, &len, "\\mingw32\\bin")) + off += xsnprintf(path + off, size -
off, + "%.*s\\mingw32\\bin;", (int)len, prefix); + else if (strip_suffix_mem(prefix, &len, "\\cmd") || + strip_suffix_mem(prefix, &len, "\\bin") || + strip_suffix_mem(prefix, &len, "\\libexec\\git-core")) + off += xsnprintf(path + off, size - off, + "%.*s\\mingw%d\\bin;", (int)len, prefix, + (int)(sizeof(void *) * 8)); + else + return 0; + + off += xsnprintf(path + off, size - off, + "%.*s\\usr\\bin;", (int)len, prefix); + return off; +#endif +} +#endif + static void setup_windows_environment(void) { char *tmp = getenv("TMPDIR"); @@ -2643,6 +2684,34 @@ static void setup_windows_environment(void) setenv("HOME", tmp, 1); } + if (!getenv("PLINK_PROTOCOL")) + setenv("PLINK_PROTOCOL", "ssh", 0); + +#ifdef ENSURE_MSYSTEM_IS_SET + if (!(tmp = getenv("MSYSTEM")) || !tmp[0]) { + const char *home = getenv("HOME"), *path = getenv("PATH"); + char buf[32768]; + size_t off = 0; + + xsnprintf(buf, sizeof(buf), + "MINGW%d", (int)(sizeof(void *) * 8)); + setenv("MSYSTEM", buf, 1); + + if (home) + off += xsnprintf(buf + off, sizeof(buf) - off, + "%s\\bin;", home); + off += append_system_bin_dirs(buf + off, sizeof(buf) - off); + if (path) + off += xsnprintf(buf + off, sizeof(buf) - off, + "%s", path); + else if (off > 0) + buf[off - 1] = '\0'; + else + buf[0] = '\0'; + setenv("PATH", buf, 1); + } +#endif + if (!getenv("LC_ALL") && !getenv("LC_CTYPE") && !getenv("LANG")) setenv("LC_CTYPE", "C.UTF-8", 1); } diff --git a/config.mak.uname b/config.mak.uname index d63629fe807f59..b747afd95db0f1 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -487,7 +487,7 @@ endif compat/win32/pthread.o compat/win32/syslog.o \ compat/win32/trace2_win32_process_info.o \ compat/win32/dirent.o - COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DDETECT_MSYS_TTY -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" + COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DDETECT_MSYS_TTY -DENSURE_MSYSTEM_IS_SET -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 
-DSTRIP_EXTENSION=\".exe\" BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -ENTRY:wmainCRTStartup -SUBSYSTEM:CONSOLE # invalidcontinue.obj allows Git's source code to close the same file # handle twice, or to access the osfhandle of an already-closed stdout @@ -712,7 +712,7 @@ else endif CC = gcc COMPAT_CFLAGS += -D__USE_MINGW_ANSI_STDIO=0 -DDETECT_MSYS_TTY \ - -fstack-protector-strong + -DENSURE_MSYSTEM_IS_SET -fstack-protector-strong EXTLIBS += -lntdll INSTALL = /bin/install INTERNAL_QSORT = YesPlease diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 68e29c904a62c9..304a2ea96032b3 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -549,7 +549,8 @@ test_expect_success !VALGRIND,RUNTIME_PREFIX,CAN_EXEC_IN_PWD 'RUNTIME_PREFIX wor cp "$GIT_EXEC_PATH"/git$X pretend/bin/ && GIT_EXEC_PATH= ./pretend/bin/git here >actual && echo HERE >expect && - test_cmp expect actual' + test_cmp expect actual +' test_expect_success !VALGRIND,RUNTIME_PREFIX,CAN_EXEC_IN_PWD '%(prefix)/ works' ' mkdir -p pretend/bin && @@ -560,4 +561,24 @@ test_expect_success !VALGRIND,RUNTIME_PREFIX,CAN_EXEC_IN_PWD '%(prefix)/ works' test_cmp expect actual ' +test_expect_success MINGW 'MSYSTEM/PATH is adjusted if necessary' ' + mkdir -p "$HOME"/bin pretend/mingw64/bin \ + pretend/mingw64/libexec/git-core pretend/usr/bin && + cp "$GIT_EXEC_PATH"/git.exe pretend/mingw64/bin/ && + cp "$GIT_EXEC_PATH"/git.exe pretend/mingw64/libexec/git-core/ && + echo "env | grep MSYSTEM=" | write_script "$HOME"/bin/git-test-home && + echo "echo mingw64" | write_script pretend/mingw64/bin/git-test-bin && + echo "echo usr" | write_script pretend/usr/bin/git-test-bin2 && + + ( + MSYSTEM= && + GIT_EXEC_PATH= && + pretend/mingw64/libexec/git-core/git.exe test-home >actual && + pretend/mingw64/libexec/git-core/git.exe test-bin >>actual && + pretend/mingw64/bin/git.exe test-bin2 >>actual + ) && + test_write_lines MSYSTEM=$MSYSTEM mingw64 usr >expect && + test_cmp expect actual +' + test_done From 
ccfba3414ca49eee518788a2a09c73adaa3b9c37 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 9 May 2020 16:19:06 +0200 Subject: [PATCH 082/303] t5505/t5516: allow running without `.git/branches/` in the templates When we commit the template directory as part of `make vcxproj`, the `branches/` directory is not actually committed, as it is empty. Two tests were not prepared for that situation. This developer tried to get rid of the support for `.git/branches/` a long time ago, but that effort did not bear fruit, so the best we can do is work around in these here tests. Signed-off-by: Johannes Schindelin --- t/t5505-remote.sh | 4 ++-- t/t5516-fetch-push.sh | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/t/t5505-remote.sh b/t/t5505-remote.sh index 9006196ac601e8..41e64e79a0ca65 100755 --- a/t/t5505-remote.sh +++ b/t/t5505-remote.sh @@ -1028,7 +1028,7 @@ test_expect_success 'migrate a remote from named file in $GIT_DIR/branches' ' ( cd six && git remote rm origin && - mkdir .git/branches && + mkdir -p .git/branches && echo "$origin_url#main" >.git/branches/origin && git remote rename origin origin && test_path_is_missing .git/branches/origin && @@ -1043,7 +1043,7 @@ test_expect_success 'migrate a remote from named file in $GIT_DIR/branches (2)' ( cd seven && git remote rm origin && - mkdir .git/branches && + mkdir -p .git/branches && echo "quux#foom" > .git/branches/origin && git remote rename origin origin && test_path_is_missing .git/branches/origin && diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 3211002d466867..12d4244535730c 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -945,7 +945,7 @@ test_expect_success 'fetch with branches' ' mk_empty testrepo && git branch second $the_first_commit && git checkout second && - mkdir testrepo/.git/branches && + mkdir -p testrepo/.git/branches && echo ".."
> testrepo/.git/branches/branch1 && ( cd testrepo && @@ -959,7 +959,7 @@ test_expect_success 'fetch with branches' ' test_expect_success 'fetch with branches containing #' ' mk_empty testrepo && - mkdir testrepo/.git/branches && + mkdir -p testrepo/.git/branches && echo "..#second" > testrepo/.git/branches/branch2 && ( cd testrepo && @@ -976,7 +976,7 @@ test_expect_success 'push with branches' ' git checkout second && test_when_finished "rm -rf .git/branches" && - mkdir .git/branches && + mkdir -p .git/branches && echo "testrepo" > .git/branches/branch1 && git push branch1 && @@ -992,7 +992,7 @@ test_expect_success 'push with branches containing #' ' mk_empty testrepo && test_when_finished "rm -rf .git/branches" && - mkdir .git/branches && + mkdir -p .git/branches && echo "testrepo#branch3" > .git/branches/branch2 && git push branch2 && From c59288955ba32c894f2fe4c5468d3a2754b82241 Mon Sep 17 00:00:00 2001 From: Bjoern Mueller Date: Wed, 22 Jan 2020 13:49:13 +0100 Subject: [PATCH 083/303] mingw: fix fatal error working on mapped network drives on Windows In 1e64d18 (mingw: do resolve symlinks in `getcwd()`) a problem was introduced that causes git for Windows to stop working with certain mapped network drives (in particular, drives that are mapped to locations with long path names). Error message was "fatal: Unable to read current working directory: No such file or directory". 
Present change fixes this issue as discussed in https://github.com/git-for-windows/git/issues/2480 Signed-off-by: Bjoern Mueller --- compat/mingw.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..3e7fba933fa024 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1133,8 +1133,13 @@ char *mingw_getcwd(char *pointer, int len) return NULL; ret = GetFinalPathNameByHandleW(hnd, wpointer, ARRAY_SIZE(wpointer), 0); CloseHandle(hnd); - if (!ret || ret >= ARRAY_SIZE(wpointer)) - return NULL; + if (!ret || ret >= ARRAY_SIZE(wpointer)) { + ret = GetLongPathNameW(cwd, wpointer, ARRAY_SIZE(wpointer)); + if (!ret || ret >= ARRAY_SIZE(wpointer)) { + errno = ret ? ENAMETOOLONG : err_win_to_posix(GetLastError()); + return NULL; + } + } if (xwcstoutf(pointer, normalize_ntpath(wpointer), len) < 0) return NULL; return pointer; From 6b66bdd02119d72bf188ff879afd86a78144536f Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 30 Jan 2020 14:22:27 -0500 Subject: [PATCH 084/303] clink.pl: fix MSVC compile script to handle libcurl-d.lib Update clink.pl to link with either libcurl.lib or libcurl-d.lib depending on whether DEBUG=1 is set. Signed-off-by: Jeff Hostetler Signed-off-by: Johannes Schindelin --- compat/vcbuild/scripts/clink.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compat/vcbuild/scripts/clink.pl b/compat/vcbuild/scripts/clink.pl index 3bd824154be381..c4c99d1a11f18c 100755 --- a/compat/vcbuild/scripts/clink.pl +++ b/compat/vcbuild/scripts/clink.pl @@ -56,7 +56,8 @@ # need to use that instead? foreach my $flag (@lflags) { if ($flag =~ /^-LIBPATH:(.*)/) { - foreach my $l ("libcurl_imp.lib", "libcurl.lib") { + my $libcurl = $is_debug ? 
"libcurl-d.lib" : "libcurl.lib"; + foreach my $l ("libcurl_imp.lib", $libcurl) { if (-f "$1/$l") { $lib = $l; last; From d58838c7f797239304de9821086d8666af50408f Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 31 Jan 2020 11:49:04 +0100 Subject: [PATCH 085/303] mingw: implement a platform-specific `strbuf_realpath()` There is a Win32 API function to resolve symbolic links, and we can use that instead of resolving them manually. Even better, this function also resolves NTFS junction points (which are somewhat similar to bind mounts). This fixes https://github.com/git-for-windows/git/issues/2481. Signed-off-by: Johannes Schindelin --- compat/mingw.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ compat/mingw.h | 3 +++ t/t3700-add.sh | 2 +- t/t5601-clone.sh | 7 ++++++ 4 files changed, 74 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..c49d7e4d1039e2 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1115,6 +1115,69 @@ struct tm *localtime_r(const time_t *timep, struct tm *result) } #endif +char *mingw_strbuf_realpath(struct strbuf *resolved, const char *path) +{ + wchar_t wpath[MAX_PATH]; + HANDLE h; + DWORD ret; + int len; + const char *last_component = NULL; + + if (xutftowcs_path(wpath, path) < 0) + return NULL; + + h = CreateFileW(wpath, 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + + /* + * strbuf_realpath() allows the last path component to not exist. If + * that is the case, now it's time to try without last component. 
+ */ + if (h == INVALID_HANDLE_VALUE && + GetLastError() == ERROR_FILE_NOT_FOUND) { + /* cut last component off of `wpath` */ + wchar_t *p = wpath + wcslen(wpath); + + while (p != wpath) + if (*(--p) == L'/' || *p == L'\\') + break; /* found start of last component */ + + if (p != wpath && (last_component = find_last_dir_sep(path))) { + last_component++; /* skip directory separator */ + *p = L'\0'; + h = CreateFileW(wpath, 0, FILE_SHARE_READ | + FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, NULL); + } + } + + if (h == INVALID_HANDLE_VALUE) + return NULL; + + ret = GetFinalPathNameByHandleW(h, wpath, ARRAY_SIZE(wpath), 0); + CloseHandle(h); + if (!ret || ret >= ARRAY_SIZE(wpath)) + return NULL; + + len = wcslen(wpath) * 3; + strbuf_grow(resolved, len); + len = xwcstoutf(resolved->buf, normalize_ntpath(wpath), len); + if (len < 0) + return NULL; + resolved->len = len; + + if (last_component) { + /* Use forward-slash, like `normalize_ntpath()` */ + strbuf_addch(resolved, '/'); + strbuf_addstr(resolved, last_component); + } + + return resolved->buf; + +} + char *mingw_getcwd(char *pointer, int len) { wchar_t cwd[MAX_PATH], wpointer[MAX_PATH]; diff --git a/compat/mingw.h b/compat/mingw.h index 209cf7cebadd17..cadf2c4c69584c 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -452,6 +452,9 @@ static inline void convert_slashes(char *path) #define PATH_SEP ';' char *mingw_query_user_email(void); #define query_user_email mingw_query_user_email +struct strbuf; +char *mingw_strbuf_realpath(struct strbuf *resolved, const char *path); +#define platform_strbuf_realpath mingw_strbuf_realpath #if !defined(__MINGW64_VERSION_MAJOR) && (!defined(_MSC_VER) || _MSC_VER < 1800) #define PRIuMAX "I64u" #define PRId64 "I64d" diff --git a/t/t3700-add.sh b/t/t3700-add.sh index c2e4cfe1003b93..648019d38e7a23 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -498,7 +498,7 @@ test_expect_success CASE_INSENSITIVE_FS 'path is case-insensitive' ' git 
add "$downcased" ' -test_expect_failure MINGW 'can add files via NTFS junctions' ' +test_expect_success MINGW 'can add files via NTFS junctions' ' test_when_finished "cmd //c rmdir junction && rm -rf target" && test_create_repo target && cmd //c "mklink /j junction target" && diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 2e57de9c12a39a..6abc4e3685ed89 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -94,6 +94,13 @@ test_expect_success LIBCURL 'clone does not detect username:password when it is ! grep "uses plaintext credentials" err ' +test_expect_success CASE_INSENSITIVE_FS 'core.worktree is not added due to path case' ' + + mkdir UPPERCASE && + git clone src "$(pwd)/uppercase" && + test "unset" = "$(git -C UPPERCASE config --default unset core.worktree)" +' + test_expect_success 'clone from hooks' ' test_create_repo r0 && From 4a1dc4490e480ccd2203ee6f86f79d9f453c6e0d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 25 Aug 2020 12:13:26 +0200 Subject: [PATCH 086/303] mingw: ignore HOMEDRIVE/HOMEPATH if it points to Windows' system directory Internally, Git expects the environment variable `HOME` to be set, and to point to the current user's home directory. This environment variable is not set by default on Windows, and therefore Git tries its best to construct one if it finds `HOME` unset. There are actually two different approaches Git tries: first, it looks at `HOMEDRIVE`/`HOMEPATH` because this is widely used in corporate environments with roaming profiles, and a user generally wants their global Git settings to be in a roaming profile. Only when `HOMEDRIVE`/`HOMEPATH` is either unset or does not point to a valid location, Git will fall back to using `USERPROFILE` instead. However, starting with Windows Vista, for secondary logons and services, the environment variables `HOMEDRIVE`/`HOMEPATH` point to Windows' system directory (usually `C:\Windows\system32`). That is undesirable, and that location is usually write-protected anyway. 
So let's verify that the `HOMEDRIVE`/`HOMEPATH` combo does not point to Windows' system directory before using it, falling back to `USERPROFILE` if it does. This fixes git-for-windows#2709 Initial-Path-by: Ivan Pozdeev Signed-off-by: Johannes Schindelin --- compat/mingw.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 7f9150b443742f..5301f1b1df646c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2632,6 +2632,18 @@ static size_t append_system_bin_dirs(char *path, size_t size) } #endif +static int is_system32_path(const char *path) +{ + WCHAR system32[MAX_PATH], wpath[MAX_PATH]; + + if (xutftowcs_path(wpath, path) < 0 || + !GetSystemDirectoryW(system32, ARRAY_SIZE(system32)) || + _wcsicmp(system32, wpath)) + return 0; + + return 1; +} + static void setup_windows_environment(void) { char *tmp = getenv("TMPDIR"); @@ -2672,7 +2684,8 @@ static void setup_windows_environment(void) strbuf_addstr(&buf, tmp); if ((tmp = getenv("HOMEPATH"))) { strbuf_addstr(&buf, tmp); - if (is_directory(buf.buf)) + if (!is_system32_path(buf.buf) && + is_directory(buf.buf)) setenv("HOME", buf.buf, 1); else tmp = NULL; /* use $USERPROFILE */ From 0e8c173f2fbda5ff5a30ad2bed8a07146f6337f3 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 4 Mar 2020 21:55:28 +0100 Subject: [PATCH 087/303] http: use new "best effort" strategy for Secure Channel revoke checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The native Windows HTTPS backend is based on Secure Channel which lets the caller decide how to handle revocation checking problems caused by missing information in the certificate or offline CRL distribution points. Unfortunately, cURL chose to handle these problems differently than OpenSSL by default: while OpenSSL happily ignores those problems (essentially saying "¯\_(ツ)_/¯"), the Secure Channel backend will error out instead. 
As a remedy, the "no revoke" mode was introduced, which turns off revocation checking altogether. This is a bit heavy-handed. We support this via the `http.schannelCheckRevoke` setting. In https://github.com/curl/curl/pull/4981, we contributed an opt-in "best effort" strategy that emulates what OpenSSL seems to do. In Git for Windows, we actually want this to be the default. This patch makes it so, introducing it as a new value for the `http.schannelCheckRevoke` setting, which now becomes a tristate: it accepts the values "false", "true" or "best-effort" (defaulting to the last one). Signed-off-by: Johannes Schindelin --- Documentation/config/http.txt | 12 +++++++----- http.c | 26 ++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt index afeeccfbfa7a70..e044f4920fd998 100644 --- a/Documentation/config/http.txt +++ b/Documentation/config/http.txt @@ -189,11 +189,13 @@ http.sslBackend:: http.schannelCheckRevoke:: Used to enforce or disable certificate revocation checks in cURL - when http.sslBackend is set to "schannel". Defaults to `true` if - unset. Only necessary to disable this if Git consistently errors - and the message is about checking the revocation status of a - certificate. This option is ignored if cURL lacks support for - setting the relevant SSL option at runtime. + when http.sslBackend is set to "schannel" via "true" and "false", + respectively. Another accepted value is "best-effort" (the default) + in which case revocation checks are performed, but errors due to + revocation list distribution points that are offline are silently + ignored, as well as errors due to certificates missing revocation + list distribution points. This option is ignored if cURL lacks + support for setting the relevant SSL option at runtime.
http.schannelUseSSLCAInfo:: As of cURL v7.60.0, the Secure Channel backend can use the diff --git a/http.c b/http.c index 5d0502f51fd85d..f3da692ad2c2d7 100644 --- a/http.c +++ b/http.c @@ -136,7 +136,13 @@ static char *cached_accept_language; static char *http_ssl_backend; -static int http_schannel_check_revoke = 1; +static int http_schannel_check_revoke_mode = +#ifdef CURLSSLOPT_REVOKE_BEST_EFFORT + CURLSSLOPT_REVOKE_BEST_EFFORT; +#else + CURLSSLOPT_NO_REVOKE; +#endif + /* * With the backend being set to `schannel`, setting sslCAinfo would override * the Certificate Store in cURL v7.60.0 and later, which is not what we want @@ -287,7 +293,19 @@ static int http_options(const char *var, const char *value, void *cb) } if (!strcmp("http.schannelcheckrevoke", var)) { - http_schannel_check_revoke = git_config_bool(var, value); + if (value && !strcmp(value, "best-effort")) { + http_schannel_check_revoke_mode = +#ifdef CURLSSLOPT_REVOKE_BEST_EFFORT + CURLSSLOPT_REVOKE_BEST_EFFORT; +#else + CURLSSLOPT_NO_REVOKE; + warning(_("%s=%s unsupported by current cURL"), + var, value); +#endif + } else + http_schannel_check_revoke_mode = + (git_config_bool(var, value) ? 
+ 0 : CURLSSLOPT_NO_REVOKE); return 0; } @@ -819,9 +837,9 @@ static CURL *get_curl_handle(void) #endif if (http_ssl_backend && !strcmp("schannel", http_ssl_backend) && - !http_schannel_check_revoke) { + http_schannel_check_revoke_mode) { #ifdef GIT_CURL_HAVE_CURLSSLOPT_NO_REVOKE - curl_easy_setopt(result, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NO_REVOKE); + curl_easy_setopt(result, CURLOPT_SSL_OPTIONS, http_schannel_check_revoke_mode); #else warning(_("CURLSSLOPT_NO_REVOKE not supported with cURL < 7.44.0")); #endif From 5dc23b03008e5faab6a89edbc1658ba03b73c27b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 9 May 2020 19:24:23 +0200 Subject: [PATCH 088/303] t5505/t5516: fix white-space around redirectors The convention in Git project's shell scripts is to have white-space _before_, but not _after_ the `>` (or `<`). Signed-off-by: Johannes Schindelin --- t/t5505-remote.sh | 6 +++--- t/t5516-fetch-push.sh | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/t/t5505-remote.sh b/t/t5505-remote.sh index 41e64e79a0ca65..c40fe351977568 100755 --- a/t/t5505-remote.sh +++ b/t/t5505-remote.sh @@ -835,8 +835,8 @@ test_expect_success '"remote show" does not show symbolic refs' ' ( cd three && git remote show origin >output && - ! grep "^ *HEAD$" < output && - ! grep -i stale < output + ! grep "^ *HEAD$" .git/branches/origin && + echo "quux#foom" >.git/branches/origin && git remote rename origin origin && test_path_is_missing .git/branches/origin && test "$(git config remote.origin.url)" = "quux" && diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 12d4244535730c..882b6ed191c494 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -946,7 +946,7 @@ test_expect_success 'fetch with branches' ' git branch second $the_first_commit && git checkout second && mkdir -p testrepo/.git/branches && - echo ".." > testrepo/.git/branches/branch1 && + echo ".." 
>testrepo/.git/branches/branch1 && ( cd testrepo && git fetch branch1 && @@ -960,7 +960,7 @@ test_expect_success 'fetch with branches' ' test_expect_success 'fetch with branches containing #' ' mk_empty testrepo && mkdir -p testrepo/.git/branches && - echo "..#second" > testrepo/.git/branches/branch2 && + echo "..#second" >testrepo/.git/branches/branch2 && ( cd testrepo && git fetch branch2 && @@ -977,7 +977,7 @@ test_expect_success 'push with branches' ' test_when_finished "rm -rf .git/branches" && mkdir -p .git/branches && - echo "testrepo" > .git/branches/branch1 && + echo "testrepo" >.git/branches/branch1 && git push branch1 && ( @@ -993,7 +993,7 @@ test_expect_success 'push with branches containing #' ' test_when_finished "rm -rf .git/branches" && mkdir -p .git/branches && - echo "testrepo#branch3" > .git/branches/branch2 && + echo "testrepo#branch3" >.git/branches/branch2 && git push branch2 && ( @@ -1522,7 +1522,7 @@ EOF git init no-thin && git --git-dir=no-thin/.git config receive.unpacklimit 0 && git push no-thin/.git refs/heads/main:refs/heads/foo && - echo modified >> path1 && + echo modified >>path1 && git commit -am modified && git repack -adf && rcvpck="git receive-pack --reject-thin-pack-for-testing" && From dffaaaf2fd8786ae8b9b529af978da6c7a922142 Mon Sep 17 00:00:00 2001 From: Kelly Heller Date: Wed, 27 May 2015 14:51:43 -0700 Subject: [PATCH 089/303] Allow `add -p` and `add -i` with a large number of files This fixes https://github.com/msysgit/git/issues/182. Inspired by Pull Request 218 using code from @PhilipDavis. 
[jes: simplified code quite a bit] Signed-off-by: Kelly Heller Signed-off-by: Johannes Schindelin --- git-add--interactive.perl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/git-add--interactive.perl b/git-add--interactive.perl index 95887fd8e52ae9..63dd89a6513bcd 100755 --- a/git-add--interactive.perl +++ b/git-add--interactive.perl @@ -174,6 +174,24 @@ sub run_cmd_pipe { die "$^O does not support: @invalid\n" if @invalid; my @args = map { m/ /o ? "\"$_\"": $_ } @_; return qx{@args}; + } elsif (($^O eq 'MSWin32' || $^O eq 'msys') && (scalar @_ > 200) && + grep $_ eq '--', @_) { + use File::Temp qw(tempfile); + my ($fhargs, $filename) = + tempfile('git-args-XXXXXX', UNLINK => 1); + + my $cmd = 'cat '.$filename.' | xargs -0 -s 20000 '; + while ($_[0] ne '--') { + $cmd = $cmd . shift(@_) . ' '; + } + + shift(@_); + print $fhargs join("\0", @_); + close($fhargs); + + my $fh = undef; + open($fh, '-|', $cmd) or die; + return <$fh>; } else { my $fh = undef; open($fh, '-|', @_) or die; From cdbebe01bacc6f808399a8c3beaf51ccc80e65a5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Jul 2020 16:35:05 +0200 Subject: [PATCH 090/303] git add -i: handle CR/LF line endings in the interactive input As of Git for Windows v2.27.0, there is an option to use Windows' newly-introduced Pseudo Console support. When running an interactive add operation with this support enabled, Git will receive CR/LF line endings. Therefore, let's not pretend that we are expecting Unix line endings. 
This fixes https://github.com/git-for-windows/git/issues/2729 Signed-off-by: Johannes Schindelin --- prompt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompt.c b/prompt.c index 50df17279d1d5b..c546e4373774d6 100644 --- a/prompt.c +++ b/prompt.c @@ -77,7 +77,7 @@ int git_read_line_interactively(struct strbuf *line) int ret; fflush(stdout); - ret = strbuf_getline_lf(line, stdin); + ret = strbuf_getline(line, stdin); if (ret != EOF) strbuf_trim_trailing_newline(line); From f1370629b4e9d4a9a35eb03a59e085e582cffdc3 Mon Sep 17 00:00:00 2001 From: Luke Bonanomi Date: Wed, 24 Jun 2020 07:45:52 -0400 Subject: [PATCH 091/303] commit: accept "scissors" with CR/LF line endings This change enhances `git commit --cleanup=scissors` by detecting scissors lines ending in either LF (UNIX-style) or CR/LF (DOS-style). Regression tests are included to specifically test for trailing comments after a CR/LF-terminated scissors line. Signed-off-by: Luke Bonanomi Signed-off-by: Johannes Schindelin --- t/t7502-commit-porcelain.sh | 42 +++++++++++++++++++++++++++++++++++++ wt-status.c | 13 +++++++++--- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/t/t7502-commit-porcelain.sh b/t/t7502-commit-porcelain.sh index 38a532d81ccd06..b643375fa59da0 100755 --- a/t/t7502-commit-porcelain.sh +++ b/t/t7502-commit-porcelain.sh @@ -586,6 +586,48 @@ test_expect_success 'cleanup commit messages (scissors option,-F,-e, scissors on test_must_be_empty actual ' +test_expect_success 'helper-editor' ' + + write_script lf-to-crlf.sh <<-\EOF + sed "s/\$/Q/" <"$1" | tr Q "\\015" >"$1".new && + mv -f "$1".new "$1" + EOF +' + +test_expect_success 'cleanup commit messages (scissors option,-F,-e, CR/LF line endings)' ' + + test_config core.editor "\"$PWD/lf-to-crlf.sh\"" && + scissors="# ------------------------ >8 ------------------------" && + + test_write_lines >text \ + "# Keep this comment" "" " $scissors" \ + "# Keep this comment, too" "$scissors" \ + "# Remove this 
comment" "$scissors" \ + "Remove this comment, too" && + + test_write_lines >expect \ + "# Keep this comment" "" " $scissors" \ + "# Keep this comment, too" && + + git commit --cleanup=scissors -e -F text --allow-empty && + git cat-file -p HEAD >raw && + sed -e "1,/^\$/d" raw >actual && + test_cmp expect actual +' + +test_expect_success 'cleanup commit messages (scissors option,-F,-e, scissors on first line, CR/LF line endings)' ' + + scissors="# ------------------------ >8 ------------------------" && + test_write_lines >text \ + "$scissors" \ + "# Remove this comment and any following lines" && + cp text /tmp/test2-text && + git commit --cleanup=scissors -e -F text --allow-empty --allow-empty-message && + git cat-file -p HEAD >raw && + sed -e "1,/^\$/d" raw >actual && + test_must_be_empty actual +' + test_expect_success 'cleanup commit messages (strip option,-F)' ' echo >>negative && diff --git a/wt-status.c b/wt-status.c index 5813174896cc9a..eebf26370753c2 100644 --- a/wt-status.c +++ b/wt-status.c @@ -22,7 +22,7 @@ #define AB_DELAY_WARNING_IN_MS (2 * 1000) static const char cut_line[] = -"------------------------ >8 ------------------------\n"; +"------------------------ >8 ------------------------"; static char default_wt_status_colors[][COLOR_MAXLEN] = { GIT_COLOR_NORMAL, /* WT_STATUS_HEADER */ @@ -1070,15 +1070,22 @@ static void wt_longstatus_print_other(struct wt_status *s, status_printf_ln(s, GIT_COLOR_NORMAL, "%s", ""); } +static inline int starts_with_newline(const char *p) +{ + return *p == '\n' || (*p == '\r' && p[1] == '\n'); +} + size_t wt_status_locate_end(const char *s, size_t len) { const char *p; struct strbuf pattern = STRBUF_INIT; strbuf_addf(&pattern, "\n%c %s", comment_line_char, cut_line); - if (starts_with(s, pattern.buf + 1)) + if (starts_with(s, pattern.buf + 1) && + starts_with_newline(s + pattern.len - 1)) len = 0; - else if ((p = strstr(s, pattern.buf))) + else if ((p = strstr(s, pattern.buf)) && + starts_with_newline(p + 
pattern.len)) len = p - s + 1; strbuf_release(&pattern); return len; From 1ad2c9058196e928b62d4d2b7b627516aa2abb03 Mon Sep 17 00:00:00 2001 From: Jens Glathe Date: Tue, 2 Jun 2020 12:12:25 +0200 Subject: [PATCH 092/303] t0014: fix indentation For some reason, this test case was indented with 4 spaces instead of 1 horizontal tab. The other test cases in the same test script are fine. Signed-off-by: Jens Glathe Signed-off-by: Johannes Schindelin --- t/t0014-alias.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/t/t0014-alias.sh b/t/t0014-alias.sh index 8d3d9144c06d61..288e08299abc4e 100755 --- a/t/t0014-alias.sh +++ b/t/t0014-alias.sh @@ -38,10 +38,10 @@ test_expect_success 'looping aliases - internal execution' ' #' test_expect_success 'run-command formats empty args properly' ' - test_must_fail env GIT_TRACE=1 git frotz a "" b " " c 2>actual.raw && - sed -ne "/run_command:/s/.*trace: run_command: //p" actual.raw >actual && - echo "git-frotz a '\'''\'' b '\'' '\'' c" >expect && - test_cmp expect actual + test_must_fail env GIT_TRACE=1 git frotz a "" b " " c 2>actual.raw && + sed -ne "/run_command:/s/.*trace: run_command: //p" actual.raw >actual && + echo "git-frotz a '\'''\'' b '\'' '\'' c" >expect && + test_cmp expect actual ' test_done From cce1b401a05eb611f5d518bccbcab74b48691c33 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 30 Mar 2021 14:25:31 -0400 Subject: [PATCH 093/303] clink.pl: fix libexpatd.lib link error when using MSVC When building with `make MSVC=1 DEBUG=1`, link to `libexpatd.lib` rather than `libexpat.lib`. It appears that the `vcpkg` package for "libexpat" has changed and now creates `libexpatd.lib` for debug mode builds. Previously, both debug and release builds created a ".lib" with the same basename. 
Signed-off-by: Jeff Hostetler --- compat/vcbuild/scripts/clink.pl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compat/vcbuild/scripts/clink.pl b/compat/vcbuild/scripts/clink.pl index 3bd824154be381..2768ae15f1879f 100755 --- a/compat/vcbuild/scripts/clink.pl +++ b/compat/vcbuild/scripts/clink.pl @@ -66,7 +66,11 @@ } push(@args, $lib); } elsif ("$arg" eq "-lexpat") { + if ($is_debug) { + push(@args, "libexpatd.lib"); + } else { push(@args, "libexpat.lib"); + } } elsif ("$arg" =~ /^-L/ && "$arg" ne "-LTCG") { $arg =~ s/^-L/-LIBPATH:/; push(@lflags, $arg); From 20f062e94056ae7723236a1c88eb9a48a5df2654 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 5 Apr 2021 15:27:38 -0400 Subject: [PATCH 094/303] Makefile: clean up .ilk files when MSVC=1 Signed-off-by: Jeff Hostetler --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index cac3452edb90b4..b76f63630149f0 100644 --- a/Makefile +++ b/Makefile @@ -3483,12 +3483,15 @@ endif ifdef MSVC $(RM) $(patsubst %.o,%.o.pdb,$(OBJECTS)) $(RM) $(patsubst %.exe,%.pdb,$(OTHER_PROGRAMS)) + $(RM) $(patsubst %.exe,%.ilk,$(OTHER_PROGRAMS)) $(RM) $(patsubst %.exe,%.iobj,$(OTHER_PROGRAMS)) $(RM) $(patsubst %.exe,%.ipdb,$(OTHER_PROGRAMS)) $(RM) $(patsubst %.exe,%.pdb,$(PROGRAMS)) + $(RM) $(patsubst %.exe,%.ilk,$(PROGRAMS)) $(RM) $(patsubst %.exe,%.iobj,$(PROGRAMS)) $(RM) $(patsubst %.exe,%.ipdb,$(PROGRAMS)) $(RM) $(patsubst %.exe,%.pdb,$(TEST_PROGRAMS)) + $(RM) $(patsubst %.exe,%.ilk,$(TEST_PROGRAMS)) $(RM) $(patsubst %.exe,%.iobj,$(TEST_PROGRAMS)) $(RM) $(patsubst %.exe,%.ipdb,$(TEST_PROGRAMS)) $(RM) compat/vcbuild/MSVC-DEFS-GEN From e13117d760842deec808dc3e91e127a856d201d5 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 5 Apr 2021 14:08:22 -0400 Subject: [PATCH 095/303] vcbuild: add support for compiling Windows resource files Create a wrapper for the Windows Resource Compiler (RC.EXE) for use by the MSVC=1 builds. 
This is similar to the CL.EXE and LIB.EXE wrappers used for the MSVC=1 builds. Signed-off-by: Jeff Hostetler --- compat/vcbuild/find_vs_env.bat | 7 ++++++ compat/vcbuild/scripts/rc.pl | 46 ++++++++++++++++++++++++++++++++++ config.mak.uname | 3 ++- 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 compat/vcbuild/scripts/rc.pl diff --git a/compat/vcbuild/find_vs_env.bat b/compat/vcbuild/find_vs_env.bat index b35d264c0e6bed..379b16296e09c2 100644 --- a/compat/vcbuild/find_vs_env.bat +++ b/compat/vcbuild/find_vs_env.bat @@ -99,6 +99,7 @@ REM ================================================================ SET sdk_dir=%WindowsSdkDir% SET sdk_ver=%WindowsSDKVersion% + SET sdk_ver_bin_dir=%WindowsSdkVerBinPath%%tgt% SET si=%sdk_dir%Include\%sdk_ver% SET sdk_includes=-I"%si%ucrt" -I"%si%um" -I"%si%shared" SET sl=%sdk_dir%lib\%sdk_ver% @@ -130,6 +131,7 @@ REM ================================================================ SET sdk_dir=%WindowsSdkDir% SET sdk_ver=%WindowsSDKVersion% + SET sdk_ver_bin_dir=%WindowsSdkVerBinPath%bin\amd64 SET si=%sdk_dir%Include\%sdk_ver% SET sdk_includes=-I"%si%ucrt" -I"%si%um" -I"%si%shared" -I"%si%winrt" SET sl=%sdk_dir%lib\%sdk_ver% @@ -160,6 +162,11 @@ REM ================================================================ echo msvc_includes=%msvc_includes% echo msvc_libs=%msvc_libs% + echo sdk_ver_bin_dir=%sdk_ver_bin_dir% + SET X1=%sdk_ver_bin_dir:C:=/C% + SET X2=%X1:\=/% + echo sdk_ver_bin_dir_msys=%X2% + echo sdk_includes=%sdk_includes% echo sdk_libs=%sdk_libs% diff --git a/compat/vcbuild/scripts/rc.pl b/compat/vcbuild/scripts/rc.pl new file mode 100644 index 00000000000000..7bca4cd81c6c63 --- /dev/null +++ b/compat/vcbuild/scripts/rc.pl @@ -0,0 +1,46 @@ +#!/usr/bin/perl -w +###################################################################### +# Compile Resources on Windows +# +# This is a wrapper to facilitate the compilation of Git with MSVC +# using GNU Make as the build system. 
So, instead of manipulating the +# Makefile into something nasty, just to support non-space arguments +# etc, we use this wrapper to fix the command line options +# +###################################################################### +use strict; +my @args = (); +my @input = (); + +while (@ARGV) { + my $arg = shift @ARGV; + if ("$arg" =~ /^-[dD]/) { + # GIT_VERSION gets passed with too many + # layers of dquote escaping. + $arg =~ s/\\"/"/g; + + push(@args, $arg); + + } elsif ("$arg" eq "-i") { + my $arg = shift @ARGV; + # TODO complain if NULL or is dashed ?? + push(@input, $arg); + + } elsif ("$arg" eq "-o") { + my $arg = shift @ARGV; + # TODO complain if NULL or is dashed ?? + push(@args, "-fo$arg"); + + } else { + push(@args, $arg); + } +} + +push(@args, "-nologo"); +push(@args, "-v"); +push(@args, @input); + +unshift(@args, "rc.exe"); +printf("**** @args\n"); + +exit (system(@args) != 0); diff --git a/config.mak.uname b/config.mak.uname index 59c002963044c0..4e34ed7a08e755 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -425,7 +425,7 @@ ifeq ($(uname_S),Windows) # link.exe next to, and required by, cl.exe, we have to prepend this # onto the existing $PATH. 
# - SANE_TOOL_PATH ?= $(msvc_bin_dir_msys) + SANE_TOOL_PATH ?= $(msvc_bin_dir_msys):$(sdk_ver_bin_dir_msys) HAVE_ALLOCA_H = YesPlease NO_PREAD = YesPlease NEEDS_CRYPTO_WITH_SSL = YesPlease @@ -494,6 +494,7 @@ endif # See https://msdn.microsoft.com/en-us/library/ms235330.aspx EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib invalidcontinue.obj kernel32.lib ntdll.lib PTHREAD_LIBS = + RC = compat/vcbuild/scripts/rc.pl lib = BASIC_CFLAGS += $(vcpkg_inc) $(sdk_includes) $(msvc_includes) ifndef DEBUG From 8192ddf08f6e6d174852ac9c307fd8fc6829f966 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 5 Apr 2021 14:12:14 -0400 Subject: [PATCH 096/303] config.mak.uname: add git.rc to MSVC builds Teach MSVC=1 builds to depend on the `git.rc` file so that the resulting executables have Windows-style resources and version number information within them. Signed-off-by: Jeff Hostetler --- config.mak.uname | 1 + 1 file changed, 1 insertion(+) diff --git a/config.mak.uname b/config.mak.uname index 4e34ed7a08e755..c18ce2bda9b81a 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -493,6 +493,7 @@ endif # handle twice, or to access the osfhandle of an already-closed stdout # See https://msdn.microsoft.com/en-us/library/ms235330.aspx EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib invalidcontinue.obj kernel32.lib ntdll.lib + GITLIBS += git.res PTHREAD_LIBS = RC = compat/vcbuild/scripts/rc.pl lib = From 1e122c3f94911c37e6086a5a5cf008a71b7377cf Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 5 Apr 2021 14:24:52 -0400 Subject: [PATCH 097/303] clink.pl: ignore no-stack-protector arg on MSVC=1 builds Ignore the `-fno-stack-protector` compiler argument when building with MSVC. This will be used in a later commit that needs to build a Win32 GUI app. 
Signed-off-by: Jeff Hostetler --- compat/vcbuild/scripts/clink.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compat/vcbuild/scripts/clink.pl b/compat/vcbuild/scripts/clink.pl index 2768ae15f1879f..73c8a2b184f38b 100755 --- a/compat/vcbuild/scripts/clink.pl +++ b/compat/vcbuild/scripts/clink.pl @@ -122,6 +122,8 @@ push(@cflags, "-wd4996"); } elsif ("$arg" =~ /^-W[a-z]/) { # let's ignore those + } elsif ("$arg" eq "-fno-stack-protector") { + # eat this } else { push(@args, $arg); } From 0b1bee89b68493e97ae5ba28c48e4c993b1e4d67 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 5 Apr 2021 14:39:33 -0400 Subject: [PATCH 098/303] clink.pl: move default linker options for MSVC=1 builds Move the default `-ENTRY` and `-SUBSYSTEM` arguments for MSVC=1 builds from `config.mak.uname` into `clink.pl`. These args are constant for console-mode executables. Add support to `clink.pl` for generating a Win32 GUI application using the `-mwindows` argument (to match how GCC does it). This changes the `-ENTRY` and `-SUBSYSTEM` arguments accordingly. 
Signed-off-by: Jeff Hostetler --- compat/vcbuild/scripts/clink.pl | 11 +++++++++++ config.mak.uname | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/compat/vcbuild/scripts/clink.pl b/compat/vcbuild/scripts/clink.pl index 73c8a2b184f38b..a38b360015ece9 100755 --- a/compat/vcbuild/scripts/clink.pl +++ b/compat/vcbuild/scripts/clink.pl @@ -15,6 +15,7 @@ my @lflags = (); my $is_linking = 0; my $is_debug = 0; +my $is_gui = 0; while (@ARGV) { my $arg = shift @ARGV; if ("$arg" eq "-DDEBUG") { @@ -124,11 +125,21 @@ # let's ignore those } elsif ("$arg" eq "-fno-stack-protector") { # eat this + } elsif ("$arg" eq "-mwindows") { + $is_gui = 1; } else { push(@args, $arg); } } if ($is_linking) { + if ($is_gui) { + push(@args, "-ENTRY:wWinMainCRTStartup"); + push(@args, "-SUBSYSTEM:WINDOWS"); + } else { + push(@args, "-ENTRY:wmainCRTStartup"); + push(@args, "-SUBSYSTEM:CONSOLE"); + } + push(@args, @lflags); unshift(@args, "link.exe"); } else { diff --git a/config.mak.uname b/config.mak.uname index c18ce2bda9b81a..098075d69de20e 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -488,7 +488,7 @@ endif compat/win32/trace2_win32_process_info.o \ compat/win32/dirent.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DDETECT_MSYS_TTY -DENSURE_MSYSTEM_IS_SET -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" - BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -ENTRY:wmainCRTStartup -SUBSYSTEM:CONSOLE + BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO # invalidcontinue.obj allows Git's source code to close the same file # handle twice, or to access the osfhandle of an already-closed stdout # See https://msdn.microsoft.com/en-us/library/ms235330.aspx From 1f88d3dad02aa50260514b7429c30ace70a5fcef Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 7 Apr 2021 15:29:21 +0200 Subject: [PATCH 099/303] buildsystems: remove duplicate clause This seems to have been there since 259d87c35495 (Add scripts to generate projects for 
other buildsystems (MSVC vcproj, QMake), 2009-09-16), i.e. since the beginning of that file. Signed-off-by: Johannes Schindelin --- contrib/buildsystems/engine.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/contrib/buildsystems/engine.pl b/contrib/buildsystems/engine.pl index ed6c45988a38b0..37b0b026f22430 100755 --- a/contrib/buildsystems/engine.pl +++ b/contrib/buildsystems/engine.pl @@ -265,7 +265,6 @@ sub handleCompileLine shift @parts; } elsif ("$part" eq "-c") { # ignore compile flag - } elsif ("$part" eq "-c") { } elsif ($part =~ /^.?-I/) { push(@incpaths, $part); } elsif ($part =~ /^.?-D/) { From a81bcc8f1865d52792e8328b94f1af1450879f54 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 7 Apr 2021 15:15:08 +0200 Subject: [PATCH 100/303] vcxproj: handle resource files, too On Windows, we also compile a "resource" file, which is similar to source code, but contains metadata (such as the program version). So far, we did not compile it in `MSVC` mode, only when compiling Git for Windows with the GNU C Compiler. In preparation for including it also when compiling with MS Visual C, let's teach our `vcxproj` generator to handle those sorts of files, too.
Signed-off-by: Johannes Schindelin --- contrib/buildsystems/Generators/Vcxproj.pm | 17 ++++++++++++++++- contrib/buildsystems/engine.pl | 9 +++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index 81ee2f5a4a41a5..58a48233c59a76 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -89,6 +89,9 @@ sub createProject { $defines =~ s/>/>/g; $defines =~ s/\'//g; + my $rcdefines = $defines; + $rcdefines =~ s/(?WIN32;_DEBUG;$defines;%(PreprocessorDefinitions) MultiThreadedDebugDLL + + WIN32;_DEBUG;$rcdefines;%(PreprocessorDefinitions) + true @@ -216,6 +222,9 @@ EOM true Speed + + WIN32;NDEBUG;$rcdefines;%(PreprocessorDefinitions) + true true @@ -225,9 +234,15 @@ EOM EOM foreach(@sources) { - print F << "EOM"; + if (/\.rc$/) { + print F << "EOM"; + +EOM + } else { + print F << "EOM"; EOM + } } print F << "EOM"; diff --git a/contrib/buildsystems/engine.pl b/contrib/buildsystems/engine.pl index 37b0b026f22430..441059a7370271 100755 --- a/contrib/buildsystems/engine.pl +++ b/contrib/buildsystems/engine.pl @@ -165,7 +165,7 @@ sub parseMakeOutput next; } - if($text =~ / -c /) { + if($text =~ / -c / || $text =~ / -i \S+\.rc /) { # compilation handleCompileLine($text, $line); @@ -263,7 +263,7 @@ sub handleCompileLine if ("$part" eq "-o") { # ignore object file shift @parts; - } elsif ("$part" eq "-c") { + } elsif ("$part" eq "-c" || "$part" eq "-i") { # ignore compile flag } elsif ($part =~ /^.?-I/) { push(@incpaths, $part); @@ -271,7 +271,7 @@ sub handleCompileLine push(@defines, $part); } elsif ($part =~ /^-/) { push(@cflags, $part); - } elsif ($part =~ /\.(c|cc|cpp)$/) { + } elsif ($part =~ /\.(c|cc|cpp|rc)$/) { $sourcefile = $part; } else { die "Unhandled compiler option @ line $lineno: $part"; @@ -358,7 +358,7 @@ sub handleLinkLine push(@libs, $part); } elsif ($part eq 'invalidcontinue.obj') { # ignore - known to 
MSVC - } elsif ($part =~ /\.o$/) { + } elsif ($part =~ /\.(o|res)$/) { push(@objfiles, $part); } elsif ($part =~ /\.obj$/) { # do nothing, 'make' should not be producing .obj, only .o files @@ -371,6 +371,7 @@ sub handleLinkLine foreach (@objfiles) { my $sourcefile = $_; $sourcefile =~ s/\.o$/.c/; + $sourcefile =~ s/\.res$/.rc/; push(@sources, $sourcefile); push(@cflags, @{$compile_options{"${sourcefile}_CFLAGS"}}); push(@defines, @{$compile_options{"${sourcefile}_DEFINES"}}); From 1abc8673bfaf2e63866e9f26f04f2f40a1a6e81a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 7 Apr 2021 21:57:31 +0200 Subject: [PATCH 101/303] vcxproj: ignore -fno-stack-protector and -fno-common An upcoming commit will introduce those compile options; MSVC does not understand them, so let's suppress them when generating the Visual Studio project files. Signed-off-by: Johannes Schindelin --- contrib/buildsystems/engine.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/buildsystems/engine.pl b/contrib/buildsystems/engine.pl index 441059a7370271..7453bc9697aa7f 100755 --- a/contrib/buildsystems/engine.pl +++ b/contrib/buildsystems/engine.pl @@ -263,7 +263,7 @@ sub handleCompileLine if ("$part" eq "-o") { # ignore object file shift @parts; - } elsif ("$part" eq "-c" || "$part" eq "-i") { + } elsif ("$part" eq "-c" || "$part" eq "-i" || "$part" =~ /^-fno-/) { # ignore compile flag } elsif ($part =~ /^.?-I/) { push(@incpaths, $part); From dc954b3d75b7c74582623d726202a8b1f4d29871 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 7 Apr 2021 15:48:50 +0200 Subject: [PATCH 102/303] vcxproj: handle GUI programs, too So far, we only built Console programs, but we are about to introduce a program that targets the Windows subsystem (i.e. it is a so-called "GUI" program). Let's handle this preemptively in the script that generates the Visual Studio files. 
Signed-off-by: Johannes Schindelin --- contrib/buildsystems/Generators/Vcxproj.pm | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index 58a48233c59a76..1cabe69bff68c7 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -92,6 +92,13 @@ sub createProject { my $rcdefines = $defines; $rcdefines =~ s/(?\$(VCPKGLibDirectory);%(AdditionalLibraryDirectories) \$(VCPKGLibs);\$(AdditionalDependencies) invalidcontinue.obj %(AdditionalOptions) - wmainCRTStartup + $entrypoint $cdup\\compat\\win32\\git.manifest - Console + $subsystem EOM if ($target eq 'libgit') { From a1cb6f141769feaf314abc6fccd0d08dc9c59326 Mon Sep 17 00:00:00 2001 From: Ian Bearman Date: Fri, 31 Jan 2020 15:37:27 -0800 Subject: [PATCH 103/303] vcxproj: support building Windows/ARM64 binaries Signed-off-by: Ian Bearman Signed-off-by: Dennis Ameling Signed-off-by: Johannes Schindelin --- contrib/buildsystems/Generators/Vcxproj.pm | 23 ++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index 1a25789d28513b..548766fa9dbdb6 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -114,12 +114,21 @@ sub createProject { Release x64 + + Debug + ARM64 + + + Release + ARM64 + $uuid Win32Proj x86-windows - x64-windows + x64-windows + arm64-windows $cdup\\compat\\vcbuild\\vcpkg\\installed\\\$(VCPKGArch) \$(VCPKGArchDirectory)\\debug\\bin \$(VCPKGArchDirectory)\\debug\\lib @@ -140,7 +149,7 @@ sub createProject { $config_type - v140 + v142 ..\\ @@ -355,8 +364,10 @@ sub createGlueProject { print F << "EOM"; Global GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM64 = Debug|ARM64 Debug|x64 = Debug|x64 Debug|x86 = Debug|x86 + Release|ARM64 = Release|ARM64 
Release|x64 = Release|x64 Release|x86 = Release|x86 EndGlobalSection @@ -367,10 +378,14 @@ EOM foreach (@apps) { my $appname = $_; my $uuid = $build_structure{"APPS_${appname}_GUID"}; + print F "\t\t${uuid}.Debug|ARM64.ActiveCfg = Debug|ARM64\n"; + print F "\t\t${uuid}.Debug|ARM64.Build.0 = Debug|ARM64\n"; print F "\t\t${uuid}.Debug|x64.ActiveCfg = Debug|x64\n"; print F "\t\t${uuid}.Debug|x64.Build.0 = Debug|x64\n"; print F "\t\t${uuid}.Debug|x86.ActiveCfg = Debug|Win32\n"; print F "\t\t${uuid}.Debug|x86.Build.0 = Debug|Win32\n"; + print F "\t\t${uuid}.Release|ARM64.ActiveCfg = Release|ARM64\n"; + print F "\t\t${uuid}.Release|ARM64.Build.0 = Release|ARM64\n"; print F "\t\t${uuid}.Release|x64.ActiveCfg = Release|x64\n"; print F "\t\t${uuid}.Release|x64.Build.0 = Release|x64\n"; print F "\t\t${uuid}.Release|x86.ActiveCfg = Release|Win32\n"; @@ -379,10 +394,14 @@ EOM foreach (@libs) { my $libname = $_; my $uuid = $build_structure{"LIBS_${libname}_GUID"}; + print F "\t\t${uuid}.Debug|ARM64.ActiveCfg = Debug|ARM64\n"; + print F "\t\t${uuid}.Debug|ARM64.Build.0 = Debug|ARM64\n"; print F "\t\t${uuid}.Debug|x64.ActiveCfg = Debug|x64\n"; print F "\t\t${uuid}.Debug|x64.Build.0 = Debug|x64\n"; print F "\t\t${uuid}.Debug|x86.ActiveCfg = Debug|Win32\n"; print F "\t\t${uuid}.Debug|x86.Build.0 = Debug|Win32\n"; + print F "\t\t${uuid}.Release|ARM64.ActiveCfg = Release|ARM64\n"; + print F "\t\t${uuid}.Release|ARM64.Build.0 = Release|ARM64\n"; print F "\t\t${uuid}.Release|x64.ActiveCfg = Release|x64\n"; print F "\t\t${uuid}.Release|x64.Build.0 = Release|x64\n"; print F "\t\t${uuid}.Release|x86.ActiveCfg = Release|Win32\n"; From 396c873764dd407f4b7d87003291c080b5d9cad7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 Dec 2020 22:44:24 +0100 Subject: [PATCH 104/303] win32: add a helper to run `git.exe` without a foreground window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows, there are two kinds of executables, 
console ones and non-console ones. Git's executables are all console ones. When launching the former e.g. in a scheduled task, a CMD window pops up. This is not what we want for the tasks installed via the `git maintenance` command. To work around this, let's introduce `headless-git.exe`, which is a non-console program that does _not_ pop up any window. All it does is to re-launch `git.exe`, suppressing that console window, passing through all command-line arguments as-is. Helped-by: Carlo Marcelo Arenas Belón Helped-by: Yuyi Wang Signed-off-by: Johannes Schindelin Signed-off-by: Derrick Stolee --- Makefile | 9 ++ compat/win32/headless.c | 115 +++++++++++++++++++++ config.mak.uname | 3 + contrib/buildsystems/CMakeLists.txt | 9 ++ contrib/buildsystems/Generators/Vcxproj.pm | 4 +- contrib/buildsystems/engine.pl | 1 + 6 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 compat/win32/headless.c diff --git a/Makefile b/Makefile index b76f63630149f0..8e60021b4fd9b1 100644 --- a/Makefile +++ b/Makefile @@ -2665,6 +2665,13 @@ compat/nedmalloc/nedmalloc.sp compat/nedmalloc/nedmalloc.o: EXTRA_CPPFLAGS = \ compat/nedmalloc/nedmalloc.sp: SP_EXTRA_FLAGS += -Wno-non-pointer-null endif +headless-git.o: compat/win32/headless.c GIT-CFLAGS + $(QUIET_CC)$(CC) $(ALL_CFLAGS) $(COMPAT_CFLAGS) \ + -fno-stack-protector -o $@ -c -Wall -Wwrite-strings $< + +headless-git$X: headless-git.o git.res GIT-LDFLAGS + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) -mwindows -o $@ $< git.res + git-%$X: %.o GIT-LDFLAGS $(GITLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -3454,6 +3461,7 @@ clean: profile-clean coverage-clean cocciclean $(RM) po/git.pot po/git-core.pot $(RM) git.res $(RM) $(OBJECTS) + $(RM) headless-git.o $(RM) $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(REFTABLE_TEST_LIB) $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) $(OTHER_PROGRAMS) $(RM) $(TEST_PROGRAMS) @@ -3482,6 +3490,7 @@ endif $(RM) GIT-SCRIPT-DEFINES
GIT-PERL-DEFINES GIT-PERL-HEADER GIT-PYTHON-VARS ifdef MSVC $(RM) $(patsubst %.o,%.o.pdb,$(OBJECTS)) + $(RM) headless-git.o.pdb $(RM) $(patsubst %.exe,%.pdb,$(OTHER_PROGRAMS)) $(RM) $(patsubst %.exe,%.ilk,$(OTHER_PROGRAMS)) $(RM) $(patsubst %.exe,%.iobj,$(OTHER_PROGRAMS)) diff --git a/compat/win32/headless.c b/compat/win32/headless.c new file mode 100644 index 00000000000000..8b00dfe3bd5d00 --- /dev/null +++ b/compat/win32/headless.c @@ -0,0 +1,115 @@ +/* + * headless Git - run Git without opening a console window on Windows + */ + +#define STRICT +#define WIN32_LEAN_AND_MEAN +#define UNICODE +#define _UNICODE +#include +#include +#include +#include + +/* + * If `dir` contains the path to a Git exec directory, extend `PATH` to + * include the corresponding `bin/` directory (which is where all those + * `.dll` files needed by `git.exe` are, on Windows). + */ +static int extend_path(wchar_t *dir, size_t dir_len) +{ + const wchar_t *suffix = L"\\libexec\\git-core"; + size_t suffix_len = wcslen(suffix); + wchar_t *env; + DWORD len; + + if (dir_len < suffix_len) + return 0; + + dir_len -= suffix_len; + if (memcmp(dir + dir_len, suffix, suffix_len * sizeof(wchar_t))) + return 0; + + len = GetEnvironmentVariableW(L"PATH", NULL, 0); + if (!len) + return 0; + + env = _alloca((dir_len + 5 + len) * sizeof(wchar_t)); + wcsncpy(env, dir, dir_len); + wcscpy(env + dir_len, L"\\bin;"); + if (!GetEnvironmentVariableW(L"PATH", env + dir_len + 5, len)) + return 0; + + SetEnvironmentVariableW(L"PATH", env); + return 1; +} + +int WINAPI wWinMain(_In_ HINSTANCE instance, + _In_opt_ HINSTANCE previous_instance, + _In_ LPWSTR command_line, _In_ int show) +{ + wchar_t git_command_line[32768]; + size_t size = sizeof(git_command_line) / sizeof(wchar_t); + const wchar_t *needs_quotes = L""; + int slash = 0, i; + + STARTUPINFO startup_info = { + .cb = sizeof(STARTUPINFO), + .dwFlags = STARTF_USESHOWWINDOW, + .wShowWindow = SW_HIDE, + }; + PROCESS_INFORMATION process_info = { 0 }; + DWORD 
creation_flags = CREATE_UNICODE_ENVIRONMENT | + CREATE_NEW_CONSOLE | CREATE_NO_WINDOW; + DWORD exit_code; + + /* First, determine the full path of argv[0] */ + for (i = 0; _wpgmptr[i]; i++) + if (_wpgmptr[i] == L' ') + needs_quotes = L"\""; + else if (_wpgmptr[i] == L'\\') + slash = i; + + if (slash >= size - 11) + return 127; /* Too long path */ + + /* If it is in Git's exec path, add the bin/ directory to the PATH */ + extend_path(_wpgmptr, slash); + + /* Then, add the full path of `git.exe` as argv[0] */ + i = swprintf_s(git_command_line, size, L"%ls%.*ls\\git.exe%ls", + needs_quotes, slash, _wpgmptr, needs_quotes); + if (i < 0) + return 127; /* Too long path */ + + if (*command_line) { + /* Now, append the command-line arguments */ + i = swprintf_s(git_command_line + i, size - i, + L" %ls", command_line); + if (i < 0) + return 127; + } + + startup_info.hStdInput = GetStdHandle(STD_INPUT_HANDLE); + startup_info.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE); + startup_info.hStdError = GetStdHandle(STD_ERROR_HANDLE); + + if (!CreateProcess(NULL, /* infer argv[0] from the command line */ + git_command_line, /* modified command line */ + NULL, /* inherit process handles? */ + NULL, /* inherit thread handles? */ + FALSE, /* handles inheritable? */ + creation_flags, + NULL, /* use this process' environment */ + NULL, /* use this process' working directory */ + &startup_info, &process_info)) + return 129; /* could not start */ + WaitForSingleObject(process_info.hProcess, INFINITE); + if (!GetExitCodeProcess(process_info.hProcess, &exit_code)) + exit_code = 130; /* Could not determine exit code? 
*/ + + CloseHandle(process_info.hProcess); + CloseHandle(process_info.hThread); + + return (int)exit_code; +} diff --git a/config.mak.uname b/config.mak.uname index 098075d69de20e..7fd4bdf169e55b 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -527,6 +527,8 @@ else endif X = .exe + EXTRA_PROGRAMS += headless-git$X + compat/msvc.o: compat/msvc.c compat/mingw.c GIT-CFLAGS endif ifeq ($(uname_S),Interix) @@ -679,6 +681,7 @@ ifeq ($(uname_S),MINGW) RC = windres -O coff NATIVE_CRLF = YesPlease X = .exe + EXTRA_PROGRAMS += headless-git$X ifneq (,$(wildcard ../THIS_IS_MSYSGIT)) htmldir = doc/git/html/ prefix = diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index ea2a531be87494..c560c3c341fd21 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -734,6 +734,15 @@ if(WIN32) else() message(FATAL_ERROR "Unhandled compiler: ${CMAKE_C_COMPILER_ID}") endif() + + add_executable(headless-git ${CMAKE_SOURCE_DIR}/compat/win32/headless.c) + if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang") + target_link_options(headless-git PUBLIC -municode -Wl,-subsystem,windows) + elseif(CMAKE_C_COMPILER_ID STREQUAL "MSVC") + target_link_options(headless-git PUBLIC /NOLOGO /ENTRY:wWinMainCRTStartup /SUBSYSTEM:WINDOWS) + else() + message(FATAL_ERROR "Unhandled compiler: ${CMAKE_C_COMPILER_ID}") + endif() elseif(UNIX) target_link_libraries(common-main pthread rt) endif() diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index 1cabe69bff68c7..a6d1c6b8d05682 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -76,7 +76,7 @@ sub createProject { my $libs_release = "\n "; my $libs_debug = "\n "; - if (!$static_library) { + if (!$static_library && $name ne 'headless-git') { $libs_release = join(";", sort(grep 
/^(?!libgit\.lib|xdiff\/lib\.lib|vcs-svn\/lib\.lib|reftable\/libreftable\.lib)/, @{$$build_structure{"$prefix${name}_LIBS"}})); $libs_debug = $libs_release; $libs_debug =~ s/zlib\.lib/zlibd\.lib/g; @@ -254,7 +254,7 @@ EOM print F << "EOM"; EOM - if (!$static_library || $target =~ 'vcs-svn' || $target =~ 'xdiff') { + if ((!$static_library || $target =~ 'vcs-svn' || $target =~ 'xdiff') && !($name =~ /headless-git/)) { my $uuid_libgit = $$build_structure{"LIBS_libgit_GUID"}; my $uuid_libreftable = $$build_structure{"LIBS_reftable/libreftable_GUID"}; my $uuid_xdiff_lib = $$build_structure{"LIBS_xdiff/lib_GUID"}; diff --git a/contrib/buildsystems/engine.pl b/contrib/buildsystems/engine.pl index 7453bc9697aa7f..417ae71d44ccab 100755 --- a/contrib/buildsystems/engine.pl +++ b/contrib/buildsystems/engine.pl @@ -370,6 +370,7 @@ sub handleLinkLine # exit(1); foreach (@objfiles) { my $sourcefile = $_; + $sourcefile =~ s/^headless-git\.o$/compat\/win32\/headless.c/; $sourcefile =~ s/\.o$/.c/; $sourcefile =~ s/\.res$/.rc/; push(@sources, $sourcefile); From fb835c43d2b92e5f663052a129a5eef2fd47b996 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Sun, 6 Oct 2019 18:40:55 +0100 Subject: [PATCH 105/303] vcpkg_install: detect lack of Git The vcpkg_install batch file depends on the availability of a working Git on the CMD path. This may not be present if the user has selected the 'bash only' option during Git-for-Windows install. Detect and tell the user about their lack of a working Git in the CMD window. Fixes #2348. A separate PR https://github.com/git-for-windows/build-extra/pull/258 now highlights the recommended path setting during install. 
Signed-off-by: Philip Oakley --- compat/vcbuild/vcpkg_install.bat | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compat/vcbuild/vcpkg_install.bat b/compat/vcbuild/vcpkg_install.bat index ebd0bad242a8ca..bcbbf536af3141 100644 --- a/compat/vcbuild/vcpkg_install.bat +++ b/compat/vcbuild/vcpkg_install.bat @@ -36,6 +36,13 @@ REM ================================================================ dir vcpkg\vcpkg.exe >nul 2>nul && GOTO :install_libraries + git.exe version 2>nul + IF ERRORLEVEL 1 ( + echo "***" + echo "Git not found. Please adjust your CMD path or Git install option." + echo "***" + EXIT /B 1 ) + echo Fetching vcpkg in %cwd%vcpkg git.exe clone https://github.com/Microsoft/vcpkg vcpkg IF ERRORLEVEL 1 ( EXIT /B 1 ) From fcf4e5f08a53dbf7b51ed3edb8f9c53167242fa0 Mon Sep 17 00:00:00 2001 From: Ian Bearman Date: Fri, 31 Jan 2020 16:00:25 -0800 Subject: [PATCH 106/303] vcbuild: install ARM64 dependencies when building ARM64 binaries Co-authored-by: Dennis Ameling Signed-off-by: Ian Bearman Signed-off-by: Dennis Ameling Signed-off-by: Johannes Schindelin --- compat/vcbuild/README | 6 +++++- compat/vcbuild/vcpkg_copy_dlls.bat | 7 ++++++- compat/vcbuild/vcpkg_install.bat | 9 +++++++-- contrib/buildsystems/Generators/Vcxproj.pm | 2 +- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/compat/vcbuild/README b/compat/vcbuild/README index 29ec1d0f104b80..1df1cabb1ebbbd 100644 --- a/compat/vcbuild/README +++ b/compat/vcbuild/README @@ -6,7 +6,11 @@ The Steps to Build Git with VS2015 or VS2017 from the command line. 
Prompt or from an SDK bash window: $ cd - $ ./compat/vcbuild/vcpkg_install.bat + $ ./compat/vcbuild/vcpkg_install.bat x64-windows + + or + + $ ./compat/vcbuild/vcpkg_install.bat arm64-windows The vcpkg tools and all of the third-party sources will be installed in this folder: diff --git a/compat/vcbuild/vcpkg_copy_dlls.bat b/compat/vcbuild/vcpkg_copy_dlls.bat index 13661c14f8705c..8bea0cbf83b6cf 100644 --- a/compat/vcbuild/vcpkg_copy_dlls.bat +++ b/compat/vcbuild/vcpkg_copy_dlls.bat @@ -15,7 +15,12 @@ REM ================================================================ @FOR /F "delims=" %%D IN ("%~dp0") DO @SET cwd=%%~fD cd %cwd% - SET arch=x64-windows + SET arch=%2 + IF NOT DEFINED arch ( + echo defaulting to 'x64-windows`. Invoke %0 with 'x86-windows', 'x64-windows', or 'arm64-windows' + set arch=x64-windows + ) + SET inst=%cwd%vcpkg\installed\%arch% IF [%1]==[release] ( diff --git a/compat/vcbuild/vcpkg_install.bat b/compat/vcbuild/vcpkg_install.bat index 8330d8120fb511..cacef18c11dc79 100644 --- a/compat/vcbuild/vcpkg_install.bat +++ b/compat/vcbuild/vcpkg_install.bat @@ -31,6 +31,12 @@ REM ================================================================ SETLOCAL EnableDelayedExpansion + SET arch=%1 + IF NOT DEFINED arch ( + echo defaulting to 'x64-windows`. Invoke %0 with 'x86-windows', 'x64-windows', or 'arm64-windows' + set arch=x64-windows + ) + @FOR /F "delims=" %%D IN ("%~dp0") DO @SET cwd=%%~fD cd %cwd% @@ -55,9 +61,8 @@ REM ================================================================ echo Successfully installed %cwd%vcpkg\vcpkg.exe :install_libraries - SET arch=x64-windows - echo Installing third-party libraries... + echo Installing third-party libraries(%arch%)... 
FOR %%i IN (zlib expat libiconv openssl libssh2 curl) DO ( cd %cwd%vcpkg IF NOT EXIST "packages\%%i_%arch%" CALL :sub__install_one %%i diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index 548766fa9dbdb6..7a92599946b097 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -193,7 +193,7 @@ EOM Initialize VCPKG del "$cdup\\compat\\vcbuild\\vcpkg" - call "$cdup\\compat\\vcbuild\\vcpkg_install.bat" + call "$cdup\\compat\\vcbuild\\vcpkg_install.bat" \$(VCPKGArch) EOM } From cfbe1853a99a65a1b248376f81f1c3e65d001851 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 12 Aug 2020 15:06:17 +0000 Subject: [PATCH 107/303] git-gui: accommodate for intent-to-add files As of Git v2.28.0, the diff for files staged via `git add -N` marks them as new files. Git GUI was ill-prepared for that, and this patch teaches Git GUI about them. Please note that this will not even fix things with v2.28.0, as the `rp/apply-cached-with-i-t-a` patches are required on Git's side, too. 
This fixes https://github.com/git-for-windows/git/issues/2779 Signed-off-by: Johannes Schindelin Signed-off-by: Pratyush Yadav --- git-gui/git-gui.sh | 2 ++ git-gui/lib/diff.tcl | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/git-gui/git-gui.sh b/git-gui/git-gui.sh index 201524c34edac0..3fe9bd9f4798b8 100755 --- a/git-gui/git-gui.sh +++ b/git-gui/git-gui.sh @@ -2087,6 +2087,7 @@ set all_icons(U$ui_index) file_merge set all_icons(T$ui_index) file_statechange set all_icons(_$ui_workdir) file_plain +set all_icons(A$ui_workdir) file_plain set all_icons(M$ui_workdir) file_mod set all_icons(D$ui_workdir) file_question set all_icons(U$ui_workdir) file_merge @@ -2113,6 +2114,7 @@ foreach i { {A_ {mc "Staged for commit"}} {AM {mc "Portions staged for commit"}} {AD {mc "Staged for commit, missing"}} + {AA {mc "Intended to be added"}} {_D {mc "Missing"}} {D_ {mc "Staged for removal"}} diff --git a/git-gui/lib/diff.tcl b/git-gui/lib/diff.tcl index 871ad488c2a1c0..36d3715f7b25a2 100644 --- a/git-gui/lib/diff.tcl +++ b/git-gui/lib/diff.tcl @@ -582,7 +582,8 @@ proc apply_or_revert_hunk {x y revert} { if {$current_diff_side eq $ui_index} { set failed_msg [mc "Failed to unstage selected hunk."] lappend apply_cmd --reverse --cached - if {[string index $mi 0] ne {M}} { + set file_state [string index $mi 0] + if {$file_state ne {M} && $file_state ne {A}} { unlock_index return } @@ -595,7 +596,8 @@ proc apply_or_revert_hunk {x y revert} { lappend apply_cmd --cached } - if {[string index $mi 1] ne {M}} { + set file_state [string index $mi 1] + if {$file_state ne {M} && $file_state ne {A}} { unlock_index return } @@ -687,7 +689,8 @@ proc apply_or_revert_range_or_line {x y revert} { set failed_msg [mc "Failed to unstage selected line."] set to_context {+} lappend apply_cmd --reverse --cached - if {[string index $mi 0] ne {M}} { + set file_state [string index $mi 0] + if {$file_state ne {M} && $file_state ne {A}} { unlock_index return } @@ -702,7 +705,8 @@ 
proc apply_or_revert_range_or_line {x y revert} { lappend apply_cmd --cached } - if {[string index $mi 1] ne {M}} { + set file_state [string index $mi 1] + if {$file_state ne {M} && $file_state ne {A}} { unlock_index return } From 079b85ab5297004545c9ed361fd27191d43972bf Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 Dec 2020 22:54:11 +0100 Subject: [PATCH 108/303] git maintenance: avoid console window in scheduled tasks on Windows We just introduced a helper to avoid showing a console window when the scheduled task runs `git.exe`. Let's actually use it. Signed-off-by: Johannes Schindelin Signed-off-by: Derrick Stolee --- builtin/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/gc.c b/builtin/gc.c index 2753bd15a5e85f..effe7ac15661ce 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1998,7 +1998,7 @@ static int schtasks_schedule_task(const char *exec_path, enum schedule_priority "\n" "\n" "\n" - "\"%s\\git.exe\"\n" + "\"%s\\headless-git.exe\"\n" "--exec-path=\"%s\" for-each-repo --config=maintenance.repo maintenance run --schedule=%s\n" "\n" "\n" From d061df3df0dd476e93fa9f533f26cc9824199958 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Sun, 6 Oct 2019 18:43:57 +0100 Subject: [PATCH 109/303] vcpkg_install: add comment regarding slow network connections The vcpkg downloads may not succeed. Warn careful readers of the time out. A simple retry will usually resolve the issue. Signed-off-by: Philip Oakley Signed-off-by: Johannes Schindelin --- compat/vcbuild/vcpkg_install.bat | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compat/vcbuild/vcpkg_install.bat b/compat/vcbuild/vcpkg_install.bat index bcbbf536af3141..8330d8120fb511 100644 --- a/compat/vcbuild/vcpkg_install.bat +++ b/compat/vcbuild/vcpkg_install.bat @@ -80,6 +80,12 @@ REM ================================================================ :sub__install_one echo Installing package %1... 
+ REM vcpkg may not be reliable on slow, intermittent or proxy + REM connections, see e.g. + REM https://social.msdn.microsoft.com/Forums/windowsdesktop/en-US/4a8f7be5-5e15-4213-a7bb-ddf424a954e6/winhttpsendrequest-ends-with-12002-errorhttptimeout-after-21-seconds-no-matter-what-timeout?forum=windowssdk + REM which explains the hidden 21 second timeout + REM (last post by Dave : Microsoft - Windows Networking team) + .\vcpkg.exe install %1:%arch% IF ERRORLEVEL 1 ( EXIT /B 1 ) From c6ff9511d1a17c816a73b079054104936ce8d1b6 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 10 Apr 2020 23:39:40 +0200 Subject: [PATCH 110/303] Add a GitHub workflow to generate Git for Windows' Pacman package Git for Windows uses MSYS2 as base system, and therefore the Git binaries are bundled as Pacman package. This workflow allows building the 64-bit version of this package (which is called `mingw-w64-x86_64-git`). Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 141 ++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 .github/workflows/git-artifacts.yml diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml new file mode 100644 index 00000000000000..837015d7f7752e --- /dev/null +++ b/.github/workflows/git-artifacts.yml @@ -0,0 +1,141 @@ +name: mingw-w64-x86_64-git + +on: + # This workflow can be triggered manually in the Actions tab, see + # https://github.blog/changelog/2020-07-06-github-actions-manual-triggers-with-workflow_dispatch/ + - workflow_dispatch + +env: + HOME: "${{github.workspace}}\\home" + MSYSTEM: MINGW64 + USERPROFILE: "${{github.workspace}}\\home" + +jobs: + bundle-artifacts: + runs-on: windows-latest + steps: + - name: Configure user + shell: bash + run: + USER_NAME="${{github.actor}}" && + USER_EMAIL="${{github.actor}}@users.noreply.github.com" && + mkdir "$HOME" && + git config --global user.name "$USER_NAME" && + git config --global user.email "$USER_EMAIL" && + 
echo "PACKAGER=$USER_NAME <$USER_EMAIL>" >>$GITHUB_ENV + - name: Download git-sdk-64-build-installers + shell: bash + run: | + # Use Git Bash to download and unpack the artifact + + ## Get artifact + urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds + id=$(curl "$urlbase?definitions=29&statusFilter=completed&resultFilter=succeeded&\$top=1" | + jq -r '.value[0].id') + download_url=$(curl "$urlbase/$id/artifacts" | + jq -r '.value[] | select(.name == "git-sdk-64-build-installers").resource.downloadUrl') + + curl -o artifacts.zip "$download_url" + + ## Unpack artifact + unzip artifacts.zip + - name: Clone build-extra + shell: bash + run: | + d=git-sdk-64-build-installers/usr/src/build-extra && + git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d + - name: Generate bundle artifacts + shell: powershell + run: | + & .\git-sdk-64-build-installers\git-cmd.exe --command=usr\bin\bash.exe -lc @" + printf '#!/bin/sh\n\nexec /mingw64/bin/git.exe "`$@"\n' >/usr/bin/git && + mkdir -p bundle-artifacts && + + git -c init.defaultBranch=main init --bare && + git remote add -f origin https://github.com/git-for-windows/git && + git fetch https://github.com/${{github.repository}} ${{github.ref}}:${{github.ref}} && + + tag_name=\"`$(git describe --match 'v[0-9]*' FETCH_HEAD)-`$(date +%Y%m%d%H%M%S)\" && + echo \"prerelease-`${tag_name#v}\" >bundle-artifacts/ver && + echo \"`${tag_name#v}\" >bundle-artifacts/display_version && + echo \"`$tag_name\" >bundle-artifacts/next_version && + git tag -m \"Snapshot build\" \"`$tag_name\" FETCH_HEAD && + git bundle create bundle-artifacts/git.bundle origin/main..\"`$tag_name\" && + + sh -x /usr/src/build-extra/please.sh mention feature \"Snapshot of `$(git show -s --pretty='tformat:%h (%s, %ad)' --date=short FETCH_HEAD)\" && + git -C /usr/src/build-extra bundle create \"`$PWD/bundle-artifacts/build-extra.bundle\" origin/main..main + "@ + - name: 'Publish Pipeline Artifact: bundle-artifacts' + uses: 
actions/upload-artifact@v1 + with: + name: bundle-artifacts + path: bundle-artifacts + pkg-x86_64: + runs-on: windows-latest + needs: bundle-artifacts + steps: + - name: Configure user + shell: bash + run: + USER_NAME="${{github.actor}}" && + USER_EMAIL="${{github.actor}}@users.noreply.github.com" && + mkdir "$HOME" && + git config --global user.name "$USER_NAME" && + git config --global user.email "$USER_EMAIL" && + echo "PACKAGER=$USER_NAME <$USER_EMAIL>" >>$GITHUB_ENV + - name: Download git-sdk-64-makepkg-git + shell: bash + run: | + # Use Git Bash to download and unpack the artifact + + ## Get artifact + urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds + id=${{ needs.bundle-artifacts.outputs.latest-sdk64-extra-build-id }} + download_url="$(curl "$urlbase/$id/artifacts" | + jq -r '.value[] | select(.name == "git-sdk-64-makepkg-git").resource.downloadUrl')" + + curl -o artifacts.zip "$download_url" + + ## Unpack artifact + unzip artifacts.zip + - name: Download bundle-artifacts + uses: actions/download-artifact@v1 + with: + name: bundle-artifacts + path: bundle-artifacts + - name: Clone and update build-extra + shell: bash + run: | + d=git-sdk-64-makepkg-git/usr/src/build-extra && + git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d && + git -C $d pull "$PWD"/bundle-artifacts/build-extra.bundle main + - name: Check out git/git + shell: bash + run: | + git -c init.defaultBranch=main init && + git remote add -f origin https://github.com/git-for-windows/git && + git fetch --tags bundle-artifacts/git.bundle $(cat bundle-artifacts/next_version) && + git reset --hard $(cat bundle-artifacts/next_version) + - name: Build mingw-w64-x86_64-git + shell: powershell + run: | + & git-sdk-64-makepkg-git\usr\bin\sh.exe -lc @" + set -x + # Let `cv2pdb` find the DLLs + PATH=\"`$PATH:/c/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/IDE/:/C/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64\" + type -p 
mspdb140.dll || exit 1 + sh -x /usr/src/build-extra/please.sh build-mingw-w64-git --only-64-bit --build-src-pkg -o artifacts HEAD && + cp bundle-artifacts/ver artifacts/ && + + b=`$PWD/artifacts && + version=`$(cat bundle-artifacts/next_version) && + (cd /usr/src/MINGW-packages/mingw-w64-git && + cp PKGBUILD.`$version PKGBUILD && + git commit -s -m \"mingw-w64-git: new version (`$version)\" PKGBUILD && + git bundle create \"`$b\"/MINGW-packages.bundle origin/main..main) + "@ + - name: Publish mingw-w64-x86_64-git + uses: actions/upload-artifact@v1 + with: + name: pkg-x86_64 + path: artifacts From 5bfd37b1af0c92cf7b8e59f8e962254ddfb04237 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 12 Apr 2020 00:24:49 +0200 Subject: [PATCH 111/303] git-artifacts: if GPG secrets are available, use them This expects the `GPGKEY` and `PRIVGPGKEY` secrets to be set in the respective GitHub repository. The `GPGKEY` value should be of the form --passphrase --yes --batch --no-tty --pinentry-mode loopback --digest-algo SHA256 and the `PRIVGPGKEY` should be generated via gpg --export-secret-keys | base64 | tr '\n' % Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 44 ++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 837015d7f7752e..b5dbf6c4bcf5a4 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -6,6 +6,7 @@ on: - workflow_dispatch env: + GPG_OPTIONS: "--batch --yes --no-tty --list-options no-show-photos --verify-options no-show-photos --pinentry-mode loopback" HOME: "${{github.workspace}}\\home" MSYSTEM: MINGW64 USERPROFILE: "${{github.workspace}}\\home" @@ -44,7 +45,21 @@ jobs: run: | d=git-sdk-64-build-installers/usr/src/build-extra && git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d + - name: Prepare home directory for GPG signing + if: 
env.GPGKEY != '' + shell: bash + run: | + echo '${{secrets.PRIVGPGKEY}}' | tr % '\n' | gpg $GPG_OPTIONS --import && + mkdir -p home && + git config --global gpg.program "$PWD/git-sdk-64-build-installers/usr/src/build-extra/gnupg-with-gpgkey.sh" && + info="$(gpg --list-keys --with-colons "${GPGKEY%% *}" | cut -d : -f 1,10 | sed -n '/^uid/{s|uid:||p;q}')" && + git config --global user.name "${info% <*}" && + git config --global user.email "<${info#*<}" + env: + GPGKEY: ${{secrets.GPGKEY}} - name: Generate bundle artifacts + env: + GPGKEY: ${{secrets.GPGKEY}} shell: powershell run: | & .\git-sdk-64-build-installers\git-cmd.exe --command=usr\bin\bash.exe -lc @" @@ -59,12 +74,16 @@ jobs: echo \"prerelease-`${tag_name#v}\" >bundle-artifacts/ver && echo \"`${tag_name#v}\" >bundle-artifacts/display_version && echo \"`$tag_name\" >bundle-artifacts/next_version && - git tag -m \"Snapshot build\" \"`$tag_name\" FETCH_HEAD && + git tag `$(test -z \"`$GPGKEY\" || echo \" -s\") -m \"Snapshot build\" \"`$tag_name\" FETCH_HEAD && git bundle create bundle-artifacts/git.bundle origin/main..\"`$tag_name\" && sh -x /usr/src/build-extra/please.sh mention feature \"Snapshot of `$(git show -s --pretty='tformat:%h (%s, %ad)' --date=short FETCH_HEAD)\" && git -C /usr/src/build-extra bundle create \"`$PWD/bundle-artifacts/build-extra.bundle\" origin/main..main "@ + - name: Clean up temporary files + if: always() + shell: bash + run: rm -rf home - name: 'Publish Pipeline Artifact: bundle-artifacts' uses: actions/upload-artifact@v1 with: @@ -116,7 +135,19 @@ jobs: git remote add -f origin https://github.com/git-for-windows/git && git fetch --tags bundle-artifacts/git.bundle $(cat bundle-artifacts/next_version) && git reset --hard $(cat bundle-artifacts/next_version) + - name: Prepare home directory for GPG signing + if: env.GPGKEY != '' + shell: bash + run: | + echo '${{secrets.PRIVGPGKEY}}' | tr % '\n' | gpg $GPG_OPTIONS --import && + info="$(gpg --list-keys --with-colons "${GPGKEY%% *}" | 
cut -d : -f 1,10 | sed -n '/^uid/{s|uid:||p;q}')" && + git config --global user.name "${info% <*}" && + git config --global user.email "<${info#*<}" + env: + GPGKEY: ${{secrets.GPGKEY}} - name: Build mingw-w64-x86_64-git + env: + GPGKEY: "${{secrets.GPGKEY}}" shell: powershell run: | & git-sdk-64-makepkg-git\usr\bin\sh.exe -lc @" @@ -126,6 +157,13 @@ jobs: type -p mspdb140.dll || exit 1 sh -x /usr/src/build-extra/please.sh build-mingw-w64-git --only-64-bit --build-src-pkg -o artifacts HEAD && cp bundle-artifacts/ver artifacts/ && + if test -n \"`$GPGKEY\" + then + for tar in artifacts/*.tar* + do + /usr/src/build-extra/gnupg-with-gpgkey.sh --detach-sign --no-armor `$tar + done + fi && b=`$PWD/artifacts && version=`$(cat bundle-artifacts/next_version) && @@ -134,6 +172,10 @@ jobs: git commit -s -m \"mingw-w64-git: new version (`$version)\" PKGBUILD && git bundle create \"`$b\"/MINGW-packages.bundle origin/main..main) "@ + - name: Clean up temporary files + if: always() + shell: bash + run: rm -rf home - name: Publish mingw-w64-x86_64-git uses: actions/upload-artifact@v1 with: From 400f76969dd96a00151cb4ba42fd447d241490a8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 12 Apr 2020 00:34:56 +0200 Subject: [PATCH 112/303] git-artifacts: also code-sign, if configured via the secrets When the secrets `CODESIGN_P12` and `CODESIGN_PASS` are set, the workflow will now code-sign the `.exe` files contained in the package. This should help with a few anti-malware programs, at least when the certificate saw some action and gained trust. 
Note: `CODESIGN_P12` needs to be generated via cat .p12 | base64 | tr '\n' % Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index b5dbf6c4bcf5a4..7faeac0fcca436 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -135,6 +135,18 @@ jobs: git remote add -f origin https://github.com/git-for-windows/git && git fetch --tags bundle-artifacts/git.bundle $(cat bundle-artifacts/next_version) && git reset --hard $(cat bundle-artifacts/next_version) + - name: Prepare home directory for code-signing + env: + CODESIGN_P12: ${{secrets.CODESIGN_P12}} + CODESIGN_PASS: ${{secrets.CODESIGN_PASS}} + if: env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' + shell: bash + run: | + cd home && + mkdir -p .sig && + echo -n "$CODESIGN_P12" | tr % '\n' | base64 -d >.sig/codesign.p12 && + echo -n "$CODESIGN_PASS" >.sig/codesign.pass + git config --global alias.signtool '!sh "/usr/src/build-extra/signtool.sh"' - name: Prepare home directory for GPG signing if: env.GPGKEY != '' shell: bash From 217d7a42e5c66e6b98395b70fd409e5b83d2fcc8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 12 Apr 2020 00:43:19 +0200 Subject: [PATCH 113/303] git-artifacts: also build the installer While at it, we might just as well build the Git for Windows installer ;-) Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 75 ++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 7faeac0fcca436..380dc00b86da80 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -1,4 +1,4 @@ -name: mingw-w64-x86_64-git +name: git-artifacts on: # This workflow can be triggered manually in the Actions tab, see @@ -193,3 +193,76 @@ jobs: with: 
name: pkg-x86_64 path: artifacts + installer-x86_64: + runs-on: windows-latest + needs: pkg-x86_64 + env: + MSYSTEM: MINGW64 + steps: + - name: Download pkg-x86_64 + uses: actions/download-artifact@v1 + with: + name: pkg-x86_64 + path: pkg-x86_64 + - name: Download bundle-artifacts + uses: actions/download-artifact@v1 + with: + name: bundle-artifacts + path: bundle-artifacts + - name: Download git-sdk-64-build-installers + shell: bash + run: | + # Use Git Bash to download and unpack the artifact + + ## Get artifact + urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds + id=${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} + download_url="$(curl "$urlbase/$id/artifacts" | + jq -r '.value[] | select(.name == "git-sdk-64-build-installers").resource.downloadUrl')" + + curl -o artifacts.zip "$download_url" + + ## Unpack artifact + unzip artifacts.zip + - name: Clone and update build-extra + shell: bash + run: | + d=git-sdk-64-build-installers/usr/src/build-extra && + git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d && + git -C $d pull "$PWD"/bundle-artifacts/build-extra.bundle main + - name: Prepare home directory for code-signing + env: + CODESIGN_P12: ${{secrets.CODESIGN_P12}} + CODESIGN_PASS: ${{secrets.CODESIGN_PASS}} + if: (matrix.artifact.name == 'installer' || matrix.artifact.name == 'portable') && env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' + shell: bash + run: | + mkdir -p home/.sig && + echo -n "$CODESIGN_P12" | tr % '\n' | base64 -d >home/.sig/codesign.p12 && + echo -n "$CODESIGN_PASS" >home/.sig/codesign.pass && + git config --global alias.signtool '!sh "/usr/src/build-extra/signtool.sh"' + - name: Build 64-bit installer + shell: powershell + run: | + & .\git-sdk-64-build-installers\usr\bin\bash.exe -lc @" + set -x + /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-x86_64/ver) -o artifacts --installer --pkg=pkg-x86_64/mingw-w64-x86_64-git-[0-9]*.tar.xz 
--pkg=pkg-x86_64/mingw-w64-x86_64-git-doc-html-[0-9]*.tar.xz && + openssl dgst -sha256 artifacts/Git-*.exe | sed \"s/.* //\" >artifacts/sha-256.txt && + cp /usr/src/build-extra/installer/package-versions.txt artifacts/ && + + a=`$PWD/artifacts && + p=`$PWD/pkg-x86_64 && + (cd /usr/src/build-extra && + mkdir -p cached-source-packages && + cp \"`$p\"/*-pdb* cached-source-packages/ && + GIT_CONFIG_PARAMETERS=\"'windows.sdk64.path='\" ./please.sh bundle_pdbs --arch=x86_64 --directory=\"`$a\" installer/package-versions.txt) + "@ + - name: Clean up temporary files + if: always() + shell: bash + run: rm -rf home + - name: Publish installer-x86_64 + uses: actions/upload-artifact@v1 + with: + name: installer-x86_64 + path: artifacts From 5012d5750a7fa27c2d75fe8863b789039f692e25 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 12 Apr 2020 00:51:00 +0200 Subject: [PATCH 114/303] git-artifacts: also build portable, mingit and mingit-busybox ... because we can. Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 43 ++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 380dc00b86da80..ed7c7e4a261d2f 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -193,9 +193,28 @@ jobs: with: name: pkg-x86_64 path: artifacts - installer-x86_64: + artifacts: runs-on: windows-latest - needs: pkg-x86_64 + needs: pkg + strategy: + matrix: + artifact: + - name: installer + fileprefix: Git + fileextension: exe + - name: portable + fileprefix: PortableGit + fileextension: exe + - name: archive + fileprefix: Git + fileextension: tar.bz2 + - name: mingit + fileprefix: MinGit + fileextension: zip + - name: mingit-busybox + fileprefix: MinGit + fileextension: zip + fail-fast: false env: MSYSTEM: MINGW64 steps: @@ -241,13 +260,23 @@ jobs: echo -n "$CODESIGN_P12" | tr % '\n' | base64 -d >home/.sig/codesign.p12 
&& echo -n "$CODESIGN_PASS" >home/.sig/codesign.pass && git config --global alias.signtool '!sh "/usr/src/build-extra/signtool.sh"' - - name: Build 64-bit installer + - name: Build 64-bit ${{matrix.artifact.name}} shell: powershell run: | & .\git-sdk-64-build-installers\usr\bin\bash.exe -lc @" set -x - /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-x86_64/ver) -o artifacts --installer --pkg=pkg-x86_64/mingw-w64-x86_64-git-[0-9]*.tar.xz --pkg=pkg-x86_64/mingw-w64-x86_64-git-doc-html-[0-9]*.tar.xz && - openssl dgst -sha256 artifacts/Git-*.exe | sed \"s/.* //\" >artifacts/sha-256.txt && + /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-x86_64/ver) -o artifacts --${{matrix.artifact.name}} --pkg=pkg-x86_64/mingw-w64-x86_64-git-[0-9]*.tar.xz --pkg=pkg-x86_64/mingw-w64-x86_64-git-doc-html-[0-9]*.tar.xz && + if test portable = '${{matrix.artifact.name}}' && test -n \"`$(git config alias.signtool)\" + then + git signtool artifacts/PortableGit-*.exe + fi && + openssl dgst -sha256 artifacts/${{matrix.artifact.fileprefix}}-*.${{matrix.artifact.fileextension}} | sed \"s/.* //\" >artifacts/sha-256.txt + "@ + - name: Copy package-versions and pdbs + if: matrix.artifact.name == 'installer' + shell: powershell + run: | + & .\git-sdk-64-build-installers\usr\bin\bash.exe -lc @" cp /usr/src/build-extra/installer/package-versions.txt artifacts/ && a=`$PWD/artifacts && @@ -261,8 +290,8 @@ jobs: if: always() shell: bash run: rm -rf home - - name: Publish installer-x86_64 + - name: Publish ${{matrix.artifact.name}}-x86_64 uses: actions/upload-artifact@v1 with: - name: installer-x86_64 + name: ${{matrix.artifact.name}}-x86_64 path: artifacts From 0f110f637c5ea0341eecce422b0209dcaf37389f Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 12 Apr 2020 00:57:23 +0200 Subject: [PATCH 115/303] git-artifacts: also build 32-bit versions Just in case that we need to generate those real quick. 
Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 70 +++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index ed7c7e4a261d2f..2b2ef6ebab234d 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -89,9 +89,18 @@ jobs: with: name: bundle-artifacts path: bundle-artifacts - pkg-x86_64: + pkg: runs-on: windows-latest needs: bundle-artifacts + strategy: + matrix: + arch: + - name: x86_64 + bitness: 64 + bin: /amd64 + - name: i686 + bitness: 32 + bin: '' steps: - name: Configure user shell: bash @@ -157,7 +166,7 @@ jobs: git config --global user.email "<${info#*<}" env: GPGKEY: ${{secrets.GPGKEY}} - - name: Build mingw-w64-x86_64-git + - name: Build mingw-w64-${{matrix.arch.name}}-git env: GPGKEY: "${{secrets.GPGKEY}}" shell: powershell @@ -165,9 +174,9 @@ jobs: & git-sdk-64-makepkg-git\usr\bin\sh.exe -lc @" set -x # Let `cv2pdb` find the DLLs - PATH=\"`$PATH:/c/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/IDE/:/C/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64\" + PATH=\"`$PATH:/c/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/IDE/:/C/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin${{matrix.arch.bin}}\" type -p mspdb140.dll || exit 1 - sh -x /usr/src/build-extra/please.sh build-mingw-w64-git --only-64-bit --build-src-pkg -o artifacts HEAD && + sh -x /usr/src/build-extra/please.sh build-mingw-w64-git --only-${{matrix.arch.bitness}}-bit --build-src-pkg -o artifacts HEAD && cp bundle-artifacts/ver artifacts/ && if test -n \"`$GPGKEY\" then @@ -188,10 +197,10 @@ jobs: if: always() shell: bash run: rm -rf home - - name: Publish mingw-w64-x86_64-git + - name: Publish mingw-w64-${{matrix.arch.name}}-git uses: actions/upload-artifact@v1 with: - name: pkg-x86_64 + name: pkg-${{matrix.arch.name}} path: artifacts artifacts: runs-on: windows-latest 
@@ -214,21 +223,27 @@ jobs: - name: mingit-busybox fileprefix: MinGit fileextension: zip + arch: + - name: x86_64 + bitness: 64 + - name: i686 + bitness: 32 fail-fast: false env: - MSYSTEM: MINGW64 + MSYSTEM: MINGW${{matrix.arch.bitness}} steps: - - name: Download pkg-x86_64 + - name: Download pkg-${{matrix.arch.name}} uses: actions/download-artifact@v1 with: - name: pkg-x86_64 - path: pkg-x86_64 + name: pkg-${{matrix.arch.name}} + path: pkg-${{matrix.arch.name}} - name: Download bundle-artifacts uses: actions/download-artifact@v1 with: name: bundle-artifacts path: bundle-artifacts - name: Download git-sdk-64-build-installers + if: matrix.arch.bitness == '64' shell: bash run: | # Use Git Bash to download and unpack the artifact @@ -241,12 +256,29 @@ jobs: curl -o artifacts.zip "$download_url" + ## Unpack artifact + unzip artifacts.zip + - name: Download git-sdk-32-build-installers + if: matrix.arch.bitness == '32' + shell: bash + run: | + # Use Git Bash to download and unpack the artifact + + ## Get artifact + urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds + id=$(curl "$urlbase?definitions=30&statusFilter=completed&resultFilter=succeeded&\$top=1" | + jq -r '.value[0].id') + download_url=$(curl "$urlbase/$id/artifacts" | + jq -r '.value[] | select(.name == "git-sdk-32-build-installers").resource.downloadUrl') + + curl -o artifacts.zip "$download_url" + ## Unpack artifact unzip artifacts.zip - name: Clone and update build-extra shell: bash run: | - d=git-sdk-64-build-installers/usr/src/build-extra && + d=git-sdk-${{matrix.arch.bitness}}-build-installers/usr/src/build-extra && git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d && git -C $d pull "$PWD"/bundle-artifacts/build-extra.bundle main - name: Prepare home directory for code-signing @@ -260,12 +292,12 @@ jobs: echo -n "$CODESIGN_P12" | tr % '\n' | base64 -d >home/.sig/codesign.p12 && echo -n "$CODESIGN_PASS" >home/.sig/codesign.pass && git config --global 
alias.signtool '!sh "/usr/src/build-extra/signtool.sh"' - - name: Build 64-bit ${{matrix.artifact.name}} + - name: Build ${{matrix.arch.bitness}}-bit ${{matrix.artifact.name}} shell: powershell run: | - & .\git-sdk-64-build-installers\usr\bin\bash.exe -lc @" + & .\git-sdk-${{matrix.arch.bitness}}-build-installers\usr\bin\bash.exe -lc @" set -x - /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-x86_64/ver) -o artifacts --${{matrix.artifact.name}} --pkg=pkg-x86_64/mingw-w64-x86_64-git-[0-9]*.tar.xz --pkg=pkg-x86_64/mingw-w64-x86_64-git-doc-html-[0-9]*.tar.xz && + /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-${{matrix.arch.name}}/ver) -o artifacts --${{matrix.artifact.name}} --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-[0-9]*.tar.xz --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-doc-html-[0-9]*.tar.xz && if test portable = '${{matrix.artifact.name}}' && test -n \"`$(git config alias.signtool)\" then git signtool artifacts/PortableGit-*.exe @@ -276,22 +308,22 @@ jobs: if: matrix.artifact.name == 'installer' shell: powershell run: | - & .\git-sdk-64-build-installers\usr\bin\bash.exe -lc @" + & .\git-sdk-${{matrix.arch.bitness}}-build-installers\usr\bin\bash.exe -lc @" cp /usr/src/build-extra/installer/package-versions.txt artifacts/ && a=`$PWD/artifacts && - p=`$PWD/pkg-x86_64 && + p=`$PWD/pkg-${{matrix.arch.name}} && (cd /usr/src/build-extra && mkdir -p cached-source-packages && cp \"`$p\"/*-pdb* cached-source-packages/ && - GIT_CONFIG_PARAMETERS=\"'windows.sdk64.path='\" ./please.sh bundle_pdbs --arch=x86_64 --directory=\"`$a\" installer/package-versions.txt) + GIT_CONFIG_PARAMETERS=\"'windows.sdk${{matrix.arch.bitness}}.path='\" ./please.sh bundle_pdbs --arch=${{matrix.arch.name}} --directory=\"`$a\" installer/package-versions.txt) "@ - name: Clean up temporary files if: always() shell: bash run: rm -rf home - - name: Publish 
${{matrix.artifact.name}}-x86_64 + - name: Publish ${{matrix.artifact.name}}-${{matrix.arch.name}} uses: actions/upload-artifact@v1 with: - name: ${{matrix.artifact.name}}-x86_64 + name: ${{matrix.artifact.name}}-${{matrix.arch.name}} path: artifacts From 44ad606f65b8c717e581c7caa13907be0c24fcd5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 12 Apr 2020 00:52:22 +0200 Subject: [PATCH 116/303] git-artifacts: also build the nuget package The two NuGet artifacts exist only in the 64-bit version. So let's make them in a separate, non-matrix job. Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 49 +++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 2b2ef6ebab234d..2f521175d20351 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -327,3 +327,52 @@ jobs: with: name: ${{matrix.artifact.name}}-${{matrix.arch.name}} path: artifacts + nuget: + runs-on: windows-latest + needs: pkg + steps: + - name: Download pkg-x86_64 + uses: actions/download-artifact@v1 + with: + name: pkg-x86_64 + path: pkg-x86_64 + - name: Download bundle-artifacts + uses: actions/download-artifact@v1 + with: + name: bundle-artifacts + path: bundle-artifacts + - name: Download git-sdk-64-build-installers + shell: bash + run: | + # Use Git Bash to download and unpack the artifact + + ## Get artifact + urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds + id=${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} + download_url=$(curl "$urlbase/$id/artifacts" | + jq -r '.value[] | select(.name == "git-sdk-64-build-installers").resource.downloadUrl') + + curl -o artifacts.zip "$download_url" + + ## Unpack artifact + unzip artifacts.zip + - name: Clone and update build-extra + shell: bash + run: | + d=git-sdk-64-build-installers/usr/src/build-extra && + git clone --single-branch -b main
https://github.com/git-for-windows/build-extra $d && + git -C $d pull "$PWD"/bundle-artifacts/build-extra.bundle main + - uses: nuget/setup-nuget@v1 + - name: Build 64-bit NuGet packages + shell: powershell + run: | + & .\git-sdk-64-build-installers\usr\bin\bash.exe -lc @" + /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-x86_64/ver) -o artifacts --nuget --pkg=pkg-x86_64/mingw-w64-x86_64-git-[0-9]*.tar.xz --pkg=pkg-x86_64/mingw-w64-x86_64-git-doc-html-[0-9]*.tar.xz && + /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-x86_64/ver) -o artifacts --nuget-mingit && + openssl dgst -sha256 artifacts/Git*.nupkg | sed \"s/.* //\" >artifacts/sha-256.txt + "@ + - name: Publish nuget-x86_64 + uses: actions/upload-artifact@v1 + with: + name: nuget-x86_64 + path: artifacts From 5427059c001c3e1feae8585e09fd63ebb95b58d9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 25 Feb 2021 00:06:41 +0100 Subject: [PATCH 117/303] git-artifacts(arm64): avoid hard-linking the dashed built-ins The archive and MinGit variants really get bloated because they handle those as straight copies instead of hard-links. 
Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 327683f35f8f2c..630a3e17b58273 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -288,7 +288,7 @@ jobs: run: | cmake `pwd`/contrib/buildsystems/ -DCMAKE_PREFIX_PATH=`pwd`/compat/vcbuild/vcpkg/installed/arm64-windows \ -DNO_GETTEXT=YesPlease -DPERL_TESTS=OFF -DPYTHON_TESTS=OFF -DCURL_NO_CURL_CMAKE=ON -DCMAKE_GENERATOR_PLATFORM=arm64 -DVCPKG_ARCH=arm64-windows \ - -DCMAKE_INSTALL_PREFIX="`pwd`/git-arm64" + -DCMAKE_INSTALL_PREFIX="`pwd`/git-arm64" -DSKIP_DASHED_BUILT_INS=ON - name: MSBuild run: msbuild git.sln -property:Configuration=Release - name: Link the Git executables From e5c469befc44e9a2a93639653eb6650054dcc844 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 11 Apr 2020 23:43:55 +0200 Subject: [PATCH 118/303] git-artifacts: allow restricting which artifacts are built Users can now specify which artifacts they want to build, via the `build_only` input, which is a space-separated list of artifacts. For example, `installer portable` will build `installer-x86_64`, `installer-i686`, `portable-x86_64` and `portable-i686`, and an empty or unset value will build all artifacts. Please note that the `mingw-w64-git` packages are built always, as it would be tricky to figure out when they need to be built (for example, `build_only=portable-x86_64` technically does not need `pkg-i686` to be built, while `build_only=portable` does). 
Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 44 +++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 2f521175d20351..6700e7ef26cf04 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -3,13 +3,17 @@ name: git-artifacts on: # This workflow can be triggered manually in the Actions tab, see # https://github.blog/changelog/2020-07-06-github-actions-manual-triggers-with-workflow_dispatch/ - - workflow_dispatch + workflow_dispatch: + inputs: + build_only: + description: 'Optionally restrict what artifacts to build' env: GPG_OPTIONS: "--batch --yes --no-tty --list-options no-show-photos --verify-options no-show-photos --pinentry-mode loopback" HOME: "${{github.workspace}}\\home" MSYSTEM: MINGW64 USERPROFILE: "${{github.workspace}}\\home" + BUILD_ONLY: "${{github.event.inputs.build_only}}" jobs: bundle-artifacts: @@ -232,18 +236,28 @@ jobs: env: MSYSTEM: MINGW${{matrix.arch.bitness}} steps: + - name: Determine whether this job should be skipped + shell: bash + run: | + case " $BUILD_ONLY " in + ' ') ;; # not set; build all + *" ${{matrix.artifact.name}} "*|*" ${{matrix.artifact.name}}-${{matrix.arch.name}} "*) ;; # build this artifact + *) echo "SKIP=true" >>$GITHUB_ENV;; + esac - name: Download pkg-${{matrix.arch.name}} + if: env.SKIP != 'true' uses: actions/download-artifact@v1 with: name: pkg-${{matrix.arch.name}} path: pkg-${{matrix.arch.name}} - name: Download bundle-artifacts + if: env.SKIP != 'true' uses: actions/download-artifact@v1 with: name: bundle-artifacts path: bundle-artifacts - name: Download git-sdk-64-build-installers - if: matrix.arch.bitness == '64' + if: env.SKIP != 'true' && matrix.arch.bitness == '64' shell: bash run: | # Use Git Bash to download and unpack the artifact @@ -259,7 +273,7 @@ jobs: ## Unpack artifact unzip artifacts.zip - name: Download 
git-sdk-32-build-installers - if: matrix.arch.bitness == '32' + if: env.SKIP != 'true' && matrix.arch.bitness == '32' shell: bash run: | # Use Git Bash to download and unpack the artifact @@ -276,6 +290,7 @@ jobs: ## Unpack artifact unzip artifacts.zip - name: Clone and update build-extra + if: env.SKIP != 'true' shell: bash run: | d=git-sdk-${{matrix.arch.bitness}}-build-installers/usr/src/build-extra && @@ -285,7 +300,7 @@ jobs: env: CODESIGN_P12: ${{secrets.CODESIGN_P12}} CODESIGN_PASS: ${{secrets.CODESIGN_PASS}} - if: (matrix.artifact.name == 'installer' || matrix.artifact.name == 'portable') && env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' + if: env.SKIP != 'true' && (matrix.artifact.name == 'installer' || matrix.artifact.name == 'portable') && env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' shell: bash run: | mkdir -p home/.sig && @@ -293,6 +308,7 @@ jobs: echo -n "$CODESIGN_PASS" >home/.sig/codesign.pass && git config --global alias.signtool '!sh "/usr/src/build-extra/signtool.sh"' - name: Build ${{matrix.arch.bitness}}-bit ${{matrix.artifact.name}} + if: env.SKIP != 'true' shell: powershell run: | & .\git-sdk-${{matrix.arch.bitness}}-build-installers\usr\bin\bash.exe -lc @" @@ -305,7 +321,7 @@ jobs: openssl dgst -sha256 artifacts/${{matrix.artifact.fileprefix}}-*.${{matrix.artifact.fileextension}} | sed \"s/.* //\" >artifacts/sha-256.txt "@ - name: Copy package-versions and pdbs - if: matrix.artifact.name == 'installer' + if: env.SKIP != 'true' && matrix.artifact.name == 'installer' shell: powershell run: | & .\git-sdk-${{matrix.arch.bitness}}-build-installers\usr\bin\bash.exe -lc @" @@ -319,10 +335,11 @@ jobs: GIT_CONFIG_PARAMETERS=\"'windows.sdk${{matrix.arch.bitness}}.path='\" ./please.sh bundle_pdbs --arch=${{matrix.arch.name}} --directory=\"`$a\" installer/package-versions.txt) "@ - name: Clean up temporary files - if: always() + if: always() && env.SKIP != 'true' shell: bash run: rm -rf home - name: Publish 
${{matrix.artifact.name}}-${{matrix.arch.name}} + if: env.SKIP != 'true' uses: actions/upload-artifact@v1 with: name: ${{matrix.artifact.name}}-${{matrix.arch.name}} @@ -331,17 +348,28 @@ jobs: runs-on: windows-latest needs: pkg steps: + - name: Determine whether this job should be skipped + shell: bash + run: | + case " $BUILD_ONLY " in + ' ') ;; # not set; build all + *" nuget "*) ;; # build this artifact + *) echo "SKIP=true" >>$GITHUB_ENV;; + esac - name: Download pkg-x86_64 + if: env.SKIP != 'true' uses: actions/download-artifact@v1 with: name: pkg-x86_64 path: pkg-x86_64 - name: Download bundle-artifacts + if: env.SKIP != 'true' uses: actions/download-artifact@v1 with: name: bundle-artifacts path: bundle-artifacts - name: Download git-sdk-64-build-installers + if: env.SKIP != 'true' shell: bash run: | # Use Git Bash to download and unpack the artifact @@ -357,13 +385,16 @@ jobs: ## Unpack artifact unzip artifacts.zip - name: Clone and update build-extra + if: env.SKIP != 'true' shell: bash run: | d=git-sdk-64-build-installers/usr/src/build-extra && git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d && git -C $d pull "$PWD"/bundle-artifacts/build-extra.bundle main - uses: nuget/setup-nuget@v1 + if: env.SKIP != 'true' - name: Build 64-bit NuGet packages + if: env.SKIP != 'true' shell: powershell run: | & .\git-sdk-64-build-installers\usr\bin\bash.exe -lc @" @@ -372,6 +403,7 @@ jobs: openssl dgst -sha256 artifacts/Git*.nupkg | sed \"s/.* //\" >artifacts/sha-256.txt "@ - name: Publish nuget-x86_64 + if: env.SKIP != 'true' uses: actions/upload-artifact@v1 with: name: nuget-x86_64 From 60208ade85e9f3f1f47a6ed6027196cbd0593351 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 23 Feb 2021 11:39:15 +0100 Subject: [PATCH 119/303] git-artifacts: use a narrower `PATH` GitHub workflows run in agents that have quite a bit of stuff in their `PATH`, e.g. Chocolatey. 
To make sure that those bits and pieces are _not_ used to build the artifacts, let's whittle down the `PATH` to contain the bare minimum. Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 630a3e17b58273..f1b0426779e9b4 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -224,8 +224,10 @@ jobs: run: | & git-sdk-64-build-installers\usr\bin\sh.exe -lc @" set -x - # Let `cv2pdb` find the DLLs - PATH=\"`$PATH:/c/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/IDE/:/C/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin${{matrix.arch.bin}}\" + + # Restrict `PATH` to MSYS2 and to Visual Studio (to let `cv2pdb` find the relevant DLLs) + PATH=\"`/mingw64/bin:/usr/bin:/c/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/IDE/:/C/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin${{matrix.arch.bin}}:/C/Windows/system32\" + type -p mspdb140.dll || exit 1 sh -x /usr/src/build-extra/please.sh build-mingw-w64-git --only-${{matrix.arch.bitness}}-bit --build-src-pkg -o artifacts HEAD && cp bundle-artifacts/ver artifacts/ && From c7793b8d4d385c800368c114e1b47ea6020de089 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 11 Apr 2020 23:58:47 +0200 Subject: [PATCH 120/303] git-artifacts: allow specifying repo/ref via workflow_dispatch With this change, users can specify the branch and repository from which they want to build Git for Windows' artifacts, via the `ref` and `repository` inputs. This allows e.g. building `refs/heads/seen` of `git/git` (even if no `git-artifacts` workflow is configured in that repository), or `refs/pull//merge` for a given Pull Request. 
Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 6700e7ef26cf04..0090941ce0ac48 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -7,6 +7,10 @@ on: inputs: build_only: description: 'Optionally restrict what artifacts to build' + ref: + description: 'Optionally override which branch to build' + repository: + description: 'Optionally override from where to fetch the specified ref' env: GPG_OPTIONS: "--batch --yes --no-tty --list-options no-show-photos --verify-options no-show-photos --pinentry-mode loopback" @@ -14,6 +18,8 @@ env: MSYSTEM: MINGW64 USERPROFILE: "${{github.workspace}}\\home" BUILD_ONLY: "${{github.event.inputs.build_only}}" + REPOSITORY: "${{github.event.inputs.repository}}" + REF: "${{github.event.inputs.ref}}" jobs: bundle-artifacts: @@ -70,9 +76,11 @@ jobs: printf '#!/bin/sh\n\nexec /mingw64/bin/git.exe "`$@"\n' >/usr/bin/git && mkdir -p bundle-artifacts && + { test -n \"`$REPOSITORY\" || REPOSITORY='${{github.repository}}'; } && + { test -n \"`$REF\" || REF='${{github.ref}}'; } && git -c init.defaultBranch=main init --bare && git remote add -f origin https://github.com/git-for-windows/git && - git fetch https://github.com/${{github.repository}} ${{github.ref}}:${{github.ref}} && + git fetch \"https://github.com/`$REPOSITORY\" \"`$REF:`$REF\" && tag_name=\"`$(git describe --match 'v[0-9]*' FETCH_HEAD)-`$(date +%Y%m%d%H%M%S)\" && echo \"prerelease-`${tag_name#v}\" >bundle-artifacts/ver && From cefb9bf197fe6273620b7d133f9ba2abc56ea47b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 23 Feb 2021 12:25:34 +0100 Subject: [PATCH 121/303] git-artifacts: fix BUILD_ONLY handling for ARM64 The workflow allows users to restrict what parts are being built. 
For example, `installer-i686` will build only the 32-bit installer, not the 64-bit one nor any MinGit flavor. However, this logic was not extended when introducing support for ARM64: Instead, we _also_ built the ARM64 installer when the user asked for `installer-i686`. Let's allow restricting to `installer-i686` _without_ building the ARM64 version, and allow restricting to `installer-arm64` _just_ for the ARM64 version. Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index f1b0426779e9b4..8b47fa4c3ef569 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -338,9 +338,14 @@ jobs: - name: Determine whether this job should be skipped shell: bash run: | + suffix=${{matrix.arch.name}} + if test true = ${{matrix.arch.arm64}} + then + suffix=arm64 + fi case " $BUILD_ONLY " in ' ') ;; # not set; build all - *" ${{matrix.artifact.name}} "*|*" ${{matrix.artifact.name}}-${{matrix.arch.name}} "*) ;; # build this artifact + *" ${{matrix.artifact.name}} "*|*" ${{matrix.artifact.name}}-$suffix "*) ;; # build this artifact *) echo "SKIP=true" >>$GITHUB_ENV;; esac - name: Download pkg-${{matrix.arch.name}} From f193e90dfc40a40f8128953fcfc7e86809dc4c47 Mon Sep 17 00:00:00 2001 From: Dennis Ameling Date: Thu, 28 Jan 2021 21:31:05 +0100 Subject: [PATCH 122/303] git-artifacts: cache the build-installers artifact It is a bit expensive to fetch just the git-sdk-64-build-installers artifact from Azure Pipelines and then to unpack it (takes some 6-7 minutes, typically). Let's cache it if possible. 
Signed-off-by: Dennis Ameling Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 89 +++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 10 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 0090941ce0ac48..c2767fe26693b1 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -24,6 +24,8 @@ env: jobs: bundle-artifacts: runs-on: windows-latest + outputs: + latest-sdk64-extra-build-id: ${{ steps.determine-latest-sdk64-extra-build-id.outputs.id }} steps: - name: Configure user shell: bash @@ -34,15 +36,31 @@ jobs: git config --global user.name "$USER_NAME" && git config --global user.email "$USER_EMAIL" && echo "PACKAGER=$USER_NAME <$USER_EMAIL>" >>$GITHUB_ENV + - name: Determine latest git-sdk-64-extra-artifacts build ID + id: determine-latest-sdk64-extra-build-id + shell: bash + run: | + urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds + id=$(curl "$urlbase?definitions=29&statusFilter=completed&resultFilter=succeeded&\$top=1" | + jq -r '.value[0].id') + + echo "Latest ID is ${id}" + echo "::set-output name=id::$id" + - name: Cache git-sdk-64-build-installers + id: cache-sdk-build-installers + uses: actions/cache@v2 + with: + path: git-sdk-64-build-installers + key: build-installers-64-${{ steps.determine-latest-sdk64-extra-build-id.outputs.id }} - name: Download git-sdk-64-build-installers + if: steps.cache-sdk-build-installers.outputs.cache-hit != 'true' shell: bash run: | # Use Git Bash to download and unpack the artifact ## Get artifact urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=$(curl "$urlbase?definitions=29&statusFilter=completed&resultFilter=succeeded&\$top=1" | - jq -r '.value[0].id') + id=${{ steps.determine-latest-sdk64-extra-build-id.outputs.id }} download_url=$(curl "$urlbase/$id/artifacts" | jq -r '.value[] | select(.name == 
"git-sdk-64-build-installers").resource.downloadUrl') @@ -54,7 +72,13 @@ jobs: shell: bash run: | d=git-sdk-64-build-installers/usr/src/build-extra && - git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d + if test ! -d $d/.git + then + git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d + else + git -C $d fetch https://github.com/git-for-windows/build-extra main && + git -C $d switch -C main FETCH_HEAD + fi - name: Prepare home directory for GPG signing if: env.GPGKEY != '' shell: bash @@ -104,6 +128,8 @@ jobs: pkg: runs-on: windows-latest needs: bundle-artifacts + outputs: + latest-sdk64-extra-build-id: ${{ needs.bundle-artifacts.outputs.latest-sdk64-extra-build-id }} strategy: matrix: arch: @@ -264,8 +290,15 @@ jobs: with: name: bundle-artifacts path: bundle-artifacts - - name: Download git-sdk-64-build-installers + - name: Cache git-sdk-64-build-installers if: env.SKIP != 'true' && matrix.arch.bitness == '64' + id: cache-sdk64-build-installers + uses: actions/cache@v2 + with: + path: git-sdk-64-build-installers + key: build-installers-64-${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} + - name: Download git-sdk-64-build-installers + if: env.SKIP != 'true' && matrix.arch.bitness == '64' && steps.cache-sdk64-build-installers.outputs.cache-hit != 'true' shell: bash run: | # Use Git Bash to download and unpack the artifact @@ -280,16 +313,33 @@ jobs: ## Unpack artifact unzip artifacts.zip - - name: Download git-sdk-32-build-installers + - name: Determine latest git-sdk-32-extra-artifacts build ID if: env.SKIP != 'true' && matrix.arch.bitness == '32' + id: determine-latest-sdk32-extra-build-id + shell: bash + run: | + urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds + id=$(curl "$urlbase?definitions=30&statusFilter=completed&resultFilter=succeeded&\$top=1" | + jq -r '.value[0].id') + + echo "Latest ID is ${id}" + echo "::set-output name=id::$id" + - name: Cache 
git-sdk-32-build-installers + if: env.SKIP != 'true' && matrix.arch.bitness == '32' + id: cache-sdk32-build-installers + uses: actions/cache@v2 + with: + path: git-sdk-32-build-installers + key: build-installers-32-${{ steps.determine-latest-sdk32-extra-build-id.outputs.id }} + - name: Download git-sdk-32-build-installers + if: env.SKIP != 'true' && matrix.arch.bitness == '32' && steps.cache-sdk32-build-installers.outputs.cache-hit != 'true' shell: bash run: | # Use Git Bash to download and unpack the artifact ## Get artifact urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=$(curl "$urlbase?definitions=30&statusFilter=completed&resultFilter=succeeded&\$top=1" | - jq -r '.value[0].id') + id=${{ steps.determine-latest-sdk32-extra-build-id.outputs.id }} download_url=$(curl "$urlbase/$id/artifacts" | jq -r '.value[] | select(.name == "git-sdk-32-build-installers").resource.downloadUrl') @@ -302,7 +352,13 @@ jobs: shell: bash run: | d=git-sdk-${{matrix.arch.bitness}}-build-installers/usr/src/build-extra && - git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d && + if test ! 
-d $d/.git + then + git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d + else + git -C $d fetch https://github.com/git-for-windows/build-extra main && + git -C $d switch -C main FETCH_HEAD + fi && git -C $d pull "$PWD"/bundle-artifacts/build-extra.bundle main - name: Prepare home directory for code-signing env: @@ -376,8 +432,15 @@ jobs: with: name: bundle-artifacts path: bundle-artifacts - - name: Download git-sdk-64-build-installers + - name: Cache git-sdk-64-build-installers if: env.SKIP != 'true' + id: cache-sdk-build-installers + uses: actions/cache@v2 + with: + path: git-sdk-64-build-installers + key: build-installers-64-${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} + - name: Download git-sdk-64-build-installers + if: env.SKIP != 'true' && steps.cache-sdk-build-installers.outputs.cache-hit != 'true' shell: bash run: | # Use Git Bash to download and unpack the artifact @@ -397,7 +460,13 @@ jobs: shell: bash run: | d=git-sdk-64-build-installers/usr/src/build-extra && - git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d && + if test ! -d $d/.git + then + git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d + else + git -C $d fetch https://github.com/git-for-windows/build-extra main && + git -C $d switch -C main FETCH_HEAD + fi && git -C $d pull "$PWD"/bundle-artifacts/build-extra.bundle main - uses: nuget/setup-nuget@v1 if: env.SKIP != 'true' From b6d1188b59ff357cd56a6137b0e47dd2fc638e7c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 23 Feb 2021 12:31:24 +0100 Subject: [PATCH 123/303] git-artifacts: extend the `SKIP` logic to handle `pkg` and `build-arm64` When the user asked for `installer-x86_64`, there is no point in building `pkg-i686` or `build-arm64`; Let's be a bit smarter about this. 
Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 50 ++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 8b47fa4c3ef569..f6537f6c5acd3c 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -140,7 +140,21 @@ jobs: bitness: 32 bin: '' steps: + - name: Determine whether this job should be skipped + shell: bash + run: | + for e in ${BUILD_ONLY:-pkg} + do + case $e in + *-${{matrix.arch.name}}) exit 0;; # build this artifact + *-arm64) test i686 != ${{matrix.arch.name}} || exit 0;; # pkg-i686 is required for the ARM64 version + *-*) ;; # not this build artifact, keep looking + *) exit 0;; # build this artifact + esac + done + echo "SKIP=true" >>$GITHUB_ENV - name: Configure user + if: env.SKIP != 'true' shell: bash run: USER_NAME="${{github.actor}}" && @@ -150,13 +164,14 @@ jobs: git config --global user.email "$USER_EMAIL" && echo "PACKAGER=$USER_NAME <$USER_EMAIL>" >>$GITHUB_ENV - name: Cache git-sdk-64-build-installers + if: env.SKIP != 'true' id: cache-sdk-build-installers uses: actions/cache@v2 with: path: git-sdk-64-build-installers key: build-installers-64-${{ needs.bundle-artifacts.outputs.latest-sdk64-extra-build-id }} - name: Download git-sdk-64-build-installers - if: steps.cache-sdk-build-installers.outputs.cache-hit != 'true' + if: env.SKIP != 'true' && steps.cache-sdk-build-installers.outputs.cache-hit != 'true' shell: bash run: | # Use Git Bash to download and unpack the artifact @@ -172,11 +187,13 @@ jobs: ## Unpack artifact unzip artifacts.zip - name: Download bundle-artifacts + if: env.SKIP != 'true' uses: actions/download-artifact@v1 with: name: bundle-artifacts path: bundle-artifacts - name: Clone and update build-extra + if: env.SKIP != 'true' shell: bash run: | d=git-sdk-64-build-installers/usr/src/build-extra && @@ -189,6 +206,7 @@ jobs: fi && git -C $d pull 
"$PWD"/bundle-artifacts/build-extra.bundle main - name: Check out git/git + if: env.SKIP != 'true' shell: bash run: | git -c init.defaultBranch=main init && @@ -199,7 +217,7 @@ jobs: env: CODESIGN_P12: ${{secrets.CODESIGN_P12}} CODESIGN_PASS: ${{secrets.CODESIGN_PASS}} - if: env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' + if: env.SKIP != 'true' && env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' shell: bash run: | cd home && @@ -208,7 +226,7 @@ jobs: echo -n "$CODESIGN_PASS" >.sig/codesign.pass git config --global alias.signtool '!sh "/usr/src/build-extra/signtool.sh"' - name: Prepare home directory for GPG signing - if: env.GPGKEY != '' + if: env.SKIP != 'true' && env.GPGKEY != '' shell: bash run: | echo '${{secrets.PRIVGPGKEY}}' | tr % '\n' | gpg $GPG_OPTIONS --import && @@ -218,6 +236,7 @@ jobs: env: GPGKEY: ${{secrets.GPGKEY}} - name: Build mingw-w64-${{matrix.arch.name}}-git + if: env.SKIP != 'true' env: GPGKEY: "${{secrets.GPGKEY}}" shell: powershell @@ -247,10 +266,11 @@ jobs: git bundle create \"`$b\"/MINGW-packages.bundle origin/main..main) "@ - name: Clean up temporary files - if: always() + if: always() && env.SKIP != 'true' shell: bash run: rm -rf home - name: Publish mingw-w64-${{matrix.arch.name}}-git + if: env.SKIP != 'true' uses: actions/upload-artifact@v1 with: name: pkg-${{matrix.arch.name}} @@ -259,7 +279,20 @@ jobs: needs: bundle-artifacts runs-on: windows-latest steps: + - name: Determine whether this job should be skipped + shell: bash + run: | + for e in ${BUILD_ONLY:-pkg} + do + case $e in + *-arm64) exit 0;; # build this artifact + *-*) ;; # not this build artifact, keep looking + *) exit 0;; # build this artifact + esac + done + echo "SKIP=true" >>$GITHUB_ENV - name: Configure user + if: env.SKIP != 'true' shell: bash run: USER_NAME="${{github.actor}}" && @@ -268,34 +301,43 @@ jobs: git config --global user.name "$USER_NAME" && git config --global user.email "$USER_EMAIL" - uses: actions/checkout@v2 + if: env.SKIP != 'true' - name: 
initialize vcpkg + if: env.SKIP != 'true' uses: actions/checkout@v2 with: repository: 'microsoft/vcpkg' path: 'compat/vcbuild/vcpkg' - name: download vcpkg artifacts + if: env.SKIP != 'true' uses: git-for-windows/get-azure-pipelines-artifact@v0 with: repository: git/git definitionId: 9 - name: add msbuild to PATH + if: env.SKIP != 'true' uses: microsoft/setup-msbuild@v1 - name: copy dlls to root + if: env.SKIP != 'true' shell: powershell run: | & compat\vcbuild\vcpkg_copy_dlls.bat release arm64-windows if (!$?) { exit(1) } - name: generate Visual Studio solution + if: env.SKIP != 'true' shell: bash run: | cmake `pwd`/contrib/buildsystems/ -DCMAKE_PREFIX_PATH=`pwd`/compat/vcbuild/vcpkg/installed/arm64-windows \ -DNO_GETTEXT=YesPlease -DPERL_TESTS=OFF -DPYTHON_TESTS=OFF -DCURL_NO_CURL_CMAKE=ON -DCMAKE_GENERATOR_PLATFORM=arm64 -DVCPKG_ARCH=arm64-windows \ -DCMAKE_INSTALL_PREFIX="`pwd`/git-arm64" -DSKIP_DASHED_BUILT_INS=ON - name: MSBuild + if: env.SKIP != 'true' run: msbuild git.sln -property:Configuration=Release - name: Link the Git executables + if: env.SKIP != 'true' run: msbuild INSTALL.vcxproj -property:Configuration=Release - name: upload build artifacts + if: env.SKIP != 'true' uses: actions/upload-artifact@v1 with: name: arm64-artifacts From 45812f18e20c115a1c133e18404baefb98388f0a Mon Sep 17 00:00:00 2001 From: Ian Bearman Date: Tue, 4 Feb 2020 10:34:40 -0800 Subject: [PATCH 124/303] vcbuild: add an option to install individual 'features' In this context, a "feature" is a dependency combined with its own dependencies. 
Signed-off-by: Ian Bearman Signed-off-by: Johannes Schindelin --- compat/vcbuild/vcpkg_install.bat | 35 +++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/compat/vcbuild/vcpkg_install.bat b/compat/vcbuild/vcpkg_install.bat index cacef18c11dc79..8da212487ae97d 100644 --- a/compat/vcbuild/vcpkg_install.bat +++ b/compat/vcbuild/vcpkg_install.bat @@ -85,14 +85,47 @@ REM ================================================================ :sub__install_one echo Installing package %1... + call :%1_features + REM vcpkg may not be reliable on slow, intermittent or proxy REM connections, see e.g. REM https://social.msdn.microsoft.com/Forums/windowsdesktop/en-US/4a8f7be5-5e15-4213-a7bb-ddf424a954e6/winhttpsendrequest-ends-with-12002-errorhttptimeout-after-21-seconds-no-matter-what-timeout?forum=windowssdk REM which explains the hidden 21 second timeout REM (last post by Dave : Microsoft - Windows Networking team) - .\vcpkg.exe install %1:%arch% + .\vcpkg.exe install %1%features%:%arch% IF ERRORLEVEL 1 ( EXIT /B 1 ) echo Finished %1 goto :EOF + +:: +:: features for each vcpkg to install +:: there should be an entry here for each package to install +:: 'set features=' means use the default otherwise +:: 'set features=[comma-delimited-feature-set]' is the syntax +:: + +:zlib_features +set features= +goto :EOF + +:expat_features +set features= +goto :EOF + +:libiconv_features +set features= +goto :EOF + +:openssl_features +set features= +goto :EOF + +:libssh2_features +set features= +goto :EOF + +:curl_features +set features=[core,openssl] +goto :EOF From 9f8cc18028e8e8074417888a0aafde8932055229 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 3 Feb 2021 15:52:58 +0100 Subject: [PATCH 125/303] git-artifacts: use the cached build-installers instead of makepkg-git When building the Pacman packages, we technically do not need the full `build-installers` artifact (which is substantially larger than the `makepkg-git` artifact). 
However, the former is already cached and includes the latter's files. And it is _so_ much faster to download the cached (larger) artifact than to download the smaller `makepkg-git` artifact from Azure Pipelines. Suggested-by: Dennis Ameling Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index c2767fe26693b1..ba954605845d09 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -149,16 +149,23 @@ jobs: git config --global user.name "$USER_NAME" && git config --global user.email "$USER_EMAIL" && echo "PACKAGER=$USER_NAME <$USER_EMAIL>" >>$GITHUB_ENV - - name: Download git-sdk-64-makepkg-git + - name: Cache git-sdk-64-build-installers + id: cache-sdk-build-installers + uses: actions/cache@v2 + with: + path: git-sdk-64-build-installers + key: build-installers-64-${{ needs.bundle-artifacts.outputs.latest-sdk64-extra-build-id }} + - name: Download git-sdk-64-build-installers + if: steps.cache-sdk-build-installers.outputs.cache-hit != 'true' shell: bash run: | # Use Git Bash to download and unpack the artifact ## Get artifact urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=${{ needs.bundle-artifacts.outputs.latest-sdk64-extra-build-id }} - download_url="$(curl "$urlbase/$id/artifacts" | - jq -r '.value[] | select(.name == "git-sdk-64-makepkg-git").resource.downloadUrl')" + id=${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} + download_url=$(curl "$urlbase/$id/artifacts" | + jq -r '.value[] | select(.name == "git-sdk-64-build-installers").resource.downloadUrl') curl -o artifacts.zip "$download_url" @@ -172,8 +179,14 @@ jobs: - name: Clone and update build-extra shell: bash run: | - d=git-sdk-64-makepkg-git/usr/src/build-extra && - git clone --single-branch -b main https://github.com/git-for-windows/build-extra 
$d && + d=git-sdk-64-build-installers/usr/src/build-extra && + if test ! -d $d/.git + then + git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d + else + git -C $d fetch https://github.com/git-for-windows/build-extra main && + git -C $d switch -C main FETCH_HEAD + fi && git -C $d pull "$PWD"/bundle-artifacts/build-extra.bundle main - name: Check out git/git shell: bash @@ -209,7 +222,7 @@ jobs: GPGKEY: "${{secrets.GPGKEY}}" shell: powershell run: | - & git-sdk-64-makepkg-git\usr\bin\sh.exe -lc @" + & git-sdk-64-build-installers\usr\bin\sh.exe -lc @" set -x # Let `cv2pdb` find the DLLs PATH=\"`$PATH:/c/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/IDE/:/C/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin${{matrix.arch.bin}}\" From 2e138ebba4822a8fedac363730e0fd851745acc9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 23 Feb 2021 12:44:29 +0100 Subject: [PATCH 126/303] git-artifacts: mark all inputs as "not required" This workflow needs to be triggered manually, and it offers to specify a couple input parameters. But none of them are required. Make that explicit. 
Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index f6537f6c5acd3c..04ce23498887d3 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -7,10 +7,13 @@ on: inputs: build_only: description: 'Optionally restrict what artifacts to build' + required: false ref: description: 'Optionally override which branch to build' + required: false repository: description: 'Optionally override from where to fetch the specified ref' + required: false env: GPG_OPTIONS: "--batch --yes --no-tty --list-options no-show-photos --verify-options no-show-photos --pinentry-mode loopback" From a6ab733c9c641882685e18970218d48d3fef4950 Mon Sep 17 00:00:00 2001 From: Dennis Ameling Date: Fri, 4 Dec 2020 14:11:34 +0100 Subject: [PATCH 127/303] cmake: allow building for Windows/ARM64 Signed-off-by: Dennis Ameling Signed-off-by: Johannes Schindelin --- contrib/buildsystems/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index ea2a531be87494..b6b1910d8d6721 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -65,9 +65,9 @@ if(USE_VCPKG) set(VCPKG_DIR "${CMAKE_SOURCE_DIR}/compat/vcbuild/vcpkg") if(NOT EXISTS ${VCPKG_DIR}) message("Initializing vcpkg and building the Git's dependencies (this will take a while...)") - execute_process(COMMAND ${CMAKE_SOURCE_DIR}/compat/vcbuild/vcpkg_install.bat) + execute_process(COMMAND ${CMAKE_SOURCE_DIR}/compat/vcbuild/vcpkg_install.bat ${VCPKG_ARCH}) endif() - list(APPEND CMAKE_PREFIX_PATH "${VCPKG_DIR}/installed/x64-windows") + list(APPEND CMAKE_PREFIX_PATH "${VCPKG_DIR}/installed/${VCPKG_ARCH}") # In the vcpkg edition, we need this to be able to link to libcurl set(CURL_NO_CURL_CMAKE ON) @@ -1064,7 +1064,7 @@ file(APPEND 
${CMAKE_BINARY_DIR}/GIT-BUILD-OPTIONS "RUNTIME_PREFIX='${RUNTIME_PRE file(APPEND ${CMAKE_BINARY_DIR}/GIT-BUILD-OPTIONS "NO_PYTHON='${NO_PYTHON}'\n") file(APPEND ${CMAKE_BINARY_DIR}/GIT-BUILD-OPTIONS "SUPPORTS_SIMPLE_IPC='${SUPPORTS_SIMPLE_IPC}'\n") if(USE_VCPKG) - file(APPEND ${CMAKE_BINARY_DIR}/GIT-BUILD-OPTIONS "PATH=\"$PATH:$TEST_DIRECTORY/../compat/vcbuild/vcpkg/installed/x64-windows/bin\"\n") + file(APPEND ${CMAKE_BINARY_DIR}/GIT-BUILD-OPTIONS "PATH=\"$PATH:$TEST_DIRECTORY/../compat/vcbuild/vcpkg/installed/${VCPKG_ARCH}/bin\"\n") endif() #Make the tests work when building out of the source tree From eff54b00d422f66b84c6fa81401428bab14cd6aa Mon Sep 17 00:00:00 2001 From: Dennis Ameling Date: Thu, 4 Feb 2021 16:49:01 +0100 Subject: [PATCH 128/303] git-artifacts: add ARM64 artifacts Adds ARM64 artifacts to the git-artifacts GitHub Action workflow. Signed-off-by: Dennis Ameling Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 75 +++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 3 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index ba954605845d09..327683f35f8f2c 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -253,9 +253,54 @@ jobs: with: name: pkg-${{matrix.arch.name}} path: artifacts + build-arm64: + needs: bundle-artifacts + runs-on: windows-latest + steps: + - name: Configure user + shell: bash + run: + USER_NAME="${{github.actor}}" && + USER_EMAIL="${{github.actor}}@users.noreply.github.com" && + mkdir -p "$HOME" && + git config --global user.name "$USER_NAME" && + git config --global user.email "$USER_EMAIL" + - uses: actions/checkout@v2 + - name: initialize vcpkg + uses: actions/checkout@v2 + with: + repository: 'microsoft/vcpkg' + path: 'compat/vcbuild/vcpkg' + - name: download vcpkg artifacts + uses: git-for-windows/get-azure-pipelines-artifact@v0 + with: + repository: git/git + definitionId: 9 + - name: add msbuild 
to PATH + uses: microsoft/setup-msbuild@v1 + - name: copy dlls to root + shell: powershell + run: | + & compat\vcbuild\vcpkg_copy_dlls.bat release arm64-windows + if (!$?) { exit(1) } + - name: generate Visual Studio solution + shell: bash + run: | + cmake `pwd`/contrib/buildsystems/ -DCMAKE_PREFIX_PATH=`pwd`/compat/vcbuild/vcpkg/installed/arm64-windows \ + -DNO_GETTEXT=YesPlease -DPERL_TESTS=OFF -DPYTHON_TESTS=OFF -DCURL_NO_CURL_CMAKE=ON -DCMAKE_GENERATOR_PLATFORM=arm64 -DVCPKG_ARCH=arm64-windows \ + -DCMAKE_INSTALL_PREFIX="`pwd`/git-arm64" + - name: MSBuild + run: msbuild git.sln -property:Configuration=Release + - name: Link the Git executables + run: msbuild INSTALL.vcxproj -property:Configuration=Release + - name: upload build artifacts + uses: actions/upload-artifact@v1 + with: + name: arm64-artifacts + path: ./git-arm64 artifacts: runs-on: windows-latest - needs: pkg + needs: [pkg, build-arm64] strategy: matrix: artifact: @@ -277,8 +322,13 @@ jobs: arch: - name: x86_64 bitness: 64 + arm64: false - name: i686 bitness: 32 + arm64: false + - name: i686 + bitness: 32 + arm64: true fail-fast: false env: MSYSTEM: MINGW${{matrix.arch.bitness}} @@ -360,6 +410,12 @@ jobs: ## Unpack artifact unzip artifacts.zip + - name: Download arm64 artifact + if: env.SKIP != 'true' && matrix.arch.arm64 == true + uses: actions/download-artifact@v1 + with: + name: arm64-artifacts + path: ${{github.workspace}}/arm64 - name: Clone and update build-extra if: env.SKIP != 'true' shell: bash @@ -390,7 +446,14 @@ jobs: run: | & .\git-sdk-${{matrix.arch.bitness}}-build-installers\usr\bin\bash.exe -lc @" set -x - /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-${{matrix.arch.name}}/ver) -o artifacts --${{matrix.artifact.name}} --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-[0-9]*.tar.xz --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-doc-html-[0-9]*.tar.xz && + if test "${{matrix.arch.arm64}}" = true + then + 
ARM64="--include-arm64-artifacts=\"$PWD/arm64\"" + else + ARM64= + fi + + eval /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git `$ARM64 --version=`$(cat pkg-${{matrix.arch.name}}/ver) -o artifacts --${{matrix.artifact.name}} --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-[0-9]*.tar.xz --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-doc-html-[0-9]*.tar.xz && if test portable = '${{matrix.artifact.name}}' && test -n \"`$(git config alias.signtool)\" then git signtool artifacts/PortableGit-*.exe @@ -416,11 +479,17 @@ jobs: shell: bash run: rm -rf home - name: Publish ${{matrix.artifact.name}}-${{matrix.arch.name}} - if: env.SKIP != 'true' + if: env.SKIP != 'true' && matrix.arch.arm64 != true uses: actions/upload-artifact@v1 with: name: ${{matrix.artifact.name}}-${{matrix.arch.name}} path: artifacts + - name: Publish ${{matrix.artifact.name}}-arm64 + if: env.SKIP != 'true' && matrix.arch.arm64 == true + uses: actions/upload-artifact@v1 + with: + name: ${{matrix.artifact.name}}-arm64 + path: artifacts nuget: runs-on: windows-latest needs: pkg From 771153dce7a39f4f22bc47e736cf2cdac04166bb Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 23 Feb 2021 13:32:31 +0100 Subject: [PATCH 129/303] git-artifacts(build-arm64): build artifacts using the intended Git revision We cannot just check out the current revision: The user might have overridden `REPOSITORY` and `REF` via the workflow dispatch. 
Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 04ce23498887d3..35dd92fc521457 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -303,8 +303,20 @@ jobs: mkdir -p "$HOME" && git config --global user.name "$USER_NAME" && git config --global user.email "$USER_EMAIL" - - uses: actions/checkout@v2 + - name: Download bundle-artifacts + if: env.SKIP != 'true' + uses: actions/download-artifact@v1 + with: + name: bundle-artifacts + path: bundle-artifacts + - name: Check out git/git if: env.SKIP != 'true' + shell: bash + run: | + git -c init.defaultBranch=main init && + git remote add -f origin https://github.com/git-for-windows/git && + git fetch --tags bundle-artifacts/git.bundle $(cat bundle-artifacts/next_version) && + git reset --hard $(cat bundle-artifacts/next_version) - name: initialize vcpkg if: env.SKIP != 'true' uses: actions/checkout@v2 From 651a629b75b7e55c57e8bc55c37f669acd1e10bb Mon Sep 17 00:00:00 2001 From: Dennis Ameling Date: Sun, 29 Nov 2020 00:12:26 +0100 Subject: [PATCH 130/303] ci(vs-build) also build Windows/ARM64 artifacts There are no Windows/ARM64 agents in GitHub Actions yet, therefore we just skip adjusting the `vs-test` job for now. 
Signed-off-by: Dennis Ameling Signed-off-by: Johannes Schindelin --- .github/workflows/main.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 831f4df56c51dc..e7799fda9ca2e6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -137,6 +137,9 @@ jobs: NO_PERL: 1 GIT_CONFIG_PARAMETERS: "'user.name=CI' 'user.email=ci@git'" runs-on: windows-latest + strategy: + matrix: + arch: [x64, arm64] steps: - uses: actions/checkout@v2 - uses: git-for-windows/setup-git-for-windows-sdk@v1 @@ -158,14 +161,14 @@ jobs: uses: microsoft/setup-msbuild@v1 - name: copy dlls to root shell: cmd - run: compat\vcbuild\vcpkg_copy_dlls.bat release + run: compat\vcbuild\vcpkg_copy_dlls.bat release ${{ matrix.arch }}-windows - name: generate Visual Studio solution shell: bash run: | - cmake `pwd`/contrib/buildsystems/ -DCMAKE_PREFIX_PATH=`pwd`/compat/vcbuild/vcpkg/installed/x64-windows \ - -DNO_GETTEXT=YesPlease -DPERL_TESTS=OFF -DPYTHON_TESTS=OFF -DCURL_NO_CURL_CMAKE=ON + cmake `pwd`/contrib/buildsystems/ -DCMAKE_PREFIX_PATH=`pwd`/compat/vcbuild/vcpkg/installed/${{ matrix.arch }}-windows \ + -DNO_GETTEXT=YesPlease -DPERL_TESTS=OFF -DPYTHON_TESTS=OFF -DCURL_NO_CURL_CMAKE=ON -DCMAKE_GENERATOR_PLATFORM=${{ matrix.arch }} -DVCPKG_ARCH=${{ matrix.arch }}-windows - name: MSBuild - run: msbuild git.sln -property:Configuration=Release -property:Platform=x64 -maxCpuCount:4 -property:PlatformToolset=v142 + run: msbuild git.sln -property:Configuration=Release -property:Platform=${{ matrix.arch }} -maxCpuCount:4 -property:PlatformToolset=v142 - name: bundle artifact tar shell: bash env: @@ -179,7 +182,7 @@ jobs: - name: upload tracked files and build artifacts uses: actions/upload-artifact@v2 with: - name: vs-artifacts + name: vs-artifacts-${{ matrix.arch }} path: artifacts vs-test: name: win+VS test @@ -194,7 +197,7 @@ jobs: - name: download tracked files and build artifacts uses: 
actions/download-artifact@v2 with: - name: vs-artifacts + name: vs-artifacts-x64 path: ${{github.workspace}} - name: extract tracked files and build artifacts shell: bash From a8658bd4c31116bad790ab591fea0a1fd5000ab7 Mon Sep 17 00:00:00 2001 From: Dennis Ameling Date: Wed, 3 Mar 2021 11:59:05 +0100 Subject: [PATCH 131/303] git-artifacts: add workaround for GCM Core on ARM64 Since there is no GCM Core for ARM64, let's just install a simple shell script that calls the i686 version for now. Signed-off-by: Dennis Ameling Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 327683f35f8f2c..64dc1dbcef4b21 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -416,6 +416,13 @@ jobs: with: name: arm64-artifacts path: ${{github.workspace}}/arm64 + # Workaround for Git Credential Manager Core on ARM64: https://github.com/git-for-windows/git/issues/3015 + - name: Create git-credential-manager-core wrapper for ARM64 + if: env.SKIP != 'true' && matrix.arch.arm64 == true + shell: bash + run: | + printf '%s\n' '#!/bin/sh' 'exec /mingw32/libexec/git-core/git-credential-manager-core.exe "$@"' > arm64/libexec/git-core/git-credential-manager-core + chmod +x arm64/libexec/git-core/git-credential-manager-core - name: Clone and update build-extra if: env.SKIP != 'true' shell: bash From 85c91bf0db786b687efd281f7dab001d4081329e Mon Sep 17 00:00:00 2001 From: Dennis Ameling Date: Mon, 19 Jul 2021 13:02:16 +0200 Subject: [PATCH 132/303] cmake(): allow setting HOST_CPU for cross-compilation Git's regular Makefile mentions that HOST_CPU should be defined when cross-compiling Git: https://github.com/git-for-windows/git/blob/37796bca76ef4180c39ee508ca3e42c0777ba444/Makefile#L438-L439 This is then used to set the GIT_HOST_CPU variable when compiling Git: 
https://github.com/git-for-windows/git/blob/37796bca76ef4180c39ee508ca3e42c0777ba444/Makefile#L1337-L1341 Then, when the user runs `git version --build-options`, it returns that value: https://github.com/git-for-windows/git/blob/37796bca76ef4180c39ee508ca3e42c0777ba444/help.c#L658 This commit adds the same functionality to the CMake configuration. Users can now set -DHOST_CPU= to set the target architecture. Signed-off-by: Dennis Ameling --- .github/workflows/main.yml | 2 +- contrib/buildsystems/CMakeLists.txt | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e7799fda9ca2e6..47f076ea30f44f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -166,7 +166,7 @@ jobs: shell: bash run: | cmake `pwd`/contrib/buildsystems/ -DCMAKE_PREFIX_PATH=`pwd`/compat/vcbuild/vcpkg/installed/${{ matrix.arch }}-windows \ - -DNO_GETTEXT=YesPlease -DPERL_TESTS=OFF -DPYTHON_TESTS=OFF -DCURL_NO_CURL_CMAKE=ON -DCMAKE_GENERATOR_PLATFORM=${{ matrix.arch }} -DVCPKG_ARCH=${{ matrix.arch }}-windows + -DNO_GETTEXT=YesPlease -DPERL_TESTS=OFF -DPYTHON_TESTS=OFF -DCURL_NO_CURL_CMAKE=ON -DCMAKE_GENERATOR_PLATFORM=${{ matrix.arch }} -DVCPKG_ARCH=${{ matrix.arch }}-windows -DHOST_CPU=${{ matrix.arch }} - name: MSBuild run: msbuild git.sln -property:Configuration=Release -property:Platform=${{ matrix.arch }} -maxCpuCount:4 -property:PlatformToolset=v142 - name: bundle artifact tar diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index b6b1910d8d6721..71e93558e68a27 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -223,7 +223,14 @@ endif() #default behaviour include_directories(${CMAKE_SOURCE_DIR}) -add_compile_definitions(GIT_HOST_CPU="${CMAKE_SYSTEM_PROCESSOR}") + +# When cross-compiling, define HOST_CPU as the canonical name of the CPU on +# which the built Git will run (for instance "x86_64"). 
+if(NOT HOST_CPU) + add_compile_definitions(GIT_HOST_CPU="${CMAKE_SYSTEM_PROCESSOR}") +else() + add_compile_definitions(GIT_HOST_CPU="${HOST_CPU}") +endif() add_compile_definitions(SHA256_BLK INTERNAL_QSORT RUNTIME_PREFIX) add_compile_definitions(NO_OPENSSL SHA1_DC SHA1DC_NO_STANDARD_INCLUDES SHA1DC_INIT_SAFE_HASH_DEFAULT=0 From e0d817629a168f68a7683ee5f8591bbce0ff9139 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 23 Feb 2021 11:35:47 +0100 Subject: [PATCH 133/303] git-artifacts: use the `setup-git-for-windows-sdk` Action This simplifies the workflow dramatically, as well as accelerating it because the Action recently learned to use the partial clone/parallel checkout features of Git. Note that we have to reinstate that `/usr/bin/git` hack (a shell script that simply redirects to `/mingw64/bin/git.exe`) in the `pkg` job manually, since we no longer cache the `build-installers` artifact _after_ installing that hack in `bundle-artifacts`. Signed-off-by: Johannes Schindelin --- .github/workflows/git-artifacts.yml | 288 ++++++++-------------------- 1 file changed, 81 insertions(+), 207 deletions(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index 35dd92fc521457..4352cb126c86dc 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -27,8 +27,6 @@ env: jobs: bundle-artifacts: runs-on: windows-latest - outputs: - latest-sdk64-extra-build-id: ${{ steps.determine-latest-sdk64-extra-build-id.outputs.id }} steps: - name: Configure user shell: bash @@ -39,42 +37,13 @@ jobs: git config --global user.name "$USER_NAME" && git config --global user.email "$USER_EMAIL" && echo "PACKAGER=$USER_NAME <$USER_EMAIL>" >>$GITHUB_ENV - - name: Determine latest git-sdk-64-extra-artifacts build ID - id: determine-latest-sdk64-extra-build-id - shell: bash - run: | - urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=$(curl 
"$urlbase?definitions=29&statusFilter=completed&resultFilter=succeeded&\$top=1" | - jq -r '.value[0].id') - - echo "Latest ID is ${id}" - echo "::set-output name=id::$id" - - name: Cache git-sdk-64-build-installers - id: cache-sdk-build-installers - uses: actions/cache@v2 + - uses: git-for-windows/setup-git-for-windows-sdk@v1 with: - path: git-sdk-64-build-installers - key: build-installers-64-${{ steps.determine-latest-sdk64-extra-build-id.outputs.id }} - - name: Download git-sdk-64-build-installers - if: steps.cache-sdk-build-installers.outputs.cache-hit != 'true' - shell: bash - run: | - # Use Git Bash to download and unpack the artifact - - ## Get artifact - urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=${{ steps.determine-latest-sdk64-extra-build-id.outputs.id }} - download_url=$(curl "$urlbase/$id/artifacts" | - jq -r '.value[] | select(.name == "git-sdk-64-build-installers").resource.downloadUrl') - - curl -o artifacts.zip "$download_url" - - ## Unpack artifact - unzip artifacts.zip + flavor: build-installers - name: Clone build-extra shell: bash run: | - d=git-sdk-64-build-installers/usr/src/build-extra && + d=/usr/src/build-extra && if test ! 
-d $d/.git then git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d @@ -88,7 +57,7 @@ jobs: run: | echo '${{secrets.PRIVGPGKEY}}' | tr % '\n' | gpg $GPG_OPTIONS --import && mkdir -p home && - git config --global gpg.program "$PWD/git-sdk-64-build-installers/usr/src/build-extra/gnupg-with-gpgkey.sh" && + git config --global gpg.program "/usr/src/build-extra/gnupg-with-gpgkey.sh" && info="$(gpg --list-keys --with-colons "${GPGKEY%% *}" | cut -d : -f 1,10 | sed -n '/^uid/{s|uid:||p;q}')" && git config --global user.name "${info% <*}" && git config --global user.email "<${info#*<}" @@ -97,28 +66,26 @@ jobs: - name: Generate bundle artifacts env: GPGKEY: ${{secrets.GPGKEY}} - shell: powershell + shell: bash run: | - & .\git-sdk-64-build-installers\git-cmd.exe --command=usr\bin\bash.exe -lc @" - printf '#!/bin/sh\n\nexec /mingw64/bin/git.exe "`$@"\n' >/usr/bin/git && - mkdir -p bundle-artifacts && + printf '#!/bin/sh\n\nexec /mingw64/bin/git.exe "$@"\n' >/usr/bin/git && + mkdir -p bundle-artifacts && - { test -n \"`$REPOSITORY\" || REPOSITORY='${{github.repository}}'; } && - { test -n \"`$REF\" || REF='${{github.ref}}'; } && - git -c init.defaultBranch=main init --bare && - git remote add -f origin https://github.com/git-for-windows/git && - git fetch \"https://github.com/`$REPOSITORY\" \"`$REF:`$REF\" && + { test -n "$REPOSITORY" || REPOSITORY='${{github.repository}}'; } && + { test -n "$REF" || REF='${{github.ref}}'; } && + git -c init.defaultBranch=main init --bare && + git remote add -f origin https://github.com/git-for-windows/git && + git fetch "https://github.com/$REPOSITORY" "$REF:$REF" && - tag_name=\"`$(git describe --match 'v[0-9]*' FETCH_HEAD)-`$(date +%Y%m%d%H%M%S)\" && - echo \"prerelease-`${tag_name#v}\" >bundle-artifacts/ver && - echo \"`${tag_name#v}\" >bundle-artifacts/display_version && - echo \"`$tag_name\" >bundle-artifacts/next_version && - git tag `$(test -z \"`$GPGKEY\" || echo \" -s\") -m \"Snapshot build\" 
\"`$tag_name\" FETCH_HEAD && - git bundle create bundle-artifacts/git.bundle origin/main..\"`$tag_name\" && + tag_name="$(git describe --match 'v[0-9]*' FETCH_HEAD)-$(date +%Y%m%d%H%M%S)" && + echo "prerelease-${tag_name#v}" >bundle-artifacts/ver && + echo "${tag_name#v}" >bundle-artifacts/display_version && + echo "$tag_name" >bundle-artifacts/next_version && + git tag $(test -z "$GPGKEY" || echo " -s") -m "Snapshot build" "$tag_name" FETCH_HEAD && + git bundle create bundle-artifacts/git.bundle origin/main.."$tag_name" && - sh -x /usr/src/build-extra/please.sh mention feature \"Snapshot of `$(git show -s --pretty='tformat:%h (%s, %ad)' --date=short FETCH_HEAD)\" && - git -C /usr/src/build-extra bundle create \"`$PWD/bundle-artifacts/build-extra.bundle\" origin/main..main - "@ + sh -x /usr/src/build-extra/please.sh mention feature "Snapshot of $(git show -s --pretty='tformat:%h (%s, %ad)' --date=short FETCH_HEAD)" && + git -C /usr/src/build-extra bundle create "$PWD/bundle-artifacts/build-extra.bundle" origin/main..main - name: Clean up temporary files if: always() shell: bash @@ -131,8 +98,6 @@ jobs: pkg: runs-on: windows-latest needs: bundle-artifacts - outputs: - latest-sdk64-extra-build-id: ${{ needs.bundle-artifacts.outputs.latest-sdk64-extra-build-id }} strategy: matrix: arch: @@ -166,29 +131,10 @@ jobs: git config --global user.name "$USER_NAME" && git config --global user.email "$USER_EMAIL" && echo "PACKAGER=$USER_NAME <$USER_EMAIL>" >>$GITHUB_ENV - - name: Cache git-sdk-64-build-installers + - uses: git-for-windows/setup-git-for-windows-sdk@v1 if: env.SKIP != 'true' - id: cache-sdk-build-installers - uses: actions/cache@v2 with: - path: git-sdk-64-build-installers - key: build-installers-64-${{ needs.bundle-artifacts.outputs.latest-sdk64-extra-build-id }} - - name: Download git-sdk-64-build-installers - if: env.SKIP != 'true' && steps.cache-sdk-build-installers.outputs.cache-hit != 'true' - shell: bash - run: | - # Use Git Bash to download and unpack the 
artifact - - ## Get artifact - urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} - download_url=$(curl "$urlbase/$id/artifacts" | - jq -r '.value[] | select(.name == "git-sdk-64-build-installers").resource.downloadUrl') - - curl -o artifacts.zip "$download_url" - - ## Unpack artifact - unzip artifacts.zip + flavor: build-installers - name: Download bundle-artifacts if: env.SKIP != 'true' uses: actions/download-artifact@v1 @@ -199,7 +145,7 @@ jobs: if: env.SKIP != 'true' shell: bash run: | - d=git-sdk-64-build-installers/usr/src/build-extra && + d=/usr/src/build-extra && if test ! -d $d/.git then git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d @@ -242,32 +188,33 @@ jobs: if: env.SKIP != 'true' env: GPGKEY: "${{secrets.GPGKEY}}" - shell: powershell + shell: bash run: | - & git-sdk-64-build-installers\usr\bin\sh.exe -lc @" - set -x + set -x - # Restrict `PATH` to MSYS2 and to Visual Studio (to let `cv2pdb` find the relevant DLLs) - PATH=\"`/mingw64/bin:/usr/bin:/c/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/IDE/:/C/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin${{matrix.arch.bin}}:/C/Windows/system32\" + # Make sure that there is a `/usr/bin/git` that can be used by `makepkg-mingw` + printf '#!/bin/sh\n\nexec /mingw64/bin/git.exe "$@"\n' >/usr/bin/git && - type -p mspdb140.dll || exit 1 - sh -x /usr/src/build-extra/please.sh build-mingw-w64-git --only-${{matrix.arch.bitness}}-bit --build-src-pkg -o artifacts HEAD && - cp bundle-artifacts/ver artifacts/ && - if test -n \"`$GPGKEY\" - then - for tar in artifacts/*.tar* - do - /usr/src/build-extra/gnupg-with-gpgkey.sh --detach-sign --no-armor `$tar - done - fi && + # Restrict `PATH` to MSYS2 and to Visual Studio (to let `cv2pdb` find the relevant DLLs) + PATH="/mingw64/bin:/usr/bin:/c/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/IDE/:/C/Program Files (x86)/Microsoft 
Visual Studio 14.0/VC/bin${{matrix.arch.bin}}:/C/Windows/system32" - b=`$PWD/artifacts && - version=`$(cat bundle-artifacts/next_version) && - (cd /usr/src/MINGW-packages/mingw-w64-git && - cp PKGBUILD.`$version PKGBUILD && - git commit -s -m \"mingw-w64-git: new version (`$version)\" PKGBUILD && - git bundle create \"`$b\"/MINGW-packages.bundle origin/main..main) - "@ + type -p mspdb140.dll || exit 1 + sh -x /usr/src/build-extra/please.sh build-mingw-w64-git --only-${{matrix.arch.bitness}}-bit --build-src-pkg -o artifacts HEAD && + cp bundle-artifacts/ver artifacts/ && + if test -n "$GPGKEY" + then + for tar in artifacts/*.tar* + do + /usr/src/build-extra/gnupg-with-gpgkey.sh --detach-sign --no-armor $tar + done + fi && + + b=$PWD/artifacts && + version=$(cat bundle-artifacts/next_version) && + (cd /usr/src/MINGW-packages/mingw-w64-git && + cp PKGBUILD.$version PKGBUILD && + git commit -s -m "mingw-w64-git: new version ($version)" PKGBUILD && + git bundle create "$b"/MINGW-packages.bundle origin/main..main) - name: Clean up temporary files if: always() && env.SKIP != 'true' shell: bash @@ -417,63 +364,15 @@ jobs: with: name: bundle-artifacts path: bundle-artifacts - - name: Cache git-sdk-64-build-installers + - uses: git-for-windows/setup-git-for-windows-sdk@v1 if: env.SKIP != 'true' && matrix.arch.bitness == '64' - id: cache-sdk64-build-installers - uses: actions/cache@v2 with: - path: git-sdk-64-build-installers - key: build-installers-64-${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} - - name: Download git-sdk-64-build-installers - if: env.SKIP != 'true' && matrix.arch.bitness == '64' && steps.cache-sdk64-build-installers.outputs.cache-hit != 'true' - shell: bash - run: | - # Use Git Bash to download and unpack the artifact - - ## Get artifact - urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} - download_url="$(curl "$urlbase/$id/artifacts" | - jq -r '.value[] | select(.name == 
"git-sdk-64-build-installers").resource.downloadUrl')" - - curl -o artifacts.zip "$download_url" - - ## Unpack artifact - unzip artifacts.zip - - name: Determine latest git-sdk-32-extra-artifacts build ID + flavor: build-installers + - uses: git-for-windows/setup-git-for-windows-sdk@v1 if: env.SKIP != 'true' && matrix.arch.bitness == '32' - id: determine-latest-sdk32-extra-build-id - shell: bash - run: | - urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=$(curl "$urlbase?definitions=30&statusFilter=completed&resultFilter=succeeded&\$top=1" | - jq -r '.value[0].id') - - echo "Latest ID is ${id}" - echo "::set-output name=id::$id" - - name: Cache git-sdk-32-build-installers - if: env.SKIP != 'true' && matrix.arch.bitness == '32' - id: cache-sdk32-build-installers - uses: actions/cache@v2 with: - path: git-sdk-32-build-installers - key: build-installers-32-${{ steps.determine-latest-sdk32-extra-build-id.outputs.id }} - - name: Download git-sdk-32-build-installers - if: env.SKIP != 'true' && matrix.arch.bitness == '32' && steps.cache-sdk32-build-installers.outputs.cache-hit != 'true' - shell: bash - run: | - # Use Git Bash to download and unpack the artifact - - ## Get artifact - urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=${{ steps.determine-latest-sdk32-extra-build-id.outputs.id }} - download_url=$(curl "$urlbase/$id/artifacts" | - jq -r '.value[] | select(.name == "git-sdk-32-build-installers").resource.downloadUrl') - - curl -o artifacts.zip "$download_url" - - ## Unpack artifact - unzip artifacts.zip + flavor: build-installers + architecture: i686 - name: Download arm64 artifact if: env.SKIP != 'true' && matrix.arch.arm64 == true uses: actions/download-artifact@v1 @@ -484,7 +383,7 @@ jobs: if: env.SKIP != 'true' shell: bash run: | - d=git-sdk-${{matrix.arch.bitness}}-build-installers/usr/src/build-extra && + d=/usr/src/build-extra && if test ! 
-d $d/.git then git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d @@ -506,38 +405,34 @@ jobs: git config --global alias.signtool '!sh "/usr/src/build-extra/signtool.sh"' - name: Build ${{matrix.arch.bitness}}-bit ${{matrix.artifact.name}} if: env.SKIP != 'true' - shell: powershell + shell: bash run: | - & .\git-sdk-${{matrix.arch.bitness}}-build-installers\usr\bin\bash.exe -lc @" - set -x - if test "${{matrix.arch.arm64}}" = true - then - ARM64="--include-arm64-artifacts=\"$PWD/arm64\"" - else - ARM64= - fi + set -x + if test "${{matrix.arch.arm64}}" = true + then + ARM64="--include-arm64-artifacts=\"$PWD/arm64\"" + else + ARM64= + fi - eval /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git `$ARM64 --version=`$(cat pkg-${{matrix.arch.name}}/ver) -o artifacts --${{matrix.artifact.name}} --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-[0-9]*.tar.xz --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-doc-html-[0-9]*.tar.xz && - if test portable = '${{matrix.artifact.name}}' && test -n \"`$(git config alias.signtool)\" - then - git signtool artifacts/PortableGit-*.exe - fi && - openssl dgst -sha256 artifacts/${{matrix.artifact.fileprefix}}-*.${{matrix.artifact.fileextension}} | sed \"s/.* //\" >artifacts/sha-256.txt - "@ + eval /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git $ARM64 --version=$(cat pkg-${{matrix.arch.name}}/ver) -o artifacts --${{matrix.artifact.name}} --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-[0-9]*.tar.xz --pkg=pkg-${{matrix.arch.name}}/mingw-w64-${{matrix.arch.name}}-git-doc-html-[0-9]*.tar.xz && + if test portable = '${{matrix.artifact.name}}' && test -n "$(git config alias.signtool)" + then + git signtool artifacts/PortableGit-*.exe + fi && + openssl dgst -sha256 artifacts/${{matrix.artifact.fileprefix}}-*.${{matrix.artifact.fileextension}} | sed "s/.* //" >artifacts/sha-256.txt - name: Copy package-versions and pdbs if: 
env.SKIP != 'true' && matrix.artifact.name == 'installer' - shell: powershell + shell: bash run: | - & .\git-sdk-${{matrix.arch.bitness}}-build-installers\usr\bin\bash.exe -lc @" - cp /usr/src/build-extra/installer/package-versions.txt artifacts/ && + cp /usr/src/build-extra/installer/package-versions.txt artifacts/ && - a=`$PWD/artifacts && - p=`$PWD/pkg-${{matrix.arch.name}} && - (cd /usr/src/build-extra && - mkdir -p cached-source-packages && - cp \"`$p\"/*-pdb* cached-source-packages/ && - GIT_CONFIG_PARAMETERS=\"'windows.sdk${{matrix.arch.bitness}}.path='\" ./please.sh bundle_pdbs --arch=${{matrix.arch.name}} --directory=\"`$a\" installer/package-versions.txt) - "@ + a=$PWD/artifacts && + p=$PWD/pkg-${{matrix.arch.name}} && + (cd /usr/src/build-extra && + mkdir -p cached-source-packages && + cp "$p"/*-pdb* cached-source-packages/ && + GIT_CONFIG_PARAMETERS="'windows.sdk${{matrix.arch.bitness}}.path='" ./please.sh bundle_pdbs --arch=${{matrix.arch.name}} --directory="$a" installer/package-versions.txt) - name: Clean up temporary files if: always() && env.SKIP != 'true' shell: bash @@ -578,34 +473,15 @@ jobs: with: name: bundle-artifacts path: bundle-artifacts - - name: Cache git-sdk-64-build-installers + - uses: git-for-windows/setup-git-for-windows-sdk@v1 if: env.SKIP != 'true' - id: cache-sdk-build-installers - uses: actions/cache@v2 with: - path: git-sdk-64-build-installers - key: build-installers-64-${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} - - name: Download git-sdk-64-build-installers - if: env.SKIP != 'true' && steps.cache-sdk-build-installers.outputs.cache-hit != 'true' - shell: bash - run: | - # Use Git Bash to download and unpack the artifact - - ## Get artifact - urlbase=https://dev.azure.com/git-for-windows/git/_apis/build/builds - id=${{ needs.pkg.outputs.latest-sdk64-extra-build-id }} - download_url=$(curl "$urlbase/$id/artifacts" | - jq -r '.value[] | select(.name == "git-sdk-64-build-installers").resource.downloadUrl') - - curl -o 
artifacts.zip "$download_url" - - ## Unpack artifact - unzip artifacts.zip + flavor: build-installers - name: Clone and update build-extra if: env.SKIP != 'true' shell: bash run: | - d=git-sdk-64-build-installers/usr/src/build-extra && + d=/usr/src/build-extra && if test ! -d $d/.git then git clone --single-branch -b main https://github.com/git-for-windows/build-extra $d @@ -618,13 +494,11 @@ jobs: if: env.SKIP != 'true' - name: Build 64-bit NuGet packages if: env.SKIP != 'true' - shell: powershell + shell: bash run: | - & .\git-sdk-64-build-installers\usr\bin\bash.exe -lc @" - /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-x86_64/ver) -o artifacts --nuget --pkg=pkg-x86_64/mingw-w64-x86_64-git-[0-9]*.tar.xz --pkg=pkg-x86_64/mingw-w64-x86_64-git-doc-html-[0-9]*.tar.xz && - /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=`$(cat pkg-x86_64/ver) -o artifacts --nuget-mingit && - openssl dgst -sha256 artifacts/Git*.nupkg | sed \"s/.* //\" >artifacts/sha-256.txt - "@ + /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=$(cat pkg-x86_64/ver) -o artifacts --nuget --pkg=pkg-x86_64/mingw-w64-x86_64-git-[0-9]*.tar.xz --pkg=pkg-x86_64/mingw-w64-x86_64-git-doc-html-[0-9]*.tar.xz && + /usr/src/build-extra/please.sh make_installers_from_mingw_w64_git --version=$(cat pkg-x86_64/ver) -o artifacts --nuget-mingit && + openssl dgst -sha256 artifacts/Git*.nupkg | sed "s/.* //" >artifacts/sha-256.txt - name: Publish nuget-x86_64 if: env.SKIP != 'true' uses: actions/upload-artifact@v1 From a2a4f8e727946c219eeef62ffe1923c1cfe2196f Mon Sep 17 00:00:00 2001 From: Dennis Ameling Date: Sun, 6 Dec 2020 18:39:26 +0100 Subject: [PATCH 134/303] Add schannel to curl installation Signed-off-by: Dennis Ameling --- compat/vcbuild/vcpkg_install.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/vcbuild/vcpkg_install.bat b/compat/vcbuild/vcpkg_install.bat index 
8da212487ae97d..575c65c20ba307 100644 --- a/compat/vcbuild/vcpkg_install.bat +++ b/compat/vcbuild/vcpkg_install.bat @@ -127,5 +127,5 @@ set features= goto :EOF :curl_features -set features=[core,openssl] +set features=[core,openssl,schannel] goto :EOF From 7f9f87fdd10edfbb5e65dfcce3a3901a2ae31340 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Fri, 2 Jul 2021 00:30:24 +0100 Subject: [PATCH 135/303] CMake: default Visual Studio generator has changed Correct some wording and inform users regarding the Visual Studio changes (from V16.6) to the default generator. Subsequent commits ensure that Git for Windows can be directly opened in modern Visual Studio without needing special configuration of the CMakeLists settings. It appears that internally Visual Studio creates its own version of the .sln file (etc.) for extension tools that expect them. The large number of references below document the shifting of Visual Studio default and CMake setting options. refs: https://docs.microsoft.com/en-us/search/?scope=C%2B%2B&view=msvc-150&terms=Ninja 1. https://docs.microsoft.com/en-us/cpp/linux/cmake-linux-configure?view=msvc-160 (note the linux bit) "In Visual Studio 2019 version 16.6 or later ***, Ninja is the default generator for configurations targeting a remote system or WSL. For more information, see this post on the C++ Team Blog [https://devblogs.microsoft.com/cppblog/linux-development-with-visual-studio-first-class-support-for-gdbserver-improved-build-times-with-ninja-and-updates-to-the-connection-manager/]. For more information about these settings, see CMakeSettings.json reference [https://docs.microsoft.com/en-us/cpp/build/cmakesettings-reference?view=msvc-160]." 2. https://docs.microsoft.com/en-us/cpp/build/cmake-presets-vs?view=msvc-160 "CMake supports two files that allow users to specify common configure, build, and test options and share them with others: CMakePresets.json and CMakeUserPresets.json."
" Both files are supported in Visual Studio 2019 version 16.10 or later. ***" 3. https://devblogs.microsoft.com/cppblog/linux-development-with-visual-studio-first-class-support-for-gdbserver-improved-build-times-with-ninja-and-updates-to-the-connection-manager/ " Ninja has been the default generator (underlying build system) for CMake configurations targeting Windows for some time***, but in Visual Studio 2019 version 16.6 Preview 3*** we added support for Ninja on Linux." 4. https://docs.microsoft.com/en-us/cpp/build/cmakesettings-reference?view=msvc-160 " `generator`: specifies CMake generator to use for this configuration. May be one of: Visual Studio 2019 only: Visual Studio 16 2019 Visual Studio 16 2019 Win64 Visual Studio 16 2019 ARM Visual Studio 2017 and later: Visual Studio 15 2017 Visual Studio 15 2017 Win64 Visual Studio 15 2017 ARM Visual Studio 14 2015 Visual Studio 14 2015 Win64 Visual Studio 14 2015 ARM Unix Makefiles Ninja Because Ninja is designed for fast build speeds instead of flexibility and function, it is set as the default. However, some CMake projects may be unable to correctly build using Ninja. If this occurs, you can instruct CMake to generate Visual Studio projects instead. To specify a Visual Studio generator in Visual Studio 2017, open the settings editor from the main menu by choosing CMake | Change CMake Settings. Delete "Ninja" and type "V". This activates IntelliSense, which enables you to choose the generator you want." "To specify a Visual Studio generator in Visual Studio 2019, right-click on the CMakeLists.txt file in Solution Explorer and choose CMake Settings for project > Show Advanced Settings > CMake Generator. When the active configuration specifies a Visual Studio generator, by default MSBuild.exe is invoked with` -m -v:minimal` arguments." 5. 
https://docs.microsoft.com/en-us/cpp/build/cmake-presets-vs?view=msvc-160#enable-cmakepresetsjson-integration-in-visual-studio-2019 "Enable CMakePresets.json integration in Visual Studio 2019 CMakePresets.json integration isn't enabled by default in Visual Studio 2019. You can enable it for all CMake projects in Tools > Options > CMake > General: (tick a box)" ... see more. 6. https://docs.microsoft.com/en-us/cpp/build/cmakesettings-reference?view=msvc-140 (whichever v140 is..) "CMake projects are supported in Visual Studio 2017 and later." 7. https://docs.microsoft.com/en-us/cpp/overview/what-s-new-for-cpp-2017?view=msvc-150 "Support added for the CMake Ninja generator." 8. https://docs.microsoft.com/en-us/cpp/overview/what-s-new-for-cpp-2017?view=msvc-150#cmake-support-via-open-folder "CMake support via Open Folder Visual Studio 2017 introduces support for using CMake projects without converting to MSBuild project files (.vcxproj). For more information, see CMake projects in Visual Studio[https://docs.microsoft.com/en-us/cpp/build/cmake-projects-in-visual-studio?view=msvc-150]. Opening CMake projects with Open Folder automatically configures the environment for C++ editing, building, and debugging." ... +more! 9. https://docs.microsoft.com/en-us/cpp/build/cmake-presets-vs?view=msvc-160#supported-cmake-and-cmakepresetsjson-versions "Visual Studio reads and evaluates CMakePresets.json and CMakeUserPresets.json itself and doesn't invoke CMake directly with the --preset option. So, CMake version 3.20 or later isn't strictly required when you're building with CMakePresets.json inside Visual Studio. We recommend using CMake version 3.14 or later." 10. 
https://docs.microsoft.com/en-us/cpp/build/cmake-presets-vs?view=msvc-160#enable-cmakepresetsjson-integration-in-visual-studio-2019 "If you don't want to enable CMakePresets.json integration for all CMake projects, you can enable CMakePresets.json integration for a single CMake project by adding a CMakePresets.json file to the root of the open folder. You must close and reopen the folder in Visual Studio to activate the integration. 11. https://docs.microsoft.com/en-us/cpp/build/cmake-presets-vs?view=msvc-160#default-configure-presets ***(doesn't actually say which version..) "Default Configure Presets If no CMakePresets.json or CMakeUserPresets.json file exists, or if CMakePresets.json or CMakeUserPresets.json is invalid, Visual Studio will fall back*** on the following default Configure Presets: Windows example JSON { "name": "windows-default", "displayName": "Windows x64 Debug", "description": "Sets Ninja generator, compilers, x64 architecture, build and install directory, debug build type", "generator": "Ninja", "binaryDir": "${sourceDir}/out/build/${presetName}", "architecture": { "value": "x64", "strategy": "external" }, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug", "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}" }, "vendor": { "microsoft.com/VisualStudioSettings/CMake/1.0": { "hostOS": [ "Windows" ] } } }, " Signed-off-by: Philip Oakley --- contrib/buildsystems/CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 71e93558e68a27..da343998eea0af 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -14,6 +14,11 @@ Note: Visual Studio also has the option of opening `CMakeLists.txt` directly; Using this option, Visual Studio will not find the source code, though, therefore the `File>Open>Folder...` option is preferred. 
+Visual Studio does not produce a .sln solution file nor the .vcxproj files +that may be required by VS extension tools. + +To generate the .sln/.vcxproj files run CMake manually, as described below. + Instructions to run CMake manually: mkdir -p contrib/buildsystems/out @@ -22,7 +27,7 @@ Instructions to run CMake manually: This will build the git binaries in contrib/buildsystems/out directory (our top-level .gitignore file knows to ignore contents of -this directory). +this directory). The project .sln and .vcxproj files are also generated. Possible build configurations(-DCMAKE_BUILD_TYPE) with corresponding compiler flags @@ -35,17 +40,16 @@ empty(default) : NOTE: -DCMAKE_BUILD_TYPE is optional. For multi-config generators like Visual Studio this option is ignored -This process generates a Makefile(Linux/*BSD/MacOS) , Visual Studio solution(Windows) by default. +This process generates a Makefile(Linux/*BSD/MacOS), Visual Studio solution(Windows) by default. Run `make` to build Git on Linux/*BSD/MacOS. Open git.sln on Windows and build Git. -NOTE: By default CMake uses Makefile as the build tool on Linux and Visual Studio in Windows, -to use another tool say `ninja` add this to the command line when configuring. -`-G Ninja` - NOTE: By default CMake will install vcpkg locally to your source tree on configuration, to avoid this, add `-DNO_VCPKG=TRUE` to the command line when configuring. +The Visual Studio default generator changed in v16.6 from its Visual Studio +implemenation to `Ninja` This required changes to many CMake scripts. + ]] cmake_minimum_required(VERSION 3.14) From 69782295b631dcc02b1b07142e70114ccc5ff3d2 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Sat, 24 Apr 2021 11:09:58 +0100 Subject: [PATCH 136/303] .gitignore: add Visual Studio CMakeSetting.json file The CMakeSettings.json file is tool generated. Developers may track it should they provide additional settings. 
Signed-off-by: Philip Oakley --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b3dcafcb3310e9..6ded10067a9e97 100644 --- a/.gitignore +++ b/.gitignore @@ -249,3 +249,4 @@ Release/ /git.VC.db *.dSYM /contrib/buildsystems/out +CMakeSettings.json From 534e67ace1b867835e2b25c6e00b3e751e9e96be Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 5 Aug 2021 19:04:13 -0400 Subject: [PATCH 137/303] subtree: update `contrib/subtree` `test` target The intention of this change is to align with how the top-level git `Makefile` defines its own test target (which also internally calls `$(MAKE) -C t/ all`). This change also ensures the consistency of `make -C contrib/subtree test` with other testing in CI executions (which rely on `$DEFAULT_TEST_TARGET` being defined as `prove`). Signed-off-by: Victoria Dye --- contrib/subtree/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/subtree/Makefile b/contrib/subtree/Makefile index 6fa7496bfdb3fd..6f6e90c4cb49b6 100644 --- a/contrib/subtree/Makefile +++ b/contrib/subtree/Makefile @@ -94,7 +94,7 @@ $(GIT_SUBTREE_TEST): $(GIT_SUBTREE) cp $< $@ test: $(GIT_SUBTREE_TEST) - $(MAKE) -C t/ test + $(MAKE) -C t/ all clean: $(RM) $(GIT_SUBTREE) From 5e2b1f6428caebbb846598f2d57c3b72d4046d6a Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Thu, 22 Apr 2021 11:11:38 +0100 Subject: [PATCH 138/303] CMakeLists: add default "x64-windows" arch for Visual Studio In Git-for-Windows, work on using ARM64 has progressed. The commit 2d94b77b27 (cmake: allow building for Windows/ARM64, 2020-12-04) failed to notice that /compat/vcbuild/vcpkg_install.bat will default to using the "x64-windows" architecture for the vcpkg installation if not set, but CMake is not told of this default. Commit 635b6d99b3 (vcbuild: install ARM64 dependencies when building ARM64 binaries, 2020-01-31) later updated vcpkg_install.bat to accept an arch (%1) parameter, but retained the default. 
This default is necessary for the use case where the project directory is opened directly in Visual Studio, which will find and build a CMakeLists.txt file without any parameters, thus expecting use of the default setting. Also Visual Studio will generate internal .sln solution and .vcxproj project files needed for some extension tools. Inform users of the additional .sln/.vcxproj generation. ** How to test: rm -rf '.vs' # remove old visual studio settings rm -rf 'compat/vcbuild/vcpkg' # remove any vcpkg downloads rm -rf 'contrib/buildsystems/out' # remove builds & CMake artifacts with a fresh Visual Studio Community Edition, File>>Open>>(git *folder*) to load the project (which will take some time!). check for successful compilation. The implicit .sln (etc.) are in the hidden .vs directory created by Visual Studio. Signed-off-by: Philip Oakley --- contrib/buildsystems/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index da343998eea0af..820b746fa30324 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -71,6 +71,10 @@ if(USE_VCPKG) message("Initializing vcpkg and building the Git's dependencies (this will take a while...)") execute_process(COMMAND ${CMAKE_SOURCE_DIR}/compat/vcbuild/vcpkg_install.bat ${VCPKG_ARCH}) endif() + if(NOT EXISTS ${VCPKG_ARCH}) + message("VCPKG_ARCH: unset, using 'x64-windows'") + set(VCPKG_ARCH "x64-windows") # default from vcpkg_install.bat + endif() list(APPEND CMAKE_PREFIX_PATH "${VCPKG_DIR}/installed/${VCPKG_ARCH}") # In the vcpkg edition, we need this to be able to link to libcurl From d8f646d1b27f533c7b47e154235e22b047b6682c Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 4 Apr 2022 15:38:58 -0700 Subject: [PATCH 139/303] fsmonitor: reintroduce core.useBuiltinFSMonitor Reintroduce the 'core.useBuiltinFSMonitor' config setting (originally added in 0a756b2a25 (fsmonitor: config settings are
repository-specific, 2021-03-05)) after its removal from the upstream version of FSMonitor. Upstream, the 'core.useBuiltinFSMonitor' setting was rendered obsolete by "overloading" the 'core.fsmonitor' setting to take a boolean value. However, several applications (e.g., 'scalar') utilize the original config setting, so it should be preserved for a deprecation period before complete removal: * if 'core.fsmonitor' is a boolean, the user is correctly using the new config syntax; do not use 'core.useBuiltinFSMonitor'. * if 'core.fsmonitor' is unspecified, use 'core.useBuiltinFSMonitor'. * if 'core.fsmonitor' is a path, override and use the builtin FSMonitor if 'core.useBuiltinFSMonitor' is 'true'; otherwise, use the FSMonitor hook indicated by the path. Additionally, for this deprecation period, advise users to switch to using 'core.fsmonitor' to specify their use of the builtin FSMonitor. Signed-off-by: Victoria Dye --- Documentation/config/advice.txt | 3 +++ advice.c | 1 + advice.h | 1 + fsmonitor-settings.c | 33 +++++++++++++++++++++++++++++++-- 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt index a00d0100a82ba7..0d1e04f1ed631c 100644 --- a/Documentation/config/advice.txt +++ b/Documentation/config/advice.txt @@ -136,4 +136,7 @@ advice.*:: Advice shown when either linkgit:git-add[1] or linkgit:git-rm[1] is asked to update index entries outside the current sparse checkout. + useCoreFSMonitorConfig:: + Advice shown if the deprecated 'core.useBuiltinFSMonitor' config + setting is in use. 
-- diff --git a/advice.c b/advice.c index fd189689437c75..1d75981d05db60 100644 --- a/advice.c +++ b/advice.c @@ -74,6 +74,7 @@ static struct { [ADVICE_SUBMODULE_ALTERNATE_ERROR_STRATEGY_DIE] = { "submoduleAlternateErrorStrategyDie", 1 }, [ADVICE_SUBMODULES_NOT_UPDATED] = { "submodulesNotUpdated", 1 }, [ADVICE_UPDATE_SPARSE_PATH] = { "updateSparsePath", 1 }, + [ADVICE_USE_CORE_FSMONITOR_CONFIG] = { "useCoreFSMonitorConfig", 1 }, [ADVICE_WAITING_FOR_EDITOR] = { "waitingForEditor", 1 }, }; diff --git a/advice.h b/advice.h index 07e0f76833e780..edfb86ecaa8857 100644 --- a/advice.h +++ b/advice.h @@ -48,6 +48,7 @@ struct string_list; ADVICE_SUBMODULE_ALTERNATE_ERROR_STRATEGY_DIE, ADVICE_SUBMODULES_NOT_UPDATED, ADVICE_UPDATE_SPARSE_PATH, + ADVICE_USE_CORE_FSMONITOR_CONFIG, ADVICE_WAITING_FOR_EDITOR, ADVICE_SKIPPED_CHERRY_PICKS, }; diff --git a/fsmonitor-settings.c b/fsmonitor-settings.c index 464424a1e924c6..5377222651d09e 100644 --- a/fsmonitor-settings.c +++ b/fsmonitor-settings.c @@ -47,6 +47,31 @@ static struct fsmonitor_settings *alloc_settings(void) return s; } +static int check_deprecated_builtin_config(struct repository *r) +{ + int core_use_builtin_fsmonitor = 0; + + /* + * If 'core.useBuiltinFSMonitor' is set, print a deprecation warning + * suggesting the use of 'core.fsmonitor' instead. If the config is + * set to true, set the appropriate mode and return 1 indicating that + * the check resulted the config being set by this (deprecated) setting. 
+ */ + if(!repo_config_get_bool(r, "core.useBuiltinFSMonitor", &core_use_builtin_fsmonitor) && + core_use_builtin_fsmonitor) { + if (!git_env_bool("GIT_SUPPRESS_USEBUILTINFSMONITOR_ADVICE", 0)) { + advise_if_enabled(ADVICE_USE_CORE_FSMONITOR_CONFIG, + _("core.useBuiltinFSMonitor=true is deprecated;" + "please set core.fsmonitor=true instead")); + setenv("GIT_SUPPRESS_USEBUILTINFSMONITOR_ADVICE", "1", 1); + } + fsm_settings__set_ipc(r); + return 1; + } + + return 0; +} + static void lookup_fsmonitor_settings(struct repository *r) { const char *const_str; @@ -72,12 +97,16 @@ static void lookup_fsmonitor_settings(struct repository *r) return; case 1: /* config value was unset */ + if (check_deprecated_builtin_config(r)) + return; + const_str = getenv("GIT_TEST_FSMONITOR"); break; case -1: /* config value set to an arbitrary string */ - if (repo_config_get_pathname(r, "core.fsmonitor", &const_str)) - return; /* should not happen */ + if (check_deprecated_builtin_config(r) || + repo_config_get_pathname(r, "core.fsmonitor", &const_str)) + return; break; default: /* should not happen */ From c9c8710bee1f88f835f1f35b9f34272fa6cfe192 Mon Sep 17 00:00:00 2001 From: Dennis Ameling Date: Mon, 19 Jul 2021 13:04:27 +0200 Subject: [PATCH 140/303] ci(): add HOST_CPU to CMake command As mentioned in the Makefile and CMakeLists.txt: "When cross-compiling, define HOST_CPU as the canonical name of the CPU on which the built Git will run (for instance "x86_64")" This commit sets the HOST_CPU variable since Git for Windows arm64 is cross-compiled from an amd64 host. 
Signed-off-by: Dennis Ameling --- .github/workflows/git-artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/git-artifacts.yml b/.github/workflows/git-artifacts.yml index a82bb56700fbc3..9b4d1c3aaa3580 100644 --- a/.github/workflows/git-artifacts.yml +++ b/.github/workflows/git-artifacts.yml @@ -291,7 +291,7 @@ jobs: run: | cmake `pwd`/contrib/buildsystems/ -DCMAKE_PREFIX_PATH=`pwd`/compat/vcbuild/vcpkg/installed/arm64-windows \ -DNO_GETTEXT=YesPlease -DPERL_TESTS=OFF -DPYTHON_TESTS=OFF -DCURL_NO_CURL_CMAKE=ON -DCMAKE_GENERATOR_PLATFORM=arm64 -DVCPKG_ARCH=arm64-windows \ - -DCMAKE_INSTALL_PREFIX="`pwd`/git-arm64" -DSKIP_DASHED_BUILT_INS=ON + -DCMAKE_INSTALL_PREFIX="`pwd`/git-arm64" -DSKIP_DASHED_BUILT_INS=ON -DHOST_CPU=arm64 - name: MSBuild if: env.SKIP != 'true' run: msbuild git.sln -property:Configuration=Release From c1f85f3c9023d52148f0e7d01ea00f492c36eaee Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 19 Feb 2021 11:39:37 +0100 Subject: [PATCH 141/303] ci(vs-build): download the vcpkg artifacts using a dedicated Action We now have a GitHub Action to download and cache Azure Pipelines artifacts (such as the `vcpkg` artifacts), hiding gnarly internals, and also providing some robustness against network glitches. Let's use it. 
Signed-off-by: Johannes Schindelin --- .github/workflows/main.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 831f4df56c51dc..5034de74702b1d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -146,14 +146,10 @@ jobs: repository: 'microsoft/vcpkg' path: 'compat/vcbuild/vcpkg' - name: download vcpkg artifacts - shell: powershell - run: | - $urlbase = "https://dev.azure.com/git/git/_apis/build/builds" - $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=9&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id - $downloadUrl = ((Invoke-WebRequest -UseBasicParsing "${urlbase}/$id/artifacts").content | ConvertFrom-JSON).value[0].resource.downloadUrl - (New-Object Net.WebClient).DownloadFile($downloadUrl, "compat.zip") - Expand-Archive compat.zip -DestinationPath . -Force - Remove-Item compat.zip + uses: git-for-windows/get-azure-pipelines-artifact@v0 + with: + repository: git/git + definitionId: 9 - name: add msbuild to PATH uses: microsoft/setup-msbuild@v1 - name: copy dlls to root From ffdce9b0d54f49ef22ad3fbb2d2a49c88c6f3c1c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 2 Apr 2021 22:50:54 +0200 Subject: [PATCH 142/303] mingw: allow for longer paths in `parse_interpreter()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As reported in https://github.com/newren/git-filter-repo/pull/225, it looks like 99 bytes is not really sufficient to represent e.g. the full path to Python when installed via Windows Store (and this path is used in the hash bang line when installing scripts via `pip`). Let's increase it to what is probably the maximum sensible path size: MAX_PATH. This makes `parse_interpreter()` in line with what `lookup_prog()` handles.
Signed-off-by: Johannes Schindelin Signed-off-by: Vilius Šumskas --- compat/mingw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..2333a676aea45c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1247,7 +1247,7 @@ static const char *quote_arg_msys2(const char *arg) static const char *parse_interpreter(const char *cmd) { - static char buf[100]; + static char buf[MAX_PATH]; char *p, *opt; int n, fd; From af09265bf6bcaf7eb3939c408f11addd13cb2ac2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 17 May 2021 10:46:52 +0200 Subject: [PATCH 143/303] compat/vcbuild: document preferred way to build in Visual Studio We used to have that `make vcxproj` hack, but a hack it is. In the meantime, we have a much cleaner solution: using CMake, either explicitly, or even more conveniently via Visual Studio's built-in CMake support (simply open Git's top-level directory via File>Open>Folder...). Let's let the `README` reflect this. Signed-off-by: Johannes Schindelin --- compat/vcbuild/README | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/compat/vcbuild/README b/compat/vcbuild/README index 29ec1d0f104b80..5c71ea2daa4017 100644 --- a/compat/vcbuild/README +++ b/compat/vcbuild/README @@ -37,27 +37,17 @@ The Steps to Build Git with VS2015 or VS2017 from the command line. ================================================================ -Alternatively, run `make vcxproj` and then load the generated `git.sln` in -Visual Studio. The initial build will install the vcpkg system and build the +Alternatively, just open Git's top-level directory in Visual Studio, via +`File>Open>Folder...`. This will use CMake internally to generate the +project definitions. It will also install the vcpkg system and build the dependencies automatically. This will take a while. 
-Instead of generating the `git.sln` file yourself (which requires a full Git -for Windows SDK), you may want to consider fetching the `vs/master` branch of -https://github.com/git-for-windows/git instead (which is updated automatically -via CI running `make vcxproj`). The `vs/master` branch does not require a Git -for Windows to build, but you can run the test scripts in a regular Git Bash. - -Note that `make vcxproj` will automatically add and commit the generated `.sln` -and `.vcxproj` files to the repo. This is necessary to allow building a -fully-testable Git in Visual Studio, where a regular Git Bash can be used to -run the test scripts (as opposed to a full Git for Windows SDK): a number of -build targets, such as Git commands implemented as Unix shell scripts (where -`@@SHELL_PATH@@` and other placeholders are interpolated) require a full-blown -Git for Windows SDK (which is about 10x the size of a regular Git for Windows -installation). - -If your plan is to open a Pull Request with Git for Windows, it is a good idea -to drop this commit before submitting. +You can also generate the Visual Studio solution manually by downloading +and running CMake explicitly rather than letting Visual Studio doing +that implicitly. + +Another, deprecated option is to run `make vcxproj`. This option is +superseded by the CMake-based build, and will be removed at some point. ================================================================ The Steps of Build Git with VS2008 From 391b097f24e66cb20d2cfc6a69778dff5761fcee Mon Sep 17 00:00:00 2001 From: Pascal Muller Date: Wed, 23 Jun 2021 21:21:10 +0200 Subject: [PATCH 144/303] http: optionally send SSL client certificate This adds support for a new http.sslAutoClientCert config value. In cURL 7.77 or later the schannel backend does not automatically send client certificates from the Windows Certificate Store anymore. 
This config value is only used if http.sslBackend is set to "schannel", and can be used to opt in to the old behavior and force cURL to send client certificates. This fixes https://github.com/git-for-windows/git/issues/3292 Signed-off-by: Pascal Muller --- Documentation/config/http.txt | 5 +++++ git-curl-compat.h | 8 ++++++++ http.c | 26 ++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt index e044f4920fd998..c9aa1428556ed2 100644 --- a/Documentation/config/http.txt +++ b/Documentation/config/http.txt @@ -205,6 +205,11 @@ http.schannelUseSSLCAInfo:: when the `schannel` backend was configured via `http.sslBackend`, unless `http.schannelUseSSLCAInfo` overrides this behavior. +http.sslAutoClientCert:: + As of cURL v7.77.0, the Secure Channel backend won't automatically + send client certificates from the Windows Certificate Store anymore. + To opt in to the old behavior, http.sslAutoClientCert can be set. + http.pinnedPubkey:: Public key of the https service. It may either be the filename of a PEM or DER encoded public key file or a string starting with diff --git a/git-curl-compat.h b/git-curl-compat.h index 56a83b6bbd8c43..f9f784220f83e6 100644 --- a/git-curl-compat.h +++ b/git-curl-compat.h @@ -126,4 +126,12 @@ #define GIT_CURL_HAVE_CURLSSLSET_NO_BACKENDS #endif +/** + * CURLSSLOPT_AUTO_CLIENT_CERT was added in 7.77.0, released in May + * 2021. 
+ */ +#if LIBCURL_VERSION_NUM >= 0x074d00 +#define GIT_CURL_HAVE_CURLSSLOPT_AUTO_CLIENT_CERT +#endif + #endif diff --git a/http.c b/http.c index f3da692ad2c2d7..2a722f63579840 100644 --- a/http.c +++ b/http.c @@ -150,6 +150,8 @@ static int http_schannel_check_revoke_mode = */ static int http_schannel_use_ssl_cainfo; +static int http_auto_client_cert; + size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_) { size_t size = eltsize * nmemb; @@ -314,6 +316,11 @@ static int http_options(const char *var, const char *value, void *cb) return 0; } + if (!strcmp("http.sslautoclientcert", var)) { + http_auto_client_cert = git_config_bool(var, value); + return 0; + } + if (!strcmp("http.minsessions", var)) { min_curl_sessions = git_config_int(var, value); if (min_curl_sessions > 1) @@ -836,13 +843,24 @@ static CURL *get_curl_handle(void) } #endif - if (http_ssl_backend && !strcmp("schannel", http_ssl_backend) && - http_schannel_check_revoke_mode) { + if (http_ssl_backend && !strcmp("schannel", http_ssl_backend)) { + long ssl_options = 0; + if (http_schannel_check_revoke_mode) { #ifdef GIT_CURL_HAVE_CURLSSLOPT_NO_REVOKE - curl_easy_setopt(result, CURLOPT_SSL_OPTIONS, http_schannel_check_revoke_mode); + ssl_options |= http_schannel_check_revoke_mode; #else - warning(_("CURLSSLOPT_NO_REVOKE not supported with cURL < 7.44.0")); + warning(_("CURLSSLOPT_NO_REVOKE not supported with cURL < 7.44.0")); #endif + } + + if (http_auto_client_cert) { +#ifdef GIT_CURL_HAVE_CURLSSLOPT_AUTO_CLIENT_CERT + ssl_options |= CURLSSLOPT_AUTO_CLIENT_CERT; +#endif + } + + if (ssl_options) + curl_easy_setopt(result, CURLOPT_SSL_OPTIONS, ssl_options); } if (http_proactive_auth) From 406cd1d326710c31b1655a931daca67db5809afb Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Thu, 5 Aug 2021 19:11:59 -0400 Subject: [PATCH 145/303] ci: run `contrib/subtree` tests in CI builds Because `git subtree` (unlike most other `contrib` modules) is included as part of the standard release of Git 
for Windows, its stability should be verified as consistently as it is for the rest of git. By including the `git subtree` tests in the CI workflow, these tests are as much of a gate to merging and indicator of stability as the standard test suite. Signed-off-by: Victoria Dye --- ci/run-build-and-tests.sh | 4 ++++ ci/run-test-slice.sh | 3 +++ 2 files changed, 7 insertions(+) diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index 8ebff4259676e3..1ea8909c6f8449 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -53,4 +53,8 @@ then fi check_unignored_build_artifacts +case " $MAKE_TARGETS " in +*" all "*) make -C contrib/subtree test;; +esac + save_good_tree diff --git a/ci/run-test-slice.sh b/ci/run-test-slice.sh index a3c67956a8df8f..2f2dbea25b48f2 100755 --- a/ci/run-test-slice.sh +++ b/ci/run-test-slice.sh @@ -15,4 +15,7 @@ group "Run tests" make --quiet -C t T="$(cd t && tr '\n' ' ')" || handle_failed_tests +# Run the git subtree tests only if main tests succeeded +test 0 != "$1" || make -C contrib/subtree test + check_unignored_build_artifacts From 49674f139d37e9105e1576dedf8e32b8639dcb70 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Mon, 10 May 2021 16:47:40 +0100 Subject: [PATCH 146/303] CMake: show Win32 and Generator_platform build-option values Ensure key CMake option values are part of the CMake output to facilitate user support when tool updates impact the wider CMake actions, particularly ongoing 'improvements' in Visual Studio. These CMake displays perform the same function as the build-options.txt provided in the main Git for Windows. CMake is already chatty. The setting of CMAKE_EXPORT_COMPILE_COMMANDS is also reported. Include the environment's CMAKE_EXPORT_COMPILE_COMMANDS value which may have been propagated to CMake's internal value. Testing the CMAKE_EXPORT_COMPILE_COMMANDS processing can be difficult in the Visual Studio environment, as it may be cached in many places.
The 'environment' may include the OS, the user shell, CMake's own environment, along with the Visual Studio presets and caches. See previous commit for artifacts that need removing for a clean test. Signed-off-by: Philip Oakley --- contrib/buildsystems/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 820b746fa30324..1e4c803c3eb7c9 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -63,10 +63,20 @@ endif() if(NOT DEFINED CMAKE_EXPORT_COMPILE_COMMANDS) set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE) + message("settting CMAKE_EXPORT_COMPILE_COMMANDS: ${CMAKE_EXPORT_COMPILE_COMMANDS}") endif() if(USE_VCPKG) set(VCPKG_DIR "${CMAKE_SOURCE_DIR}/compat/vcbuild/vcpkg") + message("WIN32: ${WIN32}") # show its underlying text values + message("VCPKG_DIR: ${VCPKG_DIR}") + message("VCPKG_ARCH: ${VCPKG_ARCH}") # maybe unset + message("MSVC: ${MSVC}") + message("CMAKE_GENERATOR: ${CMAKE_GENERATOR}") + message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") + message("CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}") + message("CMAKE_EXPORT_COMPILE_COMMANDS: ${CMAKE_EXPORT_COMPILE_COMMANDS}") + message("ENV(CMAKE_EXPORT_COMPILE_COMMANDS): $ENV{CMAKE_EXPORT_COMPILE_COMMANDS}") if(NOT EXISTS ${VCPKG_DIR}) message("Initializing vcpkg and building the Git's dependencies (this will take a while...)") execute_process(COMMAND ${CMAKE_SOURCE_DIR}/compat/vcbuild/vcpkg_install.bat ${VCPKG_ARCH}) From 2f93a90e728853e793b9e814544a6cc0cc7dd437 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 8 Sep 2021 13:05:42 +0200 Subject: [PATCH 147/303] init: do parse _all_ core.* settings early In Git for Windows, `has_symlinks` is set to 0 by default. Therefore, we need to parse the config setting `core.symlinks` to know if it has been set to `true`.
In `git init`, we must do that before copying the templates because they might contain symbolic links. Even if the support for symbolic links on Windows has not made it to upstream Git yet, we really should make sure that all the `core.*` settings are parsed before proceeding, as they might very well change the behavior of `git init` in a way the user intended. This fixes https://github.com/git-for-windows/git/issues/3414 Signed-off-by: Johannes Schindelin --- builtin/init-db.c | 2 +- config.c | 2 +- config.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/init-db.c b/builtin/init-db.c index 546f9c595e7d8c..f33d18e8533606 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -410,7 +410,7 @@ int init_db(const char *git_dir, const char *real_git_dir, startup_info->have_repository = 1; /* Ensure `core.hidedotfiles` is processed */ - git_config(platform_core_config, NULL); + git_config(git_default_core_config, NULL); safe_create_dir(git_dir, 0); diff --git a/config.c b/config.c index cbb5a3bab74f6f..7176f800e1607b 100644 --- a/config.c +++ b/config.c @@ -1475,7 +1475,7 @@ int git_config_color(char *dest, const char *var, const char *value) return 0; } -static int git_default_core_config(const char *var, const char *value, void *cb) +int git_default_core_config(const char *var, const char *value, void *cb) { /* This needs a better name */ if (!strcmp(var, "core.filemode")) { diff --git a/config.h b/config.h index ca994d771475a9..fe5b7a1ae639b5 100644 --- a/config.h +++ b/config.h @@ -131,6 +131,7 @@ struct config_options { typedef int (*config_fn_t)(const char *, const char *, void *); int git_default_config(const char *, const char *, void *); +int git_default_core_config(const char *var, const char *value, void *cb); /** * Read a specific file in git-config format. 
From 4b272bd078bac431a22e4e5c970315cba192e092 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Sun, 31 Oct 2021 23:15:13 +0000 Subject: [PATCH 148/303] hash-object: demonstrate a >4GB/LLP64 problem On LLP64 systems, such as Windows, the size of `long`, `int`, etc. is only 32 bits (for backward compatibility). Git's use of `unsigned long` for file memory sizes in many places, rather than size_t, limits the handling of large files on LLP64 systems (commonly given as `>4GB`). Provide a minimum test for handling a >4GB file. The `hash-object` command, with the `--literally` and without `-w` option avoids writing the object, either loose or packed. This avoids the code paths hitting the `bigFileThreshold` config test code, the zlib code, and the pack code. Subsequent patches will walk the test's call chain, converting types to `size_t` (which is larger in LLP64 data models) where appropriate. Signed-off-by: Philip Oakley Signed-off-by: Johannes Schindelin --- t/t1007-hash-object.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index ac5ad8c7402d2b..428298fa2934d6 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -50,6 +50,9 @@ test_expect_success 'setup' ' example sha1:ddd3f836d3e3fbb7ae289aa9ae83536f76956399 example sha256:b44fe1fe65589848253737db859bd490453510719d7424daab03daf0767b85ae + + large5GB sha1:0be2be10a4c8764f32c4bf372a98edc731a4b204 + large5GB sha256:dc18ca621300c8d3cfa505a275641ebab00de189859e022a975056882d313e64 EOF ' @@ -249,4 +252,12 @@ test_expect_success '--literally with extra-long type' ' echo example | git hash-object -t $t --literally --stdin ' +test_expect_failure EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ + 'files over 4GB hash literally' ' + test-tool genzeros $((5*1024*1024*1024)) >big && + test_oid large5GB >expect && + git hash-object --stdin --literally <big >actual && + test_cmp expect actual +' + test_done From 1ee67ebee879d2649077f5be34b43b95f8d1ac90 Mon Sep
17 00:00:00 2001 From: Philip Oakley Date: Fri, 12 Nov 2021 21:07:03 +0000 Subject: [PATCH 149/303] hash_object_file_literally(): use size_t The previous commit adds a test that demonstrates a problem in the `hash-object --literally` command, manifesting in an unnecessary file size limit on systems using the LLP64 data model (which includes Windows). Walking the affected code path is `cmd_hash_object()` >> `hash_fd()` >> `hash_literally()` >> `hash_object_file_literally()`. The function `hash_object_file_literally()` is the first with a file length parameter (via a mem buffer). This commit changes the type of that parameter to the LLP64 compatible `size_t` type. There are no other uses of the function. The `strbuf` type is already `size_t` compatible. Note: The hash-object test does not yet pass. Subsequent commits will continue to walk the call tree's lower level functions to identify further fixes. Signed-off-by: Philip Oakley Signed-off-by: Johannes Schindelin --- object-file.c | 4 ++-- object-store.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/object-file.c b/object-file.c index 5b270f046dda1d..a1214b5ac309ac 100644 --- a/object-file.c +++ b/object-file.c @@ -1810,7 +1810,7 @@ static void write_object_file_prepare(const struct git_hash_algo *algo, } static void write_object_file_prepare_literally(const struct git_hash_algo *algo, - const void *buf, unsigned long len, + const void *buf, size_t len, const char *type, struct object_id *oid, char *hdr, int *hdrlen) { @@ -2240,7 +2240,7 @@ int write_object_file_flags(const void *buf, unsigned long len, return write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags); } -int write_object_file_literally(const void *buf, unsigned long len, +int write_object_file_literally(const void *buf, size_t len, const char *type, struct object_id *oid, unsigned flags) { diff --git a/object-store.h b/object-store.h index 1be57abaf10d7a..1437c0c0eddb51 100644 --- a/object-store.h +++ b/object-store.h @@ 
-272,7 +272,7 @@ static inline int write_object_file(const void *buf, unsigned long len, return write_object_file_flags(buf, len, type, oid, 0); } -int write_object_file_literally(const void *buf, unsigned long len, +int write_object_file_literally(const void *buf, size_t len, const char *type, struct object_id *oid, unsigned flags); int stream_loose_object(struct input_stream *in_stream, size_t len, From 03936f20751e4ccf98729c55cb07b5d17c17c194 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 29 Mar 2022 12:05:18 +0200 Subject: [PATCH 150/303] vcxproj: allow building with `NO_PERL` again This is another fall-out of the recent refactoring flurry. Signed-off-by: Johannes Schindelin --- config.mak.uname | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config.mak.uname b/config.mak.uname index 7fd4bdf169e55b..648df18f5b2a4d 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -792,9 +792,11 @@ vcxproj: sed -i '/^git_broken_path_fix ".*/d' git-sh-setup git add -f $(SCRIPT_LIB) $(SCRIPTS) +ifndef NO_PERL # Add Perl module $(MAKE) $(LIB_PERL_GEN) git add -f perl/build +endif # Add bin-wrappers, for testing rm -rf bin-wrappers/ From 34b4f68ef4e164d7c3c89078c7b42ec5c880b486 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Fri, 12 Nov 2021 21:14:50 +0000 Subject: [PATCH 151/303] object-file.c: use size_t for header lengths Continue walking the code path for the >4GB `hash-object --literally` test. The `hash_object_file_literally()` function internally uses both `hash_object_file()` and `write_object_file_prepare()`. Both function signatures use `unsigned long` rather than `size_t` for the mem buffer sizes. Use `size_t` instead, for LLP64 compatibility. While at it, convert those function's object's header buffer length to `size_t` for consistency. The value is already upcast to `uintmax_t` for print format compatibility. Note: The hash-object test still does not pass. 
A subsequent commit continues to walk the call tree's lower level hash functions to identify further fixes. Signed-off-by: Philip Oakley Signed-off-by: Johannes Schindelin --- object-file.c | 21 +++++++++++---------- object-store.h | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/object-file.c b/object-file.c index a1214b5ac309ac..bd84ee4ca36811 100644 --- a/object-file.c +++ b/object-file.c @@ -1787,7 +1787,7 @@ void *read_object_with_reference(struct repository *r, static void hash_object_body(const struct git_hash_algo *algo, git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, - char *hdr, int *hdrlen) + char *hdr, size_t *hdrlen) { algo->init_fn(c); algo->update_fn(c, hdr, *hdrlen); @@ -1796,9 +1796,9 @@ static void hash_object_body(const struct git_hash_algo *algo, git_hash_ctx *c, } static void write_object_file_prepare(const struct git_hash_algo *algo, - const void *buf, unsigned long len, + const void *buf, size_t len, enum object_type type, struct object_id *oid, - char *hdr, int *hdrlen) + char *hdr, size_t *hdrlen) { git_hash_ctx c; @@ -1812,7 +1812,7 @@ static void write_object_file_prepare(const struct git_hash_algo *algo, static void write_object_file_prepare_literally(const struct git_hash_algo *algo, const void *buf, size_t len, const char *type, struct object_id *oid, - char *hdr, int *hdrlen) + char *hdr, size_t *hdrlen) { git_hash_ctx c; @@ -1871,17 +1871,17 @@ static int write_buffer(int fd, const void *buf, size_t len) } static void hash_object_file_literally(const struct git_hash_algo *algo, - const void *buf, unsigned long len, + const void *buf, size_t len, const char *type, struct object_id *oid) { char hdr[MAX_HEADER_LEN]; - int hdrlen = sizeof(hdr); + size_t hdrlen = sizeof(hdr); write_object_file_prepare_literally(algo, buf, len, type, oid, hdr, &hdrlen); } void hash_object_file(const struct git_hash_algo *algo, const void *buf, - unsigned long len, enum object_type type, + size_t len, enum 
object_type type, struct object_id *oid) { hash_object_file_literally(algo, buf, len, type_name(type), oid); @@ -2223,12 +2223,12 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, return err; } -int write_object_file_flags(const void *buf, unsigned long len, +int write_object_file_flags(const void *buf, size_t len, enum object_type type, struct object_id *oid, unsigned flags) { char hdr[MAX_HEADER_LEN]; - int hdrlen = sizeof(hdr); + size_t hdrlen = sizeof(hdr); /* Normally if we have it in the pack then we do not bother writing * it out into .git/objects/??/?{38} file. @@ -2245,7 +2245,8 @@ int write_object_file_literally(const void *buf, size_t len, unsigned flags) { char *header; - int hdrlen, status = 0; + size_t hdrlen; + int status = 0; /* type string, SP, %lu of the length plus NUL must fit this */ hdrlen = strlen(type) + MAX_HEADER_LEN; diff --git a/object-store.h b/object-store.h index 1437c0c0eddb51..de48c9a78d76b1 100644 --- a/object-store.h +++ b/object-store.h @@ -260,10 +260,10 @@ static inline void *repo_read_object_file(struct repository *r, int oid_object_info(struct repository *r, const struct object_id *, unsigned long *); void hash_object_file(const struct git_hash_algo *algo, const void *buf, - unsigned long len, enum object_type type, + size_t len, enum object_type type, struct object_id *oid); -int write_object_file_flags(const void *buf, unsigned long len, +int write_object_file_flags(const void *buf, size_t len, enum object_type type, struct object_id *oid, unsigned flags); static inline int write_object_file(const void *buf, unsigned long len, From 005f80c46b9f4d4aa138d7af196935f8627f41ed Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 28 Jun 2022 16:35:04 +0200 Subject: [PATCH 152/303] vcxproj: require C11 This fixes the build after 7bc341e21b (git-compat-util: add a test balloon for C99 support, 2021-12-01). 
Signed-off-by: Johannes Schindelin --- contrib/buildsystems/Generators/Vcxproj.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index a6d1c6b8d05682..1858107378396a 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -178,6 +178,7 @@ sub createProject { OnlyExplicitInline ProgramDatabase + stdc11 true From 334860cd5e62aa755c70bdd75ac5d580e0e499af Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Fri, 12 Nov 2021 21:16:51 +0000 Subject: [PATCH 153/303] hash algorithms: use size_t for section lengths Continue walking the code path for the >4GB `hash-object --literally` test to the hash algorithm step for LLP64 systems. This patch lets the SHA1DC code use `size_t`, making it compatible with LLP64 data models (as used e.g. by Windows). The interested reader of this patch will note that we adjust the signature of the `git_SHA1DCUpdate()` function without updating _any_ call site. This certainly puzzled at least one reviewer already, so here is an explanation: This function is never called directly, but always via the macro `platform_SHA1_Update`, which is usually called via the macro `git_SHA1_Update`. However, we never call `git_SHA1_Update()` directly in `struct git_hash_algo`. Instead, we call `git_hash_sha1_update()`, which is defined thusly: static void git_hash_sha1_update(git_hash_ctx *ctx, const void *data, size_t len) { git_SHA1_Update(&ctx->sha1, data, len); } i.e. it contains an implicit downcast from `size_t` to `unsigned long` (before this here patch). With this patch, there is no downcast anymore. With this patch, finally, the t1007-hash-object.sh "files over 4GB hash literally" test case is fixed. 
Signed-off-by: Philip Oakley Signed-off-by: Johannes Schindelin --- object-file.c | 4 ++-- sha1dc_git.c | 3 +-- sha1dc_git.h | 2 +- t/t1007-hash-object.sh | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/object-file.c b/object-file.c index bd84ee4ca36811..0071655b698063 100644 --- a/object-file.c +++ b/object-file.c @@ -1785,7 +1785,7 @@ void *read_object_with_reference(struct repository *r, } static void hash_object_body(const struct git_hash_algo *algo, git_hash_ctx *c, - const void *buf, unsigned long len, + const void *buf, size_t len, struct object_id *oid, char *hdr, size_t *hdrlen) { @@ -1805,7 +1805,7 @@ static void write_object_file_prepare(const struct git_hash_algo *algo, /* Generate the header */ *hdrlen = format_object_header(hdr, *hdrlen, type, len); - /* Sha1.. */ + /* Hash (function pointers) computation */ hash_object_body(algo, &c, buf, len, oid, hdr, hdrlen); } diff --git a/sha1dc_git.c b/sha1dc_git.c index 5c300e812e0a11..65a6f1f2cd1bc2 100644 --- a/sha1dc_git.c +++ b/sha1dc_git.c @@ -25,10 +25,9 @@ void git_SHA1DCFinal(unsigned char hash[20], SHA1_CTX *ctx) /* * Same as SHA1DCUpdate, but adjust types to match git's usual interface. 
*/ -void git_SHA1DCUpdate(SHA1_CTX *ctx, const void *vdata, unsigned long len) +void git_SHA1DCUpdate(SHA1_CTX *ctx, const void *vdata, size_t len) { const char *data = vdata; - /* We expect an unsigned long, but sha1dc only takes an int */ while (len > INT_MAX) { SHA1DCUpdate(ctx, data, INT_MAX); data += INT_MAX; diff --git a/sha1dc_git.h b/sha1dc_git.h index 41e1c3fd3f787e..847b38fb4b9f23 100644 --- a/sha1dc_git.h +++ b/sha1dc_git.h @@ -15,7 +15,7 @@ void git_SHA1DCInit(SHA1_CTX *); #endif void git_SHA1DCFinal(unsigned char [20], SHA1_CTX *); -void git_SHA1DCUpdate(SHA1_CTX *ctx, const void *data, unsigned long len); +void git_SHA1DCUpdate(SHA1_CTX *ctx, const void *data, size_t len); #define platform_SHA_CTX SHA1_CTX #define platform_SHA1_Init git_SHA1DCInit diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index 428298fa2934d6..b30fb1282aa054 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -252,7 +252,7 @@ test_expect_success '--literally with extra-long type' ' echo example | git hash-object -t $t --literally --stdin ' -test_expect_failure EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ 'files over 4GB hash literally' ' test-tool genzeros $((5*1024*1024*1024)) >big && test_oid large5GB >expect && From 50bec549b94f09088478facdce760d0cefe9c142 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 28 Jun 2022 16:38:12 +0200 Subject: [PATCH 154/303] vcxproj: ignore the `-pedantic` option This is now passed by default, ever since 6a8cbc41ba (developer: enable pedantic by default, 2021-09-03). 
Signed-off-by: Johannes Schindelin --- contrib/buildsystems/engine.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/buildsystems/engine.pl b/contrib/buildsystems/engine.pl index 417ae71d44ccab..ee4fca200cc506 100755 --- a/contrib/buildsystems/engine.pl +++ b/contrib/buildsystems/engine.pl @@ -263,7 +263,7 @@ sub handleCompileLine if ("$part" eq "-o") { # ignore object file shift @parts; - } elsif ("$part" eq "-c" || "$part" eq "-i" || "$part" =~ /^-fno-/) { + } elsif ("$part" eq "-c" || "$part" eq "-i" || "$part" =~ /^-fno-/ || "$part" eq '-pedantic') { # ignore compile flag } elsif ($part =~ /^.?-I/) { push(@incpaths, $part); From 026b8d3831561bcddb4c11ec27886905b7785029 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Mon, 6 Dec 2021 22:26:50 +0000 Subject: [PATCH 155/303] hash-object --stdin: verify that it works with >4GB/LLP64 Just like the `hash-object --literally` code path, the `--stdin` code path also needs to use `size_t` instead of `unsigned long` to represent memory sizes, otherwise it would cause problems on platforms using the LLP64 data model (such as Windows). To limit the scope of the test case, the object is explicitly not written to the object store, nor are any filters applied. The `big` file from the previous test case is reused to save setup time; To avoid relying on that side effect, it is generated if it does not exist (e.g. when running via `sh t1007-*.sh --long --run=1,41`). 
Signed-off-by: Philip Oakley Signed-off-by: Johannes Schindelin --- t/t1007-hash-object.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index b30fb1282aa054..b6dbee4bb96064 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -260,4 +260,12 @@ test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ test_cmp expect actual ' +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ + 'files over 4GB hash correctly via --stdin' ' + { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } && + test_oid large5GB >expect && + git hash-object --stdin <big >actual && + test_cmp expect actual +' + test_done From 4160649d026b7f0b772eb3af93a1fe96021ff5e5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 28 Jun 2022 17:00:59 +0200 Subject: [PATCH 156/303] vcxproj: include reftable when committing `.vcxproj` files Signed-off-by: Johannes Schindelin --- config.mak.uname | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.mak.uname b/config.mak.uname index 648df18f5b2a4d..04297f1a7707f8 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -760,7 +760,7 @@ vcxproj: # Make .vcxproj files and add them perl contrib/buildsystems/generate -g Vcxproj - git add -f git.sln {*,*/lib.proj,t/helper/*}/*.vcxproj + git add -f git.sln {*,*/lib.proj,t/helper/*,reftable/libreftable{,_test}.proj}/*.vcxproj # Generate the LinkOrCopyBuiltins.targets and LinkOrCopyRemoteHttp.targets file (echo '' && \ From 09c68e91681f6917fa06379ddc21cfb9093973ad Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Mon, 6 Dec 2021 22:42:46 +0000 Subject: [PATCH 157/303] hash-object: add another >4GB/LLP64 test case To complement the `--stdin` and `--literally` test cases that verify that we can hash files larger than 4GB on 64-bit platforms using the LLP64 data model, here is a test case that exercises `hash-object` _without_ any options.
Just as before, we use the `big` file from the previous test case if it exists to save on setup time, otherwise generate it. Signed-off-by: Philip Oakley Signed-off-by: Johannes Schindelin --- t/t1007-hash-object.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index b6dbee4bb96064..f7062ebca18229 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -268,4 +268,12 @@ test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ test_cmp expect actual ' +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ + 'files over 4GB hash correctly' ' + { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } && + test_oid large5GB >expect && + git hash-object -- big >actual && + test_cmp expect actual +' + test_done From b12655242556f9363f1a330b9a55026c2e3f7ccb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 13 Apr 2022 14:49:17 -0400 Subject: [PATCH 158/303] setup: properly use "%(prefix)/" when in WSL Signed-off-by: Derrick Stolee --- setup.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/setup.c b/setup.c index cefd5f63c4680f..852b3d9141c6ed 100644 --- a/setup.c +++ b/setup.c @@ -1482,10 +1482,19 @@ const char *setup_git_directory_gently(int *nongit_ok) break; case GIT_DIR_INVALID_OWNERSHIP: if (!nongit_ok) { + struct strbuf prequoted = STRBUF_INIT; struct strbuf quoted = STRBUF_INIT; strbuf_complete(&report, '\n'); - sq_quote_buf_pretty(&quoted, dir.buf); + +#ifdef __MINGW32__ + if (dir.buf[0] == '/') + strbuf_addstr(&prequoted, "%(prefix)/"); +#endif + + strbuf_add(&prequoted, dir.buf, dir.len); + sq_quote_buf_pretty(&quoted, prequoted.buf); + die(_("detected dubious ownership in repository at '%s'\n" "%s" "To add an exception for this directory, call:\n" From 0f9749153c861afc032cdfdbff85ca2af04b4839 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 28 Jun 2022 18:04:01 +0200 Subject: [PATCH 159/303] vcxproj: handle libreftable_test,
too Since ef8a6c6268 (reftable: utility functions, 2021-10-07) we not only have a libreftable, but also a libreftable_test. Signed-off-by: Johannes Schindelin --- contrib/buildsystems/Generators/Vcxproj.pm | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index 1858107378396a..20d91ea84bfd44 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -77,7 +77,7 @@ sub createProject { my $libs_release = "\n "; my $libs_debug = "\n "; if (!$static_library && $name ne 'headless-git') { - $libs_release = join(";", sort(grep /^(?!libgit\.lib|xdiff\/lib\.lib|vcs-svn\/lib\.lib|reftable\/libreftable\.lib)/, @{$$build_structure{"$prefix${name}_LIBS"}})); + $libs_release = join(";", sort(grep /^(?!libgit\.lib|xdiff\/lib\.lib|vcs-svn\/lib\.lib|reftable\/libreftable(_test)?\.lib)/, @{$$build_structure{"$prefix${name}_LIBS"}})); $libs_debug = $libs_release; $libs_debug =~ s/zlib\.lib/zlibd\.lib/g; $libs_debug =~ s/libexpat\.lib/libexpatd\.lib/g; @@ -258,6 +258,7 @@ EOM if ((!$static_library || $target =~ 'vcs-svn' || $target =~ 'xdiff') && !($name =~ /headless-git/)) { my $uuid_libgit = $$build_structure{"LIBS_libgit_GUID"}; my $uuid_libreftable = $$build_structure{"LIBS_reftable/libreftable_GUID"}; + my $uuid_libreftable_test = $$build_structure{"LIBS_reftable/libreftable_test_GUID"}; my $uuid_xdiff_lib = $$build_structure{"LIBS_xdiff/lib_GUID"}; print F << "EOM"; @@ -269,10 +270,14 @@ EOM EOM if (!($name =~ /xdiff|libreftable/)) { print F << "EOM"; - + $uuid_libreftable false + + $uuid_libreftable_test + false + EOM } if (!($name =~ 'xdiff')) { From 6fcf6fc6433af12582403b9dd45800d129f7ba46 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 5 Mar 2021 23:12:11 +0100 Subject: [PATCH 160/303] Enable the built-in FSMonitor as an experimental feature If `feature.experimental` and `feature.manyFiles` are set and 
the user has not explicitly turned off the builtin FSMonitor, we now start the built-in FSMonitor by default. Only forcing it when UNSET matches the behavior of UPDATE_DEFAULT_BOOL() used for other repo settings. Signed-off-by: Johannes Schindelin Signed-off-by: Jeff Hostetler --- repo-settings.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/repo-settings.c b/repo-settings.c index e8b58151bc4a01..80157597c93f17 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -2,7 +2,8 @@ #include "config.h" #include "repository.h" #include "midx.h" -#include "compat/fsmonitor/fsm-listen.h" +#include "fsmonitor-ipc.h" +#include "fsmonitor-settings.h" static void repo_cfg_bool(struct repository *r, const char *key, int *dest, int def) @@ -43,6 +44,30 @@ void prepare_repo_settings(struct repository *r) /* Defaults modified by feature.* */ if (experimental) { r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING; + + /* + * Force enable the builtin FSMonitor (unless the repo + * is incompatible or they've already selected it or + * the hook version). But only if they haven't + * explicitly turned it off -- so only if our config + * value is UNSET. + * + * lookup_fsmonitor_settings() and check_for_ipc() do + * not distinguish between explicitly set FALSE and + * UNSET, so we re-test for an UNSET config key here. + * + * I'm not sure I want to fix fsmonitor-settings.c to + * have more than one _DISABLED state since our usage + * here is only to support this experimental period + * (and I don't want to overload the _reason field + * because it describes incompatibilities).
+ */ + if (manyfiles && + fsmonitor_ipc__is_supported() && + fsm_settings__get_mode(r) == FSMONITOR_MODE_DISABLED && + repo_config_get_maybe_bool(r, "core.fsmonitor", &value) > 0 && + repo_config_get_bool(r, "core.useBuiltinFSMonitor", &value)) + fsm_settings__set_ipc(r); } if (manyfiles) { r->settings.index_version = 4; From 33a014d38694c71785467759e108426f08d3b186 Mon Sep 17 00:00:00 2001 From: Philip Oakley Date: Tue, 7 Dec 2021 09:53:41 +0000 Subject: [PATCH 161/303] hash-object: add a >4GB/LLP64 test case using filtered input To verify that the `clean` side of the `clean`/`smudge` filter code is correct with regards to LLP64 (read: to ensure that `size_t` is used instead of `unsigned long`), here is a test case using a trivial filter, specifically _not_ writing anything to the object store to limit the scope of the test case. As in previous commits, the `big` file from previous test cases is reused if available, to save setup time, otherwise re-generated. Signed-off-by: Philip Oakley Signed-off-by: Johannes Schindelin --- t/t1007-hash-object.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index f7062ebca18229..df78646b073760 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -276,4 +276,16 @@ test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ test_cmp expect actual ' +# This clean filter does nothing, other than exercising the interface. +# We ensure that cleaning doesn't mangle large files on 64-bit Windows.
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ + 'hash filtered files over 4GB correctly' ' + { test -f big || test-tool genzeros $((5*1024*1024*1024)) >big; } && + test_oid large5GB >expect && + test_config filter.null-filter.clean "cat" && + echo "big filter=null-filter" >.gitattributes && + git hash-object -- big >actual && + test_cmp expect actual +' + test_done From ec684598b834925eb5566c5e2abaa2a45c946bf7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 13 Apr 2022 14:54:43 -0400 Subject: [PATCH 162/303] compat/mingw.c: do not warn when failing to get owner In the case of Git for Windows (say, in a Git Bash window) running in a Windows Subsystem for Linux (WSL) directory, the GetNamedSecurityInfoW() call in is_path_owned_by_current_sid() returns an error code other than ERROR_SUCCESS. This is consistent behavior across this boundary. In these cases, the owner would always be different because the WSL owner is a different entity than the Windows user. The change here is to suppress the error message that looks like this: error: failed to get owner for '//wsl.localhost/...' (1) Before this change, this warning happens for every Git command, regardless of whether the directory is marked with safe.directory. 
Signed-off-by: Derrick Stolee --- compat/mingw.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..8831bd693fdcae 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2720,9 +2720,7 @@ int is_path_owned_by_current_sid(const char *path, struct strbuf *report) DACL_SECURITY_INFORMATION, &sid, NULL, NULL, NULL, &descriptor); - if (err != ERROR_SUCCESS) - error(_("failed to get owner for '%s' (%ld)"), path, err); - else if (sid && IsValidSid(sid)) { + if (err == ERROR_SUCCESS && sid && IsValidSid(sid)) { /* Now, verify that the SID matches the current user's */ static PSID current_user_sid; BOOL is_member; From c9a529c3693a71844b6e2c2541ea8e2b3dddac0e Mon Sep 17 00:00:00 2001 From: Rafael Kitover Date: Tue, 12 Apr 2022 19:53:33 +0000 Subject: [PATCH 163/303] mingw: $env:TERM="xterm-256color" for newer OSes For Windows builds >= 15063 set $env:TERM to "xterm-256color" instead of "cygwin" because they have a more capable console system that supports this. Also set $env:COLORTERM="truecolor" if unset. $env:TERM is initialized so that ANSI colors in color.c work, see 29a3963484 (Win32: patch Windows environment on startup, 2012-01-15). See git-for-windows/git#3629 regarding problems caused by always setting $env:TERM="cygwin". This is the same heuristic used by the Cygwin runtime. Signed-off-by: Rafael Kitover Signed-off-by: Johannes Schindelin --- compat/mingw.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..f3383410cc5361 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2615,9 +2615,20 @@ static void setup_windows_environment(void) convert_slashes(tmp); } - /* simulate TERM to enable auto-color (see color.c) */ - if (!getenv("TERM")) - setenv("TERM", "cygwin", 1); + + /* + * Make sure TERM is set up correctly to enable auto-color + * (see color.c .) 
Use "cygwin" for older OS releases which + * works correctly with MSYS2 utilities on older consoles. + */ + if (!getenv("TERM")) { + if ((GetVersion() >> 16) < 15063) + setenv("TERM", "cygwin", 0); + else { + setenv("TERM", "xterm-256color", 0); + setenv("COLORTERM", "truecolor", 0); + } + } /* calculate HOME if not set */ if (!getenv("HOME")) { From f060165b1167f5a3422e3c6e0aaeb207f6cd489d Mon Sep 17 00:00:00 2001 From: Christopher Degawa Date: Sat, 28 May 2022 14:53:54 -0500 Subject: [PATCH 164/303] winansi: check result and Buffer before using Name NtQueryObject under Wine can return a success but fill out no name. In those situations, Wine will set Buffer to NULL, and set result to the sizeof(OBJECT_NAME_INFORMATION). Running a command such as echo "$(git.exe --version 2>/dev/null)" will crash due to a NULL pointer dereference when the code attempts to null terminate the buffer, although, weirdly, removing the subshell or redirecting stdout to a file will not trigger the crash. Code has been added to also check Buffer and Length to ensure the check is as robust as possible due to the current behavior being fragile at best, and could potentially change in the future. This code is based on the behavior of NtQueryObject under Wine and ReactOS. 
Signed-off-by: Christopher Degawa --- compat/winansi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compat/winansi.c b/compat/winansi.c index 3abe8dd5a2711b..945fe6451daff3 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -573,6 +573,9 @@ static void detect_msys_tty(int fd) if (!NT_SUCCESS(NtQueryObject(h, ObjectNameInformation, buffer, sizeof(buffer) - 2, &result))) return; + if (result < sizeof(*nameinfo) || !nameinfo->Name.Buffer || + !nameinfo->Name.Length) + return; name = nameinfo->Name.Buffer; name[nameinfo->Name.Length / sizeof(*name)] = 0; From 0fe2bc191f782ce0ef1ce15d35026ce117d9fd68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=99=E5=8D=93=E8=AF=86?= Date: Sun, 16 Jan 2022 03:38:33 +0800 Subject: [PATCH 165/303] Add config option `windows.appendAtomically` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atomic append on windows is only supported on local disk files, and it may cause errors in other situations, e.g. network file system. If that is the case, this config option should be used to turn atomic append off. 
Co-Authored-By: Johannes Schindelin Signed-off-by: 孙卓识 Signed-off-by: Johannes Schindelin --- Documentation/config.txt | 2 ++ Documentation/config/windows.txt | 4 ++++ compat/mingw.c | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 Documentation/config/windows.txt diff --git a/Documentation/config.txt b/Documentation/config.txt index 5b5b9765699933..d4aac2d58e88d2 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -539,4 +539,6 @@ include::config/versionsort.txt[] include::config/web.txt[] +include::config/windows.txt[] + include::config/worktree.txt[] diff --git a/Documentation/config/windows.txt b/Documentation/config/windows.txt new file mode 100644 index 00000000000000..fdaaf1c65504f3 --- /dev/null +++ b/Documentation/config/windows.txt @@ -0,0 +1,4 @@ +windows.appendAtomically:: + By default, append atomic API is used on windows. But it works only with + local disk files, if you're working on a network file system, you should + set it false to turn it off. diff --git a/compat/mingw.c b/compat/mingw.c index 901375d58415a3..a9a36e96f22fbf 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -535,6 +535,7 @@ static int is_local_named_pipe_path(const char *filename) int mingw_open (const char *filename, int oflags, ...) { + static int append_atomically = -1; typedef int (*open_fn_t)(wchar_t const *wfilename, int oflags, ...); va_list args; unsigned mode; @@ -551,7 +552,16 @@ int mingw_open (const char *filename, int oflags, ...) 
return -1; } - if ((oflags & O_APPEND) && !is_local_named_pipe_path(filename)) + /* + * Only set append_atomically to default value(1) when repo is initialized + * and fail to get config value + */ + if (append_atomically < 0 && the_repository && the_repository->commondir && + git_config_get_bool("windows.appendatomically", &append_atomically)) + append_atomically = 1; + + if (append_atomically && (oflags & O_APPEND) && + !is_local_named_pipe_path(filename)) open_fn = mingw_open_append; else open_fn = _wopen; @@ -700,8 +710,26 @@ ssize_t mingw_write(int fd, const void *buf, size_t len) HANDLE h = (HANDLE) _get_osfhandle(fd); if (GetFileType(h) == FILE_TYPE_PIPE) errno = EPIPE; - else + else { + wchar_t path[MAX_LONG_PATH]; + DWORD ret = GetFinalPathNameByHandleW(h, path, + ARRAY_SIZE(path), 0); + UINT drive_type = ret > 0 && ret < ARRAY_SIZE(path) ? + GetDriveTypeW(path) : DRIVE_UNKNOWN; + + /* + * The default atomic append causes such an error on + * network file systems, in such a case, it should be + * turned off via config. + * + * `drive_type` of UNC path: DRIVE_NO_ROOT_DIR + */ + if (DRIVE_NO_ROOT_DIR == drive_type || DRIVE_REMOTE == drive_type) + warning("invalid write operation detected; you may try:\n" + "\n\tgit config windows.appendAtomically false"); + errno = EINVAL; + } } return result; From 8ea3750abd9041c17df9db964946de3ca7239fad Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 28 Jun 2022 17:36:21 +0200 Subject: [PATCH 166/303] vcxproj: avoid escaping double quotes in the defines Visual Studio 2022 does not like that at all. 
Signed-off-by: Johannes Schindelin --- contrib/buildsystems/Generators/Vcxproj.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/buildsystems/Generators/Vcxproj.pm b/contrib/buildsystems/Generators/Vcxproj.pm index 20d91ea84bfd44..bf77a44e11f463 100644 --- a/contrib/buildsystems/Generators/Vcxproj.pm +++ b/contrib/buildsystems/Generators/Vcxproj.pm @@ -88,6 +88,7 @@ sub createProject { $defines =~ s//>/g; $defines =~ s/\'//g; + $defines =~ s/\\"/"/g; my $rcdefines = $defines; $rcdefines =~ s/(? Date: Sun, 20 Oct 2019 22:08:58 +0200 Subject: [PATCH 167/303] win32/pthread: avoid name clashes with winpthread The mingw-w64 GCC seems to link implicitly to libwinpthread, which does implement a pthread emulation (that is more complete than Git's). Let's keep preferring Git's. To avoid linker errors where it thinks that the `pthread_self` and the `pthread_create` symbols are defined twice, let's give our version a `win32_` prefix, just like we already do for `pthread_join()`. Signed-off-by: Johannes Schindelin --- compat/win32/pthread.c | 4 ++-- compat/win32/pthread.h | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compat/win32/pthread.c b/compat/win32/pthread.c index 2e7eead42cb008..9934c7ac6fa151 100644 --- a/compat/win32/pthread.c +++ b/compat/win32/pthread.c @@ -21,7 +21,7 @@ static unsigned __stdcall win32_start_routine(void *arg) return 0; } -int pthread_create(pthread_t *thread, const void *unused, +int win32_pthread_create(pthread_t *thread, const void *unused, void *(*start_routine)(void*), void *arg) { thread->arg = arg; @@ -50,7 +50,7 @@ int win32_pthread_join(pthread_t *thread, void **value_ptr) } } -pthread_t pthread_self(void) +pthread_t win32_pthread_self(void) { pthread_t t = { NULL }; t.tid = GetCurrentThreadId(); diff --git a/compat/win32/pthread.h b/compat/win32/pthread.h index 737983d00bae91..4e57147da2e57a 100644 --- a/compat/win32/pthread.h +++ b/compat/win32/pthread.h @@ -50,8 +50,9 @@ typedef struct { DWORD tid; 
} pthread_t; -int pthread_create(pthread_t *thread, const void *unused, - void *(*start_routine)(void*), void *arg); +int win32_pthread_create(pthread_t *thread, const void *unused, + void *(*start_routine)(void*), void *arg); +#define pthread_create win32_pthread_create /* * To avoid the need of copying a struct, we use small macro wrapper to pass @@ -62,7 +63,8 @@ int pthread_create(pthread_t *thread, const void *unused, int win32_pthread_join(pthread_t *thread, void **value_ptr); #define pthread_equal(t1, t2) ((t1).tid == (t2).tid) -pthread_t pthread_self(void); +pthread_t win32_pthread_self(void); +#define pthread_self win32_pthread_self static inline void NORETURN pthread_exit(void *ret) { From 70cb56e3e1c013bdde7e9e89e1c4df4228d09da5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 10 Jul 2022 00:39:32 +0200 Subject: [PATCH 168/303] ci: adjust Azure Pipeline for `runs_on_pool` These refactorings are really gifts that keep on giving. Signed-off-by: Johannes Schindelin --- ci/lib.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/lib.sh b/ci/lib.sh index 1b0cc2b57db866..b40719b0df3769 100755 --- a/ci/lib.sh +++ b/ci/lib.sh @@ -161,6 +161,12 @@ then MAKEFLAGS="$MAKEFLAGS --jobs=10" test windows_nt != "$CI_OS_NAME" || GIT_TEST_OPTS="--no-chain-lint --no-bin-wrappers $GIT_TEST_OPTS" + case "$CI_OS_NAME" in + linux) runs_on_pool=ubuntu-latest;; + macos|osx) runs_on_pool=macos-latest;; + windows_nt) runs_on_pool=windows-latest;; + *) echo "Unhandled OS: $CI_OS_NAME" >&2; exit 1;; + esac elif test true = "$GITHUB_ACTIONS" then CI_TYPE=github-actions From 8b0c08297dff49fabe34b68b0e2ca3fa4becd8bb Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Aug 2022 12:44:15 +0200 Subject: [PATCH 169/303] git-compat-util: avoid redeclaring _DEFAULT_SOURCE We are about to vendor in `mimalloc`'s source code which we will want to include `git-compat-util.h` after defining that constant. 
Signed-off-by: Johannes Schindelin --- git-compat-util.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index b90b64718eb610..d2212479866369 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -185,7 +185,9 @@ struct strbuf; #define _ALL_SOURCE 1 #define _GNU_SOURCE 1 #define _BSD_SOURCE 1 +#ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE 1 +#endif #define _NETBSD_SOURCE 1 #define _SGI_SOURCE 1 From f7d9c983b6db9101d675674705a46f07fa04cd9b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 10 Jul 2022 01:15:08 +0200 Subject: [PATCH 170/303] ci: stop linking the `prove` cache It is not useful because we do not have any persisted directory anymore, not since dropping our Travis CI support. Signed-off-by: Johannes Schindelin --- ci/run-build-and-tests.sh | 5 ----- ci/run-test-slice.sh | 5 ----- 2 files changed, 10 deletions(-) diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index 8ebff4259676e3..9a5032e6a779aa 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -5,11 +5,6 @@ . ${0%/*}/lib.sh -case "$CI_OS_NAME" in -windows*) cmd //c mklink //j t\\.prove "$(cygpath -aw "$cache_dir/.prove")";; -*) ln -s "$cache_dir/.prove" t/.prove;; -esac - run_tests=t case "$jobname" in diff --git a/ci/run-test-slice.sh b/ci/run-test-slice.sh index a3c67956a8df8f..312a3ef73fe65f 100755 --- a/ci/run-test-slice.sh +++ b/ci/run-test-slice.sh @@ -5,11 +5,6 @@ . 
${0%/*}/lib.sh -case "$CI_OS_NAME" in -windows*) cmd //c mklink //j t\\.prove "$(cygpath -aw "$cache_dir/.prove")";; -*) ln -s "$cache_dir/.prove" t/.prove;; -esac - group "Run tests" make --quiet -C t T="$(cd t && ./helper/test-tool path-utils slice-tests "$1" "$2" t[0-9]*.sh | tr '\n' ' ')" || From 111e8fff24b21eddd7ea0ac8f6e9bef97f131fba Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 24 Jun 2019 21:31:30 +0200 Subject: [PATCH 171/303] Import the source code of mimalloc This commit imports mimalloc's source code as per v2.0.6, fetched from the tag at https://github.com/microsoft/mimalloc. The .c files are from the src/ subdirectory, and the .h files from the include/ subdirectory. We will subsequently modify the source code to accommodate building within Git's context. Since we plan on using the `mi_*()` family of functions, we skip the C++-specific source code, some POSIX compliant functions to interact with mimalloc, and the code that wants to support auto-magic overriding of the `malloc()` function (mimalloc-new-delete.h, alloc-posix.c, mimalloc-override.h, alloc-override.c, alloc-override-osx.c, alloc-override-win.c and static.c). To appease the `check-whitespace` job of Git's Continuous Integration, this commit was washed one time via `git rebase --whitespace=fix`. 
Signed-off-by: Johannes Schindelin --- Makefile | 1 + compat/mimalloc/LICENSE | 21 + compat/mimalloc/alloc-aligned.c | 260 +++++ compat/mimalloc/alloc.c | 934 ++++++++++++++++ compat/mimalloc/arena.c | 446 ++++++++ compat/mimalloc/bitmap.c | 395 +++++++ compat/mimalloc/bitmap.h | 107 ++ compat/mimalloc/heap.c | 580 ++++++++++ compat/mimalloc/init.c | 693 ++++++++++++ compat/mimalloc/mimalloc-atomic.h | 338 ++++++ compat/mimalloc/mimalloc-internal.h | 1049 ++++++++++++++++++ compat/mimalloc/mimalloc-types.h | 598 +++++++++++ compat/mimalloc/mimalloc.h | 453 ++++++++ compat/mimalloc/options.c | 627 +++++++++++ compat/mimalloc/os.c | 1443 +++++++++++++++++++++++++ compat/mimalloc/page-queue.c | 331 ++++++ compat/mimalloc/page.c | 869 +++++++++++++++ compat/mimalloc/random.c | 367 +++++++ compat/mimalloc/readme.md | 715 +++++++++++++ compat/mimalloc/segment-cache.c | 360 +++++++ compat/mimalloc/segment.c | 1542 +++++++++++++++++++++++++++ compat/mimalloc/stats.c | 583 ++++++++++ 22 files changed, 12712 insertions(+) create mode 100644 compat/mimalloc/LICENSE create mode 100644 compat/mimalloc/alloc-aligned.c create mode 100644 compat/mimalloc/alloc.c create mode 100644 compat/mimalloc/arena.c create mode 100644 compat/mimalloc/bitmap.c create mode 100644 compat/mimalloc/bitmap.h create mode 100644 compat/mimalloc/heap.c create mode 100644 compat/mimalloc/init.c create mode 100644 compat/mimalloc/mimalloc-atomic.h create mode 100644 compat/mimalloc/mimalloc-internal.h create mode 100644 compat/mimalloc/mimalloc-types.h create mode 100644 compat/mimalloc/mimalloc.h create mode 100644 compat/mimalloc/options.c create mode 100644 compat/mimalloc/os.c create mode 100644 compat/mimalloc/page-queue.c create mode 100644 compat/mimalloc/page.c create mode 100644 compat/mimalloc/random.c create mode 100644 compat/mimalloc/readme.md create mode 100644 compat/mimalloc/segment-cache.c create mode 100644 compat/mimalloc/segment.c create mode 100644 compat/mimalloc/stats.c diff --git 
a/Makefile b/Makefile index cac3452edb90b4..97c45505cd7713 100644 --- a/Makefile +++ b/Makefile @@ -1259,6 +1259,7 @@ BUILTIN_OBJS += builtin/write-tree.o # upstream unnecessarily (making merging in future changes easier). THIRD_PARTY_SOURCES += compat/inet_ntop.c THIRD_PARTY_SOURCES += compat/inet_pton.c +THIRD_PARTY_SOURCES += compat/mimalloc/% THIRD_PARTY_SOURCES += compat/nedmalloc/% THIRD_PARTY_SOURCES += compat/obstack.% THIRD_PARTY_SOURCES += compat/poll/% diff --git a/compat/mimalloc/LICENSE b/compat/mimalloc/LICENSE new file mode 100644 index 00000000000000..670b668a0c928e --- /dev/null +++ b/compat/mimalloc/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/compat/mimalloc/alloc-aligned.c b/compat/mimalloc/alloc-aligned.c new file mode 100644 index 00000000000000..5594b6d38387d2 --- /dev/null +++ b/compat/mimalloc/alloc-aligned.c @@ -0,0 +1,260 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memset + +// ------------------------------------------------------ +// Aligned Allocation +// ------------------------------------------------------ + +// Fallback primitive aligned allocation -- split out for better codegen +static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +{ + mi_assert_internal(size <= PTRDIFF_MAX); + mi_assert_internal(alignment!=0 && _mi_is_power_of_two(alignment) && alignment <= MI_ALIGNMENT_MAX); + + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const size_t padsize = size + MI_PADDING_SIZE; + + // use regular allocation if it is guaranteed to fit the alignment constraints + if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) { + void* p = _mi_heap_malloc_zero(heap, size, zero); + mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); + return p; + } + + // otherwise over-allocate + void* p = _mi_heap_malloc_zero(heap, size + alignment - 1, zero); + if (p == NULL) return NULL; + + // .. 
and align within the allocation + uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); + mi_assert_internal(adjust <= alignment); + void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); + if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + return aligned_p; +} + +// Primitive aligned allocation +static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +{ + // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. + mi_assert(alignment > 0); + if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) { // require power-of-two (see ) + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; + } + if (mi_unlikely(alignment > MI_ALIGNMENT_MAX)) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); + #endif + return NULL; + } + if (mi_unlikely(size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; + } + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check + + // try first if there 
happens to be a small block available with just the right alignment + if (mi_likely(padsize <= MI_SMALL_SIZE_MAX)) { + mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); + const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; + if (mi_likely(page->free != NULL && is_aligned)) + { + #if MI_STAT>1 + mi_heap_stat_increase(heap, malloc, size); + #endif + void* p = _mi_page_malloc(heap, page, padsize); // TODO: inline _mi_page_malloc + mi_assert_internal(p != NULL); + mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); + if (zero) { _mi_block_zero_init(page, p, size); } + return p; + } + } + // fallback + return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero); +} + + +// ------------------------------------------------------ +// Optimized mi_heap_malloc_aligned / mi_malloc_aligned +// ------------------------------------------------------ + +mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); +} + +mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + #if !MI_PADDING + // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) + if (!_mi_is_power_of_two(alignment)) return NULL; + if (mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)) + #else + // with padding, we can only guarantee this for fixed alignments + if (mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) + && size <= MI_SMALL_SIZE_MAX)) + #endif + { + // fast path for common alignment and size + return mi_heap_malloc_small(heap, size); + } + else { + return mi_heap_malloc_aligned_at(heap, size, alignment, 0); + } +} + +// ------------------------------------------------------ +// Aligned 
Allocation +// ------------------------------------------------------ + +mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); +} + +mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); +} + +mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); +} + +mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); +} + +mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); +} + +mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); +} + +mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); +} + +mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); +} + +mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); +} + +mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t 
alignment) mi_attr_noexcept { + return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); +} + + +// ------------------------------------------------------ +// Aligned re-allocation +// ------------------------------------------------------ + +static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept { + mi_assert(alignment > 0); + if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); + if (p == NULL) return mi_heap_malloc_zero_aligned_at(heap,newsize,alignment,offset,zero); + size_t size = mi_usable_size(p); + if (newsize <= size && newsize >= (size - (size / 2)) + && (((uintptr_t)p + offset) % alignment) == 0) { + return p; // reallocation still fits, is aligned and not more than 50% waste + } + else { + void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); + if (newp != NULL) { + if (zero && newsize > size) { + const mi_page_t* page = _mi_ptr_page(newp); + if (page->is_zero) { + // already zero initialized + mi_assert_expensive(mi_mem_is_zero(newp,newsize)); + } + else { + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + memset((uint8_t*)newp + start, 0, newsize - start); + } + } + _mi_memcpy_aligned(newp, p, (newsize > size ? 
size : newsize)); + mi_free(p); // only free if successful + } + return newp; + } +} + +static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept { + mi_assert(alignment > 0); + if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); + size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL) + return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); +} + +void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); +} + +void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); +} + +void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); +} + +void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); +} + +void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(newcount, size, &total)) return NULL; + return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); +} + +void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(newcount, size, &total)) return NULL; + return mi_heap_rezalloc_aligned(heap, p, total, alignment); +} + +void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) 
mi_attr_noexcept { + return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); +} + +void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); +} + +void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); +} + +void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); +} + +void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); +} + +void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); +} diff --git a/compat/mimalloc/alloc.c b/compat/mimalloc/alloc.c new file mode 100644 index 00000000000000..70d74cb95790e8 --- /dev/null +++ b/compat/mimalloc/alloc.c @@ -0,0 +1,934 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // for realpath() on Linux +#endif + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset, strlen +#include // malloc, exit + +#define MI_IN_ALLOC_C +#include "alloc-override.c" +#undef MI_IN_ALLOC_C + +// ------------------------------------------------------ +// Allocation +// ------------------------------------------------------ + +// Fast allocation in a page: just pop from the free list. +// Fall back to generic allocation only if the list is empty. +extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); + mi_block_t* const block = page->free; + if (mi_unlikely(block == NULL)) { + return _mi_malloc_generic(heap, size); + } + mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); + // pop from the free list + page->used++; + page->free = mi_block_next(page, block); + mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); + +#if (MI_DEBUG>0) + if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } +#elif (MI_SECURE!=0) + block->next = 0; // don't leak internal data +#endif + +#if (MI_STAT>0) + const size_t bsize = mi_page_usable_block_size(page); + if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { + mi_heap_stat_increase(heap, normal, bsize); + mi_heap_stat_counter_increase(heap, normal_count, 1); +#if (MI_STAT>1) + const size_t bin = _mi_bin(bsize); + mi_heap_stat_increase(heap, normal_bins[bin], 1); +#endif + } +#endif + +#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) + mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); + ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= 
(size - MI_PADDING_SIZE + delta)); + padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); + padding->delta = (uint32_t)(delta); + uint8_t* fill = (uint8_t*)padding - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } +#endif + + return block; +} + +// allocate a small block +extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { + mi_assert(heap!=NULL); + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + mi_assert(size <= MI_SMALL_SIZE_MAX); + #if (MI_PADDING) + if (size == 0) { + size = sizeof(void*); + } + #endif + mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); + void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE); + mi_assert_internal(p==NULL || mi_usable_size(p) >= size); + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); + } + #endif + return p; +} + +extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { + return mi_heap_malloc_small(mi_get_default_heap(), size); +} + +// The main allocation function +extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { + return mi_heap_malloc_small(heap, size); + } + else { + mi_assert(heap!=NULL); + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic + mi_assert_internal(p == NULL || mi_usable_size(p) >= size); + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + 
mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); + } + #endif + return p; + } +} + +extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { + return mi_heap_malloc(mi_get_default_heap(), size); +} + + +void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { + // note: we need to initialize the whole usable block size to zero, not just the requested size, + // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) + MI_UNUSED(size); + mi_assert_internal(p != NULL); + mi_assert_internal(mi_usable_size(p) >= size); // size can be zero + mi_assert_internal(_mi_ptr_page(p)==page); + if (page->is_zero && size > sizeof(mi_block_t)) { + // already zero initialized memory + ((mi_block_t*)p)->next = 0; // clear the free list pointer + mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p))); + } + else { + // otherwise memset + memset(p, 0, mi_usable_size(p)); + } +} + +// zero initialized small block +mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { + void* p = mi_malloc_small(size); + if (p != NULL) { + _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again? + } + return p; +} + +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { + void* p = mi_heap_malloc(heap,size); + if (zero && p != NULL) { + _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again? 
+ } + return p; +} + +extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return _mi_heap_malloc_zero(heap, size, true); +} + +mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { + return mi_heap_zalloc(mi_get_default_heap(),size); +} + + +// ------------------------------------------------------ +// Check for double free in secure and debug mode +// This is somewhat expensive so only enabled for secure mode 4 +// ------------------------------------------------------ + +#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) +// linear check if the free list contains a specific element +static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { + while (list != NULL) { + if (elem==list) return true; + list = mi_block_next(page, list); + } + return false; +} + +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, mi_page_thread_free(page), block)) + { + _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); + return true; + } + return false; +} + +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? + { + // Suspicious: the decoded value in the block is in the same page (or NULL) -- maybe a double free?
+ // (continue in separate function to improve code generation) + return mi_check_is_double_freex(page, block); + } + return false; +} +#else +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); + return false; +} +#endif + +// --------------------------------------------------------------------------- +// Check for heap block overflow by setting up padding at the end of the block +// --------------------------------------------------------------------------- + +#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) +static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { + *bsize = mi_page_usable_block_size(page); + const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + *delta = padding->delta; + return ((uint32_t)mi_ptr_encode(page,block,page->keys) == padding->canary && *delta <= *bsize); +} + +// Return the exact usable size of a block. +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); mi_assert_internal(delta <= bsize); + return (ok ? bsize - delta : 0); +} + +static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + *size = *wrong = bsize; + if (!ok) return false; + mi_assert_internal(bsize >= delta); + *size = bsize - delta; + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + for (size_t i = 0; i < maxpad; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - delta + i; + return false; + } + } + return true; +} + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + size_t size; + size_t wrong; + if (!mi_verify_padding(page,block,&size,&wrong)) { + _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); + } +} + +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning heap. If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. +static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space + mi_assert_internal(bsize >= min_size); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + padding->delta = (uint32_t)new_delta; +} +#else +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); +} + +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(block); + return mi_page_usable_block_size(page); +} + +static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + MI_UNUSED(page); + MI_UNUSED(block); + MI_UNUSED(min_size); +} +#endif + +// only maintain stats for smaller objects if requested +#if (MI_STAT>0) 
+static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { + #if (MI_STAT < 2) + MI_UNUSED(block); + #endif + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_usable_block_size(page); + #if (MI_STAT>1) + const size_t usize = mi_page_usable_size_of(page, block); + mi_heap_stat_decrease(heap, malloc, usize); + #endif + if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal, bsize); + #if (MI_STAT > 1) + mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); + #endif + } + else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, large, bsize); + } + else { + mi_heap_stat_decrease(heap, huge, bsize); + } +} +#else +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); MI_UNUSED(block); +} +#endif + +#if (MI_STAT>0) +// maintain stats for huge objects +static void mi_stat_huge_free(const mi_page_t* page) { + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, large, bsize); + } + else { + mi_heap_stat_decrease(heap, huge, bsize); + } +} +#else +static void mi_stat_huge_free(const mi_page_t* page) { + MI_UNUSED(page); +} +#endif + +// ------------------------------------------------------ +// Free +// ------------------------------------------------------ + +// multi-threaded free +static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) +{ + // The padding check may access the non-thread-owned page for the key values. + // that is safe as these are constant and the page won't be freed (as the block is not freed yet). 
+ mi_check_padding(page, block); + mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + #endif + + // huge page segments are always abandoned and can be freed immediately + mi_segment_t* segment = _mi_page_segment(page); + if (segment->kind==MI_SEGMENT_HUGE) { + mi_stat_huge_free(page); + _mi_segment_huge_page_free(segment, page, block); + return; + } + + // Try to put the block on either the page-local thread free list, or the heap delayed free list. + mi_thread_free_t tfreex; + bool use_delayed; + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); + if (mi_unlikely(use_delayed)) { + // unlikely: this only happens on the first concurrent free in a page that is in the full list + tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); + } + else { + // usual: directly add to page thread_free list + mi_block_set_next(page, block, mi_tf_block(tfree)); + tfreex = mi_tf_set_block(tfree,block); + } + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + + if (mi_unlikely(use_delayed)) { + // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) + mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); + mi_assert_internal(heap != NULL); + if (heap != NULL) { + // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + do { + mi_block_set_nextx(heap,block,dfree, heap->keys); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } + + // and reset the MI_DELAYED_FREEING flag + tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + tfreex = tfree; + mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + } +} + +// regular free +static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) +{ + // and push it on the free list + if (mi_likely(local)) { + // owning thread can free a block directly + if (mi_unlikely(mi_check_is_double_free(page, block))) return; + mi_check_padding(page, block); + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + page->used--; + if (mi_unlikely(mi_page_all_free(page))) { + _mi_page_retire(page); + } + else if (mi_unlikely(mi_page_is_in_full(page))) { + _mi_page_unfull(page); + } + } + else { + _mi_free_block_mt(page,block); + } +} + + +// Adjust a block that was allocated aligned, to the actual start of the block in the page. 
+mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { + mi_assert_internal(page!=NULL && p!=NULL); + const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); + const size_t adjust = (diff % mi_page_block_size(page)); + return (mi_block_t*)((uintptr_t)p - adjust); +} + + +static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) mi_attr_noexcept { + mi_page_t* const page = _mi_segment_page_of(segment, p); + mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); + mi_stat_free(page, block); + _mi_free_block(page, local, block); +} + +// Get the segment data belonging to a pointer +// This is just a single `and` in assembly but does further checks in debug mode +// (and secure mode) if this was a valid pointer. +static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) +{ + MI_UNUSED(msg); +#if (MI_DEBUG>0) + if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { + _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); + return NULL; + } +#endif + + mi_segment_t* const segment = _mi_ptr_segment(p); + if (mi_unlikely(segment == NULL)) return NULL; // checks also for (p==NULL) + +#if (MI_DEBUG>0) + if (mi_unlikely(!mi_is_in_heap_region(p))) { + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + } + } +#endif +#if (MI_DEBUG>0 || MI_SECURE>=4) + if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { + _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); + return NULL; + } +#endif + return segment; +} + +// Free a block +void mi_free(void* p) mi_attr_noexcept +{ + 
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); + if (mi_unlikely(segment == NULL)) return; + + mi_threadid_t tid = _mi_thread_id(); + mi_page_t* const page = _mi_segment_page_of(segment, p); + + if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks + // local, and not full or aligned + mi_block_t* block = (mi_block_t*)(p); + if (mi_unlikely(mi_check_is_double_free(page,block))) return; + mi_check_padding(page, block); + mi_stat_free(page, block); + #if (MI_DEBUG!=0) + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + if (mi_unlikely(--page->used == 0)) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) + _mi_page_retire(page); + } + } + else { + // non-local, aligned blocks, or a full page; use the more generic path + // note: recalc page in generic to improve code generation + mi_free_generic(segment, tid == segment->thread_id, p); + } +} + +bool _mi_free_delayed_block(mi_block_t* block) { + // get segment and page + const mi_segment_t* const segment = _mi_ptr_segment(block); + mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(_mi_thread_id() == segment->thread_id); + mi_page_t* const page = _mi_segment_page_of(segment, block); + + // Clear the no-delayed flag so delayed freeing is used again for this page. + // This must be done before collecting the free lists on this page -- otherwise + // some blocks may end up in the page `thread_free` list with no blocks in the + // heap `thread_delayed_free` list which may cause the page to be never freed! 
+ // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */); + + // collect all other non-local frees to ensure up-to-date `used` count + _mi_page_free_collect(page, false); + + // and free the block (possibly freeing the page as well since used is updated) + _mi_free_block(page, true, block); + return true; +} + +// Bytes available in a block +mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { + const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); + const size_t size = mi_page_usable_size_of(page, block); + const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; + mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); + return (size - adjust); +} + +static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { + const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); + if (segment==NULL) return 0; // also returns 0 if `p == NULL` + const mi_page_t* const page = _mi_segment_page_of(segment, p); + if (mi_likely(!mi_page_has_aligned(page))) { + const mi_block_t* block = (const mi_block_t*)p; + return mi_page_usable_size_of(page, block); + } + else { + // split out to separate routine for improved code generation + return mi_page_usable_aligned_size_of(segment, page, p); + } +} + +size_t mi_usable_size(const void* p) mi_attr_noexcept { + return _mi_usable_size(p, "mi_usable_size"); +} + + +// ------------------------------------------------------ +// ensure explicit external inline definitions are emitted! 
+// ------------------------------------------------------ + +#ifdef __cplusplus +void* _mi_externs[] = { + (void*)&_mi_page_malloc, + (void*)&mi_malloc, + (void*)&mi_malloc_small, + (void*)&mi_zalloc_small, + (void*)&mi_heap_malloc, + (void*)&mi_heap_zalloc, + (void*)&mi_heap_malloc_small +}; +#endif + + +// ------------------------------------------------------ +// Allocation extensions +// ------------------------------------------------------ + +void mi_free_size(void* p, size_t size) mi_attr_noexcept { + MI_UNUSED_RELEASE(size); + mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + mi_free(p); +} + +void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { + MI_UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free_size(p,size); +} + +void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { + MI_UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free(p); +} + +extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count,size,&total)) return NULL; + return mi_heap_zalloc(heap,total); +} + +mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { + return mi_heap_calloc(mi_get_default_heap(),count,size); +} + +// Uninitialized `calloc` +extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_heap_malloc(heap, total); +} + +mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { + return mi_heap_mallocn(mi_get_default_heap(),count,size); +} + +// Expand (or shrink) in place (or fail) +void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { + #if MI_PADDING + // we do not shrink/expand with padding enabled + MI_UNUSED(p); MI_UNUSED(newsize); + return NULL; + 
#else + if (p == NULL) return NULL; + const size_t size = _mi_usable_size(p,"mi_expand"); + if (newsize > size) return NULL; + return p; // it fits + #endif +} + +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept { + const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL + if (mi_unlikely(newsize <= size && newsize >= (size / 2))) { + // todo: adjust potential padding to reflect the new size? + return p; // reallocation still fits and not more than 50% waste + } + void* newp = mi_heap_malloc(heap,newsize); + if (mi_likely(newp != NULL)) { + if (zero && newsize > size) { + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + memset((uint8_t*)newp + start, 0, newsize - start); + } + if (mi_likely(p != NULL)) { + _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); + mi_free(p); // only free the original pointer if successful + } + } + return newp; +} + +void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + return _mi_heap_realloc_zero(heap, p, newsize, false); +} + +void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_heap_realloc(heap, p, total); +} + + +// Reallocate but free `p` on errors +void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + void* newp = mi_heap_realloc(heap, p, newsize); + if (newp==NULL && p!=NULL) mi_free(p); + return newp; +} + +void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + return _mi_heap_realloc_zero(heap, p, newsize, true); +} + +void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return 
NULL; + return mi_heap_rezalloc(heap, p, total); +} + + +void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_realloc(mi_get_default_heap(),p,newsize); +} + +void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_heap_reallocn(mi_get_default_heap(),p,count,size); +} + +// Reallocate but free `p` on errors +void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_reallocf(mi_get_default_heap(),p,newsize); +} + +void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); +} + +void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_heap_recalloc(mi_get_default_heap(), p, count, size); +} + + + +// ------------------------------------------------------ +// strdup, strndup, and realpath +// ------------------------------------------------------ + +// `strdup` using mi_malloc +mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { + if (s == NULL) return NULL; + size_t n = strlen(s); + char* t = (char*)mi_heap_malloc(heap,n+1); + if (t != NULL) _mi_memcpy(t, s, n + 1); + return t; +} + +mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { + return mi_heap_strdup(mi_get_default_heap(), s); +} + +// `strndup` using mi_malloc +mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { + if (s == NULL) return NULL; + const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) + const size_t m = (end != NULL ? 
(size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string + mi_assert_internal(m <= n); + char* t = (char*)mi_heap_malloc(heap, m+1); + if (t == NULL) return NULL; + _mi_memcpy(t, s, m); + t[m] = 0; + return t; +} + +mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { + return mi_heap_strndup(mi_get_default_heap(),s,n); +} + +#ifndef __wasi__ +// `realpath` using mi_malloc +#ifdef _WIN32 +#ifndef PATH_MAX +#define PATH_MAX MAX_PATH +#endif +#include +mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { + // todo: use GetFullPathNameW to allow longer file names + char buf[PATH_MAX]; + DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? buf : resolved_name), NULL); + if (res == 0) { + errno = GetLastError(); return NULL; + } + else if (res > PATH_MAX) { + errno = EINVAL; return NULL; + } + else if (resolved_name != NULL) { + return resolved_name; + } + else { + return mi_heap_strndup(heap, buf, PATH_MAX); + } +} +#else +#include // pathconf +static size_t mi_path_max(void) { + static size_t path_max = 0; + if (path_max <= 0) { + long m = pathconf("/",_PC_PATH_MAX); + if (m <= 0) path_max = 4096; // guess + else if (m < 256) path_max = 256; // at least 256 + else path_max = m; + } + return path_max; +} + +char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { + if (resolved_name != NULL) { + return realpath(fname,resolved_name); + } + else { + size_t n = mi_path_max(); + char* buf = (char*)mi_malloc(n+1); + if (buf==NULL) return NULL; + char* rname = realpath(fname,buf); + char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL` + mi_free(buf); + return result; + } +} +#endif + +mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { + return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); +} +#endif + 
+/*------------------------------------------------------- +C++ new and new_aligned +The standard requires calling into `get_new_handler` and +throwing the bad_alloc exception on failure. If we compile +with a C++ compiler we can implement this precisely. If we +use a C compiler we cannot throw a `bad_alloc` exception +but we call `abort` instead (i.e. not returning). +-------------------------------------------------------*/ + +#ifdef __cplusplus +#include +static bool mi_try_new_handler(bool nothrow) { + #if defined(_MSC_VER) || (__cplusplus >= 201103L) + std::new_handler h = std::get_new_handler(); + #else + std::new_handler h = std::set_new_handler(); + std::set_new_handler(h); + #endif + if (h==NULL) { + _mi_error_message(ENOMEM, "out of memory in 'new'"); + if (!nothrow) { + throw std::bad_alloc(); + } + return false; + } + else { + h(); + return true; + } +} +#else +typedef void (*std_new_handler_t)(void); + +#if (defined(__GNUC__) || defined(__clang__)) +std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv(void) { + return NULL; +} +static std_new_handler_t mi_get_new_handler(void) { + return _ZSt15get_new_handlerv(); +} +#else +// note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`.
+static std_new_handler_t mi_get_new_handler() { + return NULL; +} +#endif + +static bool mi_try_new_handler(bool nothrow) { + std_new_handler_t h = mi_get_new_handler(); + if (h==NULL) { + _mi_error_message(ENOMEM, "out of memory in 'new'"); + if (!nothrow) { + abort(); // cannot throw in plain C, use abort + } + return false; + } + else { + h(); + return true; + } +} +#endif + +static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) { + void* p = NULL; + while(p == NULL && mi_try_new_handler(nothrow)) { + p = mi_malloc(size); + } + return p; +} + +mi_decl_restrict void* mi_new(size_t size) { + void* p = mi_malloc(size); + if (mi_unlikely(p == NULL)) return mi_try_new(size,false); + return p; +} + +mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { + void* p = mi_malloc(size); + if (mi_unlikely(p == NULL)) return mi_try_new(size, true); + return p; +} + +mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { + void* p; + do { + p = mi_malloc_aligned(size, alignment); + } + while(p == NULL && mi_try_new_handler(false)); + return p; +} + +mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { + void* p; + do { + p = mi_malloc_aligned(size, alignment); + } + while(p == NULL && mi_try_new_handler(true)); + return p; +} + +mi_decl_restrict void* mi_new_n(size_t count, size_t size) { + size_t total; + if (mi_unlikely(mi_count_size_overflow(count, size, &total))) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_new(total); + } +} + +void* mi_new_realloc(void* p, size_t newsize) { + void* q; + do { + q = mi_realloc(p, newsize); + } while (q == NULL && mi_try_new_handler(false)); + return q; +} + +void* mi_new_reallocn(void* p, size_t newcount, size_t size) { + size_t total; + if (mi_unlikely(mi_count_size_overflow(newcount, size, &total))) { + mi_try_new_handler(false); // on 
overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_new_realloc(p, total); + } +} diff --git a/compat/mimalloc/arena.c b/compat/mimalloc/arena.c new file mode 100644 index 00000000000000..567c8a93ac30c8 --- /dev/null +++ b/compat/mimalloc/arena.c @@ -0,0 +1,446 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +"Arenas" are fixed area's of OS memory from which we can allocate +large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). +In contrast to the rest of mimalloc, the arenas are shared between +threads and need to be accessed using atomic operations. + +Currently arenas are only used to for huge OS page (1GiB) reservations, +or direct OS memory reservations -- otherwise it delegates to direct allocation from the OS. +In the future, we can expose an API to manually add more kinds of arenas +which is sometimes needed for embedded devices or shared memory for example. +(We can also employ this with WASI or `sbrk` systems to reserve large arenas + on demand and be able to reuse them efficiently). + +The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. 
+-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset +#include // ENOMEM + +#include "bitmap.h" // atomic bitmap + + +// os.c +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); + +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); + +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); + + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + + +// Block info: bit 0 contains the `in_use` bit, the upper bits the +// size in count of arena blocks. +typedef uintptr_t mi_block_info_t; +#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 8MiB (must be at least MI_SEGMENT_ALIGN) +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB +#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) + +// A memory arena descriptor +typedef struct mi_arena_s { + _Atomic(uint8_t*) start; // the start of the memory area + size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) + int numa_node; // associated NUMA node + bool is_zero_init; // is the arena zero initialized? + bool allow_decommit; // is decommit allowed? 
if true, is_large should be false and blocks_committed != NULL + bool is_large; // large- or huge OS pages (always committed) + _Atomic(size_t) search_idx; // optimization to start the search for free blocks + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) + mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) +} mi_arena_t; + + +// The available arenas +static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; +static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 + + +/* ----------------------------------------------------------- + Arena allocations get a memory id where the lower 8 bits are + the arena index +1, and the upper bits the block index. +----------------------------------------------------------- */ + +// Use `0` as a special id for direct OS allocated memory. +#define MI_MEMID_OS 0 + +static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { + mi_assert_internal(arena_index < 0xFE); + mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? 
+ return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); +} + +static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { + mi_assert_internal(memid != MI_MEMID_OS); + *arena_index = (memid & 0xFF) - 1; + *bitmap_index = (memid >> 8); +} + +static size_t mi_block_count_of_size(size_t size) { + return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); +} + +/* ----------------------------------------------------------- + Thread safe allocation in an arena +----------------------------------------------------------- */ +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) +{ + size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter + if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around + return true; + }; + return false; +} + + +/* ----------------------------------------------------------- + Arena Allocation +----------------------------------------------------------- */ + +static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + mi_bitmap_index_t bitmap_index; + if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; + + // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
+ void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); + *memid = mi_arena_id_create(arena_index, bitmap_index); + *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + *large = arena->is_large; + *is_pinned = (arena->is_large || !arena->allow_decommit); + if (arena->blocks_committed == NULL) { + // always committed + *commit = true; + } + else if (*commit) { + // arena not committed as a whole, but commit requested: ensure commit now + bool any_uncommitted; + _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + if (any_uncommitted) { + bool commit_zero; + _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); + if (commit_zero) *is_zero = true; + } + } + else { + // no need to commit, but check if already fully committed + *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + } + return p; +} + +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + MI_UNUSED_RELEASE(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + const size_t bcount = mi_block_count_of_size(size); + if (mi_likely(max_arena == 0)) return NULL; + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + + // try numa affine allocation + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
+ (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + return p; + } + } + } + + // try from another numa node instead.. + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena==NULL) break; // end reached + if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) { + return p; + } + } + } + return NULL; +} + + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, + size_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = MI_MEMID_OS; + *is_zero = false; + *is_pinned = false; + + bool default_large = false; + if (large==NULL) large = &default_large; // ensure `large != NULL` + const int numa_node = _mi_os_numa_node(tld); // current numa node + + // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) { + void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, memid, tld); + if (p != NULL) return p; + } + + // finally, fall back to the OS + if (mi_option_is_enabled(mi_option_limit_os_alloc)) { + errno = ENOMEM; + return NULL; + } + *is_zero = true; + *memid = MI_MEMID_OS; + void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, 
tld->stats); + if (p != NULL) *is_pinned = *large; + return p; +} + +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, memid, tld); +} + +/* ----------------------------------------------------------- + Arena free +----------------------------------------------------------- */ + +void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld->stats != NULL); + if (p==NULL) return; + if (size==0) return; + + if (memid == MI_MEMID_OS) { + // was a direct OS allocation, pass through + _mi_os_free_ex(p, size, all_committed, tld->stats); + } + else { + // allocated in an arena + size_t arena_idx; + size_t bitmap_idx; + mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); + mi_assert_internal(arena != NULL); + const size_t blocks = mi_block_count_of_size(size); + // checks + if (arena == NULL) { + _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); + if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) { + _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + // potentially decommit + if (!arena->allow_decommit || arena->blocks_committed == NULL) { + mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) + } + else { + mi_assert_internal(arena->blocks_committed != NULL); + _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, tld->stats); // ok if this fails + 
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + } + // and make it available to others again + bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); + if (!all_inuse) { + _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); + return; + }; + } +} + +/* ----------------------------------------------------------- + Add an arena. +----------------------------------------------------------- */ + +static bool mi_arena_add(mi_arena_t* arena) { + mi_assert_internal(arena != NULL); + mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); + mi_assert_internal(arena->block_count > 0); + + size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); + if (i >= MI_MAX_ARENAS) { + mi_atomic_decrement_acq_rel(&mi_arena_count); + return false; + } + mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); + return true; +} + +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept +{ + if (size < MI_ARENA_BLOCK_SIZE) return false; + + if (is_large) { + mi_assert_internal(is_committed); + is_committed = true; + } + + const size_t bcount = size / MI_ARENA_BLOCK_SIZE; + const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); + const size_t bitmaps = (is_committed ? 2 : 3); + const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? + if (arena == NULL) return false; + + arena->block_count = bcount; + arena->field_count = fields; + arena->start = (uint8_t*)start; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) + arena->is_large = is_large; + arena->is_zero_init = is_zero; + arena->allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory + arena->search_idx = 0; + arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap + arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap + // the bitmaps are already zero initialized due to os_alloc + // initialize committed bitmap? + if (arena->blocks_committed != NULL && is_committed) { + memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning + } + // and claim leftover blocks if needed (so we never allocate there) + ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + mi_assert_internal(post >= 0); + if (post > 0) { + // don't use leftover bits at the end + mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); + _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); + } + + mi_arena_add(arena); + return true; +} + +// Reserve a range of regular OS memory +int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept +{ + size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block + bool large = allow_large; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); + if (start==NULL) return ENOMEM; + if (!mi_manage_os_memory(start, size, (large || commit), large, true, -1)) { + _mi_os_free_ex(start, size, commit, &_mi_stats_main); + _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); + return ENOMEM; + } + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? 
" (in large os pages)" : ""); + return 0; +} + +static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) { + size_t inuse_count = 0; + for (size_t i = 0; i < field_count; i++) { + char buf[MI_BITMAP_FIELD_BITS + 1]; + uintptr_t field = mi_atomic_load_relaxed(&fields[i]); + for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) { + bool inuse = ((((uintptr_t)1 << bit) & field) != 0); + if (inuse) inuse_count++; + buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 'x' : '.'); + } + buf[MI_BITMAP_FIELD_BITS] = 0; + _mi_verbose_message("%s%s\n", prefix, buf); + } + return inuse_count; +} + +void mi_debug_show_arenas(void) mi_attr_noexcept { + size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arenas; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; + size_t inuse_count = 0; + _mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count); + inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count); + _mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count); + } +} + +/* ----------------------------------------------------------- + Reserve a huge page arena. 
+----------------------------------------------------------- */ +// reserve at a specific numa node +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { + if (pages==0) return 0; + if (numa_node < -1) numa_node = -1; + if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); + size_t hsize = 0; + size_t pages_reserved = 0; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); + if (p==NULL || pages_reserved==0) { + _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); + return ENOMEM; + } + _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); + + if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) { + _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); + return ENOMEM; + } + return 0; +} + + +// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { + if (pages == 0) return 0; + + // pages per numa node + size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); + if (numa_count <= 0) numa_count = 1; + const size_t pages_per = pages / numa_count; + const size_t pages_mod = pages % numa_count; + const size_t timeout_per = (timeout_msecs==0 ? 
0 : (timeout_msecs / numa_count) + 50); + + // reserve evenly among numa nodes + for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + size_t node_pages = pages_per; // can be 0 + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); + if (err) return err; + if (pages < node_pages) { + pages = 0; + } + else { + pages -= node_pages; + } + } + + return 0; +} + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + MI_UNUSED(max_secs); + _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + if (pages_reserved != NULL) *pages_reserved = 0; + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); + if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; + return err; +} diff --git a/compat/mimalloc/bitmap.c b/compat/mimalloc/bitmap.c new file mode 100644 index 00000000000000..8634b32ab13fa9 --- /dev/null +++ b/compat/mimalloc/bitmap.c @@ -0,0 +1,395 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2021 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +Concurrent bitmap that can set/reset sequences of bits atomically, +represeted as an array of fields where each field is a machine word (`size_t`) + +There are two api's; the standard one cannot have sequences that cross +between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). 
+(this is used in region allocation) + +The `_across` postfixed functions do allow sequences that can cross over +between the fields. (This is used in arena allocation) +---------------------------------------------------------------------------- */ + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "bitmap.h" + +/* ----------------------------------------------------------- + Bitmap definition +----------------------------------------------------------- */ + +// The bit mask for a given number of blocks at a specified bit index. +static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) { + mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + mi_assert_internal(count > 0); + if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; + if (count == 0) return 0; + return ((((size_t)1 << count) - 1) << bitidx); +} + + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. 
+inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); + mi_assert_internal(count > 0); + mi_bitmap_field_t* field = &bitmap[idx]; + size_t map = mi_atomic_load_relaxed(field); + if (map==MI_BITMAP_FIELD_FULL) return false; // short cut + + // search for 0-bit sequence of length count + const size_t mask = mi_bitmap_mask_(count, 0); + const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; + +#ifdef MI_HAVE_FAST_BITSCAN + size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible +#else + size_t bitidx = 0; // otherwise start at 0 +#endif + size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + + // scan linearly for a free range of zero bits + while (bitidx <= bitidx_max) { + const size_t mapm = map & m; + if (mapm == 0) { // are the mask bits free at bitidx? + mi_assert_internal((m >> bitidx) == mask); // no overflow? + const size_t newmap = map | m; + mi_assert_internal((newmap^map) >> bitidx == mask); + if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? + // no success, another thread claimed concurrently.. keep going (with updated `map`) + continue; + } + else { + // success, we claimed the bits! + *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + } + } + else { + // on to the next bit range +#ifdef MI_HAVE_FAST_BITSCAN + const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1); + mi_assert_internal(shift > 0 && shift <= count); +#else + const size_t shift = 1; +#endif + bitidx += shift; + m <<= shift; + } + } + // no bits found + return false; +} + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +// `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. 
+bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + return false; +} + +/* +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. +bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { + return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); +} +*/ + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. +bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == mask); + size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); + return ((prev & mask) == mask); +} + + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
+bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); + size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); + if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + return ((prev & mask) == 0); +} + +// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. +static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + size_t field = mi_atomic_load_relaxed(&bitmap[idx]); + if (any_ones != NULL) *any_ones = ((field & mask) != 0); + return ((field & mask) == mask); +} + +bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} + + +//-------------------------------------------------------------------------- +// the `_across` functions work on bitmaps where sequences can cross over +// between the fields. 
This is used in arena allocation +//-------------------------------------------------------------------------- + +// Try to atomically claim a sequence of `count` bits starting from the field +// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. +static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + + // check initial trailing zeros + mi_bitmap_field_t* field = &bitmap[idx]; + size_t map = mi_atomic_load_relaxed(field); + const size_t initial = mi_clz(map); // count of initial zeros starting at idx + mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); + if (initial == 0) return false; + if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields + if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries + + // scan ahead + size_t found = initial; + size_t mask = 0; // mask bits for the final field + while(found < count) { + field++; + map = mi_atomic_load_relaxed(field); + const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? 
MI_BITMAP_FIELD_BITS : (count - found)); + mask = mi_bitmap_mask_(mask_bits, 0); + if ((map & mask) != 0) return false; + found += mask_bits; + } + mi_assert_internal(field < &bitmap[bitmap_fields]); + + // found range of zeros up to the final field; mask contains mask in the final field + // now claim it atomically + mi_bitmap_field_t* const final_field = field; + const size_t final_mask = mask; + mi_bitmap_field_t* const initial_field = &bitmap[idx]; + const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); + + // initial field + size_t newmap; + field = initial_field; + map = mi_atomic_load_relaxed(field); + do { + newmap = map | initial_mask; + if ((map & initial_mask) != 0) { goto rollback; }; + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + + // intermediate fields + while (++field < final_field) { + newmap = MI_BITMAP_FIELD_FULL; + map = 0; + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } + } + + // final field + mi_assert_internal(field == final_field); + map = mi_atomic_load_relaxed(field); + do { + newmap = map | final_mask; + if ((map & final_mask) != 0) { goto rollback; } + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + + // claimed! + *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); + return true; + +rollback: + // roll back intermediate fields + while (--field > initial_field) { + newmap = 0; + map = MI_BITMAP_FIELD_FULL; + mi_assert_internal(mi_atomic_load_relaxed(field) == map); + mi_atomic_store_release(field, newmap); + } + if (field == initial_field) { + map = mi_atomic_load_relaxed(field); + do { + mi_assert_internal((map & initial_mask) == initial_mask); + newmap = map & ~initial_mask; + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + } + // retry? 
(we make a recursive call instead of goto to be able to use const declarations) + if (retries < 4) { + return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); + } + else { + return false; + } +} + + +// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + mi_assert_internal(count > 0); + if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + // try to claim inside the field + if (count <= MI_BITMAP_FIELD_BITS) { + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + // try to claim across fields + if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { + return true; + } + } + return false; +} + +// Helper for masks across fields; returns the mid count, post_mask may be 0 +static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { + MI_UNUSED_RELEASE(bitmap_fields); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) { + *pre_mask = mi_bitmap_mask_(count, bitidx); + *mid_mask = 0; + *post_mask = 0; + mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields); + return 0; + } + else { + const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx; + mi_assert_internal(pre_bits < count); + *pre_mask = mi_bitmap_mask_(pre_bits, bitidx); + count -= pre_bits; + const size_t mid_count = 
(count / MI_BITMAP_FIELD_BITS); + *mid_mask = MI_BITMAP_FIELD_FULL; + count %= MI_BITMAP_FIELD_BITS; + *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0)); + mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields); + return mid_count; + } +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. +bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + size_t pre_mask; + size_t mid_mask; + size_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_one = true; + mi_bitmap_field_t* field = &bitmap[idx]; + size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); + if ((prev & pre_mask) != pre_mask) all_one = false; + while(mid_count-- > 0) { + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); + if ((prev & mid_mask) != mid_mask) all_one = false; + } + if (post_mask!=0) { + prev = mi_atomic_and_acq_rel(field, ~post_mask); + if ((prev & post_mask) != post_mask) all_one = false; + } + return all_one; +} + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + size_t pre_mask; + size_t mid_mask; + size_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_zero = true; + bool any_zero = false; + _Atomic(size_t)*field = &bitmap[idx]; + size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); + if ((prev & pre_mask) != 0) all_zero = false; + if ((prev & pre_mask) != pre_mask) any_zero = true; + while (mid_count-- > 0) { + prev = mi_atomic_or_acq_rel(field++, mid_mask); + if ((prev & mid_mask) != 0) all_zero = false; + if ((prev & mid_mask) != mid_mask) any_zero = true; + } + if (post_mask!=0) { + prev = mi_atomic_or_acq_rel(field, post_mask); + if ((prev & post_mask) != 0) all_zero = false; + if ((prev & post_mask) != post_mask) any_zero = true; + } + if (pany_zero != NULL) *pany_zero = any_zero; + return all_zero; +} + + +// Returns `true` if all `count` bits were 1. +// `any_ones` is `true` if there was at least one bit set to one. 
+static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + size_t pre_mask; + size_t mid_mask; + size_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_ones = true; + bool any_ones = false; + mi_bitmap_field_t* field = &bitmap[idx]; + size_t prev = mi_atomic_load_relaxed(field++); + if ((prev & pre_mask) != pre_mask) all_ones = false; + if ((prev & pre_mask) != 0) any_ones = true; + while (mid_count-- > 0) { + prev = mi_atomic_load_relaxed(field++); + if ((prev & mid_mask) != mid_mask) all_ones = false; + if ((prev & mid_mask) != 0) any_ones = true; + } + if (post_mask!=0) { + prev = mi_atomic_load_relaxed(field); + if ((prev & post_mask) != post_mask) all_ones = false; + if ((prev & post_mask) != 0) any_ones = true; + } + if (pany_ones != NULL) *pany_ones = any_ones; + return all_ones; +} + +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} diff --git a/compat/mimalloc/bitmap.h b/compat/mimalloc/bitmap.h new file mode 100644 index 00000000000000..e3375033a9326e --- /dev/null +++ b/compat/mimalloc/bitmap.h @@ -0,0 +1,107 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2020 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. 
A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +Concurrent bitmap that can set/reset sequences of bits atomically, +represeted as an array of fields where each field is a machine word (`size_t`) + +There are two api's; the standard one cannot have sequences that cross +between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). +(this is used in region allocation) + +The `_across` postfixed functions do allow sequences that can cross over +between the fields. (This is used in arena allocation) +---------------------------------------------------------------------------- */ +#pragma once +#ifndef MI_BITMAP_H +#define MI_BITMAP_H + +/* ----------------------------------------------------------- + Bitmap definition +----------------------------------------------------------- */ + +#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE) +#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set + +// An atomic bitmap of `size_t` fields +typedef _Atomic(size_t) mi_bitmap_field_t; +typedef mi_bitmap_field_t* mi_bitmap_t; + +// A bitmap index is the index of the bit in a bitmap. +typedef size_t mi_bitmap_index_t; + +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { + mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); + return (idx*MI_BITMAP_FIELD_BITS) + bitidx; +} + +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) { + return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS); +} + +// Get the field index from a bit index. 
+static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx / MI_BITMAP_FIELD_BITS); +} + +// Get the bit index in a bitmap field +static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx % MI_BITMAP_FIELD_BITS); +} + +// Get the full bit index +static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx; +} + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. +bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx); + +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. +bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. +bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
+bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); + +bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + + +//-------------------------------------------------------------------------- +// the `_across` functions work on bitmaps where sequences can cross over +// between the fields. This is used in arena allocation +//-------------------------------------------------------------------------- + +// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. +bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); + +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +#endif diff --git a/compat/mimalloc/heap.c b/compat/mimalloc/heap.c new file mode 100644 index 00000000000000..42c6cfd63699db --- /dev/null +++ b/compat/mimalloc/heap.c @@ -0,0 +1,580 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset, memcpy + +#if defined(_MSC_VER) && (_MSC_VER < 1920) +#pragma warning(disable:4204) // non-constant aggregate initializer +#endif + +/* ----------------------------------------------------------- + Helpers +----------------------------------------------------------- */ + +// return `true` if ok, `false` to break +typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2); + +// Visit all pages in a heap; returns `false` if break was called. 
+static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2) +{ + if (heap==NULL || heap->page_count==0) return 0; + + // visit all pages + #if MI_DEBUG>1 + size_t total = heap->page_count; + #endif + size_t count = 0; + for (size_t i = 0; i <= MI_BIN_FULL; i++) { + mi_page_queue_t* pq = &heap->pages[i]; + mi_page_t* page = pq->first; + while(page != NULL) { + mi_page_t* next = page->next; // save next in case the page gets removed from the queue + mi_assert_internal(mi_page_heap(page) == heap); + count++; + if (!fn(heap, pq, page, arg1, arg2)) return false; + page = next; // and continue + } + } + mi_assert_internal(count == total); + return true; +} + + +#if MI_DEBUG>=2 +static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(pq); + mi_assert_internal(mi_page_heap(page) == heap); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_internal(segment->thread_id == heap->thread_id); + mi_assert_expensive(_mi_page_is_valid(page)); + return true; +} +#endif +#if MI_DEBUG>=3 +static bool mi_heap_is_valid(mi_heap_t* heap) { + mi_assert_internal(heap!=NULL); + mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); + return true; +} +#endif + + + + +/* ----------------------------------------------------------- + "Collect" pages by migrating `local_free` and `thread_free` + lists and freeing empty pages. 
This is done when a thread + stops (and in that case abandons pages if there are still + blocks alive) +----------------------------------------------------------- */ + +typedef enum mi_collect_e { + MI_NORMAL, + MI_FORCE, + MI_ABANDON +} mi_collect_t; + + +static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { + MI_UNUSED(arg2); + MI_UNUSED(heap); + mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); + mi_collect_t collect = *((mi_collect_t*)arg_collect); + _mi_page_free_collect(page, collect >= MI_FORCE); + if (mi_page_all_free(page)) { + // no more used blocks, free the page. + // note: this will free retired pages as well. + _mi_page_free(page, pq, collect >= MI_FORCE); + } + else if (collect == MI_ABANDON) { + // still used blocks but the thread is done; abandon the page + _mi_page_abandon(page, pq); + } + return true; // don't break +} + +static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(heap); + MI_UNUSED(pq); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + return true; // don't break +} + +static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) +{ + if (heap==NULL || !mi_heap_is_initialized(heap)) return; + + const bool force = collect >= MI_FORCE; + _mi_deferred_free(heap, force); + + // note: never reclaim on collect but leave it to threads that need storage to reclaim + const bool force_main = + #ifdef NDEBUG + collect == MI_FORCE + #else + collect >= MI_FORCE + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim; + + if (force_main) { + // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. + // if all memory is freed by now, all segments should be freed. 
+ _mi_abandoned_reclaim_all(heap, &heap->tld->segments); + } + + // if abandoning, mark all pages to no longer add to delayed_free + if (collect == MI_ABANDON) { + mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); + } + + // free thread delayed blocks. + // (if abandoning, after this there are no more thread-delayed references into the pages.) + _mi_heap_delayed_free(heap); + + // collect retired pages + _mi_heap_collect_retired(heap, force); + + // collect all pages owned by this thread + mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); + mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); + + // collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list) + // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment + _mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments); + + // collect segment local caches + if (force) { + _mi_segment_thread_collect(&heap->tld->segments); + } + + // decommit in global segment caches + // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment + _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); + + // collect regions on program-exit (or shared library unload) + if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + //_mi_mem_collect(&heap->tld->os); + } +} + +void _mi_heap_collect_abandon(mi_heap_t* heap) { + mi_heap_collect_ex(heap, MI_ABANDON); +} + +void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { + mi_heap_collect_ex(heap, (force ? 
MI_FORCE : MI_NORMAL)); +} + +void mi_collect(bool force) mi_attr_noexcept { + mi_heap_collect(mi_get_default_heap(), force); +} + + +/* ----------------------------------------------------------- + Heap new +----------------------------------------------------------- */ + +mi_heap_t* mi_heap_get_default(void) { + mi_thread_init(); + return mi_get_default_heap(); +} + +mi_heap_t* mi_heap_get_backing(void) { + mi_heap_t* heap = mi_heap_get_default(); + mi_assert_internal(heap!=NULL); + mi_heap_t* bheap = heap->tld->heap_backing; + mi_assert_internal(bheap!=NULL); + mi_assert_internal(bheap->thread_id == _mi_thread_id()); + return bheap; +} + +mi_heap_t* mi_heap_new(void) { + mi_heap_t* bheap = mi_heap_get_backing(); + mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? + if (heap==NULL) return NULL; + _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); + heap->tld = bheap->tld; + heap->thread_id = _mi_thread_id(); + _mi_random_split(&bheap->random, &heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; + heap->keys[0] = _mi_heap_random_next(heap); + heap->keys[1] = _mi_heap_random_next(heap); + heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe + // push on the thread local heaps list + heap->next = heap->tld->heaps; + heap->tld->heaps = heap; + return heap; +} + +uintptr_t _mi_heap_random_next(mi_heap_t* heap) { + return _mi_random_next(&heap->random); +} + +// zero out the page queues +static void mi_heap_reset_pages(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + mi_assert_internal(mi_heap_is_initialized(heap)); + // TODO: copy full empty heap instead? 
+ memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); +#ifdef MI_MEDIUM_DIRECT + memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium)); +#endif + _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages)); + heap->thread_delayed_free = NULL; + heap->page_count = 0; +} + +// called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources. +static void mi_heap_free(mi_heap_t* heap) { + mi_assert(heap != NULL); + mi_assert_internal(mi_heap_is_initialized(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return; + if (mi_heap_is_backing(heap)) return; // dont free the backing heap + + // reset default + if (mi_heap_is_default(heap)) { + _mi_heap_set_default_direct(heap->tld->heap_backing); + } + + // remove ourselves from the thread local heaps list + // linear search but we expect the number of heaps to be relatively small + mi_heap_t* prev = NULL; + mi_heap_t* curr = heap->tld->heaps; + while (curr != heap && curr != NULL) { + prev = curr; + curr = curr->next; + } + mi_assert_internal(curr == heap); + if (curr == heap) { + if (prev != NULL) { prev->next = heap->next; } + else { heap->tld->heaps = heap->next; } + } + mi_assert_internal(heap->tld->heaps != NULL); + + // and free the used memory + mi_free(heap); +} + + +/* ----------------------------------------------------------- + Heap destroy +----------------------------------------------------------- */ + +static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(heap); + MI_UNUSED(pq); + + // ensure no more thread_delayed_free will be added + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + + // stats + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, large, bsize); + } + else { + 
mi_heap_stat_decrease(heap, huge, bsize); + } + } +#if (MI_STAT) + _mi_page_free_collect(page, false); // update used count + const size_t inuse = page->used; + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal, bsize * inuse); +#if (MI_STAT>1) + mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], inuse); +#endif + } + mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... +#endif + + /// pretend it is all free now + mi_assert_internal(mi_page_thread_free(page) == NULL); + page->used = 0; + + // and free the page + // mi_page_free(page,false); + page->next = NULL; + page->prev = NULL; + _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); + + return true; // keep going +} + +void _mi_heap_destroy_pages(mi_heap_t* heap) { + mi_heap_visit_pages(heap, &_mi_heap_page_destroy, NULL, NULL); + mi_heap_reset_pages(heap); +} + +void mi_heap_destroy(mi_heap_t* heap) { + mi_assert(heap != NULL); + mi_assert(mi_heap_is_initialized(heap)); + mi_assert(heap->no_reclaim); + mi_assert_expensive(mi_heap_is_valid(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return; + if (!heap->no_reclaim) { + // don't free in case it may contain reclaimed pages + mi_heap_delete(heap); + } + else { + // free all pages + _mi_heap_destroy_pages(heap); + mi_heap_free(heap); + } +} + + + +/* ----------------------------------------------------------- + Safe Heap delete +----------------------------------------------------------- */ + +// Transfer the pages from one heap to the other +static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { + mi_assert_internal(heap!=NULL); + if (from==NULL || from->page_count == 0) return; + + // reduce the size of the delayed frees + _mi_heap_delayed_free(from); + + // transfer all pages by appending the queues; this will set a new heap field + // so threads may do delayed frees in either heap for a while. 
+ // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state + // so after this only the new heap will get delayed frees + for (size_t i = 0; i <= MI_BIN_FULL; i++) { + mi_page_queue_t* pq = &heap->pages[i]; + mi_page_queue_t* append = &from->pages[i]; + size_t pcount = _mi_page_queue_append(heap, pq, append); + heap->page_count += pcount; + from->page_count -= pcount; + } + mi_assert_internal(from->page_count == 0); + + // and do outstanding delayed frees in the `from` heap + // note: be careful here as the `heap` field in all those pages no longer point to `from`, + // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a + // the regular `_mi_free_delayed_block` which is safe. + _mi_heap_delayed_free(from); + #if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353 + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL); + #endif + + // and reset the `from` heap + mi_heap_reset_pages(from); +} + +// Safe delete a heap without freeing any still allocated blocks in that heap. 
+void mi_heap_delete(mi_heap_t* heap) +{ + mi_assert(heap != NULL); + mi_assert(mi_heap_is_initialized(heap)); + mi_assert_expensive(mi_heap_is_valid(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return; + + if (!mi_heap_is_backing(heap)) { + // tranfer still used pages to the backing heap + mi_heap_absorb(heap->tld->heap_backing, heap); + } + else { + // the backing heap abandons its pages + _mi_heap_collect_abandon(heap); + } + mi_assert_internal(heap->page_count==0); + mi_heap_free(heap); +} + +mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { + mi_assert(heap != NULL); + mi_assert(mi_heap_is_initialized(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL; + mi_assert_expensive(mi_heap_is_valid(heap)); + mi_heap_t* old = mi_get_default_heap(); + _mi_heap_set_default_direct(heap); + return old; +} + + + + +/* ----------------------------------------------------------- + Analysis +----------------------------------------------------------- */ + +// static since it is not thread safe to access heaps from other threads. 
+static mi_heap_t* mi_heap_of_block(const void* p) { + if (p == NULL) return NULL; + mi_segment_t* segment = _mi_ptr_segment(p); + bool valid = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(valid); + if (mi_unlikely(!valid)) return NULL; + return mi_page_heap(_mi_segment_page_of(segment,p)); +} + +bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { + mi_assert(heap != NULL); + if (heap==NULL || !mi_heap_is_initialized(heap)) return false; + return (heap == mi_heap_of_block(p)); +} + + +static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) { + MI_UNUSED(heap); + MI_UNUSED(pq); + bool* found = (bool*)vfound; + mi_segment_t* segment = _mi_page_segment(page); + void* start = _mi_page_start(segment, page, NULL); + void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); + *found = (p >= start && p < end); + return (!*found); // continue if not found +} + +bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { + mi_assert(heap != NULL); + if (heap==NULL || !mi_heap_is_initialized(heap)) return false; + if (((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) return false; // only aligned pointers + bool found = false; + mi_heap_visit_pages(heap, &mi_heap_page_check_owned, (void*)p, &found); + return found; +} + +bool mi_check_owned(const void* p) { + return mi_heap_check_owned(mi_get_default_heap(), p); +} + +/* ----------------------------------------------------------- + Visit all heap blocks and areas + Todo: enable visiting abandoned pages, and + enable visiting all blocks of all heaps across threads +----------------------------------------------------------- */ + +// Separate struct to keep `mi_page_t` out of the public interface +typedef struct mi_heap_area_ex_s { + mi_heap_area_t area; + mi_page_t* page; +} mi_heap_area_ex_t; + +static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) { + mi_assert(xarea != 
NULL); + if (xarea==NULL) return true; + const mi_heap_area_t* area = &xarea->area; + mi_page_t* page = xarea->page; + mi_assert(page != NULL); + if (page == NULL) return true; + + _mi_page_free_collect(page,true); + mi_assert_internal(page->local_free == NULL); + if (page->used == 0) return true; + + const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); // without padding + size_t psize; + uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); + + if (page->capacity == 1) { + // optimize page with one block + mi_assert_internal(page->used == 1 && page->free == NULL); + return visitor(mi_page_heap(page), area, pstart, ubsize, arg); + } + + // create a bitmap of free blocks. + #define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*)) + uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; + memset(free_map, 0, sizeof(free_map)); + + size_t free_count = 0; + for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { + free_count++; + mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); + size_t offset = (uint8_t*)block - pstart; + mi_assert_internal(offset % bsize == 0); + size_t blockidx = offset / bsize; // Todo: avoid division? 
+ mi_assert_internal( blockidx < MI_MAX_BLOCKS); + size_t bitidx = (blockidx / sizeof(uintptr_t)); + size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); + free_map[bitidx] |= ((uintptr_t)1 << bit); + } + mi_assert_internal(page->capacity == (free_count + page->used)); + + // walk through all blocks skipping the free ones + size_t used_count = 0; + for (size_t i = 0; i < page->capacity; i++) { + size_t bitidx = (i / sizeof(uintptr_t)); + size_t bit = i - (bitidx * sizeof(uintptr_t)); + uintptr_t m = free_map[bitidx]; + if (bit == 0 && m == UINTPTR_MAX) { + i += (sizeof(uintptr_t) - 1); // skip a run of free blocks + } + else if ((m & ((uintptr_t)1 << bit)) == 0) { + used_count++; + uint8_t* block = pstart + (i * bsize); + if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; + } + } + mi_assert_internal(page->used == used_count); + return true; +} + +typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg); + + +static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { + MI_UNUSED(heap); + MI_UNUSED(pq); + mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; + mi_heap_area_ex_t xarea; + const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); + xarea.page = page; + xarea.area.reserved = page->reserved * bsize; + xarea.area.committed = page->capacity * bsize; + xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); + xarea.area.used = page->used * bsize; + xarea.area.block_size = ubsize; + xarea.area.full_block_size = bsize; + return fun(heap, &xarea, arg); +} + +// Visit all heap pages as areas +static bool mi_heap_visit_areas(const mi_heap_t* heap, mi_heap_area_visit_fun* visitor, void* arg) { + if (visitor == NULL) return false; + return mi_heap_visit_pages((mi_heap_t*)heap, &mi_heap_visit_areas_page, (void*)(visitor), arg); // note: function pointer to void* :-{ +} + 
+// Just to pass arguments +typedef struct mi_visit_blocks_args_s { + bool visit_blocks; + mi_block_visit_fun* visitor; + void* arg; +} mi_visit_blocks_args_t; + +static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* xarea, void* arg) { + mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg; + if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false; + if (args->visit_blocks) { + return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg); + } + else { + return true; + } +} + +// Visit all blocks in a heap +bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; + return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); +} diff --git a/compat/mimalloc/init.c b/compat/mimalloc/init.c new file mode 100644 index 00000000000000..6b2a99e47a6180 --- /dev/null +++ b/compat/mimalloc/init.c @@ -0,0 +1,693 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memcpy, memset +#include // atexit + +// Empty page used to initialize the small free pages array +const mi_page_t _mi_page_empty = { + 0, false, false, false, false, + 0, // capacity + 0, // reserved capacity + { 0 }, // flags + false, // is_zero + 0, // retire_expire + NULL, // free + #if MI_ENCODE_FREELIST + { 0, 0 }, + #endif + 0, // used + 0, // xblock_size + NULL, // local_free + MI_ATOMIC_VAR_INIT(0), // xthread_free + MI_ATOMIC_VAR_INIT(0), // xheap + NULL, NULL + #if MI_INTPTR_SIZE==8 + , { 0 } // padding + #endif +}; + +#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) + +#if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } +#elif (MI_PADDING>0) +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } +#else +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } +#endif + + +// Empty page queues for every bin +#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } +#define MI_PAGE_QUEUES_EMPTY \ + { QNULL(1), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ + QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ + QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \ + QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \ + QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \ + QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), 
QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ + QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ + QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ + QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \ + QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 2) /* Full queue */ } + +#define MI_STAT_COUNT_NULL() {0,0,0,0} + +// Empty statistics +#if MI_STAT>1 +#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) } +#else +#define MI_STAT_COUNT_END_NULL() +#endif + +#define MI_STATS_NULL \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ + MI_STAT_COUNT_END_NULL() + + +// Empty slice span queues for every bin +#define SQNULL(sz) { NULL, NULL, sz } +#define MI_SEGMENT_SPAN_QUEUES_EMPTY \ + { SQNULL(1), \ + SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ + SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ + SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ + SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ + SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } + + +// -------------------------------------------------------- +// Statically allocate an empty heap as the initial +// thread local value for the default heap, +// and 
statically allocate the backing heap for the main +// thread so it can function without doing any allocation +// itself (as accessing a thread local for the first time +// may lead to allocation itself on some platforms) +// -------------------------------------------------------- + +mi_decl_cache_align const mi_heap_t _mi_heap_empty = { + NULL, + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + MI_ATOMIC_VAR_INIT(NULL), + 0, // tid + 0, // cookie + { 0, 0 }, // keys + { {0}, {0}, 0 }, + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + NULL, // next + false +}; + +#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats))) +#define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os))) + +mi_decl_cache_align static const mi_tld_t tld_empty = { + 0, + false, + NULL, NULL, + { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments + { 0, tld_empty_stats }, // os + { MI_STATS_NULL } // stats +}; + +// the thread-local default heap for allocation +mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; + +extern mi_heap_t _mi_heap_main; + +static mi_tld_t tld_main = { + 0, false, + &_mi_heap_main, & _mi_heap_main, + { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments + { 0, &tld_main.stats }, // os + { MI_STATS_NULL } // stats +}; + +mi_heap_t _mi_heap_main = { + &tld_main, + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + MI_ATOMIC_VAR_INIT(NULL), + 0, // thread id + 0, // initial cookie + { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0x846ca68b}, {0}, 0 }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + NULL, // next heap + false // can reclaim +}; + +bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. 
+ +mi_stats_t _mi_stats_main = { MI_STATS_NULL }; + + +static void mi_heap_main_init(void) { + if (_mi_heap_main.cookie == 0) { + _mi_heap_main.thread_id = _mi_thread_id(); + _mi_heap_main.cookie = _mi_os_random_weak((uintptr_t)&mi_heap_main_init); + _mi_random_init(&_mi_heap_main.random); + _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); + } +} + +mi_heap_t* _mi_heap_main_get(void) { + mi_heap_main_init(); + return &_mi_heap_main; +} + + +/* ----------------------------------------------------------- + Initialization and freeing of the thread local heaps +----------------------------------------------------------- */ + +// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). +typedef struct mi_thread_data_s { + mi_heap_t heap; // must come first due to cast in `_mi_heap_done` + mi_tld_t tld; +} mi_thread_data_t; + + +// Thread meta-data is allocated directly from the OS. For +// some programs that do not use thread pools and allocate and +// destroy many OS threads, this may causes too much overhead +// per thread so we maintain a small cache of recently freed metadata. + +#define TD_CACHE_SIZE (8) +static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; + +static mi_thread_data_t* mi_thread_data_alloc(void) { + // try to find thread metadata in the cache + mi_thread_data_t* td; + for (int i = 0; i < TD_CACHE_SIZE; i++) { + td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); + if (td != NULL) { + td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); + if (td != NULL) { + return td; + } + } + } + // if that fails, allocate directly from the OS + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + if (td == NULL) { + // if this fails, try once more. 
(issue #257) + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + if (td == NULL) { + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + } + } + return td; +} + +static void mi_thread_data_free( mi_thread_data_t* tdfree ) { + // try to add the thread metadata to the cache + for (int i = 0; i < TD_CACHE_SIZE; i++) { + mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); + if (td == NULL) { + mi_thread_data_t* expected = NULL; + if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) { + return; + } + } + } + // if that fails, just free it directly + _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); +} + +static void mi_thread_data_collect(void) { + // free all thread metadata from the cache + for (int i = 0; i < TD_CACHE_SIZE; i++) { + mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); + if (td != NULL) { + td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); + if (td != NULL) { + _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); + } + } + } +} + +// Initialize the thread local default heap, called from `mi_thread_init` +static bool _mi_heap_init(void) { + if (mi_heap_is_initialized(mi_get_default_heap())) return true; + if (_mi_is_main_thread()) { + // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization + // the main heap is statically allocated + mi_heap_main_init(); + _mi_heap_set_default_direct(&_mi_heap_main); + //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); + } + else { + // use `_mi_os_alloc` to allocate directly from the OS + mi_thread_data_t* td = mi_thread_data_alloc(); + if (td == NULL) return false; + + // OS allocated so already zero initialized + mi_tld_t* tld = &td->tld; + mi_heap_t* 
heap = &td->heap; + _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld)); + _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap)); + heap->thread_id = _mi_thread_id(); + _mi_random_init(&heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; + heap->keys[0] = _mi_heap_random_next(heap); + heap->keys[1] = _mi_heap_random_next(heap); + heap->tld = tld; + tld->heap_backing = heap; + tld->heaps = heap; + tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; + tld->os.stats = &tld->stats; + _mi_heap_set_default_direct(heap); + } + return false; +} + +// Free the thread local default heap (called from `mi_thread_done`) +static bool _mi_heap_done(mi_heap_t* heap) { + if (!mi_heap_is_initialized(heap)) return true; + + // reset default heap + _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); + + // switch to backing heap + heap = heap->tld->heap_backing; + if (!mi_heap_is_initialized(heap)) return false; + + // delete all non-backing heaps in this thread + mi_heap_t* curr = heap->tld->heaps; + while (curr != NULL) { + mi_heap_t* next = curr->next; // save `next` as `curr` will be freed + if (curr != heap) { + mi_assert_internal(!mi_heap_is_backing(curr)); + mi_heap_delete(curr); + } + curr = next; + } + mi_assert_internal(heap->tld->heaps == heap && heap->next == NULL); + mi_assert_internal(mi_heap_is_backing(heap)); + + // collect if not the main thread + if (heap != &_mi_heap_main) { + _mi_heap_collect_abandon(heap); + } + + // merge stats + _mi_stats_done(&heap->tld->stats); + + // free if not the main thread + if (heap != &_mi_heap_main) { + // the following assertion does not always hold for huge segments as those are always treated + // as abondened: one may allocate it in one thread, but deallocate in another in which case + // the count can be too large or negative. todo: perhaps not count huge segments? 
see issue #363 + // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); + mi_thread_data_free((mi_thread_data_t*)heap); + } + else { + mi_thread_data_collect(); // free cached thread metadata + #if 0 + // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, + // there may still be delete/free calls after the mi_fls_done is called. Issue #207 + _mi_heap_destroy_pages(heap); + mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); + #endif + } + return false; +} + + + +// -------------------------------------------------------- +// Try to run `mi_thread_done()` automatically so any memory +// owned by the thread but not yet released can be abandoned +// and re-owned by another thread. +// +// 1. windows dynamic library: +// call from DllMain on DLL_THREAD_DETACH +// 2. windows static library: +// use `FlsAlloc` to call a destructor when the thread is done +// 3. unix, pthreads: +// use a pthread key to call a destructor when a pthread is done +// +// In the last two cases we also need to call `mi_process_init` +// to set up the thread local keys. 
+// -------------------------------------------------------- + +static void _mi_thread_done(mi_heap_t* default_heap); + +#if defined(_WIN32) && defined(MI_SHARED_LIB) + // nothing to do as it is done in DllMain +#elif defined(_WIN32) && !defined(MI_SHARED_LIB) + // use thread local storage keys to detect thread ending + #include + #include + #if (_WIN32_WINNT < 0x600) // before Windows Vista + WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); + WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); + WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); + WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); + #endif + static DWORD mi_fls_key = (DWORD)(-1); + static void NTAPI mi_fls_done(PVOID value) { + if (value!=NULL) _mi_thread_done((mi_heap_t*)value); + } +#elif defined(MI_USE_PTHREADS) + // use pthread local storage keys to detect thread ending + // (and used with MI_TLS_PTHREADS for the default heap) + pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); + static void mi_pthread_done(void* value) { + if (value!=NULL) _mi_thread_done((mi_heap_t*)value); + } +#elif defined(__wasi__) +// no pthreads in the WebAssembly Standard Interface +#else + #pragma message("define a way to call mi_thread_done when a thread is done") +#endif + +// Set up handlers so `mi_thread_done` is called automatically +static void mi_process_setup_auto_thread_done(void) { + static bool tls_initialized = false; // fine if it races + if (tls_initialized) return; + tls_initialized = true; + #if defined(_WIN32) && defined(MI_SHARED_LIB) + // nothing to do as it is done in DllMain + #elif defined(_WIN32) && !defined(MI_SHARED_LIB) + mi_fls_key = FlsAlloc(&mi_fls_done); + #elif defined(MI_USE_PTHREADS) + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); + pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); + #endif + _mi_heap_set_default_direct(&_mi_heap_main); +} + + +bool 
_mi_is_main_thread(void) { + return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); +} + +static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); + +size_t _mi_current_thread_count(void) { + return mi_atomic_load_relaxed(&thread_count); +} + +// This is called from the `mi_malloc_generic` +void mi_thread_init(void) mi_attr_noexcept +{ + // ensure our process has started already + mi_process_init(); + + // initialize the thread local default heap + // (this will call `_mi_heap_set_default_direct` and thus set the + // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) + if (_mi_heap_init()) return; // returns true if already initialized + + _mi_stat_increase(&_mi_stats_main.threads, 1); + mi_atomic_increment_relaxed(&thread_count); + //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); +} + +void mi_thread_done(void) mi_attr_noexcept { + _mi_thread_done(mi_get_default_heap()); +} + +static void _mi_thread_done(mi_heap_t* heap) { + mi_atomic_decrement_relaxed(&thread_count); + _mi_stat_decrease(&_mi_stats_main.threads, 1); + + // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... + if (heap->thread_id != _mi_thread_id()) return; + + // abandon the thread local heap + if (_mi_heap_done(heap)) return; // returns true if already ran +} + +void _mi_heap_set_default_direct(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + #if defined(MI_TLS_SLOT) + mi_tls_slot_set(MI_TLS_SLOT,heap); + #elif defined(MI_TLS_PTHREAD_SLOT_OFS) + *mi_tls_pthread_heap_slot() = heap; + #elif defined(MI_TLS_PTHREAD) + // we use _mi_heap_default_key + #else + _mi_heap_default = heap; + #endif + + // ensure the default heap is passed to `_mi_thread_done` + // setting to a non-NULL value also ensures `mi_thread_done` is called. 
+ #if defined(_WIN32) && defined(MI_SHARED_LIB) + // nothing to do as it is done in DllMain + #elif defined(_WIN32) && !defined(MI_SHARED_LIB) + mi_assert_internal(mi_fls_key != 0); + FlsSetValue(mi_fls_key, heap); + #elif defined(MI_USE_PTHREADS) + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD + pthread_setspecific(_mi_heap_default_key, heap); + } + #endif +} + + +// -------------------------------------------------------- +// Run functions on process init/done, and thread init/done +// -------------------------------------------------------- +static void mi_process_done(void); + +static bool os_preloading = true; // true until this module is initialized +static bool mi_redirected = false; // true if malloc redirects to mi_malloc + +// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. +bool _mi_preloading(void) { + return os_preloading; +} + +mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { + return mi_redirected; +} + +// Communicate with the redirection module on Windows +#if defined(_WIN32) && defined(MI_SHARED_LIB) +#ifdef __cplusplus +extern "C" { +#endif +mi_decl_export void _mi_redirect_entry(DWORD reason) { + // called on redirection; careful as this may be called before DllMain + if (reason == DLL_PROCESS_ATTACH) { + mi_redirected = true; + } + else if (reason == DLL_PROCESS_DETACH) { + mi_redirected = false; + } + else if (reason == DLL_THREAD_DETACH) { + mi_thread_done(); + } +} +__declspec(dllimport) bool mi_allocator_init(const char** message); +__declspec(dllimport) void mi_allocator_done(void); +#ifdef __cplusplus +} +#endif +#else +static bool mi_allocator_init(const char** message) { + if (message != NULL) *message = NULL; + return true; +} +static void mi_allocator_done(void) { + // nothing to do +} +#endif + +// Called once by the process loader +static void mi_process_load(void) { + mi_heap_main_init(); + #if 
defined(MI_TLS_RECURSE_GUARD) + volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; + MI_UNUSED(dummy); + #endif + os_preloading = false; + #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) + atexit(&mi_process_done); + #endif + _mi_options_init(); + mi_process_init(); + //mi_stats_reset();- + if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); + + // show message from the redirector (if present) + const char* msg = NULL; + mi_allocator_init(&msg); + if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { + _mi_fputs(NULL,NULL,NULL,msg); + } +} + +#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) +#include +mi_decl_cache_align bool _mi_cpu_has_fsrm = false; + +static void mi_detect_cpu_features(void) { + // FSRM for fast rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)) + int32_t cpu_info[4]; + __cpuid(cpu_info, 7); + _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see +} +#else +static void mi_detect_cpu_features(void) { + // nothing +} +#endif + +// Initialize the process; called by thread_init or the process loader +void mi_process_init(void) mi_attr_noexcept { + // ensure we are called once + if (_mi_process_is_initialized) return; + _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); + _mi_process_is_initialized = true; + mi_process_setup_auto_thread_done(); + + + mi_detect_cpu_features(); + _mi_os_init(); + mi_heap_main_init(); + #if (MI_DEBUG) + _mi_verbose_message("debug level : %d\n", MI_DEBUG); + #endif + _mi_verbose_message("secure level: %d\n", MI_SECURE); + mi_thread_init(); + + #if defined(_WIN32) && !defined(MI_SHARED_LIB) + // When building as a static lib the FLS cleanup happens to early for the main thread. 
+ // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup + // will not call _mi_thread_done on the (still executing) main thread. See issue #508. + FlsSetValue(mi_fls_key, NULL); + #endif + + mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + + if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); + long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at); + if (reserve_at != -1) { + mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500); + } else { + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); + } + } + if (mi_option_is_enabled(mi_option_reserve_os_memory)) { + long ksize = mi_option_get(mi_option_reserve_os_memory); + if (ksize > 0) { + mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */); + } + } +} + +// Called when the process is done (through `at_exit`) +static void mi_process_done(void) { + // only shutdown if we were initialized + if (!_mi_process_is_initialized) return; + // ensure we are called once + static bool process_done = false; + if (process_done) return; + process_done = true; + + #if defined(_WIN32) && !defined(MI_SHARED_LIB) + FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208 + #endif + + #ifndef MI_SKIP_COLLECT_ON_EXIT + #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) + // free all memory if possible on process exit. This is not needed for a stand-alone process + // but should be done if mimalloc is statically linked into another shared library which + // is repeatedly loaded/unloaded, see issue #281. 
+ mi_collect(true /* force */ ); + #endif + #endif + + if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { + mi_stats_print(NULL); + } + mi_allocator_done(); + _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); + os_preloading = true; // don't call the C runtime anymore +} + + + +#if defined(_WIN32) && defined(MI_SHARED_LIB) + // Windows DLL: easy to hook into process_init and thread_done + __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + MI_UNUSED(reserved); + MI_UNUSED(inst); + if (reason==DLL_PROCESS_ATTACH) { + mi_process_load(); + } + else if (reason==DLL_PROCESS_DETACH) { + mi_process_done(); + } + else if (reason==DLL_THREAD_DETACH) { + if (!mi_is_redirected()) { + mi_thread_done(); + } + } + return TRUE; + } + +#elif defined(_MSC_VER) + // MSVC: use data section magic for static libraries + // See + static int _mi_process_init(void) { + mi_process_load(); + return 0; + } + typedef int(*_mi_crt_callback_t)(void); + #if defined(_M_X64) || defined(_M_ARM64) + __pragma(comment(linker, "/include:" "_mi_msvc_initu")) + #pragma section(".CRT$XIU", long, read) + #else + __pragma(comment(linker, "/include:" "__mi_msvc_initu")) + #endif + #pragma data_seg(".CRT$XIU") + mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init }; + #pragma data_seg() + +#elif defined(__cplusplus) + // C++: use static initialization to detect process start + static bool _mi_process_init(void) { + mi_process_load(); + return (_mi_heap_main.thread_id != 0); + } + static bool mi_initialized = _mi_process_init(); + +#elif defined(__GNUC__) || defined(__clang__) + // GCC,Clang: use the constructor attribute + static void __attribute__((constructor)) _mi_process_init(void) { + mi_process_load(); + } + +#else +#pragma message("define a way to call mi_process_load on your platform") +#endif diff --git a/compat/mimalloc/mimalloc-atomic.h b/compat/mimalloc/mimalloc-atomic.h 
new file mode 100644 index 00000000000000..c66f80493321ee --- /dev/null +++ b/compat/mimalloc/mimalloc-atomic.h @@ -0,0 +1,338 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2021 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_ATOMIC_H +#define MIMALLOC_ATOMIC_H + +// -------------------------------------------------------------------------------------------- +// Atomics +// We need to be portable between C, C++, and MSVC. +// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// This is why we try to use only `uintptr_t` and `*` as atomic types. +// To gain better insight in the range of used atomics, we use explicitly named memory order operations +// instead of passing the memory order as a parameter. 
+// ----------------------------------------------------------------------------------------------- + +#if defined(__cplusplus) +// Use C++ atomics +#include +#define _Atomic(tp) std::atomic +#define mi_atomic(name) std::atomic_##name +#define mi_memory_order(name) std::memory_order_##name +#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571 + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif +#elif defined(_MSC_VER) +// Use MSVC C wrapper for C11 atomics +#define _Atomic(tp) tp +#define MI_ATOMIC_VAR_INIT(x) x +#define mi_atomic(name) mi_atomic_##name +#define mi_memory_order(name) mi_memory_order_##name +#else +// Use C11 atomics +#include +#define mi_atomic(name) atomic_##name +#define mi_memory_order(name) memory_order_##name +#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif + +// Various defines for all used memory orders in mimalloc +#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail) + +#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail) + +#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_weak_acq_rel(p,exp,des) 
mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) +#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) + +#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel)) + +#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1) +#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1) +#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1) +#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1) + +static inline void mi_atomic_yield(void); +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add); +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); + + +#if defined(__cplusplus) || !defined(_MSC_VER) + +// In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value) +// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well +#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p) +#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p) + +// In C++ we need to add casts to help resolve templates if NULL is passed +#if defined(__cplusplus) +#define mi_atomic_store_ptr_release(tp,p,x) 
mi_atomic_store_release(p,(tp*)x) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) +#else +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) +#endif + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { + return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); +} +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { + int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); + while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ }; +} + +// Used by timers +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) 
mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) + + + +#elif defined(_MSC_VER) + +// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. +#define WIN32_LEAN_AND_MEAN +#include +#include +#ifdef _WIN64 +typedef LONG64 msc_intptr_t; +#define MI_64(f) f##64 +#else +typedef LONG msc_intptr_t; +#define MI_64(f) f +#endif + +typedef enum mi_memory_order_e { + mi_memory_order_relaxed, + mi_memory_order_consume, + mi_memory_order_acquire, + mi_memory_order_release, + mi_memory_order_acq_rel, + mi_memory_order_seq_cst +} mi_memory_order; + +static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +} +static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)); +} +static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); +} +static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); +} +static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { + (void)(mo1); (void)(mo2); + uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected)); + if (read == *expected) { + return true; + } + else { + *expected = read; + return false; + } +} +static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* 
expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { + return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2); +} +static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); +} +static inline void mi_atomic_thread_fence(mi_memory_order mo) { + (void)(mo); + _Atomic(uintptr_t) x = 0; + mi_atomic_exchange_explicit(&x, 1, mo); +} +static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_IX86) || defined(_M_X64) + return *p; +#else + uintptr_t x = *p; + if (mo > mi_memory_order_relaxed) { + while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; + } + return x; +#endif +} +static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); +#if defined(_M_IX86) || defined(_M_X64) + *p = x; +#else + mi_atomic_exchange_explicit(p, x, mo); +#endif +} +static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_X64) + return *p; +#else + int64_t old = *p; + int64_t x = old; + while ((old = InterlockedCompareExchange64(p, x, old)) != x) { + x = old; + } + return x; +#endif +} +static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) { + (void)(mo); +#if defined(x_M_IX86) || defined(_M_X64) + *p = x; +#else + InterlockedExchange64(p, x); +#endif +} + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { +#ifdef _WIN64 + return (int64_t)mi_atomic_addi((int64_t*)p, add); +#else + int64_t current; + int64_t sum; + do { + current = *p; + sum = current + add; + } while (_InterlockedCompareExchange64(p, sum, current) != 
current); + return current; +#endif +} +static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { + int64_t current; + do { + current = *p; + } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); +} + +// The pointer macros cast to `uintptr_t`. +#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) + +#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed)) + + +#endif + + +// Atomically add a signed value; returns the previous value. 
+static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) { + return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add); +} + +// Atomically subtract a signed value; returns the previous value. +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { + return (intptr_t)mi_atomic_addi(p, -sub); +} + +// Yield +#if defined(__cplusplus) +#include +static inline void mi_atomic_yield(void) { + std::this_thread::yield(); +} +#elif defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include +static inline void mi_atomic_yield(void) { + YieldProcessor(); +} +#elif defined(__SSE2__) +#include +static inline void mi_atomic_yield(void) { + _mm_pause(); +} +#elif (defined(__GNUC__) || defined(__clang__)) && \ + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) +#if defined(__x86_64__) || defined(__i386__) +static inline void mi_atomic_yield(void) { + __asm__ volatile ("pause" ::: "memory"); +} +#elif defined(__aarch64__) +static inline void mi_atomic_yield(void) { + __asm__ volatile("wfe"); +} +#elif (defined(__arm__) && __ARM_ARCH__ >= 7) +static inline void mi_atomic_yield(void) { + __asm__ volatile("yield" ::: "memory"); +} +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +static inline void mi_atomic_yield(void) { + __asm__ __volatile__ ("or 27,27,27" ::: "memory"); +} +#elif defined(__armel__) || defined(__ARMEL__) +static inline void mi_atomic_yield(void) { + __asm__ volatile ("nop" ::: "memory"); +} +#endif +#elif defined(__sun) +// Fallback for other archs +#include +static inline void mi_atomic_yield(void) { + smt_pause(); +} +#elif defined(__wasi__) +#include +static inline void mi_atomic_yield(void) { + sched_yield(); +} +#else +#include +static inline void mi_atomic_yield(void) { + sleep(0); +} +#endif + + +#endif // __MIMALLOC_ATOMIC_H diff --git 
a/compat/mimalloc/mimalloc-internal.h b/compat/mimalloc/mimalloc-internal.h new file mode 100644 index 00000000000000..de5c53b1e52f93 --- /dev/null +++ b/compat/mimalloc/mimalloc-internal.h @@ -0,0 +1,1049 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_INTERNAL_H +#define MIMALLOC_INTERNAL_H + +#include "mimalloc-types.h" + +#if (MI_DEBUG>0) +#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) +#else +#define mi_trace_message(...) +#endif + +#define MI_CACHE_LINE 64 +#if defined(_MSC_VER) +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#pragma warning(disable:26812) // unscoped enum warning +#define mi_decl_noinline __declspec(noinline) +#define mi_decl_thread __declspec(thread) +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc +#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_thread __thread +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#else +#define mi_decl_noinline +#define mi_decl_thread __thread // hope for the best :-) +#define mi_decl_cache_align +#endif + +#if defined(__EMSCRIPTEN__) && !defined(__wasi__) +#define __wasi__ +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc +#endif + +#if !defined(_WIN32) && !defined(__wasi__) +#define MI_USE_PTHREADS +#include +#endif + +// "options.c" +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* 
out, void* arg, const char* fmt, ...); +void _mi_warning_message(const char* fmt, ...); +void _mi_verbose_message(const char* fmt, ...); +void _mi_trace_message(const char* fmt, ...); +void _mi_options_init(void); +void _mi_error_message(int err, const char* fmt, ...); + +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +uintptr_t _mi_os_random_weak(uintptr_t extra_seed); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c +extern mi_decl_cache_align mi_stats_t _mi_stats_main; +extern mi_decl_cache_align const mi_page_t _mi_page_empty; +bool _mi_is_main_thread(void); +size_t _mi_current_thread_count(void); +bool _mi_preloading(void); // true while the C runtime is not ready + +// os.c +size_t _mi_os_page_size(void); +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data + +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +// bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +size_t _mi_os_good_alloc_size(size_t size); +bool _mi_os_has_overcommit(void); + +// arena.c +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, 
mi_os_tld_t* tld); + +// "segment-cache.c" +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); +void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); + +// "segment.c" +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); +bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); +void _mi_segment_thread_collect(mi_segments_tld_t* tld); +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); + +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); +void _mi_abandoned_await_readers(void); +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); + + + +// "page.c" +void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; + +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks +void _mi_page_unfull(mi_page_t* page); +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... 
+void _mi_heap_delayed_free(mi_heap_t* heap); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); + +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); +void _mi_deferred_free(mi_heap_t* heap, bool force); + +void _mi_page_free_collect(mi_page_t* page,bool force); +void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments + +size_t _mi_bin_size(uint8_t bin); // for stats +uint8_t _mi_bin(size_t size); // for stats + +// "heap.c" +void _mi_heap_destroy_pages(mi_heap_t* heap); +void _mi_heap_collect_abandon(mi_heap_t* heap); +void _mi_heap_set_default_direct(mi_heap_t* heap); + +// "stats.c" +void _mi_stats_done(mi_stats_t* stats); + +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); + +// "alloc.c" +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; +mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); +bool _mi_free_delayed_block(mi_block_t* block); +void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size); + +#if MI_DEBUG>1 +bool _mi_page_is_valid(mi_page_t* page); +#endif + + +// ------------------------------------------------------ +// Branches +// ------------------------------------------------------ + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) __builtin_expect(!!(x),false) +#define mi_likely(x) __builtin_expect(!!(x),true) +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + + +/* 
----------------------------------------------------------- + Error codes passed to `_mi_fatal_error` + All are recoverable but EFAULT is a serious error and aborts by default in secure mode. + For portability define undefined error codes using common Unix codes: + +----------------------------------------------------------- */ +#include +#ifndef EAGAIN // double free +#define EAGAIN (11) +#endif +#ifndef ENOMEM // out of memory +#define ENOMEM (12) +#endif +#ifndef EFAULT // corrupted free-list or meta-data +#define EFAULT (14) +#endif +#ifndef EINVAL // trying to free an invalid pointer +#define EINVAL (22) +#endif +#ifndef EOVERFLOW // count*size overflow +#define EOVERFLOW (75) +#endif + + +/* ----------------------------------------------------------- + Inlined definitions +----------------------------------------------------------- */ +#define MI_UNUSED(x) (void)(x) +#if (MI_DEBUG>0) +#define MI_UNUSED_RELEASE(x) +#else +#define MI_UNUSED_RELEASE(x) MI_UNUSED(x) +#endif + +#define MI_INIT4(x) x(),x(),x(),x() +#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x) +#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x) +#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x) +#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) +#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) +#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) + + +// Is `x` a power of two? (0 is considered a power of two) +static inline bool _mi_is_power_of_two(uintptr_t x) { + return ((x & (x - 1)) == 0); +} + +// Align upwards +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return ((sz + mask) & ~mask); + } + else { + return (((sz + mask)/alignment)*alignment); + } +} + +// Align downwards +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? 
+ return (sz & ~mask); + } + else { + return ((sz / alignment) * alignment); + } +} + +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { + mi_assert_internal(divider != 0); + return (divider == 0 ? size : ((size + divider - 1) / divider)); +} + +// Is memory zero initialized? +static inline bool mi_mem_is_zero(void* p, size_t size) { + for (size_t i = 0; i < size; i++) { + if (((uint8_t*)p)[i] != 0) return false; + } + return true; +} + + +// Align a byte size to a size in _machine words_, +// i.e. byte size == `wsize*sizeof(void*)`. +static inline size_t _mi_wsize_from_size(size_t size) { + mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t)); + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Overflow detecting multiply +#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5)) +#include // UINT_MAX, ULONG_MAX +#if defined(_CLOCK_T) // for Illumos +#undef _CLOCK_T +#endif +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + #if (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, (unsigned long *)total); + #elif (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, (unsigned int *)total); + #else + return __builtin_umulll_overflow(count, size, (unsigned long long *)total); + #endif +} +#else /* __builtin_umul_overflow is unavailable */ +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) + *total = count * size; + return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) + && size > 0 && (SIZE_MAX / size) < count); +} +#endif + +// Safe multiply `count*size` into `total`; return `true` on overflow. 
+static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { + if (count==1) { // quick check for the case where count is one (common for C++ allocators) + *total = size; + return false; + } + else if (mi_unlikely(mi_mul_overflow(count, size, total))) { + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); + *total = SIZE_MAX; + return true; + } + else return false; +} + + +/* ---------------------------------------------------------------------------------------- +The thread local default heap: `_mi_get_default_heap` returns the thread local heap. +On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a +__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures +that the storage will always be available (allocated on the thread stacks). +On some platforms though we cannot use that when overriding `malloc` since the underlying +TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. +We try to circumvent this in an efficient way: +- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the + loader itself calls `malloc` even before the modules are initialized. +- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). +- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) +------------------------------------------------------------------------------------------- */ + +extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap +extern bool _mi_process_is_initialized; +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap + +#if defined(MI_MALLOC_OVERRIDE) +#if defined(__APPLE__) // macOS +#define MI_TLS_SLOT 89 // seems unused? 
+// #define MI_TLS_RECURSE_GUARD 1 +// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) +// see +#elif defined(__OpenBSD__) +// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) +// see +#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) +// #elif defined(__DragonFly__) +// #warning "mimalloc is not working correctly on DragonFly yet." +// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) +#elif defined(__ANDROID__) +// See issue #381 +#define MI_TLS_PTHREAD +#endif +#endif + +#if defined(MI_TLS_SLOT) +static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) +static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { + pthread_t self = pthread_self(); + #if defined(__DragonFly__) + if (self==NULL) { + mi_heap_t* pheap_main = _mi_heap_main_get(); + return &pheap_main; + } + #endif + return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); +} +#elif defined(MI_TLS_PTHREAD) +extern pthread_key_t _mi_heap_default_key; +#endif + +// Default heap to allocate from (if not using TLS- or pthread slots). +// Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`). +// This thread local variable is only used when neither MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS are defined. +// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356). 
+extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from + +static inline mi_heap_t* mi_get_default_heap(void) { +#if defined(MI_TLS_SLOT) + mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); + if (mi_unlikely(heap == NULL)) { + #ifdef __GNUC__ + __asm(""); // prevent conditional load of the address of _mi_heap_empty + #endif + heap = (mi_heap_t*)&_mi_heap_empty; + } + return heap; +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) + mi_heap_t* heap = *mi_tls_pthread_heap_slot(); + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +#elif defined(MI_TLS_PTHREAD) + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +#else + #if defined(MI_TLS_RECURSE_GUARD) + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); + #endif + return _mi_heap_default; +#endif +} + +static inline bool mi_heap_is_default(const mi_heap_t* heap) { + return (heap == mi_get_default_heap()); +} + +static inline bool mi_heap_is_backing(const mi_heap_t* heap) { + return (heap->tld->heap_backing == heap); +} + +static inline bool mi_heap_is_initialized(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); + return (heap != &_mi_heap_empty); +} + +static inline uintptr_t _mi_ptr_cookie(const void* p) { + extern mi_heap_t _mi_heap_main; + mi_assert_internal(_mi_heap_main.cookie != 0); + return ((uintptr_t)p ^ _mi_heap_main.cookie); +} + +/* ----------------------------------------------------------- + Pages +----------------------------------------------------------- */ + +static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { + mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); + const size_t idx = _mi_wsize_from_size(size); + mi_assert_internal(idx < MI_PAGES_DIRECT); + return heap->pages_free_direct[idx]; +} 
+
+// Same lookup as `_mi_heap_get_free_small_page`, on the calling thread's default heap.
+static inline mi_page_t* _mi_get_free_small_page(size_t size) {
+  mi_heap_t* const heap = mi_get_default_heap();
+  return _mi_heap_get_free_small_page(heap, size);
+}
+
+// Segment that contains the pointer: the address with the `MI_SEGMENT_MASK` bits cleared.
+// There is no NULL assertion here; `_mi_page_segment` tolerates a NULL result.
+static inline mi_segment_t* _mi_ptr_segment(const void* p) {
+  const uintptr_t addr = (uintptr_t)p;
+  return (mi_segment_t*)(addr & ~MI_SEGMENT_MASK);
+}
+
+// View a slice as a page; the slice must be the first slice of a run
+// (offset 0, non-zero count).
+static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
+  mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0);
+  mi_page_t* const page = (mi_page_t*)(s);
+  return page;
+}
+
+// View a page as its slice; the inverse of `mi_slice_to_page`.
+static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
+  mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0);
+  mi_slice_t* const first = (mi_slice_t*)(p);
+  return first;
+}
+
+// Segment belonging to a page (the page meta-data is stored in the segment's slice array)
+static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
+  mi_segment_t* segment = _mi_ptr_segment(page);
+  mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
+  return segment;
+}
+
+// Step back from any slice of a run to the first slice of that run, using the
+// byte offset stored in `slice_offset`.
+static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
+  uint8_t* const raw = (uint8_t*)slice - slice->slice_offset;
+  mi_slice_t* start = (mi_slice_t*)raw;
+  mi_assert_internal(start >= _mi_ptr_segment(slice)->slices);
+  mi_assert_internal(start->slice_offset == 0);
+  mi_assert_internal(start + start->slice_count > slice);
+  return start;
+}
+
+// Get the page containing the pointer `p`, which must lie inside `segment`.
+static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
+  ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
+  mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE);
+  size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;   // index of the slice that covers `p`
+  mi_assert_internal(idx < segment->slice_entries);
+  mi_slice_t* covering = (mi_slice_t*)&segment->slices[idx];
+  mi_slice_t* slice = mi_slice_first(covering);  // adjust to the block that holds the page data
+  mi_assert_internal(slice->slice_offset == 0);
+  mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries);
+  return mi_slice_to_page(slice);
+}
+
+// Quick page start for initialized pages (forwards to `_mi_segment_page_start`)
+static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
+  uint8_t* const start = _mi_segment_page_start(segment, page, page_size);
+  return start;
+}
+
+// Get the page containing the pointer
+static inline mi_page_t* _mi_ptr_page(void* p) {
+  const mi_segment_t* const segment = _mi_ptr_segment(p);
+  return _mi_segment_page_of(segment, p);
+}
+
+// Get the block size of a page (special case for huge objects)
+static inline size_t mi_page_block_size(const mi_page_t* page) {
+  const size_t bsize = page->xblock_size;
+  mi_assert_internal(bsize > 0);
+  if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) {
+    return bsize;
+  }
+  // at or above `MI_HUGE_BLOCK_SIZE` the page size is returned as the block size
+  size_t huge_size;
+  _mi_segment_page_start(_mi_page_segment(page), page, &huge_size);
+  return huge_size;
+}
+
+// Get the usable block size of a page without fixed padding.
+// This may still include internal padding due to alignment and rounding up size classes.
+static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
+  const size_t full_size = mi_page_block_size(page);
+  return full_size - MI_PADDING_SIZE;
+}
+
+// size of a segment in bytes
+static inline size_t mi_segment_size(mi_segment_t* segment) {
+  return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
+}
+
+// one past the last byte of a segment
+static inline uint8_t* mi_segment_end(mi_segment_t* segment) {
+  uint8_t* const base = (uint8_t*)segment;
+  return base + mi_segment_size(segment);
+}
+
+// Thread free access: `xthread_free` holds a block pointer in its upper bits
+// and a delayed-free flag in its 2 low bits; both are read with a relaxed load.
+static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
+  return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
+}
+
+static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
+  return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
+}
+
+// Heap access: the owning heap is read with a relaxed load and written with a release store.
+static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
+  return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
+}
+
+static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
+  mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
+  mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
+}
+
+// Thread free flag helpers: split and combine the (block pointer, delayed flag)
+// pair that is packed into a single `mi_thread_free_t` word.
+static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
+  return (mi_block_t*)(tf & ~0x03);   // clear the 2 flag bits
+}
+static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
+  return (mi_delayed_t)(tf & 0x03);   // keep only the 2 flag bits
+}
+static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
+  const uintptr_t ptr_bits  = (uintptr_t)block;
+  const uintptr_t flag_bits = (uintptr_t)delayed;
+  return (mi_thread_free_t)(ptr_bits | flag_bits);
+}
+static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
+  mi_block_t* const block = mi_tf_block(tf);
+  return mi_tf_make(block, delayed);
+}
+static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
+  const mi_delayed_t delayed = mi_tf_delayed(tf);
+  return mi_tf_make(block, delayed);
+}
+
+// are all blocks in a page freed?
+// note: needs an up-to-date `used` count (as the `xthread_free` list may not be empty); see `_mi_page_free_collect`.
+static inline bool mi_page_all_free(const mi_page_t* page) {
+  mi_assert_internal(page != NULL);
+  return !(page->used != 0);
+}
+
+// are there any available blocks? (either never handed out, or waiting on the thread-free list)
+static inline bool mi_page_has_any_available(const mi_page_t* page) {
+  mi_assert_internal(page != NULL && page->reserved > 0);
+  if (page->used < page->reserved) {
+    return true;
+  }
+  return (mi_page_thread_free(page) != NULL);
+}
+
+// are there immediately available blocks, i.e. blocks available on the free list.
+static inline bool mi_page_immediate_available(const mi_page_t* page) {
+  mi_assert_internal(page != NULL);
+  return !(page->free == NULL);
+}
+
+// is more than 7/8th of a page in use?
+static inline bool mi_page_mostly_used(const mi_page_t* page) { + if (page==NULL) return true; + uint16_t frac = page->reserved / 8U; + return (page->reserved - page->used <= frac); +} + +static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { + return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; +} + + + +//----------------------------------------------------------- +// Page flags +//----------------------------------------------------------- +static inline bool mi_page_is_in_full(const mi_page_t* page) { + return page->flags.x.in_full; +} + +static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { + page->flags.x.in_full = in_full; +} + +static inline bool mi_page_has_aligned(const mi_page_t* page) { + return page->flags.x.has_aligned; +} + +static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { + page->flags.x.has_aligned = has_aligned; +} + + +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read as well, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift)))); +} +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; + return (shift==0 ? 
x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift)))); +} + +static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { + void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); + return (mi_unlikely(p==null) ? NULL : p); +} + +static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { + uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p); + return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; +} + +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { + #ifdef MI_ENCODE_FREELIST + return (mi_block_t*)mi_ptr_decode(null, block->next, keys); + #else + MI_UNUSED(keys); MI_UNUSED(null); + return (mi_block_t*)block->next; + #endif +} + +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { + #ifdef MI_ENCODE_FREELIST + block->next = mi_ptr_encode(null, next, keys); + #else + MI_UNUSED(keys); MI_UNUSED(null); + block->next = (mi_encoded_t)next; + #endif +} + +static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { + #ifdef MI_ENCODE_FREELIST + mi_block_t* next = mi_block_nextx(page,block,page->keys); + // check for free list corruption: is `next` at least in the same page? + // TODO: check if `next` is `page->block_size` aligned? 
+ if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { + _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); + next = NULL; + } + return next; + #else + MI_UNUSED(page); + return mi_block_nextx(page,block,NULL); + #endif +} + +static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { + #ifdef MI_ENCODE_FREELIST + mi_block_set_nextx(page,block,next, page->keys); + #else + MI_UNUSED(page); + mi_block_set_nextx(page,block,next,NULL); + #endif +} + + +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + cm->mask[i] = 0; + } +} + +static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + cm->mask[i] = ~((size_t)0); + } +} + +static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if (cm->mask[i] != 0) return false; + } + return true; +} + +static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if (cm->mask[i] != ~((size_t)0)) return false; + } + return true; +} + +// defined in `segment.c`: +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); + +#define mi_commit_mask_foreach(cm,idx,count) \ + idx = 0; \ + while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { + +#define mi_commit_mask_foreach_end() \ + idx += count; \ + } + + + + +// ------------------------------------------------------------------- +// Fast "random" shuffle +// 
------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} + +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); + +extern _Atomic(size_t) _mi_numa_node_count; +static inline int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1)) return 0; + else return _mi_os_numa_node_get(tld); +} +static inline size_t _mi_os_numa_node_count(void) { + const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); + if (mi_likely(count>0)) return count; + else return _mi_os_numa_node_count_get(); +} + + +// ------------------------------------------------------------------- +// Getting the thread id should be performant as it is called in the +// fast path of `_mi_free` and we specialize for various platforms. +// We only require _mi_threadid() to return a unique id for each thread. +// ------------------------------------------------------------------- +#if defined(_WIN32) + +#define WIN32_LEAN_AND_MEAN +#include +static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +// We use assembly for a fast thread id on the main platforms. 
The TLS layout depends on +// both the OS and libc implementation so we use specific tests for each main platform. +// If you test on another platform and it works please send a PR :-) +// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. +#elif defined(__GNUC__) && ( \ + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + ) + +static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { + void* res; + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + res = tcb[slot]; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + res = tcb[slot]; + #endif + return res; +} + +// setting a tls slot is only used on macOS for now +static inline void 
mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + tcb[slot] = value; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + tcb[slot] = value; + #endif +} + +static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { + #if defined(__BIONIC__) + // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id + // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 + return (uintptr_t)mi_tls_slot(1); + #else + // in all our other targets, slot 0 is the thread id + // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h + // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 + return (uintptr_t)mi_tls_slot(0); + #endif +} + +#else + +// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). 
+static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&_mi_heap_default; +} + +#endif + + +// ----------------------------------------------------------------------- +// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) +// ----------------------------------------------------------------------- + +#if defined(__GNUC__) + +#include // LONG_MAX +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_clzl(x); +#else + return __builtin_clzll(x); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_ctzl(x); +#else + return __builtin_ctzll(x); +#endif +} + +#elif defined(_MSC_VER) + +#include // LONG_MAX +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanReverse(&idx, x); +#else + _BitScanReverse64(&idx, x); +#endif + return ((MI_INTPTR_BITS - 1) - idx); +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanForward(&idx, x); +#else + _BitScanForward64(&idx, x); +#endif + return idx; +} + +#else +static inline size_t mi_ctz32(uint32_t x) { + // de Bruijn multiplication, see + static const unsigned char debruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + if (x==0) return 32; + return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27]; +} +static inline size_t mi_clz32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, + 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 + }; + if (x==0) return 32; + x |= x >> 1; + x |= x >> 2; + x |= 
x >> 4; + x |= x >> 8; + x |= x >> 16; + return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27]; +} + +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_clz32((uint32_t)x); +#else + size_t count = mi_clz32((uint32_t)(x >> 32)); + if (count < 32) return count; + return (32 + mi_clz32((uint32_t)x)); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_ctz32((uint32_t)x); +#else + size_t count = mi_ctz32((uint32_t)x); + if (count < 32) return count; + return (32 + mi_ctz32((uint32_t)(x>>32))); +#endif +} + +#endif + +// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); +} + + +// --------------------------------------------------------------------------------- +// Provide our own `_mi_memcpy` for potential performance optimizations. +// +// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if +// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support +// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. +// --------------------------------------------------------------------------------- + +#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) +#include +#include +extern bool _mi_cpu_has_fsrm; +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + if (_mi_cpu_has_fsrm) { + __movsb((unsigned char*)dst, (const unsigned char*)src, n); + } + else { + memcpy(dst, src, n); // todo: use noinline? 
+ } +} +#else +#include +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + memcpy(dst, src, n); +} +#endif + + +// ------------------------------------------------------------------------------- +// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned +// This is used for example in `mi_realloc`. +// ------------------------------------------------------------------------------- + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) +// On GCC/CLang we provide a hint that the pointers are word aligned. +#include +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); + _mi_memcpy(adst, asrc, n); +} +#else +// Default fallback on `_mi_memcpy` +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); + _mi_memcpy(dst, src, n); +} +#endif + + +#endif diff --git a/compat/mimalloc/mimalloc-types.h b/compat/mimalloc/mimalloc-types.h new file mode 100644 index 00000000000000..a07858e22bccc0 --- /dev/null +++ b/compat/mimalloc/mimalloc-types.h @@ -0,0 +1,598 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TYPES_H +#define MIMALLOC_TYPES_H + +#include // ptrdiff_t +#include // uintptr_t, uint16_t, etc +#include "mimalloc-atomic.h" // _Atomic + +#ifdef _MSC_VER +#pragma warning(disable:4214) // bitfield is not int +#endif + +// Minimal alignment necessary. On most platforms 16 bytes are needed +// due to SSE registers for example. This must be at least `sizeof(void*)` +#ifndef MI_MAX_ALIGN_SIZE +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) +#endif + +// ------------------------------------------------------ +// Variants +// ------------------------------------------------------ + +// Define NDEBUG in the release version to disable assertions. +// #define NDEBUG + +// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). +// #define MI_STAT 1 + +// Define MI_SECURE to enable security mitigations +// #define MI_SECURE 1 // guard page around metadata +// #define MI_SECURE 2 // guard page around each mimalloc page +// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) +// #define MI_SECURE 4 // checks for double free. (may be more expensive) + +#if !defined(MI_SECURE) +#define MI_SECURE 0 +#endif + +// Define MI_DEBUG for debug mode +// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. +// #define MI_DEBUG 2 // + internal assertion checks +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) +#if !defined(MI_DEBUG) +#if !defined(NDEBUG) || defined(_DEBUG) +#define MI_DEBUG 2 +#else +#define MI_DEBUG 0 +#endif +#endif + +// Reserve extra padding at the end of each block to be more resilient against heap block overflows. +// The padding can detect byte-precise buffer overflow on free. 
+#if !defined(MI_PADDING) && (MI_DEBUG>=1) +#define MI_PADDING 1 +#endif + + +// Encoded free lists allow detection of corrupted free lists +// and can detect buffer overflows, modify after free, and double `free`s. +#if (MI_SECURE>=3 || MI_DEBUG>=1 || MI_PADDING > 0) +#define MI_ENCODE_FREELIST 1 +#endif + + +// ------------------------------------------------------ +// Platform specific values +// ------------------------------------------------------ + +// ------------------------------------------------------ +// Size of a pointer. +// We assume that `sizeof(void*)==sizeof(intptr_t)` +// and it holds for all platforms we know of. +// +// However, the C standard only requires that: +// p == (void*)((intptr_t)p)) +// but we also need: +// i == (intptr_t)((void*)i) +// or otherwise one might define an intptr_t type that is larger than a pointer... +// ------------------------------------------------------ + +#if INTPTR_MAX > INT64_MAX +# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example) +#elif INTPTR_MAX == INT64_MAX +# define MI_INTPTR_SHIFT (3) +#elif INTPTR_MAX == INT32_MAX +# define MI_INTPTR_SHIFT (2) +#else +#error platform pointers must be 32, 64, or 128 bits +#endif + +#if SIZE_MAX == UINT64_MAX +# define MI_SIZE_SHIFT (3) +typedef int64_t mi_ssize_t; +#elif SIZE_MAX == UINT32_MAX +# define MI_SIZE_SHIFT (2) +typedef int32_t mi_ssize_t; +#else +#error platform objects must be 32 or 64 bits +#endif + +#if (SIZE_MAX/2) > LONG_MAX +# define MI_ZU(x) x##ULL +# define MI_ZI(x) x##LL +#else +# define MI_ZU(x) x##UL +# define MI_ZI(x) x##L +#endif + +#define MI_INTPTR_SIZE (1< 4 +#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB +#else +#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit +#endif + +#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB + + +// Derived constants +#define MI_SEGMENT_SIZE (MI_ZU(1)<= 655360) +#error 
"mimalloc internal: define more bins" +#endif +#if (MI_ALIGNMENT_MAX > MI_SEGMENT_SIZE/2) +#error "mimalloc internal: the max aligned boundary is too large for the segment size" +#endif +#if (MI_ALIGNED_MAX % MI_SEGMENT_SLICE_SIZE != 0) +#error "mimalloc internal: the max aligned boundary must be an integral multiple of the segment slice size" +#endif + +// Maximum slice offset (15) +#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) + +// Used as a special value to encode block sizes in 32 bits. +#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) + +// blocks up to this size are always allocated aligned +#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) + + + + +// ------------------------------------------------------ +// Mimalloc pages contain allocated blocks +// ------------------------------------------------------ + +// The free lists use encoded next fields +// (Only actually encodes when MI_ENCODED_FREELIST is defined.) +typedef uintptr_t mi_encoded_t; + +// thread id's +typedef size_t mi_threadid_t; + +// free lists contain blocks +typedef struct mi_block_s { + mi_encoded_t next; +} mi_block_t; + + +// The delayed flags are used for efficient multi-threaded free-ing +typedef enum mi_delayed_e { + MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list + MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap + MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list + MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim +} mi_delayed_t; + + +// The `in_full` and `has_aligned` page flags are put in a union to efficiently +// test if both are false (`full_aligned == 0`) in the `mi_free` routine. 
+#if !MI_TSAN +typedef union mi_page_flags_s { + uint8_t full_aligned; + struct { + uint8_t in_full : 1; + uint8_t has_aligned : 1; + } x; +} mi_page_flags_t; +#else +// under thread sanitizer, use a byte for each flag to suppress warning, issue #130 +typedef union mi_page_flags_s { + uint16_t full_aligned; + struct { + uint8_t in_full; + uint8_t has_aligned; + } x; +} mi_page_flags_t; +#endif + +// Thread free list. +// We use the bottom 2 bits of the pointer for mi_delayed_t flags +typedef uintptr_t mi_thread_free_t; + +// A page contains blocks of one specific size (`block_size`). +// Each page has three list of free blocks: +// `free` for blocks that can be allocated, +// `local_free` for freed blocks that are not yet available to `mi_malloc` +// `thread_free` for freed blocks by other threads +// The `local_free` and `thread_free` lists are migrated to the `free` list +// when it is exhausted. The separate `local_free` list is necessary to +// implement a monotonic heartbeat. The `thread_free` list is needed for +// avoiding atomic operations in the common case. +// +// +// `used - |thread_free|` == actual blocks that are in use (alive) +// `used - |thread_free| + |free| + |local_free| == capacity` +// +// We don't count `freed` (as |free|) but use `used` to reduce +// the number of memory accesses in the `mi_page_all_free` function(s). +// +// Notes: +// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) +// - Using `uint16_t` does not seem to slow things down +// - The size is 8 words on 64-bit which helps the page index calculations +// (and 10 words on 32-bit, and encoded free lists add 2 words. 
Sizes 10 +// and 12 are still good for address calculation) +// - To limit the structure size, the `xblock_size` is 32-bits only; for +// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size +// - `thread_free` uses the bottom bits as a delayed-free flags to optimize +// concurrent frees where only the first concurrent free adds to the owning +// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). +// The invariant is that no-delayed-free is only set if there is +// at least one block that will be added, or as already been added, to +// the owning heap `thread_delayed_free` list. This guarantees that pages +// will be freed correctly even if only other threads free blocks. +typedef struct mi_page_s { + // "owned" by the segment + uint32_t slice_count; // slices in this page (0 if not a page) + uint32_t slice_offset; // distance from the actual page data slice (0 if a page) + uint8_t is_reset : 1; // `true` if the page memory was reset + uint8_t is_committed : 1; // `true` if the page virtual memory is committed + uint8_t is_zero_init : 1; // `true` if the page was zero initialized + + // layout like this to optimize access in `mi_malloc` and `mi_free` + uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` + uint16_t reserved; // number of blocks reserved in memory + mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) + uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire : 7; // expiration count for retired blocks + + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + #ifdef MI_ENCODE_FREELIST + uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) + #endif + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) + + mi_block_t* 
local_free; // list of deferred free blocks by this thread (migrates to `free`) + _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads + _Atomic(uintptr_t) xheap; + + struct mi_page_s* next; // next page owned by this thread with the same `block_size` + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + + // 64-bit 9 words, 32-bit 12 words, (+2 for secure) + #if MI_INTPTR_SIZE==8 + uintptr_t padding[1]; + #endif +} mi_page_t; + + + +typedef enum mi_page_kind_e { + MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment + MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment + MI_PAGE_LARGE, // larger blocks go into a page of just one block + MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment. +} mi_page_kind_t; + +typedef enum mi_segment_kind_e { + MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. + MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. +} mi_segment_kind_t; + +// ------------------------------------------------------ +// A segment holds a commit mask where a bit is set if +// the corresponding MI_COMMIT_SIZE area is committed. +// The MI_COMMIT_SIZE must be a multiple of the slice +// size. If it is equal we have the most fine grained +// decommit (but setting it higher can be more efficient). 
+// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will +// be committed in one go which can be set higher than +// MI_COMMIT_SIZE for efficiency (while the decommit mask +// is still tracked in fine-grained MI_COMMIT_SIZE chunks) +// ------------------------------------------------------ + +#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB +#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) +#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS +#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) + +#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS)) +#error "the segment size must be exactly divisible by the (commit size * size_t bits)" +#endif + +typedef struct mi_commit_mask_s { + size_t mask[MI_COMMIT_MASK_FIELD_COUNT]; +} mi_commit_mask_t; + +typedef mi_page_t mi_slice_t; +typedef int64_t mi_msecs_t; + + +// Segments are large allocated memory blocks (8mb on 64 bit) from +// the OS. Inside segments we allocated fixed size _pages_ that +// contain blocks. +typedef struct mi_segment_s { + size_t memid; // memory id for arena allocation + bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_large; // in large/huge os pages? + bool mem_is_committed; // `true` if the whole segment is eagerly committed + + bool allow_decommit; + mi_msecs_t decommit_expire; + mi_commit_mask_t decommit_mask; + mi_commit_mask_t commit_mask; + + _Atomic(struct mi_segment_s*) abandoned_next; + + // from here is zero initialized + struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) + + size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t used; // count of pages in use + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + + size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` + size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. + + // layout like this to optimize access in `mi_free` + mi_segment_kind_t kind; + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` + mi_slice_t slices[MI_SLICES_PER_SEGMENT]; +} mi_segment_t; + + +// ------------------------------------------------------ +// Heaps +// Provide first-class heaps to allocate from. +// A heap just owns a set of pages for allocation and +// can only be allocate/reallocate from the thread that created it. +// Freeing blocks can be done from any thread though. +// Per thread, the segments are shared among its heaps. +// Per thread, there is always a default heap that is +// used for allocation; it is initialized to statically +// point to an empty heap to avoid initialization checks +// in the fast path. +// ------------------------------------------------------ + +// Thread local data +typedef struct mi_tld_s mi_tld_t; + +// Pages of a certain block size are held in a queue. 
+typedef struct mi_page_queue_s { + mi_page_t* first; + mi_page_t* last; + size_t block_size; +} mi_page_queue_t; + +#define MI_BIN_FULL (MI_BIN_HUGE+1) + +// Random context +typedef struct mi_random_cxt_s { + uint32_t input[16]; + uint32_t output[16]; + int output_available; +} mi_random_ctx_t; + + +// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows +#if (MI_PADDING) +typedef struct mi_padding_s { + uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) + uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes) +} mi_padding_t; +#define MI_PADDING_SIZE (sizeof(mi_padding_t)) +#define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE) +#else +#define MI_PADDING_SIZE 0 +#define MI_PADDING_WSIZE 0 +#endif + +#define MI_PAGES_DIRECT (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1) + + +// A heap owns a set of pages. +struct mi_heap_s { + mi_tld_t* tld; + mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. + mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") + _Atomic(mi_block_t*) thread_delayed_free; + mi_threadid_t thread_id; // thread this heap belongs too + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list + mi_random_ctx_t random; // random number context used for secure allocation + size_t page_count; // total number of pages in the `pages` queues. + size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) + size_t page_retired_max; // largest retired index into the `pages` array. 
+ mi_heap_t* next; // list of heaps per thread + bool no_reclaim; // `true` if this heap should not reclaim abandoned pages +}; + + + +// ------------------------------------------------------ +// Debug +// ------------------------------------------------------ + +#if !defined(MI_DEBUG_UNINIT) +#define MI_DEBUG_UNINIT (0xD0) +#endif +#if !defined(MI_DEBUG_FREED) +#define MI_DEBUG_FREED (0xDF) +#endif +#if !defined(MI_DEBUG_PADDING) +#define MI_DEBUG_PADDING (0xDE) +#endif + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); +#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + +// ------------------------------------------------------ +// Statistics +// ------------------------------------------------------ + +#ifndef MI_STAT +#if (MI_DEBUG>0) +#define MI_STAT 2 +#else +#define MI_STAT 0 +#endif +#endif + +typedef struct mi_stat_count_s { + int64_t allocated; + int64_t freed; + int64_t peak; + int64_t current; +} mi_stat_count_t; + +typedef struct mi_stat_counter_s { + int64_t total; + int64_t count; +} mi_stat_counter_t; + +typedef struct mi_stats_s { + mi_stat_count_t segments; + mi_stat_count_t pages; + mi_stat_count_t reserved; + mi_stat_count_t committed; + mi_stat_count_t reset; + mi_stat_count_t page_committed; + mi_stat_count_t segments_abandoned; + mi_stat_count_t pages_abandoned; + mi_stat_count_t threads; + mi_stat_count_t normal; + mi_stat_count_t huge; + mi_stat_count_t large; + mi_stat_count_t malloc; + mi_stat_count_t segments_cache; + mi_stat_counter_t pages_extended; + mi_stat_counter_t mmap_calls; + mi_stat_counter_t commit_calls; + 
mi_stat_counter_t page_no_retire; + mi_stat_counter_t searches; + mi_stat_counter_t normal_count; + mi_stat_counter_t huge_count; + mi_stat_counter_t large_count; +#if MI_STAT>1 + mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; +#endif +} mi_stats_t; + + +void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); +void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); + +#if (MI_STAT) +#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) +#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) +#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) +#else +#define mi_stat_increase(stat,amount) (void)0 +#define mi_stat_decrease(stat,amount) (void)0 +#define mi_stat_counter_increase(stat,amount) (void)0 +#endif + +#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) + +// ------------------------------------------------------ +// Thread Local data +// ------------------------------------------------------ + +// A "span" is is an available range of slices. The span queues keep +// track of slice spans of at most the given `slice_count` (but more than the previous size class). 
+typedef struct mi_span_queue_s { + mi_slice_t* first; + mi_slice_t* last; + size_t slice_count; +} mi_span_queue_t; + +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) + +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats +} mi_os_tld_t; + + +// Segments thread local data +typedef struct mi_segments_tld_s { + mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments + size_t count; // current number of segments; + size_t peak_count; // peak number of segments + size_t current_size; // current size of all segments + size_t peak_size; // peak size of all segments + mi_stats_t* stats; // points to tld stats + mi_os_tld_t* os; // points to os stats +} mi_segments_tld_t; + +// Thread local data +struct mi_tld_s { + unsigned long long heartbeat; // monotonic heartbeat count + bool recurse; // true if deferred was called; used to prevent infinite recursion. + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) + mi_segments_tld_t segments; // segment tld + mi_os_tld_t os; // os tld + mi_stats_t stats; // statistics +}; + +#endif diff --git a/compat/mimalloc/mimalloc.h b/compat/mimalloc/mimalloc.h new file mode 100644 index 00000000000000..6a6f1b373be1d4 --- /dev/null +++ b/compat/mimalloc/mimalloc.h @@ -0,0 +1,453 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_H +#define MIMALLOC_H + +#define MI_MALLOC_VERSION 206 // major + 2 digits minor + +// ------------------------------------------------------ +// Compiler specific attributes +// ------------------------------------------------------ + +#ifdef __cplusplus + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 + #define mi_attr_noexcept noexcept + #else + #define mi_attr_noexcept throw() + #endif +#else + #define mi_attr_noexcept +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201703) + #define mi_decl_nodiscard [[nodiscard]] +#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl + #define mi_decl_nodiscard __attribute__((warn_unused_result)) +#elif (_MSC_VER >= 1700) + #define mi_decl_nodiscard _Check_return_ +#else + #define mi_decl_nodiscard +#endif + +#if defined(_MSC_VER) || defined(__MINGW32__) + #if !defined(MI_SHARED_LIB) + #define mi_decl_export + #elif defined(MI_SHARED_LIB_EXPORT) + #define mi_decl_export __declspec(dllexport) + #else + #define mi_decl_export __declspec(dllimport) + #endif + #if defined(__MINGW32__) + #define mi_decl_restrict + #define mi_attr_malloc __attribute__((malloc)) + #else + #if (_MSC_VER >= 1900) && !defined(__EDG__) + #define mi_decl_restrict __declspec(allocator) __declspec(restrict) + #else + #define mi_decl_restrict __declspec(restrict) + #endif + #define mi_attr_malloc + #endif + #define mi_cdecl __cdecl + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) +#elif defined(__GNUC__) // includes clang and icc + #if defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT) + #define mi_decl_export __attribute__((visibility("default"))) + #else + #define mi_decl_export + #endif + #define mi_cdecl // leads to warnings... 
__attribute__((cdecl)) + #define mi_decl_restrict + #define mi_attr_malloc __attribute__((malloc)) + #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5) + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) + #elif defined(__INTEL_COMPILER) + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) + #else + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) __attribute__((alloc_align(p))) + #endif +#else + #define mi_cdecl + #define mi_decl_export + #define mi_decl_restrict + #define mi_attr_malloc + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) +#endif + +// ------------------------------------------------------ +// Includes +// ------------------------------------------------------ + +#include <stddef.h> // size_t +#include <stdbool.h> // bool + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------------------------------------------ +// Standard malloc interface +// ------------------------------------------------------ + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_export void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); + +mi_decl_export void mi_free(void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export
mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; + +// ------------------------------------------------------ +// Extended functionality +// ------------------------------------------------------ +#define MI_SMALL_WSIZE_MAX (128) +#define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*)) + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); + +mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; + + +// ------------------------------------------------------ +// Internals +// ------------------------------------------------------ + +typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; + +typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg); +mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; + +typedef 
void (mi_cdecl mi_error_fun)(int err, void* arg); +mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); + +mi_decl_export void mi_collect(bool force) mi_attr_noexcept; +mi_decl_export int mi_version(void) mi_attr_noexcept; +mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; +mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; +mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; + +mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_done(void) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; + +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, + size_t* current_rss, size_t* peak_rss, + size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; + +// ------------------------------------------------------------------------------------- +// Aligned allocation +// Note that `alignment` always follows `size` for consistency with unaligned +// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
+// ------------------------------------------------------------------------------------- +#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment is 1MiB + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); + + +// ------------------------------------------------------------------------------------- +// Heaps: first-class, but can only allocate from the same thread that created it. 
+// ------------------------------------------------------------------------------------- + +struct mi_heap_s; +typedef struct mi_heap_s mi_heap_t; + +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void); +mi_decl_export void mi_heap_delete(mi_heap_t* heap); +mi_decl_export void mi_heap_destroy(mi_heap_t* heap); +mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap); +mi_decl_export mi_heap_t* mi_heap_get_default(void); +mi_decl_export mi_heap_t* mi_heap_get_backing(void); +mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); + +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); + +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard 
mi_decl_export mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); + + +// 
-------------------------------------------------------------------------------- +// Zero initialized re-allocation. +// Only valid on memory that was originally allocated with zero initialization too. +// e.g. `mi_calloc`, `mi_zalloc`, `mi_zalloc_aligned` etc. +// see +// -------------------------------------------------------------------------------- + +mi_decl_nodiscard mi_decl_export void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); + +mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3); + +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); + +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard 
mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4); + + +// ------------------------------------------------------ +// Analysis +// ------------------------------------------------------ + +mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p); +mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p); +mi_decl_export bool mi_check_owned(const void* p); + +// An area of heap space contains blocks of a single size. +typedef struct mi_heap_area_s { + void* blocks; // start of the area containing heap blocks + size_t reserved; // bytes reserved for this area (virtual) + size_t committed; // current available bytes for this area + size_t used; // number of allocated blocks + size_t block_size; // size in bytes of each block + size_t full_block_size; // size in bytes of a full block including padding and metadata. 
+} mi_heap_area_t; + +typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg); + +mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); + +// Experimental +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept; + +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; + +mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; + +mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; + +// deprecated +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + + +// ------------------------------------------------------ +// Convenience +// ------------------------------------------------------ + +#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp))) +#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp))) +#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp))) +#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp))) +#define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp))) +#define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp))) + +#define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp))) +#define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp))) +#define mi_heap_calloc_tp(hp,tp,n) ((tp*)mi_heap_calloc(hp,n,sizeof(tp))) +#define mi_heap_mallocn_tp(hp,tp,n) ((tp*)mi_heap_mallocn(hp,n,sizeof(tp))) +#define 
mi_heap_reallocn_tp(hp,p,tp,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp))) +#define mi_heap_recalloc_tp(hp,p,tp,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp))) + + +// ------------------------------------------------------ +// Options +// ------------------------------------------------------ + +typedef enum mi_option_e { + // stable options + mi_option_show_errors, + mi_option_show_stats, + mi_option_verbose, + // some of the following options are experimental + // (deprecated options are kept for binary backward compatibility with v1.x versions) + mi_option_eager_commit, + mi_option_deprecated_eager_region_commit, + mi_option_deprecated_reset_decommits, + mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup + mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node + mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup + mi_option_deprecated_segment_cache, + mi_option_page_reset, + mi_option_abandoned_page_decommit, + mi_option_deprecated_segment_reset, + mi_option_eager_commit_delay, + mi_option_decommit_delay, + mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. 
+ mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) + mi_option_os_tag, + mi_option_max_errors, + mi_option_max_warnings, + mi_option_max_segment_reclaim, + mi_option_allow_decommit, + mi_option_segment_decommit_delay, + mi_option_decommit_extend_delay, + _mi_option_last +} mi_option_t; + + +mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option); +mi_decl_export void mi_option_enable(mi_option_t option); +mi_decl_export void mi_option_disable(mi_option_t option); +mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); +mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); + +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_export void mi_option_set(mi_option_t option, long value); +mi_decl_export void mi_option_set_default(mi_option_t option, long value); + + +// ------------------------------------------------------------------------------------------------------- +// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions. +// (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.) +// note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our heap before free-ing. 
+// ------------------------------------------------------------------------------------------------------- + +mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; +mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; + +mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); + +mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export int mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned char* 
mi_mbsdup(const unsigned char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; +mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept; + +mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; +mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; +mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; + +// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. +// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); +mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); + +#ifdef __cplusplus +} +#endif + +// --------------------------------------------------------------------------------------------- +// Implement the C++ std::allocator interface for use in STL containers. 
+// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally) +// --------------------------------------------------------------------------------------------- +#ifdef __cplusplus + +#include <cstddef> // std::size_t +#include <cstdint> // PTRDIFF_MAX +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 +#include <type_traits> // std::true_type +#include <utility> // std::forward +#endif + +template<class T> struct mi_stl_allocator { + typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef value_type& reference; + typedef value_type const& const_reference; + typedef value_type* pointer; + typedef value_type const* const_pointer; + template <class U> struct rebind { typedef mi_stl_allocator<U> other; }; + + mi_stl_allocator() mi_attr_noexcept = default; + mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept = default; + template<class U> mi_stl_allocator(const mi_stl_allocator<U>&) mi_attr_noexcept { } + mi_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + + #if (__cplusplus >= 201703L) // C++17 + mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_new_n(count, sizeof(T))); } + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } + #else + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_new_n(count, sizeof(value_type))); } + #endif + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + using is_always_equal = std::true_type; + template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); } + template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); } + #else + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } + void destroy(pointer p) {
p->~value_type(); } + #endif + + size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +}; + +template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; } +template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; } +#endif // __cplusplus + +#endif diff --git a/compat/mimalloc/options.c b/compat/mimalloc/options.c new file mode 100644 index 00000000000000..7497e70a7baee2 --- /dev/null +++ b/compat/mimalloc/options.c @@ -0,0 +1,627 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include <stdio.h> +#include <stdlib.h> // strtol +#include <string.h> // strncpy, strncat, strlen, strstr +#include <ctype.h> // toupper +#include <stdarg.h> + +#ifdef _MSC_VER +#pragma warning(disable:4996) // strncpy, strncat +#endif + + +static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit) +static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit) + +static void mi_add_stderr_output(void); + +int mi_version(void) mi_attr_noexcept { + return MI_MALLOC_VERSION; +} + +#ifdef _WIN32 +#include <windows.h> +#endif + +// -------------------------------------------------------- +// Options +// These can be accessed by multiple threads and may be +// concurrently initialized, but an initializing data race +// is ok since they resolve to the same value.
+// -------------------------------------------------------- +typedef enum mi_init_e { + UNINIT, // not yet initialized + DEFAULTED, // not found in the environment, use default value + INITIALIZED // found in environment or set explicitly +} mi_init_t; + +typedef struct mi_option_desc_s { + long value; // the value + mi_init_t init; // is it initialized yet? (from the environment) + mi_option_t option; // for debugging: the option index should match the option + const char* name; // option name without `mimalloc_` prefix + const char* legacy_name; // potential legacy v1.x option name +} mi_option_desc_t; + +#define MI_OPTION(opt) mi_option_##opt, #opt, NULL +#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy + +static mi_option_desc_t options[_mi_option_last] = +{ + // stable options + #if MI_DEBUG || defined(MI_SHOW_ERRORS) + { 1, UNINIT, MI_OPTION(show_errors) }, + #else + { 0, UNINIT, MI_OPTION(show_errors) }, + #endif + { 0, UNINIT, MI_OPTION(show_stats) }, + { 0, UNINIT, MI_OPTION(verbose) }, + + // Some of the following options are experimental and not all combinations are valid. Use with care. 
+ { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) + { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, + { 0, UNINIT, MI_OPTION(deprecated_reset_decommits) }, + { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N + { 0, UNINIT, MI_OPTION(reserve_os_memory) }, + { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, + #if defined(__NetBSD__) + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + #elif defined(_WIN32) + { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + #else + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + #endif + { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
+ { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) + { 100, UNINIT, MI_OPTION(os_tag) }, // only Apple-specific for now, but might serve a more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. + { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) + { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments + { 2, UNINIT, MI_OPTION(decommit_extend_delay) } +}; + +static void mi_option_init(mi_option_desc_t* desc); + +void _mi_options_init(void) { + // called on process load; should not be called before the CRT is initialized! + // (e.g. do not call this from process_init as that may run before CRT initialization) + mi_add_stderr_output(); // now it is safe to use stderr for output + for(int i = 0; i < _mi_option_last; i++ ) { + mi_option_t option = (mi_option_t)i; + long l = mi_option_get(option); MI_UNUSED(l); // initialize + if (option != mi_option_verbose) { + mi_option_desc_t* desc = &options[option]; + _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); + } + } + mi_max_error_count = mi_option_get(mi_option_max_errors); + mi_max_warning_count = mi_option_get(mi_option_max_warnings); +} + +mi_decl_nodiscard long mi_option_get(mi_option_t option) { + mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return 0; + mi_option_desc_t* desc = &options[option]; + mi_assert(desc->option == option); // index should match the option + if (mi_unlikely(desc->init == UNINIT)) { + mi_option_init(desc); + } + return desc->value; +} + +mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long
min, long max) { + long x = mi_option_get(option); + return (x < min ? min : (x > max ? max : x)); +} + +void mi_option_set(mi_option_t option, long value) { + mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return; + mi_option_desc_t* desc = &options[option]; + mi_assert(desc->option == option); // index should match the option + desc->value = value; + desc->init = INITIALIZED; +} + +void mi_option_set_default(mi_option_t option, long value) { + mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return; + mi_option_desc_t* desc = &options[option]; + if (desc->init != INITIALIZED) { + desc->value = value; + } +} + +mi_decl_nodiscard bool mi_option_is_enabled(mi_option_t option) { + return (mi_option_get(option) != 0); +} + +void mi_option_set_enabled(mi_option_t option, bool enable) { + mi_option_set(option, (enable ? 1 : 0)); +} + +void mi_option_set_enabled_default(mi_option_t option, bool enable) { + mi_option_set_default(option, (enable ? 1 : 0)); +} + +void mi_option_enable(mi_option_t option) { + mi_option_set_enabled(option,true); +} + +void mi_option_disable(mi_option_t option) { + mi_option_set_enabled(option,false); +} + + +static void mi_out_stderr(const char* msg, void* arg) { + MI_UNUSED(arg); + if (msg == NULL) return; + #ifdef _WIN32 + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. 
+ if (!_mi_preloading()) { + // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console + static HANDLE hcon = INVALID_HANDLE_VALUE; + if (hcon == INVALID_HANDLE_VALUE) { + hcon = GetStdHandle(STD_ERROR_HANDLE); + } + const size_t len = strlen(msg); + if (hcon != INVALID_HANDLE_VALUE && len > 0 && len < UINT32_MAX) { + DWORD written = 0; + WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + } + } + #else + fputs(msg, stderr); + #endif +} + +// Since an output function can be registered earliest in the `main` +// function we also buffer output that happens earlier. When +// an output function is registered it is called immediately with +// the output up to that point. +#ifndef MI_MAX_DELAY_OUTPUT +#define MI_MAX_DELAY_OUTPUT ((size_t)(32*1024)) +#endif +static char out_buf[MI_MAX_DELAY_OUTPUT+1]; +static _Atomic(size_t) out_len; + +static void mi_out_buf(const char* msg, void* arg) { + MI_UNUSED(arg); + if (msg==NULL) return; + if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; + size_t n = strlen(msg); + if (n==0) return; + // claim space + size_t start = mi_atomic_add_acq_rel(&out_len, n); + if (start >= MI_MAX_DELAY_OUTPUT) return; + // check bound + if (start+n >= MI_MAX_DELAY_OUTPUT) { + n = MI_MAX_DELAY_OUTPUT-start-1; + } + _mi_memcpy(&out_buf[start], msg, n); +} + +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { + if (out==NULL) return; + // claim (if `no_more_buf == true`, no more output will be added after this point) + size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); + // and output the current contents + if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; + out_buf[count] = 0; + out(out_buf,arg); + if (!no_more_buf) { + out_buf[count] = '\n'; // if continue with the buffer, insert a newline + } +} + + +// Once this module is loaded, switch to this routine +// which outputs to stderr and the delayed output buffer. 
+// Output handler that forwards `msg` to `mi_out_stderr` and also appends it
+// to the delayed output buffer through `mi_out_buf`.
+static void mi_out_buf_stderr(const char* msg, void* arg) {
+  mi_out_stderr(msg,arg);
+  mi_out_buf(msg,arg);
+}
+
+
+
+// --------------------------------------------------------
+// Default output handler
+// --------------------------------------------------------
+
+// Should be atomic but gives errors on many platforms as generally we cannot cast a function pointer to a uintptr_t.
+// For now, don't register output from multiple threads.
+static mi_output_fun* volatile mi_out_default; // = NULL
+static _Atomic(void*) mi_out_arg; // = NULL
+
+// Return the current default output function; when none has been set this is
+// `mi_out_buf`. If `parg` is not NULL it receives the registered argument.
+static mi_output_fun* mi_out_get_default(void** parg) {
+  if (parg != NULL) { *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg); }
+  mi_output_fun* out = mi_out_default;
+  return (out == NULL ? &mi_out_buf : out);
+}
+
+// Register `out` (called with `arg`) as the default output function.
+// A NULL `out` selects `mi_out_stderr`; a non-NULL `out` is immediately handed
+// the buffered output with `no_more_buf == true`.
+void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept {
+  mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer
+  mi_atomic_store_ptr_release(void,&mi_out_arg, arg);
+  if (out!=NULL) mi_out_buf_flush(out,true,arg);         // output all the delayed output now
+}
+
+// add stderr to the delayed output after the module is loaded
+static void mi_add_stderr_output(void) {
+  mi_assert_internal(mi_out_default == NULL);
+  mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr
+  mi_out_default = &mi_out_buf_stderr;           // and add stderr to the delayed output
+}
+
+// --------------------------------------------------------
+// Messages, all end up calling `_mi_fputs`.
+// --------------------------------------------------------
+static _Atomic(size_t) error_count;   // = 0;  // when >= max_error_count stop emitting errors
+static _Atomic(size_t) warning_count; // = 0;  // when >= max_warning_count stop emitting warnings
+
+// When overriding malloc, we may recurse into mi_vfprintf if an allocation
+// inside the C runtime causes another message.
+// In some cases (like on macOS) the loader already allocates which +// calls into mimalloc; if we then access thread locals (like `recurse`) +// this may crash as the access may call _tlv_bootstrap that tries to +// (recursively) invoke malloc again to allocate space for the thread local +// variables on demand. This is why we use a _mi_preloading test on such +// platforms. However, C code generator may move the initial thread local address +// load before the `if` and we therefore split it out in a separate funcion. +static mi_decl_thread bool recurse = false; + +static mi_decl_noinline bool mi_recurse_enter_prim(void) { + if (recurse) return false; + recurse = true; + return true; +} + +static mi_decl_noinline void mi_recurse_exit_prim(void) { + recurse = false; +} + +static bool mi_recurse_enter(void) { + #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + if (_mi_preloading()) return true; + #endif + return mi_recurse_enter_prim(); +} + +static void mi_recurse_exit(void) { + #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + if (_mi_preloading()) return; + #endif + mi_recurse_exit_prim(); +} + +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? + if (!mi_recurse_enter()) return; + out = mi_out_get_default(&arg); + if (prefix != NULL) out(prefix, arg); + out(message, arg); + mi_recurse_exit(); + } + else { + if (prefix != NULL) out(prefix, arg); + out(message, arg); + } +} + +// Define our own limited `fprintf` that avoids memory allocation. +// We do this using `snprintf` with a limited buffer. 
+static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { + char buf[512]; + if (fmt==NULL) return; + if (!mi_recurse_enter()) return; + vsnprintf(buf,sizeof(buf)-1,fmt,args); + mi_recurse_exit(); + _mi_fputs(out,arg,prefix,buf); +} + +void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { + va_list args; + va_start(args,fmt); + mi_vfprintf(out,arg,NULL,fmt,args); + va_end(args); +} + +static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { + if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { + char tprefix[64]; + snprintf(tprefix, sizeof(tprefix), "%sthread 0x%zx: ", prefix, _mi_thread_id()); + mi_vfprintf(out, arg, tprefix, fmt, args); + } + else { + mi_vfprintf(out, arg, prefix, fmt, args); + } +} + +void _mi_trace_message(const char* fmt, ...) { + if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher + va_list args; + va_start(args, fmt); + mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args); + va_end(args); +} + +void _mi_verbose_message(const char* fmt, ...) { + if (!mi_option_is_enabled(mi_option_verbose)) return; + va_list args; + va_start(args,fmt); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); + va_end(args); +} + +static void mi_show_error_message(const char* fmt, va_list args) { + if (!mi_option_is_enabled(mi_option_verbose)) { + if (!mi_option_is_enabled(mi_option_show_errors)) return; + if (mi_max_error_count >= 0 && (long)mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; + } + mi_vfprintf_thread(NULL, NULL, "mimalloc: error: ", fmt, args); +} + +void _mi_warning_message(const char* fmt, ...) 
{ + if (!mi_option_is_enabled(mi_option_verbose)) { + if (!mi_option_is_enabled(mi_option_show_errors)) return; + if (mi_max_warning_count >= 0 && (long)mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return; + } + va_list args; + va_start(args,fmt); + mi_vfprintf_thread(NULL, NULL, "mimalloc: warning: ", fmt, args); + va_end(args); +} + + +#if MI_DEBUG +void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { + _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + abort(); +} +#endif + +// -------------------------------------------------------- +// Errors +// -------------------------------------------------------- + +static mi_error_fun* volatile mi_error_handler; // = NULL +static _Atomic(void*) mi_error_arg; // = NULL + +static void mi_error_default(int err) { + MI_UNUSED(err); +#if (MI_DEBUG>0) + if (err==EFAULT) { + #ifdef _MSC_VER + __debugbreak(); + #endif + abort(); + } +#endif +#if (MI_SECURE>0) + if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) + abort(); + } +#endif +#if defined(MI_XMALLOC) + if (err==ENOMEM || err==EOVERFLOW) { // abort on memory allocation fails in xmalloc mode + abort(); + } +#endif +} + +void mi_register_error(mi_error_fun* fun, void* arg) { + mi_error_handler = fun; // can be NULL + mi_atomic_store_ptr_release(void,&mi_error_arg, arg); +} + +void _mi_error_message(int err, const char* fmt, ...) 
{ + // show detailed error message + va_list args; + va_start(args, fmt); + mi_show_error_message(fmt, args); + va_end(args); + // and call the error handler which may abort (or return normally) + if (mi_error_handler != NULL) { + mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg)); + } + else { + mi_error_default(err); + } +} + +// -------------------------------------------------------- +// Initialize options by checking the environment +// -------------------------------------------------------- + +static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { + if (dest==NULL || src==NULL || dest_size == 0) return; + // copy until end of src, or when dest is (almost) full + while (*src != 0 && dest_size > 1) { + *dest++ = *src++; + dest_size--; + } + // always zero terminate + *dest = 0; +} + +static void mi_strlcat(char* dest, const char* src, size_t dest_size) { + if (dest==NULL || src==NULL || dest_size == 0) return; + // find end of string in the dest buffer + while (*dest != 0 && dest_size > 1) { + dest++; + dest_size--; + } + // and catenate + mi_strlcpy(dest, src, dest_size); +} + +#ifdef MI_NO_GETENV +static bool mi_getenv(const char* name, char* result, size_t result_size) { + MI_UNUSED(name); + MI_UNUSED(result); + MI_UNUSED(result_size); + return false; +} +#else +static inline int mi_strnicmp(const char* s, const char* t, size_t n) { + if (n==0) return 0; + for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { + if (toupper(*s) != toupper(*t)) break; + } + return (n==0 ? 0 : *s - *t); +} +#if defined _WIN32 +// On Windows use GetEnvironmentVariable instead of getenv to work +// reliably even when this is invoked before the C runtime is initialized. +// i.e. when `_mi_preloading() == true`. +// Note: on windows, environment names are not case sensitive. 
+#include +static bool mi_getenv(const char* name, char* result, size_t result_size) { + result[0] = 0; + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + return (len > 0 && len < result_size); +} +#elif !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) +// On Posix systemsr use `environ` to acces environment variables +// even before the C runtime is initialized. +#if defined(__APPLE__) && defined(__has_include) && __has_include() +#include +static char** mi_get_environ(void) { + return (*_NSGetEnviron()); +} +#else +extern char** environ; +static char** mi_get_environ(void) { + return environ; +} +#endif +static bool mi_getenv(const char* name, char* result, size_t result_size) { + if (name==NULL) return false; + const size_t len = strlen(name); + if (len == 0) return false; + char** env = mi_get_environ(); + if (env == NULL) return false; + // compare up to 256 entries + for (int i = 0; i < 256 && env[i] != NULL; i++) { + const char* s = env[i]; + if (mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive + // found it + mi_strlcpy(result, s + len + 1, result_size); + return true; + } + } + return false; +} +#else +// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime +static bool mi_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. 
+ char buf[64+1]; + size_t len = strlen(name); + if (len >= sizeof(buf)) len = sizeof(buf) - 1; + for (size_t i = 0; i < len; i++) { + buf[i] = toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s != NULL && strlen(s) < result_size) { + mi_strlcpy(result, s, result_size); + return true; + } + else { + return false; + } +} +#endif // !MI_USE_ENVIRON +#endif // !MI_NO_GETENV + +static void mi_option_init(mi_option_desc_t* desc) { + // Read option value from the environment + char s[64+1]; + char buf[64+1]; + mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + mi_strlcat(buf, desc->name, sizeof(buf)); + bool found = mi_getenv(buf,s,sizeof(s)); + if (!found && desc->legacy_name != NULL) { + mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + found = mi_getenv(buf,s,sizeof(s)); + if (found) { + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name ); + } + } + + if (found) { + size_t len = strlen(s); + if (len >= sizeof(buf)) len = sizeof(buf) - 1; + for (size_t i = 0; i < len; i++) { + buf[i] = (char)toupper(s[i]); + } + buf[len] = 0; + if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { + desc->value = 1; + desc->init = INITIALIZED; + } + else if (strstr("0;FALSE;NO;OFF", buf) != NULL) { + desc->value = 0; + desc->init = INITIALIZED; + } + else { + char* end = buf; + long value = strtol(buf, &end, 10); + if (desc->option == mi_option_reserve_os_memory) { + // this option is interpreted in KiB to prevent overflow of `long` + if (*end == 'K') { end++; } + else if (*end == 'M') { value *= MI_KiB; end++; } + else if (*end == 'G') { value *= MI_MiB; end++; } + else { value = (value + MI_KiB - 1) / MI_KiB; } + if (end[0] == 'I' && end[1] == 'B') { end += 2; } + else if (*end == 'B') { end++; } + } + if (*end == 0) { + desc->value = value; + desc->init = INITIALIZED; + } + else { + // set `init` first to avoid recursion through 
_mi_warning_message on mimalloc_verbose. + desc->init = DEFAULTED; + if (desc->option == mi_option_verbose && desc->value == 0) { + // if the 'mimalloc_verbose' env var has a bogus value we'd never know + // (since the value defaults to 'off') so in that case briefly enable verbose + desc->value = 1; + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + desc->value = 0; + } + else { + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + } + } + } + mi_assert_internal(desc->init != UNINIT); + } + else if (!_mi_preloading()) { + desc->init = DEFAULTED; + } +} diff --git a/compat/mimalloc/os.c b/compat/mimalloc/os.c new file mode 100644 index 00000000000000..8bb236d92a2dbd --- /dev/null +++ b/compat/mimalloc/os.c @@ -0,0 +1,1443 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // ensure mmap flags are defined +#endif + +#if defined(__sun) +// illumos provides new mman.h api when any of these are defined +// otherwise the old api based on caddr_t which predates the void pointers one. +// stock solaris provides only the former, chose to atomically to discard those +// flags only here rather than project wide tough. 
+#undef _XOPEN_SOURCE
+#undef _POSIX_C_SOURCE
+#endif
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include <string.h>  // strerror
+
+#ifdef _MSC_VER
+#pragma warning(disable:4996)  // strerror
+#endif
+
+#if defined(__wasi__)
+#define MI_USE_SBRK
+#endif
+
+// NOTE(review): the system header names below were missing in this copy and were
+// restored from the trailing comments and the symbols this file uses;
+// <mach/vm_statistics.h>, <sys/cpuset.h> and <sys/domainset.h> in particular
+// should be confirmed against the upstream source.
+#if defined(_WIN32)
+#include <windows.h>
+#elif defined(__wasi__)
+#include <unistd.h>    // sbrk
+#else
+#include <sys/mman.h>  // mmap
+#include <unistd.h>    // sysconf
+#if defined(__linux__)
+#include <features.h>
+#include <fcntl.h>
+#if defined(__GLIBC__)
+#include <linux/mman.h> // linux mmap flags
+#else
+#include <sys/mman.h>
+#endif
+#endif
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR
+#include <mach/vm_statistics.h>
+#endif
+#endif
+#if defined(__FreeBSD__) || defined(__DragonFly__)
+#include <sys/param.h>
+#if __FreeBSD_version >= 1200000
+#include <sys/cpuset.h>
+#include <sys/domainset.h>
+#endif
+#include <sys/sysctl.h>
+#endif
+#endif
+
+/* -----------------------------------------------------------
+  Initialization.
+  On windows initializes support for aligned allocation and
+  large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
+----------------------------------------------------------- */
+bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
+bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats);
+
+// Pointer version of `_mi_align_up`: returns `p` aligned up to `alignment`.
+static void* mi_align_up_ptr(void* p, size_t alignment) {
+  return (void*)_mi_align_up((uintptr_t)p, alignment);
+}
+
+// Pointer version of `_mi_align_down`: returns `p` aligned down to `alignment`.
+static void* mi_align_down_ptr(void* p, size_t alignment) {
+  return (void*)_mi_align_down((uintptr_t)p, alignment);
+}
+
+
+// page size (initialized properly in `os_init`)
+static size_t os_page_size = 4096;
+
+// minimal allocation granularity
+static size_t os_alloc_granularity = 4096;
+
+// if non-zero, use large page allocation
+static size_t large_os_page_size = 0;
+
+// is memory overcommit allowed?
+// set dynamically in _mi_os_init (and if true we use MAP_NORESERVE) +static bool os_overcommit = true; + +bool _mi_os_has_overcommit(void) { + return os_overcommit; +} + +// OS (small) page size +size_t _mi_os_page_size(void) { + return os_page_size; +} + +// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) +size_t _mi_os_large_page_size(void) { + return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size()); +} + +#if !defined(MI_USE_SBRK) && !defined(__wasi__) +static bool use_large_os_page(size_t size, size_t alignment) { + // if we have access, check the size and alignment requirements + if (large_os_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; + return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0); +} +#endif + +// round to a good OS allocation size (bounded by max 12.5% waste) +size_t _mi_os_good_alloc_size(size_t size) { + size_t align_size; + if (size < 512*MI_KiB) align_size = _mi_os_page_size(); + else if (size < 2*MI_MiB) align_size = 64*MI_KiB; + else if (size < 8*MI_MiB) align_size = 256*MI_KiB; + else if (size < 32*MI_MiB) align_size = 1*MI_MiB; + else align_size = 4*MI_MiB; + if (mi_unlikely(size >= (SIZE_MAX - align_size))) return size; // possible overflow? + return _mi_align_up(size, align_size); +} + +#if defined(_WIN32) +// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. +// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) +// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) +// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. 
+typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { + MiMemExtendedParameterInvalidType = 0, + MiMemExtendedParameterAddressRequirements, + MiMemExtendedParameterNumaNode, + MiMemExtendedParameterPartitionHandle, + MiMemExtendedParameterUserPhysicalHandle, + MiMemExtendedParameterAttributeFlags, + MiMemExtendedParameterMax +} MI_MEM_EXTENDED_PARAMETER_TYPE; + +typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { + struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; + union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; +} MI_MEM_EXTENDED_PARAMETER; + +typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { + PVOID LowestStartingAddress; + PVOID HighestEndingAddress; + SIZE_T Alignment; +} MI_MEM_ADDRESS_REQUIREMENTS; + +#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 + +#include +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +static PVirtualAlloc2 pVirtualAlloc2 = NULL; +static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; + +// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 +typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; + +typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); +typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; +static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; +static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; + +static bool mi_win_enable_large_os_pages(void) +{ + if (large_os_page_size > 0) return true; + + // Try to 
see if large OS pages are supported + // To use large pages on Windows, we first need access permission + // Set "Lock pages in memory" permission in the group policy editor + // + unsigned long err = 0; + HANDLE token = NULL; + BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (ok) { + TOKEN_PRIVILEGES tp; + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + if (ok) { + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + if (ok) { + err = GetLastError(); + ok = (err == ERROR_SUCCESS); + if (ok) { + large_os_page_size = GetLargePageMinimum(); + } + } + } + CloseHandle(token); + } + if (!ok) { + if (err == 0) err = GetLastError(); + _mi_warning_message("cannot enable large OS page support, error %lu\n", err); + } + return (ok!=0); +} + +void _mi_os_init(void) +{ + os_overcommit = false; + // get the page size + SYSTEM_INFO si; + GetSystemInfo(&si); + if (si.dwPageSize > 0) os_page_size = si.dwPageSize; + if (si.dwAllocationGranularity > 0) os_alloc_granularity = si.dwAllocationGranularity; + // get the VirtualAlloc2 function + HINSTANCE hDll; + hDll = LoadLibrary(TEXT("kernelbase.dll")); + if (hDll != NULL) { + // use VirtualAlloc2FromApp if possible as it is available to Windows store apps + pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + FreeLibrary(hDll); + } + // NtAllocateVirtualMemoryEx is used for huge page allocation + hDll = LoadLibrary(TEXT("ntdll.dll")); + if (hDll != NULL) { + pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); + FreeLibrary(hDll); + } + // Try to use Win7+ numa API + hDll = LoadLibrary(TEXT("kernel32.dll")); + 
if (hDll != NULL) { + pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); + pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); + pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + FreeLibrary(hDll); + } + if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + mi_win_enable_large_os_pages(); + } +} +#elif defined(__wasi__) +void _mi_os_init(void) { + os_overcommit = false; + os_page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB + os_alloc_granularity = 16; +} + +#else // generic unix + +static void os_detect_overcommit(void) { +#if defined(__linux__) + int fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd < 0) return; + char buf[32]; + ssize_t nread = read(fd, &buf, sizeof(buf)); + close(fd); + // + // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) + if (nread >= 1) { + os_overcommit = (buf[0] == '0' || buf[0] == '1'); + } +#elif defined(__FreeBSD__) + int val = 0; + size_t olen = sizeof(val); + if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { + os_overcommit = (val != 0); + } +#else + // default: overcommit is true +#endif +} + +void _mi_os_init(void) { + // get the page size + long result = sysconf(_SC_PAGESIZE); + if (result > 0) { + os_page_size = (size_t)result; + os_alloc_granularity = os_page_size; + } + large_os_page_size = 2*MI_MiB; // TODO: can we query the OS for this? 
+ os_detect_overcommit(); +} +#endif + + +#if defined(MADV_NORMAL) +static int mi_madvise(void* addr, size_t length, int advice) { + #if defined(__sun) + return madvise((caddr_t)addr, length, advice); // Solaris needs cast (issue #520) + #else + return madvise(addr, length, advice); + #endif +} +#endif + + +/* ----------------------------------------------------------- + aligned hinting +-------------------------------------------------------------- */ + +// On 64-bit systems, we can do efficient aligned allocation by using +// the 2TiB to 30TiB area to allocate those. +#if (MI_INTPTR_SIZE >= 8) +static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; + +// Return a MI_SEGMENT_SIZE aligned address that is probably available. +// If this returns NULL, the OS will determine the address but on some OS's that may not be +// properly aligned which can be more costly as it needs to be adjusted afterwards. +// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; +// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses +// in the middle of the 2TiB - 6TiB address range (see issue #372)) + +#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start +#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) +#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) + +static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) +{ + if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL; + size = _mi_align_up(size, MI_SEGMENT_SIZE); + if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096. + #if (MI_SECURE>0) + size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. 
+ #endif + + uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); + if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize + uintptr_t init = MI_HINT_BASE; + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB + #endif + uintptr_t expected = hint + size; + mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); + hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all + } + if (hint%try_alignment != 0) return NULL; + return (void*)hint; +} +#else +static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { + MI_UNUSED(try_alignment); MI_UNUSED(size); + return NULL; +} +#endif + +/* ----------------------------------------------------------- + Free memory +-------------------------------------------------------------- */ + +static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) +{ + if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) + bool err = false; +#if defined(_WIN32) + DWORD errcode = 0; + err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + if (errcode == ERROR_INVALID_ADDRESS) { + // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside + // the memory region returned by VirtualAlloc; in that case we need to free using + // the start of the region. 
+ MEMORY_BASIC_INFORMATION info = { 0, 0 }; + VirtualQuery(addr, &info, sizeof(info)); + if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < MI_SEGMENT_SIZE) { + errcode = 0; + err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + } + } + if (errcode != 0) { + _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); + } +#elif defined(MI_USE_SBRK) || defined(__wasi__) + err = false; // sbrk heap cannot be shrunk +#else + err = (munmap(addr, size) == -1); + if (err) { + _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); + } +#endif + if (was_committed) { _mi_stat_decrease(&stats->committed, size); } + _mi_stat_decrease(&stats->reserved, size); + return !err; +} + + +/* ----------------------------------------------------------- + Raw allocation on Windows (VirtualAlloc) +-------------------------------------------------------------- */ + +#ifdef _WIN32 + +#define MEM_COMMIT_RESERVE (MEM_COMMIT|MEM_RESERVE) + +static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { +#if (MI_INTPTR_SIZE >= 8) + // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = mi_os_get_aligned_hint(try_alignment,size); + if (hint != NULL) { + void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); + if (p != NULL) return p; + _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); + // fall through on error + } + } +#endif + // on modern Windows try use VirtualAlloc2 for aligned allocation + if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { + MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; + 
reqs.Alignment = try_alignment; + MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; + param.Type.Type = MiMemExtendedParameterAddressRequirements; + param.Arg.Pointer = &reqs; + void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); + if (p != NULL) return p; + _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); + // fall through on error + } + // last resort + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +} + +static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { + mi_assert_internal(!(large_only && !allow_large)); + static _Atomic(size_t) large_page_try_ok; // = 0; + void* p = NULL; + // Try to allocate large OS pages (2MiB) if allowed or required. + if ((large_only || use_large_os_page(size, try_alignment)) + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + // large OS pages must always reserve and commit. + *is_large = true; + p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES); + if (large_only) return p; + // fall back to non-large page allocation on error (`p == NULL`). 
+ if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations + } + } + } + // Fall back to regular page allocation + if (p == NULL) { + *is_large = ((flags&MEM_LARGE_PAGES) != 0); + p = mi_win_virtual_allocx(addr, size, try_alignment, flags); + } + if (p == NULL) { + _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); + } + return p; +} + +/* ----------------------------------------------------------- + Raw allocation using `sbrk` or `wasm_memory_grow` +-------------------------------------------------------------- */ + +#elif defined(MI_USE_SBRK) || defined(__wasi__) +#if defined(MI_USE_SBRK) + static void* mi_memory_grow( size_t size ) { + void* p = sbrk(size); + if (p == (void*)(-1)) return NULL; + #if !defined(__wasi__) // on wasi this is always zero initialized already (?) + memset(p,0,size); + #endif + return p; + } +#elif defined(__wasi__) + static void* mi_memory_grow( size_t size ) { + size_t base = (size > 0 ? 
__builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) + : __builtin_wasm_memory_size(0)); + if (base == SIZE_MAX) return NULL; + return (void*)(base * _mi_os_page_size()); + } +#endif + +#if defined(MI_USE_PTHREADS) +static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + +static void* mi_heap_grow(size_t size, size_t try_alignment) { + void* p = NULL; + if (try_alignment <= 1) { + // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + p = mi_memory_grow(size); + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + } + else { + void* base = NULL; + size_t alloc_size = 0; + // to allocate aligned use a lock to try to avoid thread interaction + // between getting the current size and actual allocation + // (also, `sbrk` is not thread safe in general) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + { + void* current = mi_memory_grow(0); // get current size + if (current != NULL) { + void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space + alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); + base = mi_memory_grow(alloc_size); + } + } + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + if (base != NULL) { + p = mi_align_up_ptr(base, try_alignment); + if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { + // another thread used wasm_memory_grow/sbrk in-between and we do not have enough + // space after alignment. 
Give up (and waste the space as we cannot shrink :-( ) + // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) + p = NULL; + } + } + } + if (p == NULL) { + _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); + errno = ENOMEM; + return NULL; + } + mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + return p; +} + +/* ----------------------------------------------------------- + Raw allocation on Unix's (mmap) +-------------------------------------------------------------- */ +#else +#define MI_OS_USE_MMAP +static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + MI_UNUSED(try_alignment); + #if defined(MAP_ALIGNED) // BSD + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + size_t n = mi_bsr(try_alignment); + if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB + flags |= MAP_ALIGNED(n); + void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #elif defined(MAP_ALIGN) // Solaris + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + #endif + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = mi_os_get_aligned_hint(try_alignment, size); + if (hint != NULL) { + void* p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #endif + // regular mmap + void* p = mmap(addr, 
size, protect_flags, flags, fd, 0); + if (p!=MAP_FAILED) return p; + // failed to allocate + return NULL; +} + +static int mi_unix_mmap_fd(void) { +#if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) os_tag = 100; + return VM_MAKE_TAG(os_tag); +#else + return -1; +#endif +} + +static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { + void* p = NULL; + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif + const int fd = mi_unix_mmap_fd(); + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + if (_mi_os_has_overcommit()) { + flags |= MAP_NORESERVE; + } + #if defined(PROT_MAX) + protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + // huge page allocation + if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { + static _Atomic(size_t) large_page_try_ok; // = 0; + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // If the OS is not configured for large OS pages, or the user does not have + // enough permission, the `mmap` will always fail (but it might also fail for other reasons). + // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times + // to avoid too many failing calls to mmap. 
+ mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux + int lfd = fd; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_1GB + static bool mi_huge_pages_available = true; + if ((size % MI_GiB) == 0 && mi_huge_pages_available) { + lflags |= MAP_HUGE_1GB; + } + else + #endif + { + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + } + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif + if (large_only || lflags != flags) { + // try large OS page allocation + *is_large = true; + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif + if (large_only) return p; + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations + } + } + } + } + // regular allocation + if (p == NULL) { + *is_large = false; + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); + if (p != NULL) { + #if defined(MADV_HUGEPAGE) + // Many Linux systems don't allow MAP_HUGETLB but they support instead + // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE + // though since properly aligned allocations will already use large pages if available + // in that case -- in particular for our large regions (in `memory.c`). 
+ // However, some systems only allow THP if called with explicit `madvise`, so + // when large OS pages are enabled for mimalloc, we call `madvise` anyways. + if (allow_large && use_large_os_page(size, try_alignment)) { + if (mi_madvise(p, size, MADV_HUGEPAGE) == 0) { + *is_large = true; // possibly + }; + } + #elif defined(__sun) + if (allow_large && use_large_os_page(size, try_alignment)) { + struct memcntl_mha cmd = {0}; + cmd.mha_pagesize = large_os_page_size; + cmd.mha_cmd = MHA_MAPSIZE_VA; + if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { + *is_large = true; + } + } + #endif + } + } + if (p == NULL) { + _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); + } + return p; +} +#endif + + +/* ----------------------------------------------------------- + Primitive allocation from the OS. +-------------------------------------------------------------- */ + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
+static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + if (size == 0) return NULL; + if (!commit) allow_large = false; + if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning + + void* p = NULL; + /* + if (commit && allow_large) { + p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); + if (p != NULL) { + *is_large = true; + return p; + } + } + */ + + #if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) { flags |= MEM_COMMIT; } + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + #elif defined(MI_USE_SBRK) || defined(__wasi__) + MI_UNUSED(allow_large); + *is_large = false; + p = mi_heap_grow(size, try_alignment); + #else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + #endif + mi_stat_counter_increase(stats->mmap_calls, 1); + if (p != NULL) { + _mi_stat_increase(&stats->reserved, size); + if (commit) { _mi_stat_increase(&stats->committed, size); } + } + return p; +} + + +// Primitive aligned allocation from the OS. +// This function guarantees the allocated memory is aligned. 
+static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { + mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_large != NULL); + if (!commit) allow_large = false; + if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; + size = _mi_align_up(size, _mi_os_page_size()); + + // try first with a hint (this will be aligned directly on Win 10+ or BSD) + void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); + if (p == NULL) return NULL; + + // if not aligned, free it, overallocate, and unmap around it + if (((uintptr_t)p % alignment != 0)) { + mi_os_mem_free(p, size, commit, stats); + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); + if (size >= (SIZE_MAX - alignment)) return NULL; // overflow + const size_t over_size = size + alignment; + +#if _WIN32 + // over-allocate uncommitted (virtual) memory + p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); + if (p == NULL) return NULL; + + // set p to the aligned part in the full region + // note: this is dangerous on Windows as VirtualFree needs the actual region pointer + // but in mi_os_mem_free we handle this (hopefully exceptional) situation. + p = mi_align_up_ptr(p, alignment); + + // explicitly commit only the aligned part + if (commit) { + _mi_os_commit(p, size, NULL, stats); + } +#else + // overallocate... + p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); + if (p == NULL) return NULL; + // and selectively unmap parts around the over-allocated area. 
(noop on sbrk) + void* aligned_p = mi_align_up_ptr(p, alignment); + size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; + size_t mid_size = _mi_align_up(size, _mi_os_page_size()); + size_t post_size = over_size - pre_size - mid_size; + mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); + if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); + if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + // we can return the aligned pointer on `mmap` (and sbrk) systems + p = aligned_p; +#endif + } + + mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); + return p; +} + + +/* ----------------------------------------------------------- + OS API: alloc, free, alloc_aligned +----------------------------------------------------------- */ + +void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + if (size == 0) return NULL; + size = _mi_os_good_alloc_size(size); + bool is_large = false; + return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); +} + +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + if (size == 0 || p == NULL) return; + size = _mi_os_good_alloc_size(size); + mi_os_mem_free(p, size, was_committed, stats); +} + +void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { + _mi_os_free_ex(p, size, true, stats); +} + +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) +{ + MI_UNUSED(&mi_os_get_aligned_hint); // suppress unused warnings + MI_UNUSED(tld_stats); + if (size == 0) return NULL; + size = _mi_os_good_alloc_size(size); + alignment = _mi_align_up(alignment, _mi_os_page_size()); + bool allow_large = false; + if (large != NULL) { + allow_large = *large; + *large = false; + } + return mi_os_mem_alloc_aligned(size, 
alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); +} + + + +/* ----------------------------------------------------------- + OS memory API: reset, commit, decommit, protect, unprotect. +----------------------------------------------------------- */ + + +// OS page align within a given area, either conservative (pages inside the area only), +// or not (straddling pages outside the area is possible) +static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { + mi_assert(addr != NULL && size > 0); + if (newsize != NULL) *newsize = 0; + if (size == 0 || addr == NULL) return NULL; + + // page align conservatively within the range + void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size()) + : mi_align_down_ptr(addr, _mi_os_page_size())); + void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size()) + : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size())); + ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start; + if (diff <= 0) return NULL; + + mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size)); + if (newsize != NULL) *newsize = (size_t)diff; + return start; +} + +static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) { + return mi_os_page_align_areax(true, addr, size, newsize); +} + +static void mi_mprotect_hint(int err) { +#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count. For example:\n" + " > sudo sysctl -w vm.max_map_count=262144\n"); + } +#else + MI_UNUSED(err); +#endif +} + +// Commit/Decommit memory. +// Usually commit is aligned liberal, while decommit is aligned conservative. 
+// (but not for the reset version where we want commit to be conservative as well) +static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { + // page align in the range, commit liberally, decommit conservative + if (is_zero != NULL) { *is_zero = false; } + size_t csize; + void* start = mi_os_page_align_areax(conservative, addr, size, &csize); + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) + int err = 0; + if (commit) { + _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit + _mi_stat_counter_increase(&stats->commit_calls, 1); + } + else { + _mi_stat_decrease(&stats->committed, size); + } + + #if defined(_WIN32) + if (commit) { + // *is_zero = true; // note: if the memory was already committed, the call succeeds but the memory is not zero'd + void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); + err = (p == start ? 0 : GetLastError()); + } + else { + BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); + err = (ok ? 0 : GetLastError()); + } + #elif defined(__wasi__) + // WebAssembly guests can't control memory protection + #elif 0 && defined(MAP_FIXED) && !defined(__APPLE__) + // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) + if (commit) { + // commit: just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { + // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) + const int fd = mi_unix_mmap_fd(); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } + } + #else + // Linux, macOSX and others. 
+ if (commit) { + // commit: ensure we can access the area + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { + #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) + err = madvise(start, csize, MADV_DONTNEED); + #else + // decommit: just disable access (also used in debug and secure mode to trap on illegal access) + err = mprotect(start, csize, PROT_NONE); + if (err != 0) { err = errno; } + #endif + //#if defined(MADV_FREE_REUSE) + // while ((err = mi_madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; } + //#endif + } + #endif + if (err != 0) { + _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err); + mi_mprotect_hint(err); + } + mi_assert_internal(err == 0); + return (err == 0); +} + +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); +} + +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + bool is_zero; + return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); +} + +/* +static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, true // conservative + , is_zero, stats); +} +*/ + +// Signal to the OS that the address range is no longer in use +// but may be used later again. This will release physical memory +// pages and reduce swapping while keeping the memory committed. +// We page align to a conservative area inside the range to reset. 
+static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) { + // page align conservatively within the range + size_t csize; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) + if (reset) _mi_stat_increase(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); + if (!reset) return true; // nothing to do on unreset! + + #if (MI_DEBUG>1) + if (MI_SECURE==0) { + memset(start, 0, csize); // pretend it is eagerly reset + } + #endif + +#if defined(_WIN32) + // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory + void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); + mi_assert_internal(p == start); + #if 1 + if (p == start && start != NULL) { + VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + } + #endif + if (p != start) return false; +#else +#if defined(MADV_FREE) + static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); + int oadvice = (int)mi_atomic_load_relaxed(&advice); + int err; + while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on + mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); + err = mi_madvise(start, csize, MADV_DONTNEED); + } +#elif defined(__wasi__) + int err = 0; +#else + int err = mi_madvise(start, csize, MADV_DONTNEED); +#endif + if (err != 0) { + _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, errno); + } + //mi_assert(err == 0); + if (err != 0) return false; +#endif + return true; +} + +// Signal to the OS that the address range is no longer in use +// but may be used later again. This will release physical memory +// pages and reduce swapping while keeping the memory committed. 
+// We page align to a conservative area inside the range to reset.
+// `tld_stats` is ignored: the main statistics (`_mi_stats_main`) are updated.
+// Returns the result of `mi_os_resetx` called with `reset == true`.
+bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) {
+  MI_UNUSED(tld_stats);
+  mi_stats_t* stats = &_mi_stats_main;
+  return mi_os_resetx(addr, size, true, stats);
+}
+
+// (disabled) counterpart of `_mi_os_reset`
+/*
+bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
+  MI_UNUSED(tld_stats);
+  mi_stats_t* stats = &_mi_stats_main;
+  if (mi_option_is_enabled(mi_option_reset_decommits)) {
+    return mi_os_commit_unreset(addr, size, is_zero, stats);  // re-commit it (conservatively!)
+  }
+  else {
+    *is_zero = false;
+    return mi_os_resetx(addr, size, false, stats);
+  }
+}
+*/
+
+// Protect a region in memory to be not accessible.
+// With `protect == false` the region is made readable and writable again.
+// Only the whole OS pages inside [addr, addr+size) are affected; if there are none, false is returned.
+// On an OS error a warning is printed and false is returned.
+static bool mi_os_protectx(void* addr, size_t size, bool protect) {
+  // page align conservatively within the range
+  size_t csize = 0;
+  void* start = mi_os_page_align_area_conservative(addr, size, &csize);
+  if (csize == 0) return false;
+  /*
+  if (_mi_os_is_huge_reserved(addr)) {
+    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
+  }
+  */
+  int err = 0;
+#ifdef _WIN32
+  DWORD oldprotect = 0;
+  BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect);
+  err = (ok ? 0 : GetLastError());
+#elif defined(__wasi__)
+  // no protection call is made on wasi; always reported as success
+  err = 0;
+#else
+  err = mprotect(start, csize, protect ? PROT_NONE : (PROT_READ | PROT_WRITE));
+  if (err != 0) { err = errno; }
+#endif
+  if (err != 0) {
+    _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err);
+    mi_mprotect_hint(err);
+  }
+  return (err == 0);
+}
+
+// Make the whole pages inside [addr, addr+size) inaccessible; see `mi_os_protectx`.
+bool _mi_os_protect(void* addr, size_t size) {
+  return mi_os_protectx(addr, size, true);
+}
+
+// Make the whole pages inside [addr, addr+size) readable and writable again; see `mi_os_protectx`.
+bool _mi_os_unprotect(void* addr, size_t size) {
+  return mi_os_protectx(addr, size, false);
+}
+
+
+
+// Give back the tail [p+newsize, p+oldsize) of an OS allocation.
+// Returns true when the sizes are equal (nothing to do) or when the tail was released
+// (decommitted on Windows, freed elsewhere). Returns false when `p` is NULL, when
+// `oldsize < newsize`, or when the tail does not start on an OS page boundary or holds no whole page.
+bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) {
+  // page align conservatively within the range
+  mi_assert_internal(oldsize > newsize && p != NULL);
+  if (oldsize < newsize || p == NULL) return false;
+  if (oldsize == newsize) return true;
+
+  // oldsize and newsize should be page aligned or we cannot shrink precisely
+  void* addr = (uint8_t*)p + newsize;
+  size_t size = 0;
+  void* start = mi_os_page_align_area_conservative(addr, oldsize - newsize, &size);
+  if (size == 0 || start != addr) return false;
+
+#ifdef _WIN32
+  // we cannot shrink on windows, but we can decommit
+  return _mi_os_decommit(start, size, stats);
+#else
+  return mi_os_mem_free(start, size, true, stats);
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------
+Support for allocating huge OS pages (1GiB) that are reserved up-front
+and possibly associated with a specific NUMA node.
(use `numa_node>=0`) +-----------------------------------------------------------------------------*/ +#define MI_HUGE_OS_PAGE_SIZE (MI_GiB) + +#if defined(_WIN32) && (MI_INTPTR_SIZE >= 8) +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) +{ + mi_assert_internal(size%MI_GiB == 0); + mi_assert_internal(addr != NULL); + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + + mi_win_enable_large_os_pages(); + + MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { + params[0].Type.Type = MiMemExtendedParameterAttributeFlags; + params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + ULONG param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type.Type = MiMemExtendedParameterNumaNode; + params[1].Arg.ULong = (unsigned)numa_node; + } + SIZE_T psize = size; + void* base = addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular large pages + mi_huge_pages_available = false; // don't try further huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } + } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type.Type = MiMemExtendedParameterNumaNode; + params[0].Arg.ULong = (unsigned)numa_node; + return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + } + + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +} + +#elif defined(MI_OS_USE_MMAP) && 
(MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) +#include +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif +#if defined(SYS_mbind) +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); + return 0; +} +#endif +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { + mi_assert_internal(size%MI_GiB == 0); + bool is_large = true; + void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (p == NULL) return NULL; + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + unsigned long numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? 
+ // see: + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); + } + } + return p; +} +#else +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); + return NULL; +} +#endif + +#if (MI_INTPTR_SIZE >= 8) +// To ensure proper alignment, use our own area for huge OS pages +static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0 + +// Claim an aligned address range for huge pages +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; + const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + + uintptr_t start = 0; + uintptr_t end = 0; + uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start); + do { + start = huge_start; + if (start == 0) { + // Initialize the start address after the 32TiB area + start = ((uintptr_t)32 << 40); // 32TiB virtual start address +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB +#endif + } + end = start + size; + mi_assert_internal(end % MI_SEGMENT_SIZE == 0); + } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end)); + + if (total_size != NULL) *total_size = size; + return (uint8_t*)start; +} +#else +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + MI_UNUSED(pages); + if (total_size != NULL) *total_size = 0; + return NULL; +} +#endif + +// Allocate MI_SEGMENT_SIZE aligned huge pages +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { + if (psize != NULL) *psize = 0; + if 
(pages_reserved != NULL) *pages_reserved = 0; + size_t size = 0; + uint8_t* start = mi_os_claim_huge_pages(pages, &size); + if (start == NULL) return NULL; // or 32-bit systems + + // Allocate one page at the time but try to place them contiguously + // We allocate one page at the time to be able to abort if it takes too long + // or to at least allocate as many as available on the system. + mi_msecs_t start_t = _mi_clock_start(); + size_t page; + for (page = 0; page < pages; page++) { + // allocate a page + void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); + void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + + // Did we succeed at a contiguous address? + if (p != addr) { + // no success, issue a warning and break + if (p != NULL) { + _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); + _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + } + break; + } + + // success, record it + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); + + // check for timeout + if (max_msecs > 0) { + mi_msecs_t elapsed = _mi_clock_end(start_t); + if (page >= 1) { + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; + } + } + if (elapsed > max_msecs) { + _mi_warning_message("huge page allocation timed out\n"); + break; + } + } + } + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + if (pages_reserved != NULL) { *pages_reserved = page; } + if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } + return (page == 0 ? NULL : start); +} + +// free every huge page in a range individually (as we allocated per page) +// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. 
+void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { + if (p==NULL || size==0) return; + uint8_t* base = (uint8_t*)p; + while (size >= MI_HUGE_OS_PAGE_SIZE) { + _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + size -= MI_HUGE_OS_PAGE_SIZE; + base += MI_HUGE_OS_PAGE_SIZE; + } +} + +/* ---------------------------------------------------------------------------- +Support NUMA aware allocation +-----------------------------------------------------------------------------*/ +#ifdef _WIN32 +static size_t mi_os_numa_nodex(void) { + USHORT numa_node = 0; + if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { + // Extended API is supported + MI_PROCESSOR_NUMBER pnum; + (*pGetCurrentProcessorNumberEx)(&pnum); + USHORT nnode = 0; + BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); + if (ok) numa_node = nnode; + } + else { + // Vista or earlier, use older API that is limited to 64 processors. Issue #277 + DWORD pnum = GetCurrentProcessorNumber(); + UCHAR nnode = 0; + BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode); + if (ok) numa_node = nnode; + } + return numa_node; +} + +static size_t mi_os_numa_node_countx(void) { + ULONG numa_max = 0; + GetNumaHighestNodeNumber(&numa_max); + // find the highest node number that has actual processors assigned to it. Issue #282 + while(numa_max > 0) { + if (pGetNumaNodeProcessorMaskEx != NULL) { + // Extended API is supported + GROUP_AFFINITY affinity; + if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { + if (affinity.Mask != 0) break; // found the maximum non-empty node + } + } + else { + // Vista or earlier, use older API that is limited to 64 processors. 
+ ULONGLONG mask; + if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + }; + } + // max node was invalid or had no processor assigned, try again + numa_max--; + } + return ((size_t)numa_max + 1); +} +#elif defined(__linux__) +#include // getcpu +#include // access + +static size_t mi_os_numa_nodex(void) { +#ifdef SYS_getcpu + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); + if (err != 0) return 0; + return node; +#else + return 0; +#endif +} +static size_t mi_os_numa_node_countx(void) { + char buf[128]; + unsigned node = 0; + for(node = 0; node < 256; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); + if (access(buf,R_OK) != 0) break; + } + return (node+1); +} +#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 +static size_t mi_os_numa_nodex(void) { + domainset_t dom; + size_t node; + int policy; + if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; + for (node = 0; node < MAXMEMDOM; node++) { + if (DOMAINSET_ISSET(node, &dom)) return node; + } + return 0ul; +} +static size_t mi_os_numa_node_countx(void) { + size_t ndomains = 0; + size_t len = sizeof(ndomains); + if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; + return ndomains; +} +#elif defined(__DragonFly__) +static size_t mi_os_numa_nodex(void) { + // TODO: DragonFly does not seem to provide any userland means to get this information. 
+ return 0ul; +} +static size_t mi_os_numa_node_countx(void) { + size_t ncpus = 0, nvirtcoresperphys = 0; + size_t len = sizeof(size_t); + if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; + if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; + return nvirtcoresperphys * ncpus; +} +#else +static size_t mi_os_numa_nodex(void) { + return 0; +} +static size_t mi_os_numa_node_countx(void) { + return 1; +} +#endif + +_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count + +size_t _mi_os_numa_node_count_get(void) { + size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); + if (count <= 0) { + long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? + if (ncount > 0) { + count = (size_t)ncount; + } + else { + count = mi_os_numa_node_countx(); // or detect dynamically + if (count == 0) count = 1; + } + mi_atomic_store_release(&_mi_numa_node_count, count); // save it + _mi_verbose_message("using %zd numa regions\n", count); + } + return count; +} + +int _mi_os_numa_node_get(mi_os_tld_t* tld) { + MI_UNUSED(tld); + size_t numa_count = _mi_os_numa_node_count(); + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + // never more than the node count and >= 0 + size_t numa_node = mi_os_numa_nodex(); + if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } + return (int)numa_node; +} diff --git a/compat/mimalloc/page-queue.c b/compat/mimalloc/page-queue.c new file mode 100644 index 00000000000000..e1a8a6a6592b86 --- /dev/null +++ b/compat/mimalloc/page-queue.c @@ -0,0 +1,331 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018-2020, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + Definition of page queues for each block size +----------------------------------------------------------- */ + +#ifndef MI_IN_PAGE_C +#error "this file should be included from 'page.c'" +#endif + +/* ----------------------------------------------------------- + Minimal alignment in machine words (i.e. `sizeof(void*)`) +----------------------------------------------------------- */ + +#if (MI_MAX_ALIGN_SIZE > 4*MI_INTPTR_SIZE) + #error "define alignment for more than 4x word size for this platform" +#elif (MI_MAX_ALIGN_SIZE > 2*MI_INTPTR_SIZE) + #define MI_ALIGN4W // 4 machine words minimal alignment +#elif (MI_MAX_ALIGN_SIZE > MI_INTPTR_SIZE) + #define MI_ALIGN2W // 2 machine words minimal alignment +#else + // ok, default alignment is 1 word +#endif + + +/* ----------------------------------------------------------- + Queue query +----------------------------------------------------------- */ + + +static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { + return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+sizeof(uintptr_t))); +} + +static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { + return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+(2*sizeof(uintptr_t)))); +} + +static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { + return (pq->block_size > MI_MEDIUM_OBJ_SIZE_MAX); +} + +/* ----------------------------------------------------------- + Bins +----------------------------------------------------------- */ + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. 
+static inline uint8_t mi_bin(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + #if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } + #endif + else if (wsize > MI_MEDIUM_OBJ_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + #if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes + #endif + wsize--; + // find the highest bit + uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0 + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). + // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + mi_assert_internal(bin < MI_BIN_HUGE); + } + mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE); + return bin; +} + + + +/* ----------------------------------------------------------- + Queue of pages with free blocks +----------------------------------------------------------- */ + +uint8_t _mi_bin(size_t size) { + return mi_bin(size); +} + +size_t _mi_bin_size(uint8_t bin) { + return _mi_heap_empty.pages[bin].block_size; +} + +// Good size for allocation +size_t mi_good_size(size_t size) mi_attr_noexcept { + if (size <= MI_MEDIUM_OBJ_SIZE_MAX) { + return _mi_bin_size(mi_bin(size)); + } + else { + return _mi_align_up(size,_mi_os_page_size()); + } +} + +#if (MI_DEBUG>1) +static bool mi_page_queue_contains(mi_page_queue_t* queue, const mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_page_t* list = queue->first; + while (list != NULL) { + mi_assert_internal(list->next == NULL || list->next->prev == list); + mi_assert_internal(list->prev == NULL || list->prev->next == list); + if (list == page) 
break; + list = list->next; + } + return (list == page); +} + +#endif + +#if (MI_DEBUG>1) +static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* pq) { + return (pq >= &heap->pages[0] && pq <= &heap->pages[MI_BIN_FULL]); +} +#endif + +static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size)); + mi_heap_t* heap = mi_page_heap(page); + mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); + mi_page_queue_t* pq = &heap->pages[bin]; + mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); + mi_assert_expensive(mi_page_queue_contains(pq, page)); + return pq; +} + +static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size)); + mi_assert_internal(bin <= MI_BIN_FULL); + mi_page_queue_t* pq = &heap->pages[bin]; + mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); + return pq; +} + +// The current small page array is for efficiency and for each +// small size (up to 256) it points directly to the page for that +// size without having to compute the bin. This means when the +// current free page queue is updated for a small bin, we need to update a +// range of entries in `_mi_page_small_free`. 
+static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_queue_t* pq) { + mi_assert_internal(mi_heap_contains_queue(heap,pq)); + size_t size = pq->block_size; + if (size > MI_SMALL_SIZE_MAX) return; + + mi_page_t* page = pq->first; + if (pq->first == NULL) page = (mi_page_t*)&_mi_page_empty; + + // find index in the right direct page array + size_t start; + size_t idx = _mi_wsize_from_size(size); + mi_page_t** pages_free = heap->pages_free_direct; + + if (pages_free[idx] == page) return; // already set + + // find start slot + if (idx<=1) { + start = 0; + } + else { + // find previous size; due to minimal alignment upto 3 previous bins may need to be skipped + uint8_t bin = mi_bin(size); + const mi_page_queue_t* prev = pq - 1; + while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) { + prev--; + } + start = 1 + _mi_wsize_from_size(prev->block_size); + if (start > idx) start = idx; + } + + // set size range to the right page + mi_assert(start <= idx); + for (size_t sz = start; sz <= idx; sz++) { + pages_free[sz] = page; + } +} + +/* +static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { + return (queue->first == NULL); +} +*/ + +static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(mi_page_queue_contains(queue, page)); + mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_heap_t* heap = mi_page_heap(page); + + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == queue->last) queue->last = page->prev; + if (page == queue->first) { + queue->first = page->next; + // update first + mi_assert_internal(mi_heap_contains_queue(heap, queue)); + mi_heap_queue_first_update(heap,queue); + } + heap->page_count--; + page->next = NULL; + 
page->prev = NULL; + // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL); + mi_page_set_in_full(page,false); +} + + +static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(!mi_page_queue_contains(queue, page)); + + mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + mi_assert_internal(page->xblock_size == queue->block_size || + (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + + mi_page_set_in_full(page, mi_page_queue_is_full(queue)); + // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap); + page->next = queue->first; + page->prev = NULL; + if (queue->first != NULL) { + mi_assert_internal(queue->first->prev == NULL); + queue->first->prev = page; + queue->first = page; + } + else { + queue->first = queue->last = page; + } + + // update direct + mi_heap_queue_first_update(heap, queue); + heap->page_count++; +} + + +static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(mi_page_queue_contains(from, page)); + mi_assert_expensive(!mi_page_queue_contains(to, page)); + + mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || + (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || + (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + + mi_heap_t* heap = mi_page_heap(page); + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == from->last) from->last = page->prev; + if (page == from->first) { + from->first = page->next; + // update first 
+ mi_assert_internal(mi_heap_contains_queue(heap, from)); + mi_heap_queue_first_update(heap, from); + } + + page->prev = to->last; + page->next = NULL; + if (to->last != NULL) { + mi_assert_internal(heap == mi_page_heap(to->last)); + to->last->next = page; + to->last = page; + } + else { + to->first = page; + to->last = page; + mi_heap_queue_first_update(heap, to); + } + + mi_page_set_in_full(page, mi_page_queue_is_full(to)); +} + +// Only called from `mi_heap_absorb`. +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) { + mi_assert_internal(mi_heap_contains_queue(heap,pq)); + mi_assert_internal(pq->block_size == append->block_size); + + if (append->first==NULL) return 0; + + // set append pages to new heap and count + size_t count = 0; + for (mi_page_t* page = append->first; page != NULL; page = page->next) { + // inline `mi_page_set_heap` to avoid wrong assertion during absorption; + // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. + mi_atomic_store_release(&page->xheap, (uintptr_t)heap); + // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a + // side effect that it spins until any DELAYED_FREEING is finished. This ensures + // that after appending only the new heap will be used for delayed free operations. 
+ _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); + count++; + } + + if (pq->last==NULL) { + // take over afresh + mi_assert_internal(pq->first==NULL); + pq->first = append->first; + pq->last = append->last; + mi_heap_queue_first_update(heap, pq); + } + else { + // append to end + mi_assert_internal(pq->last!=NULL); + mi_assert_internal(append->first!=NULL); + pq->last->next = append->first; + append->first->prev = pq->last; + pq->last = append->last; + } + return count; +} diff --git a/compat/mimalloc/page.c b/compat/mimalloc/page.c new file mode 100644 index 00000000000000..121683015de6b2 --- /dev/null +++ b/compat/mimalloc/page.c @@ -0,0 +1,869 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018-2020, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + The core of the allocator. Every segment contains + pages of a certain block size. The main function + exported is `mi_malloc_generic`. 
+----------------------------------------------------------- */ + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +/* ----------------------------------------------------------- + Definition of page queues for each block size +----------------------------------------------------------- */ + +#define MI_IN_PAGE_C +#include "page-queue.c" +#undef MI_IN_PAGE_C + + +/* ----------------------------------------------------------- + Page helpers +----------------------------------------------------------- */ + +// Index a block in a page +static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { + MI_UNUSED(page); + mi_assert_internal(page != NULL); + mi_assert_internal(i <= page->reserved); + return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); +} + +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); + +#if (MI_DEBUG>=3) +static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { + size_t count = 0; + while (head != NULL) { + mi_assert_internal(page == _mi_ptr_page(head)); + count++; + head = mi_block_next(page, head); + } + return count; +} + +/* +// Start of the page available memory +static inline uint8_t* mi_page_area(const mi_page_t* page) { + return _mi_page_start(_mi_page_segment(page), page, NULL); +} +*/ + +static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { + size_t psize; + uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize); + mi_block_t* start = (mi_block_t*)page_area; + mi_block_t* end = (mi_block_t*)(page_area + psize); + while(p != NULL) { + if (p < start || p >= end) return false; + p = mi_block_next(page, p); + } + return true; +} + +static bool mi_page_is_valid_init(mi_page_t* page) { + mi_assert_internal(page->xblock_size > 0); + mi_assert_internal(page->used <= page->capacity); + 
mi_assert_internal(page->capacity <= page->reserved); + + mi_segment_t* segment = _mi_page_segment(page); + uint8_t* start = _mi_page_start(segment,page,NULL); + mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); + //const size_t bsize = mi_page_block_size(page); + //mi_assert_internal(start + page->capacity*page->block_size == page->top); + + mi_assert_internal(mi_page_list_is_valid(page,page->free)); + mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); + + #if MI_DEBUG>3 // generally too expensive to check this + if (page->is_zero) { + const size_t ubsize = mi_page_usable_block_size(page); + for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); + } + } + #endif + + mi_block_t* tfree = mi_page_thread_free(page); + mi_assert_internal(mi_page_list_is_valid(page, tfree)); + //size_t tfree_count = mi_page_list_count(page, tfree); + //mi_assert_internal(tfree_count <= page->thread_freed + 1); + + size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); + mi_assert_internal(page->used + free_count == page->capacity); + + return true; +} + +bool _mi_page_is_valid(mi_page_t* page) { + mi_assert_internal(mi_page_is_valid_init(page)); + #if MI_SECURE + mi_assert_internal(page->keys[0] != 0); + #endif + if (mi_page_heap(page)!=NULL) { + mi_segment_t* segment = _mi_page_segment(page); + + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id); + if (segment->kind != MI_SEGMENT_HUGE) { + mi_page_queue_t* pq = mi_page_queue_of(page); + mi_assert_internal(mi_page_queue_contains(pq, page)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); + } + } + return 
true; +} +#endif + +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { + mi_thread_free_t tfreex; + mi_delayed_t old_delay; + mi_thread_free_t tfree; + do { + tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; + tfreex = mi_tf_set_delayed(tfree, delay); + old_delay = mi_tf_delayed(tfree); + if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { + mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail + } + else if (delay == old_delay) { + break; // avoid atomic operation if already equal + } + else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { + break; // leave never-delayed flag set + } + } while ((old_delay == MI_DELAYED_FREEING) || + !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); +} + +/* ----------------------------------------------------------- + Page collect the `local_free` and `thread_free` lists +----------------------------------------------------------- */ + +// Collect the local `thread_free` list using an atomic exchange. +// Note: The exchange must be done atomically as this is used right after +// moving to the full list in `mi_page_collect_ex` and we need to +// ensure that there was no race where the page became unfull just before the move. 
+static void _mi_page_thread_free_collect(mi_page_t* page) +{ + mi_block_t* head; + mi_thread_free_t tfreex; + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + head = mi_tf_block(tfree); + tfreex = mi_tf_set_block(tfree,NULL); + } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); + + // return if the list is empty + if (head == NULL) return; + + // find the tail -- also to get a proper count (without data races) + uint32_t max_count = page->capacity; // cannot collect more than capacity + uint32_t count = 1; + mi_block_t* tail = head; + mi_block_t* next; + while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { + count++; + tail = next; + } + // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) + if (count > max_count) { + _mi_error_message(EFAULT, "corrupted thread-free list\n"); + return; // the thread-free items cannot be freed + } + + // and append the current local free list + mi_block_set_next(page,tail, page->local_free); + page->local_free = head; + + // update counts now + page->used -= count; +} + +void _mi_page_free_collect(mi_page_t* page, bool force) { + mi_assert_internal(page!=NULL); + + // collect the thread free list + if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation + _mi_page_thread_free_collect(page); + } + + // and the local free list + if (page->local_free != NULL) { + if (mi_likely(page->free == NULL)) { + // usual case + page->free = page->local_free; + page->local_free = NULL; + page->is_zero = false; + } + else if (force) { + // append -- only on shutdown (force) as this is a linear operation + mi_block_t* tail = page->local_free; + mi_block_t* next; + while ((next = mi_block_next(page, tail)) != NULL) { + tail = next; + } + mi_block_set_next(page, tail, page->free); + page->free = page->local_free; + page->local_free = NULL; + page->is_zero = false; + } + } + + 
mi_assert_internal(!force || page->local_free == NULL); +} + + + +/* ----------------------------------------------------------- + Page fresh and retire +----------------------------------------------------------- */ + +// called from segments when reclaiming abandoned pages +void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { + mi_assert_expensive(mi_page_is_valid_init(page)); + + mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); + mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + mi_assert_internal(!page->is_reset); + // TODO: push on full queue immediately if it is full? + mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); + mi_page_queue_push(heap, pq, page); + mi_assert_expensive(_mi_page_is_valid(page)); +} + +// allocate a fresh page from a segment +static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { + mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); + mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); + if (page == NULL) { + // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) + return NULL; + } + mi_assert_internal(pq==NULL || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + mi_page_init(heap, page, block_size, heap->tld); + mi_heap_stat_increase(heap, pages, 1); + if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL + mi_assert_expensive(_mi_page_is_valid(page)); + return page; +} + +// Get a fresh page to use +static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { + mi_assert_internal(mi_heap_contains_queue(heap, pq)); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size); + if (page==NULL) return NULL; + mi_assert_internal(pq->block_size==mi_page_block_size(page)); + mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); + 
return page; +} + +/* ----------------------------------------------------------- + Do any delayed frees + (put there by other threads if they deallocated in a full page) +----------------------------------------------------------- */ +void _mi_heap_delayed_free(mi_heap_t* heap) { + // take over the list (note: no atomic exchange since it is often NULL) + mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; + + // and free them all + while(block != NULL) { + mi_block_t* next = mi_block_nextx(heap,block, heap->keys); + // use internal free instead of regular one to keep stats etc correct + if (!_mi_free_delayed_block(block)) { + // we might already start delayed freeing while another thread has not yet + // reset the delayed_freeing flag; in that case delay it further by reinserting. + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + do { + mi_block_set_nextx(heap, block, dfree, heap->keys); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } + block = next; + } +} + +/* ----------------------------------------------------------- + Unfull, abandon, free and retire +----------------------------------------------------------- */ + +// Move a page from the full list back to a regular list +void _mi_page_unfull(mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(mi_page_is_in_full(page)); + if (!mi_page_is_in_full(page)) return; + + mi_heap_t* heap = mi_page_heap(page); + mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; + mi_page_set_in_full(page, false); // to get the right queue + mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); + mi_page_set_in_full(page, true); + mi_page_queue_enqueue_from(pq, pqfull, page); +} + +static void 
mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { + mi_assert_internal(pq == mi_page_queue_of(page)); + mi_assert_internal(!mi_page_immediate_available(page)); + mi_assert_internal(!mi_page_is_in_full(page)); + + if (mi_page_is_in_full(page)) return; + mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); + _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set +} + + +// Abandon a page with used blocks at the end of a thread. +// Note: only call if it is ensured that no references exist from +// the `page->heap->thread_delayed_free` into this page. +// Currently only called through `mi_heap_collect_ex` which ensures this. +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(pq == mi_page_queue_of(page)); + mi_assert_internal(mi_page_heap(page) != NULL); + + mi_heap_t* pheap = mi_page_heap(page); + + // remove from our page list + mi_segments_tld_t* segments_tld = &pheap->tld->segments; + mi_page_queue_remove(pq, page); + + // page is no longer associated with our heap + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, NULL); + +#if MI_DEBUG>1 + // check there are no references left.. 
+ for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) { + mi_assert_internal(_mi_ptr_page(block) != page); + } +#endif + + // and abandon it + mi_assert_internal(mi_page_heap(page) == NULL); + _mi_segment_page_abandon(page,segments_tld); +} + + +// Free a page with no more free blocks +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(pq == mi_page_queue_of(page)); + mi_assert_internal(mi_page_all_free(page)); + mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); + + // no more aligned blocks in here + mi_page_set_has_aligned(page, false); + + mi_heap_t* heap = mi_page_heap(page); + + // remove from the page list + // (no need to do _mi_heap_delayed_free first as all blocks are already free) + mi_segments_tld_t* segments_tld = &heap->tld->segments; + mi_page_queue_remove(pq, page); + + // and free it + mi_page_set_heap(page,NULL); + _mi_segment_page_free(page, force, segments_tld); +} + +// Retire parameters +#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX +#define MI_RETIRE_CYCLES (8) + +// Retire a page with no more used blocks +// Important to not retire too quickly though as new +// allocations might coming. +// Note: called from `mi_free` and benchmarks often +// trigger this due to freeing everything and then +// allocating again so careful when changing this. +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(mi_page_all_free(page)); + + mi_page_set_has_aligned(page, false); + + // don't retire too often.. + // (or we end up retiring and re-allocating most of the time) + // NOTE: refine this more: we should not retire if this + // is the only page left with free blocks. 
It is not clear + // how to check this efficiently though... + // for now, we don't retire if it is the only page left of this size class. + mi_page_queue_t* pq = mi_page_queue_of(page); + if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { + if (pq->last==page && pq->first==page) { // the only page in the queue? + mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); + page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + mi_heap_t* heap = mi_page_heap(page); + mi_assert_internal(pq >= heap->pages); + const size_t index = pq - heap->pages; + mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE); + if (index < heap->page_retired_min) heap->page_retired_min = index; + if (index > heap->page_retired_max) heap->page_retired_max = index; + mi_assert_internal(mi_page_all_free(page)); + return; // dont't free after all + } + } + _mi_page_free(page, pq, false); +} + +// free retired pages: we don't need to look at the entire queues +// since we only retire pages that are at the head position in a queue. +void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { + size_t min = MI_BIN_FULL; + size_t max = 0; + for(size_t bin = heap->page_retired_min; bin <= heap->page_retired_max; bin++) { + mi_page_queue_t* pq = &heap->pages[bin]; + mi_page_t* page = pq->first; + if (page != NULL && page->retire_expire != 0) { + if (mi_page_all_free(page)) { + page->retire_expire--; + if (force || page->retire_expire == 0) { + _mi_page_free(pq->first, pq, force); + } + else { + // keep retired, update min/max + if (bin < min) min = bin; + if (bin > max) max = bin; + } + } + else { + page->retire_expire = 0; + } + } + } + heap->page_retired_min = min; + heap->page_retired_max = max; +} + + +/* ----------------------------------------------------------- + Initialize the initial free list in a page. + In secure mode we initialize a randomized list by + alternating between slices. 
+----------------------------------------------------------- */ + +#define MI_MAX_SLICE_SHIFT (6) // at most 64 slices +#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) +#define MI_MIN_SLICES (2) + +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { + MI_UNUSED(stats); + #if (MI_SECURE<=2) + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->local_free == NULL); + #endif + mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); + void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); + + // initialize a randomized free list + // set up `slice_count` slices to alternate between + size_t shift = MI_MAX_SLICE_SHIFT; + while ((extend >> shift) == 0) { + shift--; + } + const size_t slice_count = (size_t)1U << shift; + const size_t slice_extend = extend / slice_count; + mi_assert_internal(slice_extend >= 1); + mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice + size_t counts[MI_MAX_SLICES]; // available objects in the slice + for (size_t i = 0; i < slice_count; i++) { + blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend); + counts[i] = slice_extend; + } + counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) 
+ + // and initialize the free list by randomly threading through them + // set up first element + const uintptr_t r = _mi_heap_random_next(heap); + size_t current = r % slice_count; + counts[current]--; + mi_block_t* const free_start = blocks[current]; + // and iterate through the rest; use `random_shuffle` for performance + uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0 + for (size_t i = 1; i < extend; i++) { + // call random_shuffle only every INTPTR_SIZE rounds + const size_t round = i%MI_INTPTR_SIZE; + if (round == 0) rnd = _mi_random_shuffle(rnd); + // select a random next slice index + size_t next = ((rnd >> 8*round) & (slice_count-1)); + while (counts[next]==0) { // ensure it still has space + next++; + if (next==slice_count) next = 0; + } + // and link the current block to it + counts[next]--; + mi_block_t* const block = blocks[current]; + blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block + mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` + current = next; + } + // prepend to the free list (usually NULL) + mi_block_set_next(page, blocks[current], page->free); // end of the list + page->free = free_start; +} + +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) +{ + MI_UNUSED(stats); + #if (MI_SECURE <= 2) + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->local_free == NULL); + #endif + mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); + void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); + + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); + + // initialize a sequential free list + mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1); + mi_block_t* block = start; + while(block <= 
last) { + mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); + mi_block_set_next(page,block,next); + block = next; + } + // prepend to free list (usually `NULL`) + mi_block_set_next(page, last, page->free); + page->free = start; +} + +/* ----------------------------------------------------------- + Page initialize and extend the capacity +----------------------------------------------------------- */ + +#define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well. +#if (MI_SECURE>0) +#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many +#else +#define MI_MIN_EXTEND (1) +#endif + +// Extend the capacity (up to reserved) by initializing a free list +// We do at most `MI_MAX_EXTEND` to avoid touching too much memory +// Note: we also experimented with "bump" allocation on the first +// allocations but this did not speed up any benchmark (due to an +// extra test in malloc? or cache effects?) +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { + MI_UNUSED(tld); + mi_assert_expensive(mi_page_is_valid_init(page)); + #if (MI_SECURE<=2) + mi_assert(page->free == NULL); + mi_assert(page->local_free == NULL); + if (page->free != NULL) return; + #endif + if (page->capacity >= page->reserved) return; + + size_t page_size; + _mi_page_start(_mi_page_segment(page), page, &page_size); + mi_stat_counter_increase(tld->stats.pages_extended, 1); + + // calculate the extend count + const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size); + size_t extend = page->reserved - page->capacity; + mi_assert_internal(extend > 0); + + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); + if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; } + mi_assert_internal(max_extend > 0); + + if (extend > max_extend) { + // ensure we don't touch memory beyond the page to reduce page commit. + // the `lean` benchmark tests this. 
Going from 1 to 8 increases rss by 50%. + extend = max_extend; + } + + mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); + mi_assert_internal(extend < (1UL<<16)); + + // and append the extend the free list + if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { + mi_page_free_list_extend(page, bsize, extend, &tld->stats ); + } + else { + mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats); + } + // enable the new free list + page->capacity += (uint16_t)extend; + mi_stat_increase(tld->stats.page_committed, extend * bsize); + + // extension into zero initialized memory preserves the zero'd free list + if (!page->is_zero_init) { + page->is_zero = false; + } + mi_assert_expensive(mi_page_is_valid_init(page)); +} + +// Initialize a fresh page +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) { + mi_assert(page != NULL); + mi_segment_t* segment = _mi_page_segment(page); + mi_assert(segment != NULL); + mi_assert_internal(block_size > 0); + // set fields + mi_page_set_heap(page, heap); + page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start + size_t page_size; + _mi_segment_page_start(segment, page, &page_size); + mi_assert_internal(mi_page_block_size(page) <= page_size); + mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); + mi_assert_internal(page_size / block_size < (1L<<16)); + page->reserved = (uint16_t)(page_size / block_size); + #ifdef MI_ENCODE_FREELIST + page->keys[0] = _mi_heap_random_next(heap); + page->keys[1] = _mi_heap_random_next(heap); + #endif + #if MI_DEBUG > 0 + page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501 + #else + page->is_zero = page->is_zero_init; + #endif + + mi_assert_internal(page->is_committed); + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->capacity == 0); + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->used == 0); + mi_assert_internal(page->xthread_free == 0); + mi_assert_internal(page->next == NULL); + mi_assert_internal(page->prev == NULL); + mi_assert_internal(page->retire_expire == 0); + mi_assert_internal(!mi_page_has_aligned(page)); + #if (MI_ENCODE_FREELIST) + mi_assert_internal(page->keys[0] != 0); + mi_assert_internal(page->keys[1] != 0); + #endif + mi_assert_expensive(mi_page_is_valid_init(page)); + + // initialize an initial free list + mi_page_extend_free(heap,page,tld); + mi_assert(mi_page_immediate_available(page)); +} + + +/* ----------------------------------------------------------- + Find pages with free blocks +-------------------------------------------------------------*/ + +// Find a page with free blocks of `page->block_size`. +static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) +{ + // search through the pages in "next fit" order + size_t count = 0; + mi_page_t* page = pq->first; + while (page != NULL) + { + mi_page_t* next = page->next; // remember next + count++; + + // 0. 
collect freed blocks by us and other threads + _mi_page_free_collect(page, false); + + // 1. if the page contains free blocks, we are done + if (mi_page_immediate_available(page)) { + break; // pick this one + } + + // 2. Try to extend + if (page->capacity < page->reserved) { + mi_page_extend_free(heap, page, heap->tld); + mi_assert_internal(mi_page_immediate_available(page)); + break; + } + + // 3. If the page is completely full, move it to the `mi_pages_full` + // queue so we don't visit long-lived pages too often. + mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); + mi_page_to_full(page, pq); + + page = next; + } // for each page + + mi_heap_stat_counter_increase(heap, searches, count); + + if (page == NULL) { + _mi_heap_collect_retired(heap, false); // perhaps make a page available? + page = mi_page_fresh(heap, pq); + if (page == NULL && first_try) { + // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again + page = mi_page_queue_find_free_ex(heap, pq, false); + } + } + else { + mi_assert(pq->first == page); + page->retire_expire = 0; + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + return page; +} + + + +// Find a page with free blocks of `size`. 
+static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { + mi_page_queue_t* pq = mi_page_queue(heap,size); + mi_page_t* page = pq->first; + if (page != NULL) { + #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness + if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { + mi_page_extend_free(heap, page, heap->tld); + mi_assert_internal(mi_page_immediate_available(page)); + } + else + #endif + { + _mi_page_free_collect(page,false); + } + + if (mi_page_immediate_available(page)) { + page->retire_expire = 0; + return page; // fast path + } + } + return mi_page_queue_find_free_ex(heap, pq, true); +} + + +/* ----------------------------------------------------------- + Users can register a deferred free function called + when the `free` list is empty. Since the `local_free` + is separate this is deterministically called after + a certain number of allocations. +----------------------------------------------------------- */ + +static mi_deferred_free_fun* volatile deferred_free = NULL; +static _Atomic(void*) deferred_arg; // = NULL + +void _mi_deferred_free(mi_heap_t* heap, bool force) { + heap->tld->heartbeat++; + if (deferred_free != NULL && !heap->tld->recurse) { + heap->tld->recurse = true; + deferred_free(force, heap->tld->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg)); + heap->tld->recurse = false; + } +} + +void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { + deferred_free = fn; + mi_atomic_store_ptr_release(void,&deferred_arg, arg); +} + + +/* ----------------------------------------------------------- + General allocation +----------------------------------------------------------- */ + +// Large and huge page allocation. +// Huge pages are allocated directly without being in a queue. 
+// Because huge pages contain just one block, and the segment contains +// just that page, we always treat them as abandoned and any thread +// that frees the block can free the whole page and segment directly. +static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { + size_t block_size = _mi_os_good_alloc_size(size); + mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE); + bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX); + mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size)); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); + if (page != NULL) { + mi_assert_internal(mi_page_immediate_available(page)); + + if (pq == NULL) { + // huge pages are directly abandoned + mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); + mi_assert_internal(_mi_page_segment(page)->used==1); + mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue + mi_page_set_heap(page, NULL); + } + else { + mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + } + + const size_t bsize = mi_page_usable_block_size(page); // note: not `mi_page_block_size` to account for padding + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_increase(heap, large, bsize); + mi_heap_stat_counter_increase(heap, large_count, 1); + } + else { + mi_heap_stat_increase(heap, huge, bsize); + mi_heap_stat_counter_increase(heap, huge_count, 1); + } + } + return page; +} + + +// Allocate a page +// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. +static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { + // huge allocation? 
+ const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + if (mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) { + if (mi_unlikely(req_size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); + return NULL; + } + else { + return mi_large_huge_page_alloc(heap,size); + } + } + else { + // otherwise find a page with free blocks in our size segregated queues + mi_assert_internal(size >= MI_PADDING_SIZE); + return mi_find_free_page(heap, size); + } +} + +// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. +// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. +void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept +{ + mi_assert_internal(heap != NULL); + + // initialize if necessary + if (mi_unlikely(!mi_heap_is_initialized(heap))) { + mi_thread_init(); // calls `_mi_heap_init` in turn + heap = mi_get_default_heap(); + if (mi_unlikely(!mi_heap_is_initialized(heap))) { return NULL; } + } + mi_assert_internal(mi_heap_is_initialized(heap)); + + // call potential deferred free routines + _mi_deferred_free(heap, false); + + // free delayed frees from other threads + _mi_heap_delayed_free(heap); + + // find (or allocate) a page of the right size + mi_page_t* page = mi_find_page(heap, size); + if (mi_unlikely(page == NULL)) { // first time out of memory, try to collect and retry the allocation once more + mi_heap_collect(heap, true /* force */); + page = mi_find_page(heap, size); + } + + if (mi_unlikely(page == NULL)) { // out of memory + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size); + return NULL; + } + + mi_assert_internal(mi_page_immediate_available(page)); + 
mi_assert_internal(mi_page_block_size(page) >= size); + + // and try again, this time succeeding! (i.e. this should never recurse) + return _mi_page_malloc(heap, page, size); +} diff --git a/compat/mimalloc/random.c b/compat/mimalloc/random.c new file mode 100644 index 00000000000000..21d052770dc0dd --- /dev/null +++ b/compat/mimalloc/random.c @@ -0,0 +1,367 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // for syscall() on Linux +#endif + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memset + +/* ---------------------------------------------------------------------------- +We use our own PRNG to keep predictable performance of random number generation +and to avoid implementations that use a lock. We only use the OS provided +random source to initialize the initial seeds. Since we do not need ultimate +performance but we do rely on the security (for secret cookies in secure mode) +we use a cryptographically secure generator (chacha20). +-----------------------------------------------------------------------------*/ + +#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? + + +/* ---------------------------------------------------------------------------- +Chacha20 implementation as the original algorithm with a 64-bit nonce +and counter: https://en.wikipedia.org/wiki/Salsa20 +The input matrix has sixteen 32-bit values: +Position 0 to 3: constant key +Position 4 to 11: the key +Position 12 to 13: the counter. +Position 14 to 15: the nonce. 
+ +The implementation uses regular C code which compiles very well on modern compilers. +(gcc x64 has no register spills, and clang 6+ uses SSE instructions) +-----------------------------------------------------------------------------*/ + +static inline uint32_t rotl(uint32_t x, uint32_t shift) { + return (x << shift) | (x >> (32 - shift)); +} + +static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); +} + +static void chacha_block(mi_random_ctx_t* ctx) +{ + // scramble into `x` + uint32_t x[16]; + for (size_t i = 0; i < 16; i++) { + x[i] = ctx->input[i]; + } + for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { + qround(x, 0, 4, 8, 12); + qround(x, 1, 5, 9, 13); + qround(x, 2, 6, 10, 14); + qround(x, 3, 7, 11, 15); + qround(x, 0, 5, 10, 15); + qround(x, 1, 6, 11, 12); + qround(x, 2, 7, 8, 13); + qround(x, 3, 4, 9, 14); + } + + // add scrambled data to the initial state + for (size_t i = 0; i < 16; i++) { + ctx->output[i] = x[i] + ctx->input[i]; + } + ctx->output_available = 16; + + // increment the counter for the next round + ctx->input[12] += 1; + if (ctx->input[12] == 0) { + ctx->input[13] += 1; + if (ctx->input[13] == 0) { // and keep increasing into the nonce + ctx->input[14] += 1; + } + } +} + +static uint32_t chacha_next32(mi_random_ctx_t* ctx) { + if (ctx->output_available <= 0) { + chacha_block(ctx); + ctx->output_available = 16; // (assign again to suppress static analysis warning) + } + const uint32_t x = ctx->output[16 - ctx->output_available]; + ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out + ctx->output_available--; + return x; +} + +static inline uint32_t read32(const uint8_t* p, size_t idx32) { + const size_t i = 4*idx32; + return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | 
(uint32_t)p[i+3] << 24); +} + +static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) +{ + // since we only use chacha for randomness (and not encryption) we + // do not _need_ to read 32-bit values as little endian but we do anyways + // just for being compatible :-) + memset(ctx, 0, sizeof(*ctx)); + for (size_t i = 0; i < 4; i++) { + const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; + ctx->input[i] = read32(sigma,i); + } + for (size_t i = 0; i < 8; i++) { + ctx->input[i + 4] = read32(key,i); + } + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = (uint32_t)nonce; + ctx->input[15] = (uint32_t)(nonce >> 32); +} + +static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { + memset(ctx_new, 0, sizeof(*ctx_new)); + _mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); + ctx_new->input[12] = 0; + ctx_new->input[13] = 0; + ctx_new->input[14] = (uint32_t)nonce; + ctx_new->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces! 
+ chacha_block(ctx_new); +} + + +/* ---------------------------------------------------------------------------- +Random interface +-----------------------------------------------------------------------------*/ + +#if MI_DEBUG>1 +static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { + return (ctx != NULL && ctx->input[0] != 0); +} +#endif + +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { + mi_assert_internal(mi_random_is_initialized(ctx)); + mi_assert_internal(ctx != ctx_new); + chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new); +} + +uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + #if MI_INTPTR_SIZE <= 4 + return chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif +} + + +/* ---------------------------------------------------------------------------- +To initialize a fresh random context we rely on the OS: +- Windows : BCryptGenRandom (or RtlGenRandom) +- macOS : CCRandomGenerateBytes, arc4random_buf +- bsd,wasi : arc4random_buf +- Linux : getrandom,/dev/urandom +If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. +-----------------------------------------------------------------------------*/ + +#if defined(_WIN32) + +#if defined(MI_USE_RTLGENRANDOM) || defined(__cplusplus) +// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using +// dynamic overriding, we observed it can raise an exception when compiled with C++, and +// sometimes deadlocks when also running under the VS debugger. 
+#pragma comment (lib,"advapi32.lib") +#define RtlGenRandom SystemFunction036 +#ifdef __cplusplus +extern "C" { +#endif +BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); +#ifdef __cplusplus +} +#endif +static bool os_random_buf(void* buf, size_t buf_len) { + return (RtlGenRandom(buf, (ULONG)buf_len) != 0); +} +#else +#pragma comment (lib,"bcrypt.lib") +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} +#endif + +#elif defined(__APPLE__) +#include +#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 +#include +#include +#endif +static bool os_random_buf(void* buf, size_t buf_len) { + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 + // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf + // may fail silently on macOS. See PR #390, and + return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); + #else + // fall back on older macOS + arc4random_buf(buf, buf_len); + return true; + #endif +} + +#elif defined(__ANDROID__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__sun) // todo: what to use with __wasi__? +#include +static bool os_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} +#elif defined(__linux__) || defined(__HAIKU__) +#if defined(__linux__) +#include +#endif +#include +#include +#include +#include +#include +static bool os_random_buf(void* buf, size_t buf_len) { + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see ) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. 
+#ifdef SYS_getrandom + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_load_acquire(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (errno != ENOSYS) return false; + mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom + } +#endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = open("/dev/urandom", flags, 0); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + close(fd); + return (count==buf_len); +} +#else +static bool os_random_buf(void* buf, size_t buf_len) { + return false; +} +#endif + +#if defined(_WIN32) +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { + uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random + + #if defined(_WIN32) + LARGE_INTEGER pcount; + QueryPerformanceCounter(&pcount); + x ^= (uintptr_t)(pcount.QuadPart); + #elif defined(__APPLE__) + x ^= (uintptr_t)mach_absolute_time(); + #else + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + x ^= (uintptr_t)time.tv_sec; + x ^= (uintptr_t)time.tv_nsec; + #endif + // and do a few randomization steps + uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; + for (uintptr_t i = 0; i < max; i++) { + x = _mi_random_shuffle(x); + } + mi_assert_internal(x != 0); + return x; +} + +void _mi_random_init(mi_random_ctx_t* ctx) { + uint8_t key[32]; + if (!os_random_buf(key, sizeof(key))) { + // if we fail to get random data from the OS, we fall back to a + // weak random source based on the current time + #if !defined(__wasi__) + _mi_warning_message("unable 
to use secure randomness\n"); + #endif + uintptr_t x = _mi_os_random_weak(0); + for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. + x = _mi_random_shuffle(x); + ((uint32_t*)key)[i] = (uint32_t)x; + } + } + chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); +} + +/* -------------------------------------------------------- +test vectors from +----------------------------------------------------------- */ +/* +static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} +static void chacha_test(void) +{ + uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; + uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; + qround(x, 0, 1, 2, 3); + mi_assert_internal(array_equals(x, x_out, 4)); + + uint32_t y[16] = { + 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, + 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; + uint32_t y_out[16] = { + 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, + 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; + qround(y, 2, 7, 8, 13); + mi_assert_internal(array_equals(y, y_out, 16)); + + mi_random_ctx_t r = { + { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, + {0}, + 0 + }; + uint32_t r_out[16] = { + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, + 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, + 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, + 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; + chacha_block(&r); + mi_assert_internal(array_equals(r.output, r_out, 16)); +} +*/ diff --git a/compat/mimalloc/readme.md b/compat/mimalloc/readme.md new file mode 
100644 index 00000000000000..0db3ff6f112ca0 --- /dev/null +++ b/compat/mimalloc/readme.md @@ -0,0 +1,715 @@ + + + +[](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary) + +# mimalloc + +  + +mimalloc (pronounced "me-malloc") +is a general purpose allocator with excellent [performance](#performance) characteristics. +Initially developed by Daan Leijen for the run-time systems of the +[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. + +Latest release tag: `v2.0.6` (2022-04-14). +Latest stable tag: `v1.7.6` (2022-02-14). + +mimalloc is a drop-in replacement for `malloc` and can be used in other programs +without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: +``` +> LD_PRELOAD=/usr/lib/libmimalloc.so myprogram +``` +It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: + +- __small and consistent__: the library is about 8k LOC using simple and + consistent data structures. This makes it very suitable + to integrate and adapt in other projects. For runtime systems it + provides hooks for a monotonic _heartbeat_ and deferred freeing (for + bounded worst-case times with reference counting). +- __free list sharding__: instead of one big free list (per size class) we have + many smaller lists per "mimalloc page" which reduces fragmentation and + increases locality -- + things that are allocated close in time get allocated close in memory. + (A mimalloc page contains blocks of one size class and is usually 64KiB on a 64-bit system). +- __free list multi-sharding__: the big idea! Not only do we shard the free list + per mimalloc page, but for each page we have multiple free lists. In particular, there + is one list for thread-local `free` operations, and another one for concurrent `free` + operations. 
Free-ing from another thread can now be a single CAS without needing + sophisticated coordination between threads. Since there will be + thousands of separate free lists, contention is naturally distributed over the heap, + and the chance of contending on a single location will be low -- this is quite + similar to randomized algorithms like skip lists where adding + a random oracle removes the need for a more complex algorithm. +- __eager page reset__: when a "page" becomes empty (with increased chance + due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") + reducing (real) memory pressure and fragmentation, especially in long running + programs. +- __secure__: _mimalloc_ can be built in secure mode, adding guard pages, + randomized allocation, encrypted free lists, etc. to protect against various + heap vulnerabilities. The performance penalty is usually around 10% on average + over our benchmarks. +- __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. + A heap can be destroyed at once instead of deallocating each object separately. +- __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation + times (_wcat_), bounded space overhead (~0.2% meta-data, with low internal fragmentation), + and has no internal points of contention using only atomic operations. +- __fast__: In our benchmarks (see [below](#performance)), + _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), + and often uses less memory. A nice property + is that it does consistently well over a wide range of benchmarks. There is also good huge OS page + support for larger server programs. + +The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API. 
+You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. + +Enjoy! + +### Branches + +* `master`: latest stable release (based on `dev-slice`). +* `dev`: development branch for mimalloc v1. Use this branch for submitting PRs. +* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`. + +### Releases + +Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage + and fragmentation compared to mimalloc `v1.x` (especially for large workloads). It should otherwise have similar performance + (see [below](#performance)); please report if you observe any significant performance regression. + +* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation + even when compiling with older SDKs, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix + warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object + allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes. + +* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on + Windows 11, fix compilation with musl, potentially reduced + committed memory, add `bin/minject` for Windows, + improved wasm support, faster aligned allocation, + various small fixes. + +* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including + M1), improved performance for v2 for large objects, Python integration improvements, more standard + installation directories, various small fixes. 
+ +* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix + thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. + +* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). + +* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. + +* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, + improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. + +* [Older release notes](#older-release-notes) + +Special thanks to: + +* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his many contributions, and making + mimalloc work better on many less common operating systems, like Haiku, Dragonfly, etc. +* Mary Feofanova (@mary3000), Evgeniy Moiseenko, and Manuel Pöter (@mpoeter) for making mimalloc TSAN checkable, and finding + memory model bugs using the [genMC] model checker. +* Weipeng Liu (@pongba), Zhuowei Li, Junhua Wang, and Jakub Szymanski, for their early support of mimalloc and deployment + at large scale services, leading to many improvements in the mimalloc algorithms for large workloads. +* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs + in (early versions of) `mimalloc`. +* Manuel Pöter (@mpoeter) and Sam Gross(@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which + uses mimalloc internally. 
+ + +[genMC]: https://plv.mpi-sws.org/genmc/ + +### Usage + +mimalloc is used in various large scale low-latency services and programs, for example: + + + + + + + + +# Building + +## Windows + +Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build. +The `mimalloc` project builds a static library (in `out/msvc-x64`), while the +`mimalloc-override` project builds a DLL for overriding malloc +in the entire program. + +## macOS, Linux, BSD, etc. + +We use [`cmake`](https://cmake.org)<sup>1</sup> as the build system: + +``` +> mkdir -p out/release +> cd out/release +> cmake ../.. +> make +``` +This builds the library as a shared (dynamic) +library (`.so` or `.dylib`), a static library (`.a`), and +as a single object file (`.o`). + +`> sudo make install` (install the library and header files in `/usr/local/lib` and `/usr/local/include`) + +You can build the debug version which does many internal checks and +maintains detailed statistics as: + +``` +> mkdir -p out/debug +> cd out/debug +> cmake -DCMAKE_BUILD_TYPE=Debug ../.. +> make +``` +This will name the shared library as `libmimalloc-debug.so`. + +Finally, you can build a _secure_ version that uses guard pages, encrypted +free lists, etc., as: +``` +> mkdir -p out/secure +> cd out/secure +> cmake -DMI_SECURE=ON ../.. +> make +``` +This will name the shared library as `libmimalloc-secure.so`. +Use `ccmake`<sup>2</sup> instead of `cmake` +to see and customize all the available build options. + +Notes: +1. Install CMake: `sudo apt-get install cmake` +2. Install CCMake: `sudo apt-get install cmake-curses-gui` + + +## Single source + +You can also directly build the single `src/static.c` file as part of your project without +needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path. + + +# Using the library + +The preferred usage is including `<mimalloc.h>`, linking with +the shared- or static library, and using the `mi_malloc` API exclusively for allocation.
For example, +``` +> gcc -o myprogram -lmimalloc myfile.c +``` + +mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist +with other allocators linked to the same program. +If you use `cmake`, you can simply use: +``` +find_package(mimalloc 1.4 REQUIRED) +``` +in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either: +``` +target_link_libraries(myapp PUBLIC mimalloc) +``` +to link with the shared (dynamic) library, or: +``` +target_link_libraries(myapp PUBLIC mimalloc-static) +``` +to link with the static library. See `test/CMakeLists.txt` for an example. + +For best performance in C++ programs, it is also recommended to override the +global `new` and `delete` operators. For convenience, mimalloc provides +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. +In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` +interface.
+ +You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) +and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): +``` +> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363 + +175451865205073170563711388363 = 374456281610909315237213 * 468551 + +heap stats: peak total freed unit +normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok +normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok +normal 4: 64 b 4.6 kb 4.6 kb 32 b ok +normal 5: 80 b 118.4 kb 118.4 kb 40 b ok +normal 6: 48 b 48 b 48 b 48 b ok +normal 17: 960 b 960 b 960 b 320 b ok + +heap stats: peak total freed unit + normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok + huge: 0 b 0 b 0 b 1 b ok + total: 33.9 kb 32.8 mb 32.8 mb 1 b ok +malloc requested: 32.8 mb + + committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok + reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok + reset: 0 b 0 b 0 b 1 b ok + segments: 1 1 1 +-abandoned: 0 + pages: 6 6 6 +-abandoned: 0 + mmaps: 3 + mmap fast: 0 + mmap slow: 1 + threads: 0 + elapsed: 2.022s + process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb +``` + +The above model of using the `mi_` prefixed API is not always possible +though in existing programs that already use the standard malloc interface, +and another option is to override the standard malloc interface +completely and redirect all calls to the _mimalloc_ library instead . + +## Environment Options + +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), +or via environment variables: + +- `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. +- `MIMALLOC_VERBOSE=1`: show verbose messages. +- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. +- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS + that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) + programs. 
By setting it to `0` this will no longer be done which can improve performance for batch-like programs. + As an alternative, the `MIMALLOC_RESET_DELAY=<msecs>` can be set higher (100ms by default) to make the page + reset occur less frequently instead of turning it off completely. +- `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected + at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than + the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA + nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). +- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly + improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs + to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes + the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). + +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at + startup and sometimes this can give a large (latency) performance improvement on big workloads. + Usually it is better to not use + `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at + startup only once). + Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]).
+ With huge OS pages, it may be beneficial to set the setting + `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) + of a thread to not allocate in the huge OS pages; this prevents threads that are short lived + and allocate just a little to take up space in the huge OS page area (which cannot be reset). + The huge pages are usually allocated evenly among NUMA nodes. + We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all + the huge pages at a specific numa node instead. + +Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write +for all pages in the original process including the huge OS pages. When any memory is now written in that area, the +OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in large increments. + +[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 +[windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 + +## Secure Mode + +_mimalloc_ can be built in secure mode by using the `-DMI_SECURE=ON` flag in `cmake`. This build enables various mitigations +to make mimalloc more robust against exploits. In particular: + +- All internal mimalloc pages are surrounded by guard pages and the heap metadata is behind a guard page as well (so a buffer overflow + exploit cannot reach into the metadata).
+- All free list pointers are + [encoded](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) + with per-page keys which are used both to prevent overwrites with a known pointer, as well as to detect heap corruption. +- Double free's are detected (and ignored). +- The free lists are initialized in a random order and allocation randomly chooses between extension and reuse within a page to + mitigate against attacks that rely on a predictable allocation order. Similarly, the larger heap blocks allocated by mimalloc + from the OS are also address randomized. + +As always, evaluate with care as part of an overall security strategy as all of the above are mitigations but not guarantees. + +## Debug Mode + +When _mimalloc_ is built using debug mode, various checks are done at runtime to catch development errors. + +- Statistics are maintained in detail for each object size. They can be shown using `MIMALLOC_SHOW_STATS=1` at runtime. +- All objects have padding at the end to detect (byte precise) heap block overflows. +- Double free's, and freeing invalid heap pointers are detected. +- Corrupted free-lists and some forms of use-after-free are detected. + + +# Overriding Standard Malloc + +Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_. + +## Dynamic override + +This is the recommended way to override the standard malloc interface. + +### Override on Linux, BSD + +On these ELF-based systems we preload the mimalloc shared +library so all calls to the standard `malloc` interface are +resolved to the _mimalloc_ library.
+``` +> env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram +``` + +You can set extra environment variables to check that mimalloc is running, +like: +``` +> env MIMALLOC_VERBOSE=1 LD_PRELOAD=/usr/lib/libmimalloc.so myprogram +``` +or run with the debug version to get detailed statistics: +``` +> env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram +``` + +### Override on MacOS + +On macOS we can also preload the mimalloc shared +library so all calls to the standard `malloc` interface are +resolved to the _mimalloc_ library. +``` +> env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram +``` + +Note that certain security restrictions may apply when doing this from +the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). + + +### Override on Windows + +Overriding on Windows is robust and has the +particular advantage to be able to redirect all malloc/free calls that go through +the (dynamic) C runtime allocator, including those from other DLL's or libraries. + +The overriding on Windows requires that you link your program explicitly with +the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). +Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put +in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). +The redirection DLL ensures that all calls to the C runtime malloc API get redirected to +mimalloc (in `mimalloc-override.dll`). + +To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some +call to the mimalloc API in the `main` function, like `mi_version()` +(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project +for an example on how to use this. 
For best performance on Windows with C++, it +is also recommended to override the `new`/`delete` operations (by including +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) in a single(!) source file in your project). + +The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. + +(Note: in principle, it is possible to even patch existing executables without any recompilation +if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll` +into the import table (and put `mimalloc-redirect.dll` in the same folder). +Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). + + +## Static override + +On Unix-like systems, you can also statically link with _mimalloc_ to override the standard +malloc interface. The recommended way is to link the final program with the +_mimalloc_ single object file (`mimalloc-override.o`). We use +an object file instead of a library file as linkers give preference to +that over archives to resolve symbols. To ensure that the standard +malloc interface resolves to the _mimalloc_ library, link it as the first +object file. For example: +``` +> gcc -o myprogram mimalloc-override.o myfile1.c ... +``` + +Another way to override statically that works on all platforms, is to +link statically to mimalloc (as shown in the introduction) and include a +header file in each source file that re-defines `malloc` etc. to `mi_malloc`. +This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). This only works reliably though if all sources are +under your control or otherwise mixing of pointers from different heaps may occur!
+ + +# Performance + +Last update: 2021-01-30 + +We tested _mimalloc_ against many other top allocators over a wide +range of benchmarks, ranging from various real world programs to +synthetic benchmarks that see how the allocator behaves under more +extreme circumstances. In our benchmark suite, _mimalloc_ outperforms other leading +allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and has a similar memory footprint. A nice property is that it +does consistently well over the wide range of benchmarks. + +General memory allocators are interesting as there exists no algorithm that is +optimal -- for a given allocator one can usually construct a workload +where it does not do so well. The goal is thus to find an allocation +strategy that performs well over a wide range of benchmarks without +suffering from (too much) underperformance in less common situations. + +As always, interpret these results with care since some benchmarks test synthetic +or uncommon situations that may never apply to your workloads. For example, most +allocators do not do well on `xmalloc-testN` but that includes even the best +industrial allocators like _jemalloc_ and _tcmalloc_ that are used in some of +the world's largest systems (like Chrome or FreeBSD). + +Also, the benchmarks here do not measure the behaviour on very large and long-running server workloads, +or worst-case latencies of allocation. Much work has gone into `mimalloc` to work well on such +workloads (for example, to reduce virtual memory fragmentation on long-running services) +but such optimizations are not always reflected in the current benchmark suite. + +We show here only an overview -- for +more specific details and further benchmarks we refer to the +[technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action). +The benchmark suite is automated and available separately +as [mimalloc-bench](https://github.com/daanx/mimalloc-bench). 
+ + +## Benchmark Results on a 16-core AMD 5950x (Zen3) + +Testing on the 16-core AMD 5950x processor at 3.4Ghz (4.9Ghz boost), +with 32GiB memory at 3600Mhz, running Ubuntu 20.04 with glibc 2.31 and GCC 9.3.0. + +We measure three versions of _mimalloc_: the main version `mi` (tag:v1.7.0), +the new v2.0 beta version as `xmi` (tag:v2.0.0), and the main version in secure mode as `smi` (tag:v1.7.0). + +The other allocators are +Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (`tc`, tag:gperftools-2.8.1) used in Chrome, +Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (`je`, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD, +the Intel thread building blocks [allocator](https://github.com/intel/tbb) (`tbb`, tag:v2020.3), +[rpmalloc](https://github.com/mjansson/rpmalloc) (`rp`,tag:1.4.1) by Mattias Jansson, +the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (git:d880f72) allocator by Emery Berger \[1], +the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:67ff31a) allocator by +Bobby Powers _et al_ \[8], +and finally the default system allocator (`glibc`, 2.31) (based on _PtMalloc2_). + + + + +Any benchmarks ending in `N` run on all 32 logical cores in parallel. +Results are averaged over 10 runs and reported relative +to mimalloc (where 1.2 means it took 1.2× longer to run). +The legend also contains the _overall relative score_ between the +allocators where 100 points is the maximum if an allocator is fastest on +all benchmarks. + +The single threaded _cfrac_ benchmark by Dave Barrett is an implementation of +continued fraction factorization which uses many small short-lived allocations. +All allocators do well on such common usage, where _mimalloc_ is just a tad +faster than _tcmalloc_ and +_jemalloc_.
+ +The _leanN_ program is interesting as a large realistic and +concurrent workload of the [Lean](https://github.com/leanprover/lean) +theorem prover compiling its own standard library, and there is a 13% +speedup over _tcmalloc_. This is +quite significant: if Lean spends 20% of its time in the +allocator that means that _mimalloc_ is 1.6× faster than _tcmalloc_ +here. (This is surprising as that is not measured in a pure +allocation benchmark like _alloc-test_. We conjecture that we see this +outsized improvement here because _mimalloc_ has better locality in +the allocation which improves performance for the *other* computations +in a program as well). + +The single threaded _redis_ benchmark again shows that most allocators do well on such workloads. + +The _larsonN_ server benchmark by Larson and Krishnan \[2] allocates and frees between threads. They observed this +behavior (which they call _bleeding_) in actual server applications, and the benchmark simulates this. +Here, _mimalloc_ is quite a bit faster than _tcmalloc_ and _jemalloc_ probably due to the object migration between different threads. + +The _mstressN_ workload performs many allocations and re-allocations, +and migrates objects between threads (as in _larsonN_). However, it also +creates and destroys the _N_ worker threads a few times keeping some objects +alive beyond the life time of the allocating thread. We observed this +behavior in many larger server applications. + +The [_rptestN_](https://github.com/mjansson/rpmalloc-benchmark) benchmark +by Mattias Jansson is an allocator test originally designed +for _rpmalloc_, and tries to simulate realistic allocation patterns over +multiple threads. Here the differences between allocators become more apparent. + +The second benchmark set tests specific aspects of the allocators and +shows even more extreme differences between them.
+ +The _alloc-test_, by +[OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of +allocations in various size classes. The test is scaled such that when an +allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it +means that it scales linearly. + +The _sh6bench_ and _sh8bench_ benchmarks are +developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap. +In _sh6bench_ _mimalloc_ does much +better than the others (more than 2.5× faster than _jemalloc_). +We cannot explain this well but believe it is +caused in part by the "reverse" free-ing pattern in _sh6bench_. +The _sh8bench_ is a variation with object migration +between threads; whereas _tcmalloc_ did well on _sh6bench_, the addition of object migration causes it to be 10× slower than before. + +The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder, simulates an asymmetric workload where +some threads only allocate, and others only free -- they observed this pattern in +larger server applications. Here we see that +the _mimalloc_ technique of having non-contended sharded thread free +lists pays off as it outperforms others by a very large margin. Only _rpmalloc_, _tbb_, and _glibc_ also scale well on this benchmark. + +The _cache-scratch_ benchmark by Emery Berger \[1], and introduced with +the Hoard allocator to test for _passive-false_ sharing of cache lines. +With a single thread they all +perform the same, but when running with multiple threads the potential allocator +induced false sharing of the cache lines can cause large run-time differences. +Crundal \[6] describes in detail why the false cache line sharing occurs in the _tcmalloc_ design, and also discusses how this +can be avoided with some small implementation changes. 
+Only the _tbb_, _rpmalloc_ and _mesh_ allocators also avoid the +cache line sharing completely, while _Hoard_ and _glibc_ seem to mitigate +the effects. Kukanov and Voss \[7] describe in detail +how the design of _tbb_ avoids the false cache line sharing. + + +## On a 36-core Intel Xeon + +For completeness, here are the results on a big Amazon +[c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized) instance +consisting of a 2×18-core Intel Xeon (Cascade Lake) at 3.4GHz (boost 3.5GHz) +with 144GiB ECC memory, running Ubuntu 20.04 with glibc 2.31, GCC 9.3.0, and +Clang 10.0.0. This time, the mimalloc allocators (mi, xmi, and smi) were +compiled with the Clang compiler instead of GCC. +The results are similar to the AMD results but it is interesting to +see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks. + + + + + +## Peak Working Set + +The following figure shows the peak working set (rss) of the allocators +on the benchmarks (on the c5.18xlarge instance). + + + + +Note that the _xmalloc-testN_ memory usage should be disregarded as it +allocates more the faster the program runs. Similarly, memory usage of +_larsonN_, _mstressN_, _rptestN_ and _sh8bench_ can vary depending on scheduling and +speed. Nevertheless, we hope to improve the memory usage on _mstressN_ +and _rptestN_ (just as _cfrac_, _larsonN_ and _sh8bench_ have a small working set which skews the results). + + + + +# References + +- \[1] Emery D. Berger, Kathryn S. McKinley, Robert D. Blumofe, and Paul R. Wilson. + _Hoard: A Scalable Memory Allocator for Multithreaded Applications_ + the Ninth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-IX). Cambridge, MA, November 2000. + [pdf](http://www.cs.utexas.edu/users/mckinley/papers/asplos-2000.pdf) + +- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_. + In ISMM, Vancouver, B.C., Canada, 1998. 
[pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.45.1947&rep=rep1&type=pdf) + +- \[3] D. Grunwald, B. Zorn, and R. Henderson. + _Improving the cache locality of memory allocation_. In R. Cartwright, editor, + Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf) + +- \[4] J. Barnes and P. Hut. _A hierarchical O(n*log(n)) force-calculation algorithm_. Nature, 324:446-449, 1986. + +- \[5] C. Lever, and D. Boreham. _Malloc() Performance in a Multithreaded Linux Environment._ + In USENIX Annual Technical Conference, Freenix Session. San Diego, CA. Jun. 2000. + Available at <https://github.com/kuszmaul/SuperMalloc/tree/master/tests> + +- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc_. 2016. CS16S1 project at the Australian National University. [pdf](http://courses.cecs.anu.edu.au/courses/CSPROJECTS/16S1/Reports/Timothy_Crundal_Report.pdf) + +- \[7] Alexey Kukanov, and Michael J Voss. + _The Foundations for Scalable Multi-Core Software in Intel Threading Building Blocks._ + Intel Technology Journal 11 (4). 2007 + +- \[8] Bobby Powers, David Tench, Emery D. Berger, and Andrew McGregor. + _Mesh: Compacting Memory Management for C/C++_ + In Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI'19), June 2019, pages 333–346. + + + +# Contributing + +This project welcomes contributions and suggestions. Most contributions require you to agree to a +Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us +the rights to use your contribution. For details, visit https://cla.microsoft.com. + +When you submit a pull request, a CLA-bot will automatically determine whether you need to provide +a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions +provided by the bot.
You will only need to do this once across all repos using our CLA. + + +# Older Release Notes + +* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved + handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call. +* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations, + support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support. +* 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS, + build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics. +* 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda, + stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload, + fix aligned debug padding. +* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects). +* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding + and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise + heap block overflow detection in debug mode (besides the double-free detection and free-list + corruption detection). Add `nodiscard` attribute to most allocation functions. + Enable `MIMALLOC_PAGE_RESET` by default. New reclamation strategy for abandoned heap pages + for better memory footprint. +* 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes. +* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, +more eager concurrent free, addition of STL allocator, fixed potential memory leak. 
+* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger +free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. +* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. +* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. +* 2019-10-07, `v1.1.0`: stable release 1.1. +* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. +* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. diff --git a/compat/mimalloc/segment-cache.c b/compat/mimalloc/segment-cache.c new file mode 100644 index 00000000000000..c071239ce32bee --- /dev/null +++ b/compat/mimalloc/segment-cache.c @@ -0,0 +1,360 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2020, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + Implements a cache of segments to avoid expensive OS calls and to reuse + the commit_mask to optimize the commit/decommit calls. + The full memory map of all segments is also implemented here. 
+-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include "bitmap.h" // atomic bitmap + +//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache + +#define MI_CACHE_FIELDS (16) +#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit + +#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) +#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes + +typedef struct mi_cache_slot_s { + void* p; + size_t memid; + bool is_pinned; + mi_commit_mask_t commit_mask; + mi_commit_mask_t decommit_mask; + _Atomic(mi_msecs_t) expire; +} mi_cache_slot_t; + +static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 + +static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! +static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; +static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free + + +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ +#ifdef MI_CACHE_DISABLE + return NULL; +#else + + // only segment blocks + if (size != MI_SEGMENT_SIZE) return NULL; + + // numa node determines start field + const int numa_node = _mi_os_numa_node(tld); + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // find an available slot + mi_bitmap_index_t bitidx = 0; + bool claimed = false; + if (*large) { // large allowed? 
+ claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = true; + } + if (!claimed) { + claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (claimed) *large = false; + } + + if (!claimed) return NULL; + + // found a slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + void* p = slot->p; + *memid = slot->memid; + *is_pinned = slot->is_pinned; + *is_zero = false; + *commit_mask = slot->commit_mask; + *decommit_mask = slot->decommit_mask; + slot->p = NULL; + mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); + + // mark the slot as free again + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); + return p; +#endif +} + +static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) +{ + if (mi_commit_mask_is_empty(cmask)) { + // nothing + } + else if (mi_commit_mask_is_full(cmask)) { + _mi_os_decommit(p, total, stats); + } + else { + // todo: one call to decommit the whole at once? + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t part = total/MI_COMMIT_MASK_BITS; + size_t idx; + size_t count; + mi_commit_mask_foreach(cmask, idx, count) { + void* start = (uint8_t*)p + (idx*part); + size_t size = count*part; + _mi_os_decommit(start, size, stats); + } + mi_commit_mask_foreach_end() + } + mi_commit_mask_create_empty(cmask); +} + +#define MI_MAX_PURGE_PER_PUSH (4) + +static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld) +{ + MI_UNUSED(tld); + if (!mi_option_is_enabled(mi_option_allow_decommit)) return; + mi_msecs_t now = _mi_clock_now(); + size_t purged = 0; + const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); + size_t idx = (force ? 
0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); + for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots + if (idx >= MI_CACHE_MAX) idx = 0; // wrap + mi_cache_slot_t* slot = &cache[idx]; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); + if (expire != 0 && (force || now >= expire)) { // racy read + // seems expired, first claim it from available + purged++; + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { + // was available, we claimed it + expire = mi_atomic_loadi64_acquire(&slot->expire); + if (expire != 0 && (force || now >= expire)) { // safe read + // still expired, decommit it + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); + mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + _mi_abandoned_await_readers(); // wait until safe to decommit + // decommit committed parts + // TODO: instead of decommit, we could also free to the OS? 
+ mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); + mi_commit_mask_create_empty(&slot->decommit_mask); + } + _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop + } + if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push + } + } +} + +void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { + mi_segment_cache_purge(force, tld ); +} + +mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) +{ +#ifdef MI_CACHE_DISABLE + return false; +#else + + // only for normal segment blocks + if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + + // numa node determines start field + int numa_node = _mi_os_numa_node(NULL); + size_t start_field = 0; + if (numa_node > 0) { + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + if (start_field >= MI_CACHE_FIELDS) start_field = 0; + } + + // purge expired entries + mi_segment_cache_purge(false /* force? 
*/, tld); + + // find an available slot + mi_bitmap_index_t bitidx; + bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); + if (!claimed) return false; + + mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); +#if MI_DEBUG>1 + if (is_pinned || is_large) { + mi_assert_internal(mi_commit_mask_is_full(commit_mask)); + } +#endif + + // set the slot + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + slot->p = start; + slot->memid = memid; + slot->is_pinned = is_pinned; + mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); + slot->commit_mask = *commit_mask; + slot->decommit_mask = *decommit_mask; + if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { + long delay = mi_option_get(mi_option_segment_decommit_delay); + if (delay == 0) { + _mi_abandoned_await_readers(); // wait until safe to decommit + mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); + mi_commit_mask_create_empty(&slot->decommit_mask); + } + else { + mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); + } + } + + // make it available + _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + return true; +#endif +} + + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) + set to 1 if it contains the segment meta data. 
+----------------------------------------------------------- */ + + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB +#else +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE? + if ((uintptr_t)segment >= MI_MAX_ADDRESS) { + *bitidx = 0; + return MI_SEGMENT_MAP_WSIZE; + } + else { + const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + *bitidx = segindex % MI_INTPTR_BITS; + const size_t mapindex = segindex / MI_INTPTR_BITS; + mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); + return mapindex; + } +} + +void _mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index==MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +void _mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index == MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask & ~((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +// Determine the segment 
belonging to a pointer or NULL if it is not in a valid segment. +static mi_segment_t* _mi_segment_of(const void* p) { + mi_segment_t* segment = _mi_ptr_segment(p); + if (segment == NULL) return NULL; + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { + return segment; // yes, allocated by us + } + if (index==MI_SEGMENT_MAP_WSIZE) return NULL; + + // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? + + // search downwards for the first segment in case it is an interior pointer + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // valid huge objects + // note: we could maintain a lowest index to speed up the path for invalid pointers? + size_t lobitidx; + size_t loindex; + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); + if (lobits != 0) { + loindex = index; + lobitidx = mi_bsr(lobits); // lobits != 0 + } + else if (index == 0) { + return NULL; + } + else { + mi_assert_internal(index > 0); + uintptr_t lomask = mask; + loindex = index; + do { + loindex--; + lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); + } while (lomask != 0 && loindex > 0); + if (lomask == 0) return NULL; + lobitidx = mi_bsr(lomask); // lomask != 0 + } + mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); + // take difference as the addresses could be larger than the MAX_ADDRESS space. 
+ size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; + segment = (mi_segment_t*)((uint8_t*)segment - diff); + + if (segment == NULL) return NULL; + mi_assert_internal((void*)segment < p); + bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(cookie_ok); + if (mi_unlikely(!cookie_ok)) return NULL; + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); + return segment; +} + +// Is this a valid pointer in our heap? +static bool mi_is_valid_pointer(const void* p) { + return (_mi_segment_of(p) != NULL); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* +// Return the full segment range belonging to a pointer +static void* mi_segment_range_of(const void* p, size_t* size) { + mi_segment_t* segment = _mi_segment_of(p); + if (segment == NULL) { + if (size != NULL) *size = 0; + return NULL; + } + else { + if (size != NULL) *size = segment->segment_size; + return segment; + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); + return page; +} +*/ diff --git a/compat/mimalloc/segment.c b/compat/mimalloc/segment.c new file mode 100644 index 00000000000000..d772440d69f032 --- /dev/null +++ b/compat/mimalloc/segment.c @@ -0,0 +1,1542 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2020, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. 
A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset +#include + +#define MI_PAGE_HUGE_ALIGN (256*1024) + +static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); + + +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; + } + return true; +} + +static bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if ((commit->mask[i] & cm->mask[i]) != 0) return true; + } + return false; +} + +static void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] = (commit->mask[i] & cm->mask[i]); + } +} + +static void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] &= ~(cm->mask[i]); + } +} + +static void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + res->mask[i] |= cm->mask[i]; + } +} + +static void mi_commit_mask_create(size_t bitidx, size_t bitcount, mi_commit_mask_t* cm) { + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + if (bitcount == MI_COMMIT_MASK_BITS) { + mi_assert_internal(bitidx==0); + 
mi_commit_mask_create_full(cm); + } + else if (bitcount == 0) { + mi_commit_mask_create_empty(cm); + } + else { + mi_commit_mask_create_empty(cm); + size_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; + size_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; + while (bitcount > 0) { + mi_assert_internal(i < MI_COMMIT_MASK_FIELD_COUNT); + size_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; + size_t count = (bitcount > avail ? avail : bitcount); + size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? ~((size_t)0) : (((size_t)1 << count) - 1) << ofs); + cm->mask[i] = mask; + bitcount -= count; + ofs = 0; + i++; + } + } +} + +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { + mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); + size_t count = 0; + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + size_t mask = cm->mask[i]; + if (~mask == 0) { + count += MI_COMMIT_MASK_FIELD_BITS; + } + else { + for (; mask != 0; mask >>= 1) { // todo: use popcount + if ((mask&1)!=0) count++; + } + } + } + // we use total since for huge segments each commit bit may represent a larger size + return ((total / MI_COMMIT_MASK_BITS) * count); +} + + +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) { + size_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; + size_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; + size_t mask = 0; + // find first ones + while (i < MI_COMMIT_MASK_FIELD_COUNT) { + mask = cm->mask[i]; + mask >>= ofs; + if (mask != 0) { + while ((mask&1) == 0) { + mask >>= 1; + ofs++; + } + break; + } + i++; + ofs = 0; + } + if (i >= MI_COMMIT_MASK_FIELD_COUNT) { + // not found + *idx = MI_COMMIT_MASK_BITS; + return 0; + } + else { + // found, count ones + size_t count = 0; + *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; + do { + mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); + do { + count++; + mask >>= 1; + } while ((mask&1) == 1); + if ((((*idx + count) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { + i++; + if (i >= 
MI_COMMIT_MASK_FIELD_COUNT) break; + mask = cm->mask[i]; + ofs = 0; + } + } while ((mask&1) == 1); + mi_assert_internal(count > 0); + return count; + } +} + + +/* -------------------------------------------------------------------------------- + Segment allocation + + If a thread ends, it "abandons" pages with used blocks + and there is an abandoned segment list whose segments can + be reclaimed by still running threads, much like work-stealing. +-------------------------------------------------------------------------------- */ + + +/* ----------------------------------------------------------- + Slices +----------------------------------------------------------- */ + + +static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) { + return &segment->slices[segment->slice_entries]; +} + +static uint8_t* mi_slice_start(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment)); + return ((uint8_t*)segment + ((slice - segment->slices)*MI_SEGMENT_SLICE_SIZE)); +} + + +/* ----------------------------------------------------------- + Bins +----------------------------------------------------------- */ +// Use bit scan forward to quickly find the first zero bit if it is available + +static inline size_t mi_slice_bin8(size_t slice_count) { + if (slice_count<=1) return slice_count; + mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); + slice_count--; + size_t s = mi_bsr(slice_count); // slice_count > 1 + if (s <= 2) return slice_count + 1; + size_t bin = ((s << 2) | ((slice_count >> (s - 2))&0x03)) - 4; + return bin; +} + +static inline size_t mi_slice_bin(size_t slice_count) { + mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); + mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) <= MI_SEGMENT_BIN_MAX); + size_t bin = mi_slice_bin8(slice_count); + mi_assert_internal(bin <= MI_SEGMENT_BIN_MAX); + return bin; +} + 
+static inline size_t mi_slice_index(const mi_slice_t* slice) { + mi_segment_t* segment = _mi_ptr_segment(slice); + ptrdiff_t index = slice - segment->slices; + mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_entries); + return index; +} + + +/* ----------------------------------------------------------- + Slice span queues +----------------------------------------------------------- */ + +static void mi_span_queue_push(mi_span_queue_t* sq, mi_slice_t* slice) { + // todo: or push to the end? + mi_assert_internal(slice->prev == NULL && slice->next==NULL); + slice->prev = NULL; // paranoia + slice->next = sq->first; + sq->first = slice; + if (slice->next != NULL) slice->next->prev = slice; + else sq->last = slice; + slice->xblock_size = 0; // free +} + +static mi_span_queue_t* mi_span_queue_for(size_t slice_count, mi_segments_tld_t* tld) { + size_t bin = mi_slice_bin(slice_count); + mi_span_queue_t* sq = &tld->spans[bin]; + mi_assert_internal(sq->slice_count >= slice_count); + return sq; +} + +static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) { + mi_assert_internal(slice->xblock_size==0 && slice->slice_count>0 && slice->slice_offset==0); + // should work too if the queue does not contain slice (which can happen during reclaim) + if (slice->prev != NULL) slice->prev->next = slice->next; + if (slice == sq->first) sq->first = slice->next; + if (slice->next != NULL) slice->next->prev = slice->prev; + if (slice == sq->last) sq->last = slice->prev; + slice->prev = NULL; + slice->next = NULL; + slice->xblock_size = 1; // no more free +} + + +/* ----------------------------------------------------------- + Invariant checking +----------------------------------------------------------- */ + +static bool mi_slice_is_used(const mi_slice_t* slice) { + return (slice->xblock_size > 0); +} + + +#if (MI_DEBUG>=3) +static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) { + for (mi_slice_t* s = sq->first; s != NULL; s = 
s->next) { + if (s==slice) return true; + } + return false; +} + +static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(segment != NULL); + mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(segment->abandoned <= segment->used); + mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks + //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); + mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); + size_t used_count = 0; + mi_span_queue_t* sq; + while(slice < end) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + size_t index = mi_slice_index(slice); + size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? segment->slice_entries : index + slice->slice_count) - 1; + if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets + used_count++; + for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) { + mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t)); + mi_assert_internal(i==0 || segment->slices[index + i].slice_count == 0); + mi_assert_internal(i==0 || segment->slices[index + i].xblock_size == 1); + } + // and the last entry as well (for coalescing) + const mi_slice_t* last = slice + slice->slice_count - 1; + if (last > slice && last < mi_segment_slices_end(segment)) { + mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); + mi_assert_internal(last->slice_count == 0); + mi_assert_internal(last->xblock_size == 1); + } + } + else { // free range of slices; only last slice needs a valid back offset + mi_slice_t* last = &segment->slices[maxindex]; + if (segment->kind != 
MI_SEGMENT_HUGE || slice->slice_count <= (segment->slice_entries - segment->segment_info_slices)) { + mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); + } + mi_assert_internal(slice == last || last->slice_count == 0 ); + mi_assert_internal(last->xblock_size == 0 || (segment->kind==MI_SEGMENT_HUGE && last->xblock_size==1)); + if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandoned + sq = mi_span_queue_for(slice->slice_count,tld); + mi_assert_internal(mi_span_queue_contains(sq,slice)); + } + } + slice = &segment->slices[maxindex+1]; + } + mi_assert_internal(slice == end); + mi_assert_internal(used_count == segment->used + 1); + return true; +} +#endif + +/* ----------------------------------------------------------- + Segment size calculations +----------------------------------------------------------- */ + +static size_t mi_segment_info_size(mi_segment_t* segment) { + return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; +} + +static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) +{ + ptrdiff_t idx = slice - segment->slices; + size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; + // make the start not OS page aligned for smaller blocks to avoid page/cache effects + size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 
MI_MAX_ALIGN_GUARANTEE : 0); + if (page_size != NULL) { *page_size = psize - start_offset; } + return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); +} + +// Start of the page available memory; can be used on uninitialized pages +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) +{ + const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); + uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page->xblock_size, page_size); + mi_assert_internal(page->xblock_size > 0 || _mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_segment(p) == segment); + return p; +} + + +static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) { + size_t page_size = _mi_os_page_size(); + size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); + size_t guardsize = 0; + + if (MI_SECURE>0) { + // in secure mode, we set up a protected page in between the segment info + // and the page data (and one at the end of the segment) + guardsize = page_size; + required = _mi_align_up(required, page_size); + } + + if (pre_size != NULL) *pre_size = isize; + isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); + if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; + size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); + mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); + return (segment_size / MI_SEGMENT_SLICE_SIZE); +} + + +/* ---------------------------------------------------------------------------- +Segment caches +We keep a small segment cache per thread to increase local +reuse and avoid setting/clearing guard pages in secure mode. 
+------------------------------------------------------------------------------- */ + +static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { + if (segment_size>=0) _mi_stat_increase(&tld->stats->segments,1); + else _mi_stat_decrease(&tld->stats->segments,1); + tld->count += (segment_size >= 0 ? 1 : -1); + if (tld->count > tld->peak_count) tld->peak_count = tld->count; + tld->current_size += segment_size; + if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size; +} + +static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { + segment->thread_id = 0; + _mi_segment_map_freed_at(segment); + mi_segments_track_size(-((long)mi_segment_size(segment)),tld); + if (MI_SECURE>0) { + // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set + // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted + size_t os_pagesize = _mi_os_page_size(); + _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; + _mi_os_unprotect(end, os_pagesize); + } + + // purge delayed decommits now? 
(no, leave it to the cache) + // mi_segment_delayed_decommit(segment,true,tld->stats); + + // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); + const size_t size = mi_segment_size(segment); + if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os); + } +} + +// called by threads that are terminating +void _mi_segment_thread_collect(mi_segments_tld_t* tld) { + MI_UNUSED(tld); + // nothing to do +} + + +/* ----------------------------------------------------------- + Span management +----------------------------------------------------------- */ + +static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { + mi_assert_internal(_mi_ptr_segment(p) == segment); + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); + mi_commit_mask_create_empty(cm); + if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; + const size_t segstart = mi_segment_info_size(segment); + const size_t segsize = mi_segment_size(segment); + if (p >= (uint8_t*)segment + segsize) return; + + size_t pstart = (p - (uint8_t*)segment); + mi_assert_internal(pstart + size <= segsize); + + size_t start; + size_t end; + if (conservative) { + // decommit conservative + start = _mi_align_up(pstart, MI_COMMIT_SIZE); + end = _mi_align_down(pstart + size, MI_COMMIT_SIZE); + mi_assert_internal(start >= segstart); + 
mi_assert_internal(end <= segsize); + } + else { + // commit liberal + start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); + end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); + } + if (pstart >= segstart && start < segstart) { // note: the mask is also calculated for an initial commit of the info area + start = segstart; + } + if (end > segsize) { + end = segsize; + } + + mi_assert_internal(start <= pstart && (pstart + size) <= end); + mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); + *start_p = (uint8_t*)segment + start; + *full_size = (end > start ? end - start : 0); + if (*full_size == 0) return; + + size_t bitidx = start / MI_COMMIT_SIZE; + mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); + + size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 + if (bitidx + bitcount > MI_COMMIT_MASK_BITS) { + _mi_warning_message("commit mask overflow: idx=%zu count=%zu start=%zx end=%zx p=0x%p size=%zu fullsize=%zu\n", bitidx, bitcount, start, end, p, size, *full_size); + } + mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); + mi_commit_mask_create(bitidx, bitcount, cm); +} + + +static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + + // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes. 
+ /* + if (commit && size > 0) { + const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE); + if (p + csize <= mi_segment_end(segment)) { + size = csize; + } + } + */ + // commit liberal, but decommit conservative + uint8_t* start = NULL; + size_t full_size = 0; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + + if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + bool is_zero = false; + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap + if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; + mi_commit_mask_set(&segment->commit_mask, &mask); + } + else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + mi_assert_internal((void*)start != (void*)segment); + //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); + + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap + if (segment->allow_decommit) { + _mi_os_decommit(start, full_size, stats); // ok if this fails + } + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + // increase expiration of reusing part of the delayed decommit + if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + } + // always undo delayed decommits + mi_commit_mask_clear(&segment->decommit_mask, &mask); + return true; +} + +static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + 
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow + if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed + return mi_segment_commitx(segment,true,p,size,stats); +} + +static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (!segment->allow_decommit) return; + if (mi_option_get(mi_option_decommit_delay) == 0) { + mi_segment_commitx(segment, false, p, size, stats); + } + else { + // register for future decommit in the decommit mask + uint8_t* start = NULL; + size_t full_size = 0; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return; + + // update delayed commit + mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->decommit_mask, &cmask); + mi_msecs_t now = _mi_clock_now(); + if (segment->decommit_expire == 0) { + // no previous decommits, initialize now + segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); + } + else if (segment->decommit_expire <= now) { + // previous decommit mask already expired + // mi_segment_delayed_decommit(segment, true, stats); + segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + } + else { + // previous decommit mask is not yet expired, increase the expiration by a bit. 
+ segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); + } + } +} + +static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { + if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; + mi_msecs_t now = _mi_clock_now(); + if (!force && now < segment->decommit_expire) return; + + mi_commit_mask_t mask = segment->decommit_mask; + segment->decommit_expire = 0; + mi_commit_mask_create_empty(&segment->decommit_mask); + + size_t idx; + size_t count; + mi_commit_mask_foreach(&mask, idx, count) { + // if found, decommit that sequence + if (count > 0) { + uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); + size_t size = count * MI_COMMIT_SIZE; + mi_segment_commitx(segment, false, p, size, stats); + } + } + mi_commit_mask_foreach_end() + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); +} + + +static bool mi_segment_is_abandoned(mi_segment_t* segment) { + return (segment->thread_id == 0); +} + +// note: can be called on abandoned segments +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_index < segment->slice_entries); + mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) + ? NULL : mi_span_queue_for(slice_count,tld)); + if (slice_count==0) slice_count = 1; + mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries); + + // set first and last slice (the intermediates can be undetermined) + mi_slice_t* slice = &segment->slices[slice_index]; + slice->slice_count = (uint32_t)slice_count; + mi_assert_internal(slice->slice_count == slice_count); // no overflow? 
+ slice->slice_offset = 0; + if (slice_count > 1) { + mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + last->slice_count = 0; + last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); + last->xblock_size = 0; + } + + // perhaps decommit + mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats); + + // and push it on the free page queue (if it was not a huge page) + if (sq != NULL) mi_span_queue_push( sq, slice ); + else slice->xblock_size = 0; // mark huge page as free anyways +} + +/* +// called from reclaim to add existing free spans +static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_segment_t* segment = _mi_ptr_segment(slice); + mi_assert_internal(slice->xblock_size==0 && slice->slice_count>0 && slice->slice_offset==0); + size_t slice_index = mi_slice_index(slice); + mi_segment_span_free(segment,slice_index,slice->slice_count,tld); +} +*/ + +static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->xblock_size==0); + mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); + mi_span_queue_t* sq = mi_span_queue_for(slice->slice_count, tld); + mi_span_queue_delete(sq, slice); +} + +// note: can be called on abandoned segments +static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { + mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0); + mi_segment_t* segment = _mi_ptr_segment(slice); + bool is_abandoned = mi_segment_is_abandoned(segment); + + // for huge pages, just mark as free but don't add to the queues + if (segment->kind == MI_SEGMENT_HUGE) { + mi_assert_internal(segment->used == 1); // decreased right after this call in `mi_segment_page_clear` + slice->xblock_size = 0; // mark as free anyways + // we should mark the last slice `xblock_size=0` 
now to maintain invariants but we skip it to + // avoid a possible cache miss (and the segment is about to be freed) + return slice; + } + + // otherwise coalesce the span and add to the free span queues + size_t slice_count = slice->slice_count; + mi_slice_t* next = slice + slice->slice_count; + mi_assert_internal(next <= mi_segment_slices_end(segment)); + if (next < mi_segment_slices_end(segment) && next->xblock_size==0) { + // free next block -- remove it from free and merge + mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); + slice_count += next->slice_count; // extend + if (!is_abandoned) { mi_segment_span_remove_from_queue(next, tld); } + } + if (slice > segment->slices) { + mi_slice_t* prev = mi_slice_first(slice - 1); + mi_assert_internal(prev >= segment->slices); + if (prev->xblock_size==0) { + // free previous slice -- remove it from free and merge + mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); + slice_count += prev->slice_count; + if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); } + slice = prev; + } + } + + // and add the new free page + mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld); + return slice; +} + + +static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_ptr_segment(slice)==segment); + mi_assert_internal(slice->slice_count >= slice_count); + mi_assert_internal(slice->xblock_size > 0); // no more in free queue + if (slice->slice_count <= slice_count) return; + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); + size_t next_index = mi_slice_index(slice) + slice_count; + size_t next_count = slice->slice_count - slice_count; + mi_segment_span_free(segment, next_index, next_count, tld); + slice->slice_count = (uint32_t)slice_count; +} + +// Note: may still return NULL if committing the memory failed +static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t 
slice_index, size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_index < segment->slice_entries); + mi_slice_t* slice = &segment->slices[slice_index]; + mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); + + // commit before changing the slice data + if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, 0, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) { + return NULL; // commit failed! + } + + // convert the slices to a page + slice->slice_offset = 0; + slice->slice_count = (uint32_t)slice_count; + mi_assert_internal(slice->slice_count == slice_count); + const size_t bsize = slice_count * MI_SEGMENT_SLICE_SIZE; + slice->xblock_size = (uint32_t)(bsize >= MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : bsize); + mi_page_t* page = mi_slice_to_page(slice); + mi_assert_internal(mi_page_block_size(page) == bsize); + + // set slice back pointers for the first MI_MAX_SLICE_OFFSET entries + size_t extra = slice_count-1; + if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; + if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices + slice++; + for (size_t i = 1; i <= extra; i++, slice++) { + slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); + slice->slice_count = 0; + slice->xblock_size = 1; + } + + // and also for the last one (if not set already) (the last one is needed for coalescing) + // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) + mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; + if (last < mi_segment_slices_end(segment) && last >= slice) { + last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); + last->slice_count = 0; + last->xblock_size = 1; + } + + // and initialize the page + page->is_reset = false; + 
page->is_committed = true; + segment->used++; + return page; +} + +static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segments_tld_t* tld) { + mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); + // search from best fit up + mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld); + if (slice_count == 0) slice_count = 1; + while (sq <= &tld->spans[MI_SEGMENT_BIN_MAX]) { + for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) { + if (slice->slice_count >= slice_count) { + // found one + mi_span_queue_delete(sq, slice); + mi_segment_t* segment = _mi_ptr_segment(slice); + if (slice->slice_count > slice_count) { + mi_segment_slice_split(segment, slice, slice_count, tld); + } + mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); + mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); + if (page == NULL) { + // commit failed; return NULL but first restore the slice + mi_segment_span_free_coalesce(slice, tld); + return NULL; + } + return page; + } + } + sq++; + } + // could not find a page.. + return NULL; +} + + +/* ----------------------------------------------------------- + Segment allocation +----------------------------------------------------------- */ + +// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) +{ + mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL)); + mi_assert_internal((segment==NULL) || (segment!=NULL && required==0)); + // calculate needed sizes first + size_t info_slices; + size_t pre_size; + const size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); + const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? 
MI_SLICES_PER_SEGMENT : segment_slices); + const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE; + + // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) + const bool eager_delay = (// !_mi_os_has_overcommit() && // never delay on overcommit systems + _mi_current_thread_count() > 1 && // do not delay for the first N threads + tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager || (required > 0); + + // Try to get from our cache first + bool is_zero = false; + const bool commit_info_still_good = (segment != NULL); + mi_commit_mask_t commit_mask; + mi_commit_mask_t decommit_mask; + if (segment != NULL) { + commit_mask = segment->commit_mask; + decommit_mask = segment->decommit_mask; + } + else { + mi_commit_mask_create_empty(&commit_mask); + mi_commit_mask_create_empty(&decommit_mask); + } + if (segment==NULL) { + // Allocate the segment from the OS + bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool is_pinned = false; + size_t memid = 0; + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + if (segment==NULL) { + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + if (segment == NULL) return NULL; // failed to allocate + if (commit) { + mi_commit_mask_create_full(&commit_mask); + } + else { + mi_commit_mask_create_empty(&commit_mask); + } + } + mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); + + const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_assert_internal(commit_needed>0); + mi_commit_mask_t commit_needed_mask; + mi_commit_mask_create(0, 
commit_needed, &commit_needed_mask); + if (!mi_commit_mask_all_set(&commit_mask, &commit_needed_mask)) { + // at least commit the info slices + mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= info_slices*MI_SEGMENT_SLICE_SIZE); + bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, &is_zero, tld->stats); + if (!ok) return NULL; // failed to commit + mi_commit_mask_set(&commit_mask, &commit_needed_mask); + } + segment->memid = memid; + segment->mem_is_pinned = is_pinned; + segment->mem_is_large = mem_large; + segment->mem_is_committed = mi_commit_mask_is_full(&commit_mask); + mi_segments_track_size((long)(segment_size), tld); + _mi_segment_map_allocated_at(segment); + } + + // zero the segment info? -- not always needed as it is zero initialized from the OS + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan + if (!is_zero) { + ptrdiff_t ofs = offsetof(mi_segment_t, next); + size_t prefix = offsetof(mi_segment_t, slices) - ofs; + memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices); + } + + if (!commit_info_still_good) { + segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed + segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); + if (segment->allow_decommit) { + segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); + segment->decommit_mask = decommit_mask; + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + #if MI_DEBUG>2 + const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_commit_mask_t commit_needed_mask; + mi_commit_mask_create(0, commit_needed, &commit_needed_mask); + mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); + #endif + } + else { + mi_assert_internal(mi_commit_mask_is_empty(&decommit_mask)); + 
segment->decommit_expire = 0; + mi_commit_mask_create_empty( &segment->decommit_mask ); + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + } + } + + + // initialize segment info + segment->segment_slices = segment_slices; + segment->segment_info_slices = info_slices; + segment->thread_id = _mi_thread_id(); + segment->cookie = _mi_ptr_cookie(segment); + segment->slice_entries = slice_entries; + segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); + + // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); + _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); + + // set up guard pages + size_t guard_slices = 0; + if (MI_SECURE>0) { + // in secure mode, we set up a protected page in between the segment info + // and the page data, and at the end of the segment. + size_t os_pagesize = _mi_os_page_size(); + mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size); + _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); + uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; + mi_segment_ensure_committed(segment, end, os_pagesize, tld->stats); + _mi_os_protect(end, os_pagesize); + if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( + guard_slices = 1; + } + + // reserve first slices for segment info + mi_page_t* page0 = mi_segment_span_allocate(segment, 0, info_slices, tld); + mi_assert_internal(page0!=NULL); if (page0==NULL) return NULL; // cannot fail as we always commit in advance + mi_assert_internal(segment->used == 1); + segment->used = 0; // don't count our internal slices towards usage + + // initialize initial free pages + if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page + mi_assert_internal(huge_page==NULL); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, tld); + } + else { + mi_assert_internal(huge_page!=NULL); + 
mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); + *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); + mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance + } + + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + return segment; +} + + +// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . +static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { + return mi_segment_init(NULL, required, tld, os_tld, huge_page); +} + + +static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { + MI_UNUSED(force); + mi_assert_internal(segment != NULL); + mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->used == 0); + + // Remove the free pages + mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); + size_t page_count = 0; + while (slice < end) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + mi_assert_internal(mi_slice_index(slice)==0 || slice->xblock_size == 0); // no more used pages .. 
+ if (slice->xblock_size == 0 && segment->kind != MI_SEGMENT_HUGE) { + mi_segment_span_remove_from_queue(slice, tld); + } + page_count++; + slice = slice + slice->slice_count; + } + mi_assert_internal(page_count == 2); // first page is allocated by the segment itself + + // stats + _mi_stat_decrease(&tld->stats->page_committed, mi_segment_info_size(segment)); + + // return it to the OS + mi_segment_os_free(segment, tld); +} + + +/* ----------------------------------------------------------- + Page Free +----------------------------------------------------------- */ + +static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); + +// note: can be called on abandoned pages +static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page->xblock_size > 0); + mi_assert_internal(mi_page_all_free(page)); + mi_segment_t* segment = _mi_ptr_segment(page); + mi_assert_internal(segment->used > 0); + + size_t inuse = page->capacity * mi_page_block_size(page); + _mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); + + // reset the page memory to reduce memory pressure? 
+ if (!segment->mem_is_pinned && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + size_t psize; + uint8_t* start = _mi_page_start(segment, page, &psize); + page->is_reset = true; + _mi_os_reset(start, psize, tld->stats); + } + + // zero the page data, but not the segment fields + page->is_zero_init = false; + ptrdiff_t ofs = offsetof(mi_page_t, capacity); + memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + page->xblock_size = 1; + + // and free it + mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld); + segment->used--; + // cannot assert segment valid as it is called during reclaim + // mi_assert_expensive(mi_segment_is_valid(segment, tld)); + return slice; +} + +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) +{ + mi_assert(page != NULL); + + mi_segment_t* segment = _mi_page_segment(page); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + + // mark it as free now + mi_segment_page_clear(page, tld); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + + if (segment->used == 0) { + // no more used pages; remove from the free list and free the segment + mi_segment_free(segment, force, tld); + } + else if (segment->used == segment->abandoned) { + // only abandoned pages; remove from free list and abandon + mi_segment_abandon(segment,tld); + } +} + + +/* ----------------------------------------------------------- +Abandonment + +When threads terminate, they can leave segments with +live blocks (reachable through other threads). Such segments +are "abandoned" and will be reclaimed by other threads to +reuse their pages and/or free them eventually + +We maintain a global list of abandoned segments that are +reclaimed on demand. Since this is shared among threads +the implementation needs to avoid the A-B-A problem on +popping abandoned segments: +We use tagged pointers to avoid accidentially identifying +reused segments, much like stamped references in Java. 
+Secondly, we maintain a reader counter to avoid resetting +or decommitting segments that have a pending read operation. + +Note: the current implementation is one possible design; +another way might be to keep track of abandoned segments +in the arenas/segment_cache's. This would have the advantage of keeping +all concurrent code in one place and not needing to deal +with ABA issues. The drawback is that it is unclear how to +scan abandoned segments efficiently in that case as they +would be spread among all other segments in the arenas. +----------------------------------------------------------- */ + +// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers +// to put in a tag that increments on update to avoid the A-B-A problem. +#define MI_TAGGED_MASK MI_SEGMENT_MASK +typedef uintptr_t mi_tagged_segment_t; + +static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { + return (mi_segment_t*)(ts & ~MI_TAGGED_MASK); +} + +static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) { + mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0); + uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK; + return ((uintptr_t)segment | tag); +} + +// This is a list of visited abandoned pages that were full at the time. +// this list migrates to `abandoned` when that becomes NULL. The use of +// this list reduces contention and the rate at which segments are visited. 
+static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL + +// The abandoned page list (tagged as it supports pop) +static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL + +// Maintain these for debug purposes (these counts may be a bit off) +static mi_decl_cache_align _Atomic(size_t) abandoned_count; +static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count; + +// We also maintain a count of current readers of the abandoned list +// in order to prevent resetting/decommitting segment memory if it might +// still be read. +static mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0 + +// Push on the visited list +static void mi_abandoned_visited_push(mi_segment_t* segment) { + mi_assert_internal(segment->thread_id == 0); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); + mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->used > 0); + mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited); + do { + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext); + } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment)); + mi_atomic_increment_relaxed(&abandoned_visited_count); +} + +// Move the visited list to the abandoned list. 
+static bool mi_abandoned_visited_revisit(void) +{ + // quick check if the visited list is empty + if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; + + // grab the whole visited list + mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL); + if (first == NULL) return false; + + // first try to swap directly if the abandoned list happens to be NULL + mi_tagged_segment_t afirst; + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); + if (mi_tagged_segment_ptr(ts)==NULL) { + size_t count = mi_atomic_load_relaxed(&abandoned_visited_count); + afirst = mi_tagged_segment(first, ts); + if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) { + mi_atomic_add_relaxed(&abandoned_count, count); + mi_atomic_sub_relaxed(&abandoned_visited_count, count); + return true; + } + } + + // find the last element of the visited list: O(n) + mi_segment_t* last = first; + mi_segment_t* next; + while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) { + last = next; + } + + // and atomically prepend to the abandoned list + // (no need to increase the readers as we don't access the abandoned segments) + mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned); + size_t count; + do { + count = mi_atomic_load_relaxed(&abandoned_visited_count); + mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext)); + afirst = mi_tagged_segment(first, anext); + } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst)); + mi_atomic_add_relaxed(&abandoned_count, count); + mi_atomic_sub_relaxed(&abandoned_visited_count, count); + return true; +} + +// Push on the abandoned list. 
+static void mi_abandoned_push(mi_segment_t* segment) { + mi_assert_internal(segment->thread_id == 0); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); + mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->used > 0); + mi_tagged_segment_t next; + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); + do { + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); + next = mi_tagged_segment(segment, ts); + } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next)); + mi_atomic_increment_relaxed(&abandoned_count); +} + +// Wait until there are no more pending reads on segments that used to be in the abandoned list +// called for example from `arena.c` before decommitting +void _mi_abandoned_await_readers(void) { + size_t n; + do { + n = mi_atomic_load_acquire(&abandoned_readers); + if (n != 0) mi_atomic_yield(); + } while (n != 0); +} + +// Pop from the abandoned list +static mi_segment_t* mi_abandoned_pop(void) { + mi_segment_t* segment; + // Check efficiently if it is empty (or if the visited list needs to be moved) + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); + segment = mi_tagged_segment_ptr(ts); + if (mi_likely(segment == NULL)) { + if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL + return NULL; + } + } + + // Do a pop. We use a reader count to prevent + // a segment to be decommitted while a read is still pending, + // and a tagged pointer to prevent A-B-A link corruption. 
+ // (this is called from `region.c:_mi_mem_free` for example) + mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted + mi_tagged_segment_t next = 0; + ts = mi_atomic_load_acquire(&abandoned); + do { + segment = mi_tagged_segment_ptr(ts); + if (segment != NULL) { + mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next); + next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted + } + } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); + mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock + if (segment != NULL) { + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); + mi_atomic_decrement_relaxed(&abandoned_count); + } + return segment; +} + +/* ----------------------------------------------------------- + Abandon segment/page +----------------------------------------------------------- */ + +static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(segment->used == segment->abandoned); + mi_assert_internal(segment->used > 0); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); + mi_assert_internal(segment->abandoned_visits == 0); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + + // remove the free pages from the free page queues + mi_slice_t* slice = &segment->slices[0]; + const mi_slice_t* end = mi_segment_slices_end(segment); + while (slice < end) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (slice->xblock_size == 0) { // a free page + mi_segment_span_remove_from_queue(slice,tld); + slice->xblock_size = 0; // but keep it free + } + slice = slice + slice->slice_count; + } + + // perform delayed decommits + mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? 
*/, tld->stats); + + // all pages in the segment are abandoned; add it to the abandoned list + _mi_stat_increase(&tld->stats->segments_abandoned, 1); + mi_segments_track_size(-((long)mi_segment_size(segment)), tld); + segment->thread_id = 0; + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); + segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned + mi_abandoned_push(segment); +} + +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert(page != NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); + mi_segment_t* segment = _mi_page_segment(page); + + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + segment->abandoned++; + + _mi_stat_increase(&tld->stats->pages_abandoned, 1); + mi_assert_internal(segment->abandoned <= segment->used); + if (segment->used == segment->abandoned) { + // all pages are abandoned, abandon the entire segment + mi_segment_abandon(segment, tld); + } +} + +/* ----------------------------------------------------------- + Reclaim abandoned pages +----------------------------------------------------------- */ + +static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) { + mi_slice_t* slice = &segment->slices[0]; + *end = mi_segment_slices_end(segment); + mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page + slice = slice + slice->slice_count; // skip the first segment allocated page + return slice; +} + +// Possibly free pages and check if free space is available +static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld) +{ + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + mi_assert_internal(mi_segment_is_abandoned(segment)); + bool has_page = false; + + // for all slices + const mi_slice_t* end; + mi_slice_t* slice = 
mi_slices_start_iterate(segment, &end); + while (slice < end) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (mi_slice_is_used(slice)) { // used page + // ensure used count is up to date and collect potential concurrent frees + mi_page_t* const page = mi_slice_to_page(slice); + _mi_page_free_collect(page, false); + if (mi_page_all_free(page)) { + // if this page is all free now, free it without adding to any queues (yet) + mi_assert_internal(page->next == NULL && page->prev==NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + segment->abandoned--; + slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce! + mi_assert_internal(!mi_slice_is_used(slice)); + if (slice->slice_count >= slices_needed) { + has_page = true; + } + } + else { + if (page->xblock_size == block_size && mi_page_has_any_available(page)) { + // a page has available free blocks of the right size + has_page = true; + } + } + } + else { + // empty span + if (slice->slice_count >= slices_needed) { + has_page = true; + } + } + slice = slice + slice->slice_count; + } + return has_page; +} + +// Reclaim an abandoned segment; returns NULL if the segment was freed +// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. 
+static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } + + segment->thread_id = _mi_thread_id(); + segment->abandoned_visits = 0; + mi_segments_track_size((long)mi_segment_size(segment), tld); + mi_assert_internal(segment->next == NULL); + _mi_stat_decrease(&tld->stats->segments_abandoned, 1); + + // for all slices + const mi_slice_t* end; + mi_slice_t* slice = mi_slices_start_iterate(segment, &end); + while (slice < end) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (mi_slice_is_used(slice)) { + // in use: reclaim the page in our heap + mi_page_t* page = mi_slice_to_page(slice); + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); + mi_assert_internal(page->next == NULL && page->prev==NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + segment->abandoned--; + // set the heap again and allow delayed free again + mi_page_set_heap(page, heap); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + _mi_page_free_collect(page, false); // ensure used count is up to date + if (mi_page_all_free(page)) { + // if everything free by now, free the page + slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing + } + else { + // otherwise reclaim it into the heap + _mi_page_reclaim(heap, page); + if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) { + if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; } + } + } + } + 
else { + // the span is free, add it to our page queues + slice = mi_segment_span_free_coalesce(slice, tld); // set slice again due to coalesceing + } + mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); + slice = slice + slice->slice_count; + } + + mi_assert(segment->abandoned == 0); + if (segment->used == 0) { // due to page_clear + mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed)); + mi_segment_free(segment, false, tld); + return NULL; + } + else { + return segment; + } +} + + +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { + mi_segment_t* segment; + while ((segment = mi_abandoned_pop()) != NULL) { + mi_segment_reclaim(segment, heap, 0, NULL, tld); + } +} + +static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld) +{ + *reclaimed = false; + mi_segment_t* segment; + long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + segment->abandoned_visits++; + bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees) + if (segment->used == 0) { + // free the segment (by forced reclaim) to make it available to other threads. + // note1: we prefer to free a segment as that might lead to reclaiming another + // segment that is still partially used. + // note2: we could in principle optimize this by skipping reclaim and directly + // freeing but that would violate some invariants temporarily) + mi_segment_reclaim(segment, heap, 0, NULL, tld); + } + else if (has_page) { + // found a large enough free span, or a page of the right block_size with free space + // we return the result of reclaim (which is usually `segment`) as it might free + // the segment due to concurrent frees (in which case `NULL` is returned). 
+ return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); + } + else if (segment->abandoned_visits > 3) { + // always reclaim on 3rd visit to limit the abandoned queue length. + mi_segment_reclaim(segment, heap, 0, NULL, tld); + } + else { + // otherwise, push on the visited list so it gets not looked at too quickly again + mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again + mi_abandoned_visited_push(segment); + } + } + return NULL; +} + + +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) +{ + mi_segment_t* segment; + int max_tries = (force ? 16*1024 : 1024); // limit latency + if (force) { + mi_abandoned_visited_revisit(); + } + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) + if (segment->used == 0) { + // free the segment (by forced reclaim) to make it available to other threads. + // note: we could in principle optimize this by skipping reclaim and directly + // freeing but that would violate some invariants temporarily) + mi_segment_reclaim(segment, heap, 0, NULL, tld); + } + else { + // otherwise, decommit if needed and push on the visited list + // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. + mi_segment_delayed_decommit(segment, force, tld->stats); + mi_abandoned_visited_push(segment); + } + } +} + +/* ----------------------------------------------------------- + Reclaim or allocate +----------------------------------------------------------- */ + +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); + + // 1. 
try to reclaim an abandoned segment + bool reclaimed; + mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld); + if (reclaimed) { + // reclaimed the right page right into the heap + mi_assert_internal(segment != NULL); + return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks + } + else if (segment != NULL) { + // reclaimed a segment with a large enough empty span in it + return segment; + } + // 2. otherwise allocate a fresh segment + return mi_segment_alloc(0, tld, os_tld, NULL); +} + + +/* ----------------------------------------------------------- + Page allocation +----------------------------------------------------------- */ + +static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_kind, size_t required, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE); + + // find a free page + size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); + size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; + mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); + mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); + if (page==NULL) { + // no free page, allocate a new segment and try again + if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) { + // OOM or reclaimed a good page in the heap + return NULL; + } + else { + // otherwise try again + return mi_segments_page_alloc(heap, page_kind, required, block_size, tld, os_tld); + } + } + mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); + mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); + mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); + return page; +} + + + +/* ----------------------------------------------------------- + Huge page allocation +----------------------------------------------------------- */ + +static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_page_t* page = NULL; + mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); + if (segment == NULL || page==NULL) return NULL; + mi_assert_internal(segment->used==1); + mi_assert_internal(mi_page_block_size(page) >= size); + segment->thread_id = 0; // huge segments are immediately abandoned + return page; +} + +// free huge block from another thread +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + // huge page segments are always abandoned and can be freed immediately by any thread + mi_assert_internal(segment->kind==MI_SEGMENT_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id)==0); + + // claim it and free + mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized. 
+ // paranoia: if this it the last reference, the cas should always succeed + size_t expected_tid = 0; + if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) { + mi_block_set_next(page, block, page->free); + page->free = block; + page->used--; + page->is_zero = false; + mi_assert(page->used == 0); + mi_tld_t* tld = heap->tld; + _mi_segment_page_free(page, true, &tld->segments); + } +#if (MI_DEBUG!=0) + else { + mi_assert_internal(false); + } +#endif +} + +/* ----------------------------------------------------------- + Page allocation and free +----------------------------------------------------------- */ +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_page_t* page; + if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { + page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld); + } + else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { + page = mi_segments_page_alloc(heap,MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,block_size,tld, os_tld); + } + else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { + page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld); + } + else { + page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + return page; +} diff --git a/compat/mimalloc/stats.c b/compat/mimalloc/stats.c new file mode 100644 index 00000000000000..9ff4485be7decc --- /dev/null +++ b/compat/mimalloc/stats.c @@ -0,0 +1,583 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include <stdio.h> // fputs, stderr +#include <string.h> // memset + +#if defined(_MSC_VER) && (_MSC_VER < 1920) +#pragma warning(disable:4204) // non-constant aggregate initializer +#endif + +/* ----------------------------------------------------------- + Statistics operations +----------------------------------------------------------- */ + +static bool mi_is_in_main(void* stat) { + return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main + && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); +} + +static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + if (mi_is_in_main(stat)) + { + // add atomically (for abandoned pages) + int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_maxi64_relaxed(&stat->peak, current + amount); + if (amount > 0) { + mi_atomic_addi64_relaxed(&stat->allocated,amount); + } + else { + mi_atomic_addi64_relaxed(&stat->freed, -amount); + } + } + else { + // add thread local + stat->current += amount; + if (stat->current > stat->peak) stat->peak = stat->current; + if (amount > 0) { + stat->allocated += amount; + } + else { + stat->freed += -amount; + } + } +} + +void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { + if (mi_is_in_main(stat)) { + mi_atomic_addi64_relaxed( &stat->count, 1 ); + mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); + } + else { + stat->count++; + stat->total += amount; + } +} + +void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) { + mi_stat_update(stat, (int64_t)amount); +} + +void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { + mi_stat_update(stat, -((int64_t)amount)); +} + +// must be thread safe as it is called from stats_merge +static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { + if (stat==src) 
return; + if (src->allocated==0 && src->freed==0) return; + mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit); + mi_atomic_addi64_relaxed( &stat->current, src->current * unit); + mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit); + // peak scores do not work across threads.. + mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit); +} + +static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { + if (stat==src) return; + mi_atomic_addi64_relaxed( &stat->total, src->total * unit); + mi_atomic_addi64_relaxed( &stat->count, src->count * unit); +} + +// must be thread safe as it is called from stats_merge +static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { + if (stats==src) return; + mi_stat_add(&stats->segments, &src->segments,1); + mi_stat_add(&stats->pages, &src->pages,1); + mi_stat_add(&stats->reserved, &src->reserved, 1); + mi_stat_add(&stats->committed, &src->committed, 1); + mi_stat_add(&stats->reset, &src->reset, 1); + mi_stat_add(&stats->page_committed, &src->page_committed, 1); + + mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); + mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1); + mi_stat_add(&stats->threads, &src->threads, 1); + + mi_stat_add(&stats->malloc, &src->malloc, 1); + mi_stat_add(&stats->segments_cache, &src->segments_cache, 1); + mi_stat_add(&stats->normal, &src->normal, 1); + mi_stat_add(&stats->huge, &src->huge, 1); + mi_stat_add(&stats->large, &src->large, 1); + + mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); + mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); + mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); + + mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); + mi_stat_counter_add(&stats->searches, &src->searches, 1); + mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); + mi_stat_counter_add(&stats->huge_count, &src->huge_count, 
1); + mi_stat_counter_add(&stats->large_count, &src->large_count, 1); +#if MI_STAT>1 + for (size_t i = 0; i <= MI_BIN_HUGE; i++) { + if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) { + mi_stat_add(&stats->normal_bins[i], &src->normal_bins[i], 1); + } + } +#endif +} + +/* ----------------------------------------------------------- + Display statistics +----------------------------------------------------------- */ + +// unit > 0 : size in binary bytes +// unit == 0: count as decimal +// unit < 0 : count in binary +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { + char buf[32]; buf[0] = 0; + int len = 32; + const char* suffix = (unit <= 0 ? " " : "B"); + const int64_t base = (unit == 0 ? 1000 : 1024); + if (unit>0) n *= unit; + + const int64_t pos = (n < 0 ? -n : n); + if (pos < base) { + if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column + snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); + } + } + else { + int64_t divider = base; + const char* magnitude = "K"; + if (pos >= divider*base) { divider *= base; magnitude = "M"; } + if (pos >= divider*base) { divider *= base; magnitude = "G"; } + const int64_t tens = (n / (divider/10)); + const long whole = (long)(tens/10); + const long frac1 = (long)(tens%10); + char unitdesc[8]; + snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix); + snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc); + } + _mi_fprintf(out, arg, (fmt==NULL ? 
"%11s" : fmt), buf); +} + + +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + mi_printf_amount(n,unit,out,arg,NULL); +} + +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + else mi_print_amount(n,0,out,arg); +} + +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg,"%10s:", msg); + if (unit>0) { + mi_print_amount(stat->peak, unit, out, arg); + mi_print_amount(stat->allocated, unit, out, arg); + mi_print_amount(stat->freed, unit, out, arg); + mi_print_amount(stat->current, unit, out, arg); + mi_print_amount(unit, 1, out, arg); + mi_print_count(stat->allocated, unit, out, arg); + if (stat->allocated > stat->freed) + _mi_fprintf(out, arg, " not all freed!\n"); + else + _mi_fprintf(out, arg, " ok\n"); + } + else if (unit<0) { + mi_print_amount(stat->peak, -1, out, arg); + mi_print_amount(stat->allocated, -1, out, arg); + mi_print_amount(stat->freed, -1, out, arg); + mi_print_amount(stat->current, -1, out, arg); + if (unit==-1) { + _mi_fprintf(out, arg, "%22s", ""); + } + else { + mi_print_amount(-unit, 1, out, arg); + mi_print_count((stat->allocated / -unit), 0, out, arg); + } + if (stat->allocated > stat->freed) + _mi_fprintf(out, arg, " not all freed!\n"); + else + _mi_fprintf(out, arg, " ok\n"); + } + else { + mi_print_amount(stat->peak, 1, out, arg); + mi_print_amount(stat->allocated, 1, out, arg); + _mi_fprintf(out, arg, "%11s", " "); // no freed + mi_print_amount(stat->current, 1, out, arg); + _mi_fprintf(out, arg, "\n"); + } +} + +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->total, -1, out, arg); + _mi_fprintf(out, arg, "\n"); +} + +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, 
mi_output_fun* out, void* arg) { + const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); + const long avg_whole = (long)(avg_tens/10); + const long avg_frac1 = (long)(avg_tens%10); + _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); +} + + +static void mi_print_header(mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); +} + +#if MI_STAT>1 +static void mi_stats_print_bins(const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) { + bool found = false; + char buf[64]; + for (size_t i = 0; i <= max; i++) { + if (bins[i].allocated > 0) { + found = true; + int64_t unit = _mi_bin_size((uint8_t)i); + snprintf(buf, 64, "%s %3lu", fmt, (long)i); + mi_stat_print(&bins[i], buf, unit, out, arg); + } + } + if (found) { + _mi_fprintf(out, arg, "\n"); + mi_print_header(out, arg); + } +} +#endif + + + +//------------------------------------------------------------ +// Use an output wrapper for line-buffered output +// (which is nice when using loggers etc.) 
+//------------------------------------------------------------ +typedef struct buffered_s { + mi_output_fun* out; // original output function + void* arg; // and state + char* buf; // local buffer of at least size `count+1` + size_t used; // currently used chars `used <= count` + size_t count; // total chars available for output +} buffered_t; + +static void mi_buffered_flush(buffered_t* buf) { + buf->buf[buf->used] = 0; + _mi_fputs(buf->out, buf->arg, NULL, buf->buf); + buf->used = 0; +} + +static void mi_buffered_out(const char* msg, void* arg) { + buffered_t* buf = (buffered_t*)arg; + if (msg==NULL || buf==NULL) return; + for (const char* src = msg; *src != 0; src++) { + char c = *src; + if (buf->used >= buf->count) mi_buffered_flush(buf); + mi_assert_internal(buf->used < buf->count); + buf->buf[buf->used++] = c; + if (c == '\n') mi_buffered_flush(buf); + } +} + +//------------------------------------------------------------ +// Print statistics +//------------------------------------------------------------ + +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); + +static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { + // wrap the output function to be line buffered + char buf[256]; + buffered_t buffer = { out0, arg0, NULL, 0, 255 }; + buffer.buf = buf; + mi_output_fun* out = &mi_buffered_out; + void* arg = &buffer; + + // and print using that + mi_print_header(out,arg); + #if MI_STAT>1 + mi_stats_print_bins(stats->normal_bins, MI_BIN_HUGE, "normal",out,arg); + #endif + #if MI_STAT + mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg); + mi_stat_print(&stats->large, "large", (stats->large_count.count == 0 ? 
1 : -(stats->large.allocated / stats->large_count.count)), out, arg); + mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); + mi_stat_count_t total = { 0,0,0,0 }; + mi_stat_add(&total, &stats->normal, 1); + mi_stat_add(&total, &stats->large, 1); + mi_stat_add(&total, &stats->huge, 1); + mi_stat_print(&total, "total", 1, out, arg); + #endif + #if MI_STAT>1 + mi_stat_print(&stats->malloc, "malloc req", 1, out, arg); + _mi_fprintf(out, arg, "\n"); + #endif + mi_stat_print(&stats->reserved, "reserved", 1, out, arg); + mi_stat_print(&stats->committed, "committed", 1, out, arg); + mi_stat_print(&stats->reset, "reset", 1, out, arg); + mi_stat_print(&stats->page_committed, "touched", 1, out, arg); + mi_stat_print(&stats->segments, "segments", -1, out, arg); + mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); + mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg); + mi_stat_print(&stats->pages, "pages", -1, out, arg); + mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg); + mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); + mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); + mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); + mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_print(&stats->threads, "threads", -1, out, arg); + mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); + _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); + + mi_msecs_t elapsed; + mi_msecs_t user_time; + mi_msecs_t sys_time; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; + mi_stat_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults); + _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, 
arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", + user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); + mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); + if (peak_commit > 0) { + _mi_fprintf(out, arg, ", commit: "); + mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); + } + _mi_fprintf(out, arg, "\n"); +} + +static mi_msecs_t mi_process_start; // = 0 + +static mi_stats_t* mi_stats_get_default(void) { + mi_heap_t* heap = mi_heap_get_default(); + return &heap->tld->stats; +} + +static void mi_stats_merge_from(mi_stats_t* stats) { + if (stats != &_mi_stats_main) { + mi_stats_add(&_mi_stats_main, stats); + memset(stats, 0, sizeof(mi_stats_t)); + } +} + +void mi_stats_reset(void) mi_attr_noexcept { + mi_stats_t* stats = mi_stats_get_default(); + if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } + memset(&_mi_stats_main, 0, sizeof(mi_stats_t)); + if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); }; +} + +void mi_stats_merge(void) mi_attr_noexcept { + mi_stats_merge_from( mi_stats_get_default() ); +} + +void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` + mi_stats_merge_from(stats); +} + +void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_stats_merge_from(mi_stats_get_default()); + _mi_stats_print(&_mi_stats_main, out, arg); +} + +void mi_stats_print(void* out) mi_attr_noexcept { + // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`) + mi_stats_print_out((mi_output_fun*)out, NULL); +} + +void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { + _mi_stats_print(mi_stats_get_default(), out, arg); +} + + +// ---------------------------------------------------------------- +// Basic timer for convenience; use milli-seconds to avoid doubles +// ---------------------------------------------------------------- +#ifdef _WIN32 +#include +static 
mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { + LARGE_INTEGER f; + QueryPerformanceFrequency(&f); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; + } + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); +} + +mi_msecs_t _mi_clock_now(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return mi_to_msecs(t); +} +#else +#include <time.h> +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) +mi_msecs_t _mi_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} +#else +// low resolution timer +mi_msecs_t _mi_clock_now(void) { + return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); +} +#endif +#endif + + +static mi_msecs_t mi_clock_diff; + +mi_msecs_t _mi_clock_start(void) { + if (mi_clock_diff == 0.0) { + mi_msecs_t t0 = _mi_clock_now(); + mi_clock_diff = _mi_clock_now() - t0; + } + return _mi_clock_now(); +} + +mi_msecs_t _mi_clock_end(mi_msecs_t start) { + mi_msecs_t end = _mi_clock_now(); + return (end - start - mi_clock_diff); +} + + +// -------------------------------------------------------- +// Basic process statistics +// -------------------------------------------------------- + +#if defined(_WIN32) +#include <windows.h> +#include <psapi.h> +#pragma comment(lib,"psapi.lib") + +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { + ULARGE_INTEGER i; + i.LowPart = ftime->dwLowDateTime; + i.HighPart = ftime->dwHighDateTime; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; +} + +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *elapsed = _mi_clock_end(mi_process_start); + FILETIME ct; + FILETIME 
ut; + FILETIME st; + FILETIME et; + GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); + *utime = filetime_msecs(&ut); + *stime = filetime_msecs(&st); + PROCESS_MEMORY_COUNTERS info; + GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + *current_rss = (size_t)info.WorkingSetSize; + *peak_rss = (size_t)info.PeakWorkingSetSize; + *current_commit = (size_t)info.PagefileUsage; + *peak_commit = (size_t)info.PeakPagefileUsage; + *page_faults = (size_t)info.PageFaultCount; +} + +#elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) +#include +#include +#include + +#if defined(__APPLE__) +#include +#endif + +#if defined(__HAIKU__) +#include +#endif + +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); +} + +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *elapsed = _mi_clock_end(mi_process_start); + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + *utime = timeval_secs(&rusage.ru_utime); + *stime = timeval_secs(&rusage.ru_stime); +#if !defined(__HAIKU__) + *page_faults = rusage.ru_majflt; +#endif + // estimate commit using our stats + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *current_rss = *current_commit; // estimate +#if defined(__HAIKU__) + // Haiku does not have (yet?) 
a way to + // get these stats per process + thread_info tid; + area_info mem; + ssize_t c; + get_thread_info(find_thread(0), &tid); + while (get_next_area_info(tid.team, &c, &mem) == B_OK) { + *peak_rss += mem.ram_size; + } + *page_faults = 0; +#elif defined(__APPLE__) + *peak_rss = rusage.ru_maxrss; // BSD reports in bytes + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + *current_rss = (size_t)info.resident_size; + } +#else + *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB +#endif +} + +#else +#ifndef __wasi__ +// WebAssembly instances are not processes +#pragma message("define a way to get process info") +#endif + +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *elapsed = _mi_clock_end(mi_process_start); + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *peak_rss = *peak_commit; + *current_rss = *current_commit; + *page_faults = 0; + *utime = 0; + *stime = 0; +} +#endif + + +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept +{ + mi_msecs_t elapsed = 0; + mi_msecs_t utime = 0; + mi_msecs_t stime = 0; + size_t current_rss0 = 0; + size_t peak_rss0 = 0; + size_t current_commit0 = 0; + size_t peak_commit0 = 0; + size_t page_faults0 = 0; + mi_stat_process_info(&elapsed,&utime, &stime, &current_rss0, &peak_rss0, &current_commit0, &peak_commit0, &page_faults0); + if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 
0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = current_rss0; + if (peak_rss!=NULL) *peak_rss = peak_rss0; + if (current_commit!=NULL) *current_commit = current_commit0; + if (peak_commit!=NULL) *peak_commit = peak_commit0; + if (page_faults!=NULL) *page_faults = page_faults0; +} From 95435ff7978fb29ba8094d0942e812e2d42ed6f1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 11 Feb 2021 15:09:57 +0100 Subject: [PATCH 172/303] ci: reinstate Azure Pipelines support ... so that we can test a MinGit backport in a private repository (with GitHub Actions, minutes and parallel jobs are limited way more than with Azure Pipelines in private repositories). In this commit, we reinstate the exact version of `azure-pipelines.yml` as 6081d3898fe (ci: retire the Azure Pipelines definition, 2020-04-11) deleted. Naturally, many adjustments are required to make it work again. Some of the changes are actually outside of that file (such as the `runs_on_pool` changes that are needed in the Azure Pipelines part of `ci/lib.sh`) and they were made in the commits leading up to this here commit. However, other adjustments are required in the `azure-pipelines.yml` file itself, and for ease of review (read: to build confidence in those changes) they will be made in subsequent, individual commits that explain the intent, context, implementation and justification like every good commit message should do. 
Signed-off-by: Johannes Schindelin --- azure-pipelines.yml | 558 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 558 insertions(+) create mode 100644 azure-pipelines.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 00000000000000..11413f66f89662 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,558 @@ +variables: + Agent.Source.Git.ShallowFetchDepth: 1 + +jobs: +- job: windows_build + displayName: Windows Build + condition: succeeded() + pool: + vmImage: windows-latest + timeoutInMinutes: 240 + steps: + - powershell: | + if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { + net use s: \\gitfileshare.file.core.windows.net\test-cache "$GITFILESHAREPWD" /user:AZURE\gitfileshare /persistent:no + cmd /c mklink /d "$(Build.SourcesDirectory)\test-cache" S:\ + } + displayName: 'Mount test-cache' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - powershell: | + $urlbase = "https://dev.azure.com/git-for-windows/git/_apis/build/builds" + $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=22&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id + $downloadUrl = ((Invoke-WebRequest -UseBasicParsing "${urlbase}/$id/artifacts").content | ConvertFrom-JSON).value[1].resource.downloadUrl + (New-Object Net.WebClient).DownloadFile($downloadUrl,"git-sdk-64-minimal.zip") + Expand-Archive git-sdk-64-minimal.zip -DestinationPath . -Force + Remove-Item git-sdk-64-minimal.zip + + # Let Git ignore the SDK and the test-cache + "/git-sdk-64-minimal/`n/test-cache/`n" | Out-File -NoNewLine -Encoding ascii -Append "$(Build.SourcesDirectory)\.git\info\exclude" + displayName: 'Download git-sdk-64-minimal' + - powershell: | + & git-sdk-64-minimal\usr\bin\bash.exe -lc @" + ci/make-test-artifacts.sh artifacts + "@ + if (!$?) 
{ exit(1) } + displayName: Build + env: + HOME: $(Build.SourcesDirectory) + MSYSTEM: MINGW64 + DEVELOPER: 1 + NO_PERL: 1 + - task: PublishPipelineArtifact@0 + displayName: 'Publish Pipeline Artifact: test artifacts' + inputs: + artifactName: 'windows-artifacts' + targetPath: '$(Build.SourcesDirectory)\artifacts' + - task: PublishPipelineArtifact@0 + displayName: 'Publish Pipeline Artifact: git-sdk-64-minimal' + inputs: + artifactName: 'git-sdk-64-minimal' + targetPath: '$(Build.SourcesDirectory)\git-sdk-64-minimal' + - powershell: | + if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { + cmd /c rmdir "$(Build.SourcesDirectory)\test-cache" + } + displayName: 'Unmount test-cache' + condition: true + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + +- job: windows_test + displayName: Windows Test + dependsOn: windows_build + condition: succeeded() + pool: + vmImage: windows-latest + timeoutInMinutes: 240 + strategy: + parallel: 10 + steps: + - powershell: | + if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { + net use s: \\gitfileshare.file.core.windows.net\test-cache "$GITFILESHAREPWD" /user:AZURE\gitfileshare /persistent:no + cmd /c mklink /d "$(Build.SourcesDirectory)\test-cache" S:\ + } + displayName: 'Mount test-cache' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact: test artifacts' + inputs: + artifactName: 'windows-artifacts' + targetPath: '$(Build.SourcesDirectory)' + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact: git-sdk-64-minimal' + inputs: + artifactName: 'git-sdk-64-minimal' + targetPath: '$(Build.SourcesDirectory)\git-sdk-64-minimal' + - powershell: | + & git-sdk-64-minimal\usr\bin\bash.exe -lc @" + test -f artifacts.tar.gz || { + echo No test artifacts found\; skipping >&2 + exit 0 + } + tar xf artifacts.tar.gz || exit 1 + + # Let Git ignore the SDK and the test-cache + printf 
'%s\n' /git-sdk-64-minimal/ /test-cache/ >>.git/info/exclude + + ci/run-test-slice.sh `$SYSTEM_JOBPOSITIONINPHASE `$SYSTEM_TOTALJOBSINPHASE || { + ci/print-test-failures.sh + exit 1 + } + "@ + if (!$?) { exit(1) } + displayName: 'Test (parallel)' + env: + HOME: $(Build.SourcesDirectory) + MSYSTEM: MINGW64 + NO_SVN_TESTS: 1 + GIT_TEST_SKIP_REBASE_P: 1 + - powershell: | + if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { + cmd /c rmdir "$(Build.SourcesDirectory)\test-cache" + } + displayName: 'Unmount test-cache' + condition: true + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/TEST-*.xml' + inputs: + mergeTestResults: true + testRunTitle: 'windows' + platform: Windows + publishRunAttachments: false + condition: succeededOrFailed() + - task: PublishBuildArtifacts@1 + displayName: 'Publish trash directories of failed tests' + condition: failed() + inputs: + PathtoPublish: t/failed-test-artifacts + ArtifactName: failed-test-artifacts + +- job: vs_build + displayName: Visual Studio Build + condition: succeeded() + pool: + vmImage: windows-latest + timeoutInMinutes: 240 + steps: + - powershell: | + if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { + net use s: \\gitfileshare.file.core.windows.net\test-cache "$GITFILESHAREPWD" /user:AZURE\gitfileshare /persistent:no + cmd /c mklink /d "$(Build.SourcesDirectory)\test-cache" S:\ + } + displayName: 'Mount test-cache' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - powershell: | + $urlbase = "https://dev.azure.com/git-for-windows/git/_apis/build/builds" + $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=22&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id + $downloadUrl = ((Invoke-WebRequest -UseBasicParsing "${urlbase}/$id/artifacts").content | ConvertFrom-JSON).value[1].resource.downloadUrl + (New-Object 
Net.WebClient).DownloadFile($downloadUrl,"git-sdk-64-minimal.zip") + Expand-Archive git-sdk-64-minimal.zip -DestinationPath . -Force + Remove-Item git-sdk-64-minimal.zip + + # Let Git ignore the SDK and the test-cache + "/git-sdk-64-minimal/`n/test-cache/`n" | Out-File -NoNewLine -Encoding ascii -Append "$(Build.SourcesDirectory)\.git\info\exclude" + displayName: 'Download git-sdk-64-minimal' + - powershell: | + & git-sdk-64-minimal\usr\bin\bash.exe -lc @" + make NDEBUG=1 DEVELOPER=1 vcxproj + "@ + if (!$?) { exit(1) } + displayName: Generate Visual Studio Solution + env: + HOME: $(Build.SourcesDirectory) + MSYSTEM: MINGW64 + DEVELOPER: 1 + NO_PERL: 1 + GIT_CONFIG_PARAMETERS: "'user.name=CI' 'user.email=ci@git'" + - powershell: | + $urlbase = "https://dev.azure.com/git/git/_apis/build/builds" + $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=9&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id + $downloadUrl = ((Invoke-WebRequest -UseBasicParsing "${urlbase}/$id/artifacts").content | ConvertFrom-JSON).value[0].resource.downloadUrl + (New-Object Net.WebClient).DownloadFile($downloadUrl, "compat.zip") + Expand-Archive compat.zip -DestinationPath . -Force + Remove-Item compat.zip + displayName: 'Download vcpkg artifacts' + - task: MSBuild@1 + inputs: + solution: git.sln + platform: x64 + configuration: Release + maximumCpuCount: 4 + msbuildArguments: /p:PlatformToolset=v142 + - powershell: | + & compat\vcbuild\vcpkg_copy_dlls.bat release + if (!$?) { exit(1) } + & git-sdk-64-minimal\usr\bin\bash.exe -lc @" + mkdir -p artifacts && + eval \"`$(make -n artifacts-tar INCLUDE_DLLS_IN_ARTIFACTS=YesPlease ARTIFACTS_DIRECTORY=artifacts | grep ^tar)\" + "@ + if (!$?) 
{ exit(1) } + displayName: Bundle artifact tar + env: + HOME: $(Build.SourcesDirectory) + MSYSTEM: MINGW64 + DEVELOPER: 1 + NO_PERL: 1 + MSVC: 1 + VCPKG_ROOT: $(Build.SourcesDirectory)\compat\vcbuild\vcpkg + - powershell: | + $tag = (Invoke-WebRequest -UseBasicParsing "https://gitforwindows.org/latest-tag.txt").content + $version = (Invoke-WebRequest -UseBasicParsing "https://gitforwindows.org/latest-version.txt").content + $url = "https://github.com/git-for-windows/git/releases/download/${tag}/PortableGit-${version}-64-bit.7z.exe" + (New-Object Net.WebClient).DownloadFile($url,"PortableGit.exe") + & .\PortableGit.exe -y -oartifacts\PortableGit + # Wait until it is unpacked + while (-not @(Remove-Item -ErrorAction SilentlyContinue PortableGit.exe; $?)) { sleep 1 } + displayName: Download & extract portable Git + - task: PublishPipelineArtifact@0 + displayName: 'Publish Pipeline Artifact: MSVC test artifacts' + inputs: + artifactName: 'vs-artifacts' + targetPath: '$(Build.SourcesDirectory)\artifacts' + - powershell: | + if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { + cmd /c rmdir "$(Build.SourcesDirectory)\test-cache" + } + displayName: 'Unmount test-cache' + condition: true + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + +- job: vs_test + displayName: Visual Studio Test + dependsOn: vs_build + condition: succeeded() + pool: + vmImage: windows-latest + timeoutInMinutes: 240 + strategy: + parallel: 10 + steps: + - powershell: | + if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { + net use s: \\gitfileshare.file.core.windows.net\test-cache "$GITFILESHAREPWD" /user:AZURE\gitfileshare /persistent:no + cmd /c mklink /d "$(Build.SourcesDirectory)\test-cache" S:\ + } + displayName: 'Mount test-cache' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact: VS test artifacts' + inputs: + artifactName: 'vs-artifacts' + targetPath: 
'$(Build.SourcesDirectory)' + - powershell: | + & PortableGit\git-cmd.exe --command=usr\bin\bash.exe -lc @" + test -f artifacts.tar.gz || { + echo No test artifacts found\; skipping >&2 + exit 0 + } + tar xf artifacts.tar.gz || exit 1 + + # Let Git ignore the SDK and the test-cache + printf '%s\n' /PortableGit/ /test-cache/ >>.git/info/exclude + + cd t && + PATH=\"`$PWD/helper:`$PATH\" && + test-tool.exe run-command testsuite --jobs=10 -V -x --write-junit-xml \ + `$(test-tool.exe path-utils slice-tests \ + `$SYSTEM_JOBPOSITIONINPHASE `$SYSTEM_TOTALJOBSINPHASE t[0-9]*.sh) + "@ + if (!$?) { exit(1) } + displayName: 'Test (parallel)' + env: + HOME: $(Build.SourcesDirectory) + MSYSTEM: MINGW64 + NO_SVN_TESTS: 1 + GIT_TEST_SKIP_REBASE_P: 1 + - powershell: | + if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { + cmd /c rmdir "$(Build.SourcesDirectory)\test-cache" + } + displayName: 'Unmount test-cache' + condition: true + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/TEST-*.xml' + inputs: + mergeTestResults: true + testRunTitle: 'vs' + platform: Windows + publishRunAttachments: false + condition: succeededOrFailed() + - task: PublishBuildArtifacts@1 + displayName: 'Publish trash directories of failed tests' + condition: failed() + inputs: + PathtoPublish: t/failed-test-artifacts + ArtifactName: failed-vs-test-artifacts + +- job: linux_clang + displayName: linux-clang + condition: succeeded() + pool: + vmImage: ubuntu-latest + steps: + - bash: | + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 + + sudo apt-get update && + sudo apt-get -y install git gcc make libssl-dev libcurl4-openssl-dev libexpat-dev tcl tk gettext git-email zlib1g-dev apache2-bin && + + export CC=clang || exit 1 + + ci/install-dependencies.sh || exit 1 + 
ci/run-build-and-tests.sh || { + ci/print-test-failures.sh + exit 1 + } + + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 + displayName: 'ci/run-build-and-tests.sh' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/TEST-*.xml' + inputs: + mergeTestResults: true + testRunTitle: 'linux-clang' + platform: Linux + publishRunAttachments: false + condition: succeededOrFailed() + - task: PublishBuildArtifacts@1 + displayName: 'Publish trash directories of failed tests' + condition: failed() + inputs: + PathtoPublish: t/failed-test-artifacts + ArtifactName: failed-test-artifacts + +- job: linux_gcc + displayName: linux-gcc + condition: succeeded() + pool: + vmImage: ubuntu-latest + steps: + - bash: | + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 + + sudo add-apt-repository ppa:ubuntu-toolchain-r/test && + sudo apt-get update && + sudo apt-get -y install git gcc make libssl-dev libcurl4-openssl-dev libexpat-dev tcl tk gettext git-email zlib1g-dev apache2 language-pack-is git-svn gcc-8 || exit 1 + + ci/install-dependencies.sh || exit 1 + ci/run-build-and-tests.sh || { + ci/print-test-failures.sh + exit 1 + } + + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 + displayName: 'ci/run-build-and-tests.sh' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/TEST-*.xml' + inputs: + mergeTestResults: true + testRunTitle: 'linux-gcc' + platform: Linux + publishRunAttachments: false + condition: succeededOrFailed() + - task: PublishBuildArtifacts@1 + displayName: 'Publish trash directories of failed tests' + condition: failed() + inputs: + PathtoPublish: t/failed-test-artifacts + ArtifactName: failed-test-artifacts + +- job: 
osx_clang + displayName: osx-clang + condition: succeeded() + pool: + vmImage: macOS-latest + steps: + - bash: | + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 + + export CC=clang + + ci/install-dependencies.sh || exit 1 + ci/run-build-and-tests.sh || { + ci/print-test-failures.sh + exit 1 + } + + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || umount "$HOME/test-cache" || exit 1 + displayName: 'ci/run-build-and-tests.sh' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/TEST-*.xml' + inputs: + mergeTestResults: true + testRunTitle: 'osx-clang' + platform: macOS + publishRunAttachments: false + condition: succeededOrFailed() + - task: PublishBuildArtifacts@1 + displayName: 'Publish trash directories of failed tests' + condition: failed() + inputs: + PathtoPublish: t/failed-test-artifacts + ArtifactName: failed-test-artifacts + +- job: osx_gcc + displayName: osx-gcc + condition: succeeded() + pool: + vmImage: macOS-latest + steps: + - bash: | + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 + + ci/install-dependencies.sh || exit 1 + ci/run-build-and-tests.sh || { + ci/print-test-failures.sh + exit 1 + } + + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || umount "$HOME/test-cache" || exit 1 + displayName: 'ci/run-build-and-tests.sh' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/TEST-*.xml' + inputs: + mergeTestResults: true + testRunTitle: 'osx-gcc' + platform: macOS + publishRunAttachments: false + condition: succeededOrFailed() + - task: PublishBuildArtifacts@1 + displayName: 'Publish trash directories of failed tests' + condition: failed() + inputs: + 
PathtoPublish: t/failed-test-artifacts + ArtifactName: failed-test-artifacts + +- job: gettext_poison + displayName: GETTEXT_POISON + condition: succeeded() + pool: + vmImage: ubuntu-latest + steps: + - bash: | + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 + + sudo apt-get update && + sudo apt-get -y install git gcc make libssl-dev libcurl4-openssl-dev libexpat-dev tcl tk gettext git-email zlib1g-dev && + + export jobname=GETTEXT_POISON || exit 1 + + ci/run-build-and-tests.sh || { + ci/print-test-failures.sh + exit 1 + } + + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 + displayName: 'ci/run-build-and-tests.sh' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/TEST-*.xml' + inputs: + mergeTestResults: true + testRunTitle: 'gettext-poison' + platform: Linux + publishRunAttachments: false + condition: succeededOrFailed() + - task: PublishBuildArtifacts@1 + displayName: 'Publish trash directories of failed tests' + condition: failed() + inputs: + PathtoPublish: t/failed-test-artifacts + ArtifactName: failed-test-artifacts + +- job: linux32 + displayName: Linux32 + condition: succeeded() + pool: + vmImage: ubuntu-latest + steps: + - bash: | + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 + + res=0 + sudo AGENT_OS="$AGENT_OS" BUILD_BUILDNUMBER="$BUILD_BUILDNUMBER" BUILD_REPOSITORY_URI="$BUILD_REPOSITORY_URI" BUILD_SOURCEBRANCH="$BUILD_SOURCEBRANCH" BUILD_SOURCEVERSION="$BUILD_SOURCEVERSION" SYSTEM_PHASENAME="$SYSTEM_PHASENAME" SYSTEM_TASKDEFINITIONSURI="$SYSTEM_TASKDEFINITIONSURI" SYSTEM_TEAMPROJECT="$SYSTEM_TEAMPROJECT" CC=$CC MAKEFLAGS="$MAKEFLAGS" jobname=Linux32 bash -lxc 
ci/run-docker.sh || res=1 + + sudo chmod a+r t/out/TEST-*.xml + test ! -d t/failed-test-artifacts || sudo chmod a+r t/failed-test-artifacts + + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || res=1 + exit $res + displayName: 'jobname=Linux32 ci/run-docker.sh' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + - task: PublishTestResults@2 + displayName: 'Publish Test Results **/TEST-*.xml' + inputs: + mergeTestResults: true + testRunTitle: 'linux32' + platform: Linux + publishRunAttachments: false + condition: succeededOrFailed() + - task: PublishBuildArtifacts@1 + displayName: 'Publish trash directories of failed tests' + condition: failed() + inputs: + PathtoPublish: t/failed-test-artifacts + ArtifactName: failed-test-artifacts + +- job: static_analysis + displayName: StaticAnalysis + condition: succeeded() + pool: + vmImage: ubuntu-latest + steps: + - bash: | + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 + + sudo apt-get update && + sudo apt-get install -y coccinelle libcurl4-openssl-dev libssl-dev libexpat-dev gettext && + + export jobname=StaticAnalysis && + + ci/run-static-analysis.sh || exit 1 + + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 + displayName: 'ci/run-static-analysis.sh' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) + +- job: documentation + displayName: Documentation + condition: succeeded() + pool: + vmImage: ubuntu-latest + steps: + - bash: | + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 + + sudo apt-get update && + sudo apt-get install -y asciidoc xmlto asciidoctor docbook-xsl-ns && + + export ALREADY_HAVE_ASCIIDOCTOR=yes. 
&& + export jobname=Documentation && + + ci/test-documentation.sh || exit 1 + + test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 + displayName: 'ci/test-documentation.sh' + env: + GITFILESHAREPWD: $(gitfileshare.pwd) From 04957135f9a3c9594855c279afda8daf403ed3a5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 24 Jun 2019 23:41:27 +0200 Subject: [PATCH 173/303] mimalloc: adjust for building inside Git We want to compile mimalloc's source code as part of Git, rather than requiring the code to be built as an external library: mimalloc uses a CMake-based build, which is not necessarily easy to integrate into the flavors of Git for Windows (which will be the main benefitting port). Signed-off-by: Johannes Schindelin --- compat/mimalloc/alloc.c | 4 ---- compat/mimalloc/mimalloc.h | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/compat/mimalloc/alloc.c b/compat/mimalloc/alloc.c index 70d74cb95790e8..8f084d3ad35170 100644 --- a/compat/mimalloc/alloc.c +++ b/compat/mimalloc/alloc.c @@ -15,10 +15,6 @@ terms of the MIT license. A copy of the license can be found in the file #include <string.h> // memset, strlen #include <stdlib.h> // malloc, exit -#define MI_IN_ALLOC_C -#include "alloc-override.c" -#undef MI_IN_ALLOC_C - // ------------------------------------------------------ // Allocation // ------------------------------------------------------ diff --git a/compat/mimalloc/mimalloc.h b/compat/mimalloc/mimalloc.h index 6a6f1b373be1d4..41ccc62d73d182 100644 --- a/compat/mimalloc/mimalloc.h +++ b/compat/mimalloc/mimalloc.h @@ -93,7 +93,8 @@ terms of the MIT license.
A copy of the license can be found in the file // Includes // ------------------------------------------------------ -#include <stddef.h> // size_t +#include "git-compat-util.h" + #include <stdbool.h> // bool #ifdef __cplusplus From b0b193404d8752d9eb3fb89ba8c72059b08f2097 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 29 Mar 2022 13:42:19 +0200 Subject: [PATCH 174/303] azure-pipeline: drop the `GETTEXT_POISON` job This is a follow-up to 6c280b4142 (ci: remove GETTEXT_POISON jobs, 2021-01-20) after reinstating the Azure Pipeline. Signed-off-by: Johannes Schindelin --- azure-pipelines.yml | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 11413f66f89662..7b20ad2667fdc9 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -441,44 +441,6 @@ jobs: PathtoPublish: t/failed-test-artifacts ArtifactName: failed-test-artifacts -- job: gettext_poison - displayName: GETTEXT_POISON - condition: succeeded() - pool: - vmImage: ubuntu-latest - steps: - - bash: | - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - - sudo apt-get update && - sudo apt-get -y install git gcc make libssl-dev libcurl4-openssl-dev libexpat-dev tcl tk gettext git-email zlib1g-dev && - - export jobname=GETTEXT_POISON || exit 1 - - ci/run-build-and-tests.sh || { - ci/print-test-failures.sh - exit 1 - } - - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 - displayName: 'ci/run-build-and-tests.sh' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - - task: PublishTestResults@2 - displayName: 'Publish Test Results **/TEST-*.xml' - inputs: - mergeTestResults: true - testRunTitle: 'gettext-poison' - platform: Linux - publishRunAttachments: false - condition: succeededOrFailed() - - task: PublishBuildArtifacts@1 - displayName: 'Publish trash directories of
failed tests' - condition: failed() - inputs: - PathtoPublish: t/failed-test-artifacts - ArtifactName: failed-test-artifacts - - job: linux32 displayName: Linux32 condition: succeeded() From 0cf73584d0331835607d55a3db16e6149d8a6197 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Aug 2022 13:41:53 +0200 Subject: [PATCH 175/303] mimalloc: avoid `%z` format This format is not supported by MSVC runtimes targeted by the MINGW toolchain. Signed-off-by: Johannes Schindelin --- compat/mimalloc/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/mimalloc/options.c b/compat/mimalloc/options.c index 7497e70a7baee2..76aafe62b1062c 100644 --- a/compat/mimalloc/options.c +++ b/compat/mimalloc/options.c @@ -346,7 +346,7 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { char tprefix[64]; - snprintf(tprefix, sizeof(tprefix), "%sthread 0x%zx: ", prefix, _mi_thread_id()); + snprintf(tprefix, sizeof(tprefix), "%sthread 0x%llx: ", prefix, (unsigned long long)_mi_thread_id()); mi_vfprintf(out, arg, tprefix, fmt, args); } else { From 3f4dd050f8d507b22f04f22c383471d533b8fc12 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 29 Mar 2022 12:28:12 +0200 Subject: [PATCH 176/303] azure-pipeline: stop hard-coding `apt-get` calls We have `ci/install-dependencies.sh` for that. Incidentally, this avoids the following error in the linux-* jobs: The following packages have unmet dependencies: git-email : Depends: git (< 1:2.25.1-.) 
but 1:2.35.1-0ppa1~ubuntu20.04.1 is to be installed Recommends: libemail-valid-perl but it is not going to be installed Signed-off-by: Johannes Schindelin --- azure-pipelines.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7b20ad2667fdc9..e311d3055e5eca 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -303,9 +303,6 @@ jobs: - bash: | test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - sudo apt-get update && - sudo apt-get -y install git gcc make libssl-dev libcurl4-openssl-dev libexpat-dev tcl tk gettext git-email zlib1g-dev apache2-bin && - export CC=clang || exit 1 ci/install-dependencies.sh || exit 1 @@ -342,10 +339,6 @@ jobs: - bash: | test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - sudo add-apt-repository ppa:ubuntu-toolchain-r/test && - sudo apt-get update && - sudo apt-get -y install git gcc make libssl-dev libcurl4-openssl-dev libexpat-dev tcl tk gettext git-email zlib1g-dev apache2 language-pack-is git-svn gcc-8 || exit 1 - ci/install-dependencies.sh || exit 1 ci/run-build-and-tests.sh || { ci/print-test-failures.sh From 7c6d4adb24675c629f94b3f7605c5fe0efc7dc20 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 20 Oct 2019 22:05:35 +0200 Subject: [PATCH 177/303] mimalloc: avoid having to link to `psapi` just for mimalloc Instead, load the `GetProcessMemoryInfo()` function dynamically. When needed. If needed. This is necessary because the start-up cost of Git processes spent on loading dynamic libraries is non-negligible. 
Signed-off-by: Johannes Schindelin --- compat/mimalloc/stats.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/compat/mimalloc/stats.c b/compat/mimalloc/stats.c index 9ff4485be7decc..c722189f7044e7 100644 --- a/compat/mimalloc/stats.c +++ b/compat/mimalloc/stats.c @@ -459,6 +459,7 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { #include <windows.h> #include <psapi.h> #pragma comment(lib,"psapi.lib") +#include "compat/win32/lazyload.h" static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; @@ -479,12 +480,17 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec *utime = filetime_msecs(&ut); *stime = filetime_msecs(&st); PROCESS_MEMORY_COUNTERS info; - GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; + DECLARE_PROC_ADDR(psapi, BOOL, WINAPI, GetProcessMemoryInfo, HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); + if (INIT_PROC_ADDR(GetProcessMemoryInfo)) { + GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + *current_rss = (size_t)info.WorkingSetSize; + *peak_rss = (size_t)info.PeakWorkingSetSize; + *current_commit = (size_t)info.PagefileUsage; + *peak_commit = (size_t)info.PeakPagefileUsage; + *page_faults = (size_t)info.PageFaultCount; + } else { + *current_rss = *peak_rss = *current_commit = *peak_commit = *page_faults = 0; + } } #elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) From 8c69f10bf93e713912c70362e08f7cc25da6448b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 10 Jul 2022 00:02:30 +0200 Subject: [PATCH 178/303] azure-pipeline: drop the code to write to/read from a file share We haven't used this feature in ages, we don't actually need to.
Signed-off-by: Johannes Schindelin --- azure-pipelines.yml | 105 -------------------------------------------- 1 file changed, 105 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index e311d3055e5eca..94bc7ea1f51b47 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -9,14 +9,6 @@ jobs: vmImage: windows-latest timeoutInMinutes: 240 steps: - - powershell: | - if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { - net use s: \\gitfileshare.file.core.windows.net\test-cache "$GITFILESHAREPWD" /user:AZURE\gitfileshare /persistent:no - cmd /c mklink /d "$(Build.SourcesDirectory)\test-cache" S:\ - } - displayName: 'Mount test-cache' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - powershell: | $urlbase = "https://dev.azure.com/git-for-windows/git/_apis/build/builds" $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=22&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id @@ -49,14 +41,6 @@ jobs: inputs: artifactName: 'git-sdk-64-minimal' targetPath: '$(Build.SourcesDirectory)\git-sdk-64-minimal' - - powershell: | - if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { - cmd /c rmdir "$(Build.SourcesDirectory)\test-cache" - } - displayName: 'Unmount test-cache' - condition: true - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - job: windows_test displayName: Windows Test @@ -68,14 +52,6 @@ jobs: strategy: parallel: 10 steps: - - powershell: | - if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { - net use s: \\gitfileshare.file.core.windows.net\test-cache "$GITFILESHAREPWD" /user:AZURE\gitfileshare /persistent:no - cmd /c mklink /d "$(Build.SourcesDirectory)\test-cache" S:\ - } - displayName: 'Mount test-cache' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: DownloadPipelineArtifact@0 displayName: 'Download Pipeline Artifact: test artifacts' inputs: @@ -109,14 +85,6 @@ jobs: MSYSTEM: 
MINGW64 NO_SVN_TESTS: 1 GIT_TEST_SKIP_REBASE_P: 1 - - powershell: | - if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { - cmd /c rmdir "$(Build.SourcesDirectory)\test-cache" - } - displayName: 'Unmount test-cache' - condition: true - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: @@ -139,14 +107,6 @@ jobs: vmImage: windows-latest timeoutInMinutes: 240 steps: - - powershell: | - if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { - net use s: \\gitfileshare.file.core.windows.net\test-cache "$GITFILESHAREPWD" /user:AZURE\gitfileshare /persistent:no - cmd /c mklink /d "$(Build.SourcesDirectory)\test-cache" S:\ - } - displayName: 'Mount test-cache' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - powershell: | $urlbase = "https://dev.azure.com/git-for-windows/git/_apis/build/builds" $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=22&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id @@ -215,14 +175,6 @@ jobs: inputs: artifactName: 'vs-artifacts' targetPath: '$(Build.SourcesDirectory)\artifacts' - - powershell: | - if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { - cmd /c rmdir "$(Build.SourcesDirectory)\test-cache" - } - displayName: 'Unmount test-cache' - condition: true - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - job: vs_test displayName: Visual Studio Test @@ -234,14 +186,6 @@ jobs: strategy: parallel: 10 steps: - - powershell: | - if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { - net use s: \\gitfileshare.file.core.windows.net\test-cache "$GITFILESHAREPWD" /user:AZURE\gitfileshare /persistent:no - cmd /c mklink /d "$(Build.SourcesDirectory)\test-cache" S:\ - } - displayName: 'Mount test-cache' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: DownloadPipelineArtifact@0 
displayName: 'Download Pipeline Artifact: VS test artifacts' inputs: @@ -271,14 +215,6 @@ jobs: MSYSTEM: MINGW64 NO_SVN_TESTS: 1 GIT_TEST_SKIP_REBASE_P: 1 - - powershell: | - if ("$GITFILESHAREPWD" -ne "" -and "$GITFILESHAREPWD" -ne "`$`(gitfileshare.pwd)") { - cmd /c rmdir "$(Build.SourcesDirectory)\test-cache" - } - displayName: 'Unmount test-cache' - condition: true - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: @@ -301,8 +237,6 @@ jobs: vmImage: ubuntu-latest steps: - bash: | - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - export CC=clang || exit 1 ci/install-dependencies.sh || exit 1 @@ -310,11 +244,7 @@ jobs: ci/print-test-failures.sh exit 1 } - - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 displayName: 'ci/run-build-and-tests.sh' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: @@ -337,18 +267,12 @@ jobs: vmImage: ubuntu-latest steps: - bash: | - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - ci/install-dependencies.sh || exit 1 ci/run-build-and-tests.sh || { ci/print-test-failures.sh exit 1 } - - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 displayName: 'ci/run-build-and-tests.sh' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: @@ -371,8 +295,6 @@ jobs: vmImage: macOS-latest steps: - bash: | - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" 
"$HOME/test-cache" || exit 1 - export CC=clang ci/install-dependencies.sh || exit 1 @@ -380,11 +302,7 @@ jobs: ci/print-test-failures.sh exit 1 } - - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || umount "$HOME/test-cache" || exit 1 displayName: 'ci/run-build-and-tests.sh' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: @@ -407,18 +325,12 @@ jobs: vmImage: macOS-latest steps: - bash: | - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - ci/install-dependencies.sh || exit 1 ci/run-build-and-tests.sh || { ci/print-test-failures.sh exit 1 } - - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || umount "$HOME/test-cache" || exit 1 displayName: 'ci/run-build-and-tests.sh' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: @@ -441,19 +353,14 @@ jobs: vmImage: ubuntu-latest steps: - bash: | - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - res=0 sudo AGENT_OS="$AGENT_OS" BUILD_BUILDNUMBER="$BUILD_BUILDNUMBER" BUILD_REPOSITORY_URI="$BUILD_REPOSITORY_URI" BUILD_SOURCEBRANCH="$BUILD_SOURCEBRANCH" BUILD_SOURCEVERSION="$BUILD_SOURCEVERSION" SYSTEM_PHASENAME="$SYSTEM_PHASENAME" SYSTEM_TASKDEFINITIONSURI="$SYSTEM_TASKDEFINITIONSURI" SYSTEM_TEAMPROJECT="$SYSTEM_TEAMPROJECT" CC=$CC MAKEFLAGS="$MAKEFLAGS" jobname=Linux32 bash -lxc ci/run-docker.sh || res=1 sudo chmod a+r t/out/TEST-*.xml test ! 
-d t/failed-test-artifacts || sudo chmod a+r t/failed-test-artifacts - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || res=1 exit $res displayName: 'jobname=Linux32 ci/run-docker.sh' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: @@ -476,19 +383,13 @@ jobs: vmImage: ubuntu-latest steps: - bash: | - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - sudo apt-get update && sudo apt-get install -y coccinelle libcurl4-openssl-dev libssl-dev libexpat-dev gettext && export jobname=StaticAnalysis && ci/run-static-analysis.sh || exit 1 - - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 displayName: 'ci/run-static-analysis.sh' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) - job: documentation displayName: Documentation @@ -497,8 +398,6 @@ jobs: vmImage: ubuntu-latest steps: - bash: | - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || ci/mount-fileshare.sh //gitfileshare.file.core.windows.net/test-cache gitfileshare "$GITFILESHAREPWD" "$HOME/test-cache" || exit 1 - sudo apt-get update && sudo apt-get install -y asciidoc xmlto asciidoctor docbook-xsl-ns && @@ -506,8 +405,4 @@ jobs: export jobname=Documentation && ci/test-documentation.sh || exit 1 - - test "$GITFILESHAREPWD" = '$(gitfileshare.pwd)' || sudo umount "$HOME/test-cache" || exit 1 displayName: 'ci/test-documentation.sh' - env: - GITFILESHAREPWD: $(gitfileshare.pwd) From 64f09f359a54aef636d93f5b539671f64f3554bd Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Aug 2022 13:49:22 +0200 Subject: [PATCH 179/303] mimalloc: avoid having to link to `bcrypt` just for mimalloc Instead, load the `BCryptGenRandom()` function dynamically. When needed. If needed. 
This is necessary because the start-up cost of Git processes spent on loading dynamic libraries is non-negligible. Signed-off-by: Johannes Schindelin --- compat/mimalloc/random.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/compat/mimalloc/random.c b/compat/mimalloc/random.c index 21d052770dc0dd..4e334d3dd76ebd 100644 --- a/compat/mimalloc/random.c +++ b/compat/mimalloc/random.c @@ -185,9 +185,15 @@ static bool os_random_buf(void* buf, size_t buf_len) { return (RtlGenRandom(buf, (ULONG)buf_len) != 0); } #else -#pragma comment (lib,"bcrypt.lib") -#include +#include "compat/win32/lazyload.h" +#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG +#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 +#endif + static bool os_random_buf(void* buf, size_t buf_len) { + DECLARE_PROC_ADDR(bcrypt, LONG, NTAPI, BCryptGenRandom, HANDLE, PUCHAR, ULONG, ULONG); + if (!INIT_PROC_ADDR(BCryptGenRandom)) + return 0; return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); } #endif From 2a15ef6218fff8ebd6edf2429e4c47a188e34a19 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 10 Jul 2022 00:14:53 +0200 Subject: [PATCH 180/303] azure-pipeline: use partial clone/parallel checkout to initialize minimal-sdk The Azure Pipeline `git-sdk-64-minimal` was retired... 
Signed-off-by: Johannes Schindelin --- azure-pipelines.yml | 125 +++++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 70 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 94bc7ea1f51b47..f11f1342cd080b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,5 +1,6 @@ variables: Agent.Source.Git.ShallowFetchDepth: 1 + GIT_CONFIG_PARAMETERS: "'checkout.workers=56' 'user.name=CI' 'user.email=ci@git'" jobs: - job: windows_build @@ -9,28 +10,24 @@ jobs: vmImage: windows-latest timeoutInMinutes: 240 steps: - - powershell: | - $urlbase = "https://dev.azure.com/git-for-windows/git/_apis/build/builds" - $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=22&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id - $downloadUrl = ((Invoke-WebRequest -UseBasicParsing "${urlbase}/$id/artifacts").content | ConvertFrom-JSON).value[1].resource.downloadUrl - (New-Object Net.WebClient).DownloadFile($downloadUrl,"git-sdk-64-minimal.zip") - Expand-Archive git-sdk-64-minimal.zip -DestinationPath . -Force - Remove-Item git-sdk-64-minimal.zip - + - bash: git clone --bare --depth=1 --filter=blob:none --single-branch -b main https://github.com/git-for-windows/git-sdk-64 + displayName: 'clone git-sdk-64' + - bash: git clone --depth=1 --single-branch -b main https://github.com/git-for-windows/build-extra + displayName: 'clone build-extra' + - bash: sh -x ./build-extra/please.sh create-sdk-artifact --sdk=git-sdk-64.git --out=git-sdk-64-minimal minimal-sdk + displayName: 'build git-sdk-64-minimal-sdk' + - bash: | # Let Git ignore the SDK and the test-cache - "/git-sdk-64-minimal/`n/test-cache/`n" | Out-File -NoNewLine -Encoding ascii -Append "$(Build.SourcesDirectory)\.git\info\exclude" - displayName: 'Download git-sdk-64-minimal' - - powershell: | - & git-sdk-64-minimal\usr\bin\bash.exe -lc @" - ci/make-test-artifacts.sh artifacts - "@ - if (!$?) 
{ exit(1) } + printf "%s\n" /git-sdk-64.git/ /build-extra/ /git-sdk-64-minimal/ /test-cache/ >>'.git/info/exclude' + displayName: 'Ignore untracked directories' + - bash: ci/make-test-artifacts.sh artifacts displayName: Build env: HOME: $(Build.SourcesDirectory) MSYSTEM: MINGW64 DEVELOPER: 1 NO_PERL: 1 + PATH: "$(Build.SourcesDirectory)\\git-sdk-64-minimal\\mingw64\\bin;$(Build.SourcesDirectory)\\git-sdk-64-minimal\\usr\\bin;C:\\Windows\\system32;C:\\Windows;C:\\Windows\\system32\\wbem" - task: PublishPipelineArtifact@0 displayName: 'Publish Pipeline Artifact: test artifacts' inputs: @@ -62,29 +59,27 @@ jobs: inputs: artifactName: 'git-sdk-64-minimal' targetPath: '$(Build.SourcesDirectory)\git-sdk-64-minimal' - - powershell: | - & git-sdk-64-minimal\usr\bin\bash.exe -lc @" - test -f artifacts.tar.gz || { - echo No test artifacts found\; skipping >&2 - exit 0 - } - tar xf artifacts.tar.gz || exit 1 + - bash: | + test -f artifacts.tar.gz || { + echo No test artifacts found\; skipping >&2 + exit 0 + } + tar xf artifacts.tar.gz || exit 1 - # Let Git ignore the SDK and the test-cache - printf '%s\n' /git-sdk-64-minimal/ /test-cache/ >>.git/info/exclude + # Let Git ignore the SDK and the test-cache + printf '%s\n' /git-sdk-64.git/ /build-extra/ /git-sdk-64-minimal/ /test-cache/ >>.git/info/exclude - ci/run-test-slice.sh `$SYSTEM_JOBPOSITIONINPHASE `$SYSTEM_TOTALJOBSINPHASE || { - ci/print-test-failures.sh - exit 1 - } - "@ - if (!$?) 
{ exit(1) } + ci/run-test-slice.sh $SYSTEM_JOBPOSITIONINPHASE $SYSTEM_TOTALJOBSINPHASE || { + ci/print-test-failures.sh + exit 1 + } displayName: 'Test (parallel)' env: HOME: $(Build.SourcesDirectory) MSYSTEM: MINGW64 NO_SVN_TESTS: 1 GIT_TEST_SKIP_REBASE_P: 1 + PATH: "$(Build.SourcesDirectory)\\git-sdk-64-minimal\\mingw64\\bin;$(Build.SourcesDirectory)\\git-sdk-64-minimal\\usr\\bin\\core_perl;$(Build.SourcesDirectory)\\git-sdk-64-minimal\\usr\\bin;C:\\Windows\\system32;C:\\Windows;C:\\Windows\\system32\\wbem" - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: @@ -107,29 +102,24 @@ jobs: vmImage: windows-latest timeoutInMinutes: 240 steps: - - powershell: | - $urlbase = "https://dev.azure.com/git-for-windows/git/_apis/build/builds" - $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=22&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id - $downloadUrl = ((Invoke-WebRequest -UseBasicParsing "${urlbase}/$id/artifacts").content | ConvertFrom-JSON).value[1].resource.downloadUrl - (New-Object Net.WebClient).DownloadFile($downloadUrl,"git-sdk-64-minimal.zip") - Expand-Archive git-sdk-64-minimal.zip -DestinationPath . 
-Force - Remove-Item git-sdk-64-minimal.zip - + - bash: git clone --bare --depth=1 --filter=blob:none --single-branch -b main https://github.com/git-for-windows/git-sdk-64 + displayName: 'clone git-sdk-64' + - bash: git clone --depth=1 --single-branch -b main https://github.com/git-for-windows/build-extra + displayName: 'clone build-extra' + - bash: sh -x ./build-extra/please.sh create-sdk-artifact --sdk=git-sdk-64.git --out=git-sdk-64-minimal minimal-sdk + displayName: 'build git-sdk-64-minimal-sdk' + - bash: | # Let Git ignore the SDK and the test-cache - "/git-sdk-64-minimal/`n/test-cache/`n" | Out-File -NoNewLine -Encoding ascii -Append "$(Build.SourcesDirectory)\.git\info\exclude" - displayName: 'Download git-sdk-64-minimal' - - powershell: | - & git-sdk-64-minimal\usr\bin\bash.exe -lc @" - make NDEBUG=1 DEVELOPER=1 vcxproj - "@ - if (!$?) { exit(1) } + printf "%s\n" /git-sdk-64-minimal/ /test-cache/ >>'.git/info/exclude' + displayName: 'Ignore untracked directories' + - bash: make NDEBUG=1 DEVELOPER=1 vcxproj displayName: Generate Visual Studio Solution env: HOME: $(Build.SourcesDirectory) MSYSTEM: MINGW64 DEVELOPER: 1 NO_PERL: 1 - GIT_CONFIG_PARAMETERS: "'user.name=CI' 'user.email=ci@git'" + PATH: "$(Build.SourcesDirectory)\\git-sdk-64-minimal\\mingw64\\bin;$(Build.SourcesDirectory)\\git-sdk-64-minimal\\usr\\bin;C:\\Windows\\system32;C:\\Windows;C:\\Windows\\system32\\wbem" - powershell: | $urlbase = "https://dev.azure.com/git/git/_apis/build/builds" $id = ((Invoke-WebRequest -UseBasicParsing "${urlbase}?definitions=9&statusFilter=completed&resultFilter=succeeded&`$top=1").content | ConvertFrom-JSON).value[0].id @@ -145,14 +135,10 @@ jobs: configuration: Release maximumCpuCount: 4 msbuildArguments: /p:PlatformToolset=v142 - - powershell: | - & compat\vcbuild\vcpkg_copy_dlls.bat release - if (!$?) 
{ exit(1) } - & git-sdk-64-minimal\usr\bin\bash.exe -lc @" - mkdir -p artifacts && - eval \"`$(make -n artifacts-tar INCLUDE_DLLS_IN_ARTIFACTS=YesPlease ARTIFACTS_DIRECTORY=artifacts | grep ^tar)\" - "@ - if (!$?) { exit(1) } + - bash: | + ./compat/vcbuild/vcpkg_copy_dlls.bat release && + mkdir -p artifacts && + eval "$(make -n artifacts-tar INCLUDE_DLLS_IN_ARTIFACTS=YesPlease ARTIFACTS_DIRECTORY=artifacts | grep ^tar)" displayName: Bundle artifact tar env: HOME: $(Build.SourcesDirectory) @@ -161,6 +147,7 @@ jobs: NO_PERL: 1 MSVC: 1 VCPKG_ROOT: $(Build.SourcesDirectory)\compat\vcbuild\vcpkg + PATH: "$(Build.SourcesDirectory)\\git-sdk-64-minimal\\mingw64\\bin;$(Build.SourcesDirectory)\\git-sdk-64-minimal\\usr\\bin;C:\\Windows\\system32;C:\\Windows;C:\\Windows\\system32\\wbem" - powershell: | $tag = (Invoke-WebRequest -UseBasicParsing "https://gitforwindows.org/latest-tag.txt").content $version = (Invoke-WebRequest -UseBasicParsing "https://gitforwindows.org/latest-version.txt").content @@ -191,30 +178,28 @@ jobs: inputs: artifactName: 'vs-artifacts' targetPath: '$(Build.SourcesDirectory)' - - powershell: | - & PortableGit\git-cmd.exe --command=usr\bin\bash.exe -lc @" - test -f artifacts.tar.gz || { - echo No test artifacts found\; skipping >&2 - exit 0 - } - tar xf artifacts.tar.gz || exit 1 + - bash: | + test -f artifacts.tar.gz || { + echo No test artifacts found\; skipping >&2 + exit 0 + } + tar xf artifacts.tar.gz || exit 1 - # Let Git ignore the SDK and the test-cache - printf '%s\n' /PortableGit/ /test-cache/ >>.git/info/exclude + # Let Git ignore the SDK and the test-cache + printf '%s\n' /PortableGit/ /test-cache/ >>.git/info/exclude - cd t && - PATH=\"`$PWD/helper:`$PATH\" && - test-tool.exe run-command testsuite --jobs=10 -V -x --write-junit-xml \ - `$(test-tool.exe path-utils slice-tests \ - `$SYSTEM_JOBPOSITIONINPHASE `$SYSTEM_TOTALJOBSINPHASE t[0-9]*.sh) - "@ - if (!$?) 
{ exit(1) } + cd t && + PATH="$PWD/helper:$PATH" && + test-tool.exe run-command testsuite --jobs=10 -V -x --write-junit-xml \ + $(test-tool.exe path-utils slice-tests \ + $SYSTEM_JOBPOSITIONINPHASE $SYSTEM_TOTALJOBSINPHASE t[0-9]*.sh) displayName: 'Test (parallel)' env: HOME: $(Build.SourcesDirectory) MSYSTEM: MINGW64 NO_SVN_TESTS: 1 GIT_TEST_SKIP_REBASE_P: 1 + PATH: "$(Build.SourcesDirectory)\\PortableGit\\mingw64\\bin;$(Build.SourcesDirectory)\\PortableGit\\usr\\bin;C:\\Windows\\system32;C:\\Windows;C:\\Windows\\system32\\wbem" - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: From c5d12c117d04659ac2fe48ab1d9754d0e8c42e7d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 24 Jun 2019 23:43:06 +0200 Subject: [PATCH 181/303] mimalloc: offer a build-time option to enable it By defining `USE_MIMALLOC`, Git can now be compiled with that nicely-fast and small allocator. Note that we have to disable a couple `DEVELOPER` options to build mimalloc's source code, as it makes heavy use of declarations after statements, among other things that disagree with Git's conventions. For example, the `-Wno-array-bounds` flag is needed because in `-O2` builds, trying to call `NtCurrentTeb()` (which `_mi_thread_id()` does on Windows) causes the bogus warning about a system header, likely related to https://sourceforge.net/p/mingw-w64/mailman/message/37674519/ and to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578: C:/git-sdk-64-minimal/mingw64/include/psdk_inc/intrin-impl.h:838:1: error: array subscript 0 is outside array bounds of 'long long unsigned int[0]' [-Werror=array-bounds] 838 | __buildreadseg(__readgsqword, unsigned __int64, "gs", "q") | ^~~~~~~~~~~~~~ Also: The `mimalloc` library uses C11-style atomics, therefore we must require that standard when compiling with GCC if we want to use `mimalloc` (instead of requiring "only" C99). 
This is what we do in the CMake definition already, therefore this commit does not need to touch `contrib/buildsystems/`. Signed-off-by: Johannes Schindelin --- Makefile | 33 +++++++++++++++++++++++++++++++++ config.mak.dev | 2 ++ config.mak.uname | 2 +- git-compat-util.h | 10 ++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 97c45505cd7713..b16ef887501eec 100644 --- a/Makefile +++ b/Makefile @@ -1942,6 +1942,39 @@ ifdef USE_NED_ALLOCATOR OVERRIDE_STRDUP = YesPlease endif +ifdef USE_MIMALLOC + MIMALLOC_OBJS = \ + compat/mimalloc/alloc-aligned.o \ + compat/mimalloc/alloc.o \ + compat/mimalloc/arena.o \ + compat/mimalloc/bitmap.o \ + compat/mimalloc/heap.o \ + compat/mimalloc/init.o \ + compat/mimalloc/options.o \ + compat/mimalloc/os.o \ + compat/mimalloc/page.o \ + compat/mimalloc/random.o \ + compat/mimalloc/segment.o \ + compat/mimalloc/segment-cache.o \ + compat/mimalloc/stats.o + + COMPAT_CFLAGS += -Icompat/mimalloc -DMI_DEBUG=0 -DUSE_MIMALLOC --std=gnu11 + COMPAT_OBJS += $(MIMALLOC_OBJS) + +$(MIMALLOC_OBJS): COMPAT_CFLAGS += -DBANNED_H + +ifdef DEVELOPER +$(MIMALLOC_OBJS): COMPAT_CFLAGS += \ + -Wno-attributes \ + -Wno-pedantic \ + -Wno-unknown-pragmas \ + -Wno-declaration-after-statement \ + -Wno-old-style-definition \ + -Wno-missing-prototypes \ + -Wno-array-bounds +endif +endif + ifdef OVERRIDE_STRDUP COMPAT_CFLAGS += -DOVERRIDE_STRDUP COMPAT_OBJS += compat/strdup.o diff --git a/config.mak.dev b/config.mak.dev index 4fa19d361b7837..8e99287376b4e0 100644 --- a/config.mak.dev +++ b/config.mak.dev @@ -22,8 +22,10 @@ endif ifneq ($(uname_S),FreeBSD) ifneq ($(or $(filter gcc6,$(COMPILER_FEATURES)),$(filter clang7,$(COMPILER_FEATURES))),) +ifndef USE_MIMALLOC DEVELOPER_CFLAGS += -std=gnu99 endif +endif else # FreeBSD cannot limit to C99 because its system headers unconditionally # rely on C11 features. 
diff --git a/config.mak.uname b/config.mak.uname index bf875867385aab..2f176759c02b0f 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -480,7 +480,7 @@ endif CC = compat/vcbuild/scripts/clink.pl AR = compat/vcbuild/scripts/lib.pl CFLAGS = - BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE + BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE -MP -std:c11 COMPAT_OBJS = compat/msvc.o compat/winansi.o \ compat/win32/flush.o \ compat/win32/path-utils.o \ diff --git a/git-compat-util.h b/git-compat-util.h index d2212479866369..92857f4f0e8580 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -371,6 +371,16 @@ char *gitdirname(char *); # include #endif +#ifdef USE_MIMALLOC +#include "mimalloc.h" +#define malloc mi_malloc +#define calloc mi_calloc +#define realloc mi_realloc +#define free mi_free +#define strdup mi_strdup +#define strndup mi_strndup +#endif + /* On most systems would have given us this, but * not on some systems (e.g. z/OS). */ From 2cb4e87076098be244fb86172a98a994e6ee88a4 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 10 Jul 2022 00:52:40 +0200 Subject: [PATCH 182/303] azure-pipeline: downcase the job name of the `Linux32` job These many refactorings in Git sure are gifts that keep on giving. 
Signed-off-by: Johannes Schindelin --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f11f1342cd080b..21ee5a463380d6 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -339,13 +339,13 @@ jobs: steps: - bash: | res=0 - sudo AGENT_OS="$AGENT_OS" BUILD_BUILDNUMBER="$BUILD_BUILDNUMBER" BUILD_REPOSITORY_URI="$BUILD_REPOSITORY_URI" BUILD_SOURCEBRANCH="$BUILD_SOURCEBRANCH" BUILD_SOURCEVERSION="$BUILD_SOURCEVERSION" SYSTEM_PHASENAME="$SYSTEM_PHASENAME" SYSTEM_TASKDEFINITIONSURI="$SYSTEM_TASKDEFINITIONSURI" SYSTEM_TEAMPROJECT="$SYSTEM_TEAMPROJECT" CC=$CC MAKEFLAGS="$MAKEFLAGS" jobname=Linux32 bash -lxc ci/run-docker.sh || res=1 + sudo AGENT_OS="$AGENT_OS" BUILD_BUILDNUMBER="$BUILD_BUILDNUMBER" BUILD_REPOSITORY_URI="$BUILD_REPOSITORY_URI" BUILD_SOURCEBRANCH="$BUILD_SOURCEBRANCH" BUILD_SOURCEVERSION="$BUILD_SOURCEVERSION" SYSTEM_PHASENAME="$SYSTEM_PHASENAME" SYSTEM_TASKDEFINITIONSURI="$SYSTEM_TASKDEFINITIONSURI" SYSTEM_TEAMPROJECT="$SYSTEM_TEAMPROJECT" CC=$CC MAKEFLAGS="$MAKEFLAGS" jobname=linux32 bash -lxc ci/run-docker.sh || res=1 sudo chmod a+r t/out/TEST-*.xml test ! -d t/failed-test-artifacts || sudo chmod a+r t/failed-test-artifacts exit $res - displayName: 'jobname=Linux32 ci/run-docker.sh' + displayName: 'jobname=linux32 ci/run-docker.sh' - task: PublishTestResults@2 displayName: 'Publish Test Results **/TEST-*.xml' inputs: From baf7a64f976ddf885a67105e0707ee4d52064b9f Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Aug 2022 14:23:36 +0200 Subject: [PATCH 183/303] mimalloc: make MIMALLOC_SHOW_STATS work with redirected `stderr` Setting `MIMALLOC_SHOW_STATS` to ask mimalloc to print out something after the process is done is the easiest way to verify that a mimalloc-enabled Git is running. 
So it better work and not try to write to a Win32 Console when it got a regular file handle instead or, as is the case in Git for Windows' regular Git Bash window, an emulated pseudo terminal. Signed-off-by: Johannes Schindelin --- compat/mimalloc/options.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compat/mimalloc/options.c b/compat/mimalloc/options.c index 76aafe62b1062c..3c68bff00d4d16 100644 --- a/compat/mimalloc/options.c +++ b/compat/mimalloc/options.c @@ -179,8 +179,15 @@ static void mi_out_stderr(const char* msg, void* arg) { if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; + static int write_to_console; if (hcon == INVALID_HANDLE_VALUE) { + CONSOLE_SCREEN_BUFFER_INFO sbi; hcon = GetStdHandle(STD_ERROR_HANDLE); + write_to_console = GetConsoleScreenBufferInfo(hcon, &sbi) ? 1 : 0; + } + if (!write_to_console) { + fputs(msg, stderr); + return; } const size_t len = strlen(msg); if (hcon != INVALID_HANDLE_VALUE && len > 0 && len < UINT32_MAX) { From dd315da58563b1c8c393bd0d9948134775e9fe73 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 8 Feb 2021 16:22:34 -0500 Subject: [PATCH 184/303] azure-pipeline: run static-analysis on bionic This is a backport of d051ed77ee6 (.github/workflows/main.yml: run static-analysis on bionic, 2021-02-08) to the Azure Pipeline. When Azure Pipelines' build agents transitioned 'ubuntu-latest' from 18.04 to 20.04, it broke our `static-analysis` job, since Coccinelle isn't available on Ubuntu focal (it is only available in the universe suite). Until Coccinelle can be installed from 20.04's main suite, pin the static-analysis build to run on 18.04, where it can be installed by default. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano Signed-off-by: Johannes Schindelin --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 21ee5a463380d6..e7200599fbda28 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -365,7 +365,7 @@ jobs: displayName: StaticAnalysis condition: succeeded() pool: - vmImage: ubuntu-latest + vmImage: ubuntu-18.04 steps: - bash: | sudo apt-get update && From 6b3c69bc50bad5d3cab9ceffb1acdcf7589cfd4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20A=C3=9Fhauer?= Date: Sun, 10 Jul 2022 11:27:25 +0200 Subject: [PATCH 185/303] MinGW: link as terminal server aware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With Windows 2000, Microsoft introduced a flag to the PE header to mark executables as "terminal server aware". Windows terminal servers provide a redirected Windows directory and redirected registry hives when launching legacy applications without this flag set. Since we do not use any INI files in the Windows directory and don't write to the registry, we don't need this additional preparation. Telling the OS that we don't need this should provide slightly improved startup times in terminal server environments. When building for supported Windows Versions with MSVC the /TSAWARE linker flag is automatically set, but MinGW requires us to set the --tsaware flag manually. This partially addresses https://github.com/git-for-windows/git/issues/3935.
Signed-off-by: Matthias Aßhauer --- config.mak.uname | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.mak.uname b/config.mak.uname index d63629fe807f59..9c472b502a5183 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -661,7 +661,7 @@ ifeq ($(uname_S),MINGW) DEFAULT_HELP_FORMAT = html HAVE_PLATFORM_PROCINFO = YesPlease CSPRNG_METHOD = rtlgenrandom - BASIC_LDFLAGS += -municode + BASIC_LDFLAGS += -municode -Wl,--tsaware COMPAT_CFLAGS += -DNOGDI -Icompat -Icompat/win32 COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" COMPAT_OBJS += compat/mingw.o compat/winansi.o \ From 6076a6ee69bdd8a18beff729c5474cd94af630d5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 24 Jun 2019 23:45:21 +0200 Subject: [PATCH 186/303] mingw: use mimalloc Thorough benchmarking with repacking a subset of linux.git (the commit history reachable from 93a6fefe2f ([PATCH] fix the SYSCTL=n compilation, 2007-02-28), to be precise) suggest that this allocator is on par, in multi-threaded situations maybe even better than nedmalloc: `git repack -adfq` with mimalloc, 8 threads: 31.166991900 27.576763800 28.712311000 27.373859000 27.163141900 `git repack -adfq` with nedmalloc, 8 threads: 31.915032900 27.149883100 28.244933700 27.240188800 28.580849500 In a different test using GitHub Actions build agents (probably single-threaded, a core-strength of nedmalloc)): `git repack -q -d -l -A --unpack-unreachable=2.weeks.ago` with mimalloc: 943.426 978.500 939.709 959.811 954.605 `git repack -q -d -l -A --unpack-unreachable=2.weeks.ago` with nedmalloc: 995.383 952.179 943.253 963.043 980.468 While these measurements were not executed with complete scientific rigor, as no hardware was set aside specifically for these benchmarks, it shows that mimalloc and nedmalloc perform almost the same, nedmalloc with a bit higher variance and also slightly higher average (further testing suggests that nedmalloc performs worse in multi-threaded situations than in single-threaded ones). 
In short: mimalloc seems to be slightly better suited for our purposes than nedmalloc. Seeing that mimalloc is developed actively, while nedmalloc ceased to see any updates in eight years, let's use mimalloc on Windows instead. Signed-off-by: Johannes Schindelin --- config.mak.uname | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/config.mak.uname b/config.mak.uname index 2f176759c02b0f..59563f0a8389f9 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -652,7 +652,7 @@ ifeq ($(uname_S),MINGW) USE_WIN32_IPC = YesPlease USE_WIN32_MMAP = YesPlease MMAP_PREVENTS_DELETE = UnfortunatelyYes - USE_NED_ALLOCATOR = YesPlease + USE_MIMALLOC = YesPlease UNRELIABLE_FSTAT = UnfortunatelyYes OBJECT_CREATION_USES_RENAMES = UnfortunatelyNeedsTo NO_REGEX = YesPlease @@ -719,7 +719,6 @@ else HAVE_LIBCHARSET_H = YesPlease USE_GETTEXT_SCHEME = fallthrough USE_LIBPCRE = YesPlease - USE_NED_ALLOCATOR = YesPlease NO_PYTHON = ifeq (/mingw64,$(subst 32,64,$(prefix))) # Move system config into top-level /etc/ From ca5505dacd45068ecd15f6eb2e162008a8ef2d6f Mon Sep 17 00:00:00 2001 From: Heiko Voigt Date: Sun, 21 Feb 2010 21:05:04 +0100 Subject: [PATCH 187/303] git-gui: provide question helper for retry fallback on Windows Make use of the new environment variable GIT_ASK_YESNO to support the recently implemented fallback in case unlink, rename or rmdir fail for files in use on Windows. The added dialog will present a yes/no question to the user which will currently be used by the windows compat layer to let the user retry a failed file operation.
Signed-off-by: Heiko Voigt --- git-gui/Makefile | 2 ++ git-gui/git-gui--askyesno | 51 +++++++++++++++++++++++++++++++++++++++ git-gui/git-gui.sh | 3 +++ 3 files changed, 56 insertions(+) create mode 100755 git-gui/git-gui--askyesno diff --git a/git-gui/Makefile b/git-gui/Makefile index 56c85a85c1e493..8bc0789d7dfded 100644 --- a/git-gui/Makefile +++ b/git-gui/Makefile @@ -295,6 +295,7 @@ install: all $(QUIET)$(INSTALL_D0)'$(DESTDIR_SQ)$(gitexecdir_SQ)' $(INSTALL_D1) $(QUIET)$(INSTALL_X0)git-gui $(INSTALL_X1) '$(DESTDIR_SQ)$(gitexecdir_SQ)' $(QUIET)$(INSTALL_X0)git-gui--askpass $(INSTALL_X1) '$(DESTDIR_SQ)$(gitexecdir_SQ)' + $(QUIET)$(INSTALL_X0)git-gui--askyesno $(INSTALL_X1) '$(DESTDIR_SQ)$(gitexecdir_SQ)' $(QUIET)$(foreach p,$(GITGUI_BUILT_INS), $(INSTALL_L0)'$(DESTDIR_SQ)$(gitexecdir_SQ)/$p' $(INSTALL_L1)'$(DESTDIR_SQ)$(gitexecdir_SQ)/git-gui' $(INSTALL_L2)'$(DESTDIR_SQ)$(gitexecdir_SQ)/$p' $(INSTALL_L3) &&) true ifdef GITGUI_WINDOWS_WRAPPER $(QUIET)$(INSTALL_R0)git-gui.tcl $(INSTALL_R1) '$(DESTDIR_SQ)$(gitexecdir_SQ)' @@ -313,6 +314,7 @@ uninstall: $(QUIET)$(CLEAN_DST) '$(DESTDIR_SQ)$(gitexecdir_SQ)' $(QUIET)$(REMOVE_F0)'$(DESTDIR_SQ)$(gitexecdir_SQ)'/git-gui $(REMOVE_F1) $(QUIET)$(REMOVE_F0)'$(DESTDIR_SQ)$(gitexecdir_SQ)'/git-gui--askpass $(REMOVE_F1) + $(QUIET)$(REMOVE_F0)'$(DESTDIR_SQ)$(gitexecdir_SQ)'/git-gui--askyesno $(REMOVE_F1) $(QUIET)$(foreach p,$(GITGUI_BUILT_INS), $(REMOVE_F0)'$(DESTDIR_SQ)$(gitexecdir_SQ)'/$p $(REMOVE_F1) &&) true ifdef GITGUI_WINDOWS_WRAPPER $(QUIET)$(REMOVE_F0)'$(DESTDIR_SQ)$(gitexecdir_SQ)'/git-gui.tcl $(REMOVE_F1) diff --git a/git-gui/git-gui--askyesno b/git-gui/git-gui--askyesno new file mode 100755 index 00000000000000..2a6e6fd11122f5 --- /dev/null +++ b/git-gui/git-gui--askyesno @@ -0,0 +1,51 @@ +#!/bin/sh +# Tcl ignores the next line -*- tcl -*- \ +exec wish "$0" -- "$@" + +# This is an implementation of a simple yes no dialog +# which is injected into the git commandline by git gui +# in case a yesno question needs to be 
answered. + +set NS {} +set use_ttk [package vsatisfies [package provide Tk] 8.5] +if {$use_ttk} { + set NS ttk +} + +if {$argc < 1} { + puts stderr "Usage: $argv0 " + exit 1 +} else { + set prompt [join $argv " "] +} + +${NS}::frame .t +${NS}::label .t.m -text $prompt -justify center -width 40 +.t.m configure -wraplength 400 +pack .t.m -side top -fill x -padx 20 -pady 20 -expand 1 +pack .t -side top -fill x -ipadx 20 -ipady 20 -expand 1 + +${NS}::frame .b +${NS}::frame .b.left -width 200 +${NS}::button .b.yes -text Yes -command yes +${NS}::button .b.no -text No -command no + + +pack .b.left -side left -expand 1 -fill x +pack .b.yes -side left -expand 1 +pack .b.no -side right -expand 1 -ipadx 5 +pack .b -side bottom -fill x -ipadx 20 -ipady 15 + +bind . {exit 0} +bind . {exit 1} + +proc no {} { + exit 1 +} + +proc yes {} { + exit 0 +} + +wm title . "Question?" +tk::PlaceWindow . diff --git a/git-gui/git-gui.sh b/git-gui/git-gui.sh index 201524c34edac0..e6860a0ae8ba89 100755 --- a/git-gui/git-gui.sh +++ b/git-gui/git-gui.sh @@ -1230,6 +1230,9 @@ set have_tk85 [expr {[package vcompare $tk_version "8.5"] >= 0}] if {![info exists env(SSH_ASKPASS)]} { set env(SSH_ASKPASS) [gitexec git-gui--askpass] } +if {![info exists env(GIT_ASK_YESNO)]} { + set env(GIT_ASK_YESNO) [gitexec git-gui--askyesno] +} ###################################################################### ## From 509b410cbc40c95dc80781f28c4ea9342fa5590d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 20 Sep 2017 21:52:28 +0200 Subject: [PATCH 188/303] git-gui--askyesno: fix funny text wrapping The text wrapping seems to be aligned to the right side of the Yes button, leaving an awful lot of empty space. Let's try to counter this by using pixel units. 
Signed-off-by: Johannes Schindelin --- git-gui/git-gui--askyesno | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git-gui/git-gui--askyesno b/git-gui/git-gui--askyesno index 2a6e6fd11122f5..cf9c990d0919b3 100755 --- a/git-gui/git-gui--askyesno +++ b/git-gui/git-gui--askyesno @@ -20,8 +20,8 @@ if {$argc < 1} { } ${NS}::frame .t -${NS}::label .t.m -text $prompt -justify center -width 40 -.t.m configure -wraplength 400 +${NS}::label .t.m -text $prompt -justify center -width 400px +.t.m configure -wraplength 400px pack .t.m -side top -fill x -padx 20 -pady 20 -expand 1 pack .t -side top -fill x -ipadx 20 -ipady 20 -expand 1 From 57cd3d8d07c91f3319cea35bdb980d62d36cb09b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 23 Jul 2010 18:06:05 +0200 Subject: [PATCH 189/303] git gui: set GIT_ASKPASS=git-gui--askpass if not set yet Signed-off-by: Johannes Schindelin --- git-gui/git-gui.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/git-gui/git-gui.sh b/git-gui/git-gui.sh index e6860a0ae8ba89..b387c2533213d3 100755 --- a/git-gui/git-gui.sh +++ b/git-gui/git-gui.sh @@ -1230,6 +1230,9 @@ set have_tk85 [expr {[package vcompare $tk_version "8.5"] >= 0}] if {![info exists env(SSH_ASKPASS)]} { set env(SSH_ASKPASS) [gitexec git-gui--askpass] } +if {![info exists env(GIT_ASKPASS)]} { + set env(GIT_ASKPASS) [gitexec git-gui--askpass] +} if {![info exists env(GIT_ASK_YESNO)]} { set env(GIT_ASK_YESNO) [gitexec git-gui--askyesno] } From 4780a097c7aa885e5158bc346186cfb30b397fee Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 4 Oct 2018 14:46:00 +0200 Subject: [PATCH 190/303] respect core.hooksPath, falling back to .git/hooks Since v2.9.0, Git knows about the config variable core.hookspath that allows overriding the path to the directory containing the Git hooks. Since v2.10.0, the `--git-path` option respects that config variable, too, so we may just as well use that command. 
For Git versions older than v2.5.0 (which was the first version to support the `--git-path` option for the `rev-parse` command), we simply fall back to the previous code. This fixes https://github.com/git-for-windows/git/issues/1755 Initial-patch-by: Philipp Gortan Signed-off-by: Johannes Schindelin --- git-gui/git-gui.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/git-gui/git-gui.sh b/git-gui/git-gui.sh index 201524c34edac0..068634ab6eff7e 100755 --- a/git-gui/git-gui.sh +++ b/git-gui/git-gui.sh @@ -623,7 +623,11 @@ proc git_write {args} { } proc githook_read {hook_name args} { - set pchook [gitdir hooks $hook_name] + if {[package vcompare $::_git_version 2.5.0] >= 0} { + set pchook [git rev-parse --git-path "hooks/$hook_name"] + } else { + set pchook [gitdir hooks $hook_name] + } lappend args 2>@1 # On Windows [file executable] might lie so we need to ask From 98dc2659dfa728bd8a7b6403e7a1428269e2f547 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Sat, 4 Feb 2012 21:54:36 +0100 Subject: [PATCH 191/303] gitk: Unicode file name support Assumes file names in git tree objects are UTF-8 encoded. On most unix systems, the system encoding (and thus the TCL system encoding) will be UTF-8, so file names will be displayed correctly. On Windows, it is impossible to set the system encoding to UTF-8. Changing the TCL system encoding (via 'encoding system ...', e.g. in the startup code) is explicitly discouraged by the TCL docs. Change gitk functions dealing with file names to always convert from and to UTF-8. 
Signed-off-by: Karsten Blees Signed-off-by: Johannes Schindelin --- gitk-git/gitk | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/gitk-git/gitk b/gitk-git/gitk index 0ae7d685904b85..1346b2fd5923bd 100755 --- a/gitk-git/gitk +++ b/gitk-git/gitk @@ -7692,7 +7692,7 @@ proc gettreeline {gtf id} { if {[string index $fname 0] eq "\""} { set fname [lindex $fname 0] } - set fname [encoding convertfrom $fname] + set fname [encoding convertfrom utf-8 $fname] lappend treefilelist($id) $fname } if {![eof $gtf]} { @@ -7954,7 +7954,7 @@ proc gettreediffline {gdtf ids} { if {[string index $file 0] eq "\""} { set file [lindex $file 0] } - set file [encoding convertfrom $file] + set file [encoding convertfrom utf-8 $file] if {$file ne [lindex $treediff end]} { lappend treediff $file lappend sublist $file @@ -8099,7 +8099,7 @@ proc makediffhdr {fname ids} { global ctext curdiffstart treediffs diffencoding global ctext_file_names jump_to_here targetline diffline - set fname [encoding convertfrom $fname] + set fname [encoding convertfrom utf-8 $fname] set diffencoding [get_path_encoding $fname] set i [lsearch -exact $treediffs($ids) $fname] if {$i >= 0} { @@ -8161,7 +8161,7 @@ proc parseblobdiffline {ids line} { if {![string compare -length 5 "diff " $line]} { if {![regexp {^diff (--cc|--git) } $line m type]} { - set line [encoding convertfrom $line] + set line [encoding convertfrom utf-8 $line] $ctext insert end "$line\n" hunksep continue } @@ -8210,7 +8210,7 @@ proc parseblobdiffline {ids line} { makediffhdr $fname $ids } elseif {![string compare -length 16 "* Unmerged path " $line]} { - set fname [encoding convertfrom [string range $line 16 end]] + set fname [encoding convertfrom utf-8 [string range $line 16 end]] $ctext insert end "\n" set curdiffstart [$ctext index "end - 1c"] lappend ctext_file_names $fname @@ -8263,7 +8263,7 @@ proc parseblobdiffline {ids line} { if {[string index $fname 0] eq "\""} { set fname [lindex $fname 0] } - set fname 
[encoding convertfrom $fname] + set fname [encoding convertfrom utf-8 $fname] set i [lsearch -exact $treediffs($ids) $fname] if {$i >= 0} { setinlist difffilestart $i $curdiffstart @@ -8282,6 +8282,7 @@ proc parseblobdiffline {ids line} { set diffinhdr 0 return } + set line [encoding convertfrom utf-8 $line] $ctext insert end "$line\n" filesep } else { @@ -12244,7 +12245,7 @@ proc cache_gitattr {attr pathlist} { foreach row [split $rlist "\n"] { if {[regexp "(.*): $attr: (.*)" $row m path value]} { if {[string index $path 0] eq "\""} { - set path [encoding convertfrom [lindex $path 0]] + set path [encoding convertfrom utf-8 [lindex $path 0]] } set path_attr_cache($attr,$path) $value } From 402cfb192bc246342b9125fd1364e0fbc867f3d7 Mon Sep 17 00:00:00 2001 From: Sebastian Schuberth Date: Sun, 22 Jul 2012 23:19:24 +0200 Subject: [PATCH 192/303] gitk: Use an external icon file on Windows Git for Windows now ships with the new Git icon from git-scm.com. Use that icon file if it exists instead of the old procedurally drawn one. This patch was sent upstream but so far no decision on its inclusion was made, so commit it to our fork. Signed-off-by: Sebastian Schuberth --- gitk-git/gitk | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/gitk-git/gitk b/gitk-git/gitk index 1346b2fd5923bd..334ed89572938a 100755 --- a/gitk-git/gitk +++ b/gitk-git/gitk @@ -12275,7 +12275,6 @@ if { [info exists ::env(GITK_MSGSDIR)] } { set gitk_prefix [file dirname [file dirname [file normalize $argv0]]] set gitk_libdir [file join $gitk_prefix share gitk lib] set gitk_msgsdir [file join $gitk_libdir msgs] - unset gitk_prefix } ## Internationalization (i18n) through msgcat and gettext. 
See @@ -12638,28 +12637,32 @@ if {[expr {[exec git rev-parse --is-inside-work-tree] == "true"}]} { set worktree [gitworktree] setcoords makewindow -catch { - image create photo gitlogo -width 16 -height 16 - - image create photo gitlogominus -width 4 -height 2 - gitlogominus put #C00000 -to 0 0 4 2 - gitlogo copy gitlogominus -to 1 5 - gitlogo copy gitlogominus -to 6 5 - gitlogo copy gitlogominus -to 11 5 - image delete gitlogominus - - image create photo gitlogoplus -width 4 -height 4 - gitlogoplus put #008000 -to 1 0 3 4 - gitlogoplus put #008000 -to 0 1 4 3 - gitlogo copy gitlogoplus -to 1 9 - gitlogo copy gitlogoplus -to 6 9 - gitlogo copy gitlogoplus -to 11 9 - image delete gitlogoplus - - image create photo gitlogo32 -width 32 -height 32 - gitlogo32 copy gitlogo -zoom 2 2 - - wm iconphoto . -default gitlogo gitlogo32 +if {$::tcl_platform(platform) eq {windows} && [file exists $gitk_prefix/etc/git.ico]} { + wm iconbitmap . -default $gitk_prefix/etc/git.ico +} else { + catch { + image create photo gitlogo -width 16 -height 16 + + image create photo gitlogominus -width 4 -height 2 + gitlogominus put #C00000 -to 0 0 4 2 + gitlogo copy gitlogominus -to 1 5 + gitlogo copy gitlogominus -to 6 5 + gitlogo copy gitlogominus -to 11 5 + image delete gitlogominus + + image create photo gitlogoplus -width 4 -height 4 + gitlogoplus put #008000 -to 1 0 3 4 + gitlogoplus put #008000 -to 0 1 4 3 + gitlogo copy gitlogoplus -to 1 9 + gitlogo copy gitlogoplus -to 6 9 + gitlogo copy gitlogoplus -to 11 9 + image delete gitlogoplus + + image create photo gitlogo32 -width 32 -height 32 + gitlogo32 copy gitlogo -zoom 2 2 + + wm iconphoto . -default gitlogo gitlogo32 + } } # wait for the window to become visible tkwait visibility . From a4f0d2aeb4a65c78ca915c304352e75854f20546 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 20 Sep 2017 21:53:45 +0200 Subject: [PATCH 193/303] git-gui--askyesno: allow overriding the window title "Question?" 
is maybe not the most informative thing to ask. In the absence of better information, it is the best we can do, of course. However, Git for Windows' auto updater just learned the trick to use git-gui--askyesno to ask the user whether to update now or not. And in this scripted scenario, we can easily pass a command-line option to change the window title. So let's support that with the new `--title ` option. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- git-gui/git-gui--askyesno | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/git-gui/git-gui--askyesno b/git-gui/git-gui--askyesno index cf9c990d0919b3..45b0260eff8145 100755 --- a/git-gui/git-gui--askyesno +++ b/git-gui/git-gui--askyesno @@ -12,10 +12,15 @@ if {$use_ttk} { set NS ttk } +set title "Question?" if {$argc < 1} { puts stderr "Usage: $argv0 <question>" exit 1 } else { + if {$argc > 2 && [lindex $argv 0] == "--title"} { + set title [lindex $argv 1] + set argv [lreplace $argv 0 1] + } set prompt [join $argv " "] } @@ -47,5 +52,5 @@ proc yes {} { exit 0 } -wm title . "Question?" +wm title . $title tk::PlaceWindow . From b34d25e8a646858c6e5b908c9a93fc783255ddf5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 16 Feb 2016 16:42:06 +0100 Subject: [PATCH 194/303] gitk: fix arrow keys in input fields with Tcl/Tk >= 8.6 Tcl/Tk 8.6 introduced new events for the cursor left/right keys and apparently changed the behavior of the previous event. Let's work around that by using the new events when we are running with Tcl/Tk 8.6 or later. 
This fixes https://github.com/git-for-windows/git/issues/495 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- gitk-git/gitk | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/gitk-git/gitk b/gitk-git/gitk index 334ed89572938a..8170cb4c585a45 100755 --- a/gitk-git/gitk +++ b/gitk-git/gitk @@ -2079,7 +2079,7 @@ proc makewindow {} { global headctxmenu progresscanv progressitem progresscoords statusw global fprogitem fprogcoord lastprogupdate progupdatepending global rprogitem rprogcoord rownumsel numcommits - global have_tk85 use_ttk NS + global have_tk85 have_tk86 use_ttk NS global git_version global worddiff @@ -2577,8 +2577,13 @@ proc makewindow {} { bind . <Key-Down> "selnextline 1" bind . <Shift-Key-Up> "dofind -1 0" bind . <Shift-Key-Down> "dofind 1 0" - bindkey <Key-Right> "goforw" - bindkey <Key-Left> "goback" + if {$have_tk86} { + bindkey <<NextChar>> "goforw" + bindkey <<PrevChar>> "goback" + } else { + bindkey <Key-Right> "goforw" + bindkey <Key-Left> "goback" + } bind . <Key-Prior> "selnextpage -1" bind . <Key-Next> "selnextpage 1" bind . <$M1B-Home> "allcanvs yview moveto 0.0" @@ -12573,6 +12578,7 @@ set nullid2 "0000000000000000000000000000000000000001" set nullfile "/dev/null" set have_tk85 [expr {[package vcompare $tk_version "8.5"] >= 0}] +set have_tk86 [expr {[package vcompare $tk_version "8.6"] >= 0}] if {![info exists have_ttk]} { set have_ttk [llength [info commands ::ttk::style]] } From 9fa79fc255fbee91bc11d64cc5a78123a68478fc Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Wed, 20 Sep 2017 21:55:45 +0200 Subject: [PATCH 195/303] git-gui--askyesno (mingw): use Git for Windows' icon, if available For additional GUI goodness. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- git-gui/git-gui--askyesno | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/git-gui/git-gui--askyesno b/git-gui/git-gui--askyesno index 45b0260eff8145..c0c82e7cbd01d6 100755 --- a/git-gui/git-gui--askyesno +++ b/git-gui/git-gui--askyesno @@ -52,5 +52,17 @@ proc yes {} { exit 0 } +if {$::tcl_platform(platform) eq {windows}} { + set icopath [file dirname [file normalize $argv0]] + if {[file tail $icopath] eq {git-core}} { + set icopath [file dirname $icopath] + } + set icopath [file dirname $icopath] + set icopath [file join $icopath share git git-for-windows.ico] + if {[file exists $icopath]} { + wm iconbitmap . -default $icopath + } +} + wm title . $title tk::PlaceWindow . From 78632b684f34f89642fa16291531dc0cc555f7d7 Mon Sep 17 00:00:00 2001 From: "James J. Raden" <james.raden@gmail.com> Date: Thu, 21 Jan 2016 12:07:47 -0500 Subject: [PATCH 196/303] gitk: make the "list references" default window width wider When using remotes (with git-flow especially), the remote reference names are almost always wordwrapped in the "list references" window because it's somewhat narrow by default. It's possible to resize it with a mouse, but it's annoying to have to do this every time, especially on Windows 10, where the window border seems to be only one (1) pixel wide, thus making the grabbing of the window border tricky. Signed-off-by: James J. 
Raden <james.raden@gmail.com> --- gitk-git/gitk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gitk-git/gitk b/gitk-git/gitk index 8170cb4c585a45..73cef8d006bfd4 100755 --- a/gitk-git/gitk +++ b/gitk-git/gitk @@ -10046,7 +10046,7 @@ proc showrefs {} { text $top.list -background $bgcolor -foreground $fgcolor \ -selectbackground $selectbgcolor -font mainfont \ -xscrollcommand "$top.xsb set" -yscrollcommand "$top.ysb set" \ - -width 30 -height 20 -cursor $maincursor \ + -width 60 -height 20 -cursor $maincursor \ -spacing1 1 -spacing3 1 -state disabled $top.list tag configure highlight -background $selectbgcolor if {![lsearch -exact $bglist $top.list]} { From 677790f1e940f80fc4da7d586aa04bd5d401d52f Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 11 Oct 2019 13:22:24 +0200 Subject: [PATCH 197/303] Modify the Code of Conduct for Git for Windows The Git project followed suite and added their Code of Conduct, based on the Contributors' Covenant v1.4. We edit it slightly to reflect Git for Windows' particulars. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- CODE_OF_CONDUCT.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 0215b1fd4c05e6..8b373cf5518a5d 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,9 +1,9 @@ -# Git Code of Conduct +# Git for Windows Code of Conduct This code of conduct outlines our expectations for participants within -the Git community, as well as steps for reporting unacceptable behavior. -We are committed to providing a welcoming and inspiring community for -all and expect our code of conduct to be honored. Anyone who violates +the **Git for Windows** community, as well as steps for reporting unacceptable +behavior. We are committed to providing a welcoming and inspiring community +for all and expect our code of conduct to be honored. 
Anyone who violates this code of conduct may be banned from the community. ## Our Pledge @@ -65,8 +65,8 @@ representative at an online or offline event. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported to the community leaders responsible for enforcement at -git@sfconservancy.org, or individually: +reported by contacting the Git for Windows maintainer or the community leaders +responsible for enforcement at git@sfconservancy.org, or individually: - Ævar Arnfjörð Bjarmason <avarab@gmail.com> - Christian Couder <christian.couder@gmail.com> From a5738309da5df33f6715c175419046b69599be58 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Thu, 1 Mar 2018 12:10:14 -0500 Subject: [PATCH 198/303] CONTRIBUTING.md: add guide for first-time contributors Getting started contributing to Git can be difficult on a Windows machine. CONTRIBUTING.md contains a guide to getting started, including detailed steps for setting up build tools, running tests, and submitting patches to upstream. [includes an example by Pratik Karki how to submit v2, v3, v4, etc.] Signed-off-by: Derrick Stolee <dstolee@microsoft.com> --- CONTRIBUTING.md | 417 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 417 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000000..ca9770df8c4808 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,417 @@ +How to Contribute to Git for Windows +==================================== + +Git was originally designed for Unix systems and still today, all the build tools for the Git +codebase assume you have standard Unix tools available in your path. If you have an open-source +mindset and want to start contributing to Git, but primarily use a Windows machine, then you may +have trouble getting started. This guide is for you. 
+ +Get the Source +-------------- + +Clone the [GitForWindows repository on GitHub](https://github.com/git-for-windows/git). +It is helpful to create your own fork for storing your development branches. + +Windows uses different line endings than Unix systems. See +[this GitHub article on working with line endings](https://help.github.com/articles/dealing-with-line-endings/#refreshing-a-repository-after-changing-line-endings) +if you have trouble with line endings. + +Build the Source +---------------- + +First, download and install the latest [Git for Windows SDK (64-bit)](https://github.com/git-for-windows/build-extra/releases/latest). +When complete, you can run the Git SDK, which creates a new Git Bash terminal window with +the additional development commands, such as `make`. + + As of time of writing, the SDK uses a different credential manager, so you may still want to use normal Git + Bash for interacting with your remotes. Alternatively, use SSH rather than HTTPS and + avoid credential manager problems. + +You should now be ready to type `make` from the root of your `git` source directory. +Here are some helpful variations: + +* `make -j[N] DEVELOPER=1`: Compile new sources using up to N concurrent processes. + The `DEVELOPER` flag turns on all warnings; code failing these warnings will not be + accepted upstream ("upstream" = "the core Git project"). +* `make clean`: Delete all compiled files. + +When running `make`, you can use `-j$(nproc)` to automatically use the number of processors +on your machine as the number of concurrent build processes. + +You can go deeper on the Windows-specific build process by reading the +[technical overview](https://github.com/git-for-windows/git/wiki/Technical-overview) or the +[guide to compiling Git with Visual Studio](https://github.com/git-for-windows/git/wiki/Compiling-Git-with-Visual-Studio). 
+ +## Building `git` on Windows with Visual Studio + +The typical approach to building `git` is to use the standard `Makefile` with GCC, as +above. Developers working in a Windows environment may want to instead build with the +[Microsoft Visual C++ compiler and libraries toolset (MSVC)](https://blogs.msdn.microsoft.com/vcblog/2017/03/07/msvc-the-best-choice-for-windows/). +There are a few benefits to using MSVC over GCC during your development, including creating +symbols for debugging and [performance tracing](https://github.com/Microsoft/perfview#perfview-overview). + +There are two ways to build Git for Windows using MSVC. Each have their own merits. + +### Using SDK Command Line + +Use one of the following commands from the SDK Bash window to build Git for Windows: + +``` + make MSVC=1 -j12 + make MSVC=1 DEBUG=1 -j12 +``` + +The first form produces release-mode binaries; the second produces debug-mode binaries. +Both forms produce PDB files and can be debugged. However, the first is best for perf +tracing and the second is best for single-stepping. + +You can then open Visual Studio and select File -> Open -> Project/Solution and select +the compiled `git.exe` file. This creates a basic solution and you can use the debugging +and performance tracing tools in Visual Studio to monitor a Git process. Use the Debug +Properties page to set the working directory and command line arguments. + +Be sure to clean up before switching back to GCC (or to switch between debug and +release MSVC builds): + +``` + make MSVC=1 -j12 clean + make MSVC=1 DEBUG=1 -j12 clean +``` + +### Using the IDE + +If you prefer working in Visual Studio with a solution full of projects, then you can use +CMake, either by letting Visual Studio configure it automatically (simply open Git's +top-level directory via `File>Open>Folder...`) or by (downloading and) running +[CMake](https://cmake.org) manually. + +What to Change? 
+--------------- + +Many new contributors ask: What should I start working on? + +One way to win big with the open-source community is to look at the +[issues page](https://github.com/git-for-windows/git/issues) and see if there are any issues that +you can fix quickly, or if anything catches your eye. + +You can also look at [the unofficial Chromium issues page](https://crbug.com/git) for +multi-platform issues. You can look at recent user questions on +[the Git mailing list](https://public-inbox.org/git). + +Or you can "scratch your own itch", i.e. address an issue you have with Git. The team at Microsoft where the Git for Windows maintainer works, for example, is focused almost entirely on [improving performance](https://blogs.msdn.microsoft.com/devops/2018/01/11/microsofts-performance-contributions-to-git-in-2017/). +We approach our work by finding something that is slow and try to speed it up. We start our +investigation by reliably reproducing the slow behavior, then running that example using +the MSVC build and tracing the results in PerfView. + +You could also think of something you wish Git could do, and make it do that thing! The +only concern I would have with this approach is whether or not that feature is something +the community also wants. If this excites you though, go for it! Don't be afraid to +[get involved in the mailing list](http://vger.kernel.org/vger-lists.html#git) early for +feedback on the idea. + +Test Your Changes +----------------- + +After you make your changes, it is important that you test your changes. Manual testing is +important, but checking and extending the existing test suite is even more important. You +want to run the functional tests to see if you broke something else during your change, and +you want to extend the functional tests to be sure no one breaks your feature in the future. 
+ +### Functional Tests + +Navigate to the `t/` directory and type `make` to run all tests or use `prove` as +[described in the Git for Windows wiki](https://github.com/git-for-windows/git/wiki/Building-Git): + +``` +prove -j12 --state=failed,save ./t[0-9]*.sh +``` + +You can also run each test directly by running the corresponding shell script with a name +like `tNNNN-descriptor.sh`. + +If you are adding new functionality, you may need to create unit tests by creating +helper commands that test a very limited action. These commands are stored in `t/helpers`. +When adding a helper, be sure to add a line to `t/Makefile` and to the `.gitignore` for the +binary file you add. The Git community prefers functional tests using the full `git` +executable, so try to exercise your new code using `git` commands before creating a test +helper. + +To find out why a test failed, repeat the test with the `-x -v -d -i` options and then +navigate to the appropriate "trash" directory to see the data shape that was used for the +failed test step. + +Read [`t/README`](t/README) for more details. + +### Performance Tests + +If you are working on improving performance, you will need to be acquainted with the +performance tests in `t/perf`. There are not too many performance tests yet, but adding one +as your first commit in a patch series helps to communicate the boost your change provides. + +To check the change in performance across multiple versions of `git`, you can use the +`t/perf/run` script. For example, to compare the performance of `git rev-list` across the +`core/master` and `core/next` branches compared to a `topic` branch, you can run + +``` +cd t/perf +./run core/master core/next topic -- p0001-rev-list.sh +``` + +You can also set certain environment variables to help test the performance on different +repositories or with more repetitions.
The full list is available in +[the `t/perf/README` file](t/perf/README), +but here are a few important ones: + +``` +GIT_PERF_REPO=/path/to/repo +GIT_PERF_LARGE_REPO=/path/to/large/repo +GIT_PERF_REPEAT_COUNT=10 +``` + +When running the performance tests on Linux, you may see a message "Can't locate JSON.pm in +@INC" and that means you need to run `sudo cpanm install JSON` to get the JSON perl package. + +For running performance tests, it can be helpful to set up a few repositories with strange +data shapes, such as: + +**Many objects:** Clone repos such as [Kotlin](https://github.com/jetbrains/kotlin), [Linux](https://github.com/torvalds/linux), or [Android](https://source.android.com/setup/downloading). + +**Many pack-files:** You can split a fresh clone into multiple pack-files of size at most +16MB by running `git repack -adfF --max-pack-size=16m`. See the +[`git repack` documentation](https://git-scm.com/docs/git-repack) for more information. +You can count the number of pack-files using `ls .git/objects/pack/*.pack | wc -l`. + +**Many loose objects:** If you already split your repository into multiple pack-files, then +you can pick one to split into loose objects using `cat .git/objects/pack/[id].pack | git unpack-objects`; +delete the `[id].pack` and `[id].idx` files after this. You can count the number of loose +objects using `ls .git/objects/??/* | wc -l`. + +**Deep history:** Usually large repositories also have deep histories, but you can use the +[test-many-commits-1m repo](https://github.com/cirosantilli/test-many-commits-1m/) to +target deep histories without the overhead of many objects. One issue with this repository: +there are no merge commits, so you will need to use a different repository to test a "wide" +commit history. + +**Large Index:** You can generate a large index and repo by using the scripts in +`t/perf/repos`. There are two scripts. `many-files.sh` which will generate a repo with +same tree and blobs but different paths.
Using `many-files.sh -d 5 -w 10 -f 9` will create +a repo with ~1 million entries in the index. `inflate-repo.sh` will use an existing repo +and copy the current work tree until it is a specified size. + +Test Your Changes on Linux +-------------------------- + +It can be important to work directly on the [core Git codebase](https://github.com/git/git), +such as a recent commit into the `master` or `next` branch that has not been incorporated +into Git for Windows. Also, it can help to run functional and performance tests on your +code in Linux before submitting patches to the mailing list, which focuses on many platforms. +The differences between Windows and Linux are usually enough to catch most cross-platform +issues. + +### Using the Windows Subsystem for Linux + +The [Windows Subsystem for Linux (WSL)](https://docs.microsoft.com/en-us/windows/wsl/install-win10) +allows you to [install Ubuntu Linux as an app](https://www.microsoft.com/en-us/store/p/ubuntu/9nblggh4msv6) +that can run Linux executables on top of the Windows kernel. Internally, +Linux syscalls are interpreted by the WSL, everything else is plain Ubuntu. + +First, open WSL (either type "Bash" in Cortana, or execute "bash.exe" in a CMD window). +Then install the prerequisites, and `git` for the initial clone: + +``` +sudo apt-get update +sudo apt-get install git gcc make libssl-dev libcurl4-openssl-dev \ + libexpat-dev tcl tk gettext git-email zlib1g-dev +``` + +Then, clone and build: + +``` +git clone https://github.com/git-for-windows/git +cd git +git remote add -f upstream https://github.com/git/git +make +``` + +Be sure to clone into `/home/[user]/` and not into any folder under `/mnt/?/` or your build +will fail due to colons in file names. + +### Using a Linux Virtual Machine with Hyper-V + +If you prefer, you can use a virtual machine (VM) to run Linux and test your changes in the +full environment. The test suite runs a lot faster on Linux than on Windows or with the WSL. 
+You can connect to the VM using an SSH terminal like +[PuTTY](https://www.chiark.greenend.org.uk/~sgtatham/putty/). + +The following instructions are for using Hyper-V, which is available in some versions of Windows. +There are many virtual machine alternatives available, if you do not have such a version installed. + +* [Download an Ubuntu Server ISO](https://www.ubuntu.com/download/server). +* Open [Hyper-V Manager](https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/quick-start/enable-hyper-v). +* [Set up a virtual switch](https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/quick-start/connect-to-network) + so your VM can reach the network. +* Select "Quick Create", name your machine, select the ISO as installation source, and un-check + "This virtual machine will run Windows." +* Go through the Ubuntu install process, being sure to select to install OpenSSH Server. +* When install is complete, log in and check the SSH server status with `sudo service ssh status`. + * If the service is not found, install with `sudo apt-get install openssh-server`. + * If the service is not running, then use `sudo service ssh start`. +* Use `shutdown -h now` to shutdown the VM, go to the Hyper-V settings for the VM, expand Network Adapter + to select "Advanced Features", and set the MAC address to be static (this can save your VM from losing + network if shut down incorrectly). +* Provide as many cores to your VM as you can (for parallel builds). +* Restart your VM, but do not connect. +* Use `ssh` in Git Bash, download [PuTTY](http://www.putty.org/), or use your favorite SSH client to connect to the VM through SSH. 
+ +In order to build and use `git`, you will need the following libraries via `apt-get`: + +``` +sudo apt-get update +sudo apt-get install git gcc make libssl-dev libcurl4-openssl-dev \ + libexpat-dev tcl tk gettext git-email zlib1g-dev +``` + +To get your code from your Windows machine to the Linux VM, it is easiest to push the branch to your fork of Git and clone your fork in the Linux VM. + +Don't forget to set your `git` config with your preferred name, email, and editor. + +Polish Your Commits +------------------- + +Before submitting your patch, be sure to read the [coding guidelines](https://github.com/git/git/blob/master/Documentation/CodingGuidelines) +and check your code to match as best you can. This can be a lot of effort, but it saves +time during review to avoid style issues. + +The other possibly major difference between the mailing list submissions and GitHub PR workflows +is that each commit will be reviewed independently. Even if you are submitting a +patch series with multiple commits, each commit must stand on its own and be reviewable +by itself. Make sure the commit message clearly explains the why of the commit, not the how. +Describe what is wrong with the current code and how your changes have made the code better. + +When preparing your patch, it is important to put yourself in the shoes of the Git community. +Accepting a patch requires more justification than approving a pull request from someone on +your team. The community has a stable product and is responsible for keeping it stable. If +you introduce a bug, then they cannot count on you being around to fix it. When you decided +to start work on a new feature, they were not part of the design discussion and may not +even believe the feature is worth introducing. + +Questions to answer in your patch message (and commit messages) may include: +* Why is this patch necessary? +* How does the current behavior cause pain for users?
+* What kinds of repositories are necessary for noticing a difference? +* What design options did you consider before writing this version? Do you have links to + code for those alternate designs? +* Is this a performance fix? Provide clear performance numbers for various well-known repos. + +Here are some other tips that we use when cleaning up our commits: + +* Commit messages should be wrapped at 76 columns per line (or less; 72 is also a + common choice). +* Make sure the commits are signed off using `git commit (-s|--signoff)`. See + [SubmittingPatches](https://github.com/git/git/blob/v2.8.1/Documentation/SubmittingPatches#L234-L286) + for more details about what this sign-off means. +* Check for whitespace errors using `git diff --check [base]...HEAD` or `git log --check`. +* Run `git rebase --whitespace=fix` to correct upstream issues with whitespace. +* Become familiar with interactive rebase (`git rebase -i`) because you will be reordering, + squashing, and editing commits as your patch or series of patches is reviewed. +* Make sure any shell scripts that you add have the executable bit set on them. This is + usually for test files that you add in the `/t` directory. You can use + `git add --chmod=+x [file]` to update it. You can test whether a file is marked as executable + using `git ls-files --stage \*.sh`; the first number is 100755 for executable files. +* Your commit titles should match the "area: change description" format. Rules of thumb: + * Choose "<area>: " prefix appropriately. + * Keep the description short and to the point. + * The word that follows the "<area>: " prefix is not capitalized. + * Do not include a full-stop at the end of the title. + * Read a few commit messages -- using `git log origin/master`, for instance -- to + become acquainted with the preferred commit message style. +* Build source using `make DEVELOPER=1` for extra-strict compiler warnings. 
+ +Submit Your Patch +----------------- + +Git for Windows [accepts pull requests on GitHub](https://github.com/git-for-windows/git/pulls), but +these are reserved for Windows-specific improvements. For core Git, submissions are accepted on +[the Git mailing list](https://public-inbox.org/git). + +### Configure Git to Send Emails + +There are a bunch of options for configuring the `git send-email` command. These options can +be found in the documentation for +[`git config`](https://git-scm.com/docs/git-config) and +[`git send-email`](https://git-scm.com/docs/git-send-email). + +``` +git config --global sendemail.smtpserver <smtp server> +git config --global sendemail.smtpserverport 587 +git config --global sendemail.smtpencryption tls +git config --global sendemail.smtpuser <email address> +``` + +To avoid storing your password in the config file, store it in the Git credential manager: + +``` +$ git credential fill +protocol=smtp +host=<smtp server> +username=<email address> +password=password +``` + +Before submitting a patch, read the [Git documentation on submitting patches](https://github.com/git/git/blob/master/Documentation/SubmittingPatches). + +To construct a patch set, use the `git format-patch` command. There are three important options: + +* `--cover-letter`: If specified, create a `[v#-]0000-cover-letter.patch` file that can be + edited to describe the patch as a whole. If you previously added a branch description using + `git branch --edit-description`, you will end up with a 0/N mail with that description and + a nice overall diffstat. +* `--in-reply-to=[Message-ID]`: This will mark your cover letter as replying to the given + message (which should correspond to your previous iteration). To determine the correct Message-ID, + find the message you are replying to on [public-inbox.org/git](https://public-inbox.org/git) and take + the ID from between the angle brackets. + +* `--subject-prefix=[prefix]`: This defaults to [PATCH].
For subsequent iterations, you will want to + override it like `--subject-prefix="[PATCH v2]"`. You can also use the `-v` option to have it + automatically generate the version number in the patches. + +If you have multiple commits and use the `--cover-letter` option, be sure to open the +`0000-cover-letter.patch` file to update the subject and add some details about the overall purpose +of the patch series. + +### Examples + +To generate a single commit patch file: +``` +git format-patch -s -o [dir] -1 +``` +To generate four patch files from the last four commits with a cover letter: +``` +git format-patch --cover-letter -s -o [dir] HEAD~4 +``` +To generate version 3 with four patch files from the last four commits with a cover letter: +``` +git format-patch --cover-letter -s -o [dir] -v 3 HEAD~4 +``` + +### Submit the Patch + +Run [`git send-email`](https://git-scm.com/docs/git-send-email), starting with a test email: + +``` +git send-email --to=yourself@address.com [dir with patches]/*.patch +``` + +After checking the receipt of your test email, you can send to the list and to any +potentially interested reviewers. + +``` +git send-email --to=git@vger.kernel.org --cc=<email1> --cc=<email2> [dir with patches]/*.patch +``` + +To submit an nth version patch (say version 3): + +``` +git send-email --to=git@vger.kernel.org --cc=<email1> --cc=<email2> \ + --in-reply-to=<the message id of cover letter of patch v2> [dir with patches]/*.patch +``` From 66acd8b37ed1108beb5e830fda35a6abf02f38f1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 10 Jan 2014 16:16:03 -0600 Subject: [PATCH 199/303] README.md: Add a Windows-specific preamble MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Includes touch-ups by 마누엘, Philip Oakley and 孙卓识.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- README.md | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7ce4f05bae8120..e2314c5a313c08 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,76 @@ -[![Build status](https://github.com/git/git/workflows/CI/badge.svg)](https://github.com/git/git/actions?query=branch%3Amaster+event%3Apush) +Git for Windows +=============== + +[![Open in Visual Studio Code](https://img.shields.io/static/v1?logo=visualstudiocode&label=&message=Open%20in%20Visual%20Studio%20Code&labelColor=2c2c32&color=007acc&logoColor=007acc)](https://open.vscode.dev/git-for-windows/git) +[![Build status](https://github.com/git-for-windows/git/workflows/CI/badge.svg)](https://github.com/git-for-windows/git/actions?query=branch%3Amain+event%3Apush) +[![Join the chat at https://gitter.im/git-for-windows/git](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/git-for-windows/git?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) + +This is [Git for Windows](http://git-for-windows.github.io/), the Windows port +of [Git](http://git-scm.com/). + +The Git for Windows project is run using a [governance +model](http://git-for-windows.github.io/governance-model.html). If you +encounter problems, you can report them as [GitHub +issues](https://github.com/git-for-windows/git/issues), discuss them on Git +for Windows' [Google Group](http://groups.google.com/group/git-for-windows), +and [contribute bug +fixes](https://github.com/git-for-windows/git/wiki/How-to-participate). + +To build Git for Windows, please either install [Git for Windows' +SDK](https://gitforwindows.org/#download-sdk), start its `git-bash.exe`, `cd` +to your Git worktree and run `make`, or open the Git worktree as a folder in +Visual Studio. 
+ +To verify that your build works, use one of the following methods: + +- If you want to test the built executables within Git for Windows' SDK, + prepend `<worktree>/bin-wrappers` to the `PATH`. +- Alternatively, run `make install` in the Git worktree. +- If you need to test this in a full installer, run `sdk build + git-and-installer`. +- You can also "install" Git into an existing portable Git via `make install + DESTDIR=<dir>` where `<dir>` refers to the top-level directory of the + portable Git. In this instance, you will want to prepend that portable Git's + `/cmd` directory to the `PATH`, or test by running that portable Git's + `git-bash.exe` or `git-cmd.exe`. +- If you built using a recent Visual Studio, you can use the menu item + `Build>Install git` (you will want to click on `Project>CMake Settings for + Git` first, then click on `Edit JSON` and then point `installRoot` to the + `mingw64` directory of an already-unpacked portable Git). + + As in the previous bullet point, you will then prepend `/cmd` to the `PATH` + or run using the portable Git's `git-bash.exe` or `git-cmd.exe`. +- If you want to run the built executables in-place, but in a CMD instead of + inside a Bash, you can run a snippet like this in the `git-bash.exe` window + where Git was built (ensure that the `EOF` line has no leading spaces), and + then paste into the CMD window what was put in the clipboard: + + ```sh + clip.exe <<EOF + set GIT_EXEC_PATH=$(cygpath -aw .) + set PATH=$(cygpath -awp ".:contrib/scalar:/mingw64/bin:/usr/bin:$PATH") + set GIT_TEMPLATE_DIR=$(cygpath -aw templates/blt) + set GITPERLLIB=$(cygpath -aw perl/build/lib) + EOF + ``` +- If you want to run the built executables in-place, but outside of Git for + Windows' SDK, and without an option to set/override any environment + variables (e.g. 
in Visual Studio's debugger), you can call the Git executable + by its absolute path and use the `--exec-path` option, like so: + + ```cmd + C:\git-sdk-64\usr\src\git\git.exe --exec-path=C:\git-sdk-64\usr\src\git help + ``` + + Note: for this to work, you have to hard-link (or copy) the `.dll` files from + the `/mingw64/bin` directory to the Git worktree, or add the `/mingw64/bin` + directory to the `PATH` somehow or other. + +To make sure that you are testing the correct binary, call `./git.exe version` +in the Git worktree, and then call `git version` in a directory/window where +you want to test Git, and verify that they refer to the same version (you may +even want to pass the command-line option `--build-options` to look at the +exact commit from which the Git version was built). Git - fast, scalable, distributed revision control system ========================================================= @@ -29,7 +101,7 @@ CVS users may also want to read [Documentation/gitcvs-migration.txt][] (`man gitcvs-migration` or `git help cvs-migration` if git is installed). -The user discussion and development of Git take place on the Git +The user discussion and development of core Git take place on the Git mailing list -- everyone is welcome to post bug reports, feature requests, comments and patches to git@vger.kernel.org (read [Documentation/SubmittingPatches][] for instructions on patch submission @@ -43,6 +115,7 @@ To subscribe to the list, send an email with just "subscribe git" in the body to majordomo@vger.kernel.org (not the Git list). The mailing list archives are available at <https://lore.kernel.org/git/>, <http://marc.info/?l=git> and other archival sites. +The core git mailing list is plain text (no HTML!). Issues which are security relevant should be disclosed privately to the Git Security mailing list <git-security@googlegroups.com>. 
From cf303e2dfee6f00b371e5ccd5524483094539ab0 Mon Sep 17 00:00:00 2001 From: Brendan Forster <brendan@github.com> Date: Thu, 18 Feb 2016 21:29:50 +1100 Subject: [PATCH 200/303] Add an issue template With improvements by Clive Chan, Adric Norris, Ben Bodenmiller and Philip Oakley. Helped-by: Clive Chan <cc@clive.io> Helped-by: Adric Norris <landstander668@gmail.com> Helped-by: Ben Bodenmiller <bbodenmiller@hotmail.com> Helped-by: Philip Oakley <philipoakley@iee.org> Signed-off-by: Brendan Forster <brendan@github.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- .github/ISSUE_TEMPLATE.md | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 00000000000000..4017ed82ca4341 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,64 @@ + - [ ] I was not able to find an [open](https://github.com/git-for-windows/git/issues?q=is%3Aopen) or [closed](https://github.com/git-for-windows/git/issues?q=is%3Aclosed) issue matching what I'm seeing + +### Setup + + - Which version of Git for Windows are you using? Is it 32-bit or 64-bit? + +``` +$ git --version --build-options + +** insert your machine's response here ** +``` + + - Which version of Windows are you running? Vista, 7, 8, 10? Is it 32-bit or 64-bit? + +``` +$ cmd.exe /c ver + +** insert your machine's response here ** +``` + + - What options did you set as part of the installation? Or did you choose the + defaults? 
+ +``` +# One of the following: +> type "C:\Program Files\Git\etc\install-options.txt" +> type "C:\Program Files (x86)\Git\etc\install-options.txt" +> type "%USERPROFILE%\AppData\Local\Programs\Git\etc\install-options.txt" +> type "$env:USERPROFILE\AppData\Local\Programs\Git\etc\install-options.txt" +$ cat /etc/install-options.txt + +** insert your machine's response here ** +``` + + - Any other interesting things about your environment that might be related + to the issue you're seeing? + +** insert your response here ** + +### Details + + - Which terminal/shell are you running Git from? e.g. Bash/CMD/PowerShell/other + +** insert your response here ** + + - What commands did you run to trigger this issue? If you can provide a + [Minimal, Complete, and Verifiable example](http://stackoverflow.com/help/mcve) + this will help us understand the issue. + +``` +** insert your commands here ** +``` + - What did you expect to occur after running these commands? + +** insert here ** + + - What actually happened instead? + +** insert here ** + + - If the problem was occurring with a specific repository, can you provide the + URL to that repository to help us with testing? + +** insert URL here ** From e53f1f878d4d39bf7c274567bb2ba6301a69fd15 Mon Sep 17 00:00:00 2001 From: Philip Oakley <philipoakley@iee.org> Date: Fri, 22 Dec 2017 17:15:50 +0000 Subject: [PATCH 201/303] Modify the GitHub Pull Request template (to reflect Git for Windows) Git for Windows accepts pull requests; Core Git does not. Therefore we need to adjust the template (because it only matches core Git's project management style, not ours). Also: direct Git for Windows enhancements to their contributions page, space out the text for easy reading, and clarify that the mailing list is plain text, not HTML.
Signed-off-by: Philip Oakley <philipoakley@iee.org> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- .github/PULL_REQUEST_TEMPLATE.md | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 952c7c3a2aa11e..831ef6f19f1d11 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,7 +1,19 @@ -Thanks for taking the time to contribute to Git! Please be advised that the -Git community does not use github.com for their contributions. Instead, we use -a mailing list (git@vger.kernel.org) for code submissions, code reviews, and -bug reports. Nevertheless, you can use GitGitGadget (https://gitgitgadget.github.io/) +Thanks for taking the time to contribute to Git! + +Those seeking to contribute to the Git for Windows fork should see +http://gitforwindows.org/#contribute on how to contribute Windows specific +enhancements. + +If your contribution is for the core Git functions and documentation +please be aware that the Git community does not use the github.com issues +or pull request mechanism for their contributions. + +Instead, we use the Git mailing list (git@vger.kernel.org) for code and +documentation submissions, code reviews, and bug reports. The +mailing list is plain text only (anything with HTML is sent directly +to the spam folder). + +Nevertheless, you can use GitGitGadget (https://gitgitgadget.github.io/) to conveniently send your Pull Requests commits to our mailing list. Please read the "guidelines for contributing" linked above! From 9e4f6ba368fd08f8549fbc5fc82ce52530c592f0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 20 Feb 2018 15:44:57 +0100 Subject: [PATCH 202/303] .github: Add configuration for the Sentiment Bot The sentiment bot will help detect when things get too heated. Hopefully. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- .github/config.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/config.yml diff --git a/.github/config.yml b/.github/config.yml new file mode 100644 index 00000000000000..45edb7ba37ce02 --- /dev/null +++ b/.github/config.yml @@ -0,0 +1,10 @@ +# Configuration for sentiment-bot - https://github.com/behaviorbot/sentiment-bot + +# *Required* toxicity threshold between 0 and .99 with the higher numbers being +# the most toxic. Anything higher than this threshold will be marked as toxic +# and commented on +sentimentBotToxicityThreshold: .7 + +# *Required* Comment to reply with +sentimentBotReplyComment: > + Please be sure to review the code of conduct and be respectful of other users. cc/ @git-for-windows/trusted-git-for-windows-developers From ca7cd83085e73829ee656bd56208b6fe2148eb70 Mon Sep 17 00:00:00 2001 From: Alejandro Barreto <alejandro.barreto@ni.com> Date: Fri, 9 Mar 2018 14:17:54 -0600 Subject: [PATCH 203/303] Document how $HOME is set on Windows Git documentation refers to $HOME and $XDG_CONFIG_HOME often, but does not specify how or where these values come from on Windows where neither is set by default. The new documentation reflects the behavior of setup_windows_environment() in compat/mingw.c. Signed-off-by: Alejandro Barreto <alejandro.barreto@ni.com> --- Documentation/git.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/git.txt b/Documentation/git.txt index 0c15ef3a8e6a51..455eaf0489a226 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -460,6 +460,14 @@ Environment Variables --------------------- Various Git commands use the following environment variables: +System +~~~~~~ +`HOME`:: + Specifies the path to the user's home directory. 
On Windows, if + unset, Git will set a process environment variable equal to: + `$HOMEDRIVE$HOMEPATH` if both `$HOMEDRIVE` and `$HOMEPATH` exist; + otherwise `$USERPROFILE` if `$USERPROFILE` exists. + The Git Repository ~~~~~~~~~~~~~~~~~~ These environment variables apply to 'all' core Git commands. Nb: it From 901f599d82927b3f1c90301c274135e15c734bd7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 10 Dec 2019 21:41:57 +0100 Subject: [PATCH 204/303] reset: reinstate support for the deprecated --stdin option The `--stdin` option was a well-established paradigm in other commands, therefore we implemented it in `git reset` for use by Visual Studio. Unfortunately, upstream Git decided that it is time to introduce `--pathspec-from-file` instead. To keep backwards-compatibility for some grace period, we therefore reinstate the `--stdin` option on top of the `--pathspec-from-file` option, but mark it firmly as deprecated. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- Documentation/git-reset.txt | 11 +++++++++++ builtin/reset.c | 14 ++++++++++++++ t/t7108-reset-stdin.sh | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100755 t/t7108-reset-stdin.sh diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt index 01cb4c9b9c56a7..6a44de12788439 100644 --- a/Documentation/git-reset.txt +++ b/Documentation/git-reset.txt @@ -12,6 +12,7 @@ SYNOPSIS 'git reset' [-q] [--pathspec-from-file=<file> [--pathspec-file-nul]] [<tree-ish>] 'git reset' (--patch | -p) [<tree-ish>] [--] [<pathspec>...] 'git reset' [--soft | --mixed [-N] | --hard | --merge | --keep] [-q] [<commit>] +DEPRECATED: 'git reset' [-q] [--stdin [-z]] [<tree-ish>] DESCRIPTION ----------- @@ -132,6 +133,16 @@ OPTIONS + For more details, see the 'pathspec' entry in linkgit:gitglossary[7]. 
+--stdin:: + DEPRECATED (use `--pathspec-from-file=-` instead): Instead of taking + list of paths from the command line, read list of paths from the + standard input. Paths are separated by LF (i.e. one path per line) by + default. + +-z:: + DEPRECATED (use `--pathspec-file-nul` instead): Only meaningful with + `--stdin`; paths are separated with NUL character instead of LF. + EXAMPLES -------- diff --git a/builtin/reset.c b/builtin/reset.c index fdce6f8c85670c..0d471411ffe398 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -26,6 +26,8 @@ #include "submodule.h" #include "submodule-config.h" #include "dir.h" +#include "strbuf.h" +#include "quote.h" #define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000) @@ -34,6 +36,7 @@ static const char * const git_reset_usage[] = { N_("git reset [-q] [<tree-ish>] [--] <pathspec>..."), N_("git reset [-q] [--pathspec-from-file [--pathspec-file-nul]] [<tree-ish>]"), N_("git reset --patch [<tree-ish>] [--] [<pathspec>...]"), + N_("DEPRECATED: git reset [-q] [--stdin [-z]] [<tree-ish>]"), NULL }; @@ -316,6 +319,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) struct object_id oid; struct pathspec pathspec; int intent_to_add = 0; + int nul_term_line = 0, read_from_stdin = 0; const struct option options[] = { OPT__QUIET(&quiet, N_("be quiet, only report errors")), OPT_BOOL(0, "no-refresh", &no_refresh, @@ -337,6 +341,10 @@ int cmd_reset(int argc, const char **argv, const char *prefix) N_("record only the fact that removed paths will be added later")), OPT_PATHSPEC_FROM_FILE(&pathspec_from_file), OPT_PATHSPEC_FILE_NUL(&pathspec_file_nul), + OPT_BOOL('z', NULL, &nul_term_line, + N_("DEPRECATED (use --pathspec-file-nul instead): paths are separated with NUL character")), + OPT_BOOL(0, "stdin", &read_from_stdin, + N_("DEPRECATED (use --pathspec-from-file=- instead): read paths from <stdin>")), OPT_END() }; @@ -346,6 +354,12 @@ int cmd_reset(int argc, const char **argv, const char *prefix) PARSE_OPT_KEEP_DASHDASH); 
parse_args(&pathspec, argv, prefix, patch_mode, &rev); + if (read_from_stdin) { + pathspec_from_file = "-"; + if (nul_term_line) + pathspec_file_nul = 1; + } + if (pathspec_from_file) { if (patch_mode) die(_("options '%s' and '%s' cannot be used together"), "--pathspec-from-file", "--patch"); diff --git a/t/t7108-reset-stdin.sh b/t/t7108-reset-stdin.sh new file mode 100755 index 00000000000000..b7cbcbf869296c --- /dev/null +++ b/t/t7108-reset-stdin.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='reset --stdin' + +. ./test-lib.sh + +test_expect_success 'reset --stdin' ' + test_commit hello && + git rm hello.t && + test -z "$(git ls-files hello.t)" && + echo hello.t | git reset --stdin && + test hello.t = "$(git ls-files hello.t)" +' + +test_expect_success 'reset --stdin -z' ' + test_commit world && + git rm hello.t world.t && + test -z "$(git ls-files hello.t world.t)" && + printf world.tQworld.tQhello.tQ | q_to_nul | git reset --stdin -z && + printf "hello.t\nworld.t\n" >expect && + git ls-files >actual && + test_cmp expect actual +' + +test_expect_success '--stdin requires --mixed' ' + echo hello.t >list && + test_must_fail git reset --soft --stdin <list && + test_must_fail git reset --hard --stdin <list && + git reset --mixed --stdin <list +' + +test_done From ee532c7108e7d14c26f5392d82bf21a1f970ed75 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 11 Dec 2015 06:59:13 +0100 Subject: [PATCH 205/303] mingw: deprecate old-style runtime-prefix handling in interpolate_path() On Windows, an absolute POSIX path needs to be turned into a Windows one. We used to interpret paths starting with a single `/` as relative to the runtime-prefix, but now these need to be prefixed with `%(prefix)/`. Let's warn for now, but still handle it. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- path.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/path.c b/path.c index a3cfcd8a6e95b3..156c6133d32d91 100644 --- a/path.c +++ b/path.c @@ -739,6 +739,12 @@ char *interpolate_path(const char *path, int real_home) if (skip_prefix(path, "%(prefix)/", &path)) return system_path(path); +#ifdef __MINGW32__ + if (path[0] == '/') { + warning(_("encountered old-style '%s' that should be '%%(prefix)/%s'"), path, path); + return system_path(path + 1); + } +#endif if (path[0] == '~') { const char *first_slash = strchrnul(path, '/'); const char *username = path + 1; From 7dd328905f8fd3578ee9d7b48b5c43186573d659 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 29 Sep 2020 13:50:59 +0200 Subject: [PATCH 206/303] Add a GitHub workflow to monitor component updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than using private IFTTT Applets that send mails to this maintainer whenever a new version of a Git for Windows component was released, let's use the power of GitHub workflows to make this process publicly visible. This workflow monitors the Atom/RSS feeds, and opens a ticket whenever a new version was released. Note: Bash sometimes releases multiple patched versions within a few minutes of each other (i.e. 5.1p1 through 5.1p4, 5.0p15 and 5.0p16). The MSYS2 runtime also has a similar system. We can address those patches as a group, so we shouldn't get multiple issues about them. Note further: We're not acting on newlib releases, OpenSSL alphas, Perl release candidates or non-stable Perl releases. There's no need to open issues about them. 
Co-authored-by: Matthias Aßhauer <mha1993@live.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- .github/workflows/monitor-components.yml | 86 ++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 .github/workflows/monitor-components.yml diff --git a/.github/workflows/monitor-components.yml b/.github/workflows/monitor-components.yml new file mode 100644 index 00000000000000..9c78d6d3e92445 --- /dev/null +++ b/.github/workflows/monitor-components.yml @@ -0,0 +1,86 @@ +name: Monitor component updates + +# Git for Windows is a slightly modified subset of MSYS2. Some of its +# components are maintained by Git for Windows, others by MSYS2. To help +# keeping the former up to date, this workflow monitors the Atom/RSS feeds +# and opens new tickets for each new component version. + +on: + schedule: + - cron: "23 8,11,14,17 * * *" + workflow_dispatch: + +env: + CHARACTER_LIMIT: 5000 + MAX_AGE: 48h + +jobs: + job: + runs-on: ubuntu-latest + environment: monitor-components + strategy: + matrix: + component: + - label: git + feed: https://github.com/git/git/tags.atom + - label: git-lfs + feed: https://github.com/git-lfs/git-lfs/tags.atom + - label: gcm-core + feed: https://github.com/microsoft/git-credential-manager-core/tags.atom + - label: tig + feed: https://github.com/jonas/tig/tags.atom + - label: cygwin + feed: https://github.com/cygwin/cygwin/releases.atom + title-pattern: ^(?!.*newlib) + - label: msys2-runtime-package + feed: https://github.com/msys2/MSYS2-packages/commits/master/msys2-runtime.atom + - label: msys2-runtime + feed: https://github.com/msys2/msys2-runtime/commits/HEAD.atom + aggregate: true + - label: openssh + feed: https://github.com/openssh/openssh-portable/tags.atom + - label: openssl + feed: https://github.com/openssl/openssl/tags.atom + title-pattern: ^(?!.*alpha) + - label: gnutls + feed: https://gnutls.org/news.atom + - label: heimdal + feed: https://github.com/heimdal/heimdal/tags.atom + - label: 
git-sizer + feed: https://github.com/github/git-sizer/tags.atom + - label: gitflow + feed: https://github.com/petervanderdoes/gitflow-avh/tags.atom + - label: curl + feed: https://github.com/curl/curl/tags.atom + - label: libgpg-error + feed: https://github.com/gpg/libgpg-error/releases.atom + title-pattern: ^libgpg-error-[0-9\.]*$ + - label: libgcrypt + feed: https://github.com/gpg/libgcrypt/releases.atom + title-pattern: ^libgcrypt-[0-9\.]*$ + - label: gpg + feed: https://github.com/gpg/gnupg/releases.atom + - label: mintty + feed: https://github.com/mintty/mintty/releases.atom + - label: p7zip + feed: https://sourceforge.net/projects/p7zip/rss?path=/p7zip + - label: bash + feed: https://git.savannah.gnu.org/cgit/bash.git/atom/?h=master + aggregate: true + - label: perl + feed: https://github.com/Perl/perl5/tags.atom + title-pattern: ^(?!.*(5\.[0-9]+[13579]|RC)) + - label: pcre2 + feed: https://github.com/PCRE2Project/pcre2/tags.atom + fail-fast: false + steps: + - uses: git-for-windows/rss-to-issues@v0 + with: + feed: ${{matrix.component.feed}} + prefix: "[New ${{matrix.component.label}} version]" + labels: component-update + github-token: ${{ secrets.MONITOR_COMPONENTS_PAT }} + character-limit: ${{ env.CHARACTER_LIMIT }} + max-age: ${{ env.MAX_AGE }} + aggregate: ${{matrix.component.aggregate}} + title-pattern: ${{matrix.component.title-pattern}} From 6c563251be4c6365702d52a3e85318be6074028a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 23 Aug 2019 14:14:42 +0200 Subject: [PATCH 207/303] SECURITY.md: document Git for Windows' policies This is the recommended way on GitHub to describe policies revolving around security issues and about supported versions. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- SECURITY.md | 56 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index c720c2ae7f9580..328351684298a4 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -28,24 +28,38 @@ Examples for details to include: ## Supported Versions -There are no official "Long Term Support" versions in Git. -Instead, the maintenance track (i.e. the versions based on the -most recently published feature release, also known as ".0" -version) sees occasional updates with bug fixes. - -Fixes to vulnerabilities are made for the maintenance track for -the latest feature release and merged up to the in-development -branches. The Git project makes no formal guarantee for any -older maintenance tracks to receive updates. In practice, -though, critical vulnerability fixes are applied not only to the -most recent track, but to at least a couple more maintenance -tracks. - -This is typically done by making the fix on the oldest and still -relevant maintenance track, and merging it upwards to newer and -newer maintenance tracks. - -For example, v2.24.1 was released to address a couple of -[CVEs](https://cve.mitre.org/), and at the same time v2.14.6, -v2.15.4, v2.16.6, v2.17.3, v2.18.2, v2.19.3, v2.20.2, v2.21.1, -v2.22.2 and v2.23.1 were released. +Git for Windows is a "friendly fork" of [Git](https://git-scm.com/), i.e. changes in Git for Windows are frequently contributed back, and Git for Windows' release cycle closely follows Git's. + +While Git maintains several release trains (when v2.19.1 was released, there were updates to v2.14.x-v2.18.x, too, for example), Git for Windows follows only the latest Git release. For example, there is no Git for Windows release corresponding to Git v2.16.5 (which was released after v2.19.0).
+ +One exception is [MinGit for Windows](https://github.com/git-for-windows/git/wiki/MinGit) (a minimal subset of Git for Windows, intended for bundling with third-party applications that do not need any interactive commands nor support for `git svn`): critical security fixes are backported to the v2.11.x, v2.14.x, v2.19.x, v2.21.x and v2.23.x release trains. + +## Version number scheme + +The Git for Windows versions reflect the Git version on which they are based. For example, Git for Windows v2.21.0 is based on Git v2.21.0. + +As Git for Windows bundles more than just Git (such as Bash, OpenSSL, OpenSSH, GNU Privacy Guard), sometimes there are interim releases without corresponding Git releases. In these cases, Git for Windows appends a number in parentheses, starting with the number 2, then 3, etc. For example, both Git for Windows v2.17.1 and v2.17.1(2) were based on Git v2.17.1, but the latter included updates for Git Credential Manager and Git LFS, fixing critical regressions. + +## Tag naming scheme + +Every Git for Windows version is tagged using a name that starts with the Git version on which it is based, with the suffix `.windows.<patchlevel>` appended. For example, Git for Windows v2.17.1's source code is tagged as [`v2.17.1.windows.1`](https://github.com/git-for-windows/git/releases/tag/v2.17.1.windows.1) (the patch level is always at least 1, given that Git for Windows always has patches on top of Git). Likewise, Git for Windows v2.17.1(2)'s source code is tagged as [`v2.17.1.windows.2`](https://github.com/git-for-windows/git/releases/tag/v2.17.1.windows.2). + +## Release Candidate (rc) versions + +As a friendly fork of Git (the "upstream" project), Git for Windows is closely correlated to that project. + +Consequently, Git for Windows publishes versions based on Git's release candidates (for upcoming "`.0`" versions, see [Git's release schedule](https://tinyurl.com/gitCal)).
These versions end in `-rc<n>`, starting with `-rc0` for a very early preview of what is to come, and as with regular versions, Git for Windows tries to follow Git's releases as quickly as possible. + +Note: there is currently a bug in the "Check daily for updates" code, where it mistakes the final version for a downgrade from release candidates. Example: if you installed Git for Windows v2.23.0-rc3 and enabled the auto-updater, it would ask you whether you want to "downgrade" to v2.23.0 when that version was available. + +[All releases](https://github.com/git-for-windows/git/releases/), including release candidates, are listed via a link at the footer of the [Git for Windows](https://gitforwindows.org/) home page. + +## Snapshot versions ('nightly builds') + +Git for Windows also provides snapshots (these are not releases) of the current development as per git-for-windows/git's `master` branch at the [Snapshots](https://wingit.blob.core.windows.net/files/index.html) page. This link is also listed in the footer of the [Git for Windows](https://gitforwindows.org/) home page. + +Note: even if those builds are not exactly "nightly", they are sometimes referred to as "nightly builds" to keep with other projects' nomenclature. + +## Following upstream's developments + +The [git-for-windows/git repository](https://github.com/git-for-windows/git) also provides the `shears/*` branches. The `shears/*` branches reflect Git for Windows' patches, rebased onto the upstream integration branches, [updated (mostly) via automated CI builds](https://dev.azure.com/git-for-windows/git/_build?definitionId=25).
From ac40eb3daf5be541bbce14d6ee061e59f42cd320 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Sat, 12 Sep 2015 12:25:47 +0200 Subject: [PATCH 208/303] t3701: verify that we can add *lots* of files interactively Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t3701-add-interactive.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index 5841f280fb2d4c..0c6d914b9b3b8c 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -1044,6 +1044,27 @@ test_expect_success 'checkout -p patch editing of added file' ' ) ' +test_expect_success EXPENSIVE 'add -i with a lot of files' ' + git reset --hard && + x160=0123456789012345678901234567890123456789 && + x160=$x160$x160$x160$x160 && + y= && + i=0 && + while test $i -le 200 + do + name=$(printf "%s%03d" $x160 $i) && + echo $name >$name && + git add -N $name && + y="${y}y$LF" && + i=$(($i+1)) || + exit 1 + done && + echo "$y" | git add -p -- . && + git diff --cached >staged && + test_line_count = 1407 staged && + git reset --hard +' + test_expect_success 'show help from add--helper' ' git reset --hard && cat >expect <<-EOF && From f2c74aabdd693836fca20be036f84bd19beea966 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sat, 6 Jul 2013 02:09:35 +0200 Subject: [PATCH 209/303] Win32: make FILETIME conversion functions public We will use them in the upcoming "FSCache" patches (to accelerate sequential lstat() calls). 
Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 18 ------------------ compat/mingw.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 3d34cea55a5e8b..5b0d759573d6b6 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -762,24 +762,6 @@ int mingw_chmod(const char *filename, int mode) return _wchmod(wfilename, mode); } -/* - * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. - * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. - */ -static inline long long filetime_to_hnsec(const FILETIME *ft) -{ - long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; - /* Windows to Unix Epoch conversion */ - return winTime - 116444736000000000LL; -} - -static inline void filetime_to_timespec(const FILETIME *ft, struct timespec *ts) -{ - long long hnsec = filetime_to_hnsec(ft); - ts->tv_sec = (time_t)(hnsec / 10000000); - ts->tv_nsec = (hnsec % 10000000) * 100; -} - /** * Verifies that safe_create_leading_directories() would succeed. */ diff --git a/compat/mingw.h b/compat/mingw.h index 611e04f2e5d311..a3379284c7870f 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -351,6 +351,17 @@ static inline int getrlimit(int resource, struct rlimit *rlp) return 0; } +/* + * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. + * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. 
+ */ +static inline long long filetime_to_hnsec(const FILETIME *ft) +{ + long long winTime = ((long long)ft->dwHighDateTime << 32) + ft->dwLowDateTime; + /* Windows to Unix Epoch conversion */ + return winTime - 116444736000000000LL; +} + /* * Use mingw specific stat()/lstat()/fstat() implementations on Windows, * including our own struct stat with 64 bit st_size and nanosecond-precision @@ -367,6 +378,13 @@ struct timespec { #endif #endif +static inline void filetime_to_timespec(const FILETIME *ft, struct timespec *ts) +{ + long long hnsec = filetime_to_hnsec(ft); + ts->tv_sec = (time_t)(hnsec / 10000000); + ts->tv_nsec = (hnsec % 10000000) * 100; +} + struct mingw_stat { _dev_t st_dev; _ino_t st_ino; From 8c6f15888179c40d31b0759e4fa6efcd96797ac2 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 8 Sep 2013 14:17:31 +0200 Subject: [PATCH 210/303] Win32: dirent.c: Move opendir down Move opendir down in preparation for the next patch. Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/win32/dirent.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 52420ec7d4dad7..2603a0fa39f45a 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -18,40 +18,6 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -DIR *opendir(const char *name) -{ - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ - WIN32_FIND_DATAW fdata; - HANDLE h; - int len; - DIR *dir; - - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) - return NULL; - - /* append optional '/' and wildcard '*' */ - if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; - pattern[len++] = '*'; - pattern[len] = 0; - - /* open find handle */ - h = FindFirstFileW(pattern, &fdata); - if (h == INVALID_HANDLE_VALUE) { - DWORD err = GetLastError(); - errno = 
(err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); - return NULL; - } - - /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); - dir->dd_handle = h; - dir->dd_stat = 0; - finddata2dirent(&dir->dd_dir, &fdata); - return dir; -} - struct dirent *readdir(DIR *dir) { if (!dir) { @@ -90,3 +56,37 @@ int closedir(DIR *dir) free(dir); return 0; } + +DIR *opendir(const char *name) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int len; + DIR *dir; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((len = xutftowcs_path(pattern, name)) < 0) + return NULL; + + /* append optional '/' and wildcard '*' */ + if (len && !is_dir_sep(pattern[len - 1])) + pattern[len++] = '/'; + pattern[len++] = '*'; + pattern[len] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* initialize DIR structure and copy first dir entry */ + dir = xmalloc(sizeof(DIR)); + dir->dd_handle = h; + dir->dd_stat = 0; + finddata2dirent(&dir->dd_dir, &fdata); + return dir; +} From 5220aeab0303e787b5e93ce189ad810155c6a9ce Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 8 Sep 2013 14:18:40 +0200 Subject: [PATCH 211/303] mingw: make the dirent implementation pluggable Emulating the POSIX `dirent` API on Windows via `FindFirstFile()`/`FindNextFile()` is pretty straightforward, however, most of the information provided in the `WIN32_FIND_DATA` structure is thrown away in the process. A more sophisticated implementation may cache this data, e.g. for later reuse in calls to `lstat()`. Make the `dirent` implementation pluggable so that it can be switched at runtime, e.g. based on a config option.
Define a base DIR structure with pointers to `readdir()`/`closedir()` that match the `opendir()` implementation (similar to vtable pointers in Object-Oriented Programming). Define `readdir()`/`closedir()` so that they call the function pointers in the `DIR` structure. This allows to choose the `opendir()` implementation on a call-by-call basis. Make the fixed-size `dirent.d_name` buffer a flex array, as `d_name` may be implementation specific (e.g. a caching implementation may allocate a `struct dirent` with _just_ the size needed to hold the `d_name` in question). Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/win32/dirent.c | 30 +++++++++++++++++++----------- compat/win32/dirent.h | 26 +++++++++++++++++++------- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 2603a0fa39f45a..139d2ba3c4da34 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -1,15 +1,21 @@ #include "../../git-compat-util.h" -struct DIR { - struct dirent dd_dir; /* includes d_type */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +typedef struct dirent_DIR { + struct DIR base_dir; /* extend base struct DIR */ HANDLE dd_handle; /* FindFirstFile handle */ int dd_stat; /* 0-based index */ -}; + struct dirent dd_dir; /* includes d_type */ +} dirent_DIR; +#pragma GCC diagnostic pop + +DIR *(*opendir)(const char *dirname) = dirent_opendir; static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) { - /* convert UTF-16 name to UTF-8 */ - xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name)); + /* convert UTF-16 name to UTF-8 (d_name points to dirent_DIR.dd_name) */ + xwcstoutf(ent->d_name, fdata->cFileName, MAX_PATH * 3); /* Set file type, based on WIN32_FIND_DATA */ if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) @@ -18,7 +24,7 @@ static inline void finddata2dirent(struct dirent *ent, 
WIN32_FIND_DATAW *fdata) ent->d_type = DT_REG; } -struct dirent *readdir(DIR *dir) +static struct dirent *dirent_readdir(dirent_DIR *dir) { if (!dir) { errno = EBADF; /* No set_errno for mingw */ @@ -45,7 +51,7 @@ struct dirent *readdir(DIR *dir) return &dir->dd_dir; } -int closedir(DIR *dir) +static int dirent_closedir(dirent_DIR *dir) { if (!dir) { errno = EBADF; @@ -57,13 +63,13 @@ int closedir(DIR *dir) return 0; } -DIR *opendir(const char *name) +DIR *dirent_opendir(const char *name) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; HANDLE h; int len; - DIR *dir; + dirent_DIR *dir; /* convert name to UTF-16 and check length < MAX_PATH */ if ((len = xutftowcs_path(pattern, name)) < 0) @@ -84,9 +90,11 @@ DIR *opendir(const char *name) } /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(DIR)); + dir = xmalloc(sizeof(dirent_DIR) + MAX_PATH); + dir->base_dir.preaddir = (struct dirent *(*)(DIR *dir)) dirent_readdir; + dir->base_dir.pclosedir = (int (*)(DIR *dir)) dirent_closedir; dir->dd_handle = h; dir->dd_stat = 0; finddata2dirent(&dir->dd_dir, &fdata); - return dir; + return (DIR*) dir; } diff --git a/compat/win32/dirent.h b/compat/win32/dirent.h index 058207e4bfed62..e0e0e1700f64d1 100644 --- a/compat/win32/dirent.h +++ b/compat/win32/dirent.h @@ -1,20 +1,32 @@ #ifndef DIRENT_H #define DIRENT_H -typedef struct DIR DIR; - #define DT_UNKNOWN 0 #define DT_DIR 1 #define DT_REG 2 #define DT_LNK 3 struct dirent { - unsigned char d_type; /* file type to prevent lstat after readdir */ - char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */ + unsigned char d_type; /* file type to prevent lstat after readdir */ + char d_name[FLEX_ARRAY]; /* file name */ }; -DIR *opendir(const char *dirname); -struct dirent *readdir(DIR *dir); -int closedir(DIR *dir); +/* + * Base DIR structure, contains pointers to readdir/closedir implementations so + * that opendir may choose a concrete implementation on a 
call-by-call basis. + */ +typedef struct DIR { + struct dirent *(*preaddir)(struct DIR *dir); + int (*pclosedir)(struct DIR *dir); +} DIR; + +/* default dirent implementation */ +extern DIR *dirent_opendir(const char *dirname); + +/* current dirent implementation */ +extern DIR *(*opendir)(const char *dirname); + +#define readdir(dir) (dir->preaddir(dir)) +#define closedir(dir) (dir->pclosedir(dir)) #endif /* DIRENT_H */ From 7210ff76ec1fb32b0bf75464a25a1c04909302d7 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 8 Sep 2013 14:21:30 +0200 Subject: [PATCH 212/303] Win32: make the lstat implementation pluggable Emulating the POSIX lstat API on Windows via GetFileAttributes[Ex] is quite slow. Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. A caching implementation may improve performance by bulk-reading entire directories or reusing data obtained via opendir / readdir. Make the lstat implementation pluggable so that it can be switched at runtime, e.g. based on a config option. 
Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 2 ++ compat/mingw.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 5b0d759573d6b6..432a0467d38963 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -901,6 +901,8 @@ static int do_stat_internal(int follow, const char *file_name, struct stat *buf) return do_lstat(follow, alt_name, buf); } +int (*lstat)(const char *file_name, struct stat *buf) = mingw_lstat; + static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) { BY_HANDLE_FILE_INFORMATION fdata; diff --git a/compat/mingw.h b/compat/mingw.h index a3379284c7870f..6471ab8892e3c8 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -417,7 +417,7 @@ int mingw_fstat(int fd, struct stat *buf); #ifdef lstat #undef lstat #endif -#define lstat mingw_lstat +extern int (*lstat)(const char *file_name, struct stat *buf); int mingw_utime(const char *file_name, const struct utimbuf *times); From 6cdb2fba1b935c76ee3bba151f10e3ca79a45719 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 8 Sep 2013 14:23:27 +0200 Subject: [PATCH 213/303] add infrastructure for read-only file system level caches Add a macro to mark code sections that only read from the file system, along with a config option and documentation. This facilitates implementation of relatively simple file system level caches without the need to synchronize with the file system. Enable read-only sections for 'git status' and preload_index. 
Signed-off-by: Karsten Blees <blees@dcon.de> --- Documentation/config/core.txt | 6 ++++++ builtin/commit.c | 1 + compat/mingw.c | 6 ++++++ compat/mingw.h | 2 ++ git-compat-util.h | 15 +++++++++++++++ preload-index.c | 3 +++ 6 files changed, 33 insertions(+) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 37afbaf5a419d1..a22e25fb0ba17e 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -670,6 +670,12 @@ relatively high IO latencies. When enabled, Git will do the index comparison to the filesystem data in parallel, allowing overlapping IO's. Defaults to true. +core.fscache:: + Enable additional caching of file system data for some operations. ++ +Git for Windows uses this to bulk-read and cache lstat data of entire +directories (instead of doing lstat file by file). + core.unsetenvvars:: Windows-only: comma-separated list of environment variables' names that need to be unset before spawning any other process. diff --git a/builtin/commit.c b/builtin/commit.c index fcf9c85947e6a1..482897300348a9 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1540,6 +1540,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) PATHSPEC_PREFER_FULL, prefix, argv); + enable_fscache(1); if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) progress_flag = REFRESH_PROGRESS; diff --git a/compat/mingw.c b/compat/mingw.c index 432a0467d38963..1bea9d1507e1f2 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -233,6 +233,7 @@ enum hide_dotfiles_type { static int core_restrict_inherited_handles = -1; static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; static char *unset_environment_variables; +int core_fscache; int mingw_core_config(const char *var, const char *value, void *cb) { @@ -244,6 +245,11 @@ int mingw_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.fscache")) { + core_fscache = 
git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.unsetenvvars")) { free(unset_environment_variables); unset_environment_variables = xstrdup(value); diff --git a/compat/mingw.h b/compat/mingw.h index 6471ab8892e3c8..5401902dcba5e9 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -11,6 +11,8 @@ typedef _sigset_t sigset_t; #undef _POSIX_THREAD_SAFE_FUNCTIONS #endif +extern int core_fscache; + int mingw_core_config(const char *var, const char *value, void *cb); #define platform_core_config mingw_core_config diff --git a/git-compat-util.h b/git-compat-util.h index aa693bc3b70ff1..b6c9f2ad0309cb 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1527,6 +1527,21 @@ static inline int is_missing_file_error(int errno_) return (errno_ == ENOENT || errno_ == ENOTDIR); } +/* + * Enable/disable a read-only cache for file system data on platforms that + * support it. + * + * Implementing a live-cache is complicated and requires special platform + * support (inotify, ReadDirectoryChangesW...). enable_fscache shall be used + * to mark sections of git code that extensively read from the file system + * without modifying anything. Implementations can use this to cache e.g. stat + * data or even file content without the need to synchronize with the file + * system. 
+ */ +#ifndef enable_fscache +#define enable_fscache(x) /* noop */ +#endif + int cmd_main(int, const char **); /* diff --git a/preload-index.c b/preload-index.c index 100f7a374dca1b..2275e4078693ae 100644 --- a/preload-index.c +++ b/preload-index.c @@ -126,6 +126,7 @@ void preload_index(struct index_state *index, pthread_mutex_init(&pd.mutex, NULL); } + enable_fscache(1); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; int err; @@ -161,6 +162,8 @@ void preload_index(struct index_state *index, trace2_data_intmax("index", NULL, "preload/sum_lstat", t2_sum_lstat); trace2_region_leave("index", "preload", NULL); + + enable_fscache(0); } int repo_read_index_preload(struct repository *repo, From a8598ed63a0433be260635599aaa4a5975c9e3b5 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Tue, 24 Jan 2017 15:12:13 -0500 Subject: [PATCH 214/303] fscache: add key for GIT_TRACE_FSCACHE Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/win32/fscache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index ff6d272285fe45..02814edb214604 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -8,6 +8,7 @@ static int initialized; static volatile long enabled; static struct hashmap map; static CRITICAL_SECTION mutex; +static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* * An entry in the file system cache. Used for both entire directory listings @@ -206,6 +207,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); errno = (err == ERROR_DIRECTORY) ? 
ENOTDIR : err_win_to_posix(err); + trace_printf_key(&trace_fscache, "fscache: error(%d) '%s'\n", + errno, dir->dirent.d_name); return NULL; } @@ -391,6 +394,7 @@ int fscache_enable(int enable) fscache_clear(); LeaveCriticalSection(&mutex); } + trace_printf_key(&trace_fscache, "fscache: enable(%d)\n", enable); return result; } From 74e7e79ed2b760fe398fd04e3aa48c3eeb05fd33 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Wed, 1 Nov 2017 15:05:44 -0400 Subject: [PATCH 215/303] dir.c: make add_excludes aware of fscache during status Teach read_directory_recursive() and add_excludes() to be aware of optional fscache and avoid trying to open() and fstat() non-existent ".gitignore" files in every directory in the worktree. The current code in add_excludes() calls open() and then fstat() for a ".gitignore" file in each directory present in the worktree. Change that when fscache is enabled to call lstat() first and if present, call open(). This seems backwards because lstat needs to do more work than fstat. But when fscache is enabled, fscache will already know if the .gitignore file exists and can completely avoid the IO calls. This works because of the lstat diversion to mingw_lstat when fscache is enabled. This reduced status times on a 350K file enlistment of the Windows repo on an NVMe SSD by 0.25 seconds.
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> --- compat/win32/fscache.c | 5 +++++ compat/win32/fscache.h | 3 +++ dir.c | 33 ++++++++++++++++++++++++--------- git-compat-util.h | 4 ++++ 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 403441f6a96a5a..330e3d9a11d750 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -10,6 +10,11 @@ static struct hashmap map; static CRITICAL_SECTION mutex; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); +int fscache_is_enabled(void) +{ + return enabled; +} + /* * An entry in the file system cache. Used for both entire directory listings * and file entries. diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index ed518b422d705e..9a21fd5709c5bc 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -4,6 +4,9 @@ int fscache_enable(int enable); #define enable_fscache(x) fscache_enable(x) +int fscache_is_enabled(void); +#define is_fscache_enabled() (fscache_is_enabled()) + DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/dir.c b/dir.c index 75429508200a9d..818e36df1b3acf 100644 --- a/dir.c +++ b/dir.c @@ -1063,16 +1063,31 @@ static int add_patterns(const char *fname, const char *base, int baselen, size_t size = 0; char *buf; - if (flags & PATTERN_NOFOLLOW) - fd = open_nofollow(fname, O_RDONLY); - else - fd = open(fname, O_RDONLY); - - if (fd < 0 || fstat(fd, &st) < 0) { - if (fd < 0) - warn_on_fopen_errors(fname); + if (is_fscache_enabled()) { + if (lstat(fname, &st) < 0) { + fd = -1; + } else { + fd = open(fname, O_RDONLY); + if (fd < 0) + warn_on_fopen_errors(fname); + } + } else { + if (flags & PATTERN_NOFOLLOW) + fd = open_nofollow(fname, O_RDONLY); else - close(fd); + fd = open(fname, O_RDONLY); + + if (fd < 0 || fstat(fd, &st) < 0) { + if (fd < 0) + warn_on_fopen_errors(fname); + else { + close(fd); + fd = -1; + } + } + } + + if (fd < 0) { 
if (!istate) return -1; r = read_skip_worktree_file_from_index(istate, fname, diff --git a/git-compat-util.h b/git-compat-util.h index 9a7ed9fabc9754..16ab6835980021 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1544,6 +1544,10 @@ static inline int is_missing_file_error(int errno_) #define enable_fscache(x) /* noop */ #endif +#ifndef is_fscache_enabled +#define is_fscache_enabled() (0) +#endif + int cmd_main(int, const char **); /* From 5dc079effa06306de148b83c8256d2b0791d3b1d Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Tue, 1 Oct 2013 12:51:54 +0200 Subject: [PATCH 216/303] mingw: add a cache below mingw's lstat and dirent implementations Checking the work tree status is quite slow on Windows, due to slow `lstat()` emulation (git calls `lstat()` once for each file in the index). Windows operating system APIs seem to be much better at scanning the status of entire directories than checking single files. Add an `lstat()` implementation that uses a cache for lstat data. Cache misses read the entire parent directory and add it to the cache. Subsequent `lstat()` calls for the same directory are served directly from the cache. Also implement `opendir()`/`readdir()`/`closedir()` so that they create and use directory listings in the cache. The cache doesn't track file system changes and doesn't plug into any modifying file APIs, so it has to be explicitly enabled for git functions that don't modify the working copy. Note: in an earlier version of this patch, the cache was always active and tracked file system changes via ReadDirectoryChangesW. However, this was much more complex and had negative impact on the performance of modifying git commands such as 'git checkout'. 
Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/win32/fscache.c | 462 ++++++++++++++++++++++++++++ compat/win32/fscache.h | 10 + config.mak.uname | 4 +- contrib/buildsystems/CMakeLists.txt | 3 +- git-compat-util.h | 2 + 5 files changed, 478 insertions(+), 3 deletions(-) create mode 100644 compat/win32/fscache.c create mode 100644 compat/win32/fscache.h diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c new file mode 100644 index 00000000000000..f3bcb1881b700b --- /dev/null +++ b/compat/win32/fscache.c @@ -0,0 +1,462 @@ +#include "../../cache.h" +#include "../../hashmap.h" +#include "../win32.h" +#include "fscache.h" +#include "../../dir.h" + +static int initialized; +static volatile long enabled; +static struct hashmap map; +static CRITICAL_SECTION mutex; + +/* + * An entry in the file system cache. Used for both entire directory listings + * and file entries. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +struct fsentry { + struct hashmap_entry ent; + mode_t st_mode; + /* Pointer to the directory listing, or NULL for the listing itself. */ + struct fsentry *list; + /* Pointer to the next file entry of the list. */ + struct fsentry *next; + + union { + /* Reference count of the directory listing. */ + volatile long refcnt; + struct { + /* More stat members (only used for file entries). */ + off64_t st_size; + struct timespec st_atim; + struct timespec st_mtim; + struct timespec st_ctim; + } s; + } u; + + /* Length of name. */ + unsigned short len; + /* + * Name of the entry. For directory listings: relative path of the + * directory, without trailing '/' (empty for cwd()). For file entries: + * name of the file. Typically points to the end of the structure if + * the fsentry is allocated on the heap (see fsentry_alloc), or to a + * local variable if on the stack (see fsentry_init). 
+ */ + struct dirent dirent; +}; +#pragma GCC diagnostic pop + +struct heap_fsentry { + union { + struct fsentry ent; + char dummy[sizeof(struct fsentry) + MAX_PATH]; + } u; +}; + +/* + * Compares the paths of two fsentry structures for equality. + */ +static int fsentry_cmp(void *unused_cmp_data, + const struct fsentry *fse1, const struct fsentry *fse2, + void *unused_keydata) +{ + int res; + if (fse1 == fse2) + return 0; + + /* compare the list parts first */ + if (fse1->list != fse2->list && + (res = fsentry_cmp(NULL, fse1->list ? fse1->list : fse1, + fse2->list ? fse2->list : fse2, NULL))) + return res; + + /* if list parts are equal, compare len and name */ + if (fse1->len != fse2->len) + return fse1->len - fse2->len; + return fspathncmp(fse1->dirent.d_name, fse2->dirent.d_name, fse1->len); +} + +/* + * Calculates the hash code of an fsentry structure's path. + */ +static unsigned int fsentry_hash(const struct fsentry *fse) +{ + unsigned int hash = fse->list ? fse->list->ent.hash : 0; + return hash ^ memihash(fse->dirent.d_name, fse->len); +} + +/* + * Initialize an fsentry structure for use by fsentry_hash and fsentry_cmp. + */ +static void fsentry_init(struct fsentry *fse, struct fsentry *list, + const char *name, size_t len) +{ + fse->list = list; + if (len > MAX_PATH) + BUG("Trying to allocate fsentry for long path '%.*s'", + (int)len, name); + memcpy(fse->dirent.d_name, name, len); + fse->dirent.d_name[len] = 0; + fse->len = len; + hashmap_entry_init(&fse->ent, fsentry_hash(fse)); +} + +/* + * Allocate an fsentry structure on the heap. + */ +static struct fsentry *fsentry_alloc(struct fsentry *list, const char *name, + size_t len) +{ + /* overallocate fsentry and copy the name to the end */ + struct fsentry *fse = xmalloc(sizeof(struct fsentry) + len + 1); + /* init the rest of the structure */ + fsentry_init(fse, list, name, len); + fse->next = NULL; + fse->u.refcnt = 1; + return fse; +} + +/* + * Add a reference to an fsentry. 
+ */ +inline static void fsentry_addref(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + InterlockedIncrement(&(fse->u.refcnt)); +} + +/* + * Release the reference to an fsentry, frees the memory if its the last ref. + */ +static void fsentry_release(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + if (InterlockedDecrement(&(fse->u.refcnt))) + return; + + while (fse) { + struct fsentry *next = fse->next; + free(fse); + fse = next; + } +} + +/* + * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. + */ +static struct fsentry *fseentry_create_entry(struct fsentry *list, + const WIN32_FIND_DATAW *fdata) +{ + char buf[MAX_PATH * 3]; + int len; + struct fsentry *fse; + len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); + + fse = fsentry_alloc(list, buf, len); + + fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes); + fse->dirent.d_type = S_ISDIR(fse->st_mode) ? DT_DIR : DT_REG; + fse->u.s.st_size = (((off64_t) (fdata->nFileSizeHigh)) << 32) + | fdata->nFileSizeLow; + filetime_to_timespec(&(fdata->ftLastAccessTime), &(fse->u.s.st_atim)); + filetime_to_timespec(&(fdata->ftLastWriteTime), &(fse->u.s.st_mtim)); + filetime_to_timespec(&(fdata->ftCreationTime), &(fse->u.s.st_ctim)); + + return fse; +} + +/* + * Create an fsentry-based directory listing (similar to opendir / readdir). + * Dir should not contain trailing '/'. Use an empty string for the current + * directory (not "."!). 
+ */ +static struct fsentry *fsentry_create_list(const struct fsentry *dir) +{ + wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + WIN32_FIND_DATAW fdata; + HANDLE h; + int wlen; + struct fsentry *list, **phead; + DWORD err; + + /* convert name to UTF-16 and check length < MAX_PATH */ + if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, + dir->len)) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; + return NULL; + } + + /* append optional '/' and wildcard '*' */ + if (wlen) + pattern[wlen++] = '/'; + pattern[wlen++] = '*'; + pattern[wlen] = 0; + + /* open find handle */ + h = FindFirstFileW(pattern, &fdata); + if (h == INVALID_HANDLE_VALUE) { + err = GetLastError(); + errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); + return NULL; + } + + /* allocate object to hold directory listing */ + list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + + /* walk directory and build linked list of fsentry structures */ + phead = &list->next; + do { + *phead = fseentry_create_entry(list, &fdata); + phead = &(*phead)->next; + } while (FindNextFileW(h, &fdata)); + + /* remember result of last FindNextFile, then close find handle */ + err = GetLastError(); + FindClose(h); + + /* return the list if we've got all the files */ + if (err == ERROR_NO_MORE_FILES) + return list; + + /* otherwise free the list and return error */ + fsentry_release(list); + errno = err_win_to_posix(err); + return NULL; +} + +/* + * Adds a directory listing to the cache. + */ +static void fscache_add(struct fsentry *fse) +{ + if (fse->list) + fse = fse->list; + + for (; fse; fse = fse->next) + hashmap_add(&map, &fse->ent); +} + +/* + * Clears the cache. + */ +static void fscache_clear(void) +{ + hashmap_clear_and_free(&map, struct fsentry, ent); + hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); +} + +/* + * Checks if the cache is enabled for the given path. 
+ */ +static inline int fscache_enabled(const char *path) +{ + return enabled > 0 && !is_absolute_path(path); +} + +/* + * Looks up or creates a cache entry for the specified key. + */ +static struct fsentry *fscache_get(struct fsentry *key) +{ + struct fsentry *fse; + + EnterCriticalSection(&mutex); + /* check if entry is in cache */ + fse = hashmap_get_entry(&map, key, ent, NULL); + if (fse) { + fsentry_addref(fse); + LeaveCriticalSection(&mutex); + return fse; + } + /* if looking for a file, check if directory listing is in cache */ + if (!fse && key->list) { + fse = hashmap_get_entry(&map, key->list, ent, NULL); + if (fse) { + LeaveCriticalSection(&mutex); + /* dir entry without file entry -> file doesn't exist */ + errno = ENOENT; + return NULL; + } + } + + /* create the directory listing (outside mutex!) */ + LeaveCriticalSection(&mutex); + fse = fsentry_create_list(key->list ? key->list : key); + if (!fse) + return NULL; + + EnterCriticalSection(&mutex); + /* add directory listing if it hasn't been added by some other thread */ + if (!hashmap_get_entry(&map, key, ent, NULL)) + fscache_add(fse); + + /* lookup file entry if requested (fse already points to directory) */ + if (key->list) + fse = hashmap_get_entry(&map, key, ent, NULL); + + /* return entry or ENOENT */ + if (fse) + fsentry_addref(fse); + else + errno = ENOENT; + + LeaveCriticalSection(&mutex); + return fse; +} + +/* + * Enables or disables the cache. Note that the cache is read-only, changes to + * the working directory are NOT reflected in the cache while enabled. + */ +int fscache_enable(int enable) +{ + int result; + + if (!initialized) { + /* allow the cache to be disabled entirely */ + if (!core_fscache) + return 0; + + InitializeCriticalSection(&mutex); + hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); + initialized = 1; + } + + result = enable ? 
InterlockedIncrement(&enabled) + : InterlockedDecrement(&enabled); + + if (enable && result == 1) { + /* redirect opendir and lstat to the fscache implementations */ + opendir = fscache_opendir; + lstat = fscache_lstat; + } else if (!enable && !result) { + /* reset opendir and lstat to the original implementations */ + opendir = dirent_opendir; + lstat = mingw_lstat; + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } + return result; +} + +/* + * Lstat replacement, uses the cache if enabled, otherwise redirects to + * mingw_lstat. + */ +int fscache_lstat(const char *filename, struct stat *st) +{ + int dirlen, base, len; + struct heap_fsentry key[2]; + struct fsentry *fse; + + if (!fscache_enabled(filename)) + return mingw_lstat(filename, st); + + /* split filename into path + name */ + len = strlen(filename); + if (len && is_dir_sep(filename[len - 1])) + len--; + base = len; + while (base && !is_dir_sep(filename[base - 1])) + base--; + dirlen = base ? base - 1 : 0; + + /* lookup entry for path + name in cache */ + fsentry_init(&key[0].u.ent, NULL, filename, dirlen); + fsentry_init(&key[1].u.ent, &key[0].u.ent, filename + base, len - base); + fse = fscache_get(&key[1].u.ent); + if (!fse) { + errno = ENOENT; + return -1; + } + + /* copy stat data */ + st->st_ino = 0; + st->st_gid = 0; + st->st_uid = 0; + st->st_dev = 0; + st->st_rdev = 0; + st->st_nlink = 1; + st->st_mode = fse->st_mode; + st->st_size = fse->u.s.st_size; + st->st_atim = fse->u.s.st_atim; + st->st_mtim = fse->u.s.st_mtim; + st->st_ctim = fse->u.s.st_ctim; + + /* don't forget to release fsentry */ + fsentry_release(fse); + return 0; +} + +typedef struct fscache_DIR { + struct DIR base_dir; /* extend base struct DIR */ + struct fsentry *pfsentry; + struct dirent *dirent; +} fscache_DIR; + +/* + * Readdir replacement. 
+ */ +static struct dirent *fscache_readdir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + struct fsentry *next = dir->pfsentry->next; + if (!next) + return NULL; + dir->pfsentry = next; + dir->dirent = &next->dirent; + return dir->dirent; +} + +/* + * Closedir replacement. + */ +static int fscache_closedir(DIR *base_dir) +{ + fscache_DIR *dir = (fscache_DIR*) base_dir; + fsentry_release(dir->pfsentry); + free(dir); + return 0; +} + +/* + * Opendir replacement, uses a directory listing from the cache if enabled, + * otherwise calls original dirent implementation. + */ +DIR *fscache_opendir(const char *dirname) +{ + struct heap_fsentry key; + struct fsentry *list; + fscache_DIR *dir; + int len; + + if (!fscache_enabled(dirname)) + return dirent_opendir(dirname); + + /* prepare name (strip trailing '/', replace '.') */ + len = strlen(dirname); + if ((len == 1 && dirname[0] == '.') || + (len && is_dir_sep(dirname[len - 1]))) + len--; + + /* get directory listing from cache */ + fsentry_init(&key.u.ent, NULL, dirname, len); + list = fscache_get(&key.u.ent); + if (!list) + return NULL; + + /* alloc and return DIR structure */ + dir = (fscache_DIR*) xmalloc(sizeof(fscache_DIR)); + dir->base_dir.preaddir = fscache_readdir; + dir->base_dir.pclosedir = fscache_closedir; + dir->pfsentry = list; + return (DIR*) dir; +} diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h new file mode 100644 index 00000000000000..ed518b422d705e --- /dev/null +++ b/compat/win32/fscache.h @@ -0,0 +1,10 @@ +#ifndef FSCACHE_H +#define FSCACHE_H + +int fscache_enable(int enable); +#define enable_fscache(x) fscache_enable(x) + +DIR *fscache_opendir(const char *dir); +int fscache_lstat(const char *file_name, struct stat *buf); + +#endif diff --git a/config.mak.uname b/config.mak.uname index 9a363bb58abd4a..d274ac12365190 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -486,7 +486,7 @@ endif compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ 
compat/win32/trace2_win32_process_info.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DDETECT_MSYS_TTY -DENSURE_MSYSTEM_IS_SET -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\" BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO # invalidcontinue.obj allows Git's source code to close the same file @@ -673,7 +673,7 @@ ifeq ($(uname_S),MINGW) compat/win32/flush.o \ compat/win32/path-utils.o \ compat/win32/pthread.o compat/win32/syslog.o \ - compat/win32/dirent.o + compat/win32/dirent.o compat/win32/fscache.o BASIC_CFLAGS += -DWIN32 EXTLIBS += -lws2_32 GITLIBS += git.res diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index ef80af7e9ead46..4f366765f75f19 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -307,7 +307,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") compat/win32/trace2_win32_process_info.c compat/win32/dirent.c compat/nedmalloc/nedmalloc.c - compat/strdup.c) + compat/strdup.c + compat/win32/fscache.c) set(NO_UNIX_SOCKETS 1) elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") diff --git a/git-compat-util.h b/git-compat-util.h index b6c9f2ad0309cb..9a7ed9fabc9754 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -261,9 +261,11 @@ static inline int is_xplatform_dir_sep(int c) /* pull in Windows compatibility stuff */ #include "compat/win32/path-utils.h" #include "compat/mingw.h" +#include "compat/win32/fscache.h" #elif defined(_MSC_VER) #include "compat/win32/path-utils.h" #include "compat/msvc.h" +#include "compat/win32/fscache.h" #else #include <sys/utsname.h> #include <sys/wait.h> From 2e26a6f3e5038a3ab6500ceddbc348c3504ca43c Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Tue, 13 Dec 2016 14:05:32 -0500 Subject: [PATCH 217/303] fscache: remember not-found directories Teach FSCACHE to remember "not found" directories. 
This is a performance optimization. FSCACHE is a performance optimization available for Windows. It intercepts Posix-style lstat() calls into an in-memory directory using FindFirst/FindNext. It improves performance on Windows by catching the first lstat() call in a directory, using FindFirst/FindNext to read the list of files (and attribute data) for the entire directory into the cache, and short-cutting subsequent lstat() calls in the same directory. This gives a major performance boost on Windows. However, it does not remember "not found" directories. When STATUS runs and there are missing directories, the lstat() interception fails to find the parent directory and simply returns ENOENT for the file -- it does not remember that the FindFirst on the directory failed. Thus subsequent lstat() calls in the same directory each re-attempt the FindFirst. This completely defeats any performance gains. This can be seen by doing a sparse-checkout on a large repo and then doing a read-tree to reset the skip-worktree bits and then running status. This change reduced status times for my very large repo by 60%. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/win32/fscache.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 02814edb214604..403441f6a96a5a 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -179,7 +179,8 @@ static struct fsentry *fseentry_create_entry(struct fsentry *list, * Dir should not contain trailing '/'. Use an empty string for the current * directory (not "."!).
*/ -static struct fsentry *fsentry_create_list(const struct fsentry *dir) +static struct fsentry *fsentry_create_list(const struct fsentry *dir, + int *dir_not_found) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ WIN32_FIND_DATAW fdata; @@ -188,6 +189,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) struct fsentry *list, **phead; DWORD err; + *dir_not_found = 0; + /* convert name to UTF-16 and check length < MAX_PATH */ if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, dir->len)) < 0) { @@ -206,6 +209,7 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) h = FindFirstFileW(pattern, &fdata); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); + *dir_not_found = 1; /* or empty directory */ errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err); trace_printf_key(&trace_fscache, "fscache: error(%d) '%s'\n", errno, dir->dirent.d_name); @@ -214,6 +218,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir) /* allocate object to hold directory listing */ list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + list->st_mode = S_IFDIR; + list->dirent.d_type = DT_DIR; /* walk directory and build linked list of fsentry structures */ phead = &list->next; @@ -298,12 +304,16 @@ static struct fsentry *fscache_get_wait(struct fsentry *key) static struct fsentry *fscache_get(struct fsentry *key) { struct fsentry *fse, *future, *waiter; + int dir_not_found; EnterCriticalSection(&mutex); /* check if entry is in cache */ fse = fscache_get_wait(key); if (fse) { - fsentry_addref(fse); + if (fse->st_mode) + fsentry_addref(fse); + else + fse = NULL; /* non-existing directory */ LeaveCriticalSection(&mutex); return fse; } @@ -312,7 +322,10 @@ static struct fsentry *fscache_get(struct fsentry *key) fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); - /* dir entry without file entry -> file doesn't exist */ + /* + * dir entry without file entry, or dir does not + 
* exist -> file doesn't exist + */ errno = ENOENT; return NULL; } @@ -326,7 +339,7 @@ static struct fsentry *fscache_get(struct fsentry *key) /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(future); + fse = fsentry_create_list(future, &dir_not_found); EnterCriticalSection(&mutex); /* remove future entry and signal waiting threads */ @@ -340,6 +353,18 @@ static struct fsentry *fscache_get(struct fsentry *key) /* leave on error (errno set by fsentry_create_list) */ if (!fse) { + if (dir_not_found && key->list) { + /* + * Record that the directory does not exist (or is + * empty, which for all practical matters is the same + * thing as far as fscache is concerned). + */ + fse = fsentry_alloc(key->list->list, + key->list->dirent.d_name, + key->list->len); + fse->st_mode = 0; + hashmap_add(&map, &fse->ent); + } LeaveCriticalSection(&mutex); return NULL; } @@ -351,6 +376,9 @@ static struct fsentry *fscache_get(struct fsentry *key) if (key->list) fse = hashmap_get_entry(&map, key, ent, NULL); + if (fse && !fse->st_mode) + fse = NULL; /* non-existing directory */ + /* return entry or ENOENT */ if (fse) fsentry_addref(fse); From cffcb620974fde2714dcb910eb71f17e2bf20238 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Wed, 20 Dec 2017 10:43:41 -0500 Subject: [PATCH 218/303] fscache: make fscache_enabled() public Make fscache_enabled() function public rather than static. Remove unneeded fscache_is_enabled() function. Change is_fscache_enabled() macro to call fscache_enabled(). is_fscache_enabled() now takes a pathname so that the answer is more precise and mean "is fscache enabled for this pathname", since fscache only stores repo-relative paths and not absolute paths, we can avoid attempting lookups for absolute paths. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> --- compat/win32/fscache.c | 7 +------ compat/win32/fscache.h | 4 ++-- dir.c | 2 +- git-compat-util.h | 2 +- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 330e3d9a11d750..303a5d6274a064 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -10,11 +10,6 @@ static struct hashmap map; static CRITICAL_SECTION mutex; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); -int fscache_is_enabled(void) -{ - return enabled; -} - /* * An entry in the file system cache. Used for both entire directory listings * and file entries. @@ -271,7 +266,7 @@ static void fscache_clear(void) /* * Checks if the cache is enabled for the given path. */ -static inline int fscache_enabled(const char *path) +int fscache_enabled(const char *path) { return enabled > 0 && !is_absolute_path(path); } diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 9a21fd5709c5bc..660ada053b4309 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -4,8 +4,8 @@ int fscache_enable(int enable); #define enable_fscache(x) fscache_enable(x) -int fscache_is_enabled(void); -#define is_fscache_enabled() (fscache_is_enabled()) +int fscache_enabled(const char *path); +#define is_fscache_enabled(path) fscache_enabled(path) DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/dir.c b/dir.c index 818e36df1b3acf..63eb90837a2c3c 100644 --- a/dir.c +++ b/dir.c @@ -1063,7 +1063,7 @@ static int add_patterns(const char *fname, const char *base, int baselen, size_t size = 0; char *buf; - if (is_fscache_enabled()) { + if (is_fscache_enabled(fname)) { if (lstat(fname, &st) < 0) { fd = -1; } else { diff --git a/git-compat-util.h b/git-compat-util.h index 16ab6835980021..d2f78e5ed90588 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1545,7 +1545,7 @@ static inline int 
is_missing_file_error(int errno_) #endif #ifndef is_fscache_enabled -#define is_fscache_enabled() (0) +#define is_fscache_enabled(path) (0) #endif int cmd_main(int, const char **); From 0890b0a38071c82f02f7ea0a545d03a9021fdaaa Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Tue, 24 Jun 2014 13:22:35 +0200 Subject: [PATCH 219/303] fscache: load directories only once If multiple threads access a directory that is not yet in the cache, the directory will be loaded by each thread. Only one of the results is added to the cache, all others are leaked. This wastes performance and memory. On cache miss, add a future object to the cache to indicate that the directory is currently being loaded. Subsequent threads register themselves with the future object and wait. When the first thread has loaded the directory, it replaces the future object with the result and notifies waiting threads. Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/win32/fscache.c | 65 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index f3bcb1881b700b..ff6d272285fe45 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -26,6 +26,8 @@ struct fsentry { union { /* Reference count of the directory listing. */ volatile long refcnt; + /* Handle to wait on the loading thread. */ + HANDLE hwait; struct { /* More stat members (only used for file entries). */ off64_t st_size; @@ -260,16 +262,43 @@ static inline int fscache_enabled(const char *path) return enabled > 0 && !is_absolute_path(path); } +/* + * Looks up a cache entry, waits if its being loaded by another thread. + * The mutex must be owned by the calling thread. 
+ */ +static struct fsentry *fscache_get_wait(struct fsentry *key) +{ + struct fsentry *fse = hashmap_get_entry(&map, key, ent, NULL); + + /* return if its a 'real' entry (future entries have refcnt == 0) */ + if (!fse || fse->list || fse->u.refcnt) + return fse; + + /* create an event and link our key to the future entry */ + key->u.hwait = CreateEvent(NULL, TRUE, FALSE, NULL); + key->next = fse->next; + fse->next = key; + + /* wait for the loading thread to signal us */ + LeaveCriticalSection(&mutex); + WaitForSingleObject(key->u.hwait, INFINITE); + CloseHandle(key->u.hwait); + EnterCriticalSection(&mutex); + + /* repeat cache lookup */ + return hashmap_get_entry(&map, key, ent, NULL); +} + /* * Looks up or creates a cache entry for the specified key. */ static struct fsentry *fscache_get(struct fsentry *key) { - struct fsentry *fse; + struct fsentry *fse, *future, *waiter; EnterCriticalSection(&mutex); /* check if entry is in cache */ - fse = hashmap_get_entry(&map, key, ent, NULL); + fse = fscache_get_wait(key); if (fse) { fsentry_addref(fse); LeaveCriticalSection(&mutex); @@ -277,7 +306,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* if looking for a file, check if directory listing is in cache */ if (!fse && key->list) { - fse = hashmap_get_entry(&map, key->list, ent, NULL); + fse = fscache_get_wait(key->list); if (fse) { LeaveCriticalSection(&mutex); /* dir entry without file entry -> file doesn't exist */ @@ -286,16 +315,34 @@ static struct fsentry *fscache_get(struct fsentry *key) } } + /* add future entry to indicate that we're loading it */ + future = key->list ? key->list : key; + future->next = NULL; + future->u.refcnt = 0; + hashmap_add(&map, &future->ent); + /* create the directory listing (outside mutex!) */ LeaveCriticalSection(&mutex); - fse = fsentry_create_list(key->list ? 
key->list : key); - if (!fse) + fse = fsentry_create_list(future); + EnterCriticalSection(&mutex); + + /* remove future entry and signal waiting threads */ + hashmap_remove(&map, &future->ent, NULL); + waiter = future->next; + while (waiter) { + HANDLE h = waiter->u.hwait; + waiter = waiter->next; + SetEvent(h); + } + + /* leave on error (errno set by fsentry_create_list) */ + if (!fse) { + LeaveCriticalSection(&mutex); return NULL; + } - EnterCriticalSection(&mutex); - /* add directory listing if it hasn't been added by some other thread */ - if (!hashmap_get_entry(&map, key, ent, NULL)) - fscache_add(fse); + /* add directory listing to the cache */ + fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ if (key->list) From 0d766ed94cf4875b25e95c66bce31574b8eca227 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Wed, 25 Jan 2017 18:39:16 +0100 Subject: [PATCH 220/303] fscache: add a test for the dir-not-found optimization Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t1090-sparse-checkout-scope.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 3a14218b245d4c..529844e2862c74 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -106,4 +106,24 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success MINGW 'no unnecessary opendir() with fscache' ' + git clone . 
fscache-test && + ( + cd fscache-test && + git config core.fscache 1 && + echo "/excluded/*" >.git/info/sparse-checkout && + for f in $(test_seq 10) + do + sha1=$(echo $f | git hash-object -w --stdin) && + git update-index --add \ + --cacheinfo 100644,$sha1,excluded/$f || exit 1 + done && + test_tick && + git commit -m excluded && + GIT_TRACE_FSCACHE=1 git status >out 2>err && + grep excluded err >grep.out && + test_line_count = 1 grep.out + ) +' + test_done From fb506ced1bffe0b87b7857cfa8697b0f03ec5214 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Tue, 22 Nov 2016 11:26:38 -0500 Subject: [PATCH 221/303] add: use preload-index and fscache for performance Teach "add" to use preload-index and fscache features to improve performance on very large repositories. During an "add", a call is made to run_diff_files() which calls check_remove() for each index-entry. This calls lstat(). On Windows, the fscache code intercepts the lstat() calls and builds a private cache using the FindFirst/FindNext routines, which are much faster. Somewhat independent of this, is the preload-index code which distributes some of the start-up costs across multiple threads. We need to keep the call to read_cache() before parsing the pathspecs (and hence cannot use the pathspecs to limit any preload) because parse_pathspec() is using the index to determine whether a pathspec is, in fact, in a submodule. If we would not read the index first, parse_pathspec() would not error out on a path that is inside a submodule, and t7400-submodule-basic.sh would fail with not ok 47 - do not add files from a submodule We still want the nice preload performance boost, though, so we simply call read_cache_preload(&pathspecs) after parsing the pathspecs. 
Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- builtin/add.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/builtin/add.c b/builtin/add.c index f84372964c8c48..e62fe74036e7a1 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -593,6 +593,10 @@ int cmd_add(int argc, const char **argv, const char *prefix) die_in_unpopulated_submodule(&the_index, prefix); die_path_inside_submodule(&the_index, &pathspec); + enable_fscache(1); + /* We do not really re-read the index but update the up-to-date flags */ + preload_index(&the_index, &pathspec, 0); + if (add_new_files) { int baselen; @@ -695,6 +699,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) die(_("Unable to write new index file")); dir_clear(&dir); + enable_fscache(0); UNLEAK(pathspec); return exit_status; } From dd574be87b5c0e3bcc6af4cd20842559a5ecc4e1 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Wed, 20 Dec 2017 11:19:27 -0500 Subject: [PATCH 222/303] dir.c: regression fix for add_excludes with fscache Fix regression described in: https://github.com/git-for-windows/git/issues/1392 which was introduced in: https://github.com/git-for-windows/git/commit/b2353379bba414e6c00dde913497cc9c827366f2 Problem Symptoms ================ When the user has a .gitignore file that is a symlink, the fscache optimization introduced above caused the stat-data from the symlink, rather that of the target file, to be returned. Later when the ignore file was read, the buffer length did not match the stat.st_size field and we called die("cannot use <path> as an exclude file") Optimization Rationale ====================== The above optimization calls lstat() before open() primarily to ask fscache if the file exists. It gets the current stat-data as a side effect essentially for free (since we already have it in memory). If the file does not exist, it does not need to call open(). 
And since very few directories have .gitignore files, we can greatly reduce time spent in the filesystem. Discussion of Fix ================= The above optimization calls lstat() rather than stat() because the fscache only intercepts lstat() calls. Calls to stat() stay directed to the mingw_stat() completely bypassing fscache. Furthermore, calls to mingw_stat() always call {open, fstat, close} so that symlinks are properly dereferenced, which adds *additional* open/close calls on top of what the original code in dir.c is doing. Since the problem only manifests for symlinks, we add code to overwrite the stat-data when the path is a symlink. This preserves the effect of the performance gains provided by the fscache in the normal case. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> --- dir.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/dir.c b/dir.c index 63eb90837a2c3c..08d560657486b0 100644 --- a/dir.c +++ b/dir.c @@ -1063,6 +1063,29 @@ static int add_patterns(const char *fname, const char *base, int baselen, size_t size = 0; char *buf; + /* + * A performance optimization for status. + * + * During a status scan, git looks in each directory for a .gitignore + * file before scanning the directory. Since .gitignore files are not + * that common, we can waste a lot of time looking for files that are + * not there. Fortunately, the fscache already knows if the directory + * contains a .gitignore file, since it has already read the directory + * and it already has the stat-data. + * + * If the fscache is enabled, use the fscache-lstat() interlude to see + * if the file exists (in the fscache hash maps) before trying to open() + * it. + * + * This causes problem when the .gitignore file is a symlink, because + * we call lstat() rather than stat() on the symlink and the resulting
+ * We CANNOT use stat() here because the fscache DOES NOT install an + * interlude for stat() and mingw_stat() always calls "open-fstat-close" + * on the file and defeats the purpose of the optimization here. Since + * symlinks are even more rare than .gitignore files, we force a fstat() + * after our open() to get stat-data for the target file. + */ if (is_fscache_enabled(fname)) { if (lstat(fname, &st) < 0) { fd = -1; @@ -1070,6 +1093,11 @@ static int add_patterns(const char *fname, const char *base, int baselen, fd = open(fname, O_RDONLY); if (fd < 0) warn_on_fopen_errors(fname); + else if (S_ISLNK(st.st_mode) && fstat(fd, &st) < 0) { + warn_on_fopen_errors(fname); + close(fd); + fd = -1; + } } } else { if (flags & PATTERN_NOFOLLOW) From 81fc1c8796729eaf6170a36bbdcbf7c7ebef6ff5 Mon Sep 17 00:00:00 2001 From: Takuto Ikuta <tikuta@chromium.org> Date: Wed, 22 Nov 2017 20:39:38 +0900 Subject: [PATCH 223/303] fetch-pack.c: enable fscache for stats under .git/objects When I do git fetch, git call file stats under .git/objects for each refs. This takes time when there are many refs. By enabling fscache, git takes file stats by directory traversing and that improved the speed of fetch-pack for repository having large number of refs. In my windows workstation, this improves the time of `git fetch` for chromium repository like below. I took stats 3 times. 
* With this patch TotalSeconds: 9.9825165 TotalSeconds: 9.1862075 TotalSeconds: 10.1956256 Avg: 9.78811653333333 * Without this patch TotalSeconds: 15.8406702 TotalSeconds: 15.6248053 TotalSeconds: 15.2085938 Avg: 15.5580231 Signed-off-by: Takuto Ikuta <tikuta@chromium.org> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- fetch-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fetch-pack.c b/fetch-pack.c index 998fc2fa1ed4ab..10db9b90f93be3 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -755,6 +755,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, save_commit_buffer = 0; trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); + enable_fscache(1); for (ref = *refs; ref; ref = ref->next) { struct commit *commit; @@ -781,6 +782,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, if (!cutoff || cutoff < commit->date) cutoff = commit->date; } + enable_fscache(0); trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); /* From 6265cafb68527d1503ea7bac4b63518fc937d8f8 Mon Sep 17 00:00:00 2001 From: Takuto Ikuta <tikuta@chromium.org> Date: Tue, 30 Jan 2018 22:42:58 +0900 Subject: [PATCH 224/303] checkout.c: enable fscache for checkout again This is a retry of #1419. I added a flush_fscache macro to flush cached stats after disk writing, with tests for the regressions reported in #1438 and #1442. git checkout checks each file path in sorted order, so cache flushing does not make performance worse unless we have a large number of modified files in a directory containing many files. Using the chromium repository, I tested `git checkout .` performance when I delete 10 files in different directories.
With this patch: TotalSeconds: 4.307272 TotalSeconds: 4.4863595 TotalSeconds: 4.2975562 Avg: 4.36372923333333 Without this patch: TotalSeconds: 20.9705431 TotalSeconds: 22.4867685 TotalSeconds: 18.8968292 Avg: 20.7847136 I confirmed this patch passed all tests in t/ with core_fscache=1. Signed-off-by: Takuto Ikuta <tikuta@chromium.org> --- builtin/checkout.c | 2 ++ compat/win32/fscache.c | 12 ++++++++++++ compat/win32/fscache.h | 3 +++ entry.c | 3 +++ git-compat-util.h | 4 ++++ parallel-checkout.c | 1 + t/t7201-co.sh | 36 ++++++++++++++++++++++++++++++++++++ 7 files changed, 61 insertions(+) diff --git a/builtin/checkout.c b/builtin/checkout.c index 2a132392fbe747..c0c8d391593dbe 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -391,6 +391,7 @@ static int checkout_worktree(const struct checkout_opts *opts, if (pc_workers > 1) init_parallel_checkout(); + enable_fscache(1); for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -415,6 +416,7 @@ static int checkout_worktree(const struct checkout_opts *opts, errs |= run_parallel_checkout(&state, pc_workers, pc_threshold, NULL, NULL); mem_pool_discard(&ce_mem_pool, should_validate_cache_entries()); + enable_fscache(0); remove_marked_cache_entries(&the_index, 1); remove_scheduled_dirs(); errs |= finish_delayed_checkout(&state, opts->show_progress); diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 303a5d6274a064..62a9034a523668 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -426,6 +426,18 @@ int fscache_enable(int enable) return result; } +/* + * Flush cached stats result when fscache is enabled. + */ +void fscache_flush(void) +{ + if (enabled) { + EnterCriticalSection(&mutex); + fscache_clear(); + LeaveCriticalSection(&mutex); + } +} + /* * Lstat replacement, uses the cache if enabled, otherwise redirects to * mingw_lstat. 
diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 660ada053b4309..2f06f8df97dcd0 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -7,6 +7,9 @@ int fscache_enable(int enable); int fscache_enabled(const char *path); #define is_fscache_enabled(path) fscache_enabled(path) +void fscache_flush(void); +#define flush_fscache() fscache_flush() + DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); diff --git a/entry.c b/entry.c index 616e4f073c1d6b..87ff624a5ea5f9 100644 --- a/entry.c +++ b/entry.c @@ -393,6 +393,9 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca } finish: + /* Flush cached lstat in fscache after writing to disk. */ + flush_fscache(); + if (state->refresh_cache) { if (!fstat_done && lstat(ce->name, &st) < 0) return error_errno("unable to stat just-written file %s", diff --git a/git-compat-util.h b/git-compat-util.h index d2f78e5ed90588..10da409e2eb563 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1548,6 +1548,10 @@ static inline int is_missing_file_error(int errno_) #define is_fscache_enabled(path) (0) #endif +#ifndef flush_fscache +#define flush_fscache() /* noop */ +#endif + int cmd_main(int, const char **); /* diff --git a/parallel-checkout.c b/parallel-checkout.c index 4f6819f2406ea8..0679a80ea44621 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -637,6 +637,7 @@ static void write_items_sequentially(struct checkout *state) { size_t i; + flush_fscache(); for (i = 0; i < parallel_checkout.nr; i++) { struct parallel_checkout_item *pc_item = &parallel_checkout.items[i]; write_pc_item(pc_item, state); diff --git a/t/t7201-co.sh b/t/t7201-co.sh index 61ad47b0c18d23..df068f892006f2 100755 --- a/t/t7201-co.sh +++ b/t/t7201-co.sh @@ -35,6 +35,42 @@ fill () { } +test_expect_success MINGW 'fscache flush cache' ' + + git init fscache-test && + cd fscache-test && + git config core.fscache 1 && + echo A > test.txt && + git add
test.txt && + git commit -m A && + echo B >> test.txt && + git checkout . && + test -z "$(git status -s)" && + echo A > expect.txt && + test_cmp expect.txt test.txt && + cd .. && + rm -rf fscache-test +' + +test_expect_success MINGW 'fscache flush cache dir' ' + + git init fscache-test && + cd fscache-test && + git config core.fscache 1 && + echo A > test.txt && + git add test.txt && + git commit -m A && + rm test.txt && + mkdir test.txt && + touch test.txt/test.txt && + git checkout . && + test -z "$(git status -s)" && + echo A > expect.txt && + test_cmp expect.txt test.txt && + cd .. && + rm -rf fscache-test +' + test_expect_success setup ' fill x y z >same && fill 1 2 3 4 5 6 7 8 >one && From a917ff9b697fafb5ebe2627d37c9ba9302f55495 Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Fri, 7 Sep 2018 11:39:57 -0400 Subject: [PATCH 225/303] Enable the filesystem cache (fscache) in refresh_index(). On file systems that support it, this can dramatically speed up operations like add, commit, describe, rebase, reset, rm that would otherwise have to lstat() every file to "re-match" the stat information in the index to that of the file system. On a synthetic repo with 1M files, "git reset" dropped from 52.02 seconds to 14.42 seconds for a savings of 72%. Signed-off-by: Ben Peart <benpeart@microsoft.com> --- read-cache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/read-cache.c b/read-cache.c index b09128b1884318..7c1587116f64ad 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1630,6 +1630,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, typechange_fmt = in_porcelain ? "T\t%s\n" : "%s: needs update\n"; added_fmt = in_porcelain ? "A\t%s\n" : "%s: needs update\n"; unmerged_fmt = in_porcelain ? 
"U\t%s\n" : "%s: needs merge\n"; + enable_fscache(1); /* * Use the multi-threaded preload_index() to refresh most of the * cache entries quickly then in the single threaded loop below, @@ -1724,6 +1725,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, display_progress(progress, istate->cache_nr); stop_progress(&progress); trace_performance_leave("refresh index"); + enable_fscache(0); return has_errors; } From c5c3a857e14db8ece57135602316ed9b2f54ac4e Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Tue, 23 Oct 2018 11:42:06 -0400 Subject: [PATCH 226/303] fscache: use FindFirstFileExW to avoid retrieving the short name Use FindFirstFileExW with FindExInfoBasic to avoid forcing NTFS to look up the short name. Also switch to a larger (64K vs 4K) buffer using FIND_FIRST_EX_LARGE_FETCH to minimize round trips to the kernel. In a repo with ~200K files, this drops warm cache status times from 3.19 seconds to 2.67 seconds for a 16% savings. Signed-off-by: Ben Peart <benpeart@microsoft.com> --- compat/win32/fscache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 62a9034a523668..780a3e3cf71ff3 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -206,7 +206,8 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir, pattern[wlen] = 0; /* open find handle */ - h = FindFirstFileW(pattern, &fdata); + h = FindFirstFileExW(pattern, FindExInfoBasic, &fdata, FindExSearchNameMatch, + NULL, FIND_FIRST_EX_LARGE_FETCH); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); *dir_not_found = 1; /* or empty directory */ From 31f4a14d0aa773bd961f1068eda07bb75c49bef2 Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Thu, 1 Nov 2018 11:40:51 -0400 Subject: [PATCH 227/303] status: disable and free fscache at the end of the status command At the end of the status command, disable and free the fscache so that we don't leak 
the memory and so that we can dump the fscache statistics. Signed-off-by: Ben Peart <benpeart@microsoft.com> --- builtin/commit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/builtin/commit.c b/builtin/commit.c index 482897300348a9..89c5cb7ea5b2eb 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1581,6 +1581,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) wt_status_print(&s); wt_status_collect_free_buffers(&s); + enable_fscache(0); return 0; } From 5e9d22b08fb92596b1af00bb1b42a7f041ae03c7 Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Thu, 4 Oct 2018 18:10:21 -0400 Subject: [PATCH 228/303] fscache: add GIT_TEST_FSCACHE support Add support to fscache to enable running the entire test suite with the fscache enabled. Signed-off-by: Ben Peart <benpeart@microsoft.com> --- compat/win32/fscache.c | 5 +++++ t/README | 3 +++ 2 files changed, 8 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 780a3e3cf71ff3..6c60a136dc14db 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -3,6 +3,7 @@ #include "../win32.h" #include "fscache.h" #include "../../dir.h" +#include "config.h" static int initialized; static volatile long enabled; @@ -399,7 +400,11 @@ int fscache_enable(int enable) int result; if (!initialized) { + int fscache = git_env_bool("GIT_TEST_FSCACHE", -1); + /* allow the cache to be disabled entirely */ + if (fscache != -1) + core_fscache = fscache; if (!core_fscache) return 0; diff --git a/t/README b/t/README index 979b2d4833d983..8112e711b57478 100644 --- a/t/README +++ b/t/README @@ -502,6 +502,9 @@ a test and then fails then the whole test run will abort. This can help to make sure the expected tests are executed and not silently skipped when their dependency breaks or is simply not present in a new environment. +GIT_TEST_FSCACHE=<boolean> exercises the uncommon fscache code path +which adds a cache below mingw's lstat and dirent implementations. 
+ Naming Tests ------------ From 511e2cdced5f7251a647dd2e3885fcac41a54077 Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Thu, 4 Oct 2018 18:10:21 -0400 Subject: [PATCH 229/303] mem_pool: add GIT_TRACE_MEMPOOL support Add tracing around initializing and discarding mempools. In discard report on the amount of memory unused in the current block to help tune setting the initial_size. Signed-off-by: Ben Peart <benpeart@microsoft.com> --- mem-pool.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mem-pool.c b/mem-pool.c index 599d8e895f8121..a961d71f2ab10c 100644 --- a/mem-pool.c +++ b/mem-pool.c @@ -5,6 +5,7 @@ #include "cache.h" #include "mem-pool.h" +static struct trace_key trace_mem_pool = TRACE_KEY_INIT(MEMPOOL); #define BLOCK_GROWTH_SIZE (1024 * 1024 - sizeof(struct mp_block)) /* @@ -62,12 +63,20 @@ void mem_pool_init(struct mem_pool *pool, size_t initial_size) if (initial_size > 0) mem_pool_alloc_block(pool, initial_size, NULL); + + trace_printf_key(&trace_mem_pool, + "mem_pool (%p): init (%"PRIuMAX") initial size\n", + (void *)pool, (uintmax_t)initial_size); } void mem_pool_discard(struct mem_pool *pool, int invalidate_memory) { struct mp_block *block, *block_to_free; + trace_printf_key(&trace_mem_pool, + "mem_pool (%p): discard (%"PRIuMAX") unused\n", + (void *)pool, + (uintmax_t)(pool->mp_block->end - pool->mp_block->next_free)); block = pool->mp_block; while (block) { From f995134d88457329580766dd93f3d2e85180f749 Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Fri, 2 Nov 2018 11:19:10 -0400 Subject: [PATCH 230/303] fscache: fscache takes an initial size Update enable_fscache() to take an optional initial size parameter which is used to initialize the hashmap so that it can avoid having to rehash as additional entries are added. Add a separate disable_fscache() macro to make the code clearer and easier to read. 
Signed-off-by: Ben Peart <benpeart@microsoft.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- builtin/add.c | 2 +- builtin/checkout.c | 4 ++-- builtin/commit.c | 4 ++-- compat/win32/fscache.c | 8 ++++++-- compat/win32/fscache.h | 5 +++-- fetch-pack.c | 4 ++-- git-compat-util.h | 4 ++++ preload-index.c | 4 ++-- read-cache.c | 4 ++-- 9 files changed, 24 insertions(+), 15 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index e62fe74036e7a1..76d5ad1f5da190 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -593,7 +593,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) die_in_unpopulated_submodule(&the_index, prefix); die_path_inside_submodule(&the_index, &pathspec); - enable_fscache(1); + enable_fscache(0); /* We do not really re-read the index but update the up-to-date flags */ preload_index(&the_index, &pathspec, 0); diff --git a/builtin/checkout.c b/builtin/checkout.c index c0c8d391593dbe..0490a19fdab2b1 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -391,7 +391,7 @@ static int checkout_worktree(const struct checkout_opts *opts, if (pc_workers > 1) init_parallel_checkout(); - enable_fscache(1); + enable_fscache(active_nr); for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -416,7 +416,7 @@ static int checkout_worktree(const struct checkout_opts *opts, errs |= run_parallel_checkout(&state, pc_workers, pc_threshold, NULL, NULL); mem_pool_discard(&ce_mem_pool, should_validate_cache_entries()); - enable_fscache(0); + disable_fscache(); remove_marked_cache_entries(&the_index, 1); remove_scheduled_dirs(); errs |= finish_delayed_checkout(&state, opts->show_progress); diff --git a/builtin/commit.c b/builtin/commit.c index 89c5cb7ea5b2eb..cf0e24d3abf25c 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1540,7 +1540,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) PATHSPEC_PREFER_FULL, prefix, argv); - enable_fscache(1); + 
enable_fscache(0); if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) progress_flag = REFRESH_PROGRESS; @@ -1581,7 +1581,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) wt_status_print(&s); wt_status_collect_free_buffers(&s); - enable_fscache(0); + disable_fscache(); return 0; } diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 2fc69b91de4438..d3efc5c57e5966 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -403,7 +403,7 @@ static struct fsentry *fscache_get(struct fsentry *key) * Enables or disables the cache. Note that the cache is read-only, changes to * the working directory are NOT reflected in the cache while enabled. */ -int fscache_enable(int enable) +int fscache_enable(int enable, size_t initial_size) { int result; @@ -419,7 +419,11 @@ int fscache_enable(int enable) InitializeCriticalSection(&mutex); lstat_requests = opendir_requests = 0; fscache_misses = fscache_requests = 0; - hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); + /* + * avoid having to rehash by leaving room for the parent dirs. 
+ * '4' was determined empirically by testing several repos + */ + hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, initial_size * 4); initialized = 1; } diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 2f06f8df97dcd0..d49c9381114da6 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -1,8 +1,9 @@ #ifndef FSCACHE_H #define FSCACHE_H -int fscache_enable(int enable); -#define enable_fscache(x) fscache_enable(x) +int fscache_enable(int enable, size_t initial_size); +#define enable_fscache(initial_size) fscache_enable(1, initial_size) +#define disable_fscache() fscache_enable(0, 0) int fscache_enabled(const char *path); #define is_fscache_enabled(path) fscache_enabled(path) diff --git a/fetch-pack.c b/fetch-pack.c index 10db9b90f93be3..007e2894dd991b 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -755,7 +755,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, save_commit_buffer = 0; trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); - enable_fscache(1); + enable_fscache(0); for (ref = *refs; ref; ref = ref->next) { struct commit *commit; @@ -782,7 +782,7 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, if (!cutoff || cutoff < commit->date) cutoff = commit->date; } - enable_fscache(0); + disable_fscache(); trace2_region_leave("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); /* diff --git a/git-compat-util.h b/git-compat-util.h index 10da409e2eb563..227716971b4ff0 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1544,6 +1544,10 @@ static inline int is_missing_file_error(int errno_) #define enable_fscache(x) /* noop */ #endif +#ifndef disable_fscache +#define disable_fscache() /* noop */ +#endif + #ifndef is_fscache_enabled #define is_fscache_enabled(path) (0) #endif diff --git a/preload-index.c b/preload-index.c index 2275e4078693ae..d070753f34ec38 100644 --- a/preload-index.c +++ b/preload-index.c @@ -126,7 +126,7 @@ void 
preload_index(struct index_state *index, pthread_mutex_init(&pd.mutex, NULL); } - enable_fscache(1); + enable_fscache(index->cache_nr); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; int err; @@ -163,7 +163,7 @@ void preload_index(struct index_state *index, trace2_data_intmax("index", NULL, "preload/sum_lstat", t2_sum_lstat); trace2_region_leave("index", "preload", NULL); - enable_fscache(0); + disable_fscache(); } int repo_read_index_preload(struct repository *repo, diff --git a/read-cache.c b/read-cache.c index 7c1587116f64ad..de1c493f7e3664 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1630,7 +1630,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, typechange_fmt = in_porcelain ? "T\t%s\n" : "%s: needs update\n"; added_fmt = in_porcelain ? "A\t%s\n" : "%s: needs update\n"; unmerged_fmt = in_porcelain ? "U\t%s\n" : "%s: needs merge\n"; - enable_fscache(1); + enable_fscache(0); /* * Use the multi-threaded preload_index() to refresh most of the * cache entries quickly then in the single threaded loop below, @@ -1725,7 +1725,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, display_progress(progress, istate->cache_nr); stop_progress(&progress); trace_performance_leave("refresh index"); - enable_fscache(0); + disable_fscache(); return has_errors; } From c1014ad9dda38ff0b184bb267c856d2f62e0acf0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 11 Dec 2018 12:59:29 +0100 Subject: [PATCH 231/303] fscache: remember the reparse tag for each entry We will use this in the next commit to implement an FSCache-aware version of is_mount_point(). 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/win32/fscache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 760f8b80d65f2b..6ac0ed692e5849 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -44,6 +44,7 @@ static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); struct fsentry { struct hashmap_entry ent; mode_t st_mode; + ULONG reparse_tag; /* Pointer to the directory listing, or NULL for the listing itself. */ struct fsentry *list; /* Pointer to the next file entry of the list. */ @@ -195,6 +196,10 @@ static struct fsentry *fseentry_create_entry(struct fscache *cache, fse = fsentry_alloc(cache, list, buf, len); + fse->reparse_tag = + fdata->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT ? + fdata->EaSize : 0; + fse->st_mode = file_attr_to_st_mode(fdata->FileAttributes); fse->dirent.d_type = S_ISDIR(fse->st_mode) ? DT_DIR : DT_REG; fse->u.s.st_size = fdata->EndOfFile.LowPart | From b62c18b0e95a0fe54b9e7e334ac9bf74ef70a4f0 Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Thu, 4 Oct 2018 15:38:08 -0400 Subject: [PATCH 232/303] fscache: update fscache to be thread specific instead of global The threading model for fscache has been to have a single, global cache. This puts requirements on it to be thread safe so that callers like preload-index can call it from multiple threads. This was implemented with a single mutex and completion events which introduces contention between the calling threads. Simplify the threading model by making fscache thread specific. This allows us to remove the global mutex and synchronization events entirely and instead associate a fscache with every thread that requests one. This works well with the current multi-threading which divides the cache entries into blocks with a separate thread processing each block. 
At the end of each worker thread, if there is a fscache on the primary thread, merge the cached results from the worker into the primary thread cache. This enables us to reuse the cache later especially when scanning for untracked files. In testing, this reduced the time spent in preload_index() by about 25% and also reduced the CPU utilization significantly. On a repo with ~200K files, it reduced overall status times by ~12%. Signed-off-by: Ben Peart <benpeart@microsoft.com> --- compat/win32/fscache.c | 294 +++++++++++++++++++++++++---------------- compat/win32/fscache.h | 22 ++- git-compat-util.h | 12 ++ preload-index.c | 8 +- 4 files changed, 215 insertions(+), 121 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index d3efc5c57e5966..7fad622e1d1e43 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -5,14 +5,24 @@ #include "../../dir.h" #include "config.h" -static int initialized; -static volatile long enabled; -static struct hashmap map; +static volatile long initialized; +static DWORD dwTlsIndex; static CRITICAL_SECTION mutex; -static unsigned int lstat_requests; -static unsigned int opendir_requests; -static unsigned int fscache_requests; -static unsigned int fscache_misses; + +/* + * Store one fscache per thread to avoid thread contention and locking. + * This is ok because multi-threaded access is 1) uncommon and 2) always + * splitting up the cache entries across multiple threads so there isn't + * any overlap between threads anyway. + */ +struct fscache { + volatile long enabled; + struct hashmap map; + unsigned int lstat_requests; + unsigned int opendir_requests; + unsigned int fscache_requests; + unsigned int fscache_misses; +}; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* @@ -32,8 +42,6 @@ struct fsentry { union { /* Reference count of the directory listing. */ volatile long refcnt; - /* Handle to wait on the loading thread. 
*/ - HANDLE hwait; struct { /* More stat members (only used for file entries). */ off64_t st_size; @@ -251,86 +259,63 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir, /* * Adds a directory listing to the cache. */ -static void fscache_add(struct fsentry *fse) +static void fscache_add(struct fscache *cache, struct fsentry *fse) { if (fse->list) fse = fse->list; for (; fse; fse = fse->next) - hashmap_add(&map, &fse->ent); + hashmap_add(&cache->map, &fse->ent); } /* * Clears the cache. */ -static void fscache_clear(void) +static void fscache_clear(struct fscache *cache) { - hashmap_clear_and_free(&map, struct fsentry, ent); - hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); - lstat_requests = opendir_requests = 0; - fscache_misses = fscache_requests = 0; + hashmap_clear_and_free(&cache->map, struct fsentry, ent); + hashmap_init(&cache->map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); + cache->lstat_requests = cache->opendir_requests = 0; + cache->fscache_misses = cache->fscache_requests = 0; } /* * Checks if the cache is enabled for the given path. */ -int fscache_enabled(const char *path) +static int do_fscache_enabled(struct fscache *cache, const char *path) { - return enabled > 0 && !is_absolute_path(path); + return cache->enabled > 0 && !is_absolute_path(path); } -/* - * Looks up a cache entry, waits if its being loaded by another thread. - * The mutex must be owned by the calling thread. 
- */ -static struct fsentry *fscache_get_wait(struct fsentry *key) +int fscache_enabled(const char *path) { - struct fsentry *fse = hashmap_get_entry(&map, key, ent, NULL); - - /* return if its a 'real' entry (future entries have refcnt == 0) */ - if (!fse || fse->list || fse->u.refcnt) - return fse; - - /* create an event and link our key to the future entry */ - key->u.hwait = CreateEvent(NULL, TRUE, FALSE, NULL); - key->next = fse->next; - fse->next = key; - - /* wait for the loading thread to signal us */ - LeaveCriticalSection(&mutex); - WaitForSingleObject(key->u.hwait, INFINITE); - CloseHandle(key->u.hwait); - EnterCriticalSection(&mutex); + struct fscache *cache = fscache_getcache(); - /* repeat cache lookup */ - return hashmap_get_entry(&map, key, ent, NULL); + return cache ? do_fscache_enabled(cache, path) : 0; } /* * Looks up or creates a cache entry for the specified key. */ -static struct fsentry *fscache_get(struct fsentry *key) +static struct fsentry *fscache_get(struct fscache *cache, struct fsentry *key) { - struct fsentry *fse, *future, *waiter; + struct fsentry *fse; int dir_not_found; - EnterCriticalSection(&mutex); - fscache_requests++; + cache->fscache_requests++; /* check if entry is in cache */ - fse = fscache_get_wait(key); + fse = hashmap_get_entry(&cache->map, key, ent, NULL); if (fse) { if (fse->st_mode) fsentry_addref(fse); else fse = NULL; /* non-existing directory */ - LeaveCriticalSection(&mutex); return fse; } /* if looking for a file, check if directory listing is in cache */ if (!fse && key->list) { - fse = fscache_get_wait(key->list); + fse = hashmap_get_entry(&cache->map, key->list, ent, NULL); if (fse) { - LeaveCriticalSection(&mutex); /* * dir entry without file entry, or dir does not * exist -> file doesn't exist @@ -340,25 +325,8 @@ static struct fsentry *fscache_get(struct fsentry *key) } } - /* add future entry to indicate that we're loading it */ - future = key->list ? 
key->list : key; - future->next = NULL; - future->u.refcnt = 0; - hashmap_add(&map, &future->ent); - - /* create the directory listing (outside mutex!) */ - LeaveCriticalSection(&mutex); - fse = fsentry_create_list(future, &dir_not_found); - EnterCriticalSection(&mutex); - - /* remove future entry and signal waiting threads */ - hashmap_remove(&map, &future->ent, NULL); - waiter = future->next; - while (waiter) { - HANDLE h = waiter->u.hwait; - waiter = waiter->next; - SetEvent(h); - } + /* create the directory listing */ + fse = fsentry_create_list(key->list ? key->list : key, &dir_not_found); /* leave on error (errno set by fsentry_create_list) */ if (!fse) { @@ -372,19 +340,18 @@ static struct fsentry *fscache_get(struct fsentry *key) key->list->dirent.d_name, key->list->len); fse->st_mode = 0; - hashmap_add(&map, &fse->ent); + hashmap_add(&cache->map, &fse->ent); } - LeaveCriticalSection(&mutex); return NULL; } /* add directory listing to the cache */ - fscache_misses++; - fscache_add(fse); + cache->fscache_misses++; + fscache_add(cache, fse); /* lookup file entry if requested (fse already points to directory) */ if (key->list) - fse = hashmap_get_entry(&map, key, ent, NULL); + fse = hashmap_get_entry(&cache->map, key, ent, NULL); if (fse && !fse->st_mode) fse = NULL; /* non-existing directory */ @@ -395,59 +362,104 @@ static struct fsentry *fscache_get(struct fsentry *key) else errno = ENOENT; - LeaveCriticalSection(&mutex); return fse; } /* - * Enables or disables the cache. Note that the cache is read-only, changes to + * Enables the cache. Note that the cache is read-only, changes to * the working directory are NOT reflected in the cache while enabled. 
*/ -int fscache_enable(int enable, size_t initial_size) +int fscache_enable(size_t initial_size) { - int result; + int fscache; + struct fscache *cache; + int result = 0; + + /* allow the cache to be disabled entirely */ + fscache = git_env_bool("GIT_TEST_FSCACHE", -1); + if (fscache != -1) + core_fscache = fscache; + if (!core_fscache) + return 0; + /* + * refcount the global fscache initialization so that the + * opendir and lstat function pointers are redirected if + * any threads are using the fscache. + */ if (!initialized) { - int fscache = git_env_bool("GIT_TEST_FSCACHE", -1); - - /* allow the cache to be disabled entirely */ - if (fscache != -1) - core_fscache = fscache; - if (!core_fscache) - return 0; - InitializeCriticalSection(&mutex); - lstat_requests = opendir_requests = 0; - fscache_misses = fscache_requests = 0; + if (!dwTlsIndex) { + dwTlsIndex = TlsAlloc(); + if (dwTlsIndex == TLS_OUT_OF_INDEXES) { + LeaveCriticalSection(&mutex); + return 0; + } + } + + /* redirect opendir and lstat to the fscache implementations */ + opendir = fscache_opendir; + lstat = fscache_lstat; + } + InterlockedIncrement(&initialized); + + /* refcount the thread specific initialization */ + cache = fscache_getcache(); + if (cache) { + InterlockedIncrement(&cache->enabled); + } else { + cache = (struct fscache *)xcalloc(1, sizeof(*cache)); + cache->enabled = 1; /* * avoid having to rehash by leaving room for the parent dirs. * '4' was determined empirically by testing several repos */ - hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, initial_size * 4); - initialized = 1; + hashmap_init(&cache->map, (hashmap_cmp_fn)fsentry_cmp, NULL, initial_size * 4); + if (!TlsSetValue(dwTlsIndex, cache)) + BUG("TlsSetValue error"); } - result = enable ? 
InterlockedIncrement(&enabled) - : InterlockedDecrement(&enabled); + trace_printf_key(&trace_fscache, "fscache: enable\n"); + return result; +} - if (enable && result == 1) { - /* redirect opendir and lstat to the fscache implementations */ - opendir = fscache_opendir; - lstat = fscache_lstat; - } else if (!enable && !result) { +/* + * Disables the cache. + */ +void fscache_disable(void) +{ + struct fscache *cache; + + if (!core_fscache) + return; + + /* update the thread specific fscache initialization */ + cache = fscache_getcache(); + if (!cache) + BUG("fscache_disable() called on a thread where fscache has not been initialized"); + if (!cache->enabled) + BUG("fscache_disable() called on an fscache that is already disabled"); + InterlockedDecrement(&cache->enabled); + if (!cache->enabled) { + TlsSetValue(dwTlsIndex, NULL); + trace_printf_key(&trace_fscache, "fscache_disable: lstat %u, opendir %u, " + "total requests/misses %u/%u\n", + cache->lstat_requests, cache->opendir_requests, + cache->fscache_requests, cache->fscache_misses); + fscache_clear(cache); + free(cache); + } + + /* update the global fscache initialization */ + InterlockedDecrement(&initialized); + if (!initialized) { /* reset opendir and lstat to the original implementations */ opendir = dirent_opendir; lstat = mingw_lstat; - EnterCriticalSection(&mutex); - trace_printf_key(&trace_fscache, "fscache: lstat %u, opendir %u, " - "total requests/misses %u/%u\n", - lstat_requests, opendir_requests, - fscache_requests, fscache_misses); - fscache_clear(); - LeaveCriticalSection(&mutex); } - trace_printf_key(&trace_fscache, "fscache: enable(%d)\n", enable); - return result; + + trace_printf_key(&trace_fscache, "fscache: disable\n"); + return; } /* @@ -455,10 +467,10 @@ int fscache_enable(int enable, size_t initial_size) */ void fscache_flush(void) { - if (enabled) { - EnterCriticalSection(&mutex); - fscache_clear(); - LeaveCriticalSection(&mutex); + struct fscache *cache = fscache_getcache(); + + if 
(cache && cache->enabled) { + fscache_clear(cache); } } @@ -471,11 +483,12 @@ int fscache_lstat(const char *filename, struct stat *st) int dirlen, base, len; struct heap_fsentry key[2]; struct fsentry *fse; + struct fscache *cache = fscache_getcache(); - if (!fscache_enabled(filename)) + if (!cache || !do_fscache_enabled(cache, filename)) return mingw_lstat(filename, st); - lstat_requests++; + cache->lstat_requests++; /* split filename into path + name */ len = strlen(filename); if (len && is_dir_sep(filename[len - 1])) @@ -488,7 +501,7 @@ int fscache_lstat(const char *filename, struct stat *st) /* lookup entry for path + name in cache */ fsentry_init(&key[0].u.ent, NULL, filename, dirlen); fsentry_init(&key[1].u.ent, &key[0].u.ent, filename + base, len - base); - fse = fscache_get(&key[1].u.ent); + fse = fscache_get(cache, &key[1].u.ent); if (!fse) { errno = ENOENT; return -1; @@ -553,11 +566,12 @@ DIR *fscache_opendir(const char *dirname) struct fsentry *list; fscache_DIR *dir; int len; + struct fscache *cache = fscache_getcache(); - if (!fscache_enabled(dirname)) + if (!cache || !do_fscache_enabled(cache, dirname)) return dirent_opendir(dirname); - opendir_requests++; + cache->opendir_requests++; /* prepare name (strip trailing '/', replace '.') */ len = strlen(dirname); if ((len == 1 && dirname[0] == '.') || @@ -566,7 +580,7 @@ DIR *fscache_opendir(const char *dirname) /* get directory listing from cache */ fsentry_init(&key.u.ent, NULL, dirname, len); - list = fscache_get(&key.u.ent); + list = fscache_get(cache, &key.u.ent); if (!list) return NULL; @@ -577,3 +591,53 @@ DIR *fscache_opendir(const char *dirname) dir->pfsentry = list; return (DIR*) dir; } + +struct fscache *fscache_getcache(void) +{ + return (struct fscache *)TlsGetValue(dwTlsIndex); +} + +void fscache_merge(struct fscache *dest) +{ + struct hashmap_iter iter; + struct hashmap_entry *e; + struct fscache *cache = fscache_getcache(); + + /* + * Only do the merge if fscache was enabled and we have a 
dest + * cache to merge into. + */ + if (!dest) { + fscache_enable(0); + return; + } + if (!cache) + BUG("fscache_merge() called on a thread where fscache has not been initialized"); + + TlsSetValue(dwTlsIndex, NULL); + trace_printf_key(&trace_fscache, "fscache_merge: lstat %u, opendir %u, " + "total requests/misses %u/%u\n", + cache->lstat_requests, cache->opendir_requests, + cache->fscache_requests, cache->fscache_misses); + + /* + * This is only safe because the primary thread we're merging into + * isn't being used so the critical section only needs to prevent + * the child threads from stomping on each other. + */ + EnterCriticalSection(&mutex); + + hashmap_iter_init(&cache->map, &iter); + while ((e = hashmap_iter_next(&iter))) + hashmap_add(&dest->map, e); + + dest->lstat_requests += cache->lstat_requests; + dest->opendir_requests += cache->opendir_requests; + dest->fscache_requests += cache->fscache_requests; + dest->fscache_misses += cache->fscache_misses; + LeaveCriticalSection(&mutex); + + free(cache); + + InterlockedDecrement(&initialized); +} diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index d49c9381114da6..2eb8bf3f5cfee8 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -1,9 +1,16 @@ #ifndef FSCACHE_H #define FSCACHE_H -int fscache_enable(int enable, size_t initial_size); -#define enable_fscache(initial_size) fscache_enable(1, initial_size) -#define disable_fscache() fscache_enable(0, 0) +/* + * The fscache is thread specific. enable_fscache() must be called + * for each thread where caching is desired.
+ */ + +int fscache_enable(size_t initial_size); +#define enable_fscache(initial_size) fscache_enable(initial_size) + +void fscache_disable(void); +#define disable_fscache() fscache_disable() int fscache_enabled(const char *path); #define is_fscache_enabled(path) fscache_enabled(path) @@ -14,4 +21,13 @@ void fscache_flush(void); DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); +/* opaque fscache structure */ +struct fscache; + +struct fscache *fscache_getcache(void); +#define getcache_fscache() fscache_getcache() + +void fscache_merge(struct fscache *dest); +#define merge_fscache(dest) fscache_merge(dest) + #endif diff --git a/git-compat-util.h b/git-compat-util.h index 227716971b4ff0..a6a7d314c6eb79 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1540,6 +1540,10 @@ static inline int is_missing_file_error(int errno_) * data or even file content without the need to synchronize with the file * system. */ + + /* opaque fscache structure */ +struct fscache; + #ifndef enable_fscache #define enable_fscache(x) /* noop */ #endif @@ -1556,6 +1560,14 @@ static inline int is_missing_file_error(int errno_) #define flush_fscache() /* noop */ #endif +#ifndef getcache_fscache +#define getcache_fscache() (NULL) /* noop */ +#endif + +#ifndef merge_fscache +#define merge_fscache(dest) /* noop */ +#endif + int cmd_main(int, const char **); /* diff --git a/preload-index.c b/preload-index.c index d070753f34ec38..0fc9423369d503 100644 --- a/preload-index.c +++ b/preload-index.c @@ -10,6 +10,8 @@ #include "thread-utils.h" #include "repository.h" +static struct fscache *fscache; + /* * Mostly randomly chosen maximum thread counts: we * cap the parallelism to 20 threads, and we want @@ -47,6 +49,7 @@ static void *preload_thread(void *_data) nr = index->cache_nr - p->offset; last_nr = nr; + enable_fscache(nr); do { struct cache_entry *ce = *cep++; struct stat st; @@ -90,6 +93,7 @@ static void *preload_thread(void *_data) 
pthread_mutex_unlock(&pd->mutex); } cache_def_clear(&cache); + merge_fscache(fscache); return NULL; } @@ -105,6 +109,7 @@ void preload_index(struct index_state *index, if (!HAVE_THREADS || !core_preload_index) return; + fscache = getcache_fscache(); threads = index->cache_nr / THREAD_COST; if ((index->cache_nr > 1) && (threads < 2) && git_env_bool("GIT_TEST_PRELOAD_INDEX", 0)) threads = 2; @@ -126,7 +131,6 @@ void preload_index(struct index_state *index, pthread_mutex_init(&pd.mutex, NULL); } - enable_fscache(index->cache_nr); for (i = 0; i < threads; i++) { struct thread_data *p = data+i; int err; @@ -162,8 +166,6 @@ void preload_index(struct index_state *index, trace2_data_intmax("index", NULL, "preload/sum_lstat", t2_sum_lstat); trace2_region_leave("index", "preload", NULL); - - disable_fscache(); } int repo_read_index_preload(struct repository *repo, From c4f9797b70c194ad86ad41f04ae8c4b91cd79739 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 11 Dec 2018 12:17:49 +0100 Subject: [PATCH 233/303] fscache: implement an FSCache-aware is_mount_point() When FSCache is active, we can cache the reparse tag and use it directly to determine whether a path refers to an NTFS junction, without any additional, costly I/O. Note: this change only makes a difference with the next commit, which will make use of the FSCache in `git clean` (contingent on `core.fscache` set, of course). 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 2 ++ compat/mingw.h | 3 ++- compat/win32/fscache.c | 35 +++++++++++++++++++++++++++++++++++ compat/win32/fscache.h | 1 + 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 20628140043fce..9b283d62698427 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2734,6 +2734,8 @@ pid_t waitpid(pid_t pid, int *status, int options) return -1; } +int (*win32_is_mount_point)(struct strbuf *path) = mingw_is_mount_point; + int mingw_is_mount_point(struct strbuf *path) { WIN32_FIND_DATAW findbuf = { 0 }; diff --git a/compat/mingw.h b/compat/mingw.h index 5401902dcba5e9..a02d34f5b40f04 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -471,7 +471,8 @@ static inline void convert_slashes(char *path) } struct strbuf; int mingw_is_mount_point(struct strbuf *path); -#define is_mount_point mingw_is_mount_point +extern int (*win32_is_mount_point)(struct strbuf *path); +#define is_mount_point win32_is_mount_point #define CAN_UNLINK_MOUNT_POINTS 1 #define PATH_SEP ';' char *mingw_query_user_email(void); diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 6ac0ed692e5849..351b1dff7fff65 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -467,6 +467,7 @@ int fscache_enable(size_t initial_size) /* redirect opendir and lstat to the fscache implementations */ opendir = fscache_opendir; lstat = fscache_lstat; + win32_is_mount_point = fscache_is_mount_point; } initialized++; LeaveCriticalSection(&fscache_cs); @@ -527,6 +528,7 @@ void fscache_disable(void) /* reset opendir and lstat to the original implementations */ opendir = dirent_opendir; lstat = mingw_lstat; + win32_is_mount_point = mingw_is_mount_point; } LeaveCriticalSection(&fscache_cs); @@ -597,6 +599,39 @@ int fscache_lstat(const char *filename, struct stat *st) return 0; } +/* + * is_mount_point() replacement, uses cache if enabled, otherwise falls + * back to 
mingw_is_mount_point(). + */ +int fscache_is_mount_point(struct strbuf *path) +{ + int dirlen, base, len; + struct heap_fsentry key[2]; + struct fsentry *fse; + struct fscache *cache = fscache_getcache(); + + if (!cache || !do_fscache_enabled(cache, path->buf)) + return mingw_is_mount_point(path); + + cache->lstat_requests++; + /* split path into path + name */ + len = path->len; + if (len && is_dir_sep(path->buf[len - 1])) + len--; + base = len; + while (base && !is_dir_sep(path->buf[base - 1])) + base--; + dirlen = base ? base - 1 : 0; + + /* lookup entry for path + name in cache */ + fsentry_init(&key[0].u.ent, NULL, path->buf, dirlen); + fsentry_init(&key[1].u.ent, &key[0].u.ent, path->buf + base, len - base); + fse = fscache_get(cache, &key[1].u.ent); + if (!fse) + return mingw_is_mount_point(path); + return fse->reparse_tag == IO_REPARSE_TAG_MOUNT_POINT; +} + typedef struct fscache_DIR { struct DIR base_dir; /* extend base struct DIR */ struct fsentry *pfsentry; diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 042b247a542554..386c770a85d321 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -22,6 +22,7 @@ void fscache_flush(void); DIR *fscache_opendir(const char *dir); int fscache_lstat(const char *file_name, struct stat *buf); +int fscache_is_mount_point(struct strbuf *path); /* opaque fscache structure */ struct fscache; From 2096c0ec5aad731116244be96413a79a1b1a6314 Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Tue, 25 Sep 2018 16:28:16 -0400 Subject: [PATCH 234/303] fscache: add fscache hit statistics Track fscache hits and misses for lstat and opendir requests. Reporting of statistics is done when the cache is disabled for the last time and freed and is only reported if GIT_TRACE_FSCACHE is set. 
Sample output is: 11:33:11.836428 compat/win32/fscache.c:433 fscache: lstat 3775, opendir 263, total requests/misses 4052/269 Signed-off-by: Ben Peart <benpeart@microsoft.com> --- compat/win32/fscache.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 6c60a136dc14db..2fc69b91de4438 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -9,6 +9,10 @@ static int initialized; static volatile long enabled; static struct hashmap map; static CRITICAL_SECTION mutex; +static unsigned int lstat_requests; +static unsigned int opendir_requests; +static unsigned int fscache_requests; +static unsigned int fscache_misses; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); /* @@ -263,6 +267,8 @@ static void fscache_clear(void) { hashmap_clear_and_free(&map, struct fsentry, ent); hashmap_init(&map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); + lstat_requests = opendir_requests = 0; + fscache_misses = fscache_requests = 0; } /* @@ -309,6 +315,7 @@ static struct fsentry *fscache_get(struct fsentry *key) int dir_not_found; EnterCriticalSection(&mutex); + fscache_requests++; /* check if entry is in cache */ fse = fscache_get_wait(key); if (fse) { @@ -372,6 +379,7 @@ static struct fsentry *fscache_get(struct fsentry *key) } /* add directory listing to the cache */ + fscache_misses++; fscache_add(fse); /* lookup file entry if requested (fse already points to directory) */ @@ -409,6 +417,8 @@ int fscache_enable(int enable) return 0; InitializeCriticalSection(&mutex); + lstat_requests = opendir_requests = 0; + fscache_misses = fscache_requests = 0; hashmap_init(&map, (hashmap_cmp_fn) fsentry_cmp, NULL, 0); initialized = 1; } @@ -425,6 +435,10 @@ int fscache_enable(int enable) opendir = dirent_opendir; lstat = mingw_lstat; EnterCriticalSection(&mutex); + trace_printf_key(&trace_fscache, "fscache: lstat %u, opendir %u, " + "total requests/misses %u/%u\n", + lstat_requests, opendir_requests, + 
fscache_requests, fscache_misses); fscache_clear(); LeaveCriticalSection(&mutex); } @@ -457,6 +471,7 @@ int fscache_lstat(const char *filename, struct stat *st) if (!fscache_enabled(filename)) return mingw_lstat(filename, st); + lstat_requests++; /* split filename into path + name */ len = strlen(filename); if (len && is_dir_sep(filename[len - 1])) @@ -538,6 +553,7 @@ DIR *fscache_opendir(const char *dirname) if (!fscache_enabled(dirname)) return dirent_opendir(dirname); + opendir_requests++; /* prepare name (strip trailing '/', replace '.') */ len = strlen(dirname); if ((len == 1 && dirname[0] == '.') || From c4bb1022b9ef31607d362620da2e1e72d3849e1c Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Fri, 2 Nov 2018 11:19:10 -0400 Subject: [PATCH 235/303] fscache: teach fscache to use mempool Now that the fscache is single threaded, take advantage of the mem_pool as the allocator to significantly reduce the cost of allocations and frees. With the reduced cost of free, in future patches, we can start freeing the fscache at the end of commands instead of just leaking it. 
Signed-off-by: Ben Peart <benpeart@microsoft.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/win32/fscache.c | 45 ++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 7fad622e1d1e43..0817dd3b48fbf5 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -4,6 +4,7 @@ #include "fscache.h" #include "../../dir.h" #include "config.h" +#include "../../mem-pool.h" static volatile long initialized; static DWORD dwTlsIndex; @@ -18,6 +19,7 @@ static CRITICAL_SECTION mutex; struct fscache { volatile long enabled; struct hashmap map; + struct mem_pool mem_pool; unsigned int lstat_requests; unsigned int opendir_requests; unsigned int fscache_requests; @@ -122,11 +124,12 @@ static void fsentry_init(struct fsentry *fse, struct fsentry *list, /* * Allocate an fsentry structure on the heap. */ -static struct fsentry *fsentry_alloc(struct fsentry *list, const char *name, +static struct fsentry *fsentry_alloc(struct fscache *cache, struct fsentry *list, const char *name, size_t len) { /* overallocate fsentry and copy the name to the end */ - struct fsentry *fse = xmalloc(sizeof(struct fsentry) + len + 1); + struct fsentry *fse = + mem_pool_alloc(&cache->mem_pool, sizeof(*fse) + len + 1); /* init the rest of the structure */ fsentry_init(fse, list, name, len); fse->next = NULL; @@ -146,27 +149,21 @@ inline static void fsentry_addref(struct fsentry *fse) } /* - * Release the reference to an fsentry, frees the memory if its the last ref. + * Release the reference to an fsentry. */ static void fsentry_release(struct fsentry *fse) { if (fse->list) fse = fse->list; - if (InterlockedDecrement(&(fse->u.refcnt))) - return; - - while (fse) { - struct fsentry *next = fse->next; - free(fse); - fse = next; - } + InterlockedDecrement(&(fse->u.refcnt)); } /* * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. 
*/ -static struct fsentry *fseentry_create_entry(struct fsentry *list, +static struct fsentry *fseentry_create_entry(struct fscache *cache, + struct fsentry *list, const WIN32_FIND_DATAW *fdata) { char buf[MAX_PATH * 3]; @@ -174,7 +171,7 @@ static struct fsentry *fseentry_create_entry(struct fsentry *list, struct fsentry *fse; len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); - fse = fsentry_alloc(list, buf, len); + fse = fsentry_alloc(cache, list, buf, len); fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes); fse->dirent.d_type = S_ISDIR(fse->st_mode) ? DT_DIR : DT_REG; @@ -192,7 +189,7 @@ static struct fsentry *fseentry_create_entry(struct fsentry *list, * Dir should not contain trailing '/'. Use an empty string for the current * directory (not "."!). */ -static struct fsentry *fsentry_create_list(const struct fsentry *dir, +static struct fsentry *fsentry_create_list(struct fscache *cache, const struct fsentry *dir, int *dir_not_found) { wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ @@ -231,14 +228,14 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir, } /* allocate object to hold directory listing */ - list = fsentry_alloc(NULL, dir->dirent.d_name, dir->len); + list = fsentry_alloc(cache, NULL, dir->dirent.d_name, dir->len); list->st_mode = S_IFDIR; list->dirent.d_type = DT_DIR; /* walk directory and build linked list of fsentry structures */ phead = &list->next; do { - *phead = fseentry_create_entry(list, &fdata); + *phead = fseentry_create_entry(cache, list, &fdata); phead = &(*phead)->next; } while (FindNextFileW(h, &fdata)); @@ -250,7 +247,7 @@ static struct fsentry *fsentry_create_list(const struct fsentry *dir, if (err == ERROR_NO_MORE_FILES) return list; - /* otherwise free the list and return error */ + /* otherwise release the list and return error */ fsentry_release(list); errno = err_win_to_posix(err); return NULL; @@ -273,7 +270,9 @@ static void fscache_add(struct fscache *cache, struct fsentry *fse) */ 
static void fscache_clear(struct fscache *cache) { - hashmap_clear_and_free(&cache->map, struct fsentry, ent); + mem_pool_discard(&cache->mem_pool, 0); + mem_pool_init(&cache->mem_pool, 0); + hashmap_clear(&cache->map); hashmap_init(&cache->map, (hashmap_cmp_fn)fsentry_cmp, NULL, 0); cache->lstat_requests = cache->opendir_requests = 0; cache->fscache_misses = cache->fscache_requests = 0; @@ -326,7 +325,7 @@ static struct fsentry *fscache_get(struct fscache *cache, struct fsentry *key) } /* create the directory listing */ - fse = fsentry_create_list(key->list ? key->list : key, &dir_not_found); + fse = fsentry_create_list(cache, key->list ? key->list : key, &dir_not_found); /* leave on error (errno set by fsentry_create_list) */ if (!fse) { @@ -336,7 +335,7 @@ static struct fsentry *fscache_get(struct fscache *cache, struct fsentry *key) * empty, which for all practical matters is the same * thing as far as fscache is concerned). */ - fse = fsentry_alloc(key->list->list, + fse = fsentry_alloc(cache, key->list->list, key->list->dirent.d_name, key->list->len); fse->st_mode = 0; @@ -415,6 +414,7 @@ int fscache_enable(size_t initial_size) * '4' was determined empirically by testing several repos */ hashmap_init(&cache->map, (hashmap_cmp_fn)fsentry_cmp, NULL, initial_size * 4); + mem_pool_init(&cache->mem_pool, 0); if (!TlsSetValue(dwTlsIndex, cache)) BUG("TlsSetValue error"); } @@ -446,7 +446,8 @@ void fscache_disable(void) "total requests/misses %u/%u\n", cache->lstat_requests, cache->opendir_requests, cache->fscache_requests, cache->fscache_misses); - fscache_clear(cache); + mem_pool_discard(&cache->mem_pool, 0); + hashmap_clear(&cache->map); free(cache); } @@ -631,6 +632,8 @@ void fscache_merge(struct fscache *dest) while ((e = hashmap_iter_next(&iter))) hashmap_add(&dest->map, e); + mem_pool_combine(&dest->mem_pool, &cache->mem_pool); + dest->lstat_requests += cache->lstat_requests; dest->opendir_requests += cache->opendir_requests; dest->fscache_requests += 
cache->fscache_requests; From cd119f2cab9a2d3435efdba11bee180abe10863f Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Fri, 16 Nov 2018 10:59:18 -0500 Subject: [PATCH 236/303] fscache: make fscache_enable() thread safe The recent change to make fscache thread specific relied on fscache_enable() being called first from the primary thread before being called in parallel from worker threads. Make that more robust and protect it with a critical section to avoid any issues. Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Ben Peart <benpeart@microsoft.com> --- compat/mingw.c | 4 ++++ compat/win32/fscache.c | 23 +++++++++++++---------- compat/win32/fscache.h | 2 ++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 1bea9d1507e1f2..20628140043fce 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -12,6 +12,7 @@ #include "dir.h" #define SECURITY_WIN32 #include <sspi.h> +#include "win32/fscache.h" #define HCAST(type, handle) ((type)(intptr_t)handle) @@ -3402,6 +3403,9 @@ int wmain(int argc, const wchar_t **wargv) /* initialize critical section for waitpid pinfo_t list */ InitializeCriticalSection(&pinfo_cs); + /* initialize critical section for fscache */ + InitializeCriticalSection(&fscache_cs); + /* set up default file mode and file modes for stdin/out/err */ _fmode = _O_BINARY; _setmode(_fileno(stdin), _O_BINARY); diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 0817dd3b48fbf5..75dcd70b803334 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -8,7 +8,7 @@ static volatile long initialized; static DWORD dwTlsIndex; -static CRITICAL_SECTION mutex; +CRITICAL_SECTION fscache_cs; /* * Store one fscache per thread to avoid thread contention and locking. @@ -386,12 +386,12 @@ int fscache_enable(size_t initial_size) * opendir and lstat function pointers are redirected if * any threads are using the fscache. 
*/ + EnterCriticalSection(&fscache_cs); if (!initialized) { - InitializeCriticalSection(&mutex); if (!dwTlsIndex) { dwTlsIndex = TlsAlloc(); if (dwTlsIndex == TLS_OUT_OF_INDEXES) { - LeaveCriticalSection(&mutex); + LeaveCriticalSection(&fscache_cs); return 0; } } @@ -400,12 +400,13 @@ int fscache_enable(size_t initial_size) opendir = fscache_opendir; lstat = fscache_lstat; } - InterlockedIncrement(&initialized); + initialized++; + LeaveCriticalSection(&fscache_cs); /* refcount the thread specific initialization */ cache = fscache_getcache(); if (cache) { - InterlockedIncrement(&cache->enabled); + cache->enabled++; } else { cache = (struct fscache *)xcalloc(1, sizeof(*cache)); cache->enabled = 1; @@ -439,7 +440,7 @@ void fscache_disable(void) BUG("fscache_disable() called on a thread where fscache has not been initialized"); if (!cache->enabled) BUG("fscache_disable() called on an fscache that is already disabled"); - InterlockedDecrement(&cache->enabled); + cache->enabled--; if (!cache->enabled) { TlsSetValue(dwTlsIndex, NULL); trace_printf_key(&trace_fscache, "fscache_disable: lstat %u, opendir %u, " @@ -452,12 +453,14 @@ void fscache_disable(void) } /* update the global fscache initialization */ - InterlockedDecrement(&initialized); + EnterCriticalSection(&fscache_cs); + initialized--; if (!initialized) { /* reset opendir and lstat to the original implementations */ opendir = dirent_opendir; lstat = mingw_lstat; } + LeaveCriticalSection(&fscache_cs); trace_printf_key(&trace_fscache, "fscache: disable\n"); return; @@ -626,7 +629,7 @@ void fscache_merge(struct fscache *dest) * isn't being used so the critical section only needs to prevent * the the child threads from stomping on each other. 
*/ - EnterCriticalSection(&mutex); + EnterCriticalSection(&fscache_cs); hashmap_iter_init(&cache->map, &iter); while ((e = hashmap_iter_next(&iter))) @@ -638,9 +641,9 @@ void fscache_merge(struct fscache *dest) dest->opendir_requests += cache->opendir_requests; dest->fscache_requests += cache->fscache_requests; dest->fscache_misses += cache->fscache_misses; - LeaveCriticalSection(&mutex); + initialized--; + LeaveCriticalSection(&fscache_cs); free(cache); - InterlockedDecrement(&initialized); } diff --git a/compat/win32/fscache.h b/compat/win32/fscache.h index 2eb8bf3f5cfee8..042b247a542554 100644 --- a/compat/win32/fscache.h +++ b/compat/win32/fscache.h @@ -6,6 +6,8 @@ * for each thread where caching is desired. */ +extern CRITICAL_SECTION fscache_cs; + int fscache_enable(size_t initial_size); #define enable_fscache(initial_size) fscache_enable(initial_size) From aadde9228ccc644efe02dc8193846d376e24fa9a Mon Sep 17 00:00:00 2001 From: Ben Peart <benpeart@microsoft.com> Date: Thu, 15 Nov 2018 14:15:40 -0500 Subject: [PATCH 237/303] fscache: teach fscache to use NtQueryDirectoryFile Using FindFirstFileExW() requires the OS to allocate a 64K buffer for each directory and then free it when we call FindClose(). Update fscache to call the underlying kernel API NtQueryDirectoryFile so that we can do the buffer management ourselves. That allows us to allocate a single buffer for the lifetime of the cache and reuse it for each directory. This change improves performance of 'git status' by 18% in a repo with ~200K files and 30k folders. 
Documentation for NtQueryDirectoryFile can be found at: https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/content/ntifs/nf-ntifs-ntquerydirectoryfile https://docs.microsoft.com/en-us/windows/desktop/FileIO/file-attribute-constants https://docs.microsoft.com/en-us/windows/desktop/fileio/reparse-point-tags To determine if the specified directory is a symbolic link, inspect the FileAttributes member to see if the FILE_ATTRIBUTE_REPARSE_POINT flag is set. If so, EaSize will contain the reparse tag (this is a so far undocumented feature, but confirmed by the NTFS developers). To determine if the reparse point is a symbolic link (and not some other form of reparse point), test whether the tag value equals the value IO_REPARSE_TAG_SYMLINK. The NtQueryDirectoryFile() call works best (and on Windows 8.1 and earlier, it works *only*) with buffer sizes up to 64kB. Which is 32k wide characters, so let's use that as our buffer size. Signed-off-by: Ben Peart <benpeart@microsoft.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/win32/fscache.c | 123 ++++++++++++++++++++++++++++---------- compat/win32/ntifs.h | 131 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+), 30 deletions(-) create mode 100644 compat/win32/ntifs.h diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 75dcd70b803334..760f8b80d65f2b 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -5,6 +5,7 @@ #include "../../dir.h" #include "config.h" #include "../../mem-pool.h" +#include "ntifs.h" static volatile long initialized; static DWORD dwTlsIndex; @@ -24,6 +25,13 @@ struct fscache { unsigned int opendir_requests; unsigned int fscache_requests; unsigned int fscache_misses; + /* + * 32k wide characters translates to 64kB, which is the maximum that + * Windows 8.1 and earlier can handle. 
On network drives, not only + * the client's Windows version matters, but also the server's, + * therefore we need to keep this to 64kB. + */ + WCHAR buffer[32 * 1024]; }; static struct trace_key trace_fscache = TRACE_KEY_INIT(FSCACHE); @@ -159,27 +167,44 @@ static void fsentry_release(struct fsentry *fse) InterlockedDecrement(&(fse->u.refcnt)); } +static int xwcstoutfn(char *utf, int utflen, const wchar_t *wcs, int wcslen) +{ + if (!wcs || !utf || utflen < 1) { + errno = EINVAL; + return -1; + } + utflen = WideCharToMultiByte(CP_UTF8, 0, wcs, wcslen, utf, utflen, NULL, NULL); + if (utflen) + return utflen; + errno = ERANGE; + return -1; +} + /* - * Allocate and initialize an fsentry from a WIN32_FIND_DATA structure. + * Allocate and initialize an fsentry from a FILE_FULL_DIR_INFORMATION structure. */ static struct fsentry *fseentry_create_entry(struct fscache *cache, struct fsentry *list, - const WIN32_FIND_DATAW *fdata) + PFILE_FULL_DIR_INFORMATION fdata) { char buf[MAX_PATH * 3]; int len; struct fsentry *fse; - len = xwcstoutf(buf, fdata->cFileName, ARRAY_SIZE(buf)); + + len = xwcstoutfn(buf, ARRAY_SIZE(buf), fdata->FileName, fdata->FileNameLength / sizeof(wchar_t)); fse = fsentry_alloc(cache, list, buf, len); - fse->st_mode = file_attr_to_st_mode(fdata->dwFileAttributes); + fse->st_mode = file_attr_to_st_mode(fdata->FileAttributes); fse->dirent.d_type = S_ISDIR(fse->st_mode) ? 
DT_DIR : DT_REG; - fse->u.s.st_size = (((off64_t) (fdata->nFileSizeHigh)) << 32) - | fdata->nFileSizeLow; - filetime_to_timespec(&(fdata->ftLastAccessTime), &(fse->u.s.st_atim)); - filetime_to_timespec(&(fdata->ftLastWriteTime), &(fse->u.s.st_mtim)); - filetime_to_timespec(&(fdata->ftCreationTime), &(fse->u.s.st_ctim)); + fse->u.s.st_size = fdata->EndOfFile.LowPart | + (((off_t)fdata->EndOfFile.HighPart) << 32); + filetime_to_timespec((FILETIME *)&(fdata->LastAccessTime), + &(fse->u.s.st_atim)); + filetime_to_timespec((FILETIME *)&(fdata->LastWriteTime), + &(fse->u.s.st_mtim)); + filetime_to_timespec((FILETIME *)&(fdata->CreationTime), + &(fse->u.s.st_ctim)); return fse; } @@ -192,8 +217,10 @@ static struct fsentry *fseentry_create_entry(struct fscache *cache, static struct fsentry *fsentry_create_list(struct fscache *cache, const struct fsentry *dir, int *dir_not_found) { - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ - WIN32_FIND_DATAW fdata; + wchar_t pattern[MAX_PATH]; + NTSTATUS status; + IO_STATUS_BLOCK iosb; + PFILE_FULL_DIR_INFORMATION di; HANDLE h; int wlen; struct fsentry *list, **phead; @@ -209,15 +236,18 @@ static struct fsentry *fsentry_create_list(struct fscache *cache, const struct f return NULL; } - /* append optional '/' and wildcard '*' */ - if (wlen) - pattern[wlen++] = '/'; - pattern[wlen++] = '*'; - pattern[wlen] = 0; + /* handle CWD */ + if (!wlen) { + wlen = GetCurrentDirectoryW(ARRAY_SIZE(pattern), pattern); + if (!wlen || wlen >= ARRAY_SIZE(pattern)) { + errno = wlen ? 
ENAMETOOLONG : err_win_to_posix(GetLastError()); + return NULL; + } + } - /* open find handle */ - h = FindFirstFileExW(pattern, FindExInfoBasic, &fdata, FindExSearchNameMatch, - NULL, FIND_FIRST_EX_LARGE_FETCH); + h = CreateFileW(pattern, FILE_LIST_DIRECTORY, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); if (h == INVALID_HANDLE_VALUE) { err = GetLastError(); *dir_not_found = 1; /* or empty directory */ @@ -234,22 +264,55 @@ static struct fsentry *fsentry_create_list(struct fscache *cache, const struct f /* walk directory and build linked list of fsentry structures */ phead = &list->next; - do { - *phead = fseentry_create_entry(cache, list, &fdata); + status = NtQueryDirectoryFile(h, NULL, 0, 0, &iosb, cache->buffer, + sizeof(cache->buffer), FileFullDirectoryInformation, FALSE, NULL, FALSE); + if (!NT_SUCCESS(status)) { + /* + * NtQueryDirectoryFile returns STATUS_INVALID_PARAMETER when + * asked to enumerate an invalid directory (ie it is a file + * instead of a directory). Verify that is the actual cause + * of the error. + */ + if (status == STATUS_INVALID_PARAMETER) { + DWORD attributes = GetFileAttributesW(pattern); + if (!(attributes & FILE_ATTRIBUTE_DIRECTORY)) + status = ERROR_DIRECTORY; + } + goto Error; + } + di = (PFILE_FULL_DIR_INFORMATION)(cache->buffer); + for (;;) { + + *phead = fseentry_create_entry(cache, list, di); phead = &(*phead)->next; - } while (FindNextFileW(h, &fdata)); - /* remember result of last FindNextFile, then close find handle */ - err = GetLastError(); - FindClose(h); + /* If there is no offset in the entry, the buffer has been exhausted. 
*/ + if (di->NextEntryOffset == 0) { + status = NtQueryDirectoryFile(h, NULL, 0, 0, &iosb, cache->buffer, + sizeof(cache->buffer), FileFullDirectoryInformation, FALSE, NULL, FALSE); + if (!NT_SUCCESS(status)) { + if (status == STATUS_NO_MORE_FILES) + break; + goto Error; + } + + di = (PFILE_FULL_DIR_INFORMATION)(cache->buffer); + continue; + } + + /* Advance to the next entry. */ + di = (PFILE_FULL_DIR_INFORMATION)(((PUCHAR)di) + di->NextEntryOffset); + } - /* return the list if we've got all the files */ - if (err == ERROR_NO_MORE_FILES) - return list; + CloseHandle(h); + return list; - /* otherwise release the list and return error */ +Error: + trace_printf_key(&trace_fscache, + "fscache: status(%ld) unable to query directory " + "contents '%s'\n", status, dir->dirent.d_name); + CloseHandle(h); fsentry_release(list); - errno = err_win_to_posix(err); return NULL; } diff --git a/compat/win32/ntifs.h b/compat/win32/ntifs.h new file mode 100644 index 00000000000000..64ed792c52f352 --- /dev/null +++ b/compat/win32/ntifs.h @@ -0,0 +1,131 @@ +#ifndef _NTIFS_ +#define _NTIFS_ + +/* + * Copy necessary structures and definitions out of the Windows DDK + * to enable calling NtQueryDirectoryFile() + */ + +typedef _Return_type_success_(return >= 0) LONG NTSTATUS; +#define NT_SUCCESS(Status) (((NTSTATUS)(Status)) >= 0) + +#if !defined(_NTSECAPI_) && !defined(_WINTERNL_) && \ + !defined(__UNICODE_STRING_DEFINED) +#define __UNICODE_STRING_DEFINED +typedef struct _UNICODE_STRING { + USHORT Length; + USHORT MaximumLength; + PWSTR Buffer; +} UNICODE_STRING; +typedef UNICODE_STRING *PUNICODE_STRING; +typedef const UNICODE_STRING *PCUNICODE_STRING; +#endif /* !_NTSECAPI_ && !_WINTERNL_ && !__UNICODE_STRING_DEFINED */ + +typedef enum _FILE_INFORMATION_CLASS { + FileDirectoryInformation = 1, + FileFullDirectoryInformation, + FileBothDirectoryInformation, + FileBasicInformation, + FileStandardInformation, + FileInternalInformation, + FileEaInformation, + FileAccessInformation, + 
FileNameInformation, + FileRenameInformation, + FileLinkInformation, + FileNamesInformation, + FileDispositionInformation, + FilePositionInformation, + FileFullEaInformation, + FileModeInformation, + FileAlignmentInformation, + FileAllInformation, + FileAllocationInformation, + FileEndOfFileInformation, + FileAlternateNameInformation, + FileStreamInformation, + FilePipeInformation, + FilePipeLocalInformation, + FilePipeRemoteInformation, + FileMailslotQueryInformation, + FileMailslotSetInformation, + FileCompressionInformation, + FileObjectIdInformation, + FileCompletionInformation, + FileMoveClusterInformation, + FileQuotaInformation, + FileReparsePointInformation, + FileNetworkOpenInformation, + FileAttributeTagInformation, + FileTrackingInformation, + FileIdBothDirectoryInformation, + FileIdFullDirectoryInformation, + FileValidDataLengthInformation, + FileShortNameInformation, + FileIoCompletionNotificationInformation, + FileIoStatusBlockRangeInformation, + FileIoPriorityHintInformation, + FileSfioReserveInformation, + FileSfioVolumeInformation, + FileHardLinkInformation, + FileProcessIdsUsingFileInformation, + FileNormalizedNameInformation, + FileNetworkPhysicalNameInformation, + FileIdGlobalTxDirectoryInformation, + FileIsRemoteDeviceInformation, + FileAttributeCacheInformation, + FileNumaNodeInformation, + FileStandardLinkInformation, + FileRemoteProtocolInformation, + FileMaximumInformation +} FILE_INFORMATION_CLASS, *PFILE_INFORMATION_CLASS; + +typedef struct _FILE_FULL_DIR_INFORMATION { + ULONG NextEntryOffset; + ULONG FileIndex; + LARGE_INTEGER CreationTime; + LARGE_INTEGER LastAccessTime; + LARGE_INTEGER LastWriteTime; + LARGE_INTEGER ChangeTime; + LARGE_INTEGER EndOfFile; + LARGE_INTEGER AllocationSize; + ULONG FileAttributes; + ULONG FileNameLength; + ULONG EaSize; + WCHAR FileName[1]; +} FILE_FULL_DIR_INFORMATION, *PFILE_FULL_DIR_INFORMATION; + +typedef struct _IO_STATUS_BLOCK { + union { + NTSTATUS Status; + PVOID Pointer; + } u; + ULONG_PTR 
Information; +} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK; + +typedef VOID +(NTAPI *PIO_APC_ROUTINE)( + IN PVOID ApcContext, + IN PIO_STATUS_BLOCK IoStatusBlock, + IN ULONG Reserved); + +NTSYSCALLAPI +NTSTATUS +NTAPI +NtQueryDirectoryFile( + _In_ HANDLE FileHandle, + _In_opt_ HANDLE Event, + _In_opt_ PIO_APC_ROUTINE ApcRoutine, + _In_opt_ PVOID ApcContext, + _Out_ PIO_STATUS_BLOCK IoStatusBlock, + _Out_writes_bytes_(Length) PVOID FileInformation, + _In_ ULONG Length, + _In_ FILE_INFORMATION_CLASS FileInformationClass, + _In_ BOOLEAN ReturnSingleEntry, + _In_opt_ PUNICODE_STRING FileName, + _In_ BOOLEAN RestartScan +); + +#define STATUS_NO_MORE_FILES ((NTSTATUS)0x80000006L) + +#endif From 3ac93fc228601430158d537bb0ed7d15e93df32e Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Wed, 12 Jun 2019 00:58:49 +0000 Subject: [PATCH 238/303] unpack-trees: enable fscache for sparse-checkout When updating the skip-worktree bits in the index to align with new values in a sparse-checkout file, Git scans the entire working directory with lstat() calls. In a sparse-checkout, many of these lstat() calls are for paths that do not exist. Enable the fscache feature during this scan. Since enable_fscache() calls nest, the disable_fscache() method decrements a counter and would only clear the cache if that counter reaches zero. In a local test of a repo with ~2.2 million paths, updating the index with git read-tree -m -u HEAD with a sparse-checkout file containing only /.gitattributes improved from 2-3 minutes to ~6 seconds. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- unpack-trees.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unpack-trees.c b/unpack-trees.c index bae812156c4fed..98945c53e9792f 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1797,7 +1797,9 @@ static void mark_new_skip_worktree(struct pattern_list *pl, * 2. Widen worktree according to sparse-checkout file. 
* Matched entries will have skip_wt_flag cleared (i.e. "in") */ + enable_fscache(istate->cache_nr); clear_ce_flags(istate, select_flag, skip_wt_flag, pl, show_progress); + disable_fscache(); } static void populate_from_existing_patterns(struct unpack_trees_options *o, From be9795a0da50ca8a3b3fc6838caeb3f7a6ed4e95 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 11 Dec 2018 12:17:49 +0100 Subject: [PATCH 239/303] clean: make use of FSCache The `git clean` command needs to enumerate plenty of files and directories, and can therefore benefit from the FSCache. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- builtin/clean.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/clean.c b/builtin/clean.c index cb55e8956b0747..ae4c6f37bab701 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -1041,6 +1041,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix) if (read_cache() < 0) die(_("index file corrupt")); + enable_fscache(active_nr); pl = add_pattern_list(&dir, EXC_CMDL, "--exclude option"); for (i = 0; i < exclude_list.nr; i++) @@ -1115,6 +1116,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix) } } + disable_fscache(); strbuf_release(&abs_path); strbuf_release(&buf); string_list_clear(&del_list, 0); From fe95dea56a6a75d61670371139cc8493076d555c Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Mon, 11 May 2015 19:54:23 +0200 Subject: [PATCH 240/303] strbuf_readlink: don't call readlink twice if hint is the exact link size strbuf_readlink() calls readlink() twice if the hint argument specifies the exact size of the link target (e.g. by passing stat.st_size as returned by lstat()). This is necessary because 'readlink(..., hint) == hint' could mean that the buffer was too small. Use hint + 1 as buffer size to prevent this. 
Signed-off-by: Karsten Blees <blees@dcon.de> --- strbuf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/strbuf.c b/strbuf.c index 0890b1405c5cc6..789b3af1fe8a03 100644 --- a/strbuf.c +++ b/strbuf.c @@ -571,12 +571,12 @@ int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) while (hint < STRBUF_MAXLINK) { ssize_t len; - strbuf_grow(sb, hint); - len = readlink(path, sb->buf, hint); + strbuf_grow(sb, hint + 1); + len = readlink(path, sb->buf, hint + 1); if (len < 0) { if (errno != ERANGE) break; - } else if (len < hint) { + } else if (len <= hint) { strbuf_setlen(sb, len); return 0; } From 2c04911580885b0f6298c46ac0f9902a9649bfd5 Mon Sep 17 00:00:00 2001 From: Doug Kelly <dougk.ff7@gmail.com> Date: Wed, 8 Jan 2014 20:28:15 -0600 Subject: [PATCH 241/303] pack-objects (mingw): demonstrate a segmentation fault with large deltas There is a problem in the way 9ac3f0e5b3e4 (pack-objects: fix performance issues on packing large deltas, 2018-07-22) initializes that mutex in the `packing_data` struct. The problem manifests in a segmentation fault on Windows, when a mutex (AKA critical section) is accessed without being initialized. (With pthreads, you apparently do not really have to initialize them?) This was reported in https://github.com/git-for-windows/git/issues/1839. 
Signed-off-by: Doug Kelly <dougk.ff7@gmail.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t7422-submodule-long-path.sh | 105 +++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100755 t/t7422-submodule-long-path.sh diff --git a/t/t7422-submodule-long-path.sh b/t/t7422-submodule-long-path.sh new file mode 100755 index 00000000000000..1c03c76080a429 --- /dev/null +++ b/t/t7422-submodule-long-path.sh @@ -0,0 +1,105 @@ +#!/bin/sh +# +# Copyright (c) 2013 Doug Kelly +# + +test_description='Test submodules with a path near PATH_MAX + +This test verifies that "git submodule" initialization, update and clones work, including with recursive submodules and paths approaching PATH_MAX (260 characters on Windows) +' + +TEST_NO_CREATE_REPO=1 +. ./test-lib.sh + +longpath="" +for (( i=0; i<4; i++ )); do + longpath="0123456789abcdefghijklmnopqrstuvwxyz$longpath" +done +# Pick a substring maximum of 90 characters +# This should be good, since we'll add on a lot for temp directories +longpath=${longpath:0:90}; export longpath + +test_expect_failure 'submodule with a long path' ' + GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME= \ + git -c init.defaultBranch=long init --bare remote && + test_create_repo bundle1 && + ( + cd bundle1 && + test_commit "shoot" && + git rev-parse --verify HEAD >../expect + ) && + mkdir home && + ( + cd home && + git clone ../remote test && + cd test && + git checkout -B long && + git submodule add ../bundle1 $longpath && + test_commit "sogood" && + ( + cd $longpath && + git rev-parse --verify HEAD >actual && + test_cmp ../../../expect actual + ) && + git push origin long + ) && + mkdir home2 && + ( + cd home2 && + git clone ../remote test && + cd test && + git checkout long && + git submodule update --init && + ( + cd $longpath && + git rev-parse --verify HEAD >actual && + test_cmp ../../../expect actual + ) + ) +' + +test_expect_failure 'recursive submodule with a long path' ' + 
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME= \ + git -c init.defaultBranch=long init --bare super && + test_create_repo child && + ( + cd child && + test_commit "shoot" && + git rev-parse --verify HEAD >../expect + ) && + test_create_repo parent && + ( + cd parent && + git submodule add ../child $longpath && + test_commit "aim" + ) && + mkdir home3 && + ( + cd home3 && + git clone ../super test && + cd test && + git checkout -B long && + git submodule add ../parent foo && + git submodule update --init --recursive && + test_commit "sogood" && + ( + cd foo/$longpath && + git rev-parse --verify HEAD >actual && + test_cmp ../../../../expect actual + ) && + git push origin long + ) && + mkdir home4 && + ( + cd home4 && + git clone ../super test --recursive && + ( + cd test/foo/$longpath && + git rev-parse --verify HEAD >actual && + test_cmp ../../../../expect actual + ) + ) +' +unset longpath + +test_done From 3d9387a6136d7f0b96a55c910015bf0e0aa0b130 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Thu, 19 Mar 2015 16:33:44 +0100 Subject: [PATCH 242/303] mingw: Support `git_terminal_prompt` with more terminals The `git_terminal_prompt()` function expects the terminal window to be attached to a Win32 Console. However, this is not the case with terminal windows other than `cmd.exe`'s, e.g. with MSys2's own `mintty`. Non-cmd terminals such as `mintty` still have to have a Win32 Console to be proper console programs, but have to hide the Win32 Console to be able to provide more flexibility (such as being resizeable not only vertically but also horizontally). By writing to that Win32 Console, `git_terminal_prompt()` manages only to send the prompt to nowhere and to wait for input from a Console to which the user has no access. This commit introduces a function specifically to support `mintty` -- or other terminals that are compatible with MSys2's `/dev/tty` emulation. 
We use the `TERM` environment variable as an indicator for that: if the value starts with "xterm" (such as `mintty`'s "xterm_256color"), we prefer to let `xterm_prompt()` handle the user interaction. The most prominent user of `git_terminal_prompt()` is certainly `git-remote-https.exe`. It is an interesting use case because both `stdin` and `stdout` are redirected when Git calls said executable, yet it still wants to access the terminal. When running inside a `mintty`, the terminal is not accessible to the `git-remote-https.exe` program, though, because it is a MinGW program and the `mintty` terminal is not backed by a Win32 console. To solve that problem, we simply call out to the shell -- which is an *MSys2* program and can therefore access `/dev/tty`. Helped-by: nalla <nalla@hamal.uberspace.de> Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/terminal.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/compat/terminal.c b/compat/terminal.c index ea490a7ced431a..a2cfb0b5087216 100644 --- a/compat/terminal.c +++ b/compat/terminal.c @@ -418,6 +418,54 @@ static int getchar_with_timeout(int timeout) return getchar(); } +static char *shell_prompt(const char *prompt, int echo) +{ + const char *read_input[] = { + /* Note: call 'bash' explicitly, as 'read -s' is bash-specific */ + "bash", "-c", echo ? 
+ "cat >/dev/tty && read -r line </dev/tty && echo \"$line\"" : + "cat >/dev/tty && read -r -s line </dev/tty && echo \"$line\" && echo >/dev/tty", + NULL + }; + struct child_process child = CHILD_PROCESS_INIT; + static struct strbuf buffer = STRBUF_INIT; + int prompt_len = strlen(prompt), len = -1, code; + + strvec_pushv(&child.args, read_input); + child.in = -1; + child.out = -1; + + if (start_command(&child)) + return NULL; + + if (write_in_full(child.in, prompt, prompt_len) != prompt_len) { + error("could not write to prompt script"); + close(child.in); + goto ret; + } + close(child.in); + + strbuf_reset(&buffer); + len = strbuf_read(&buffer, child.out, 1024); + if (len < 0) { + error("could not read from prompt script"); + goto ret; + } + + strbuf_strip_suffix(&buffer, "\n"); + strbuf_strip_suffix(&buffer, "\r"); + +ret: + close(child.out); + code = finish_command(&child); + if (code) { + error("failed to execute prompt script (exit code %d)", code); + return NULL; + } + + return len < 0 ? NULL : buffer.buf; +} + #endif #ifndef FORCE_TEXT @@ -429,6 +477,12 @@ char *git_terminal_prompt(const char *prompt, int echo) static struct strbuf buf = STRBUF_INIT; int r; FILE *input_fh, *output_fh; +#ifdef GIT_WINDOWS_NATIVE + const char *term = getenv("TERM"); + + if (term && starts_with(term, "xterm")) + return shell_prompt(prompt, echo); +#endif input_fh = fopen(INPUT_PATH, "r" FORCE_TEXT); if (!input_fh) From d925b68775b9fa604643d1206a2371f06a9dbbec Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Mon, 11 May 2015 22:15:40 +0200 Subject: [PATCH 243/303] strbuf_readlink: support link targets that exceed PATH_MAX strbuf_readlink() refuses to read link targets that exceed PATH_MAX (even if a sufficient size was specified by the caller). As some platforms support longer paths, remove this restriction (similar to strbuf_getcwd()). 
Signed-off-by: Karsten Blees <blees@dcon.de> --- strbuf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/strbuf.c b/strbuf.c index 789b3af1fe8a03..8da139b1126ab5 100644 --- a/strbuf.c +++ b/strbuf.c @@ -559,8 +559,6 @@ ssize_t strbuf_write(struct strbuf *sb, FILE *f) return sb->len ? fwrite(sb->buf, 1, sb->len, f) : 0; } -#define STRBUF_MAXLINK (2*PATH_MAX) - int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) { size_t oldalloc = sb->alloc; @@ -568,7 +566,7 @@ int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) if (hint < 32) hint = 32; - while (hint < STRBUF_MAXLINK) { + for (;;) { ssize_t len; strbuf_grow(sb, hint + 1); From 40772e0f719f8559831681e8b4f6e47a575edde0 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Tue, 28 Jul 2015 21:07:41 +0200 Subject: [PATCH 244/303] mingw: support long paths Windows paths are typically limited to MAX_PATH = 260 characters, even though the underlying NTFS file system supports paths up to 32,767 chars. This limitation is also evident in Windows Explorer, cmd.exe and many other applications (including IDEs). Particularly annoying is that most Windows APIs return bogus error codes if a relative path only barely exceeds MAX_PATH in conjunction with the current directory, e.g. ERROR_PATH_NOT_FOUND / ENOENT instead of the infinitely more helpful ERROR_FILENAME_EXCED_RANGE / ENAMETOOLONG. Many Windows wide char APIs support longer than MAX_PATH paths through the file namespace prefix ('\\?\' or '\\?\UNC\') followed by an absolute path. Notable exceptions include functions dealing with executables and the current directory (CreateProcess, LoadLibrary, Get/SetCurrentDirectory) as well as the entire shell API (ShellExecute, SHGetSpecialFolderPath...). Introduce a handle_long_path function to check the length of a specified path properly (and fail with ENAMETOOLONG), and to optionally expand long paths using the '\\?\' file namespace prefix. 
Short paths will not be modified, so we don't need to worry about device names (NUL, CON, AUX). Contrary to MSDN docs, the GetFullPathNameW function doesn't seem to be limited to MAX_PATH (at least not on Win7), so we can use it to do the heavy lifting of the conversion (translate '/' to '\', eliminate '.' and '..', and make an absolute path). Add long path error checking to xutftowcs_path for APIs with a hard MAX_PATH limit. Add a new MAX_LONG_PATH constant and xutftowcs_long_path function for APIs that support long paths. While improved error checking is always active, long path support must be explicitly enabled via the 'core.longpaths' option. This is to prevent end users from shooting themselves in the foot by checking out files that Windows Explorer, cmd/bash or their favorite IDE cannot handle. Test suite: Test the case where the full pathname length of a dir is close to 260 (MAX_PATH). Bug report and an original reproducer by Andrey Rogozhnikov: https://github.com/msysgit/git/pull/122#issuecomment-43604199 [jes: adjusted test number to avoid conflicts, added support for chdir(), etc] Thanks-to: Martin W.
Kirst <maki@bitkings.de> Thanks-to: Doug Kelly <dougk.ff7@gmail.com> Original-test-by: Andrey Rogozhnikov <rogozhnikov.andrey@gmail.com> Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Stepan Kasal <kasal@ucw.cz> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- Documentation/config/core.txt | 7 ++ compat/mingw.c | 152 ++++++++++++++++++++++++++------- compat/mingw.h | 75 ++++++++++++++-- compat/win32/dirent.c | 16 ++-- compat/win32/fscache.c | 16 ++-- t/t2031-checkout-long-paths.sh | 102 ++++++++++++++++++++++ t/t7422-submodule-long-path.sh | 24 +++--- 7 files changed, 329 insertions(+), 63 deletions(-) create mode 100755 t/t2031-checkout-long-paths.sh diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index a22e25fb0ba17e..d52fc67af13efc 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -676,6 +676,13 @@ core.fscache:: Git for Windows uses this to bulk-read and cache lstat data of entire directories (instead of doing lstat file by file). +core.longpaths:: + Enable long path (> 260) support for builtin commands in Git for + Windows. This is disabled by default, as long paths are not supported + by Windows Explorer, cmd.exe and the Git for Windows tool chain + (msys, bash, tcl, perl...). Only enable this if you know what you're + doing and are prepared to live with a few quirks. + core.unsetenvvars:: Windows-only: comma-separated list of environment variables' names that need to be unset before spawning any other process. 
diff --git a/compat/mingw.c b/compat/mingw.c index 9b283d62698427..ae9e9644de37c3 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -235,6 +235,7 @@ static int core_restrict_inherited_handles = -1; static enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY; static char *unset_environment_variables; int core_fscache; +int core_long_paths; int mingw_core_config(const char *var, const char *value, void *cb) { @@ -251,6 +252,11 @@ int mingw_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.longpaths")) { + core_long_paths = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.unsetenvvars")) { free(unset_environment_variables); unset_environment_variables = xstrdup(value); @@ -297,8 +303,8 @@ static wchar_t *normalize_ntpath(wchar_t *wbuf) int mingw_unlink(const char *pathname) { int ret, tries = 0; - wchar_t wpathname[MAX_PATH]; - if (xutftowcs_path(wpathname, pathname) < 0) + wchar_t wpathname[MAX_LONG_PATH]; + if (xutftowcs_long_path(wpathname, pathname) < 0) return -1; if (DeleteFileW(wpathname)) @@ -330,7 +336,7 @@ static int is_dir_empty(const wchar_t *wpath) { WIN32_FIND_DATAW findbuf; HANDLE handle; - wchar_t wbuf[MAX_PATH + 2]; + wchar_t wbuf[MAX_LONG_PATH + 2]; wcscpy(wbuf, wpath); wcscat(wbuf, L"\\*"); handle = FindFirstFileW(wbuf, &findbuf); @@ -351,7 +357,7 @@ static int is_dir_empty(const wchar_t *wpath) int mingw_rmdir(const char *pathname) { int ret, tries = 0; - wchar_t wpathname[MAX_PATH]; + wchar_t wpathname[MAX_LONG_PATH]; struct stat st; /* @@ -373,7 +379,7 @@ int mingw_rmdir(const char *pathname) return -1; } - if (xutftowcs_path(wpathname, pathname) < 0) + if (xutftowcs_long_path(wpathname, pathname) < 0) return -1; while ((ret = _wrmdir(wpathname)) == -1 && tries < ARRAY_SIZE(delay)) { @@ -452,15 +458,18 @@ static int set_hidden_flag(const wchar_t *path, int set) int mingw_mkdir(const char *path, int mode) { int ret; - wchar_t wpath[MAX_PATH]; + wchar_t 
wpath[MAX_LONG_PATH]; if (!is_valid_win32_path(path, 0)) { errno = EINVAL; return -1; } - if (xutftowcs_path(wpath, path) < 0) + /* CreateDirectoryW path limit is 248 (MAX_PATH - 8.3 file name) */ + if (xutftowcs_path_ex(wpath, path, MAX_LONG_PATH, -1, 248, + core_long_paths) < 0) return -1; + ret = _wmkdir(wpath); if (!ret && needs_hiding(path)) return set_hidden_flag(wpath, 1); @@ -547,7 +556,7 @@ int mingw_open (const char *filename, int oflags, ...) va_list args; unsigned mode; int fd, create = (oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL); - wchar_t wfilename[MAX_PATH]; + wchar_t wfilename[MAX_LONG_PATH]; open_fn_t open_fn; va_start(args, oflags); @@ -575,7 +584,7 @@ int mingw_open (const char *filename, int oflags, ...) if (filename && !strcmp(filename, "/dev/null")) wcscpy(wfilename, L"nul"); - else if (xutftowcs_path(wfilename, filename) < 0) + else if (xutftowcs_long_path(wfilename, filename) < 0) return -1; fd = open_fn(wfilename, oflags, mode); @@ -633,14 +642,14 @@ FILE *mingw_fopen (const char *filename, const char *otype) { int hide = needs_hiding(filename); FILE *file; - wchar_t wfilename[MAX_PATH], wotype[4]; + wchar_t wfilename[MAX_LONG_PATH], wotype[4]; if (filename && !strcmp(filename, "/dev/null")) wcscpy(wfilename, L"nul"); else if (!is_valid_win32_path(filename, 1)) { int create = otype && strchr(otype, 'w'); errno = create ? 
EINVAL : ENOENT; return NULL; - } else if (xutftowcs_path(wfilename, filename) < 0) + } else if (xutftowcs_long_path(wfilename, filename) < 0) return NULL; if (xutftowcs(wotype, otype, ARRAY_SIZE(wotype)) < 0) @@ -662,14 +671,14 @@ FILE *mingw_freopen (const char *filename, const char *otype, FILE *stream) { int hide = needs_hiding(filename); FILE *file; - wchar_t wfilename[MAX_PATH], wotype[4]; + wchar_t wfilename[MAX_LONG_PATH], wotype[4]; if (filename && !strcmp(filename, "/dev/null")) wcscpy(wfilename, L"nul"); else if (!is_valid_win32_path(filename, 1)) { int create = otype && strchr(otype, 'w'); errno = create ? EINVAL : ENOENT; return NULL; - } else if (xutftowcs_path(wfilename, filename) < 0) + } else if (xutftowcs_long_path(wfilename, filename) < 0) return NULL; if (xutftowcs(wotype, otype, ARRAY_SIZE(wotype)) < 0) @@ -744,27 +753,33 @@ ssize_t mingw_write(int fd, const void *buf, size_t len) int mingw_access(const char *filename, int mode) { - wchar_t wfilename[MAX_PATH]; + wchar_t wfilename[MAX_LONG_PATH]; if (!strcmp("nul", filename) || !strcmp("/dev/null", filename)) return 0; - if (xutftowcs_path(wfilename, filename) < 0) + if (xutftowcs_long_path(wfilename, filename) < 0) return -1; /* X_OK is not supported by the MSVCRT version */ return _waccess(wfilename, mode & ~X_OK); } +/* cached length of current directory for handle_long_path */ +static int current_directory_len = 0; + int mingw_chdir(const char *dirname) { - wchar_t wdirname[MAX_PATH]; - if (xutftowcs_path(wdirname, dirname) < 0) + int result; + wchar_t wdirname[MAX_LONG_PATH]; + if (xutftowcs_long_path(wdirname, dirname) < 0) return -1; - return _wchdir(wdirname); + result = _wchdir(wdirname); + current_directory_len = GetCurrentDirectoryW(0, NULL); + return result; } int mingw_chmod(const char *filename, int mode) { - wchar_t wfilename[MAX_PATH]; - if (xutftowcs_path(wfilename, filename) < 0) + wchar_t wfilename[MAX_LONG_PATH]; + if (xutftowcs_long_path(wfilename, filename) < 0) return -1; 
return _wchmod(wfilename, mode); } @@ -812,8 +827,8 @@ static int has_valid_directory_prefix(wchar_t *wfilename) static int do_lstat(int follow, const char *file_name, struct stat *buf) { WIN32_FILE_ATTRIBUTE_DATA fdata; - wchar_t wfilename[MAX_PATH]; - if (xutftowcs_path(wfilename, file_name) < 0) + wchar_t wfilename[MAX_LONG_PATH]; + if (xutftowcs_long_path(wfilename, file_name) < 0) return -1; if (GetFileAttributesExW(wfilename, GetFileExInfoStandard, &fdata)) { @@ -984,10 +999,10 @@ int mingw_utime (const char *file_name, const struct utimbuf *times) FILETIME mft, aft; int rc; DWORD attrs; - wchar_t wfilename[MAX_PATH]; + wchar_t wfilename[MAX_LONG_PATH]; HANDLE osfilehandle; - if (xutftowcs_path(wfilename, file_name) < 0) + if (xutftowcs_long_path(wfilename, file_name) < 0) return -1; /* must have write permission */ @@ -1070,6 +1085,7 @@ char *mingw_mktemp(char *template) wchar_t wtemplate[MAX_PATH]; int offset = 0; + /* we need to return the path, thus no long paths here! */ if (xutftowcs_path(wtemplate, template) < 0) return NULL; @@ -1704,6 +1720,10 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **deltaen if (*argv && !strcmp(cmd, *argv)) wcmd[0] = L'\0'; + /* + * Paths to executables and to the current directory do not support + * long paths, therefore we cannot use xutftowcs_long_path() here. 
+ */ else if (xutftowcs_path(wcmd, cmd) < 0) return -1; if (dir && xutftowcs_path(wdir, dir) < 0) @@ -2355,8 +2375,9 @@ int mingw_rename(const char *pold, const char *pnew) { DWORD attrs, gle; int tries = 0; - wchar_t wpold[MAX_PATH], wpnew[MAX_PATH]; - if (xutftowcs_path(wpold, pold) < 0 || xutftowcs_path(wpnew, pnew) < 0) + wchar_t wpold[MAX_LONG_PATH], wpnew[MAX_LONG_PATH]; + if (xutftowcs_long_path(wpold, pold) < 0 || + xutftowcs_long_path(wpnew, pnew) < 0) return -1; /* @@ -2670,9 +2691,9 @@ int mingw_raise(int sig) int link(const char *oldpath, const char *newpath) { - wchar_t woldpath[MAX_PATH], wnewpath[MAX_PATH]; - if (xutftowcs_path(woldpath, oldpath) < 0 || - xutftowcs_path(wnewpath, newpath) < 0) + wchar_t woldpath[MAX_LONG_PATH], wnewpath[MAX_LONG_PATH]; + if (xutftowcs_long_path(woldpath, oldpath) < 0 || + xutftowcs_long_path(wnewpath, newpath) < 0) return -1; if (!CreateHardLinkW(wnewpath, woldpath, NULL)) { @@ -2740,8 +2761,8 @@ int mingw_is_mount_point(struct strbuf *path) { WIN32_FIND_DATAW findbuf = { 0 }; HANDLE handle; - wchar_t wfilename[MAX_PATH]; - int wlen = xutftowcs_path(wfilename, path->buf); + wchar_t wfilename[MAX_LONG_PATH]; + int wlen = xutftowcs_long_path(wfilename, path->buf); if (wlen < 0) die(_("could not get long path for '%s'"), path->buf); @@ -2886,9 +2907,9 @@ static size_t append_system_bin_dirs(char *path, size_t size) static int is_system32_path(const char *path) { - WCHAR system32[MAX_PATH], wpath[MAX_PATH]; + WCHAR system32[MAX_LONG_PATH], wpath[MAX_LONG_PATH]; - if (xutftowcs_path(wpath, path) < 0 || + if (xutftowcs_long_path(wpath, path) < 0 || !GetSystemDirectoryW(system32, ARRAY_SIZE(system32)) || _wcsicmp(system32, wpath)) return 0; @@ -3256,6 +3277,68 @@ int is_valid_win32_path(const char *path, int allow_literal_nul) } } +int handle_long_path(wchar_t *path, int len, int max_path, int expand) +{ + int result; + wchar_t buf[MAX_LONG_PATH]; + + /* + * we don't need special handling if path is relative to the current 
+ * directory, and current directory + path don't exceed the desired + * max_path limit. This should cover > 99 % of cases with minimal + * performance impact (git almost always uses relative paths). + */ + if ((len < 2 || (!is_dir_sep(path[0]) && path[1] != ':')) && + (current_directory_len + len < max_path)) + return len; + + /* + * handle everything else: + * - absolute paths: "C:\dir\file" + * - absolute UNC paths: "\\server\share\dir\file" + * - absolute paths on current drive: "\dir\file" + * - relative paths on other drive: "X:file" + * - prefixed paths: "\\?\...", "\\.\..." + */ + + /* convert to absolute path using GetFullPathNameW */ + result = GetFullPathNameW(path, MAX_LONG_PATH, buf, NULL); + if (!result) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + + /* + * return absolute path if it fits within max_path (even if + * "cwd + path" doesn't due to '..' components) + */ + if (result < max_path) { + wcscpy(path, buf); + return result; + } + + /* error out if we shouldn't expand the path or buf is too small */ + if (!expand || result >= MAX_LONG_PATH - 6) { + errno = ENAMETOOLONG; + return -1; + } + + /* prefix full path with "\\?\" or "\\?\UNC\" */ + if (buf[0] == '\\') { + /* ...unless already prefixed */ + if (buf[1] == '\\' && (buf[2] == '?' || buf[2] == '.')) + return len; + + wcscpy(path, L"\\\\?\\UNC\\"); + wcscpy(path + 8, buf + 2); + return result + 6; + } else { + wcscpy(path, L"\\\\?\\"); + wcscpy(path + 4, buf); + return result + 4; + } +} + #if !defined(_MSC_VER) /* * Disable MSVCRT command line wildcard expansion (__getmainargs called from @@ -3417,6 +3500,9 @@ int wmain(int argc, const wchar_t **wargv) /* initialize Unicode console */ winansi_init(); + /* init length of current directory for handle_long_path */ + current_directory_len = GetCurrentDirectoryW(0, NULL); + /* invoke the real main() using our utf8 version of argv. 
*/ exit_status = main(argc, argv); diff --git a/compat/mingw.h b/compat/mingw.h index a02d34f5b40f04..90f1ff18b35c55 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -12,6 +12,7 @@ typedef _sigset_t sigset_t; #endif extern int core_fscache; +extern int core_long_paths; int mingw_core_config(const char *var, const char *value, void *cb); #define platform_core_config mingw_core_config @@ -515,6 +516,42 @@ int is_path_owned_by_current_sid(const char *path, struct strbuf *report); int is_valid_win32_path(const char *path, int allow_literal_nul); #define is_valid_path(path) is_valid_win32_path(path, 0) +/** + * Max length of long paths (exceeding MAX_PATH). The actual maximum supported + * by NTFS is 32,767 (* sizeof(wchar_t)), but we choose an arbitrary smaller + * value to limit required stack memory. + */ +#define MAX_LONG_PATH 4096 + +/** + * Handles paths that would exceed the MAX_PATH limit of Windows Unicode APIs. + * + * With expand == false, the function checks for over-long paths and fails + * with ENAMETOOLONG. The path parameter is not modified, except if cwd + path + * exceeds max_path, but the resulting absolute path doesn't (e.g. due to + * eliminating '..' components). The path parameter must point to a buffer + * of max_path wide characters. + * + * With expand == true, an over-long path is automatically converted in place + * to an absolute path prefixed with '\\?\', and the new length is returned. + * The path parameter must point to a buffer of MAX_LONG_PATH wide characters. 
+ * + * Parameters: + * path: path to check and / or convert + * len: size of path on input (number of wide chars without \0) + * max_path: max short path length to check (usually MAX_PATH = 260, but just + * 248 for CreateDirectoryW) + * expand: false to only check the length, true to expand the path to a + * '\\?\'-prefixed absolute path + * + * Return: + * length of the resulting path, or -1 on failure + * + * Errors: + * ENAMETOOLONG if path is too long + */ +int handle_long_path(wchar_t *path, int len, int max_path, int expand); + /** * Converts UTF-8 encoded string to UTF-16LE. * @@ -573,18 +610,46 @@ static inline int xutftowcs(wchar_t *wcs, const char *utf, size_t wcslen) } /** - * Simplified file system specific variant of xutftowcsn, assumes output - * buffer size is MAX_PATH wide chars and input string is \0-terminated, - * fails with ENAMETOOLONG if input string is too long. + * Simplified file system specific wrapper of xutftowcsn and handle_long_path. + * Converts ERANGE to ENAMETOOLONG. If expand is true, wcs must be at least + * MAX_LONG_PATH wide chars (see handle_long_path). */ -static inline int xutftowcs_path(wchar_t *wcs, const char *utf) +static inline int xutftowcs_path_ex(wchar_t *wcs, const char *utf, + size_t wcslen, int utflen, int max_path, int expand) { - int result = xutftowcsn(wcs, utf, MAX_PATH, -1); + int result = xutftowcsn(wcs, utf, wcslen, utflen); if (result < 0 && errno == ERANGE) errno = ENAMETOOLONG; + if (result >= 0) + result = handle_long_path(wcs, result, max_path, expand); return result; } +/** + * Simplified file system specific variant of xutftowcsn, assumes output + * buffer size is MAX_PATH wide chars and input string is \0-terminated, + * fails with ENAMETOOLONG if input string is too long. Typically used for + * Windows APIs that don't support long paths, e.g. SetCurrentDirectory, + * LoadLibrary, CreateProcess... 
+ */ +static inline int xutftowcs_path(wchar_t *wcs, const char *utf) +{ + return xutftowcs_path_ex(wcs, utf, MAX_PATH, -1, MAX_PATH, 0); +} + +/** + * Simplified file system specific variant of xutftowcsn for Windows APIs + * that support long paths via '\\?\'-prefix, assumes output buffer size is + * MAX_LONG_PATH wide chars, fails with ENAMETOOLONG if input string is too + * long. The 'core.longpaths' git-config option controls whether the path + * is only checked or expanded to a long path. + */ +static inline int xutftowcs_long_path(wchar_t *wcs, const char *utf) +{ + return xutftowcs_path_ex(wcs, utf, MAX_LONG_PATH, -1, MAX_PATH, + core_long_paths); +} + /** * Converts UTF-16LE encoded string to UTF-8. * diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index 139d2ba3c4da34..aae364b67aa6f8 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -65,19 +65,23 @@ static int dirent_closedir(dirent_DIR *dir) DIR *dirent_opendir(const char *name) { - wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */ + wchar_t pattern[MAX_LONG_PATH + 2]; /* + 2 for "\*" */ WIN32_FIND_DATAW fdata; HANDLE h; int len; dirent_DIR *dir; - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((len = xutftowcs_path(pattern, name)) < 0) + /* convert name to UTF-16 and check length */ + if ((len = xutftowcs_path_ex(pattern, name, MAX_LONG_PATH, -1, + MAX_PATH - 2, core_long_paths)) < 0) return NULL; - /* append optional '/' and wildcard '*' */ + /* + * append optional '\' and wildcard '*'. Note: we need to use '\' as + * Windows doesn't translate '/' to '\' for "\\?\"-prefixed paths. 
+ */ if (len && !is_dir_sep(pattern[len - 1])) - pattern[len++] = '/'; + pattern[len++] = '\\'; pattern[len++] = '*'; pattern[len] = 0; @@ -90,7 +94,7 @@ DIR *dirent_opendir(const char *name) } /* initialize DIR structure and copy first dir entry */ - dir = xmalloc(sizeof(dirent_DIR) + MAX_PATH); + dir = xmalloc(sizeof(dirent_DIR) + MAX_LONG_PATH); dir->base_dir.preaddir = (struct dirent *(*)(DIR *dir)) dirent_readdir; dir->base_dir.pclosedir = (int (*)(DIR *dir)) dirent_closedir; dir->dd_handle = h; diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 351b1dff7fff65..3c1ded64ae6c56 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -78,7 +78,7 @@ struct fsentry { struct heap_fsentry { union { struct fsentry ent; - char dummy[sizeof(struct fsentry) + MAX_PATH]; + char dummy[sizeof(struct fsentry) + MAX_LONG_PATH]; } u; }; @@ -121,7 +121,7 @@ static void fsentry_init(struct fsentry *fse, struct fsentry *list, const char *name, size_t len) { fse->list = list; - if (len > MAX_PATH) + if (len > MAX_LONG_PATH) BUG("Trying to allocate fsentry for long path '%.*s'", (int)len, name); memcpy(fse->dirent.d_name, name, len); @@ -222,7 +222,7 @@ static struct fsentry *fseentry_create_entry(struct fscache *cache, static struct fsentry *fsentry_create_list(struct fscache *cache, const struct fsentry *dir, int *dir_not_found) { - wchar_t pattern[MAX_PATH]; + wchar_t pattern[MAX_LONG_PATH]; NTSTATUS status; IO_STATUS_BLOCK iosb; PFILE_FULL_DIR_INFORMATION di; @@ -233,13 +233,11 @@ static struct fsentry *fsentry_create_list(struct fscache *cache, const struct f *dir_not_found = 0; - /* convert name to UTF-16 and check length < MAX_PATH */ - if ((wlen = xutftowcsn(pattern, dir->dirent.d_name, MAX_PATH, - dir->len)) < 0) { - if (errno == ERANGE) - errno = ENAMETOOLONG; + /* convert name to UTF-16 and check length */ + if ((wlen = xutftowcs_path_ex(pattern, dir->dirent.d_name, + MAX_LONG_PATH, dir->len, MAX_PATH - 2, + core_long_paths)) < 0) return NULL; 
- } /* handle CWD */ if (!wlen) { diff --git a/t/t2031-checkout-long-paths.sh b/t/t2031-checkout-long-paths.sh new file mode 100755 index 00000000000000..f30f8920ca689c --- /dev/null +++ b/t/t2031-checkout-long-paths.sh @@ -0,0 +1,102 @@ +#!/bin/sh + +test_description='checkout long paths on Windows + +Ensures that Git for Windows can deal with long paths (>260) enabled via core.longpaths' + +. ./test-lib.sh + +if test_have_prereq !MINGW +then + skip_all='skipping MINGW specific long paths test' + test_done +fi + +test_expect_success setup ' + p=longpathxx && # -> 10 + p=$p$p$p$p$p && # -> 50 + p=$p$p$p$p$p && # -> 250 + + path=${p}/longtestfile && # -> 263 (MAX_PATH = 260) + + blob=$(echo foobar | git hash-object -w --stdin) && + + printf "100644 %s 0\t%s\n" "$blob" "$path" | + git update-index --add --index-info && + git commit -m initial -q +' + +test_expect_success 'checkout of long paths without core.longpaths fails' ' + git config core.longpaths false && + test_must_fail git checkout -f 2>error && + grep -q "Filename too long" error && + test ! 
-d longpa* +' + +test_expect_success 'checkout of long paths with core.longpaths works' ' + git config core.longpaths true && + git checkout -f && + test_path_is_file longpa*/longtestfile +' + +test_expect_success 'update of long paths' ' + echo frotz >>$(ls longpa*/longtestfile) && + echo $path > expect && + git ls-files -m > actual && + test_cmp expect actual && + git add $path && + git commit -m second && + git grep "frotz" HEAD -- $path +' + +test_expect_success cleanup ' + # bash cannot delete the trash dir if it contains a long path + # lets help cleaning up (unless in debug mode) + if test -z "$debug" + then + rm -rf longpa~1 + fi +' + +# check that the template used in the test won't be too long: +abspath="$(pwd)"/testdir +test ${#abspath} -gt 230 || +test_set_prereq SHORTABSPATH + +test_expect_success SHORTABSPATH 'clean up path close to MAX_PATH' ' + p=/123456789abcdef/123456789abcdef/123456789abcdef/123456789abc/ef && + p=y$p$p$p$p && + subdir="x$(echo "$p" | tail -c $((253 - ${#abspath})) - )" && + # Now, $abspath/$subdir has exactly 254 characters, and is inside CWD + p2="$abspath/$subdir" && + test 254 = ${#p2} && + + # Be careful to overcome path limitations of the MSys tools and split + # the $subdir into two parts. ($subdir2 has to contain 16 chars and a + # slash somewhere following; that is why we asked for abspath <= 230 and + # why we placed a slash near the end of the $subdir template.) + subdir2=${subdir#????????????????*/} && + subdir1=testdir/${subdir%/$subdir2} && + mkdir -p "$subdir1" && + i=0 && + # The most important case is when absolute path is 258 characters long, + # and that will be when i == 4. + while test $i -le 7 + do + mkdir -p $subdir2 && + touch $subdir2/one-file && + mv ${subdir2%%/*} "$subdir1/" && + subdir2=z${subdir2} && + i=$(($i+1)) || + exit 1 + done && + + # now check that git is able to clear the tree: + (cd testdir && + git init && + git config core.longpaths yes && + git clean -fdx) && + test ! 
-d "$subdir1" +' + +test_done diff --git a/t/t7422-submodule-long-path.sh b/t/t7422-submodule-long-path.sh index 1c03c76080a429..0da221b45aeca8 100755 --- a/t/t7422-submodule-long-path.sh +++ b/t/t7422-submodule-long-path.sh @@ -11,15 +11,20 @@ This test verifies that "git submodule" initialization, update and clones work, TEST_NO_CREATE_REPO=1 . ./test-lib.sh -longpath="" -for (( i=0; i<4; i++ )); do - longpath="0123456789abcdefghijklmnopqrstuvwxyz$longpath" -done -# Pick a substring maximum of 90 characters -# This should be good, since we'll add on a lot for temp directories -longpath=${longpath:0:90}; export longpath +# cloning a submodule calls is_git_directory("$path/../.git/modules/$path"), +# which effectively limits the maximum length to PATH_MAX / 2 minus some +# overhead; start with 3 * 36 = 108 chars (test 2 fails if >= 110) +longpath36=0123456789abcdefghijklmnopqrstuvwxyz +longpath180=$longpath36$longpath36$longpath36$longpath36$longpath36 -test_expect_failure 'submodule with a long path' ' +# the git database must fit within PATH_MAX, which limits the submodule name +# to PATH_MAX - len(pwd) - ~90 (= len("/objects//") + 40-byte sha1 + some +# overhead from the test case) +pwd=$(pwd) +pwdlen=$(echo "$pwd" | wc -c) +longpath=$(echo $longpath180 | cut -c 1-$((170-$pwdlen))) + +test_expect_success 'submodule with a long path' ' GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME= \ git -c init.defaultBranch=long init --bare remote && test_create_repo bundle1 && @@ -58,7 +63,7 @@ test_expect_failure 'submodule with a long path' ' ) ' -test_expect_failure 'recursive submodule with a long path' ' +test_expect_success 'recursive submodule with a long path' ' GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME= \ git -c init.defaultBranch=long init --bare super && test_create_repo child && @@ -100,6 +105,5 @@ test_expect_failure 'recursive submodule with a long path' ' ) ) ' -unset longpath test_done From 8c5823dafdaab45d3020ea75b793165ee0016855 Mon Sep 17 00:00:00 2001 From: Karsten Blees 
<blees@dcon.de> Date: Sat, 9 May 2015 02:11:48 +0200 Subject: [PATCH 245/303] compat/terminal.c: only use the Windows console if bash 'read -r' fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accessing the Windows console through the special CONIN$ / CONOUT$ devices doesn't work properly for non-ASCII usernames and passwords. It also doesn't work for terminal emulators that hide the native console window (such as mintty), and 'TERM=xterm*' is not necessarily a reliable indicator for such terminals. The new shell_prompt() function, on the other hand, works fine for both MSys1 and MSys2, in native console windows as well as mintty, and properly supports Unicode. It just needs bash on the path (for 'read -s', which is bash-specific). On Windows, try to use the shell to read from the terminal. If that fails with ENOENT (i.e. bash was not found), use CONIN/OUT as fallback. Note: To test this, create a UTF-8 credential file with non-ASCII chars, e.g. in git-bash: 'echo url=http://täst.com > cred.txt'. Then in git-cmd, 'git credential fill <cred.txt' works (shell version), while calling git without the git-wrapper (i.e. 'mingw64\bin\git credential fill <cred.txt') mangles non-ASCII chars in both console output and input. 
Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/terminal.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/compat/terminal.c b/compat/terminal.c index a2cfb0b5087216..4c74208cd6a2f1 100644 --- a/compat/terminal.c +++ b/compat/terminal.c @@ -434,6 +434,7 @@ static char *shell_prompt(const char *prompt, int echo) strvec_pushv(&child.args, read_input); child.in = -1; child.out = -1; + child.silent_exec_failure = 1; if (start_command(&child)) return NULL; @@ -477,11 +478,14 @@ char *git_terminal_prompt(const char *prompt, int echo) static struct strbuf buf = STRBUF_INIT; int r; FILE *input_fh, *output_fh; + #ifdef GIT_WINDOWS_NATIVE - const char *term = getenv("TERM"); - if (term && starts_with(term, "xterm")) - return shell_prompt(prompt, echo); + /* try shell_prompt first, fall back to CONIN/OUT if bash is missing */ + char *result = shell_prompt(prompt, echo); + if (result || errno != ENOENT) + return result; + #endif input_fh = fopen(INPUT_PATH, "r" FORCE_TEXT); From 366cf5c1577a9e191f0733fb3efd7a203768ae3f Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Mon, 11 May 2015 19:58:14 +0200 Subject: [PATCH 246/303] lockfile.c: use is_dir_sep() instead of hardcoded '/' checks Signed-off-by: Karsten Blees <blees@dcon.de> --- lockfile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lockfile.c b/lockfile.c index cc9a4b84283be3..62e4c9a14f0060 100644 --- a/lockfile.c +++ b/lockfile.c @@ -17,14 +17,14 @@ static void trim_last_path_component(struct strbuf *path) int i = path->len; /* back up past trailing slashes, if any */ - while (i && path->buf[i - 1] == '/') + while (i && is_dir_sep(path->buf[i - 1])) i--; /* * then go backwards until a slash, or the beginning of the * string */ - while (i && path->buf[i - 1] != '/') + while (i && !is_dir_sep(path->buf[i - 1])) i--; strbuf_setlen(path, i); From f391b197a131920bec8fb7285daf706cc45e1c21 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> 
Date: Sat, 5 Jul 2014 00:00:36 +0200 Subject: [PATCH 247/303] Win32: fix 'lstat("dir/")' with long paths Use a sufficiently large buffer to strip the trailing slash. Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index ae9e9644de37c3..520f769245a64c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -899,7 +899,7 @@ static int do_lstat(int follow, const char *file_name, struct stat *buf) static int do_stat_internal(int follow, const char *file_name, struct stat *buf) { int namelen; - char alt_name[PATH_MAX]; + char alt_name[MAX_LONG_PATH]; if (!do_lstat(follow, file_name, buf)) return 0; @@ -915,7 +915,7 @@ static int do_stat_internal(int follow, const char *file_name, struct stat *buf) return -1; while (namelen && file_name[namelen-1] == '/') --namelen; - if (!namelen || namelen >= PATH_MAX) + if (!namelen || namelen >= MAX_LONG_PATH) return -1; memcpy(alt_name, file_name, namelen); From 26fa0804cc9dd814e6a3b0f0fefbcc0837596999 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 23 Feb 2018 02:50:03 +0100 Subject: [PATCH 248/303] mingw (git_terminal_prompt): do fall back to CONIN$/CONOUT$ method To support Git Bash running in a MinTTY, we use a dirty trick to access the MSYS2 pseudo terminal: we execute a Bash snippet that accesses /dev/tty. The idea was to fall back to writing to/reading from CONOUT$/CONIN$ if that Bash call failed because Bash was not found. However, we should fall back even in other error conditions, because we have not successfully read the user input. Let's make it so. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/terminal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compat/terminal.c b/compat/terminal.c index 4c74208cd6a2f1..70a18f82f619ae 100644 --- a/compat/terminal.c +++ b/compat/terminal.c @@ -483,7 +483,7 @@ char *git_terminal_prompt(const char *prompt, int echo) /* try shell_prompt first, fall back to CONIN/OUT if bash is missing */ char *result = shell_prompt(prompt, echo); - if (result || errno != ENOENT) + if (result) return result; #endif From ae584286f9861bd9abdf68bca42aa528ec55583f Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Tue, 12 May 2015 11:09:01 +0200 Subject: [PATCH 249/303] Win32: don't call GetFileAttributes twice in mingw_lstat() GetFileAttributes cannot handle paths with trailing dir separator. The current [l]stat implementation calls GetFileAttributes twice if the path has trailing slashes (first with the original path passed to [l]stat, and a second time with a path copy with trailing '/' removed). With Unicode conversion, we get the length of the path for free and also have a (wide char) buffer that can be modified. Remove trailing directory separators before calling the Win32 API. 
Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 48 ++++++++++++------------------------------------ 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 520f769245a64c..b50e242950ad41 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -828,8 +828,17 @@ static int do_lstat(int follow, const char *file_name, struct stat *buf) { WIN32_FILE_ATTRIBUTE_DATA fdata; wchar_t wfilename[MAX_LONG_PATH]; - if (xutftowcs_long_path(wfilename, file_name) < 0) + int wlen = xutftowcs_long_path(wfilename, file_name); + if (wlen < 0) + return -1; + + /* strip trailing '/', or GetFileAttributes will fail */ + while (wlen && is_dir_sep(wfilename[wlen - 1])) + wfilename[--wlen] = 0; + if (!wlen) { + errno = ENOENT; return -1; + } if (GetFileAttributesExW(wfilename, GetFileExInfoStandard, &fdata)) { buf->st_ino = 0; @@ -890,39 +899,6 @@ static int do_lstat(int follow, const char *file_name, struct stat *buf) return -1; } -/* We provide our own lstat/fstat functions, since the provided - * lstat/fstat functions are so slow. These stat functions are - * tailored for Git's usage (read: fast), and are not meant to be - * complete. Note that Git stat()s are redirected to mingw_lstat() - * too, since Windows doesn't really handle symlinks that well. 
- */ -static int do_stat_internal(int follow, const char *file_name, struct stat *buf) -{ - int namelen; - char alt_name[MAX_LONG_PATH]; - - if (!do_lstat(follow, file_name, buf)) - return 0; - - /* if file_name ended in a '/', Windows returned ENOENT; - * try again without trailing slashes - */ - if (errno != ENOENT) - return -1; - - namelen = strlen(file_name); - if (namelen && file_name[namelen-1] != '/') - return -1; - while (namelen && file_name[namelen-1] == '/') - --namelen; - if (!namelen || namelen >= MAX_LONG_PATH) - return -1; - - memcpy(alt_name, file_name, namelen); - alt_name[namelen] = 0; - return do_lstat(follow, alt_name, buf); -} - int (*lstat)(const char *file_name, struct stat *buf) = mingw_lstat; static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) @@ -950,11 +926,11 @@ static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) int mingw_lstat(const char *file_name, struct stat *buf) { - return do_stat_internal(0, file_name, buf); + return do_lstat(0, file_name, buf); } int mingw_stat(const char *file_name, struct stat *buf) { - return do_stat_internal(1, file_name, buf); + return do_lstat(1, file_name, buf); } int mingw_fstat(int fd, struct stat *buf) From a28d9045b3089ddb798dcd7eb0cb9eb93f8ea4e9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Wed, 30 Aug 2017 01:28:22 +0200 Subject: [PATCH 250/303] mingw: ensure that core.longPaths is handled *always* A ton of Git commands simply do not read (or at least parse) the core.* settings. This is not good, as Git for Windows relies on the core.longPaths setting to be read quite early on. So let's just make sure that all commands read the config and give platform_core_config() a chance. 
This patch teaches tons of Git commands to respect the config setting `core.longPaths = true`, including `pack-refs`, thereby fixing https://github.com/git-for-windows/git/issues/1218 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- builtin/archive.c | 2 ++ builtin/bisect--helper.c | 2 ++ builtin/bundle.c | 2 ++ builtin/check-ref-format.c | 2 ++ builtin/clone.c | 2 ++ builtin/column.c | 2 ++ builtin/credential-store.c | 3 +++ builtin/fetch-pack.c | 2 ++ builtin/get-tar-commit-id.c | 2 ++ builtin/log.c | 1 + builtin/ls-remote.c | 2 ++ builtin/mailinfo.c | 2 ++ builtin/mailsplit.c | 2 ++ builtin/merge-index.c | 3 +++ builtin/merge-tree.c | 3 +++ builtin/mktag.c | 1 + builtin/mktree.c | 2 ++ builtin/pack-refs.c | 1 + builtin/prune-packed.c | 2 ++ builtin/prune.c | 3 +++ builtin/reflog.c | 1 + builtin/remote-ext.c | 2 ++ builtin/remote.c | 1 + builtin/rev-parse.c | 1 + builtin/show-index.c | 2 ++ builtin/show-ref.c | 1 + builtin/stripspace.c | 5 ++--- builtin/submodule--helper.c | 1 + builtin/upload-archive.c | 3 +++ http-backend.c | 1 + refs.c | 2 +- 31 files changed, 57 insertions(+), 4 deletions(-) diff --git a/builtin/archive.c b/builtin/archive.c index f094390ee01f81..15fe853e0e5af9 100644 --- a/builtin/archive.c +++ b/builtin/archive.c @@ -9,6 +9,7 @@ #include "parse-options.h" #include "pkt-line.h" #include "sideband.h" +#include "config.h" static void create_output_file(const char *output_file) { @@ -93,6 +94,7 @@ int cmd_archive(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, local_opts, NULL, PARSE_OPT_KEEP_ALL); diff --git a/builtin/bisect--helper.c b/builtin/bisect--helper.c index 501245fac95217..91f68252790607 100644 --- a/builtin/bisect--helper.c +++ b/builtin/bisect--helper.c @@ -9,6 +9,7 @@ #include "prompt.h" #include "quote.h" #include "revision.h" +#include "config.h" static GIT_PATH_FUNC(git_path_bisect_terms, "BISECT_TERMS") static 
GIT_PATH_FUNC(git_path_bisect_expected_rev, "BISECT_EXPECTED_REV") @@ -1324,6 +1325,7 @@ int cmd_bisect__helper(int argc, const char **argv, const char *prefix) }; struct bisect_terms terms = { .term_good = NULL, .term_bad = NULL }; + git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, git_bisect_helper_usage, PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_UNKNOWN_OPT); diff --git a/builtin/bundle.c b/builtin/bundle.c index e80efce3a420a0..aca68bb39d8a22 100644 --- a/builtin/bundle.c +++ b/builtin/bundle.c @@ -3,6 +3,7 @@ #include "parse-options.h" #include "cache.h" #include "bundle.h" +#include "config.h" /* * Basic handler for bundle files to connect repositories via sneakernet. @@ -110,6 +111,7 @@ static int cmd_bundle_verify(int argc, const char **argv, const char *prefix) { }; char *bundle_file; + git_config(git_default_config, NULL); argc = parse_options_cmd_bundle(argc, argv, prefix, builtin_bundle_verify_usage, options, &bundle_file); /* bundle internals use argv[1] as further parameters */ diff --git a/builtin/check-ref-format.c b/builtin/check-ref-format.c index fd0e5f86832a0e..2269d8df7d83c0 100644 --- a/builtin/check-ref-format.c +++ b/builtin/check-ref-format.c @@ -6,6 +6,7 @@ #include "refs.h" #include "builtin.h" #include "strbuf.h" +#include "config.h" static const char builtin_check_ref_format_usage[] = "git check-ref-format [--normalize] [<options>] <refname>\n" @@ -60,6 +61,7 @@ int cmd_check_ref_format(int argc, const char **argv, const char *prefix) char *to_free = NULL; int ret = 1; + git_config(git_default_config, NULL); if (argc == 2 && !strcmp(argv[1], "-h")) usage(builtin_check_ref_format_usage); diff --git a/builtin/clone.c b/builtin/clone.c index d269d6fec68ce4..cde660b2a67c35 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -905,6 +905,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) struct transport_ls_refs_options transport_ls_refs_options = TRANSPORT_LS_REFS_OPTIONS_INIT; + 
git_config(git_default_core_config, NULL); + packet_trace_identity("clone"); git_config(git_clone_config, NULL); diff --git a/builtin/column.c b/builtin/column.c index 158fdf53d9fb9c..69718917526868 100644 --- a/builtin/column.c +++ b/builtin/column.c @@ -34,6 +34,8 @@ int cmd_column(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_core_config, NULL); + /* This one is special and must be the first one */ if (argc > 1 && starts_with(argv[1], "--command=")) { command = argv[1] + 10; diff --git a/builtin/credential-store.c b/builtin/credential-store.c index 62a4f3c2653143..a693b235f34208 100644 --- a/builtin/credential-store.c +++ b/builtin/credential-store.c @@ -4,6 +4,7 @@ #include "credential.h" #include "string-list.h" #include "parse-options.h" +#include "config.h" static struct lock_file credential_lock; @@ -165,6 +166,8 @@ int cmd_credential_store(int argc, const char **argv, const char *prefix) umask(077); + git_config(git_default_config, NULL); + argc = parse_options(argc, (const char **)argv, prefix, options, usage, 0); if (argc != 1) usage_with_options(usage, options); diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index afe679368deec2..d958b41b47ae56 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -5,6 +5,7 @@ #include "connect.h" #include "oid-array.h" #include "protocol.h" +#include "config.h" static const char fetch_pack_usage[] = "git fetch-pack [--all] [--stdin] [--quiet | -q] [--keep | -k] [--thin] " @@ -57,6 +58,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) struct packet_reader reader; enum protocol_version version; + git_config(git_default_config, NULL); fetch_if_missing = 0; packet_trace_identity("fetch-pack"); diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c index 491af9202dc937..a623518ae7dc23 100644 --- a/builtin/get-tar-commit-id.c +++ b/builtin/get-tar-commit-id.c @@ -6,6 +6,7 @@ #include "tar.h" #include "builtin.h" #include 
"quote.h" +#include "config.h" static const char builtin_get_tar_commit_id_usage[] = "git get-tar-commit-id"; @@ -27,6 +28,7 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) if (argc != 1) usage(builtin_get_tar_commit_id_usage); + git_config(git_default_config, NULL); n = read_in_full(0, buffer, HEADERSIZE); if (n < 0) die_errno("git get-tar-commit-id: read error"); diff --git a/builtin/log.c b/builtin/log.c index ee19dc5d450c57..a68a6bfb9e7af3 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -2437,6 +2437,7 @@ int cmd_cherry(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, cherry_usage, 0); switch (argc) { diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index df44e5cc0d1171..7398b7c4aa4cc0 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -4,6 +4,7 @@ #include "ref-filter.h" #include "remote.h" #include "refs.h" +#include "config.h" static const char * const ls_remote_usage[] = { N_("git ls-remote [--heads] [--tags] [--refs] [--upload-pack=<exec>]\n" @@ -86,6 +87,7 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) packet_trace_identity("ls-remote"); + git_config(git_default_config, NULL); if (argc > 1) { int i; CALLOC_ARRAY(pattern, argc); diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 01d16ef9e5a2d6..df6cc4172120f1 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -8,6 +8,7 @@ #include "strbuf.h" #include "mailinfo.h" #include "parse-options.h" +#include "config.h" static const char * const mailinfo_usage[] = { /* TRANSLATORS: keep <> in "<" mail ">" info. 
*/ @@ -78,6 +79,7 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); setup_mailinfo(&mi); meta_charset.policy = CHARSET_DEFAULT; diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c index 73509f651bda48..939c5b41975832 100644 --- a/builtin/mailsplit.c +++ b/builtin/mailsplit.c @@ -8,6 +8,7 @@ #include "builtin.h" #include "string-list.h" #include "strbuf.h" +#include "config.h" static const char git_mailsplit_usage[] = "git mailsplit [-d<prec>] [-f<n>] [-b] [--keep-cr] -o<directory> [(<mbox>|<Maildir>)...]"; @@ -277,6 +278,7 @@ int cmd_mailsplit(int argc, const char **argv, const char *prefix) const char **argp; static const char *stdin_only[] = { "-", NULL }; + git_config(git_default_config, NULL); for (argp = argv+1; *argp; argp++) { const char *arg = *argp; diff --git a/builtin/merge-index.c b/builtin/merge-index.c index c0383fe9df9a3e..24e5c1f6ea9f7c 100644 --- a/builtin/merge-index.c +++ b/builtin/merge-index.c @@ -1,6 +1,7 @@ #define USE_THE_INDEX_COMPATIBILITY_MACROS #include "builtin.h" #include "run-command.h" +#include "config.h" static const char *pgm; static int one_shot, quiet; @@ -77,6 +78,8 @@ int cmd_merge_index(int argc, const char **argv, const char *prefix) */ signal(SIGCHLD, SIG_DFL); + git_config(git_default_config, NULL); + if (argc < 3) usage("git merge-index [-o] [-q] <merge-program> (-a | [--] [<filename>...])"); diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index ae5782917b96c5..14126bf33c2d72 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -12,6 +12,7 @@ #include "exec-cmd.h" #include "merge-blobs.h" #include "quote.h" +#include "config.h" static int line_termination = '\n'; @@ -508,6 +509,8 @@ int cmd_merge_tree(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); + /* Parse arguments */ original_argc = argc - 1; /* ignoring argv[0] */ argc = parse_options(argc, argv, prefix, mt_options, 
diff --git a/builtin/mktag.c b/builtin/mktag.c index 5d22909122d195..275feb0667a4bf 100644 --- a/builtin/mktag.c +++ b/builtin/mktag.c @@ -84,6 +84,7 @@ int cmd_mktag(int argc, const char **argv, const char *prefix) builtin_mktag_options, builtin_mktag_usage, 0); + git_config(git_default_config, NULL); if (strbuf_read(&buf, 0, 0) < 0) die_errno(_("could not read from stdin")); diff --git a/builtin/mktree.c b/builtin/mktree.c index 06d81400f55815..12386110b452aa 100644 --- a/builtin/mktree.c +++ b/builtin/mktree.c @@ -8,6 +8,7 @@ #include "tree.h" #include "parse-options.h" #include "object-store.h" +#include "config.h" static struct treeent { unsigned mode; @@ -164,6 +165,7 @@ int cmd_mktree(int ac, const char **av, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); ac = parse_options(ac, av, prefix, option, mktree_usage, 0); getline_fn = nul_term_line ? strbuf_getline_nul : strbuf_getline_lf; diff --git a/builtin/pack-refs.c b/builtin/pack-refs.c index cfbd5c36c7640b..28f79b993862e7 100644 --- a/builtin/pack-refs.c +++ b/builtin/pack-refs.c @@ -3,6 +3,7 @@ #include "parse-options.h" #include "refs.h" #include "repository.h" +#include "config.h" static char const * const pack_refs_usage[] = { N_("git pack-refs [<options>]"), diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c index da3273a268b47d..221fd881eb7b34 100644 --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "parse-options.h" #include "prune-packed.h" +#include "config.h" static const char * const prune_packed_usage[] = { "git prune-packed [-n | --dry-run] [-q | --quiet]", @@ -18,6 +19,7 @@ int cmd_prune_packed(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, prune_packed_options, prune_packed_usage, 0); diff --git a/builtin/prune.c b/builtin/prune.c index df376b2ed1e092..5dfd6a47d52fcb 100644 --- a/builtin/prune.c +++ 
b/builtin/prune.c @@ -9,6 +9,7 @@ #include "prune-packed.h" #include "object-store.h" #include "shallow.h" +#include "config.h" static const char * const prune_usage[] = { N_("git prune [-n] [-v] [--progress] [--expire <time>] [--] [<head>...]"), @@ -152,6 +153,8 @@ int cmd_prune(int argc, const char **argv, const char *prefix) }; char *s; + git_config(git_default_config, NULL); + expire = TIME_MAX; save_commit_buffer = 0; read_replace_refs = 0; diff --git a/builtin/reflog.c b/builtin/reflog.c index 57c5c0d061c449..6ba7e6e67aef28 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -418,6 +418,7 @@ int cmd_reflog(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, reflog_usage, PARSE_OPT_SUBCOMMAND_OPTIONAL | PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0 | diff --git a/builtin/remote-ext.c b/builtin/remote-ext.c index fd3538d4f0e692..cdd6fddd65b1e8 100644 --- a/builtin/remote-ext.c +++ b/builtin/remote-ext.c @@ -2,6 +2,7 @@ #include "transport.h" #include "run-command.h" #include "pkt-line.h" +#include "config.h" static const char usage_msg[] = "git remote-ext <remote> <url>"; @@ -198,5 +199,6 @@ int cmd_remote_ext(int argc, const char **argv, const char *prefix) if (argc != 3) usage(usage_msg); + git_config(git_default_config, NULL); return command_loop(argv[2]); } diff --git a/builtin/remote.c b/builtin/remote.c index 985b845a18bae8..b3a642014317f5 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -1765,6 +1765,7 @@ int cmd_remote(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, builtin_remote_usage, PARSE_OPT_SUBCOMMAND_OPTIONAL); diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 8f61050bde8843..03341cc85afd7b 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -428,6 +428,7 @@ static int cmd_parseopt(int argc, const char **argv, 
const char *prefix) struct option *opts = NULL; int onb = 0, osz = 0, unb = 0, usz = 0; + git_config(git_default_config, NULL); strbuf_addstr(&parsed, "set --"); argc = parse_options(argc, argv, prefix, parseopt_opts, parseopt_usage, PARSE_OPT_KEEP_DASHDASH); diff --git a/builtin/show-index.c b/builtin/show-index.c index 0e0b9fb95bc113..ff2cc3ee48291c 100644 --- a/builtin/show-index.c +++ b/builtin/show-index.c @@ -2,6 +2,7 @@ #include "cache.h" #include "pack.h" #include "parse-options.h" +#include "config.h" static const char *const show_index_usage[] = { "git show-index [--object-format=<hash-algorithm>]", @@ -23,6 +24,7 @@ int cmd_show_index(int argc, const char **argv, const char *prefix) OPT_END() }; + git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, show_index_options, show_index_usage, 0); if (hash_name) { diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 48569061087416..6ba4aa450e1781 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -7,6 +7,7 @@ #include "tag.h" #include "string-list.h" #include "parse-options.h" +#include "config.h" static const char * const show_ref_usage[] = { N_("git show-ref [-q | --quiet] [--verify] [--head] [-d | --dereference] [-s | --hash[=<n>]] [--abbrev[=<n>]] [--tags] [--heads] [--] [<pattern>...]"), diff --git a/builtin/stripspace.c b/builtin/stripspace.c index 1e34cf2bebdf51..166211662f7b70 100644 --- a/builtin/stripspace.c +++ b/builtin/stripspace.c @@ -46,10 +46,9 @@ int cmd_stripspace(int argc, const char **argv, const char *prefix) if (argc) usage_with_options(stripspace_usage, options); - if (mode == STRIP_COMMENTS || mode == COMMENT_LINES) { + if (mode == STRIP_COMMENTS || mode == COMMENT_LINES) setup_git_directory_gently(&nongit); - git_config(git_default_config, NULL); - } + git_config(git_default_config, NULL); if (strbuf_read(&buf, 0, 1024) < 0) die_errno("could not read the input"); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 
0b4acb442b2093..49336b7ee185db 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -3425,6 +3425,7 @@ static struct cmd_struct commands[] = { int cmd_submodule__helper(int argc, const char **argv, const char *prefix) { int i; + git_config(git_default_config, NULL); if (argc < 2 || !strcmp(argv[1], "-h")) usage("git submodule--helper <command>"); diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c index 98d028dae67908..b639d78b2cabf1 100644 --- a/builtin/upload-archive.c +++ b/builtin/upload-archive.c @@ -8,6 +8,7 @@ #include "sideband.h" #include "run-command.h" #include "strvec.h" +#include "config.h" static const char upload_archive_usage[] = "git upload-archive <repo>"; @@ -28,6 +29,7 @@ int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix) if (!enter_repo(argv[1], 0)) die("'%s' does not appear to be a git repository", argv[1]); + git_config(git_default_config, NULL); init_archivers(); /* put received options in sent_argv[] */ @@ -79,6 +81,7 @@ int cmd_upload_archive(int argc, const char **argv, const char *prefix) { struct child_process writer = CHILD_PROCESS_INIT; + git_config(git_default_config, NULL); if (argc == 2 && !strcmp(argv[1], "-h")) usage(upload_archive_usage); diff --git a/http-backend.c b/http-backend.c index 6eb3b2fe51c6fe..e4b4e7597734f2 100644 --- a/http-backend.c +++ b/http-backend.c @@ -783,6 +783,7 @@ int cmd_main(int argc, const char **argv) setup_path(); if (!enter_repo(dir, 0)) not_found(&hdr, "Not a git repository: '%s'", dir); + git_config(git_default_config, NULL); if (!getenv("GIT_HTTP_EXPORT_ALL") && access("git-daemon-export-ok", F_OK) ) not_found(&hdr, "Repository not exported: '%s'", dir); diff --git a/refs.c b/refs.c index c89d558892569b..811b2c0b02f383 100644 --- a/refs.c +++ b/refs.c @@ -1411,7 +1411,7 @@ int parse_hide_refs_config(const char *var, const char *value, const char *secti } string_list_append(hide_refs, ref); } - return 0; + return 
git_default_config(var, value, NULL); } int ref_is_hidden(const char *refname, const char *refname_full) From d5ac68ee31f0b34fc4045e70fe5c39e8f1bf5c16 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler <jeffhost@microsoft.com> Date: Fri, 25 Mar 2022 16:56:04 -0400 Subject: [PATCH 251/303] compat/fsmonitor/fsm-*-win32: support long paths Update wchar_t buffers to use MAX_LONG_PATH instead of MAX_PATH and call xutftowcs_long_path() in the Win32 backend source files. Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> --- compat/fsmonitor/fsm-health-win32.c | 6 +++--- compat/fsmonitor/fsm-listen-win32.c | 18 +++++++++--------- compat/fsmonitor/fsm-settings-win32.c | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/compat/fsmonitor/fsm-health-win32.c b/compat/fsmonitor/fsm-health-win32.c index 2ea08c1d4e81ec..5551d6d97dee87 100644 --- a/compat/fsmonitor/fsm-health-win32.c +++ b/compat/fsmonitor/fsm-health-win32.c @@ -32,7 +32,7 @@ struct fsm_health_data struct wt_moved { - wchar_t wpath[MAX_PATH + 1]; + wchar_t wpath[MAX_LONG_PATH + 1]; BY_HANDLE_FILE_INFORMATION bhfi; } wt_moved; }; @@ -141,8 +141,8 @@ static int has_worktree_moved(struct fsmonitor_daemon_state *state, return 0; case CTX_INIT: - if (xutftowcs_path(data->wt_moved.wpath, - state->path_worktree_watch.buf) < 0) { + if (xutftowcs_long_path(data->wt_moved.wpath, + state->path_worktree_watch.buf) < 0) { error(_("could not convert to wide characters: '%s'"), state->path_worktree_watch.buf); return -1; diff --git a/compat/fsmonitor/fsm-listen-win32.c b/compat/fsmonitor/fsm-listen-win32.c index 03df8d951b871f..df8704aa5b2ca4 100644 --- a/compat/fsmonitor/fsm-listen-win32.c +++ b/compat/fsmonitor/fsm-listen-win32.c @@ -25,7 +25,7 @@ struct one_watch DWORD count; struct strbuf path; - wchar_t wpath_longname[MAX_PATH + 1]; + wchar_t wpath_longname[MAX_LONG_PATH + 1]; DWORD wpath_longname_len; HANDLE hDir; @@ -128,8 +128,8 @@ static int normalize_path_in_utf8(wchar_t *wpath, DWORD wpath_len, 
*/ static void check_for_shortnames(struct one_watch *watch) { - wchar_t buf_in[MAX_PATH + 1]; - wchar_t buf_out[MAX_PATH + 1]; + wchar_t buf_in[MAX_LONG_PATH + 1]; + wchar_t buf_out[MAX_LONG_PATH + 1]; wchar_t *last; wchar_t *p; @@ -194,8 +194,8 @@ static enum get_relative_result get_relative_longname( const wchar_t *wpath, DWORD wpath_len, wchar_t *wpath_longname, size_t bufsize_wpath_longname) { - wchar_t buf_in[2 * MAX_PATH + 1]; - wchar_t buf_out[MAX_PATH + 1]; + wchar_t buf_in[2 * MAX_LONG_PATH + 1]; + wchar_t buf_out[MAX_LONG_PATH + 1]; DWORD root_len; DWORD out_len; @@ -296,10 +296,10 @@ static struct one_watch *create_watch(struct fsmonitor_daemon_state *state, FILE_SHARE_WRITE | FILE_SHARE_READ | FILE_SHARE_DELETE; HANDLE hDir; DWORD len_longname; - wchar_t wpath[MAX_PATH + 1]; - wchar_t wpath_longname[MAX_PATH + 1]; + wchar_t wpath[MAX_LONG_PATH + 1]; + wchar_t wpath_longname[MAX_LONG_PATH + 1]; - if (xutftowcs_path(wpath, path) < 0) { + if (xutftowcs_long_path(wpath, path) < 0) { error(_("could not convert to wide characters: '%s'"), path); return NULL; } @@ -544,7 +544,7 @@ static int process_worktree_events(struct fsmonitor_daemon_state *state) struct string_list cookie_list = STRING_LIST_INIT_DUP; struct fsmonitor_batch *batch = NULL; const char *p = watch->buffer; - wchar_t wpath_longname[MAX_PATH + 1]; + wchar_t wpath_longname[MAX_LONG_PATH + 1]; /* * If the kernel gets more events than will fit in the kernel diff --git a/compat/fsmonitor/fsm-settings-win32.c b/compat/fsmonitor/fsm-settings-win32.c index e5ec5b0a9f73bc..28e8f3fec92ead 100644 --- a/compat/fsmonitor/fsm-settings-win32.c +++ b/compat/fsmonitor/fsm-settings-win32.c @@ -130,8 +130,8 @@ static int check_remote_protocol(wchar_t *wpath) static enum fsmonitor_reason check_remote(struct repository *r) { int ret; - wchar_t wpath[MAX_PATH]; - wchar_t wfullpath[MAX_PATH]; + wchar_t wpath[MAX_LONG_PATH]; + wchar_t wfullpath[MAX_LONG_PATH]; size_t wlen; UINT driveType; @@ -139,7 +139,7 @@ static 
enum fsmonitor_reason check_remote(struct repository *r) * Do everything in wide chars because the drive letter might be * a multi-byte sequence. See win32_has_dos_drive_prefix(). */ - if (xutftowcs_path(wpath, r->worktree) < 0) + if (xutftowcs_long_path(wpath, r->worktree) < 0) return FSMONITOR_REASON_ERROR; /* @@ -157,7 +157,7 @@ static enum fsmonitor_reason check_remote(struct repository *r) * slashes to backslashes. This is essential to get GetDriveTypeW() * correctly handle some UNC "\\server\share\..." paths. */ - if (!GetFullPathNameW(wpath, MAX_PATH, wfullpath, NULL)) + if (!GetFullPathNameW(wpath, MAX_LONG_PATH, wfullpath, NULL)) return FSMONITOR_REASON_ERROR; driveType = GetDriveTypeW(wfullpath); From 6e09ae2df454c4f3fae8cd617325850a4a8fffeb Mon Sep 17 00:00:00 2001 From: Ben Boeckel <mathstuf@gmail.com> Date: Fri, 22 Apr 2022 09:06:23 -0400 Subject: [PATCH 252/303] clean: suggest using `core.longPaths` if paths are too long to remove On Windows, git repositories may have extra files which need cleaned (e.g., a build directory) that may be arbitrarily deep. Suggest using `core.longPaths` if such situations are encountered. Fixes: #2715 Signed-off-by: Ben Boeckel <mathstuf@gmail.com> --- Documentation/config/advice.txt | 3 +++ advice.c | 1 + advice.h | 1 + builtin/clean.c | 12 ++++++++++++ 4 files changed, 17 insertions(+) diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt index 0d1e04f1ed631c..72776b4cf36b68 100644 --- a/Documentation/config/advice.txt +++ b/Documentation/config/advice.txt @@ -118,6 +118,9 @@ advice.*:: waitingForEditor:: Print a message to the terminal whenever Git is waiting for editor input from the user. + nameTooLong:: + Advice shown if a filepath operation is attempted where the + path was too long. nestedTag:: Advice shown if a user attempts to recursively tag a tag object. 
submoduleAlternateErrorStrategyDie:: diff --git a/advice.c b/advice.c index 1d75981d05db60..76c553a71fb419 100644 --- a/advice.c +++ b/advice.c @@ -48,6 +48,7 @@ static struct { [ADVICE_GRAFT_FILE_DEPRECATED] = { "graftFileDeprecated", 1 }, [ADVICE_IGNORED_HOOK] = { "ignoredHook", 1 }, [ADVICE_IMPLICIT_IDENTITY] = { "implicitIdentity", 1 }, + [ADVICE_NAME_TOO_LONG] = { "nameTooLong", 1 }, [ADVICE_NESTED_TAG] = { "nestedTag", 1 }, [ADVICE_OBJECT_NAME_WARNING] = { "objectNameWarning", 1 }, [ADVICE_PUSH_ALREADY_EXISTS] = { "pushAlreadyExists", 1 }, diff --git a/advice.h b/advice.h index edfb86ecaa8857..bd0a2add268428 100644 --- a/advice.h +++ b/advice.h @@ -26,6 +26,7 @@ struct string_list; ADVICE_GRAFT_FILE_DEPRECATED, ADVICE_IGNORED_HOOK, ADVICE_IMPLICIT_IDENTITY, + ADVICE_NAME_TOO_LONG, ADVICE_NESTED_TAG, ADVICE_OBJECT_NAME_WARNING, ADVICE_PUSH_ALREADY_EXISTS, diff --git a/builtin/clean.c b/builtin/clean.c index ae4c6f37bab701..1852e33ec993b0 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -211,6 +211,9 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, quote_path(path->buf, prefix, &quoted, 0); errno = saved_errno; warning_errno(_(msg_warn_remove_failed), quoted.buf); + if (saved_errno == ENAMETOOLONG) { + advise_if_enabled(ADVICE_NAME_TOO_LONG, _("Setting `core.longPaths` may allow the deletion to succeed.")); + } *dir_gone = 0; } ret = res; @@ -246,6 +249,9 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, quote_path(path->buf, prefix, &quoted, 0); errno = saved_errno; warning_errno(_(msg_warn_remove_failed), quoted.buf); + if (saved_errno == ENAMETOOLONG) { + advise_if_enabled(ADVICE_NAME_TOO_LONG, _("Setting `core.longPaths` may allow the deletion to succeed.")); + } *dir_gone = 0; ret = 1; } @@ -289,6 +295,9 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag, quote_path(path->buf, prefix, &quoted, 0); errno = saved_errno; warning_errno(_(msg_warn_remove_failed),
quoted.buf); + if (saved_errno == ENAMETOOLONG) { + advise_if_enabled(ADVICE_NAME_TOO_LONG, _("Setting `core.longPaths` may allow the deletion to succeed.")); + } *dir_gone = 0; ret = 1; } @@ -1108,6 +1117,9 @@ int cmd_clean(int argc, const char **argv, const char *prefix) qname = quote_path(item->string, NULL, &buf, 0); errno = saved_errno; warning_errno(_(msg_warn_remove_failed), qname); + if (saved_errno == ENAMETOOLONG) { + advise_if_enabled(ADVICE_NAME_TOO_LONG, _("Setting `core.longPaths` may allow the deletion to succeed.")); + } errors++; } else if (!quiet) { qname = quote_path(item->string, NULL, &buf, 0); From 1100ffb89a71134ec8bb6bdc4a3aee7a2dbedef8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 6 Sep 2016 09:50:33 +0200 Subject: [PATCH 253/303] Unbreak interactive GPG prompt upon signing With the recent update in efee955 (gpg-interface: check gpg signature creation status, 2016-06-17), we ask GPG to send all status updates to stderr, and then catch the stderr in an strbuf. But GPG might fail, and send error messages to stderr. And we simply do not show them to the user. Even worse: this swallows any interactive prompt for a passphrase. And detaches stderr from the tty so that the passphrase cannot be read. So while the first problem could be fixed (by printing the captured stderr upon error), the second problem cannot be easily fixed, and presents a major regression. So let's just revert commit efee9553a4f97b2ecd8f49be19606dd4cf7d9c28. 
This fixes https://github.com/git-for-windows/git/issues/871 Cc: Michael J Gruber <git@drmicha.warpmail.net> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- gpg-interface.c | 15 ++------------- t/t7004-tag.sh | 31 ------------------------------- 2 files changed, 2 insertions(+), 44 deletions(-) diff --git a/gpg-interface.c b/gpg-interface.c index 9aa714bdeea81e..cfa6bd4de70796 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -950,12 +950,9 @@ static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, struct child_process gpg = CHILD_PROCESS_INIT; int ret; size_t bottom; - const char *cp; - struct strbuf gpg_status = STRBUF_INIT; strvec_pushl(&gpg.args, use_format->program, - "--status-fd=2", "-bsau", signing_key, NULL); @@ -967,18 +964,10 @@ static int sign_buffer_gpg(struct strbuf *buffer, struct strbuf *signature, */ sigchain_push(SIGPIPE, SIG_IGN); ret = pipe_command(&gpg, buffer->buf, buffer->len, - signature, 1024, &gpg_status, 0); + signature, 1024, NULL, 0); sigchain_pop(SIGPIPE); - for (cp = gpg_status.buf; - cp && (cp = strstr(cp, "[GNUPG:] SIG_CREATED ")); - cp++) { - if (cp == gpg_status.buf || cp[-1] == '\n') - break; /* found */ - } - ret |= !cp; - strbuf_release(&gpg_status); - if (ret) + if (ret || signature->len == bottom) return error(_("gpg failed to sign the data")); /* Strip CR from the line endings, in case we are on Windows. 
*/ diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index 9aa1660651b8a9..4e56d5c70124d0 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh @@ -1375,30 +1375,6 @@ test_expect_success GPG \ 'test_config user.signingkey BobTheMouse && test_must_fail git tag -s -m tail tag-gpg-failure' -# try to produce invalid signature -test_expect_success GPG \ - 'git tag -s fails if gpg is misconfigured (bad signature format)' \ - 'test_config gpg.program echo && - test_must_fail git tag -s -m tail tag-gpg-failure' - -# try to produce invalid signature -test_expect_success GPG 'git verifies tag is valid with double signature' ' - git tag -s -m tail tag-gpg-double-sig && - git cat-file tag tag-gpg-double-sig >tag && - othersigheader=$(test_oid othersigheader) && - sed -ne "/^\$/q;p" tag >new-tag && - cat <<-EOM >>new-tag && - $othersigheader -----BEGIN PGP SIGNATURE----- - someinvaliddata - -----END PGP SIGNATURE----- - EOM - sed -e "1,/^tagger/d" tag >>new-tag && - new_tag=$(git hash-object -t tag -w new-tag) && - git update-ref refs/tags/tag-gpg-double-sig $new_tag && - git verify-tag tag-gpg-double-sig && - git fsck -' - # try to sign with bad user.signingkey test_expect_success GPGSM \ 'git tag -s fails if gpgsm is misconfigured (bad key)' \ @@ -1406,13 +1382,6 @@ test_expect_success GPGSM \ test_config gpg.format x509 && test_must_fail git tag -s -m tail tag-gpg-failure' -# try to produce invalid signature -test_expect_success GPGSM \ - 'git tag -s fails if gpgsm is misconfigured (bad signature format)' \ - 'test_config gpg.x509.program echo && - test_config gpg.format x509 && - test_must_fail git tag -s -m tail tag-gpg-failure' - # try to verify without gpg: rm -rf gpghome From 5e0578d64322b1545d02f67a3c0defd61ec03674 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sat, 16 May 2015 01:18:14 +0200 Subject: [PATCH 254/303] Win32: implement stat() with symlink support With respect to symlinks, the current stat() implementation is almost the same as lstat(): except 
for the file type (st_mode & S_IFMT), it returns information about the link rather than the target. Implement stat by opening the file with as few permissions as possible and calling GetFileInformationByHandle on it. This way, all link resolution is handled by the Windows file system layer. If symlinks are disabled, use lstat() as before, but fail with ELOOP if a symlink would have to be resolved. Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index b50e242950ad41..4bb95b59d2b343 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -928,9 +928,26 @@ int mingw_lstat(const char *file_name, struct stat *buf) { return do_lstat(0, file_name, buf); } + int mingw_stat(const char *file_name, struct stat *buf) { - return do_lstat(1, file_name, buf); + wchar_t wfile_name[MAX_LONG_PATH]; + HANDLE hnd; + int result; + + /* open the file and let Windows resolve the links */ + if (xutftowcs_long_path(wfile_name, file_name) < 0) + return -1; + hnd = CreateFileW(wfile_name, 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + if (hnd == INVALID_HANDLE_VALUE) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + result = get_file_info_by_handle(hnd, buf); + CloseHandle(hnd); + return result; } int mingw_fstat(int fd, struct stat *buf) From ae4e96b474d67a8398ca4329b002aebef8e4c079 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Tue, 12 May 2015 00:58:39 +0200 Subject: [PATCH 255/303] Win32: remove separate do_lstat() function With the new mingw_stat() implementation, do_lstat() is only called from mingw_lstat() (with follow == 0). Remove the extra function and the old mingw_stat()-specific (follow == 1) logic.
Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 4bb95b59d2b343..a7fbfc73380277 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -817,14 +817,7 @@ static int has_valid_directory_prefix(wchar_t *wfilename) return 1; } -/* We keep the do_lstat code in a separate function to avoid recursion. - * When a path ends with a slash, the stat will fail with ENOENT. In - * this case, we strip the trailing slashes and stat again. - * - * If follow is true then act like stat() and report on the link - * target. Otherwise report on the link itself. - */ -static int do_lstat(int follow, const char *file_name, struct stat *buf) +int mingw_lstat(const char *file_name, struct stat *buf) { WIN32_FILE_ATTRIBUTE_DATA fdata; wchar_t wfilename[MAX_LONG_PATH]; @@ -858,13 +851,7 @@ static int do_lstat(int follow, const char *file_name, struct stat *buf) if (handle != INVALID_HANDLE_VALUE) { if ((findbuf.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) && (findbuf.dwReserved0 == IO_REPARSE_TAG_SYMLINK)) { - if (follow) { - char buffer[MAXIMUM_REPARSE_DATA_BUFFER_SIZE]; - buf->st_size = readlink(file_name, buffer, MAXIMUM_REPARSE_DATA_BUFFER_SIZE); - } else { - buf->st_mode = S_IFLNK; - } - buf->st_mode |= S_IREAD; + buf->st_mode = S_IFLNK | S_IREAD; if (!(findbuf.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) buf->st_mode |= S_IWRITE; } @@ -924,11 +911,6 @@ static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) return 0; } -int mingw_lstat(const char *file_name, struct stat *buf) -{ - return do_lstat(0, file_name, buf); -} - int mingw_stat(const char *file_name, struct stat *buf) { wchar_t wfile_name[MAX_LONG_PATH]; From aa33f266d68448403a3a4bdc2a67c993f03221f3 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 24 May 2015 00:17:56 +0200 Subject: [PATCH 256/303] Win32: let mingw_lstat() error early upon problems with 
reparse points When obtaining lstat information for reparse points, we need to call FindFirstFile() in addition to GetFileAttributesEx() to obtain the type of the reparse point (symlink, mount point etc.). However, currently there is no error handling whatsoever if FindFirstFile() fails. Call FindFirstFile() before modifying the stat *buf output parameter and error out if the call fails. Note: The FindFirstFile() return value includes all the data that we get from GetFileAttributesEx(), so we could replace GetFileAttributesEx() with FindFirstFile(). We don't do that because GetFileAttributesEx() is about twice as fast for single files. I.e. we only pay the extra cost of calling FindFirstFile() in the rare case that we encounter a reparse point. Note: The indentation of the remaining reparse point code will be fixed in the next patch. Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index a7fbfc73380277..9d8b40c9400db9 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -820,6 +820,7 @@ static int has_valid_directory_prefix(wchar_t *wfilename) int mingw_lstat(const char *file_name, struct stat *buf) { WIN32_FILE_ATTRIBUTE_DATA fdata; + WIN32_FIND_DATAW findbuf = { 0 }; wchar_t wfilename[MAX_LONG_PATH]; int wlen = xutftowcs_long_path(wfilename, file_name); if (wlen < 0) @@ -834,6 +835,13 @@ int mingw_lstat(const char *file_name, struct stat *buf) } if (GetFileAttributesExW(wfilename, GetFileExInfoStandard, &fdata)) { + /* for reparse points, use FindFirstFile to get the reparse tag */ + if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { + HANDLE handle = FindFirstFileW(wfilename, &findbuf); + if (handle == INVALID_HANDLE_VALUE) + goto error; + FindClose(handle); + } buf->st_ino = 0; buf->st_gid = 0; buf->st_uid = 0; @@ -846,20 +854,16 @@ int mingw_lstat(const char *file_name, struct stat *buf)
filetime_to_timespec(&(fdata.ftLastWriteTime), &(buf->st_mtim)); filetime_to_timespec(&(fdata.ftCreationTime), &(buf->st_ctim)); if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - WIN32_FIND_DATAW findbuf; - HANDLE handle = FindFirstFileW(wfilename, &findbuf); - if (handle != INVALID_HANDLE_VALUE) { if ((findbuf.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) && (findbuf.dwReserved0 == IO_REPARSE_TAG_SYMLINK)) { buf->st_mode = S_IFLNK | S_IREAD; if (!(findbuf.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) buf->st_mode |= S_IWRITE; } - FindClose(handle); - } } return 0; } +error: switch (GetLastError()) { case ERROR_ACCESS_DENIED: case ERROR_SHARING_VIOLATION: From 0a17234a6199c034933ba400341173737aae2038 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Tue, 10 Jan 2017 23:21:56 +0100 Subject: [PATCH 257/303] mingw: teach fscache and dirent about symlinks Move S_IFLNK detection to file_attr_to_st_mode() and reuse it in fscache. Implement DT_LNK detection in dirent.c and the fscache readdir version. 
Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 13 +++---------- compat/win32.h | 6 ++++-- compat/win32/dirent.c | 5 ++++- compat/win32/fscache.c | 11 +++++++---- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 9d8b40c9400db9..30531e4c5f3ca5 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -846,21 +846,14 @@ int mingw_lstat(const char *file_name, struct stat *buf) buf->st_gid = 0; buf->st_uid = 0; buf->st_nlink = 1; - buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes); + buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes, + findbuf.dwReserved0); buf->st_size = fdata.nFileSizeLow | (((off_t)fdata.nFileSizeHigh)<<32); buf->st_dev = buf->st_rdev = 0; /* not used by Git */ filetime_to_timespec(&(fdata.ftLastAccessTime), &(buf->st_atim)); filetime_to_timespec(&(fdata.ftLastWriteTime), &(buf->st_mtim)); filetime_to_timespec(&(fdata.ftCreationTime), &(buf->st_ctim)); - if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if ((findbuf.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) && - (findbuf.dwReserved0 == IO_REPARSE_TAG_SYMLINK)) { - buf->st_mode = S_IFLNK | S_IREAD; - if (!(findbuf.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) - buf->st_mode |= S_IWRITE; - } - } return 0; } error: @@ -905,7 +898,7 @@ static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) buf->st_gid = 0; buf->st_uid = 0; buf->st_nlink = 1; - buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes); + buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes, 0); buf->st_size = fdata.nFileSizeLow | (((off_t)fdata.nFileSizeHigh)<<32); buf->st_dev = buf->st_rdev = 0; /* not used by Git */ diff --git a/compat/win32.h b/compat/win32.h index a97e880757b6f1..671bcc81f93351 100644 --- a/compat/win32.h +++ b/compat/win32.h @@ -6,10 +6,12 @@ #include <windows.h> #endif -static inline int file_attr_to_st_mode (DWORD attr) +static 
inline int file_attr_to_st_mode (DWORD attr, DWORD tag) { int fMode = S_IREAD; - if (attr & FILE_ATTRIBUTE_DIRECTORY) + if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) && tag == IO_REPARSE_TAG_SYMLINK) + fMode |= S_IFLNK; + else if (attr & FILE_ATTRIBUTE_DIRECTORY) fMode |= S_IFDIR; else fMode |= S_IFREG; diff --git a/compat/win32/dirent.c b/compat/win32/dirent.c index aae364b67aa6f8..db3b08c84606a9 100644 --- a/compat/win32/dirent.c +++ b/compat/win32/dirent.c @@ -18,7 +18,10 @@ static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata) xwcstoutf(ent->d_name, fdata->cFileName, MAX_PATH * 3); /* Set file type, based on WIN32_FIND_DATA */ - if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + if ((fdata->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) + && fdata->dwReserved0 == IO_REPARSE_TAG_SYMLINK) + ent->d_type = DT_LNK; + else if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ent->d_type = DT_DIR; else ent->d_type = DT_REG; diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 3c1ded64ae6c56..39eade0777e32b 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -200,10 +200,13 @@ static struct fsentry *fseentry_create_entry(struct fscache *cache, fdata->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT ? fdata->EaSize : 0; - fse->st_mode = file_attr_to_st_mode(fdata->FileAttributes); - fse->dirent.d_type = S_ISDIR(fse->st_mode) ? DT_DIR : DT_REG; - fse->u.s.st_size = fdata->EndOfFile.LowPart | - (((off_t)fdata->EndOfFile.HighPart) << 32); + fse->st_mode = file_attr_to_st_mode(fdata->FileAttributes, + fdata->EaSize); + fse->dirent.d_type = S_ISREG(fse->st_mode) ? DT_REG : + S_ISDIR(fse->st_mode) ? DT_DIR : DT_LNK; + fse->u.s.st_size = S_ISLNK(fse->st_mode) ? 
MAX_LONG_PATH : + fdata->EndOfFile.LowPart | + (((off_t)fdata->EndOfFile.HighPart) << 32); filetime_to_timespec((FILETIME *)&(fdata->LastAccessTime), &(fse->u.s.st_atim)); filetime_to_timespec((FILETIME *)&(fdata->LastWriteTime), From c96e396863ed90c3690a4f3a16f1898bc8b14734 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sat, 16 May 2015 01:11:37 +0200 Subject: [PATCH 258/303] Win32: lstat(): return adequate stat.st_size for symlinks Git typically doesn't trust the stat.st_size member of symlinks (e.g. see strbuf_readlink()). However, some functions take shortcuts if st_size is 0 (e.g. diff_populate_filespec()). In mingw_lstat() and fscache_lstat(), make sure to return an adequate size. The extra overhead of opening and reading the reparse point to calculate the exact size is not necessary, as git doesn't rely on the value anyway. Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 30531e4c5f3ca5..23cce8b9486430 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -848,8 +848,8 @@ int mingw_lstat(const char *file_name, struct stat *buf) buf->st_nlink = 1; buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes, findbuf.dwReserved0); - buf->st_size = fdata.nFileSizeLow | - (((off_t)fdata.nFileSizeHigh)<<32); + buf->st_size = S_ISLNK(buf->st_mode) ? MAX_LONG_PATH : + fdata.nFileSizeLow | (((off_t) fdata.nFileSizeHigh) << 32); buf->st_dev = buf->st_rdev = 0; /* not used by Git */ filetime_to_timespec(&(fdata.ftLastAccessTime), &(buf->st_atim)); filetime_to_timespec(&(fdata.ftLastWriteTime), &(buf->st_mtim)); From 15754480e9bb319f9df6bf6b84272caf4685e0b3 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Tue, 19 May 2015 21:48:55 +0200 Subject: [PATCH 259/303] Win32: factor out retry logic The retry pattern is duplicated in three places. 
It also seems to be too hard to use: mingw_unlink() and mingw_rmdir() duplicate the code to retry, and both of them do so incompletely. They also do not restore errno if the user answers 'no'. Introduce a retry_ask_yes_no() helper function that handles retry with small delay, asking the user, and restoring errno. mingw_unlink: include _wchmod in the retry loop (which may fail if the file is locked exclusively). mingw_rmdir: include special error handling in the retry loop. Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 102 ++++++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 57 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 23cce8b9486430..eef6af9c05c41c 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -16,8 +16,6 @@ #define HCAST(type, handle) ((type)(intptr_t)handle) -static const int delay[] = { 0, 1, 10, 20, 40 }; - void open_in_gdb(void) { static struct child_process cp = CHILD_PROCESS_INIT; @@ -194,15 +192,12 @@ static int read_yes_no_answer(void) return -1; } -static int ask_yes_no_if_possible(const char *format, ...) +static int ask_yes_no_if_possible(const char *format, va_list args) { char question[4096]; const char *retry_hook[] = { NULL, NULL, NULL }; - va_list args; - va_start(args, format); vsnprintf(question, sizeof(question), format, args); - va_end(args); if ((retry_hook[0] = mingw_getenv("GIT_ASK_YESNO"))) { retry_hook[1] = question; @@ -224,6 +219,31 @@ static int ask_yes_no_if_possible(const char *format, ...) } } +static int retry_ask_yes_no(int *tries, const char *format, ...) +{ + static const int delay[] = { 0, 1, 10, 20, 40 }; + va_list args; + int result, saved_errno = errno; + + if ((*tries) < ARRAY_SIZE(delay)) { + /* + * We assume that some other process had the file open at the wrong + * moment and retry. In order to give the other process a higher + * chance to complete its operation, we give up our time slice now. + * If we have to retry again, we do sleep a bit. 
+ */ + Sleep(delay[*tries]); + (*tries)++; + return 1; + } + + va_start(args, format); + result = ask_yes_no_if_possible(format, args); + va_end(args); + errno = saved_errno; + return result; +} + /* Windows only */ enum hide_dotfiles_type { HIDE_DOTFILES_FALSE = 0, @@ -302,7 +322,7 @@ static wchar_t *normalize_ntpath(wchar_t *wbuf) int mingw_unlink(const char *pathname) { - int ret, tries = 0; + int tries = 0; wchar_t wpathname[MAX_LONG_PATH]; if (xutftowcs_long_path(wpathname, pathname) < 0) return -1; @@ -310,26 +330,16 @@ int mingw_unlink(const char *pathname) if (DeleteFileW(wpathname)) return 0; - /* read-only files cannot be removed */ - _wchmod(wpathname, 0666); - while ((ret = _wunlink(wpathname)) == -1 && tries < ARRAY_SIZE(delay)) { + do { + /* read-only files cannot be removed */ + _wchmod(wpathname, 0666); + if (!_wunlink(wpathname)) + return 0; if (!is_file_in_use_error(GetLastError())) break; - /* - * We assume that some other process had the source or - * destination file open at the wrong moment and retry. - * In order to give the other process a higher chance to - * complete its operation, we give up our time slice now. - * If we have to retry again, we do sleep a bit. - */ - Sleep(delay[tries]); - tries++; - } - while (ret == -1 && is_file_in_use_error(GetLastError()) && - ask_yes_no_if_possible("Unlink of file '%s' failed. " - "Should I try again?", pathname)) - ret = _wunlink(wpathname); - return ret; + } while (retry_ask_yes_no(&tries, "Unlink of file '%s' failed. 
" + "Should I try again?", pathname)); + return -1; } static int is_dir_empty(const wchar_t *wpath) @@ -356,7 +366,7 @@ static int is_dir_empty(const wchar_t *wpath) int mingw_rmdir(const char *pathname) { - int ret, tries = 0; + int tries = 0; wchar_t wpathname[MAX_LONG_PATH]; struct stat st; @@ -382,7 +392,11 @@ int mingw_rmdir(const char *pathname) if (xutftowcs_long_path(wpathname, pathname) < 0) return -1; - while ((ret = _wrmdir(wpathname)) == -1 && tries < ARRAY_SIZE(delay)) { + do { + if (!_wrmdir(wpathname)) { + invalidate_lstat_cache(); + return 0; + } if (!is_file_in_use_error(GetLastError())) errno = err_win_to_posix(GetLastError()); if (errno != EACCES) @@ -391,23 +405,9 @@ int mingw_rmdir(const char *pathname) errno = ENOTEMPTY; break; } - /* - * We assume that some other process had the source or - * destination file open at the wrong moment and retry. - * In order to give the other process a higher chance to - * complete its operation, we give up our time slice now. - * If we have to retry again, we do sleep a bit. - */ - Sleep(delay[tries]); - tries++; - } - while (ret == -1 && errno == EACCES && is_file_in_use_error(GetLastError()) && - ask_yes_no_if_possible("Deletion of directory '%s' failed. " - "Should I try again?", pathname)) - ret = _wrmdir(wpathname); - if (!ret) - invalidate_lstat_cache(); - return ret; + } while (retry_ask_yes_no(&tries, "Deletion of directory '%s' failed. " + "Should I try again?", pathname)); + return -1; } static inline int needs_hiding(const char *path) @@ -2385,20 +2385,8 @@ int mingw_rename(const char *pold, const char *pnew) SetFileAttributesW(wpnew, attrs); } } - if (tries < ARRAY_SIZE(delay) && gle == ERROR_ACCESS_DENIED) { - /* - * We assume that some other process had the source or - * destination file open at the wrong moment and retry. - * In order to give the other process a higher chance to - * complete its operation, we give up our time slice now. - * If we have to retry again, we do sleep a bit. 
- */ - Sleep(delay[tries]); - tries++; - goto repeat; - } if (gle == ERROR_ACCESS_DENIED && - ask_yes_no_if_possible("Rename from '%s' to '%s' failed. " + retry_ask_yes_no(&tries, "Rename from '%s' to '%s' failed. " "Should I try again?", pold, pnew)) goto repeat; From a64ec3095f8719150d64f74214a07ed04bb037e4 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 24 May 2015 01:55:05 +0200 Subject: [PATCH 260/303] Win32: change default of 'core.symlinks' to false Symlinks on Windows don't work the same way as on Unix systems. E.g. there are different types of symlinks for directories and files, creating symlinks requires administrative privileges etc. By default, disable symlink support on Windows. I.e. users explicitly have to enable it with 'git config [--system|--global] core.symlinks true'. The test suite ignores system / global config files. Allow testing *with* symlink support by checking if native symlinks are enabled in MSys2 (via 'MSYS=winsymlinks:nativestrict'). Reminder: This would need to be changed if / when we find a way to run the test suite in a non-MSys-based shell (e.g. dash). Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index eef6af9c05c41c..4a991dd9682014 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2971,6 +2971,15 @@ static void setup_windows_environment(void) if (!getenv("LC_ALL") && !getenv("LC_CTYPE") && !getenv("LANG")) setenv("LC_CTYPE", "C.UTF-8", 1); + + /* + * Change 'core.symlinks' default to false, unless native symlinks are + * enabled in MSys2 (via 'MSYS=winsymlinks:nativestrict'). Thus we can + * run the test suite (which doesn't obey config files) with or without + * symlink support. 
+ */ + if (!(tmp = getenv("MSYS")) || !strstr(tmp, "winsymlinks:nativestrict")) + has_symlinks = 0; } static PSID get_current_user_sid(void) From 199b0801b38655e548aed56e61fed958b4050222 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sat, 16 May 2015 00:32:03 +0200 Subject: [PATCH 261/303] Win32: add symlink-specific error codes Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 4a991dd9682014..0043188d2fec67 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -91,6 +91,7 @@ int err_win_to_posix(DWORD winerr) case ERROR_INVALID_PARAMETER: error = EINVAL; break; case ERROR_INVALID_PASSWORD: error = EPERM; break; case ERROR_INVALID_PRIMARY_GROUP: error = EINVAL; break; + case ERROR_INVALID_REPARSE_DATA: error = EINVAL; break; case ERROR_INVALID_SIGNAL_NUMBER: error = EINVAL; break; case ERROR_INVALID_TARGET_HANDLE: error = EIO; break; case ERROR_INVALID_WORKSTATION: error = EACCES; break; @@ -105,6 +106,7 @@ int err_win_to_posix(DWORD winerr) case ERROR_NEGATIVE_SEEK: error = ESPIPE; break; case ERROR_NOACCESS: error = EFAULT; break; case ERROR_NONE_MAPPED: error = EINVAL; break; + case ERROR_NOT_A_REPARSE_POINT: error = EINVAL; break; case ERROR_NOT_ENOUGH_MEMORY: error = ENOMEM; break; case ERROR_NOT_READY: error = EAGAIN; break; case ERROR_NOT_SAME_DEVICE: error = EXDEV; break; @@ -125,6 +127,9 @@ int err_win_to_posix(DWORD winerr) case ERROR_PIPE_NOT_CONNECTED: error = EPIPE; break; case ERROR_PRIVILEGE_NOT_HELD: error = EACCES; break; case ERROR_READ_FAULT: error = EIO; break; + case ERROR_REPARSE_ATTRIBUTE_CONFLICT: error = EINVAL; break; + case ERROR_REPARSE_TAG_INVALID: error = EINVAL; break; + case ERROR_REPARSE_TAG_MISMATCH: error = EINVAL; break; case ERROR_SEEK: error = EIO; break; case ERROR_SEEK_ON_DEVICE: error = ESPIPE; break; case ERROR_SHARING_BUFFER_EXCEEDED: error = ENFILE; break; From 
90bbb62ff4894a8dd399992055e314db6198f8ac Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 24 May 2015 01:06:10 +0200 Subject: [PATCH 262/303] Win32: mingw_unlink: support symlinks to directories _wunlink() / DeleteFileW() refuses to delete symlinks to directories. If _wunlink() fails with ERROR_ACCESS_DENIED, try _wrmdir() as well. Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 0043188d2fec67..6416ec0c62d037 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -342,6 +342,13 @@ int mingw_unlink(const char *pathname) return 0; if (!is_file_in_use_error(GetLastError())) break; + /* + * _wunlink() / DeleteFileW() for directory symlinks fails with + * ERROR_ACCESS_DENIED (EACCES), so try _wrmdir() as well. This is the + * same error we get if a file is in use (already checked above). + */ + if (!_wrmdir(wpathname)) + return 0; } while (retry_ask_yes_no(&tries, "Unlink of file '%s' failed. " "Should I try again?", pathname)); return -1; From 8458d84b9d219f013d76864680d60b39dd9566ba Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Tue, 19 May 2015 22:42:48 +0200 Subject: [PATCH 263/303] Win32: mingw_rename: support renaming symlinks MSVCRT's _wrename() cannot rename symlinks over existing files: it returns success without doing anything. Newer MSVCR*.dll versions probably do not have this problem: according to CRT sources, they just call MoveFileEx() with the MOVEFILE_COPY_ALLOWED flag. Get rid of _wrename() and call MoveFileEx() with proper error handling. 
Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 6416ec0c62d037..b96d2c7cbf872e 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2357,27 +2357,29 @@ int mingw_accept(int sockfd1, struct sockaddr *sa, socklen_t *sz) #undef rename int mingw_rename(const char *pold, const char *pnew) { - DWORD attrs, gle; + DWORD attrs = INVALID_FILE_ATTRIBUTES, gle; int tries = 0; wchar_t wpold[MAX_LONG_PATH], wpnew[MAX_LONG_PATH]; if (xutftowcs_long_path(wpold, pold) < 0 || xutftowcs_long_path(wpnew, pnew) < 0) return -1; - /* - * Try native rename() first to get errno right. - * It is based on MoveFile(), which cannot overwrite existing files. - */ - if (!_wrename(wpold, wpnew)) - return 0; - if (errno != EEXIST) - return -1; repeat: - if (MoveFileExW(wpold, wpnew, MOVEFILE_REPLACE_EXISTING)) + if (MoveFileExW(wpold, wpnew, + MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED)) return 0; - /* TODO: translate more errors */ gle = GetLastError(); - if (gle == ERROR_ACCESS_DENIED && + + /* revert file attributes on failure */ + if (attrs != INVALID_FILE_ATTRIBUTES) + SetFileAttributesW(wpnew, attrs); + + if (!is_file_in_use_error(gle)) { + errno = err_win_to_posix(gle); + return -1; + } + + if (attrs == INVALID_FILE_ATTRIBUTES && (attrs = GetFileAttributesW(wpnew)) != INVALID_FILE_ATTRIBUTES) { if (attrs & FILE_ATTRIBUTE_DIRECTORY) { DWORD attrsold = GetFileAttributesW(wpold); @@ -2389,16 +2391,10 @@ int mingw_rename(const char *pold, const char *pnew) return -1; } if ((attrs & FILE_ATTRIBUTE_READONLY) && - SetFileAttributesW(wpnew, attrs & ~FILE_ATTRIBUTE_READONLY)) { - if (MoveFileExW(wpold, wpnew, MOVEFILE_REPLACE_EXISTING)) - return 0; - gle = GetLastError(); - /* revert file attributes on failure */ - SetFileAttributesW(wpnew, attrs); - } + SetFileAttributesW(wpnew, attrs & ~FILE_ATTRIBUTE_READONLY)) + goto repeat; } - 
if (gle == ERROR_ACCESS_DENIED && - retry_ask_yes_no(&tries, "Rename from '%s' to '%s' failed. " + if (retry_ask_yes_no(&tries, "Rename from '%s' to '%s' failed. " "Should I try again?", pold, pnew)) goto repeat; From 89798ae133b56e46f9d42cc4c582d18dc828fd83 Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 24 May 2015 01:17:31 +0200 Subject: [PATCH 264/303] Win32: mingw_chdir: change to symlink-resolved directory If symlinks are enabled, resolve all symlinks when changing directories, as required by POSIX. Note: Git's real_path() function bases its link resolution algorithm on this property of chdir(). Unfortunately, the current directory on Windows is limited to only MAX_PATH (260) characters. Therefore using symlinks and long paths in combination may be problematic. Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index b96d2c7cbf872e..ad8273eaf41dac 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -783,7 +783,24 @@ int mingw_chdir(const char *dirname) wchar_t wdirname[MAX_LONG_PATH]; if (xutftowcs_long_path(wdirname, dirname) < 0) return -1; - result = _wchdir(wdirname); + + if (has_symlinks) { + HANDLE hnd = CreateFileW(wdirname, 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + if (hnd == INVALID_HANDLE_VALUE) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + if (!GetFinalPathNameByHandleW(hnd, wdirname, ARRAY_SIZE(wdirname), 0)) { + errno = err_win_to_posix(GetLastError()); + CloseHandle(hnd); + return -1; + } + CloseHandle(hnd); + } + + result = _wchdir(normalize_ntpath(wdirname)); current_directory_len = GetCurrentDirectoryW(0, NULL); return result; } From ed2b8c1db811822c5ab35c79a96855ba14195a09 Mon Sep 17 00:00:00 2001 From: Karsten Blees 
<blees@dcon.de> Date: Sun, 24 May 2015 01:24:41 +0200 Subject: [PATCH 265/303] Win32: implement readlink() Implement readlink() by reading NTFS reparse points. Works for symlinks and directory junctions. If symlinks are disabled, fail with ENOSYS. Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++ compat/mingw.h | 3 +- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index ad8273eaf41dac..45799645386d8a 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -4,6 +4,7 @@ #include <sddl.h> #include <conio.h> #include <wchar.h> +#include <winioctl.h> #include "../strbuf.h" #include "../run-command.h" #include "../cache.h" @@ -2688,6 +2689,103 @@ int link(const char *oldpath, const char *newpath) return 0; } +#ifndef _WINNT_H +/* + * The REPARSE_DATA_BUFFER structure is defined in the Windows DDK (in + * ntifs.h) and in MSYS1's winnt.h (which defines _WINNT_H). So define + * it ourselves if we are on MSYS2 (whose winnt.h defines _WINNT_). 
+ */ +typedef struct _REPARSE_DATA_BUFFER { + DWORD ReparseTag; + WORD ReparseDataLength; + WORD Reserved; +#ifndef _MSC_VER + _ANONYMOUS_UNION +#endif + union { + struct { + WORD SubstituteNameOffset; + WORD SubstituteNameLength; + WORD PrintNameOffset; + WORD PrintNameLength; + ULONG Flags; + WCHAR PathBuffer[1]; + } SymbolicLinkReparseBuffer; + struct { + WORD SubstituteNameOffset; + WORD SubstituteNameLength; + WORD PrintNameOffset; + WORD PrintNameLength; + WCHAR PathBuffer[1]; + } MountPointReparseBuffer; + struct { + BYTE DataBuffer[1]; + } GenericReparseBuffer; + } DUMMYUNIONNAME; +} REPARSE_DATA_BUFFER, *PREPARSE_DATA_BUFFER; +#endif + +int readlink(const char *path, char *buf, size_t bufsiz) +{ + HANDLE handle; + WCHAR wpath[MAX_LONG_PATH], *wbuf; + REPARSE_DATA_BUFFER *b = alloca(MAXIMUM_REPARSE_DATA_BUFFER_SIZE); + DWORD dummy; + char tmpbuf[MAX_LONG_PATH]; + int len; + + if (xutftowcs_long_path(wpath, path) < 0) + return -1; + + /* read reparse point data */ + handle = CreateFileW(wpath, 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT, NULL); + if (handle == INVALID_HANDLE_VALUE) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + if (!DeviceIoControl(handle, FSCTL_GET_REPARSE_POINT, NULL, 0, b, + MAXIMUM_REPARSE_DATA_BUFFER_SIZE, &dummy, NULL)) { + errno = err_win_to_posix(GetLastError()); + CloseHandle(handle); + return -1; + } + CloseHandle(handle); + + /* get target path for symlinks or mount points (aka 'junctions') */ + switch (b->ReparseTag) { + case IO_REPARSE_TAG_SYMLINK: + wbuf = (WCHAR*) (((char*) b->SymbolicLinkReparseBuffer.PathBuffer) + + b->SymbolicLinkReparseBuffer.SubstituteNameOffset); + *(WCHAR*) (((char*) wbuf) + + b->SymbolicLinkReparseBuffer.SubstituteNameLength) = 0; + break; + case IO_REPARSE_TAG_MOUNT_POINT: + wbuf = (WCHAR*) (((char*) b->MountPointReparseBuffer.PathBuffer) + + 
b->MountPointReparseBuffer.SubstituteNameOffset); + *(WCHAR*) (((char*) wbuf) + + b->MountPointReparseBuffer.SubstituteNameLength) = 0; + break; + default: + errno = EINVAL; + return -1; + } + + /* + * Adapt to strange readlink() API: Copy up to bufsiz *bytes*, potentially + * cutting off a UTF-8 sequence. Insufficient bufsize is *not* a failure + * condition. There is no conversion function that produces invalid UTF-8, + * so convert to a (hopefully large enough) temporary buffer, then memcpy + * the requested number of bytes (including '\0' for robustness). + */ + if ((len = xwcstoutf(tmpbuf, normalize_ntpath(wbuf), MAX_LONG_PATH)) < 0) + return -1; + memcpy(buf, tmpbuf, min(bufsiz, len + 1)); + return min(bufsiz, len); +} + pid_t waitpid(pid_t pid, int *status, int options) { HANDLE h = OpenProcess(SYNCHRONIZE | PROCESS_QUERY_INFORMATION, diff --git a/compat/mingw.h b/compat/mingw.h index 90f1ff18b35c55..1239552ef0a5e7 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -123,8 +123,6 @@ struct utsname { * trivial stubs */ -static inline int readlink(const char *path, char *buf, size_t bufsiz) -{ errno = ENOSYS; return -1; } static inline int symlink(const char *oldpath, const char *newpath) { errno = ENOSYS; return -1; } static inline int fchmod(int fildes, mode_t mode) @@ -217,6 +215,7 @@ int setitimer(int type, struct itimerval *in, struct itimerval *out); int sigaction(int sig, struct sigaction *in, struct sigaction *out); int link(const char *oldpath, const char *newpath); int uname(struct utsname *buf); +int readlink(const char *path, char *buf, size_t bufsiz); /* * replacements of existing functions From 628e191e58b5ecd63b70c7da8f729c29821cf373 Mon Sep 17 00:00:00 2001 From: Bill Zissimopoulos <billziss@navimatics.com> Date: Thu, 28 May 2020 16:35:57 -0700 Subject: [PATCH 266/303] mingw: lstat: compute correct size for symlinks This commit fixes mingw_lstat by computing the proper size for symlinks according to POSIX. 
POSIX specifies that upon successful return from lstat: "the value of the st_size member shall be set to the length of the pathname contained in the symbolic link not including any terminating null byte". Prior to this commit the mingw_lstat function returned a fixed size of 4096. This caused problems in git repositories that were accessed by git for Cygwin or git for WSL. For example, doing `git reset --hard` using git for Windows would update the size of symlinks in the index to be 4096; at a later time git for Cygwin or git for WSL would find that symlinks have changed size during `git status`. Vice versa doing `git reset --hard` in git for Cygwin or git for WSL would update the size of symlinks in the index with the correct value, only for git for Windows to find incorrectly at a later time that the size had changed. Signed-off-by: Bill Zissimopoulos <billziss@navimatics.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 65 ++++++++++++++++++++++++++++-------------- compat/win32/fscache.c | 12 ++++++++ 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 45799645386d8a..334a511efdaa95 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -847,10 +847,14 @@ static int has_valid_directory_prefix(wchar_t *wfilename) return 1; } +static int readlink_1(const WCHAR *wpath, BOOL fail_on_unknown_tag, + char *tmpbuf, int *plen, DWORD *ptag); + int mingw_lstat(const char *file_name, struct stat *buf) { WIN32_FILE_ATTRIBUTE_DATA fdata; - WIN32_FIND_DATAW findbuf = { 0 }; + DWORD reparse_tag = 0; + int link_len = 0; wchar_t wfilename[MAX_LONG_PATH]; int wlen = xutftowcs_long_path(wfilename, file_name); if (wlen < 0) @@ -865,20 +869,21 @@ int mingw_lstat(const char *file_name, struct stat *buf) } if (GetFileAttributesExW(wfilename, GetFileExInfoStandard, &fdata)) { - /* for reparse points, use FindFirstFile to get the reparse tag */ + /* for reparse points, get the link tag and length */ 
if (fdata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - HANDLE handle = FindFirstFileW(wfilename, &findbuf); - if (handle == INVALID_HANDLE_VALUE) - goto error; - FindClose(handle); + char tmpbuf[MAX_LONG_PATH]; + + if (readlink_1(wfilename, FALSE, tmpbuf, &link_len, + &reparse_tag) < 0) + return -1; } buf->st_ino = 0; buf->st_gid = 0; buf->st_uid = 0; buf->st_nlink = 1; buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes, - findbuf.dwReserved0); - buf->st_size = S_ISLNK(buf->st_mode) ? MAX_LONG_PATH : + reparse_tag); + buf->st_size = S_ISLNK(buf->st_mode) ? link_len : fdata.nFileSizeLow | (((off_t) fdata.nFileSizeHigh) << 32); buf->st_dev = buf->st_rdev = 0; /* not used by Git */ filetime_to_timespec(&(fdata.ftLastAccessTime), &(buf->st_atim)); @@ -886,7 +891,7 @@ int mingw_lstat(const char *file_name, struct stat *buf) filetime_to_timespec(&(fdata.ftCreationTime), &(buf->st_ctim)); return 0; } -error: + switch (GetLastError()) { case ERROR_ACCESS_DENIED: case ERROR_SHARING_VIOLATION: @@ -2725,17 +2730,13 @@ typedef struct _REPARSE_DATA_BUFFER { } REPARSE_DATA_BUFFER, *PREPARSE_DATA_BUFFER; #endif -int readlink(const char *path, char *buf, size_t bufsiz) +static int readlink_1(const WCHAR *wpath, BOOL fail_on_unknown_tag, + char *tmpbuf, int *plen, DWORD *ptag) { HANDLE handle; - WCHAR wpath[MAX_LONG_PATH], *wbuf; + WCHAR *wbuf; REPARSE_DATA_BUFFER *b = alloca(MAXIMUM_REPARSE_DATA_BUFFER_SIZE); DWORD dummy; - char tmpbuf[MAX_LONG_PATH]; - int len; - - if (xutftowcs_long_path(wpath, path) < 0) - return -1; /* read reparse point data */ handle = CreateFileW(wpath, 0, @@ -2755,7 +2756,7 @@ int readlink(const char *path, char *buf, size_t bufsiz) CloseHandle(handle); /* get target path for symlinks or mount points (aka 'junctions') */ - switch (b->ReparseTag) { + switch ((*ptag = b->ReparseTag)) { case IO_REPARSE_TAG_SYMLINK: wbuf = (WCHAR*) (((char*) b->SymbolicLinkReparseBuffer.PathBuffer) + b->SymbolicLinkReparseBuffer.SubstituteNameOffset); @@ 
-2769,10 +2770,34 @@ int readlink(const char *path, char *buf, size_t bufsiz) + b->MountPointReparseBuffer.SubstituteNameLength) = 0; break; default: - errno = EINVAL; - return -1; + if (fail_on_unknown_tag) { + errno = EINVAL; + return -1; + } else { + *plen = MAX_LONG_PATH; + return 0; + } } + if ((*plen = + xwcstoutf(tmpbuf, normalize_ntpath(wbuf), MAX_LONG_PATH)) < 0) + return -1; + return 0; +} + +int readlink(const char *path, char *buf, size_t bufsiz) +{ + WCHAR wpath[MAX_LONG_PATH]; + char tmpbuf[MAX_LONG_PATH]; + int len; + DWORD tag; + + if (xutftowcs_long_path(wpath, path) < 0) + return -1; + + if (readlink_1(wpath, TRUE, tmpbuf, &len, &tag) < 0) + return -1; + /* * Adapt to strange readlink() API: Copy up to bufsiz *bytes*, potentially * cutting off a UTF-8 sequence. Insufficient bufsize is *not* a failure @@ -2780,8 +2805,6 @@ int readlink(const char *path, char *buf, size_t bufsiz) * so convert to a (hopefully large enough) temporary buffer, then memcpy * the requested number of bytes (including '\0' for robustness). */ - if ((len = xwcstoutf(tmpbuf, normalize_ntpath(wbuf), MAX_LONG_PATH)) < 0) - return -1; memcpy(buf, tmpbuf, min(bufsiz, len + 1)); return min(bufsiz, len); } diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 39eade0777e32b..16b1e522f34182 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -582,6 +582,18 @@ int fscache_lstat(const char *filename, struct stat *st) return -1; } + /* + * Special case symbolic links: FindFirstFile()/FindNextFile() did not + * provide us with the length of the target path. 
+ */ + if (fse->u.s.st_size == MAX_LONG_PATH && S_ISLNK(fse->st_mode)) { + char buf[MAX_LONG_PATH]; + int len = readlink(filename, buf, sizeof(buf) - 1); + + if (len > 0) + fse->u.s.st_size = len; + } + /* copy stat data */ st->st_ino = 0; st->st_gid = 0; From 2fa7c31247194333ec7186204df8f3ad50407f2d Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 24 May 2015 01:32:03 +0200 Subject: [PATCH 267/303] Win32: implement basic symlink() functionality (file symlinks only) Implement symlink() that always creates file symlinks. Fails with ENOSYS if symlinks are disabled or unsupported. Note: CreateSymbolicLinkW() was introduced with symlink support in Windows Vista. For compatibility with Windows XP, we need to load it dynamically and fail gracefully if it isn't available. Signed-off-by: Karsten Blees <blees@dcon.de> --- compat/mingw.c | 28 ++++++++++++++++++++++++++++ compat/mingw.h | 3 +-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 334a511efdaa95..d3e8790f712b48 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2694,6 +2694,34 @@ int link(const char *oldpath, const char *newpath) return 0; } +int symlink(const char *target, const char *link) +{ + wchar_t wtarget[MAX_LONG_PATH], wlink[MAX_LONG_PATH]; + int len; + + /* fail if symlinks are disabled or API is not supported (WinXP) */ + if (!has_symlinks) { + errno = ENOSYS; + return -1; + } + + if ((len = xutftowcs_long_path(wtarget, target)) < 0 + || xutftowcs_long_path(wlink, link) < 0) + return -1; + + /* convert target dir separators to backslashes */ + while (len--) + if (wtarget[len] == '/') + wtarget[len] = '\\'; + + /* create file symlink */ + if (!CreateSymbolicLinkW(wlink, wtarget, 0)) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + return 0; +} + #ifndef _WINNT_H /* * The REPARSE_DATA_BUFFER structure is defined in the Windows DDK (in diff --git a/compat/mingw.h b/compat/mingw.h index 
1239552ef0a5e7..bde51af5c302fe 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -123,8 +123,6 @@ struct utsname { * trivial stubs */ -static inline int symlink(const char *oldpath, const char *newpath) -{ errno = ENOSYS; return -1; } static inline int fchmod(int fildes, mode_t mode) { errno = ENOSYS; return -1; } #ifndef __MINGW64_VERSION_MAJOR @@ -215,6 +213,7 @@ int setitimer(int type, struct itimerval *in, struct itimerval *out); int sigaction(int sig, struct sigaction *in, struct sigaction *out); int link(const char *oldpath, const char *newpath); int uname(struct utsname *buf); +int symlink(const char *target, const char *link); int readlink(const char *path, char *buf, size_t bufsiz); /* From d25cf4e5570893d9785cd74d75112ebe717475da Mon Sep 17 00:00:00 2001 From: Karsten Blees <blees@dcon.de> Date: Sun, 24 May 2015 01:48:35 +0200 Subject: [PATCH 268/303] Win32: symlink: add support for symlinks to directories Symlinks on Windows have a flag that indicates whether the target is a file or a directory. Symlinks of wrong type simply don't work. This even affects core Win32 APIs (e.g. DeleteFile() refuses to delete directory symlinks). However, CreateFile() with FILE_FLAG_BACKUP_SEMANTICS doesn't seem to care. Check the target type by first creating a tentative file symlink, opening it, and checking the type of the resulting handle. If it is a directory, recreate the symlink with the directory flag set. It is possible to create symlinks before the target exists (or in case of symlinks to symlinks: before the target type is known). If this happens, create a tentative file symlink and postpone the directory decision: keep a list of phantom symlinks to be processed whenever a new directory is created in mingw_mkdir(). Limitations: This algorithm may fail if a link target changes from file to directory or vice versa, or if the target directory is created in another process. 
Signed-off-by: Karsten Blees <blees@dcon.de> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index d3e8790f712b48..5318c261137990 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -301,6 +301,131 @@ int mingw_core_config(const char *var, const char *value, void *cb) return 0; } +enum phantom_symlink_result { + PHANTOM_SYMLINK_RETRY, + PHANTOM_SYMLINK_DONE, + PHANTOM_SYMLINK_DIRECTORY +}; + +static inline int is_wdir_sep(wchar_t wchar) +{ + return wchar == L'/' || wchar == L'\\'; +} + +static const wchar_t *make_relative_to(const wchar_t *path, + const wchar_t *relative_to, wchar_t *out, + size_t size) +{ + size_t i = wcslen(relative_to), len; + + /* Is `path` already absolute? */ + if (is_wdir_sep(path[0]) || + (iswalpha(path[0]) && path[1] == L':' && is_wdir_sep(path[2]))) + return path; + + while (i > 0 && !is_wdir_sep(relative_to[i - 1])) + i--; + + /* Is `relative_to` in the current directory? */ + if (!i) + return path; + + len = wcslen(path); + if (i + len + 1 > size) { + error("Could not make '%ls' relative to '%ls' (too large)", + path, relative_to); + return NULL; + } + + memcpy(out, relative_to, i * sizeof(wchar_t)); + wcscpy(out + i, path); + return out; +} + +/* + * Changes a file symlink to a directory symlink if the target exists and is a + * directory. 
+ */ +static enum phantom_symlink_result +process_phantom_symlink(const wchar_t *wtarget, const wchar_t *wlink) +{ + HANDLE hnd; + BY_HANDLE_FILE_INFORMATION fdata; + wchar_t relative[MAX_LONG_PATH]; + const wchar_t *rel; + + /* check that wlink is still a file symlink */ + if ((GetFileAttributesW(wlink) + & (FILE_ATTRIBUTE_REPARSE_POINT | FILE_ATTRIBUTE_DIRECTORY)) + != FILE_ATTRIBUTE_REPARSE_POINT) + return PHANTOM_SYMLINK_DONE; + + /* make it relative, if necessary */ + rel = make_relative_to(wtarget, wlink, relative, ARRAY_SIZE(relative)); + if (!rel) + return PHANTOM_SYMLINK_DONE; + + /* let Windows resolve the link by opening it */ + hnd = CreateFileW(rel, 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + if (hnd == INVALID_HANDLE_VALUE) { + errno = err_win_to_posix(GetLastError()); + return PHANTOM_SYMLINK_RETRY; + } + + if (!GetFileInformationByHandle(hnd, &fdata)) { + errno = err_win_to_posix(GetLastError()); + CloseHandle(hnd); + return PHANTOM_SYMLINK_RETRY; + } + CloseHandle(hnd); + + /* if target exists and is a file, we're done */ + if (!(fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + return PHANTOM_SYMLINK_DONE; + + /* otherwise recreate the symlink with directory flag */ + if (DeleteFileW(wlink) && CreateSymbolicLinkW(wlink, wtarget, 1)) + return PHANTOM_SYMLINK_DIRECTORY; + + errno = err_win_to_posix(GetLastError()); + return PHANTOM_SYMLINK_RETRY; +} + +/* keep track of newly created symlinks to non-existing targets */ +struct phantom_symlink_info { + struct phantom_symlink_info *next; + wchar_t *wlink; + wchar_t *wtarget; +}; + +static struct phantom_symlink_info *phantom_symlinks = NULL; +static CRITICAL_SECTION phantom_symlinks_cs; + +static void process_phantom_symlinks(void) +{ + struct phantom_symlink_info *current, **psi; + EnterCriticalSection(&phantom_symlinks_cs); + /* process phantom symlinks list */ + psi = &phantom_symlinks; + while ((current = *psi)) { + 
enum phantom_symlink_result result = process_phantom_symlink( + current->wtarget, current->wlink); + if (result == PHANTOM_SYMLINK_RETRY) { + psi = &current->next; + } else { + /* symlink was processed, remove from list */ + *psi = current->next; + free(current); + /* if symlink was a directory, start over */ + if (result == PHANTOM_SYMLINK_DIRECTORY) + psi = &phantom_symlinks; + } + } + LeaveCriticalSection(&phantom_symlinks_cs); +} + /* Normalizes NT paths as returned by some low-level APIs. */ static wchar_t *normalize_ntpath(wchar_t *wbuf) { @@ -484,6 +609,8 @@ int mingw_mkdir(const char *path, int mode) return -1; ret = _wmkdir(wpath); + if (!ret) + process_phantom_symlinks(); if (!ret && needs_hiding(path)) return set_hidden_flag(wpath, 1); return ret; @@ -2719,6 +2846,42 @@ int symlink(const char *target, const char *link) errno = err_win_to_posix(GetLastError()); return -1; } + + /* convert to directory symlink if target exists */ + switch (process_phantom_symlink(wtarget, wlink)) { + case PHANTOM_SYMLINK_RETRY: { + /* if target doesn't exist, add to phantom symlinks list */ + wchar_t wfullpath[MAX_LONG_PATH]; + struct phantom_symlink_info *psi; + + /* convert to absolute path to be independent of cwd */ + len = GetFullPathNameW(wlink, MAX_LONG_PATH, wfullpath, NULL); + if (!len || len >= MAX_LONG_PATH) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + + /* over-allocate and fill phantom_symlink_info structure */ + psi = xmalloc(sizeof(struct phantom_symlink_info) + + sizeof(wchar_t) * (len + wcslen(wtarget) + 2)); + psi->wlink = (wchar_t *)(psi + 1); + wcscpy(psi->wlink, wfullpath); + psi->wtarget = psi->wlink + len + 1; + wcscpy(psi->wtarget, wtarget); + + EnterCriticalSection(&phantom_symlinks_cs); + psi->next = phantom_symlinks; + phantom_symlinks = psi; + LeaveCriticalSection(&phantom_symlinks_cs); + break; + } + case PHANTOM_SYMLINK_DIRECTORY: + /* if we created a dir symlink, process other phantom symlinks */ + process_phantom_symlinks(); + 
break; + default: + break; + } return 0; } @@ -3630,6 +3793,7 @@ int wmain(int argc, const wchar_t **wargv) /* initialize critical section for waitpid pinfo_t list */ InitializeCriticalSection(&pinfo_cs); + InitializeCriticalSection(&phantom_symlinks_cs); /* initialize critical section for fscache */ InitializeCriticalSection(&fscache_cs); From 65e654e65f0bfdbaac0e9ac64c1af394ab234dd7 Mon Sep 17 00:00:00 2001 From: JiSeop Moon <zcube@zcube.kr> Date: Mon, 23 Apr 2018 22:30:18 +0900 Subject: [PATCH 269/303] mingw: introduce code to detect whether we're inside a Windows container This will come in handy in the next commit. Signed-off-by: JiSeop Moon <zcube@zcube.kr> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 32 ++++++++++++++++++++++++++++++++ compat/mingw.h | 5 +++++ 2 files changed, 37 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index c71e58593457ad..88b2c25d1a51d1 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -3867,3 +3867,35 @@ int uname(struct utsname *buf) "%u", (v >> 16) & 0x7fff); return 0; } + +/* + * Based on https://stackoverflow.com/questions/43002803 + * + * [HKLM\SYSTEM\CurrentControlSet\Services\cexecsvc] + * "DisplayName"="@%systemroot%\\system32\\cexecsvc.exe,-100" + * "ErrorControl"=dword:00000001 + * "ImagePath"=hex(2):25,00,73,00,79,00,73,00,74,00,65,00,6d,00,72,00,6f,00, + * 6f,00,74,00,25,00,5c,00,73,00,79,00,73,00,74,00,65,00,6d,00,33,00,32,00, + * 5c,00,63,00,65,00,78,00,65,00,63,00,73,00,76,00,63,00,2e,00,65,00,78,00, + * 65,00,00,00 + * "Start"=dword:00000002 + * "Type"=dword:00000010 + * "Description"="@%systemroot%\\system32\\cexecsvc.exe,-101" + * "ObjectName"="LocalSystem" + * "ServiceSidType"=dword:00000001 + */ +int is_inside_windows_container(void) +{ + static int inside_container = -1; /* -1 uninitialized */ + const char *key = "SYSTEM\\CurrentControlSet\\Services\\cexecsvc"; + HKEY handle = NULL; + + if (inside_container != -1) + return inside_container; + + 
inside_container = ERROR_SUCCESS == + RegOpenKeyExA(HKEY_LOCAL_MACHINE, key, 0, KEY_READ, &handle); + RegCloseKey(handle); + + return inside_container; +} diff --git a/compat/mingw.h b/compat/mingw.h index bde51af5c302fe..778a44036ac28b 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -717,3 +717,8 @@ void open_in_gdb(void); * Used by Pthread API implementation for Windows */ int err_win_to_posix(DWORD winerr); + +/* + * Check current process is inside Windows Container. + */ +int is_inside_windows_container(void); From 2c12a2306f6b4595e2f79ad9bede39efd374d483 Mon Sep 17 00:00:00 2001 From: Bert Belder <bertbelder@gmail.com> Date: Fri, 26 Oct 2018 11:13:45 +0200 Subject: [PATCH 270/303] Win32: symlink: move phantom symlink creation to a separate function Signed-off-by: Bert Belder <bertbelder@gmail.com> --- compat/mingw.c | 91 +++++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index c71e58593457ad..1493b9b1701e76 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -429,6 +429,54 @@ static void process_phantom_symlinks(void) LeaveCriticalSection(&phantom_symlinks_cs); } +static int create_phantom_symlink(wchar_t *wtarget, wchar_t *wlink) +{ + int len; + + /* create file symlink */ + if (!CreateSymbolicLinkW(wlink, wtarget, symlink_file_flags)) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + + /* convert to directory symlink if target exists */ + switch (process_phantom_symlink(wtarget, wlink)) { + case PHANTOM_SYMLINK_RETRY: { + /* if target doesn't exist, add to phantom symlinks list */ + wchar_t wfullpath[MAX_LONG_PATH]; + struct phantom_symlink_info *psi; + + /* convert to absolute path to be independent of cwd */ + len = GetFullPathNameW(wlink, MAX_LONG_PATH, wfullpath, NULL); + if (!len || len >= MAX_LONG_PATH) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + + /* over-allocate and fill phantom_symlink_info structure */ + psi = 
xmalloc(sizeof(struct phantom_symlink_info) + + sizeof(wchar_t) * (len + wcslen(wtarget) + 2)); + psi->wlink = (wchar_t *)(psi + 1); + wcscpy(psi->wlink, wfullpath); + psi->wtarget = psi->wlink + len + 1; + wcscpy(psi->wtarget, wtarget); + + EnterCriticalSection(&phantom_symlinks_cs); + psi->next = phantom_symlinks; + phantom_symlinks = psi; + LeaveCriticalSection(&phantom_symlinks_cs); + break; + } + case PHANTOM_SYMLINK_DIRECTORY: + /* if we created a dir symlink, process other phantom symlinks */ + process_phantom_symlinks(); + break; + default: + break; + } + return 0; +} + /* Normalizes NT paths as returned by some low-level APIs. */ static wchar_t *normalize_ntpath(wchar_t *wbuf) { @@ -2856,48 +2904,7 @@ int symlink(const char *target, const char *link) if (wtarget[len] == '/') wtarget[len] = '\\'; - /* create file symlink */ - if (!CreateSymbolicLinkW(wlink, wtarget, symlink_file_flags)) { - errno = err_win_to_posix(GetLastError()); - return -1; - } - - /* convert to directory symlink if target exists */ - switch (process_phantom_symlink(wtarget, wlink)) { - case PHANTOM_SYMLINK_RETRY: { - /* if target doesn't exist, add to phantom symlinks list */ - wchar_t wfullpath[MAX_LONG_PATH]; - struct phantom_symlink_info *psi; - - /* convert to absolute path to be independent of cwd */ - len = GetFullPathNameW(wlink, MAX_LONG_PATH, wfullpath, NULL); - if (!len || len >= MAX_LONG_PATH) { - errno = err_win_to_posix(GetLastError()); - return -1; - } - - /* over-allocate and fill phantom_symlink_info structure */ - psi = xmalloc(sizeof(struct phantom_symlink_info) - + sizeof(wchar_t) * (len + wcslen(wtarget) + 2)); - psi->wlink = (wchar_t *)(psi + 1); - wcscpy(psi->wlink, wfullpath); - psi->wtarget = psi->wlink + len + 1; - wcscpy(psi->wtarget, wtarget); - - EnterCriticalSection(&phantom_symlinks_cs); - psi->next = phantom_symlinks; - phantom_symlinks = psi; - LeaveCriticalSection(&phantom_symlinks_cs); - break; - } - case PHANTOM_SYMLINK_DIRECTORY: - /* if we created a 
dir symlink, process other phantom symlinks */ - process_phantom_symlinks(); - break; - default: - break; - } - return 0; + return create_phantom_symlink(wtarget, wlink); } #ifndef _WINNT_H From 348c78a1d15ed2ad83116cc3b3f6d0af7f484650 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 30 May 2017 21:50:57 +0200 Subject: [PATCH 271/303] mingw: try to create symlinks without elevated permissions With Windows 10 Build 14972 in Developer Mode, a new flag is supported by CreateSymbolicLink() to create symbolic links even when running outside of an elevated session (which was previously required). This new flag is called SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE and has the numeric value 0x02. Previous Windows 10 versions will not understand that flag and return an ERROR_INVALID_PARAMETER, therefore we have to be careful to try passing that flag only when the build number indicates that it is supported. For more information about the new flag, see this blog post: https://blogs.windows.com/buildingapps/2016/12/02/symlinks-windows-10/ This patch is loosely based on the patch submitted by Samuel D. Leslie as https://github.com/git-for-windows/git/pull/1184. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 5318c261137990..cc0dbd1152b55b 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -301,6 +301,8 @@ int mingw_core_config(const char *var, const char *value, void *cb) return 0; } +static DWORD symlink_file_flags = 0, symlink_directory_flags = 1; + enum phantom_symlink_result { PHANTOM_SYMLINK_RETRY, PHANTOM_SYMLINK_DONE, @@ -386,7 +388,8 @@ process_phantom_symlink(const wchar_t *wtarget, const wchar_t *wlink) return PHANTOM_SYMLINK_DONE; /* otherwise recreate the symlink with directory flag */ - if (DeleteFileW(wlink) && CreateSymbolicLinkW(wlink, wtarget, 1)) + if (DeleteFileW(wlink) && + CreateSymbolicLinkW(wlink, wtarget, symlink_directory_flags)) return PHANTOM_SYMLINK_DIRECTORY; errno = err_win_to_posix(GetLastError()); @@ -2842,7 +2845,7 @@ int symlink(const char *target, const char *link) wtarget[len] = '\\'; /* create file symlink */ - if (!CreateSymbolicLinkW(wlink, wtarget, 0)) { + if (!CreateSymbolicLinkW(wlink, wtarget, symlink_file_flags)) { errno = err_win_to_posix(GetLastError()); return -1; } @@ -3729,6 +3732,24 @@ static void maybe_redirect_std_handles(void) GENERIC_WRITE, FILE_FLAG_NO_BUFFERING); } +static void adjust_symlink_flags(void) +{ + /* + * Starting with Windows 10 Build 14972, symbolic links can be created + * using CreateSymbolicLink() without elevation by passing the flag + * SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE (0x02) as last + * parameter, provided the Developer Mode has been enabled. Some + * earlier Windows versions complain about this flag with an + * ERROR_INVALID_PARAMETER, hence we have to test the build number + * specifically. 
+ */ + if (GetVersion() >= 14972 << 16) { + symlink_file_flags |= 2; + symlink_directory_flags |= 2; + } + +} + #ifdef _MSC_VER #ifdef _DEBUG #include <crtdbg.h> @@ -3763,6 +3784,7 @@ int wmain(int argc, const wchar_t **wargv) #endif maybe_redirect_std_handles(); + adjust_symlink_flags(); fsync_object_files = 1; /* determine size of argv and environ conversion buffer */ From 3e5a5c4b4524b291e8d211b107b321179381fd4d Mon Sep 17 00:00:00 2001 From: JiSeop Moon <zcube@zcube.kr> Date: Mon, 23 Apr 2018 22:31:42 +0200 Subject: [PATCH 272/303] mingw: when running in a Windows container, try to rename() harder It is a known issue that a rename() can fail with an "Access denied" error at times, when copying followed by deleting the original file works. Let's just fall back to that behavior. Signed-off-by: JiSeop Moon <zcube@zcube.kr> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 88b2c25d1a51d1..f056bb9ea2ae4a 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2535,6 +2535,13 @@ int mingw_rename(const char *pold, const char *pnew) return 0; gle = GetLastError(); + if (gle == ERROR_ACCESS_DENIED && is_inside_windows_container()) { + /* Fall back to copy to destination & remove source */ + if (CopyFileW(wpold, wpnew, FALSE) && !mingw_unlink(pold)) + return 0; + gle = GetLastError(); + } + /* revert file attributes on failure */ if (attrs != INVALID_FILE_ATTRIBUTES) SetFileAttributesW(wpnew, attrs); From 322fe4a7eeb59802b1d0cbab14d23e04cc3b2b93 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Mon, 11 Feb 2019 14:19:18 +0100 Subject: [PATCH 273/303] Introduce helper to create symlinks that knows about index_state On Windows, symbolic links actually have a type depending on the target: it can be a file or a directory. In certain circumstances, this poses problems, e.g. 
when a symbolic link is supposed to point into a submodule that is not checked out, so there is no way for Git to auto-detect the type. To help with that, we will add support over the course of the next commits to specify that symlink type via the Git attributes. This requires an index_state, though, something that Git for Windows' `symlink()` replacement cannot know about because the function signature is defined by the POSIX standard and not ours to change. So let's introduce a helper function to create symbolic links that *does* know about the index_state. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- apply.c | 2 +- builtin/difftool.c | 2 +- builtin/init-db.c | 4 ++-- compat/mingw.c | 2 +- compat/mingw.h | 4 +++- entry.c | 2 +- git-compat-util.h | 9 +++++++++ merge-recursive.c | 2 +- refs/files-backend.c | 2 +- 9 files changed, 20 insertions(+), 9 deletions(-) diff --git a/apply.c b/apply.c index 2b7cd930efa3bd..f86a01b26080dd 100644 --- a/apply.c +++ b/apply.c @@ -4366,7 +4366,7 @@ static int try_create_file(struct apply_state *state, const char *path, /* Although buf:size is counted string, it also is NUL * terminated. */ - return !!symlink(buf, path); + return !!create_symlink(state && state->repo ? state->repo->index : NULL, buf, path); fd = open(path, O_CREAT | O_EXCL | O_WRONLY, (mode & 0100) ? 
0777 : 0666); if (fd < 0) diff --git a/builtin/difftool.c b/builtin/difftool.c index 4b10ad1a36908f..a133424fe3899f 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -513,7 +513,7 @@ static int run_dir_diff(const char *extcmd, int symlinks, const char *prefix, } add_path(&wtdir, wtdir_len, dst_path); if (symlinks) { - if (symlink(wtdir.buf, rdir.buf)) { + if (create_symlink(lstate.istate, wtdir.buf, rdir.buf)) { ret = error_errno("could not symlink '%s' to '%s'", wtdir.buf, rdir.buf); goto finish; } diff --git a/builtin/init-db.c b/builtin/init-db.c index f33d18e8533606..665d2da51bce8f 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -78,7 +78,7 @@ static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, if (strbuf_readlink(&lnk, template_path->buf, st_template.st_size) < 0) die_errno(_("cannot readlink '%s'"), template_path->buf); - if (symlink(lnk.buf, path->buf)) + if (create_symlink(NULL, lnk.buf, path->buf)) die_errno(_("cannot symlink '%s' '%s'"), lnk.buf, path->buf); strbuf_release(&lnk); @@ -300,7 +300,7 @@ static int create_default_files(const char *template_path, path = git_path_buf(&buf, "tXXXXXX"); if (!close(xmkstemp(path)) && !unlink(path) && - !symlink("testing", path) && + !create_symlink(NULL, "testing", path) && !lstat(path, &st1) && S_ISLNK(st1.st_mode)) unlink(path); /* good */ diff --git a/compat/mingw.c b/compat/mingw.c index 1493b9b1701e76..87fba3d060719f 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2884,7 +2884,7 @@ int link(const char *oldpath, const char *newpath) return 0; } -int symlink(const char *target, const char *link) +int mingw_create_symlink(struct index_state *index, const char *target, const char *link) { wchar_t wtarget[MAX_LONG_PATH], wlink[MAX_LONG_PATH]; int len; diff --git a/compat/mingw.h b/compat/mingw.h index bde51af5c302fe..f329082bd5cc3c 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -213,8 +213,10 @@ int setitimer(int type, struct itimerval *in, struct itimerval 
*out); int sigaction(int sig, struct sigaction *in, struct sigaction *out); int link(const char *oldpath, const char *newpath); int uname(struct utsname *buf); -int symlink(const char *target, const char *link); int readlink(const char *path, char *buf, size_t bufsiz); +struct index_state; +int mingw_create_symlink(struct index_state *index, const char *target, const char *link); +#define create_symlink mingw_create_symlink /* * replacements of existing functions diff --git a/entry.c b/entry.c index 87ff624a5ea5f9..c9a701db189cfe 100644 --- a/entry.c +++ b/entry.c @@ -306,7 +306,7 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca if (!has_symlinks || to_tempfile) goto write_file_entry; - ret = symlink(new_blob, path); + ret = create_symlink(state->istate, new_blob, path); free(new_blob); if (ret) return error_errno("unable to create symlink %s", path); diff --git a/git-compat-util.h b/git-compat-util.h index a6a7d314c6eb79..cbaf0a4276dc69 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -558,6 +558,15 @@ static inline int git_has_dir_sep(const char *path) #define is_mount_point is_mount_point_via_stat #endif +#ifndef create_symlink +struct index_state; +static inline int git_create_symlink(struct index_state *index, const char *target, const char *link) +{ + return symlink(target, link); +} +#define create_symlink git_create_symlink +#endif + #ifndef query_user_email #define query_user_email() NULL #endif diff --git a/merge-recursive.c b/merge-recursive.c index 4ddd3adea003e3..005b0cfd54b3b1 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -993,7 +993,7 @@ static int update_file_flags(struct merge_options *opt, char *lnk = xmemdupz(buf, size); safe_create_leading_directories_const(path); unlink(path); - if (symlink(lnk, path)) + if (create_symlink(&opt->priv->orig_index, lnk, path)) ret = err(opt, _("failed to symlink '%s': %s"), path, strerror(errno)); free(lnk); diff --git a/refs/files-backend.c 
b/refs/files-backend.c index e4009b3c421f5b..2ed8f890dafa2d 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1880,7 +1880,7 @@ static int create_ref_symlink(struct ref_lock *lock, const char *target) #ifndef NO_SYMLINK_HEAD char *ref_path = get_locked_file_path(&lock->lk); unlink(ref_path); - ret = symlink(target, ref_path); + ret = create_symlink(NULL, target, ref_path); free(ref_path); if (ret) From 5d11986d32d933ee5740705c026256504ff8c3c1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Mon, 2 Mar 2020 21:54:29 +0100 Subject: [PATCH 274/303] mingw: emulate stat() a little more faithfully When creating directories via `safe_create_leading_directories()`, we might encounter an already-existing directory which is not readable by the current user. To handle that situation, Git's code calls `stat()` to determine whether we're looking at a directory. In such a case, `CreateFile()` will fail, though, no matter what, and consequently `mingw_stat()` will fail, too. But POSIX semantics seem to still allow `stat()` to go forward. So let's call `mingw_lstat()` for the rescue if we fail to get a file handle due to denied permission in `mingw_stat()`, and fill the stat info that way. We need to be careful to not allow this to go forward in case that we're looking at a symbolic link: to resolve the link, we would still have to create a file handle, and we just found out that we cannot. Therefore, `stat()` still needs to fail with `EACCES` in that case. This fixes https://github.com/git-for-windows/git/issues/2531. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index cc0dbd1152b55b..c71e58593457ad 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1086,7 +1086,19 @@ int mingw_stat(const char *file_name, struct stat *buf) FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); if (hnd == INVALID_HANDLE_VALUE) { - errno = err_win_to_posix(GetLastError()); + DWORD err = GetLastError(); + + if (err == ERROR_ACCESS_DENIED && + !mingw_lstat(file_name, buf) && + !S_ISLNK(buf->st_mode)) + /* + * POSIX semantics state to still try to fill + * information, even if permission is denied to create + * a file handle. + */ + return 0; + + errno = err_win_to_posix(err); return -1; } result = get_file_info_by_handle(hnd, buf); From 39c16ac8d530711f566e948c19c231990c0a040a Mon Sep 17 00:00:00 2001 From: JiSeop Moon <zcube@zcube.kr> Date: Mon, 23 Apr 2018 22:35:26 +0200 Subject: [PATCH 275/303] mingw: move the file_attr_to_st_mode() function definition In preparation for making this function a bit more complicated (to allow for special-casing the `ContainerMappedDirectories` in Windows containers, which look like a symbolic link, but are not), let's move it out of the header. 
Signed-off-by: JiSeop Moon <zcube@zcube.kr> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 14 ++++++++++++++ compat/win32.h | 14 +------------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index f056bb9ea2ae4a..125b526745e50d 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -3906,3 +3906,17 @@ int is_inside_windows_container(void) return inside_container; } + +int file_attr_to_st_mode (DWORD attr, DWORD tag) +{ + int fMode = S_IREAD; + if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) && tag == IO_REPARSE_TAG_SYMLINK) + fMode |= S_IFLNK; + else if (attr & FILE_ATTRIBUTE_DIRECTORY) + fMode |= S_IFDIR; + else + fMode |= S_IFREG; + if (!(attr & FILE_ATTRIBUTE_READONLY)) + fMode |= S_IWRITE; + return fMode; +} diff --git a/compat/win32.h b/compat/win32.h index 671bcc81f93351..52169ae19f4371 100644 --- a/compat/win32.h +++ b/compat/win32.h @@ -6,19 +6,7 @@ #include <windows.h> #endif -static inline int file_attr_to_st_mode (DWORD attr, DWORD tag) -{ - int fMode = S_IREAD; - if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) && tag == IO_REPARSE_TAG_SYMLINK) - fMode |= S_IFLNK; - else if (attr & FILE_ATTRIBUTE_DIRECTORY) - fMode |= S_IFDIR; - else - fMode |= S_IFREG; - if (!(attr & FILE_ATTRIBUTE_READONLY)) - fMode |= S_IWRITE; - return fMode; -} +extern int file_attr_to_st_mode (DWORD attr, DWORD tag); static inline int get_file_attr(const char *fname, WIN32_FILE_ATTRIBUTE_DATA *fdata) { From 32e946bf8c1dbcf74142e0746387aa4719da87dc Mon Sep 17 00:00:00 2001 From: Bert Belder <bertbelder@gmail.com> Date: Fri, 26 Oct 2018 11:51:51 +0200 Subject: [PATCH 276/303] mingw: allow to specify the symlink type in .gitattributes On Windows, symbolic links have a type: a "file symlink" must point at a file, and a "directory symlink" must point at a directory. If the type of symlink does not match its target, it doesn't work. Git does not record the type of symlink in the index or in a tree. 
On checkout it'll guess the type, which only works if the target exists at the time the symlink is created. This may often not be the case, for example when the link points at a directory inside a submodule. By specifying `symlink=file` or `symlink=dir` the user can specify what type of symlink Git should create, so Git doesn't have to rely on unreliable heuristics. Signed-off-by: Bert Belder <bertbelder@gmail.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- Documentation/gitattributes.txt | 30 +++++++++++++++++ compat/mingw.c | 58 ++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 4b36d51beb66f0..85faf9c1429485 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -385,6 +385,36 @@ sign `$` upon checkout. Any byte sequence that begins with with `$Id$` upon check-in. +`symlink` +^^^^^^^^^ + +On Windows, symbolic links have a type: a "file symlink" must point at +a file, and a "directory symlink" must point at a directory. If the +type of symlink does not match its target, it doesn't work. + +Git does not record the type of symlink in the index or in a tree. On +checkout it'll guess the type, which only works if the target exists +at the time the symlink is created. This may often not be the case, +for example when the link points at a directory inside a submodule. + +The `symlink` attribute allows you to explicitly set the type of symlink +to `file` or `dir`, so Git doesn't have to guess. 
If you have a set of +symlinks that point at other files, you can do: + +------------------------ +*.gif symlink=file +------------------------ + +To tell Git that a symlink points at a directory, use: + +------------------------ +tools_folder symlink=dir +------------------------ + +The `symlink` attribute is ignored on platforms other than Windows, +since they don't distinguish between different types of symlinks. + + `filter` ^^^^^^^^ diff --git a/compat/mingw.c b/compat/mingw.c index 87fba3d060719f..f4d8b9c820a1c9 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -14,6 +14,7 @@ #define SECURITY_WIN32 #include <sspi.h> #include "win32/fscache.h" +#include "../attr.h" #define HCAST(type, handle) ((type)(intptr_t)handle) @@ -2884,6 +2885,37 @@ int link(const char *oldpath, const char *newpath) return 0; } +enum symlink_type { + SYMLINK_TYPE_UNSPECIFIED = 0, + SYMLINK_TYPE_FILE, + SYMLINK_TYPE_DIRECTORY, +}; + +static enum symlink_type check_symlink_attr(struct index_state *index, const char *link) +{ + static struct attr_check *check; + const char *value; + + if (!index) + return SYMLINK_TYPE_UNSPECIFIED; + + if (!check) + check = attr_check_initl("symlink", NULL); + + git_check_attr(index, link, check); + + value = check->items[0].value; + if (ATTR_UNSET(value)) + return SYMLINK_TYPE_UNSPECIFIED; + if (!strcmp(value, "file")) + return SYMLINK_TYPE_FILE; + if (!strcmp(value, "dir") || !strcmp(value, "directory")) + return SYMLINK_TYPE_DIRECTORY; + + warning(_("ignoring invalid symlink type '%s' for '%s'"), value, link); + return SYMLINK_TYPE_UNSPECIFIED; +} + int mingw_create_symlink(struct index_state *index, const char *target, const char *link) { wchar_t wtarget[MAX_LONG_PATH], wlink[MAX_LONG_PATH]; @@ -2904,7 +2936,31 @@ int mingw_create_symlink(struct index_state *index, const char *target, const ch if (wtarget[len] == '/') wtarget[len] = '\\'; - return create_phantom_symlink(wtarget, wlink); + switch (check_symlink_attr(index, link)) { + case 
SYMLINK_TYPE_UNSPECIFIED: + /* Create a phantom symlink: it is initially created as a file + * symlink, but may change to a directory symlink later if/when + * the target exists. */ + return create_phantom_symlink(wtarget, wlink); + case SYMLINK_TYPE_FILE: + if (!CreateSymbolicLinkW(wlink, wtarget, symlink_file_flags)) + break; + return 0; + case SYMLINK_TYPE_DIRECTORY: + if (!CreateSymbolicLinkW(wlink, wtarget, + symlink_directory_flags)) + break; + /* There may be dangling phantom symlinks that point at this + * one, which should now morph into directory symlinks. */ + process_phantom_symlinks(); + return 0; + default: + BUG("unhandled symlink type"); + } + + /* CreateSymbolicLinkW failed. */ + errno = err_win_to_posix(GetLastError()); + return -1; } #ifndef _WINNT_H From cf74ed5f5b1852336a676bc79f1653b492d69d21 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 4 Jun 2020 23:16:07 +0200 Subject: [PATCH 277/303] mingw: special-case index entries for symlinks with buggy size In https://github.com/git-for-windows/git/pull/2637, we fixed a bug where symbolic links' target path sizes were recorded incorrectly in the index. The downside of this fix was that every user with tracked symbolic links in their checkouts would see them as modified in `git status`, but not in `git diff`, and only a `git add <path>` (or `git add -u`) would "fix" this. Let's do better than that: we can detect that situation and simply pretend that a symbolic link with a known bad size (or a size that just happens to be that bad size, a _very_ unlikely scenario because it would overflow our buffers due to the trailing NUL byte) means that it needs to be re-checked as if we had just checked it out. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- read-cache.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/read-cache.c b/read-cache.c index de1c493f7e3664..7f94bc68d060aa 100644 --- a/read-cache.c +++ b/read-cache.c @@ -479,6 +479,17 @@ int ie_modified(struct index_state *istate, * then we know it is. */ if ((changed & DATA_CHANGED) && +#ifdef GIT_WINDOWS_NATIVE + /* + * Work around Git for Windows v2.27.0 fixing a bug where symlinks' + * target path lengths were not read at all, and instead recorded + * as 4096: now, all symlinks would appear as modified. + * + * So let's just special-case symlinks with a target path length + * (i.e. `sd_size`) of 4096 and force them to be re-checked. + */ + (!S_ISLNK(st->st_mode) || ce->ce_stat_data.sd_size != MAX_LONG_PATH) && +#endif (S_ISGITLINK(ce->ce_mode) || ce->ce_stat_data.sd_size != 0)) return changed; From 7da6e7bb4ba5ecf49d999693fd8d4e9c2da66840 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Mon, 23 Apr 2018 23:20:00 +0200 Subject: [PATCH 278/303] mingw: Windows Docker volumes are *not* symbolic links ... even if they may look like them. As looking up the target of the "symbolic link" (just to see whether it starts with `/ContainerMappedDirectories/`) is pretty expensive, we do it when we can be *really* sure that there is a possibility that this might be the case. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: JiSeop Moon <zcube@zcube.kr> --- compat/mingw.c | 25 +++++++++++++++++++------ compat/win32.h | 2 +- compat/win32/fscache.c | 24 +++++++++++++++++++++++- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 125b526745e50d..1caa21f2e6c076 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1012,7 +1012,7 @@ int mingw_lstat(const char *file_name, struct stat *buf) buf->st_uid = 0; buf->st_nlink = 1; buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes, - reparse_tag); + reparse_tag, file_name); buf->st_size = S_ISLNK(buf->st_mode) ? link_len : fdata.nFileSizeLow | (((off_t) fdata.nFileSizeHigh) << 32); buf->st_dev = buf->st_rdev = 0; /* not used by Git */ @@ -1063,7 +1063,7 @@ static int get_file_info_by_handle(HANDLE hnd, struct stat *buf) buf->st_gid = 0; buf->st_uid = 0; buf->st_nlink = 1; - buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes, 0); + buf->st_mode = file_attr_to_st_mode(fdata.dwFileAttributes, 0, NULL); buf->st_size = fdata.nFileSizeLow | (((off_t)fdata.nFileSizeHigh)<<32); buf->st_dev = buf->st_rdev = 0; /* not used by Git */ @@ -3907,12 +3907,25 @@ int is_inside_windows_container(void) return inside_container; } -int file_attr_to_st_mode (DWORD attr, DWORD tag) +int file_attr_to_st_mode (DWORD attr, DWORD tag, const char *path) { int fMode = S_IREAD; - if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) && tag == IO_REPARSE_TAG_SYMLINK) - fMode |= S_IFLNK; - else if (attr & FILE_ATTRIBUTE_DIRECTORY) + if ((attr & FILE_ATTRIBUTE_REPARSE_POINT) && + tag == IO_REPARSE_TAG_SYMLINK) { + int flag = S_IFLNK; + char buf[MAX_LONG_PATH]; + + /* + * Windows containers' mapped volumes are marked as reparse + * points and look like symbolic links, but they are not. 
+ */ + if (path && is_inside_windows_container() && + readlink(path, buf, sizeof(buf)) > 27 && + starts_with(buf, "/ContainerMappedDirectories/")) + flag = S_IFDIR; + + fMode |= flag; + } else if (attr & FILE_ATTRIBUTE_DIRECTORY) fMode |= S_IFDIR; else fMode |= S_IFREG; diff --git a/compat/win32.h b/compat/win32.h index 52169ae19f4371..299f01bdf0f5a4 100644 --- a/compat/win32.h +++ b/compat/win32.h @@ -6,7 +6,7 @@ #include <windows.h> #endif -extern int file_attr_to_st_mode (DWORD attr, DWORD tag); +extern int file_attr_to_st_mode (DWORD attr, DWORD tag, const char *path); static inline int get_file_attr(const char *fname, WIN32_FILE_ATTRIBUTE_DATA *fdata) { diff --git a/compat/win32/fscache.c b/compat/win32/fscache.c index 16b1e522f34182..baba69b40e2c67 100644 --- a/compat/win32/fscache.c +++ b/compat/win32/fscache.c @@ -200,8 +200,30 @@ static struct fsentry *fseentry_create_entry(struct fscache *cache, fdata->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT ? fdata->EaSize : 0; + /* + * On certain Windows versions, host directories mapped into + * Windows Containers ("Volumes", see https://docs.docker.com/storage/volumes/) + * look like symbolic links, but their targets are paths that + * are valid only in kernel mode. + * + * Let's work around this by detecting that situation and + * telling Git that these are *not* symbolic links. + */ + if (fse->reparse_tag == IO_REPARSE_TAG_SYMLINK && + sizeof(buf) > (list ? list->len + 1 : 0) + fse->len + 1 && + is_inside_windows_container()) { + size_t off = 0; + if (list) { + memcpy(buf, list->dirent.d_name, list->len); + buf[list->len] = '/'; + off = list->len + 1; + } + memcpy(buf + off, fse->dirent.d_name, fse->len); + buf[off + fse->len] = '\0'; + } + fse->st_mode = file_attr_to_st_mode(fdata->FileAttributes, - fdata->EaSize); + fdata->EaSize, buf); fse->dirent.d_type = S_ISREG(fse->st_mode) ? DT_REG : S_ISDIR(fse->st_mode) ? DT_DIR : DT_LNK; fse->u.s.st_size = S_ISLNK(fse->st_mode) ? 
MAX_LONG_PATH : From 469f0c60a9f992774378ecd05ac88d9c475f9010 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 20 Jul 2017 22:45:01 +0200 Subject: [PATCH 279/303] mingw: explicitly specify with which cmd to prefix the cmdline The main idea of this patch is that even if we have to look up the absolute path of the script, if only the basename was specified as argv[0], then we should use that basename on the command line, too, not the absolute path. This patch will also help with the upcoming patch where we automatically substitute "sh ..." by "busybox sh ..." if "sh" is not in the PATH but "busybox" is: we will do that by substituting the actual executable, but still keep prepending "sh" to the command line. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index f4d8b9c820a1c9..8c85995ca57ea5 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1838,8 +1838,8 @@ static int is_msys2_sh(const char *cmd) } static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **deltaenv, - const char *dir, - int prepend_cmd, int fhin, int fhout, int fherr) + const char *dir, const char *prepend_cmd, + int fhin, int fhout, int fherr) { static int restrict_handle_inheritance = -1; STARTUPINFOEXW si; @@ -1930,9 +1930,9 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **deltaen /* concatenate argv, quoting args as we go */ strbuf_init(&args, 0); if (prepend_cmd) { - char *quoted = (char *)quote_arg(cmd); + char *quoted = (char *)quote_arg(prepend_cmd); strbuf_addstr(&args, quoted); - if (quoted != cmd) + if (quoted != prepend_cmd) free(quoted); } for (; *argv; argv++) { @@ -2091,7 +2091,8 @@ static pid_t mingw_spawnve_fd(const char *cmd, const char **argv, char **deltaen return (pid_t)pi.dwProcessId; } -static pid_t mingw_spawnv(const char *cmd, const char 
**argv, int prepend_cmd) +static pid_t mingw_spawnv(const char *cmd, const char **argv, + const char *prepend_cmd) { return mingw_spawnve_fd(cmd, argv, NULL, NULL, prepend_cmd, 0, 1, 2); } @@ -2119,14 +2120,14 @@ pid_t mingw_spawnvpe(const char *cmd, const char **argv, char **deltaenv, pid = -1; } else { - pid = mingw_spawnve_fd(iprog, argv, deltaenv, dir, 1, + pid = mingw_spawnve_fd(iprog, argv, deltaenv, dir, interpr, fhin, fhout, fherr); free(iprog); } argv[0] = argv0; } else - pid = mingw_spawnve_fd(prog, argv, deltaenv, dir, 0, + pid = mingw_spawnve_fd(prog, argv, deltaenv, dir, NULL, fhin, fhout, fherr); free(prog); } @@ -2154,7 +2155,7 @@ static int try_shell_exec(const char *cmd, char *const *argv) argv2[0] = (char *)cmd; /* full path to the script file */ COPY_ARRAY(&argv2[1], &argv[1], argc); exec_id = trace2_exec(prog, argv2); - pid = mingw_spawnv(prog, argv2, 1); + pid = mingw_spawnv(prog, argv2, interpr); if (pid >= 0) { int status; if (waitpid(pid, &status, 0) < 0) @@ -2178,7 +2179,7 @@ int mingw_execv(const char *cmd, char *const *argv) int exec_id; exec_id = trace2_exec(cmd, (const char **)argv); - pid = mingw_spawnv(cmd, (const char **)argv, 0); + pid = mingw_spawnv(cmd, (const char **)argv, NULL); if (pid < 0) { trace2_exec_result(exec_id, -1); return -1; From 27ea87878721bc9659e7ea68d1a4e7071cffeeaa Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 20 Jul 2017 20:41:29 +0200 Subject: [PATCH 280/303] mingw: when path_lookup() failed, try BusyBox BusyBox comes with a ton of applets ("applet" being the identical concept to Git's "builtins"). And similar to Git's builtins, the applets can be called via `busybox <command>`, or the BusyBox executable can be copied/hard-linked to the command name. The similarities do not end here. 
Just as with Git's builtins, it is problematic that BusyBox' hard-linked applets cannot easily be put into a .zip file: .zip archives have no concept of hard-links and therefore would store identical copies (and also extract identical copies, "inflating" the archive unnecessarily). To counteract that issue, MinGit already ships without hard-linked copies of the builtins, and the plan is to do the same with BusyBox' applets: simply ship busybox.exe as single executable, without hard-linked applets. To accommodate that, Git is being taught by this commit a very special trick, exploiting the fact that it is possible to call an executable with a command-line whose argv[0] is different from the executable's name: when `sh` is to be spawned, and no `sh` is found in the PATH, but busybox.exe is, use that executable (with unchanged argv). Likewise, if any executable to be spawned is not on the PATH, but busybox.exe is found, parse the output of `busybox.exe --help` to find out what applets are included, and if the command matches an included applet name, use busybox.exe to execute it. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ t/t0014-alias.sh | 2 +- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index 8c85995ca57ea5..e54d968b2b4ba9 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -15,6 +15,7 @@ #include <sspi.h> #include "win32/fscache.h" #include "../attr.h" +#include "../string-list.h" #define HCAST(type, handle) ((type)(intptr_t)handle) @@ -1611,6 +1612,65 @@ static char *lookup_prog(const char *dir, int dirlen, const char *cmd, return NULL; } +static char *path_lookup(const char *cmd, int exe_only); + +static char *is_busybox_applet(const char *cmd) +{ + static struct string_list applets = STRING_LIST_INIT_DUP; + static char *busybox_path; + static int busybox_path_initialized; + + /* Avoid infinite loop */ + if (!strncasecmp(cmd, "busybox", 7) && + (!cmd[7] || !strcasecmp(cmd + 7, ".exe"))) + return NULL; + + if (!busybox_path_initialized) { + busybox_path = path_lookup("busybox.exe", 1); + busybox_path_initialized = 1; + } + + /* Assume that sh is compiled in... */ + if (!busybox_path || !strcasecmp(cmd, "sh")) + return xstrdup_or_null(busybox_path); + + if (!applets.nr) { + struct child_process cp = CHILD_PROCESS_INIT; + struct strbuf buf = STRBUF_INIT; + char *p; + + strvec_pushl(&cp.args, busybox_path, "--help", NULL); + + if (capture_command(&cp, &buf, 2048)) { + string_list_append(&applets, ""); + return NULL; + } + + /* parse output */ + p = strstr(buf.buf, "Currently defined functions:\n"); + if (!p) { + warning("Could not parse output of busybox --help"); + string_list_append(&applets, ""); + return NULL; + } + p = strchrnul(p, '\n'); + for (;;) { + size_t len; + + p += strspn(p, "\n\t ,"); + len = strcspn(p, "\n\t ,"); + if (!len) + break; + p[len] = '\0'; + string_list_insert(&applets, p); + p = p + len + 1; + } + } + + return string_list_has_string(&applets, cmd) ? 
+ xstrdup(busybox_path) : NULL; +} + /* * Determines the absolute path of cmd using the split path in path. * If cmd contains a slash or backslash, no lookup is performed. @@ -1639,6 +1699,9 @@ static char *path_lookup(const char *cmd, int exe_only) path = sep + 1; } + if (!prog && !isexe) + prog = is_busybox_applet(cmd); + return prog; } diff --git a/t/t0014-alias.sh b/t/t0014-alias.sh index 288e08299abc4e..06669cab78eec6 100755 --- a/t/t0014-alias.sh +++ b/t/t0014-alias.sh @@ -39,7 +39,7 @@ test_expect_success 'looping aliases - internal execution' ' test_expect_success 'run-command formats empty args properly' ' test_must_fail env GIT_TRACE=1 git frotz a "" b " " c 2>actual.raw && - sed -ne "/run_command:/s/.*trace: run_command: //p" actual.raw >actual && + sed -ne "/run_command: git-frotz/s/.*trace: run_command: //p" actual.raw >actual && echo "git-frotz a '\'''\'' b '\'' '\'' c" >expect && test_cmp expect actual ' From 950da0acf2668d6d0b86d89afd06df60611b900c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Sat, 5 Aug 2017 22:23:36 +0200 Subject: [PATCH 281/303] test-lib: avoid unnecessary Perl invocation It is a bit strange, and even undesirable, to require Perl just to run the test suite even when NO_PERL was set. This patch does not fix this problem by any stretch of imagination. However, it fixes *the* Perl invocation that *every single* test script has to run. While at it, it makes the source code also more grep'able, as the code that unsets some, but not all, GIT_* environment variables just became a *lot* more explicit. And all that while still reducing the total number of lines. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/test-lib.sh | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/t/test-lib.sh b/t/test-lib.sh index a65df2fd220465..b181a31ce901e6 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -486,23 +486,18 @@ EDITOR=: # /usr/xpg4/bin/sh and /bin/ksh to bail out. So keep the unsets # deriving from the command substitution clustered with the other # ones. -unset VISUAL EMAIL LANGUAGE $("$PERL_PATH" -e ' - my @env = keys %ENV; - my $ok = join("|", qw( - TRACE - DEBUG - TEST - .*_TEST - PROVE - VALGRIND - UNZIP - PERF_ - CURL_VERBOSE - TRACE_CURL - )); - my @vars = grep(/^GIT_/ && !/^GIT_($ok)/o, @env); - print join("\n", @vars); -') +unset VISUAL EMAIL LANGUAGE $(env | sed -n \ + -e '/^GIT_TRACE/d' \ + -e '/^GIT_DEBUG/d' \ + -e '/^GIT_TEST/d' \ + -e '/^GIT_.*_TEST/d' \ + -e '/^GIT_PROVE/d' \ + -e '/^GIT_VALGRIND/d' \ + -e '/^GIT_UNZIP/d' \ + -e '/^GIT_PERF_/d' \ + -e '/^GIT_CURL_VERBOSE/d' \ + -e '/^GIT_TRACE_CURL/d' \ + -e 's/^\(GIT_[^=]*\)=.*/\1/p') unset XDG_CACHE_HOME unset XDG_CONFIG_HOME unset GITPERLLIB From 486da742511775f88d6a9279125bcce57697ba4b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 7 Jun 2018 10:47:25 +0200 Subject: [PATCH 282/303] tests: replace mingw_test_cmp with a helper in C This helper is slightly more performant than the script with MSYS2's Bash. And a lot more readable. To accommodate t1050, which wants to compare files weighing in with 3MB (falling outside of t1050's malloc limit of 1.5MB), we simply lift the allocation limit by setting the environment variable GIT_ALLOC_LIMIT to zero when calling the helper. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- Makefile | 1 + t/helper/test-cmp.c | 73 +++++++++++++++++++++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/test-lib-functions.sh | 68 +------------------------------------- t/test-lib.sh | 2 +- 6 files changed, 78 insertions(+), 68 deletions(-) create mode 100644 t/helper/test-cmp.c diff --git a/Makefile b/Makefile index 305460a2b08e0e..584d45ede57d80 100644 --- a/Makefile +++ b/Makefile @@ -722,6 +722,7 @@ TEST_BUILTINS_OBJS += test-advise.o TEST_BUILTINS_OBJS += test-bitmap.o TEST_BUILTINS_OBJS += test-bloom.o TEST_BUILTINS_OBJS += test-chmtime.o +TEST_BUILTINS_OBJS += test-cmp.o TEST_BUILTINS_OBJS += test-config.o TEST_BUILTINS_OBJS += test-crontab.o TEST_BUILTINS_OBJS += test-csprng.o diff --git a/t/helper/test-cmp.c b/t/helper/test-cmp.c new file mode 100644 index 00000000000000..1c646a54bf609b --- /dev/null +++ b/t/helper/test-cmp.c @@ -0,0 +1,73 @@ +#include "test-tool.h" +#include "git-compat-util.h" +#include "strbuf.h" +#include "gettext.h" +#include "parse-options.h" +#include "run-command.h" + +#ifdef WIN32 +#define NO_SUCH_DIR "\\\\.\\GLOBALROOT\\invalid" +#else +#define NO_SUCH_DIR "/dev/null" +#endif + +static int run_diff(const char *path1, const char *path2) +{ + const char *argv[] = { + "diff", "--no-index", NULL, NULL, NULL + }; + const char *env[] = { + "GIT_PAGER=cat", + "GIT_DIR=" NO_SUCH_DIR, + "HOME=" NO_SUCH_DIR, + NULL + }; + + argv[2] = path1; + argv[3] = path2; + return run_command_v_opt_cd_env(argv, + RUN_COMMAND_NO_STDIN | RUN_GIT_CMD, + NULL, env); +} + +int cmd__cmp(int argc, const char **argv) +{ + FILE *f0, *f1; + struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT; + + if (argc != 3) + die("Require exactly 2 arguments, got %d", argc); + + if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"))) + return error_errno("could not open '%s'", argv[1]); + if (!(f1 = !strcmp(argv[2], "-") ? 
stdin : fopen(argv[2], "r"))) { + fclose(f0); + return error_errno("could not open '%s'", argv[2]); + } + + for (;;) { + int r0 = strbuf_getline(&b0, f0); + int r1 = strbuf_getline(&b1, f1); + + if (r0 == EOF) { + fclose(f0); + fclose(f1); + strbuf_release(&b0); + strbuf_release(&b1); + if (r1 == EOF) + return 0; +cmp_failed: + if (!run_diff(argv[1], argv[2])) + die("Huh? 'diff --no-index %s %s' succeeded", + argv[1], argv[2]); + return 1; + } + if (r1 == EOF || strbuf_cmp(&b0, &b1)) { + fclose(f0); + fclose(f1); + strbuf_release(&b0); + strbuf_release(&b1); + goto cmp_failed; + } + } +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index d1d013bcd920b1..409e5a178f6cb4 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -14,6 +14,7 @@ static struct test_cmd cmds[] = { { "bitmap", cmd__bitmap }, { "bloom", cmd__bloom }, { "chmtime", cmd__chmtime }, + { "cmp", cmd__cmp }, { "config", cmd__config }, { "crontab", cmd__crontab }, { "csprng", cmd__csprng }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 6b46b6444b657c..a191368d8aa442 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -8,6 +8,7 @@ int cmd__advise_if_enabled(int argc, const char **argv); int cmd__bitmap(int argc, const char **argv); int cmd__bloom(int argc, const char **argv); int cmd__chmtime(int argc, const char **argv); +int cmd__cmp(int argc, const char **argv); int cmd__config(int argc, const char **argv); int cmd__crontab(int argc, const char **argv); int cmd__csprng(int argc, const char **argv); diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index c6479f24eb5ac2..1c77050849e8ca 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1153,7 +1153,7 @@ test_expect_code () { test_cmp () { test "$#" -ne 2 && BUG "2 param" - eval "$GIT_TEST_CMP" '"$@"' + GIT_ALLOC_LIMIT=0 eval "$GIT_TEST_CMP" '"$@"' } # Check that the given config key has the expected value. 
@@ -1454,72 +1454,6 @@ test_skip_or_die () { error "$2" } -# The following mingw_* functions obey POSIX shell syntax, but are actually -# bash scripts, and are meant to be used only with bash on Windows. - -# A test_cmp function that treats LF and CRLF equal and avoids to fork -# diff when possible. -mingw_test_cmp () { - # Read text into shell variables and compare them. If the results - # are different, use regular diff to report the difference. - local test_cmp_a= test_cmp_b= - - # When text came from stdin (one argument is '-') we must feed it - # to diff. - local stdin_for_diff= - - # Since it is difficult to detect the difference between an - # empty input file and a failure to read the files, we go straight - # to diff if one of the inputs is empty. - if test -s "$1" && test -s "$2" - then - # regular case: both files non-empty - mingw_read_file_strip_cr_ test_cmp_a <"$1" - mingw_read_file_strip_cr_ test_cmp_b <"$2" - elif test -s "$1" && test "$2" = - - then - # read 2nd file from stdin - mingw_read_file_strip_cr_ test_cmp_a <"$1" - mingw_read_file_strip_cr_ test_cmp_b - stdin_for_diff='<<<"$test_cmp_b"' - elif test "$1" = - && test -s "$2" - then - # read 1st file from stdin - mingw_read_file_strip_cr_ test_cmp_a - mingw_read_file_strip_cr_ test_cmp_b <"$2" - stdin_for_diff='<<<"$test_cmp_a"' - fi - test -n "$test_cmp_a" && - test -n "$test_cmp_b" && - test "$test_cmp_a" = "$test_cmp_b" || - eval "diff -u \"\$@\" $stdin_for_diff" -} - -# $1 is the name of the shell variable to fill in -mingw_read_file_strip_cr_ () { - # Read line-wise using LF as the line separator - # and use IFS to strip CR. 
- local line - while : - do - if IFS=$'\r' read -r -d $'\n' line - then - # good - line=$line$'\n' - else - # we get here at EOF, but also if the last line - # was not terminated by LF; in the latter case, - # some text was read - if test -z "$line" - then - # EOF, really - break - fi - fi - eval "$1=\$$1\$line" - done -} - # Like "env FOO=BAR some-program", but run inside a subshell, which means # it also works for shell functions (though those functions cannot impact # the environment outside of the test_env invocation). diff --git a/t/test-lib.sh b/t/test-lib.sh index b181a31ce901e6..ce00f03e395fff 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1704,7 +1704,7 @@ case $uname_s in test_set_prereq SED_STRIPS_CR test_set_prereq GREP_STRIPS_CR test_set_prereq WINDOWS - GIT_TEST_CMP=mingw_test_cmp + GIT_TEST_CMP="test-tool cmp" ;; *CYGWIN*) test_set_prereq POSIXPERM From fa912e4c1eaa13ef8a066011dc63f30ab2536fd3 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 20 Jul 2017 22:18:56 +0200 Subject: [PATCH 283/303] test-tool: learn to act as a drop-in replacement for `iconv` It is convenient to assume that everybody who wants to build & test Git has access to a working `iconv` executable (after all, we already pretty much require libiconv). However, that limits esoteric test scenarios such as Git for Windows', where an end user installation has to ship with `iconv` for the sole purpose of being testable. That payload serves no other purpose. So let's just have a test helper (to be able to test Git, the test helpers have to be available, after all) to act as `iconv` replacement. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- Makefile | 1 + t/helper/test-iconv.c | 47 +++++++++++++++++++++++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + 4 files changed, 50 insertions(+) create mode 100644 t/helper/test-iconv.c diff --git a/Makefile b/Makefile index 584d45ede57d80..5d0de405ae0ebc 100644 --- a/Makefile +++ b/Makefile @@ -745,6 +745,7 @@ TEST_BUILTINS_OBJS += test-hash-speed.o TEST_BUILTINS_OBJS += test-hash.o TEST_BUILTINS_OBJS += test-hashmap.o TEST_BUILTINS_OBJS += test-hexdump.o +TEST_BUILTINS_OBJS += test-iconv.o TEST_BUILTINS_OBJS += test-index-version.o TEST_BUILTINS_OBJS += test-json-writer.o TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o diff --git a/t/helper/test-iconv.c b/t/helper/test-iconv.c new file mode 100644 index 00000000000000..d3c772fddf990b --- /dev/null +++ b/t/helper/test-iconv.c @@ -0,0 +1,47 @@ +#include "test-tool.h" +#include "git-compat-util.h" +#include "strbuf.h" +#include "gettext.h" +#include "parse-options.h" +#include "utf8.h" + +int cmd__iconv(int argc, const char **argv) +{ + struct strbuf buf = STRBUF_INIT; + char *from = NULL, *to = NULL, *p; + size_t len; + int ret = 0; + const char * const iconv_usage[] = { + N_("test-helper --iconv [<options>]"), + NULL + }; + struct option options[] = { + OPT_STRING('f', "from-code", &from, "encoding", "from"), + OPT_STRING('t', "to-code", &to, "encoding", "to"), + OPT_END() + }; + + argc = parse_options(argc, argv, NULL, options, + iconv_usage, 0); + + if (argc > 1 || !from || !to) + usage_with_options(iconv_usage, options); + + if (!argc) { + if (strbuf_read(&buf, 0, 2048) < 0) + die_errno("Could not read from stdin"); + } else if (strbuf_read_file(&buf, argv[0], 2048) < 0) + die_errno("Could not read from '%s'", argv[0]); + + p = reencode_string_len(buf.buf, buf.len, to, from, &len); + if (!p) + die_errno("Could not reencode"); + if (write(1, p, len) < 0) + ret = !!error_errno("Could not write %"PRIuMAX" bytes", + 
(uintmax_t)len); + + strbuf_release(&buf); + free(p); + + return ret; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 409e5a178f6cb4..a64bfe0ebc1da0 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -36,6 +36,7 @@ static struct test_cmd cmds[] = { { "hashmap", cmd__hashmap }, { "hash-speed", cmd__hash_speed }, { "hexdump", cmd__hexdump }, + { "iconv", cmd__iconv }, { "index-version", cmd__index_version }, { "json-writer", cmd__json_writer }, { "lazy-init-name-hash", cmd__lazy_init_name_hash }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index a191368d8aa442..d98e57710b5370 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -31,6 +31,7 @@ int cmd__getcwd(int argc, const char **argv); int cmd__hashmap(int argc, const char **argv); int cmd__hash_speed(int argc, const char **argv); int cmd__hexdump(int argc, const char **argv); +int cmd__iconv(int argc, const char **argv); int cmd__index_version(int argc, const char **argv); int cmd__json_writer(int argc, const char **argv); int cmd__lazy_init_name_hash(int argc, const char **argv); From 1bd6e5c507f46ccc7fded890c534acfa3c860727 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 20 Jul 2017 22:25:21 +0200 Subject: [PATCH 284/303] tests(mingw): if `iconv` is unavailable, use `test-tool iconv` Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/test-lib.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/t/test-lib.sh b/t/test-lib.sh index ce00f03e395fff..8dc51175dd0f4e 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1705,6 +1705,12 @@ case $uname_s in test_set_prereq GREP_STRIPS_CR test_set_prereq WINDOWS GIT_TEST_CMP="test-tool cmp" + if !
type iconv >/dev/null 2>&1 + then + iconv () { + test-tool iconv "$@" + } + fi ;; *CYGWIN*) test_set_prereq POSIXPERM From 5c32e432ecdba4cd1f36153b9efc950f7bf4e4ec Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 11 Oct 2018 23:55:44 +0200 Subject: [PATCH 285/303] gitattributes: mark .png files as binary Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- .gitattributes | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitattributes b/.gitattributes index b0044cf272fec9..2bc4defac6720c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4,6 +4,7 @@ *.perl eol=lf diff=perl *.pl eof=lf diff=perl *.pm eol=lf diff=perl +*.png binary *.py eol=lf diff=python *.bat eol=crlf CODE_OF_CONDUCT.md -whitespace From a3a487693f5a7ef686057883a0cb40a64d85cf7a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Sat, 5 Aug 2017 20:28:37 +0200 Subject: [PATCH 286/303] tests: move test PNGs into t/lib-diff/ We already have a directory where we store files intended for use by multiple test scripts. The same directory is a better home for the test-binary-*.png files than t/. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/{ => lib-diff}/test-binary-1.png | Bin t/{ => lib-diff}/test-binary-2.png | Bin t/t3307-notes-man.sh | 2 +- t/t3903-stash.sh | 2 +- t/t4012-diff-binary.sh | 2 +- t/t4049-diff-stat-count.sh | 2 +- t/t4108-apply-threeway.sh | 12 ++++++------ t/t6403-merge-file.sh | 2 +- t/t6407-merge-binary.sh | 2 +- t/t9200-git-cvsexportcommit.sh | 15 ++++++++------- 10 files changed, 20 insertions(+), 19 deletions(-) rename t/{ => lib-diff}/test-binary-1.png (100%) rename t/{ => lib-diff}/test-binary-2.png (100%) diff --git a/t/test-binary-1.png b/t/lib-diff/test-binary-1.png similarity index 100% rename from t/test-binary-1.png rename to t/lib-diff/test-binary-1.png diff --git a/t/test-binary-2.png b/t/lib-diff/test-binary-2.png similarity index 100% rename from t/test-binary-2.png rename to t/lib-diff/test-binary-2.png diff --git a/t/t3307-notes-man.sh b/t/t3307-notes-man.sh index ae316502c4531b..2b6894df0a247a 100755 --- a/t/t3307-notes-man.sh +++ b/t/t3307-notes-man.sh @@ -27,7 +27,7 @@ test_expect_success 'example 1: notes to add an Acked-by line' ' ' test_expect_success 'example 2: binary notes' ' - cp "$TEST_DIRECTORY"/test-binary-1.png . && + cp "$TEST_DIRECTORY"/lib-diff/test-binary-1.png . 
&& git checkout B && blob=$(git hash-object -w test-binary-1.png) && git notes --ref=logo add -C "$blob" && diff --git a/t/t3903-stash.sh b/t/t3903-stash.sh index 376cc8f4ab8429..4e49ac1b114a3b 100755 --- a/t/t3903-stash.sh +++ b/t/t3903-stash.sh @@ -1332,7 +1332,7 @@ test_expect_success 'stash -- <subdir> works with binary files' ' git reset && >subdir/untracked && >subdir/tracked && - cp "$TEST_DIRECTORY"/test-binary-1.png subdir/tracked-binary && + cp "$TEST_DIRECTORY"/lib-diff/test-binary-1.png subdir/tracked-binary && git add subdir/tracked* && git stash -- subdir/ && test_path_is_missing subdir/tracked && diff --git a/t/t4012-diff-binary.sh b/t/t4012-diff-binary.sh index c509143c8141e0..c2c7c021346421 100755 --- a/t/t4012-diff-binary.sh +++ b/t/t4012-diff-binary.sh @@ -20,7 +20,7 @@ test_expect_success 'prepare repository' ' echo AIT >a && echo BIT >b && echo CIT >c && echo DIT >d && git update-index --add a b c d && echo git >a && - cat "$TEST_DIRECTORY"/test-binary-1.png >b && + cat "$TEST_DIRECTORY"/lib-diff/test-binary-1.png >b && echo git >c && cat b b >d ' diff --git a/t/t4049-diff-stat-count.sh b/t/t4049-diff-stat-count.sh index 0a4fc735d44ad5..6c028646485971 100755 --- a/t/t4049-diff-stat-count.sh +++ b/t/t4049-diff-stat-count.sh @@ -34,7 +34,7 @@ test_expect_success 'binary changes do not count in lines' ' git reset --hard && echo a >a && echo c >c && - cat "$TEST_DIRECTORY"/test-binary-1.png >d && + cat "$TEST_DIRECTORY"/lib-diff/test-binary-1.png >d && cat >expect <<-\EOF && a | 1 + c | 1 + diff --git a/t/t4108-apply-threeway.sh b/t/t4108-apply-threeway.sh index c558282bc09475..1c85e7051d5f72 100755 --- a/t/t4108-apply-threeway.sh +++ b/t/t4108-apply-threeway.sh @@ -232,11 +232,11 @@ test_expect_success 'apply with --3way --cached and conflicts' ' test_expect_success 'apply binary file patch' ' git reset --hard main && - cp "$TEST_DIRECTORY/test-binary-1.png" bin.png && + cp "$TEST_DIRECTORY/lib-diff/test-binary-1.png" bin.png && git add bin.png && 
git commit -m "add binary file" && - cp "$TEST_DIRECTORY/test-binary-2.png" bin.png && + cp "$TEST_DIRECTORY/lib-diff/test-binary-2.png" bin.png && git diff --binary >bin.diff && git reset --hard && @@ -247,11 +247,11 @@ test_expect_success 'apply binary file patch' ' test_expect_success 'apply binary file patch with 3way' ' git reset --hard main && - cp "$TEST_DIRECTORY/test-binary-1.png" bin.png && + cp "$TEST_DIRECTORY/lib-diff/test-binary-1.png" bin.png && git add bin.png && git commit -m "add binary file" && - cp "$TEST_DIRECTORY/test-binary-2.png" bin.png && + cp "$TEST_DIRECTORY/lib-diff/test-binary-2.png" bin.png && git diff --binary >bin.diff && git reset --hard && @@ -262,11 +262,11 @@ test_expect_success 'apply binary file patch with 3way' ' test_expect_success 'apply full-index patch with 3way' ' git reset --hard main && - cp "$TEST_DIRECTORY/test-binary-1.png" bin.png && + cp "$TEST_DIRECTORY/lib-diff/test-binary-1.png" bin.png && git add bin.png && git commit -m "add binary file" && - cp "$TEST_DIRECTORY/test-binary-2.png" bin.png && + cp "$TEST_DIRECTORY/lib-diff/test-binary-2.png" bin.png && git diff --full-index >bin.diff && git reset --hard && diff --git a/t/t6403-merge-file.sh b/t/t6403-merge-file.sh index 1a7082323dddfc..a6854952c9ab9e 100755 --- a/t/t6403-merge-file.sh +++ b/t/t6403-merge-file.sh @@ -252,7 +252,7 @@ test_expect_success "expected conflict markers" ' test_expect_success 'binary files cannot be merged' ' test_must_fail git merge-file -p \ - orig.txt "$TEST_DIRECTORY"/test-binary-1.png new1.txt 2> merge.err && + orig.txt "$TEST_DIRECTORY"/lib-diff/test-binary-1.png new1.txt 2> merge.err && grep "Cannot merge binary files" merge.err ' diff --git a/t/t6407-merge-binary.sh b/t/t6407-merge-binary.sh index e8a28717cece32..2547f1d504a2c5 100755 --- a/t/t6407-merge-binary.sh +++ b/t/t6407-merge-binary.sh @@ -9,7 +9,7 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME test_expect_success setup ' - cat "$TEST_DIRECTORY"/test-binary-1.png >m && 
+ cat "$TEST_DIRECTORY"/lib-diff/test-binary-1.png >m && git add m && git ls-files -s | sed -e "s/ 0 / 1 /" >E1 && test_tick && diff --git a/t/t9200-git-cvsexportcommit.sh b/t/t9200-git-cvsexportcommit.sh index c5946cb0b8a94c..3e1034e6b44059 100755 --- a/t/t9200-git-cvsexportcommit.sh +++ b/t/t9200-git-cvsexportcommit.sh @@ -55,8 +55,8 @@ test_expect_success \ 'mkdir A B C D E F && echo hello1 >A/newfile1.txt && echo hello2 >B/newfile2.txt && - cp "$TEST_DIRECTORY"/test-binary-1.png C/newfile3.png && - cp "$TEST_DIRECTORY"/test-binary-1.png D/newfile4.png && + cp "$TEST_DIRECTORY"/lib-diff/test-binary-1.png C/newfile3.png && + cp "$TEST_DIRECTORY"/lib-diff/test-binary-1.png D/newfile4.png && git add A/newfile1.txt && git add B/newfile2.txt && git add C/newfile3.png && @@ -81,8 +81,8 @@ test_expect_success \ rm -f B/newfile2.txt && rm -f C/newfile3.png && echo Hello5 >E/newfile5.txt && - cp "$TEST_DIRECTORY"/test-binary-2.png D/newfile4.png && - cp "$TEST_DIRECTORY"/test-binary-1.png F/newfile6.png && + cp "$TEST_DIRECTORY"/lib-diff/test-binary-2.png D/newfile4.png && + cp "$TEST_DIRECTORY"/lib-diff/test-binary-1.png F/newfile6.png && git add E/newfile5.txt && git add F/newfile6.png && git commit -a -m "Test: Remove, add and update" && @@ -170,7 +170,7 @@ test_expect_success \ 'mkdir "G g" && echo ok then >"G g/with spaces.txt" && git add "G g/with spaces.txt" && \ - cp "$TEST_DIRECTORY"/test-binary-1.png "G g/with spaces.png" && \ + cp "$TEST_DIRECTORY"/lib-diff/test-binary-1.png "G g/with spaces.png" && \ git add "G g/with spaces.png" && git commit -a -m "With spaces" && id=$(git rev-list --max-count=1 HEAD) && @@ -182,7 +182,8 @@ test_expect_success \ test_expect_success \ 'Update file with spaces in file name' \ 'echo Ok then >>"G g/with spaces.txt" && - cat "$TEST_DIRECTORY"/test-binary-1.png >>"G g/with spaces.png" && \ + cat "$TEST_DIRECTORY"/lib-diff/test-binary-1.png \ + >>"G g/with spaces.png" && \ git add "G g/with spaces.png" && git commit -a -m "Update 
with spaces" && id=$(git rev-list --max-count=1 HEAD) && @@ -207,7 +208,7 @@ test_expect_success !MINGW \ 'mkdir -p Å/goo/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/å/ä/ö && echo Foo >Å/goo/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/å/ä/ö/gårdetsågårdet.txt && git add Å/goo/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/å/ä/ö/gårdetsågårdet.txt && - cp "$TEST_DIRECTORY"/test-binary-1.png Å/goo/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/å/ä/ö/gårdetsågårdet.png && + cp "$TEST_DIRECTORY"/lib-diff/test-binary-1.png Å/goo/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/å/ä/ö/gårdetsågårdet.png && git add Å/goo/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z/å/ä/ö/gårdetsågårdet.png && git commit -a -m "Går det så går det" && \ id=$(git rev-list --max-count=1 HEAD) && From ce25ea833ee95c3bb81bcddf329e4883eb7fe249 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 18 Jul 2017 01:15:40 +0200 Subject: [PATCH 287/303] tests: only override sort & find if there are usable ones in /usr/bin/ The idea is to allow running the test suite on MinGit with BusyBox installed in /mingw64/bin/sh.exe. In that case, we will want to exclude sort & find (and other Unix utilities) from being bundled. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- git-sh-setup.sh | 21 ++++++++++++++------- t/test-lib.sh | 21 ++++++++++++++------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/git-sh-setup.sh b/git-sh-setup.sh index ce273fe0e48d99..785a4cbfa9c65e 100644 --- a/git-sh-setup.sh +++ b/git-sh-setup.sh @@ -292,13 +292,20 @@ create_virtual_base() { # Platform specific tweaks to work around some commands case $(uname -s) in *MINGW*) - # Windows has its own (incompatible) sort and find - sort () { - /usr/bin/sort "$@" - } - find () { - /usr/bin/find "$@" - } + if test -x /usr/bin/sort + then + # Windows has its own (incompatible) sort; override + sort () { + /usr/bin/sort "$@" + } + fi + if test -x /usr/bin/find + then + # Windows has its own (incompatible) find; override + find () { + /usr/bin/find "$@" + } + fi # git sees Windows-style pwd pwd () { builtin pwd -W diff --git a/t/test-lib.sh b/t/test-lib.sh index 8dc51175dd0f4e..0ccfa33da08f6b 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1685,13 +1685,20 @@ fi uname_s=$(uname -s) case $uname_s in *MINGW*) - # Windows has its own (incompatible) sort and find - sort () { - /usr/bin/sort "$@" - } - find () { - /usr/bin/find "$@" - } + if test -x /usr/bin/sort + then + # Windows has its own (incompatible) sort; override + sort () { + /usr/bin/sort "$@" + } + fi + if test -x /usr/bin/find + then + # Windows has its own (incompatible) find; override + find () { + /usr/bin/find "$@" + } + fi # git sees Windows-style pwd pwd () { builtin pwd -W From 1264fffda7bbb4a78e7a10470dd8dcb716d3e76b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Mon, 19 Nov 2018 20:34:13 +0100 Subject: [PATCH 288/303] tests: use the correct path separator with BusyBox BusyBox-w32 is a true Win32 application, i.e. it does not come with a POSIX emulation layer. 
That also means that it does *not* use the Unix convention of separating the entries in the PATH variable using colons, but semicolons. However, there are also BusyBox ports to Windows which use a POSIX emulation layer such as Cygwin's or MSYS2's runtime, i.e. using colons as PATH separators. As a tell-tale, let's use the presence of semicolons in the PATH variable: on Unix, it is highly unlikely that it contains semicolons, and on Windows (without POSIX emulation), it is virtually guaranteed, as everybody should have both $SYSTEMROOT and $SYSTEMROOT/system32 in their PATH. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/interop/interop-lib.sh | 8 ++++++-- t/lib-proto-disable.sh | 2 +- t/t0021-conversion.sh | 2 +- t/t0060-path-utils.sh | 24 ++++++++++++------------ t/t0061-run-command.sh | 6 +++--- t/t0300-credentials.sh | 2 +- t/t1504-ceiling-dirs.sh | 10 +++++----- t/t2300-cd-to-toplevel.sh | 2 +- t/t3402-rebase-merge.sh | 2 +- t/t3418-rebase-continue.sh | 8 ++++---- t/t5615-alternate-env.sh | 4 ++-- t/t5802-connect-helper.sh | 2 +- t/t7006-pager.sh | 4 ++-- t/t7606-merge-custom.sh | 2 +- t/t7811-grep-open.sh | 2 +- t/t9003-help-autocorrect.sh | 2 +- t/t9800-git-p4-basic.sh | 2 +- t/test-lib.sh | 17 +++++++++++++---- 18 files changed, 57 insertions(+), 44 deletions(-) diff --git a/t/interop/interop-lib.sh b/t/interop/interop-lib.sh index 3e0a2911d4f9ba..dea8883821fe3e 100644 --- a/t/interop/interop-lib.sh +++ b/t/interop/interop-lib.sh @@ -4,6 +4,10 @@ . 
../../GIT-BUILD-OPTIONS INTEROP_ROOT=$(pwd) BUILD_ROOT=$INTEROP_ROOT/build +case "$PATH" in +*\;*) PATH_SEP=\; ;; +*) PATH_SEP=: ;; +esac build_version () { if test -z "$1" @@ -57,7 +61,7 @@ wrap_git () { write_script "$1" <<-EOF GIT_EXEC_PATH="$2" export GIT_EXEC_PATH - PATH="$2:\$PATH" + PATH="$2$PATH_SEP\$PATH" export GIT_EXEC_PATH exec git "\$@" EOF @@ -71,7 +75,7 @@ generate_wrappers () { echo >&2 fatal: test tried to run generic git exit 1 EOF - PATH=$(pwd)/.bin:$PATH + PATH=$(pwd)/.bin$PATH_SEP$PATH } VERSION_A=${GIT_TEST_VERSION_A:-$VERSION_A} diff --git a/t/lib-proto-disable.sh b/t/lib-proto-disable.sh index 890622be81642b..9db481e1be15b2 100644 --- a/t/lib-proto-disable.sh +++ b/t/lib-proto-disable.sh @@ -214,7 +214,7 @@ setup_ext_wrapper () { cd "$TRASH_DIRECTORY/remote" && eval "$*" EOF - PATH=$TRASH_DIRECTORY:$PATH && + PATH=$TRASH_DIRECTORY$PATH_SEP$PATH && export TRASH_DIRECTORY ' } diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index abecd75e4e430b..c8560d57cb129d 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -9,7 +9,7 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . 
"$TEST_DIRECTORY"/lib-terminal.sh TEST_ROOT="$PWD" -PATH=$TEST_ROOT:$PATH +PATH=$TEST_ROOT$PATH_SEP$PATH write_script <<\EOF "$TEST_ROOT/rot13.sh" tr \ diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 304a2ea96032b3..a62c8ab1efc255 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -139,25 +139,25 @@ ancestor /foo /fo -1 ancestor /foo /foo -1 ancestor /foo /bar -1 ancestor /foo /foo/bar -1 -ancestor /foo /foo:/bar -1 -ancestor /foo /:/foo:/bar 0 -ancestor /foo /foo:/:/bar 0 -ancestor /foo /:/bar:/foo 0 +ancestor /foo "/foo$PATH_SEP/bar" -1 +ancestor /foo "/$PATH_SEP/foo$PATH_SEP/bar" 0 +ancestor /foo "/foo$PATH_SEP/$PATH_SEP/bar" 0 +ancestor /foo "/$PATH_SEP/bar$PATH_SEP/foo" 0 ancestor /foo/bar / 0 ancestor /foo/bar /fo -1 ancestor /foo/bar /foo 4 ancestor /foo/bar /foo/ba -1 -ancestor /foo/bar /:/fo 0 -ancestor /foo/bar /foo:/foo/ba 4 +ancestor /foo/bar "/$PATH_SEP/fo" 0 +ancestor /foo/bar "/foo$PATH_SEP/foo/ba" 4 ancestor /foo/bar /bar -1 ancestor /foo/bar /fo -1 -ancestor /foo/bar /foo:/bar 4 -ancestor /foo/bar /:/foo:/bar 4 -ancestor /foo/bar /foo:/:/bar 4 -ancestor /foo/bar /:/bar:/fo 0 -ancestor /foo/bar /:/bar 0 +ancestor /foo/bar "/foo$PATH_SEP/bar" 4 +ancestor /foo/bar "/$PATH_SEP/foo$PATH_SEP/bar" 4 +ancestor /foo/bar "/foo$PATH_SEP/$PATH_SEP/bar" 4 +ancestor /foo/bar "/$PATH_SEP/bar$PATH_SEP/fo" 0 +ancestor /foo/bar "/$PATH_SEP/bar" 0 ancestor /foo/bar /foo 4 -ancestor /foo/bar /foo:/bar 4 +ancestor /foo/bar "/foo$PATH_SEP/bar" 4 ancestor /foo/bar /bar -1 # Windows-specific: DOS drives, network shares diff --git a/t/t0061-run-command.sh b/t/t0061-run-command.sh index 7b5423eebdafa4..9e55358742e464 100755 --- a/t/t0061-run-command.sh +++ b/t/t0061-run-command.sh @@ -70,7 +70,7 @@ test_expect_success 'run_command does not try to execute a directory' ' cat bin2/greet EOF - PATH=$PWD/bin1:$PWD/bin2:$PATH \ + PATH=$PWD/bin1$PATH_SEP$PWD/bin2$PATH_SEP$PATH \ test-tool run-command run-command greet >actual 2>err && test_cmp 
bin2/greet actual && test_must_be_empty err @@ -87,7 +87,7 @@ test_expect_success POSIXPERM 'run_command passes over non-executable file' ' cat bin2/greet EOF - PATH=$PWD/bin1:$PWD/bin2:$PATH \ + PATH=$PWD/bin1$PATH_SEP$PWD/bin2$PATH_SEP$PATH \ test-tool run-command run-command greet >actual 2>err && test_cmp bin2/greet actual && test_must_be_empty err @@ -107,7 +107,7 @@ test_expect_success POSIXPERM,SANITY 'unreadable directory in PATH' ' git config alias.nitfol "!echo frotz" && chmod a-rx local-command && ( - PATH=./local-command:$PATH && + PATH=./local-command$PATH_SEP$PATH && git nitfol >actual ) && echo frotz >expect && diff --git a/t/t0300-credentials.sh b/t/t0300-credentials.sh index 3485c0534e6d39..ed045188e0f124 100755 --- a/t/t0300-credentials.sh +++ b/t/t0300-credentials.sh @@ -35,7 +35,7 @@ test_expect_success 'setup helper scripts' ' test -z "$pass" || echo password=$pass EOF - PATH="$PWD:$PATH" + PATH="$PWD$PATH_SEP$PATH" ' test_expect_success 'credential_fill invokes helper' ' diff --git a/t/t1504-ceiling-dirs.sh b/t/t1504-ceiling-dirs.sh index 0fafcf9dde385f..060296febcce1c 100755 --- a/t/t1504-ceiling-dirs.sh +++ b/t/t1504-ceiling-dirs.sh @@ -81,9 +81,9 @@ then GIT_CEILING_DIRECTORIES="$TRASH_ROOT/top/" test_fail subdir_ceil_at_top_slash - GIT_CEILING_DIRECTORIES=":$TRASH_ROOT/top" + GIT_CEILING_DIRECTORIES="$PATH_SEP$TRASH_ROOT/top" test_prefix subdir_ceil_at_top_no_resolve "sub/dir/" - GIT_CEILING_DIRECTORIES=":$TRASH_ROOT/top/" + GIT_CEILING_DIRECTORIES="$PATH_SEP$TRASH_ROOT/top/" test_prefix subdir_ceil_at_top_slash_no_resolve "sub/dir/" fi @@ -113,13 +113,13 @@ GIT_CEILING_DIRECTORIES="$TRASH_ROOT/subdi" test_prefix subdir_ceil_at_subdi_slash "sub/dir/" -GIT_CEILING_DIRECTORIES="/foo:$TRASH_ROOT/sub" +GIT_CEILING_DIRECTORIES="/foo$PATH_SEP$TRASH_ROOT/sub" test_fail second_of_two -GIT_CEILING_DIRECTORIES="$TRASH_ROOT/sub:/bar" +GIT_CEILING_DIRECTORIES="$TRASH_ROOT/sub$PATH_SEP/bar" test_fail first_of_two 
-GIT_CEILING_DIRECTORIES="/foo:$TRASH_ROOT/sub:/bar" +GIT_CEILING_DIRECTORIES="/foo$PATH_SEP$TRASH_ROOT/sub$PATH_SEP/bar" test_fail second_of_three diff --git a/t/t2300-cd-to-toplevel.sh b/t/t2300-cd-to-toplevel.sh index b40eeb263fe896..4328b9172a5e6e 100755 --- a/t/t2300-cd-to-toplevel.sh +++ b/t/t2300-cd-to-toplevel.sh @@ -17,7 +17,7 @@ test_cd_to_toplevel () { test_expect_success $3 "$2" ' ( cd '"'$1'"' && - PATH="$EXEC_PATH:$PATH" && + PATH="$EXEC_PATH$PATH_SEP$PATH" && . git-sh-setup && cd_to_toplevel && [ "$(pwd -P)" = "$TOPLEVEL" ] diff --git a/t/t3402-rebase-merge.sh b/t/t3402-rebase-merge.sh index 7e46f4ca850616..ce553b6edd50e4 100755 --- a/t/t3402-rebase-merge.sh +++ b/t/t3402-rebase-merge.sh @@ -146,7 +146,7 @@ test_expect_success 'rebase -s funny -Xopt' ' git checkout -b test-funny main^ && test_commit funny && ( - PATH=./test-bin:$PATH && + PATH=./test-bin$PATH_SEP$PATH && git rebase -s funny -Xopt main ) && test -f funny.was.run diff --git a/t/t3418-rebase-continue.sh b/t/t3418-rebase-continue.sh index 130e2f9b553003..f1c1eff4eed4f5 100755 --- a/t/t3418-rebase-continue.sh +++ b/t/t3418-rebase-continue.sh @@ -73,7 +73,7 @@ test_expect_success 'rebase --continue remembers merge strategy and options' ' EOF chmod +x test-bin/git-merge-funny && ( - PATH=./test-bin:$PATH && + PATH=./test-bin$PATH_SEP$PATH && test_must_fail git rebase -s funny -Xopt main topic ) && test -f funny.was.run && @@ -81,7 +81,7 @@ test_expect_success 'rebase --continue remembers merge strategy and options' ' echo "Resolved" >F2 && git add F2 && ( - PATH=./test-bin:$PATH && + PATH=./test-bin$PATH_SEP$PATH && git rebase --continue ) && test -f funny.was.run @@ -105,7 +105,7 @@ test_expect_success 'rebase -i --continue handles merge strategy and options' ' EOF chmod +x test-bin/git-merge-funny && ( - PATH=./test-bin:$PATH && + PATH=./test-bin$PATH_SEP$PATH && test_must_fail git rebase -i -s funny -Xopt -Xfoo main topic ) && test -f funny.was.run && @@ -113,7 +113,7 @@ 
test_expect_success 'rebase -i --continue handles merge strategy and options' ' echo "Resolved" >F2 && git add F2 && ( - PATH=./test-bin:$PATH && + PATH=./test-bin$PATH_SEP$PATH && git rebase --continue ) && test -f funny.was.run diff --git a/t/t5615-alternate-env.sh b/t/t5615-alternate-env.sh index 83513e46a3556b..fa2dd08084d21c 100755 --- a/t/t5615-alternate-env.sh +++ b/t/t5615-alternate-env.sh @@ -40,7 +40,7 @@ test_expect_success 'access alternate via absolute path' ' ' test_expect_success 'access multiple alternates' ' - check_obj "$PWD/one.git/objects:$PWD/two.git/objects" <<-EOF + check_obj "$PWD/one.git/objects$PATH_SEP$PWD/two.git/objects" <<-EOF $one blob $two blob EOF @@ -76,7 +76,7 @@ test_expect_success 'access alternate via relative path (subdir)' ' quoted='"one.git\057objects"' unquoted='two.git/objects' test_expect_success 'mix of quoted and unquoted alternates' ' - check_obj "$quoted:$unquoted" <<-EOF + check_obj "$quoted$PATH_SEP$unquoted" <<-EOF $one blob $two blob EOF diff --git a/t/t5802-connect-helper.sh b/t/t5802-connect-helper.sh index c6c2661878c0ca..a096eeeeb427cf 100755 --- a/t/t5802-connect-helper.sh +++ b/t/t5802-connect-helper.sh @@ -85,7 +85,7 @@ test_expect_success 'set up fake git-daemon' ' "$TRASH_DIRECTORY/remote" EOF export TRASH_DIRECTORY && - PATH=$TRASH_DIRECTORY:$PATH + PATH=$TRASH_DIRECTORY$PATH_SEP$PATH ' test_expect_success 'ext command can connect to git daemon (no vhost)' ' diff --git a/t/t7006-pager.sh b/t/t7006-pager.sh index e56ca5b0fa8d47..2a90dc74f2b73d 100755 --- a/t/t7006-pager.sh +++ b/t/t7006-pager.sh @@ -54,7 +54,7 @@ test_expect_success !MINGW,TTY 'LESS and LV envvars set by git-sh-setup' ' sane_unset LESS LV && PAGER="env >pager-env.out; wc" && export PAGER && - PATH="$(git --exec-path):$PATH" && + PATH="$(git --exec-path)$PATH_SEP$PATH" && export PATH && test_terminal sh -c ". 
git-sh-setup && git_pager" ) && @@ -388,7 +388,7 @@ test_default_pager() { EOF chmod +x \$less && ( - PATH=.:\$PATH && + PATH=.$PATH_SEP\$PATH && export PATH && $full_command ) && diff --git a/t/t7606-merge-custom.sh b/t/t7606-merge-custom.sh index 81fb7c474c14c1..8197a1c46bb5b6 100755 --- a/t/t7606-merge-custom.sh +++ b/t/t7606-merge-custom.sh @@ -23,7 +23,7 @@ test_expect_success 'set up custom strategy' ' EOF chmod +x git-merge-theirs && - PATH=.:$PATH && + PATH=.$PATH_SEP$PATH && export PATH ' diff --git a/t/t7811-grep-open.sh b/t/t7811-grep-open.sh index 1dd07141a7df9f..668de47c42e226 100755 --- a/t/t7811-grep-open.sh +++ b/t/t7811-grep-open.sh @@ -53,7 +53,7 @@ test_expect_success SIMPLEPAGER 'git grep -O' ' EOF echo grep.h >expect.notless && - PATH=.:$PATH git grep -O GREP_PATTERN >out && + PATH=.$PATH_SEP$PATH git grep -O GREP_PATTERN >out && { test_cmp expect.less pager-args || test_cmp expect.notless pager-args diff --git a/t/t9003-help-autocorrect.sh b/t/t9003-help-autocorrect.sh index f00deaf3815f3b..982aac7d169f55 100755 --- a/t/t9003-help-autocorrect.sh +++ b/t/t9003-help-autocorrect.sh @@ -12,7 +12,7 @@ test_expect_success 'setup' ' echo distimdistim was called EOF - PATH="$PATH:." && + PATH="$PATH$PATH_SEP." 
&& export PATH && git commit --allow-empty -m "a single log entry" && diff --git a/t/t9800-git-p4-basic.sh b/t/t9800-git-p4-basic.sh index dc88d0e064931a..8a5233e3af800e 100755 --- a/t/t9800-git-p4-basic.sh +++ b/t/t9800-git-p4-basic.sh @@ -286,7 +286,7 @@ test_expect_success 'exit when p4 fails to produce marshaled output' ' EOF chmod 755 badp4dir/p4 && ( - PATH="$TRASH_DIRECTORY/badp4dir:$PATH" && + PATH="$TRASH_DIRECTORY/badp4dir$PATH_SEP$PATH" && export PATH && test_expect_code 1 git p4 clone --dest="$git" //depot >errs 2>&1 ) && diff --git a/t/test-lib.sh b/t/test-lib.sh index 0ccfa33da08f6b..f378a7c5469ca5 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -15,6 +15,15 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see http://www.gnu.org/licenses/ . +# On Unix/Linux, the path separator is the colon, on other systems it +# may be different, though. On Windows, for example, it is a semicolon. +# If the PATH variable contains semicolons, it is pretty safe to assume +# that the path separator is a semicolon. +case "$PATH" in +*\;*) PATH_SEP=\; ;; +*) PATH_SEP=: ;; +esac + # Test the binaries we have just built. The tests are kept in # t/ subdirectory and are run in 'trash directory' subdirectory. if test -z "$TEST_DIRECTORY" @@ -1442,7 +1451,7 @@ then done done IFS=$OLDIFS - PATH=$GIT_VALGRIND/bin:$PATH + PATH=$GIT_VALGRIND/bin$PATH_SEP$PATH GIT_EXEC_PATH=$GIT_VALGRIND/bin export GIT_VALGRIND GIT_VALGRIND_MODE="$valgrind" @@ -1454,7 +1463,7 @@ elif test -n "$GIT_TEST_INSTALLED" then GIT_EXEC_PATH=$($GIT_TEST_INSTALLED/git --exec-path) || error "Cannot run git from $GIT_TEST_INSTALLED." 
- PATH=$GIT_TEST_INSTALLED:$GIT_BUILD_DIR/t/helper:$PATH + PATH=$GIT_TEST_INSTALLED$PATH_SEP$GIT_BUILD_DIR/t/helper$PATH_SEP$PATH GIT_EXEC_PATH=${GIT_TEST_EXEC_PATH:-$GIT_EXEC_PATH} else # normal case, use ../bin-wrappers only unless $with_dashes: if test -n "$no_bin_wrappers" @@ -1470,12 +1479,12 @@ else # normal case, use ../bin-wrappers only unless $with_dashes: fi with_dashes=t fi - PATH="$git_bin_dir:$PATH" + PATH="$git_bin_dir$PATH_SEP$PATH" fi GIT_EXEC_PATH=$GIT_BUILD_DIR if test -n "$with_dashes" then - PATH="$GIT_BUILD_DIR:$GIT_BUILD_DIR/t/helper:$PATH" + PATH="$GIT_BUILD_DIR$PATH_SEP$GIT_BUILD_DIR/t/helper$PATH_SEP$PATH" fi fi GIT_TEMPLATE_DIR="$GIT_BUILD_DIR"/templates/blt From 11d64facb004173514afa8fb637ab5f32528b7af Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 30 Jun 2017 00:35:40 +0200 Subject: [PATCH 289/303] mingw: only use Bash-ism `builtin pwd -W` when available Traditionally, Git for Windows' SDK uses Bash as its default shell. However, other Unix shells are available, too. Most notably, the Win32 port of BusyBox comes with `ash` whose `pwd` command already prints Windows paths as Git for Windows wants them, while there is not even a `builtin` command. Therefore, let's be careful not to override `pwd` unless we know that the `builtin` command is available. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- git-sh-setup.sh | 14 ++++++++++---- t/test-lib.sh | 14 ++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/git-sh-setup.sh b/git-sh-setup.sh index 785a4cbfa9c65e..b53539cd0e556a 100644 --- a/git-sh-setup.sh +++ b/git-sh-setup.sh @@ -306,10 +306,16 @@ case $(uname -s) in /usr/bin/find "$@" } fi - # git sees Windows-style pwd - pwd () { - builtin pwd -W - } + # On Windows, Git wants Windows paths. But /usr/bin/pwd spits out + # Unix-style paths. At least in Bash, we have a builtin pwd that + # understands the -W option to force "mixed" paths, i.e. 
with drive + # prefix but still with forward slashes. Let's use that, if available. + if type builtin >/dev/null 2>&1 + then + pwd () { + builtin pwd -W + } + fi is_absolute_path () { case "$1" in [/\\]* | [A-Za-z]:*) diff --git a/t/test-lib.sh b/t/test-lib.sh index f378a7c5469ca5..36c03944e0d73b 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1708,10 +1708,16 @@ case $uname_s in /usr/bin/find "$@" } fi - # git sees Windows-style pwd - pwd () { - builtin pwd -W - } + # On Windows, Git wants Windows paths. But /usr/bin/pwd spits out + # Unix-style paths. At least in Bash, we have a builtin pwd that + # understands the -W option to force "mixed" paths, i.e. with drive + # prefix but still with forward slashes. Let's use that, if available. + if type builtin >/dev/null 2>&1 + then + pwd () { + builtin pwd -W + } + fi # no POSIX permissions # backslashes in pathspec are converted to '/' # exec does not inherit the PID From 874f3704f5add33e9f19e7602657020380466441 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 30 Jun 2017 22:32:33 +0200 Subject: [PATCH 290/303] tests (mingw): remove Bash-specific pwd option The -W option is only understood by MSYS2 Bash's pwd command. We already make sure to override `pwd` by `builtin pwd -W` for MINGW, so let's not double the effort here. This will also help when switching the shell to another one (such as BusyBox' ash) whose pwd does *not* understand the -W option. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t9902-completion.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 43de868b8005d3..d1e93f9529adbc 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -126,12 +126,7 @@ invalid_variable_name='${foo.bar}' actual="$TRASH_DIRECTORY/actual" -if test_have_prereq MINGW -then - ROOT="$(pwd -W)" -else - ROOT="$(pwd)" -fi +ROOT="$(pwd)" test_expect_success 'setup for __git_find_repo_path/__gitdir tests' ' mkdir -p subdir/subsubdir && From 069e648903373c44acfc981328e360c46364e4bf Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Wed, 19 Jul 2017 17:07:56 +0200 Subject: [PATCH 291/303] test-lib: add BUSYBOX prerequisite When running with BusyBox, we will want to avoid calling executables on the PATH that are implemented in BusyBox itself. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/test-lib.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/t/test-lib.sh b/t/test-lib.sh index 36c03944e0d73b..4e45bc7c4f16fc 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1890,6 +1890,10 @@ test_lazy_prereq UNZIP ' test $? -ne 127 ' +test_lazy_prereq BUSYBOX ' + case "$($SHELL --help 2>&1)" in *BusyBox*) true;; *) false;; esac +' + run_with_limited_cmdline () { (ulimit -s 128 && "$@") } From a1077e6de7b8feb024a23835fed9a39275641eca Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 4 Aug 2017 11:51:56 +0200 Subject: [PATCH 292/303] t0021: use Windows path when appropriate Since c6b0831c9c1 (docs: warn about possible '=' in clean/smudge filter process values, 2016-12-03), t0021 writes out a file with quotes in its name, and MSYS2's path conversion heuristics mistakes that to mean that we are not talking about a path here. 
Therefore, we need to use Windows paths, as the test-helper is a Win32 program that would otherwise have no idea where to look for the file. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t0021-conversion.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index c8560d57cb129d..408aeededc4295 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -8,8 +8,8 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh . "$TEST_DIRECTORY"/lib-terminal.sh -TEST_ROOT="$PWD" -PATH=$TEST_ROOT$PATH_SEP$PATH +TEST_ROOT="$(pwd)" +PATH=$PWD$PATH_SEP$PATH write_script <<\EOF "$TEST_ROOT/rot13.sh" tr \ From 1066699ddbee9b429c8284ae333d186274e07a45 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Sat, 5 Aug 2017 21:36:01 +0200 Subject: [PATCH 293/303] t5003: use binary file from t/lib-diff/ At some stage, t5003-archive-zip wants to add a file that is not ASCII. To that end, it uses /bin/sh. But that file may actually not exist (it is too easy to forget that not all the world is Unix/Linux...)! Besides, we already have perfectly fine binary files intended for use solely by the tests. So let's use one of them instead. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t5003-archive-zip.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh index fc499cdff01d01..2bfa0c0a285689 100755 --- a/t/t5003-archive-zip.sh +++ b/t/t5003-archive-zip.sh @@ -88,7 +88,7 @@ test_expect_success \ 'mkdir a && echo simple textfile >a/a && mkdir a/bin && - cp /bin/sh a/bin && + cp "$TEST_DIRECTORY/lib-diff/test-binary-1.png" a/bin && printf "text\r" >a/text.cr && printf "text\r\n" >a/text.crlf && printf "text\n" >a/text.lf && From 09a3d4ca6e5aa8732aee0e412ec46d05090481f1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 21 Jul 2017 12:48:33 +0200 Subject: [PATCH 294/303] t5532: workaround for BusyBox on Windows While it may seem super convenient to some old Unix hands to simply require Perl to be available when running the test suite, this is a major hassle on Windows, where we want to verify that Perl is not, actually, required in a NO_PERL build. As a super ugly workaround, we "install" a script into /usr/bin/perl reading like this: #!/bin/sh # We'd much rather avoid requiring Perl altogether when testing # an installed Git. Oh well, that's why we cannot have nice # things. exec c:/git-sdk-64/usr/bin/perl.exe "$@" The problem with that is that BusyBox assumes that the #! line in a script refers to an executable, not to a script. So when it encounters the line #!/usr/bin/perl in t5532's proxy-get-cmd, it barfs. Let's help this situation by simply executing the Perl script with the "interpreter" specified explicitly.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t5532-fetch-proxy.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5532-fetch-proxy.sh b/t/t5532-fetch-proxy.sh index d664912799b43a..55bc57c138ddc2 100755 --- a/t/t5532-fetch-proxy.sh +++ b/t/t5532-fetch-proxy.sh @@ -27,7 +27,7 @@ test_expect_success 'setup proxy script' ' write_script proxy <<-\EOF echo >&2 "proxying for $*" - cmd=$(./proxy-get-cmd) + cmd=$("$PERL_PATH" ./proxy-get-cmd) echo >&2 "Running $cmd" exec $cmd EOF From fd2cce47daef6b4dec77745c6674c70ceaa04ca0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 21 Jul 2017 13:24:55 +0200 Subject: [PATCH 295/303] t5605: special-case hardlink test for BusyBox-w32 When t5605 tries to verify that files are hardlinked (or that they are not), it uses the `-links` option of the `find` utility. BusyBox' implementation does not support that option, and BusyBox-w32's lstat() does not even report the number of hard links correctly (for performance reasons). So let's just switch to a different method that actually works on Windows. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t5605-clone-local.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/t/t5605-clone-local.sh b/t/t5605-clone-local.sh index 38b850c10ef8cd..829d54edc0e2dd 100755 --- a/t/t5605-clone-local.sh +++ b/t/t5605-clone-local.sh @@ -11,6 +11,21 @@ repo_is_hardlinked() { test_line_count = 0 output } +if test_have_prereq MINGW,BUSYBOX +then + # BusyBox' `find` does not support `-links`. Besides, BusyBox-w32's + # lstat() does not report hard links, just like Git's mingw_lstat() + # (from where BusyBox-w32 got its initial implementation). 
+ repo_is_hardlinked() { + for f in $(find "$1/objects" -type f) + do + "$SYSTEMROOT"/system32/fsutil.exe \ + hardlink list $f >links && + test_line_count -gt 1 links || return 1 + done + } +fi + test_expect_success 'preparing origin repository' ' : >file && git add . && git commit -m1 && git clone --bare . a.git && From 9a89257b809c4066390f7e04ade179370f3d2982 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Wed, 5 Jul 2017 15:14:50 +0200 Subject: [PATCH 296/303] t5813: allow for $PWD to be a Windows path Git for Windows uses MSYS2's Bash to run the test suite, which comes with benefits but also at a heavy price: on the plus side, MSYS2's POSIX emulation layer allows us to continue pretending that we are on a Unix system, e.g. use Unix paths instead of Windows ones, yet this is bought at a rather noticeable performance penalty. There *are* some more native ports of Unix shells out there, though, most notably BusyBox-w32's ash. These native ports do not use any POSIX emulation layer (or at most a *very* thin one, choosing to avoid features such as fork() that are expensive to emulate on Windows), and they use native Windows paths (usually with forward slashes instead of backslashes, which is perfectly legal in almost all use cases). And here comes the problem: with a $PWD looking like, say, C:/git-sdk-64/usr/src/git/t/trash directory.t5813-proto-disable-ssh Git's test scripts get quite a bit confused, as their assumptions have been shattered. Not only does this path contain a colon (oh no!), it also does not start with a slash. This is a problem e.g. when constructing a URL as t5813 does it: ssh://remote$PWD. Not only is it impossible to separate the "host" from the path with a $PWD as above, even prefixing $PWD by a slash won't work, as /C:/git-sdk-64/... is not a valid path. 
As a workaround, detect when $PWD does not start with a slash on Windows, and simply strip the drive prefix, using an obscure feature of Windows paths: if an absolute Windows path starts with a slash, it is implicitly prefixed by the drive prefix of the current directory. As we are talking about the current directory here, anyway, that strategy works. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t5813-proto-disable-ssh.sh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/t/t5813-proto-disable-ssh.sh b/t/t5813-proto-disable-ssh.sh index 3f084ee306517b..0a2c77093babad 100755 --- a/t/t5813-proto-disable-ssh.sh +++ b/t/t5813-proto-disable-ssh.sh @@ -14,8 +14,23 @@ test_expect_success 'setup repository to clone' ' ' test_proto "host:path" ssh "remote:repo.git" -test_proto "ssh://" ssh "ssh://remote$PWD/remote/repo.git" -test_proto "git+ssh://" ssh "git+ssh://remote$PWD/remote/repo.git" + +hostdir="$PWD" +if test_have_prereq MINGW && test "/${PWD#/}" != "$PWD" +then + case "$PWD" in + [A-Za-z]:/*) + hostdir="${PWD#?:}" + ;; + *) + skip_all="Unhandled PWD '$PWD'; skipping rest" + test_done + ;; + esac +fi + +test_proto "ssh://" ssh "ssh://remote$hostdir/remote/repo.git" +test_proto "git+ssh://" ssh "git+ssh://remote$hostdir/remote/repo.git" # Don't even bother setting up a "-remote" directory, as ssh would generally # complain about the bogus option rather than completing our request. Our From 7629ab15629a72da21dfba44272130b8a329b8dc Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Fri, 7 Jul 2017 10:15:36 +0200 Subject: [PATCH 297/303] t9200: skip tests when $PWD contains a colon On Windows, the current working directory is pretty much guaranteed to contain a colon. If we feed that path to CVS, it mistakes it for a separator between host and port, though. 
This has not been a problem so far because Git for Windows uses MSYS2's Bash using a POSIX emulation layer that also pretends that the current directory is a Unix path (at least as long as we're in a shell script). However, that is rather limiting, as Git for Windows also explores other ports of other Unix shells. One of those is BusyBox-w32's ash, which is a native port (i.e. *not* using any POSIX emulation layer, and certainly not emulating Unix paths). So let's just detect if there is a colon in $PWD and punt in that case. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t9200-git-cvsexportcommit.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/t/t9200-git-cvsexportcommit.sh b/t/t9200-git-cvsexportcommit.sh index 3e1034e6b44059..450ff1c2bc773d 100755 --- a/t/t9200-git-cvsexportcommit.sh +++ b/t/t9200-git-cvsexportcommit.sh @@ -11,6 +11,13 @@ if ! test_have_prereq PERL; then test_done fi +case "$PWD" in +*:*) + skip_all='cvs would get confused by the colon in `pwd`; skipping tests' + test_done + ;; +esac + cvs >/dev/null 2>&1 if test $? -ne 1 then From 97088dd66e04d0d44cd075093a630a78d03317f1 Mon Sep 17 00:00:00 2001 From: Bert Belder <bertbelder@gmail.com> Date: Fri, 26 Oct 2018 23:42:09 +0200 Subject: [PATCH 298/303] Win32: symlink: add test for `symlink` attribute To verify that the symlink is resolved correctly, we use the fact that `git.exe` is a native Win32 program, and that `git.exe config -f <path>` therefore uses the native symlink resolution. 
Signed-off-by: Bert Belder <bertbelder@gmail.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- t/t2040-checkout-symlink-attr.sh | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100755 t/t2040-checkout-symlink-attr.sh diff --git a/t/t2040-checkout-symlink-attr.sh b/t/t2040-checkout-symlink-attr.sh new file mode 100755 index 00000000000000..e00c31d096ce88 --- /dev/null +++ b/t/t2040-checkout-symlink-attr.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +test_description='checkout symlinks with `symlink` attribute on Windows + +Ensures that Git for Windows creates symlinks of the right type, +as specified by the `symlink` attribute in `.gitattributes`.' + +# Tell MSYS to create native symlinks. Without this flag test-lib's +# prerequisite detection for SYMLINKS doesn't detect the right thing. +MSYS=winsymlinks:nativestrict && export MSYS + +. ./test-lib.sh + +if ! test_have_prereq MINGW,SYMLINKS +then + skip_all='skipping $0: MinGW-only test, which requires symlink support.' + test_done +fi + +# Adds a symlink to the index without clobbering the work tree. +cache_symlink () { + sha=$(printf '%s' "$1" | git hash-object --stdin -w) && + git update-index --add --cacheinfo 120000,$sha,"$2" +} + +test_expect_success 'checkout symlinks with attr' ' + cache_symlink file1 file-link && + cache_symlink dir dir-link && + + printf "file-link symlink=file\ndir-link symlink=dir\n" >.gitattributes && + git add .gitattributes && + + git checkout . && + + mkdir dir && + echo "[a]b=c" >file1 && + echo "[x]y=z" >dir/file2 && + + # MSYS2 is very forgiving, it will resolve symlinks even if the + # symlink type is incorrect. To make this test meaningful, try + # them with a native, non-MSYS executable, such as `git config`. 
+ test "$(git config -f file-link a.b)" = "c" && + test "$(git config -f dir-link/file2 x.y)" = "z" +' + +test_done From dea4755f468f20d93213ec31a2d308d47a77b90c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Wed, 17 May 2017 17:05:09 +0200 Subject: [PATCH 299/303] mingw: kill child processes in a gentler way The TerminateProcess() function does not actually leave the child processes any chance to perform any cleanup operations. This is bad insofar as Git itself expects its signal handlers to run. A symptom is e.g. a left-behind .lock file that would not be left behind if the same operation was run, say, on Linux. To remedy this situation, we use an obscure trick: we inject a thread into the process that needs to be killed and to let that thread run the ExitProcess() function with the desired exit status. Thanks J Wyman for describing this trick. The advantage is that the ExitProcess() function lets the atexit handlers run. While this is still different from what Git expects (i.e. running a signal handler), in practice Git sets up signal handlers and atexit handlers that call the same code to clean up after itself. In case that the gentle method to terminate the process failed, we still fall back to calling TerminateProcess(), but in that case we now also make sure that processes spawned by the spawned process are terminated; TerminateProcess() does not give the spawned process a chance to do so itself. Please note that this change only affects how Git for Windows tries to terminate processes spawned by Git's own executables. Third-party software that *calls* Git and wants to terminate it *still* need to make sure to imitate this gentle method, otherwise this patch will not have any effect. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 29 +++++-- compat/win32/exit-process.h | 165 ++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 8 deletions(-) create mode 100644 compat/win32/exit-process.h diff --git a/compat/mingw.c b/compat/mingw.c index c71e58593457ad..5ae279441b448d 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -8,6 +8,7 @@ #include "../strbuf.h" #include "../run-command.h" #include "../cache.h" +#include "win32/exit-process.h" #include "win32/lazyload.h" #include "../config.h" #include "dir.h" @@ -2158,16 +2159,28 @@ int mingw_execvp(const char *cmd, char *const *argv) int mingw_kill(pid_t pid, int sig) { if (pid > 0 && sig == SIGTERM) { - HANDLE h = OpenProcess(PROCESS_TERMINATE, FALSE, pid); - - if (TerminateProcess(h, -1)) { + HANDLE h = OpenProcess(PROCESS_CREATE_THREAD | + PROCESS_QUERY_INFORMATION | + PROCESS_VM_OPERATION | PROCESS_VM_WRITE | + PROCESS_VM_READ | PROCESS_TERMINATE, + FALSE, pid); + int ret; + + if (h) + ret = exit_process(h, 128 + sig); + else { + h = OpenProcess(PROCESS_TERMINATE, FALSE, pid); + if (!h) { + errno = err_win_to_posix(GetLastError()); + return -1; + } + ret = terminate_process_tree(h, 128 + sig); + } + if (ret) { + errno = err_win_to_posix(GetLastError()); CloseHandle(h); - return 0; } - - errno = err_win_to_posix(GetLastError()); - CloseHandle(h); - return -1; + return ret; } else if (pid > 0 && sig == 0) { HANDLE h = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, pid); if (h) { diff --git a/compat/win32/exit-process.h b/compat/win32/exit-process.h new file mode 100644 index 00000000000000..d53989884cfb0c --- /dev/null +++ b/compat/win32/exit-process.h @@ -0,0 +1,165 @@ +#ifndef EXIT_PROCESS_H +#define EXIT_PROCESS_H + +/* + * This file contains functions to terminate a Win32 process, as gently as + * possible. + * + * At first, we will attempt to inject a thread that calls ExitProcess(). 
If + * that fails, we will fall back to terminating the entire process tree. + * + * For simplicity, these functions are marked as file-local. + */ + +#include <tlhelp32.h> + +/* + * Terminates the process corresponding to the process ID and all of its + * directly and indirectly spawned subprocesses. + * + * This way of terminating the processes is not gentle: the processes get + * no chance of cleaning up after themselves (closing file handles, removing + * .lock files, terminating spawned processes (if any), etc). + */ +static int terminate_process_tree(HANDLE main_process, int exit_status) +{ + HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); + PROCESSENTRY32 entry; + DWORD pids[16384]; + int max_len = sizeof(pids) / sizeof(*pids), i, len, ret = 0; + pid_t pid = GetProcessId(main_process); + + pids[0] = (DWORD)pid; + len = 1; + + /* + * Even if Process32First()/Process32Next() seem to traverse the + * processes in topological order (i.e. parent processes before + * child processes), there is nothing in the Win32 API documentation + * suggesting that this is guaranteed. + * + * Therefore, run through them at least twice and stop when no more + * process IDs were added to the list. 
+ */ + for (;;) { + int orig_len = len; + + memset(&entry, 0, sizeof(entry)); + entry.dwSize = sizeof(entry); + + if (!Process32First(snapshot, &entry)) + break; + + do { + for (i = len - 1; i >= 0; i--) { + if (pids[i] == entry.th32ProcessID) + break; + if (pids[i] == entry.th32ParentProcessID) + pids[len++] = entry.th32ProcessID; + } + } while (len < max_len && Process32Next(snapshot, &entry)); + + if (orig_len == len || len >= max_len) + break; + } + + for (i = len - 1; i > 0; i--) { + HANDLE process = OpenProcess(PROCESS_TERMINATE, FALSE, pids[i]); + + if (process) { + if (!TerminateProcess(process, exit_status)) + ret = -1; + CloseHandle(process); + } + } + if (!TerminateProcess(main_process, exit_status)) + ret = -1; + CloseHandle(main_process); + + return ret; +} + +/** + * Determine whether a process runs in the same architecture as the current + * one. That test is required before we assume that GetProcAddress() returns + * a valid address *for the target process*. + */ +static inline int process_architecture_matches_current(HANDLE process) +{ + static BOOL current_is_wow = -1; + BOOL is_wow; + + if (current_is_wow == -1 && + !IsWow64Process (GetCurrentProcess(), &current_is_wow)) + current_is_wow = -2; + if (current_is_wow == -2) + return 0; /* could not determine current process' WoW-ness */ + if (!IsWow64Process (process, &is_wow)) + return 0; /* cannot determine */ + return is_wow == current_is_wow; +} + +/** + * Inject a thread into the given process that runs ExitProcess(). + * + * Note: as kernel32.dll is loaded before any process, the other process and + * this process will have ExitProcess() at the same address. + * + * This function expects the process handle to have the access rights for + * CreateRemoteThread(): PROCESS_CREATE_THREAD, PROCESS_QUERY_INFORMATION, + * PROCESS_VM_OPERATION, PROCESS_VM_WRITE, and PROCESS_VM_READ.
+ * + * The idea comes from the Dr Dobb's article "A Safer Alternative to + * TerminateProcess()" by Andrew Tucker (July 1, 1999), + * http://www.drdobbs.com/a-safer-alternative-to-terminateprocess/184416547 + * + * If this method fails, we fall back to running terminate_process_tree(). + */ +static int exit_process(HANDLE process, int exit_code) +{ + DWORD code; + + if (GetExitCodeProcess(process, &code) && code == STILL_ACTIVE) { + static int initialized; + static LPTHREAD_START_ROUTINE exit_process_address; + PVOID arg = (PVOID)(intptr_t)exit_code; + DWORD thread_id; + HANDLE thread = NULL; + + if (!initialized) { + HINSTANCE kernel32 = GetModuleHandleA("kernel32"); + if (!kernel32) + die("BUG: cannot find kernel32"); + exit_process_address = + (LPTHREAD_START_ROUTINE)(void (*)(void)) + GetProcAddress(kernel32, "ExitProcess"); + initialized = 1; + } + if (!exit_process_address || + !process_architecture_matches_current(process)) + return terminate_process_tree(process, exit_code); + + thread = CreateRemoteThread(process, NULL, 0, + exit_process_address, + arg, 0, &thread_id); + if (thread) { + CloseHandle(thread); + /* + * If the process survives for 10 seconds (a completely + * arbitrary value picked from thin air), fall back to + * killing the process tree via TerminateProcess(). + */ + if (WaitForSingleObject(process, 10000) == + WAIT_OBJECT_0) { + CloseHandle(process); + return 0; + } + } + + return terminate_process_tree(process, exit_code); + } + + return 0; +} + +#endif From 493fb696814745f2e3b051d340ccb1d8717741c2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 20 Jul 2017 00:23:26 +0200 Subject: [PATCH 300/303] mingw: add a Makefile target to copy test artifacts The Makefile target `install-mingit-test-artifacts` simply copies stuff and things directly into a MinGit directory, including an init.bat script to set everything up so that the tests can be run in a cmd window. 
Sadly, Git's test suite still relies on a Perl interpreter even if compiled with NO_PERL=YesPlease. We punt for now, installing a small script into /usr/bin/perl that hands off to an existing Perl of a Git for Windows SDK. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- config.mak.uname | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/config.mak.uname b/config.mak.uname index d274ac12365190..c1cadf4ffa576b 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -736,6 +736,62 @@ else NO_PYTHON = YesPlease endif endif +ifeq (i686,$(uname_M)) + MINGW_PREFIX := mingw32 +endif +ifeq (x86_64,$(uname_M)) + MINGW_PREFIX := mingw64 +endif + + DESTDIR_WINDOWS = $(shell cygpath -aw '$(DESTDIR_SQ)') + DESTDIR_MIXED = $(shell cygpath -am '$(DESTDIR_SQ)') +install-mingit-test-artifacts: + install -m755 -d '$(DESTDIR_SQ)/usr/bin' + printf '%s\n%s\n' >'$(DESTDIR_SQ)/usr/bin/perl' \ + "#!/mingw64/bin/busybox sh" \ + "exec \"$(shell cygpath -am /usr/bin/perl.exe)\" \"\$$@\"" + + install -m755 -d '$(DESTDIR_SQ)' + printf '%s%s\n%s\n%s\n%s\n%s\n' >'$(DESTDIR_SQ)/init.bat' \ + "PATH=$(DESTDIR_WINDOWS)\\$(MINGW_PREFIX)\\bin;" \ + "C:\\WINDOWS;C:\\WINDOWS\\system32" \ + "@set GIT_TEST_INSTALLED=$(DESTDIR_MIXED)/$(MINGW_PREFIX)/bin" \ + "@`echo "$(DESTDIR_WINDOWS)" | sed 's/:.*/:/'`" \ + "@cd `echo "$(DESTDIR_WINDOWS)" | sed 's/^.://'`\\test-git\\t" \ + "@echo Now, run 'helper\\test-run-command testsuite'" + + install -m755 -d '$(DESTDIR_SQ)/test-git' + sed 's/^\(NO_PERL\|NO_PYTHON\)=.*/\1=YesPlease/' \ + <GIT-BUILD-OPTIONS >'$(DESTDIR_SQ)/test-git/GIT-BUILD-OPTIONS' + + install -m755 -d '$(DESTDIR_SQ)/test-git/t/helper' + install -m755 $(TEST_PROGRAMS) '$(DESTDIR_SQ)/test-git/t/helper' + (cd t && $(TAR) cf - t[0-9][0-9][0-9][0-9] lib-diff) | \ + (cd '$(DESTDIR_SQ)/test-git/t' && $(TAR) xf -) + install -m755 t/t556x_common t/*.sh '$(DESTDIR_SQ)/test-git/t' + + install -m755 -d '$(DESTDIR_SQ)/test-git/templates' + (cd 
templates && $(TAR) cf - blt) | \ + (cd '$(DESTDIR_SQ)/test-git/templates' && $(TAR) xf -) + + # po/build/locale for t0200 + install -m755 -d '$(DESTDIR_SQ)/test-git/po/build/locale' + (cd po/build/locale && $(TAR) cf - .) | \ + (cd '$(DESTDIR_SQ)/test-git/po/build/locale' && $(TAR) xf -) + + # git-daemon.exe for t5802, git-http-backend.exe for t5560 + install -m755 -d '$(DESTDIR_SQ)/$(MINGW_PREFIX)/bin' + install -m755 git-daemon.exe git-http-backend.exe \ + '$(DESTDIR_SQ)/$(MINGW_PREFIX)/bin' + + # git-upload-archive (dashed) for t5000 + install -m755 -d '$(DESTDIR_SQ)/$(MINGW_PREFIX)/bin' + install -m755 git-upload-archive.exe '$(DESTDIR_SQ)/$(MINGW_PREFIX)/bin' + + # git-difftool--helper for t7800 + install -m755 -d '$(DESTDIR_SQ)/$(MINGW_PREFIX)/libexec/git-core' + install -m755 git-difftool--helper \ + '$(DESTDIR_SQ)/$(MINGW_PREFIX)/libexec/git-core' endif ifeq ($(uname_S),QNX) COMPAT_CFLAGS += -DSA_RESTART=0 From 732efcff5d5049331500ebdba35fff961df01d33 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Mon, 23 Apr 2018 00:24:29 +0200 Subject: [PATCH 301/303] mingw: really handle SIGINT Previously, we did not install any handler for Ctrl+C, but now we really want to because the MSYS2 runtime learned the trick to call the ConsoleCtrlHandler when Ctrl+C was pressed. With this, hitting Ctrl+C while `git log` is running will only terminate the Git process, but not the pager. This finally matches the behavior on Linux and on macOS. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 5ae279441b448d..07e8fcda7d68c4 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -3772,7 +3772,14 @@ static void adjust_symlink_flags(void) symlink_file_flags |= 2; symlink_directory_flags |= 2; } +} +static BOOL WINAPI handle_ctrl_c(DWORD ctrl_type) +{ + if (ctrl_type != CTRL_C_EVENT) + return FALSE; /* we did not handle this */ + mingw_raise(SIGINT); + return TRUE; /* we did handle this */ } #ifdef _MSC_VER @@ -3808,6 +3815,8 @@ int wmain(int argc, const wchar_t **wargv) #endif #endif + SetConsoleCtrlHandler(handle_ctrl_c, TRUE); + maybe_redirect_std_handles(); adjust_symlink_flags(); fsync_object_files = 1; From 78d6d6f0ed7809aaddf42e5df27ac0bb47dfd78b Mon Sep 17 00:00:00 2001 From: "Neeraj K. Singh" <neerajsi@microsoft.com> Date: Wed, 27 Oct 2021 14:22:42 -0700 Subject: [PATCH 302/303] mingw: do not call xutftowcs_path in mingw_mktemp The `xutftowcs_path` function canonicalizes absolute paths using GetFullPathNameW. This canonicalization may change the length of the string (e.g. getting rid of \.\), which breaks callers that pass the template string in a strbuf and expect the length of the string to remain the same. In my particular case, the tmp-objdir code is passing a strbuf to mkdtemp and is breaking since the strbuf.len is no longer synchronized with strlen(strbuf.buf). Signed-off-by: Neeraj K. Singh <neerajsi@microsoft.com> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- compat/mingw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compat/mingw.c b/compat/mingw.c index c71e58593457ad..2bbe88a2059947 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -1235,8 +1235,11 @@ char *mingw_mktemp(char *template) int offset = 0; /* we need to return the path, thus no long paths here! 
*/ - if (xutftowcs_path(wtemplate, template) < 0) + if (xutftowcsn(wtemplate, template, MAX_PATH, -1) < 0) { + if (errno == ERANGE) + errno = ENAMETOOLONG; return NULL; + } if (is_dir_sep(template[0]) && !is_dir_sep(template[1]) && iswalpha(wtemplate[0]) && wtemplate[1] == L':') { From 9904679116144dbde0d5068b121da7f5875c4d78 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Thu, 25 Nov 2021 11:26:41 +0100 Subject: [PATCH 303/303] Partially un-revert "editor: save and reset terminal after calling EDITOR" In e3f7e01b50be (Revert "editor: save and reset terminal after calling EDITOR", 2021-11-22), we reverted the commit wholesale where the terminal state would be saved and restored before/after calling an editor. The reverted commit was intended to fix a problem with Windows Terminal where simply calling `vi` would cause problems afterwards. To fix the problem addressed by the revert, but _still_ keep the problem with Windows Terminal fixed, let's revert the revert, with a twist: we restrict the save/restore _specifically_ to the case where `vi` (or `vim`) is called, and do not do the same for any other editor. This should still catch the majority of the cases, and will bridge the time until the original patch is re-done in a way that addresses all concerns. 
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- editor.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/editor.c b/editor.c index 008c04fe2f6e0a..ba31e84ece5461 100644 --- a/editor.c +++ b/editor.c @@ -4,6 +4,7 @@ #include "strvec.h" #include "run-command.h" #include "sigchain.h" +#include "compat/terminal.h" #ifndef DEFAULT_EDITOR #define DEFAULT_EDITOR "vi" @@ -55,6 +56,7 @@ static int launch_specified_editor(const char *editor, const char *path, return error("Terminal is dumb, but EDITOR unset"); if (strcmp(editor, ":")) { + int save_and_restore_term = !strcmp(editor, "vi") || !strcmp(editor, "vim"); struct strbuf realpath = STRBUF_INIT; struct child_process p = CHILD_PROCESS_INIT; int ret, sig; @@ -83,7 +85,11 @@ static int launch_specified_editor(const char *editor, const char *path, strvec_pushv(&p.env, (const char **)env); p.use_shell = 1; p.trace2_child_class = "editor"; + if (save_and_restore_term) + save_and_restore_term = !save_term(1); if (start_command(&p) < 0) { + if (save_and_restore_term) + restore_term(); strbuf_release(&realpath); return error("unable to start editor '%s'", editor); } @@ -91,6 +97,8 @@ static int launch_specified_editor(const char *editor, const char *path, sigchain_push(SIGINT, SIG_IGN); sigchain_push(SIGQUIT, SIG_IGN); ret = finish_command(&p); + if (save_and_restore_term) + restore_term(); strbuf_release(&realpath); sig = ret - 128; sigchain_pop(SIGINT);