diff --git a/diffcore-rename.c b/diffcore-rename.c index 963ca582216ba5..3375e24659ea1a 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -568,7 +568,8 @@ static void update_dir_rename_counts(struct dir_rename_info *info, static void initialize_dir_rename_info(struct dir_rename_info *info, struct strintmap *relevant_sources, struct strintmap *dirs_removed, - struct strmap *dir_rename_count) + struct strmap *dir_rename_count, + struct strmap *cached_pairs) { struct hashmap_iter iter; struct strmap_entry *entry; @@ -633,6 +634,17 @@ static void initialize_dir_rename_info(struct dir_rename_info *info, rename_dst[i].p->two->path); } + /* Add cached_pairs to counts */ + strmap_for_each_entry(cached_pairs, &iter, entry) { + const char *old_name = entry->key; + const char *new_name = entry->value; + if (!new_name) + /* known delete; ignore it */ + continue; + + update_dir_rename_counts(info, dirs_removed, old_name, new_name); + } + /* * Now we collapse * dir_rename_count: old_directory -> {new_directory -> count} @@ -1247,7 +1259,8 @@ static void handle_early_known_dir_renames(struct dir_rename_info *info, void diffcore_rename_extended(struct diff_options *options, struct strintmap *relevant_sources, struct strintmap *dirs_removed, - struct strmap *dir_rename_count) + struct strmap *dir_rename_count, + struct strmap *cached_pairs) { int detect_rename = options->detect_rename; int minimum_score = options->rename_score; @@ -1363,7 +1376,8 @@ void diffcore_rename_extended(struct diff_options *options, /* Preparation for basename-driven matching. */ trace2_region_enter("diff", "dir rename setup", options->repo); initialize_dir_rename_info(&info, relevant_sources, - dirs_removed, dir_rename_count); + dirs_removed, dir_rename_count, + cached_pairs); trace2_region_leave("diff", "dir rename setup", options->repo); /* Utilize file basenames to quickly find renames. */ @@ -1560,5 +1574,5 @@ void diffcore_rename_extended(struct diff_options *options, void diffcore_rename(struct diff_options *options) { - diffcore_rename_extended(options, NULL, NULL, NULL); + diffcore_rename_extended(options, NULL, NULL, NULL, NULL); } diff --git a/diffcore.h b/diffcore.h index f5c6de4841ed83..533b30e21e7fe2 100644 --- a/diffcore.h +++ b/diffcore.h @@ -181,7 +181,8 @@ void diffcore_rename(struct diff_options *); void diffcore_rename_extended(struct diff_options *options, struct strintmap *relevant_sources, struct strintmap *dirs_removed, - struct strmap *dir_rename_count); + struct strmap *dir_rename_count, + struct strmap *cached_pairs); void diffcore_merge_broken(void); void diffcore_pickaxe(struct diff_options *); void diffcore_order(const char *orderfile); diff --git a/merge-ort.c b/merge-ort.c index 2cdc57e75543d9..142d44d74d6360 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -764,15 +764,48 @@ static void add_pair(struct merge_options *opt, struct rename_info *renames = &opt->priv->renames; int names_idx = is_add ? side : 0; - if (!is_add) { + if (is_add) { + if (strset_contains(&renames->cached_target_names[side], + pathname)) + return; + } else { unsigned content_relevant = (match_mask == 0); unsigned location_relevant = (dir_rename_mask == 0x07); + /* + * If pathname is found in cached_irrelevant[side] due to + * previous pick but for this commit content is relevant, + * then we need to remove it from cached_irrelevant. + */ + if (content_relevant) + /* strset_remove is no-op if strset doesn't have key */ + strset_remove(&renames->cached_irrelevant[side], + pathname); + + /* + * We do not need to re-detect renames for paths that we already + * know the pairing, i.e. for cached_pairs (or + * cached_irrelevant). However, handle_deferred_entries() needs + * to loop over the union of keys from relevant_sources[side] and + * cached_pairs[side], so for simplicity we set relevant_sources + * for all the cached_pairs too and then strip them back out in + * prune_cached_from_relevant() at the beginning of + * detect_regular_renames(). + */ if (content_relevant || location_relevant) { /* content_relevant trumps location_relevant */ strintmap_set(&renames->relevant_sources[side], pathname, content_relevant ? RELEVANT_CONTENT : RELEVANT_LOCATION); } + + /* + * Avoid creating pair if we've already cached rename results. + * Note that we do this after setting relevant_sources[side] + * as noted in the comment above. + */ + if (strmap_contains(&renames->cached_pairs[side], pathname) || + strset_contains(&renames->cached_irrelevant[side], pathname)) + return; } one = alloc_filespec(pathname); @@ -2360,7 +2393,9 @@ static inline int possible_side_renames(struct rename_info *renames, static inline int possible_renames(struct rename_info *renames) { return possible_side_renames(renames, 1) || - possible_side_renames(renames, 2); + possible_side_renames(renames, 2) || + !strmap_empty(&renames->cached_pairs[1]) || + !strmap_empty(&renames->cached_pairs[2]); } static void resolve_diffpair_statuses(struct diff_queue_struct *q) @@ -2384,7 +2419,6 @@ static void resolve_diffpair_statuses(struct diff_queue_struct *q) } } -MAYBE_UNUSED static void prune_cached_from_relevant(struct rename_info *renames, unsigned side) { @@ -2404,7 +2438,6 @@ static void prune_cached_from_relevant(struct rename_info *renames, } } -MAYBE_UNUSED static void use_cached_pairs(struct merge_options *opt, struct strmap *cached_pairs, struct diff_queue_struct *pairs) @@ -2507,6 +2540,7 @@ static void detect_regular_renames(struct merge_options *opt, struct diff_options diff_opts; struct rename_info *renames = &opt->priv->renames; + prune_cached_from_relevant(renames, side_index); if (!possible_side_renames(renames, side_index)) { /* * No rename detection needed for this side, but we still need @@ -2535,7 +2569,8 @@ static void detect_regular_renames(struct merge_options *opt, diffcore_rename_extended(&diff_opts, &renames->relevant_sources[side_index], &renames->dirs_removed[side_index], - &renames->dir_rename_count[side_index]); + &renames->dir_rename_count[side_index], + &renames->cached_pairs[side_index]); trace2_region_leave("diff", "diffcore_rename", opt->repo); resolve_diffpair_statuses(&diff_queued_diff); @@ -2643,6 +2678,8 @@ static int detect_and_process_renames(struct merge_options *opt, trace2_region_enter("merge", "regular renames", opt->repo); detect_regular_renames(opt, MERGE_SIDE1); detect_regular_renames(opt, MERGE_SIDE2); + use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]); + use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]); trace2_region_leave("merge", "regular renames", opt->repo); trace2_region_enter("merge", "directory renames", opt->repo); diff --git a/t/t6429-merge-sequence-rename-caching.sh b/t/t6429-merge-sequence-rename-caching.sh index f47d8924ee730d..035edc40b1ebba 100755 --- a/t/t6429-merge-sequence-rename-caching.sh +++ b/t/t6429-merge-sequence-rename-caching.sh @@ -101,10 +101,10 @@ test_expect_success 'caching renames does not preclude finding new ones' ' # dramatic change in size of the file, but remembering the rename and # reusing it is reasonable too. # -# Rename detection (diffcore_rename_extended()) will run twice here; it is -# not needed on the topic side of history for either of the two commits -# being merged, but it is needed on the upstream side of history for each -# commit being picked. +# We do test here that we expect rename detection to only be run once total +# (the topic side of history doesn't need renames, and with caching we +# should be able to only run rename detection on the upstream side one +# time.) test_expect_success 'cherry-pick both a commit and its immediate revert' ' test_create_repo pick-commit-and-its-immediate-revert && ( @@ -140,11 +140,11 @@ test_expect_success 'cherry-pick both a commit and its immediate revert' ' GIT_TRACE2_PERF="$(pwd)/trace.output" && export GIT_TRACE2_PERF && - test_might_fail test-tool fast-rebase --onto HEAD upstream~1 topic && + test-tool fast-rebase --onto HEAD upstream~1 topic && #git cherry-pick upstream~1..topic && grep region_enter.*diffcore_rename trace.output >calls && - test_line_count = 2 calls + test_line_count = 1 calls ) ' @@ -304,9 +304,11 @@ test_expect_success 'rename same file identically, then add file to old dir' ' # Here we are just concerned that cached renames might prevent us from seeing # the rename conflict, and we want to ensure that we do get a conflict. # -# While at it, also test that we do rename detection three times. We have to -# detect renames on the upstream side of history once for each merge, plus -# Topic_2 has renames. +# While at it, though, we do test that we only try to detect renames 2 +# times and not three. (The first merge needs to detect renames on the +# upstream side. Traditionally, the second merge would need to detect +# renames on both sides of history, but our caching of upstream renames +# should avoid the need to re-detect upstream renames.) # test_expect_success 'cached dir rename does not prevent noticing later conflict' ' test_create_repo dir-rename-cache-not-occluding-later-conflict && @@ -357,7 +359,7 @@ test_expect_success 'cached dir rename does not prevent noticing later conflict' grep CONFLICT..rename/rename output && grep region_enter.*diffcore_rename trace.output >calls && - test_line_count = 3 calls + test_line_count = 2 calls ) ' @@ -412,10 +414,17 @@ test_setup_upstream_rename () { # commit to mess up its location either. We want to make sure that # olddir/newfile doesn't exist in the result and that newdir/newfile does. # -# We also expect rename detection to occur three times. Although it is -# typically needed two times per commit, there are no deleted files on the -# topic side of history, so we only need to detect renames on the upstream -# side for each of the 3 commits we need to pick. +# We also test that we only do rename detection twice. We never need +# rename detection on the topic side of history, but we do need it twice on +# the upstream side of history. For the first topic commit, we only need +# the +# relevant-rename -> renamed +# rename, because olddir is unmodified by Topic_1. For Topic_2, however, +# the new file being added to olddir means files that were previously +# irrelevant for rename detection are now relevant, forcing us to repeat +# rename detection for the paths we don't already have cached. Topic_3 also +# tweaks olddir/newfile, but the renames in olddir/ will have been cached +# from the second rename detection run. # test_expect_success 'dir rename unneeded, then add new file to old dir' ' test_setup_upstream_rename dir-rename-unneeded-until-new-file && @@ -450,7 +459,7 @@ test_expect_success 'dir rename unneeded, then add new file to old dir' ' #git cherry-pick upstream..topic && grep region_enter.*diffcore_rename trace.output >calls && - test_line_count = 3 calls && + test_line_count = 2 calls && git ls-files >tracked && test_line_count = 5 tracked && @@ -516,7 +525,7 @@ test_expect_success 'dir rename unneeded, then rename existing file into old dir #git cherry-pick upstream..topic && grep region_enter.*diffcore_rename trace.output >calls && - test_line_count = 4 calls && + test_line_count = 3 calls && test_path_is_missing somefile && test_path_is_missing olddir/newfile && @@ -648,9 +657,8 @@ test_expect_success 'caching renames only on upstream side, part 1' ' # for the wrong side of history. # # -# This testcase should only need three calls to diffcore_rename_extended(), -# because there are no renames on the topic side of history for picking -# Topic_2. +# This testcase should only need two calls to diffcore_rename_extended(), +# both for the first merge, one for each side of history. # test_expect_success 'caching renames only on upstream side, part 2' ' test_setup_topic_rename cache-renames-only-upstream-rename-file && @@ -677,7 +685,7 @@ test_expect_success 'caching renames only on upstream side, part 2' ' #git cherry-pick upstream..topic && grep region_enter.*diffcore_rename trace.output >calls && - test_line_count = 3 calls && + test_line_count = 2 calls && git ls-files >tracked && test_line_count = 4 tracked &&