Skip to content

Commit

Permalink
diffcore-rename: use a mem_pool for exact rename detection's hashmap
Browse files Browse the repository at this point in the history
Exact rename detection, via insert_file_table(), uses a hashmap to store
files by oid.  Use a mem_pool for the hashmap entries so these can all be
allocated and deallocated together.

For the testcases mentioned in commit 557ac03 ("merge-ort: begin
performance work; instrument with trace2_region_* calls", 2020-10-28),
this change improves the performance as follows:

                            Before                  After
    no-renames:      204.2  ms ±  3.0  ms   202.5  ms ±  3.2  ms
    mega-renames:      1.076 s ±  0.015 s     1.072 s ±  0.012 s
    just-one-mega:   364.1  ms ±  7.0  ms   357.3  ms ±  3.9  ms

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
newren authored and gitster committed Jul 30, 2021
1 parent 7afc0b0 commit fa0e936
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions diffcore-rename.c
@@ -317,10 +317,11 @@ static int find_identical_files(struct hashmap *srcs,
}

static void insert_file_table(struct repository *r,
struct mem_pool *pool,
struct hashmap *table, int index,
struct diff_filespec *filespec)
{
struct file_similarity *entry = xmalloc(sizeof(*entry));
struct file_similarity *entry = mem_pool_alloc(pool, sizeof(*entry));

entry->index = index;
entry->filespec = filespec;
@@ -336,7 +337,8 @@ static void insert_file_table(struct repository *r,
* and then during the second round we try to match
* cache-dirty entries as well.
*/
static int find_exact_renames(struct diff_options *options)
static int find_exact_renames(struct diff_options *options,
struct mem_pool *pool)
{
int i, renames = 0;
struct hashmap file_table;
@@ -346,16 +348,16 @@ static int find_exact_renames(struct diff_options *options)
*/
hashmap_init(&file_table, NULL, NULL, rename_src_nr);
for (i = rename_src_nr-1; i >= 0; i--)
insert_file_table(options->repo,
insert_file_table(options->repo, pool,
&file_table, i,
rename_src[i].p->one);

/* Walk the destinations and find best source match */
for (i = 0; i < rename_dst_nr; i++)
renames += find_identical_files(&file_table, i, options);

/* Free the hash data structure and entries */
hashmap_clear_and_free(&file_table, struct file_similarity, entry);
/* Free the hash data structure (entries will be freed with the pool) */
hashmap_clear(&file_table);

return renames;
}
@@ -1341,6 +1343,7 @@ void diffcore_rename_extended(struct diff_options *options,
int num_destinations, dst_cnt;
int num_sources, want_copies;
struct progress *progress = NULL;
struct mem_pool local_pool;
struct dir_rename_info info;
struct diff_populate_filespec_options dpf_options = {
.check_binary = 0,
@@ -1409,11 +1412,18 @@ void diffcore_rename_extended(struct diff_options *options,
goto cleanup; /* nothing to do */

trace2_region_enter("diff", "exact renames", options->repo);
mem_pool_init(&local_pool, 32*1024);
/*
* We really want to cull the candidates list early
* with cheap tests in order to avoid doing deltas.
*/
rename_count = find_exact_renames(options);
rename_count = find_exact_renames(options, &local_pool);
/*
* Discard local_pool immediately instead of at "cleanup:" in order
* to reduce maximum memory usage; inexact rename detection uses up
* a fair amount of memory, and mem_pools can too.
*/
mem_pool_discard(&local_pool, 0);
trace2_region_leave("diff", "exact renames", options->repo);

/* Did we only want exact renames? */
Expand Down

0 comments on commit fa0e936

Please sign in to comment.