-
Notifications
You must be signed in to change notification settings - Fork 127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
multi-pack-index: fix verify on large repos #166
Changes from all commits
5595e01
498258b
8a60902
7e98ea0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
#include "sha1-lookup.h" | ||
#include "midx.h" | ||
#include "progress.h" | ||
#include "trace2.h" | ||
|
||
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ | ||
#define MIDX_VERSION 1 | ||
|
@@ -164,6 +165,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local | |
m->pack_names[i]); | ||
} | ||
|
||
trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs); | ||
trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects); | ||
|
||
return m; | ||
|
||
cleanup_fail: | ||
|
@@ -958,8 +962,35 @@ static void midx_report(const char *fmt, ...) | |
va_end(ap); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Ævar Arnfjörð Bjarmason wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Jeff Hostetler wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Ævar Arnfjörð Bjarmason wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Junio C Hamano wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Jeff Hostetler wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Ævar Arnfjörð Bjarmason wrote (reply to this):
|
||
} | ||
|
||
/*
 * Associates an object's position in the multi-pack-index with the
 * integer id of the packfile recorded for it, so that objects can be
 * sorted (and then visited) grouped by packfile.
 */
struct pair_pos_vs_id
{
	uint32_t pos;
	uint32_t pack_int_id;
};

/*
 * qsort-style comparator that orders pairs by descending pack_int_id.
 *
 * Returns a positive value when b's id is greater than a's, a negative
 * value when it is smaller, and 0 when the ids are equal.
 *
 * The ids are compared explicitly instead of returning their difference:
 * they are unsigned 32-bit values, so "b - a" wraps around and, once
 * converted to int, reports the wrong sign whenever the two ids are more
 * than INT_MAX apart.
 */
static int compare_pair_pos_vs_id(const void *_a, const void *_b)
{
	const struct pair_pos_vs_id *a = _a;
	const struct pair_pos_vs_id *b = _b;

	if (b->pack_int_id > a->pack_int_id)
		return 1;
	if (b->pack_int_id < a->pack_int_id)
		return -1;
	return 0;
}
|
||
/*
 * Calling display_progress() for every value hurts performance, so only
 * forward the values that are a multiple of SPARSE_PROGRESS_INTERVAL.
 * The interval was picked arbitrarily; it has to stay a power of two
 * because the test below masks off the low bits.
 *
 * The argument "n" is evaluated exactly once.
 */
#define SPARSE_PROGRESS_INTERVAL (1 << 12)
#define midx_display_sparse_progress(progress, n) \
	do { \
		uint64_t count_ = (n); \
		if (!(count_ & (SPARSE_PROGRESS_INTERVAL - 1))) \
			display_progress(progress, count_); \
	} while (0)
|
||
int verify_midx_file(const char *object_dir) | ||
{ | ||
struct pair_pos_vs_id *pairs = NULL; | ||
uint32_t i; | ||
struct progress *progress; | ||
struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); | ||
|
@@ -968,10 +999,15 @@ int verify_midx_file(const char *object_dir) | |
if (!m) | ||
return 0; | ||
|
||
progress = start_progress(_("Looking for referenced packfiles"), | ||
m->num_packs); | ||
for (i = 0; i < m->num_packs; i++) { | ||
if (prepare_midx_pack(m, i)) | ||
midx_report("failed to load pack in position %d", i); | ||
|
||
display_progress(progress, i + 1); | ||
} | ||
stop_progress(&progress); | ||
|
||
for (i = 0; i < 255; i++) { | ||
uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); | ||
|
@@ -982,6 +1018,8 @@ int verify_midx_file(const char *object_dir) | |
i, oid_fanout1, oid_fanout2, i + 1); | ||
} | ||
|
||
progress = start_sparse_progress(_("Verifying OID order in MIDX"), | ||
m->num_objects - 1); | ||
for (i = 0; i < m->num_objects - 1; i++) { | ||
struct object_id oid1, oid2; | ||
|
||
|
@@ -991,18 +1029,47 @@ int verify_midx_file(const char *object_dir) | |
if (oidcmp(&oid1, &oid2) >= 0) | ||
midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), | ||
i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); | ||
|
||
midx_display_sparse_progress(progress, i + 1); | ||
} | ||
stop_progress(&progress); | ||
|
||
progress = start_progress(_("Verifying object offsets"), m->num_objects); | ||
/* | ||
* Create an array mapping each object to its packfile id. Sort it | ||
* to group the objects by packfile. Use this permutation to visit | ||
* each of the objects and only require 1 packfile to be open at a | ||
* time. | ||
*/ | ||
ALLOC_ARRAY(pairs, m->num_objects); | ||
for (i = 0; i < m->num_objects; i++) { | ||
pairs[i].pos = i; | ||
pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i); | ||
} | ||
|
||
progress = start_sparse_progress(_("Sorting objects by packfile"), | ||
m->num_objects); | ||
display_progress(progress, 0); /* TODO: Measure QSORT() progress */ | ||
QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); | ||
stop_progress(&progress); | ||
|
||
progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects); | ||
for (i = 0; i < m->num_objects; i++) { | ||
struct object_id oid; | ||
struct pack_entry e; | ||
off_t m_offset, p_offset; | ||
|
||
nth_midxed_object_oid(&oid, m, i); | ||
if (i > 0 && pairs[i-1].pack_int_id != pairs[i].pack_int_id && | ||
m->packs[pairs[i-1].pack_int_id]) | ||
{ | ||
close_pack_fd(m->packs[pairs[i-1].pack_int_id]); | ||
close_pack_index(m->packs[pairs[i-1].pack_int_id]); | ||
} | ||
|
||
nth_midxed_object_oid(&oid, m, pairs[i].pos); | ||
|
||
if (!fill_midx_entry(&oid, &e, m)) { | ||
midx_report(_("failed to load pack entry for oid[%d] = %s"), | ||
i, oid_to_hex(&oid)); | ||
pairs[i].pos, oid_to_hex(&oid)); | ||
continue; | ||
} | ||
|
||
|
@@ -1017,11 +1084,13 @@ int verify_midx_file(const char *object_dir) | |
|
||
if (m_offset != p_offset) | ||
midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), | ||
i, oid_to_hex(&oid), m_offset, p_offset); | ||
pairs[i].pos, oid_to_hex(&oid), m_offset, p_offset); | ||
|
||
display_progress(progress, i + 1); | ||
midx_display_sparse_progress(progress, i + 1); | ||
} | ||
stop_progress(&progress); | ||
|
||
free(pairs); | ||
|
||
return verify_midx_error; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ struct progress { | |
uint64_t total; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Eric Sunshine wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Jeff Hostetler wrote (reply to this):
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On the Git mailing list, Eric Sunshine wrote (reply to this):
|
||
unsigned last_percent; | ||
unsigned delay; | ||
unsigned sparse; | ||
struct throughput *throughput; | ||
uint64_t start_ns; | ||
}; | ||
|
@@ -194,7 +195,7 @@ int display_progress(struct progress *progress, uint64_t n) | |
} | ||
|
||
static struct progress *start_progress_delay(const char *title, uint64_t total, | ||
unsigned delay) | ||
unsigned delay, unsigned sparse) | ||
{ | ||
struct progress *progress = malloc(sizeof(*progress)); | ||
if (!progress) { | ||
|
@@ -208,6 +209,7 @@ static struct progress *start_progress_delay(const char *title, uint64_t total, | |
progress->last_value = -1; | ||
progress->last_percent = -1; | ||
progress->delay = delay; | ||
progress->sparse = sparse; | ||
progress->throughput = NULL; | ||
progress->start_ns = getnanotime(); | ||
set_progress_signal(); | ||
|
@@ -216,16 +218,46 @@ static struct progress *start_progress_delay(const char *title, uint64_t total, | |
|
||
/*
 * Start a regular (non-sparse) progress meter for "title" covering
 * "total" items, passing a delay of 2 to start_progress_delay().
 *
 * Returns whatever start_progress_delay() returns.
 */
struct progress *start_delayed_progress(const char *title, uint64_t total)
{
	/* delay = 2, sparse = 0 */
	return start_progress_delay(title, total, 2, 0);
}
|
||
/*
 * Start a regular (non-sparse) progress meter for "title" covering
 * "total" items, passing a delay of 0 to start_progress_delay().
 *
 * Returns whatever start_progress_delay() returns.
 */
struct progress *start_progress(const char *title, uint64_t total)
{
	/* delay = 0, sparse = 0 */
	return start_progress_delay(title, total, 0, 0);
}
|
||
/*
 * A "sparse" progress meter is one whose caller may sample when to call
 * display_progress() instead of calling it for every integer value in
 * [0 .. total). In particular, the caller is allowed to never call
 * display_progress() for the last value in the range.
 *
 * For a meter started as "sparse", stop_progress() automatically forces
 * the done message to show 100%.
 */
struct progress *start_sparse_progress(const char *title, uint64_t total)
{
	const unsigned delay = 0;
	const unsigned sparse = 1;

	return start_progress_delay(title, total, delay, sparse);
}
|
||
/*
 * Start a sparse progress meter for "title" covering "total" items,
 * passing a delay of 2 to start_progress_delay().
 */
struct progress *start_delayed_sparse_progress(const char *title,
					       uint64_t total)
{
	const unsigned delay = 2;
	const unsigned sparse = 1;

	return start_progress_delay(title, total, delay, sparse);
}
|
||
static void finish_if_sparse(struct progress *progress) | ||
{ | ||
if (progress && | ||
progress->sparse && | ||
progress->last_value != progress->total) | ||
display_progress(progress, progress->total); | ||
} | ||
|
||
/*
 * Stop the progress meter referenced by "p_progress" with the message
 * "done". A sparse meter is first brought up to its total by
 * finish_if_sparse().
 */
void stop_progress(struct progress **p_progress)
{
	struct progress *meter = *p_progress;

	finish_if_sparse(meter);
	stop_progress_msg(p_progress, _("done"));
}
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the Git mailing list, Jeff Hostetler wrote (reply to this):