Skip to content

Commit

Permalink
midx-write.c: do not read existing MIDX with packs_to_include
Browse files Browse the repository at this point in the history
Commit d6a8c58 (midx-write.c: support reading an existing MIDX with
`packs_to_include`, 2024-05-29) changed the MIDX generation machinery to
support reading from an existing MIDX when writing a new one.

Unfortunately, the rest of the MIDX generation machinery is not prepared
to deal with such a change. For instance, the function responsible for
adding to the object ID fanout table from a MIDX source
(midx_fanout_add_midx_fanout()) will gladly add objects from an existing
MIDX for some fanout level regardless of whether or not those objects
came from packs that are to be included in the subsequent MIDX write.

This results in broken pseudo-pack object order (leading to incorrect
object traversal results) and segmentation faults, like so (generated by
running the added test prior to the changes in midx-write.c):

    #0  0x000055ee31393f47 in midx_pack_order (ctx=0x7ffdde205c70) at midx-write.c:590
    #1  0x000055ee31395a69 in write_midx_internal (object_dir=0x55ee32570440 ".git/objects",
        packs_to_include=0x7ffdde205e20, packs_to_drop=0x0, preferred_pack_name=0x0,
        refs_snapshot=0x0, flags=15) at midx-write.c:1171
    #2  0x000055ee31395f38 in write_midx_file_only (object_dir=0x55ee32570440 ".git/objects",
        packs_to_include=0x7ffdde205e20, preferred_pack_name=0x0, refs_snapshot=0x0, flags=15)
        at midx-write.c:1274
    [...]

In stack frame #0, the code on midx-write.c:590 is using the new pack ID
corresponding to some object which was added from the existing MIDX.
Importantly, the pack from which that object was selected in the
existing MIDX does not appear in the new MIDX as it was excluded via
`--stdin-packs`.

In this instance, the pack in question had pack ID "1" in the existing
MIDX, but since it was excluded from the new MIDX, we never filled in
that entry in the pack_perm table, resulting in:

    (gdb) p *ctx->pack_perm@2
    $1 = {0, 1515870810}

Which is what causes the segfault above when we try and read:

    struct pack_info *pack = &ctx->info[ctx->pack_perm[i]];
    if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
        pack->bitmap_pos = 0;

Fundamentally, we should be able to read information from an existing
MIDX when generating a new one. But in practice the midx-write.c code
assumes that we won't run into issues like the above with incongruent
pack IDs, and often makes those assumptions in extremely subtle and
fragile ways.

Instead, let's avoid reading from an existing MIDX altogether, and stick
with the pre-d6a8c58675 implementation. Harden against any regressions
in this area by adding a test which demonstrates these issues.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
ttaylorr authored and gitster committed Jun 11, 2024
1 parent 1b76f06 commit 0c5a62f
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 11 deletions.
42 changes: 31 additions & 11 deletions midx-write.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,27 @@ struct write_midx_context {
};

static int should_include_pack(const struct write_midx_context *ctx,
const char *file_name,
int exclude_from_midx)
const char *file_name)
{
if (exclude_from_midx && ctx->m && midx_contains_pack(ctx->m, file_name))
/*
* Note that at most one of ctx->m and ctx->to_include are set,
* so we are testing midx_contains_pack() and
* string_list_has_string() independently (guarded by the
* appropriate NULL checks).
*
* We could support passing to_include while reusing an existing
* MIDX, but don't currently since the reuse process drags
* forward all packs from an existing MIDX (without checking
* whether or not they appear in the to_include list).
*
* If we added support for that, these next two conditional
* should be performed independently (likely checking
* to_include before the existing MIDX).
*/
if (ctx->m && midx_contains_pack(ctx->m, file_name))
return 0;
if (ctx->to_include && !string_list_has_string(ctx->to_include,
file_name))
else if (ctx->to_include &&
!string_list_has_string(ctx->to_include, file_name))
return 0;
return 1;
}
Expand All @@ -121,7 +135,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
if (ends_with(file_name, ".idx")) {
display_progress(ctx->progress, ++ctx->pack_paths_checked);

if (!should_include_pack(ctx, file_name, 1))
if (!should_include_pack(ctx, file_name))
return;

ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
Expand Down Expand Up @@ -880,9 +894,6 @@ static int fill_packs_from_midx(struct write_midx_context *ctx,
uint32_t i;

for (i = 0; i < ctx->m->num_packs; i++) {
if (!should_include_pack(ctx, ctx->m->pack_names[i], 0))
continue;

ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);

if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) {
Expand Down Expand Up @@ -937,7 +948,15 @@ static int write_midx_internal(const char *object_dir,
die_errno(_("unable to create leading directories of %s"),
midx_name.buf);

ctx.m = lookup_multi_pack_index(the_repository, object_dir);
if (!packs_to_include) {
/*
* Only reference an existing MIDX when not filtering which
* packs to include, since all packs and objects are copied
* blindly from an existing MIDX if one is present.
*/
ctx.m = lookup_multi_pack_index(the_repository, object_dir);
}

if (ctx.m && !midx_checksum_valid(ctx.m)) {
warning(_("ignoring existing multi-pack-index; checksum mismatch"));
ctx.m = NULL;
Expand All @@ -946,7 +965,6 @@ static int write_midx_internal(const char *object_dir,
ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
ctx.info = NULL;
ctx.to_include = packs_to_include;
ALLOC_ARRAY(ctx.info, ctx.alloc);

if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
Expand All @@ -963,6 +981,8 @@ static int write_midx_internal(const char *object_dir,
else
ctx.progress = NULL;

ctx.to_include = packs_to_include;

for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
stop_progress(&ctx.progress);

Expand Down
30 changes: 30 additions & 0 deletions t/t5326-multi-pack-bitmaps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -551,4 +551,34 @@ do
'
done

test_expect_success 'remove one packfile between MIDX bitmap writes' '
git init remove-pack-between-writes &&
(
cd remove-pack-between-writes &&
test_commit A &&
test_commit B &&
test_commit C &&
# Create packs with the prefix "pack-A", "pack-B",
# "pack-C" to impose a lexicographic order on these
# packs so the pack being removed is always from the
# middle.
packdir=.git/objects/pack &&
A="$(echo A | git pack-objects $packdir/pack-A --revs)" &&
B="$(echo B | git pack-objects $packdir/pack-B --revs)" &&
C="$(echo C | git pack-objects $packdir/pack-C --revs)" &&
git multi-pack-index write --bitmap &&
cat >in <<-EOF &&
pack-A-$A.idx
pack-C-$C.idx
EOF
git multi-pack-index write --bitmap --stdin-packs <in &&
git rev-list --test-bitmap HEAD
)
'

test_done

0 comments on commit 0c5a62f

Please sign in to comment.