Skip to content

Commit

Permalink
Merge branch 'cc/filtered-repack' into seen
Browse files Browse the repository at this point in the history
"git repack" learns to discard objects that ought to be retrievable
again from the promisor remote.

* cc/filtered-repack:
  repack: add --filter=<filter-spec> option
  pack-objects: allow --filter without --stdout
  • Loading branch information
gitster committed Nov 30, 2022
2 parents 7b86b27 + 842fdc7 commit dcd4c55
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 13 deletions.
8 changes: 8 additions & 0 deletions Documentation/git-repack.txt
Expand Up @@ -143,6 +143,14 @@ depth is 4095.
a larger and slower repository; see the discussion in
`pack.packSizeLimit`.

--filter=<filter-spec>::
Omit certain objects (usually blobs) from the resulting
packfile. WARNING: this could easily corrupt the current repo
and lose data if ANY of the omitted objects has not already been
pushed to a remote. Be very careful about objects that might
have been created locally! See linkgit:git-rev-list[1] for valid
`<filter-spec>` forms.

-b::
--write-bitmap-index::
Write a reachability bitmap index as part of the repack. This
Expand Down
8 changes: 2 additions & 6 deletions builtin/pack-objects.c
Expand Up @@ -4371,12 +4371,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
unpack_unreachable_expiration = 0;

if (filter_options.choice) {
if (!pack_to_stdout)
die(_("cannot use --filter without --stdout"));
if (stdin_packs)
die(_("cannot use --filter with --stdin-packs"));
}
if (filter_options.choice && stdin_packs)
die(_("cannot use --filter with --stdin-packs"));

if (stdin_packs && use_internal_rev_list)
die(_("cannot use internal rev list with --stdin-packs"));
Expand Down
28 changes: 21 additions & 7 deletions builtin/repack.c
Expand Up @@ -49,6 +49,7 @@ struct pack_objects_args {
const char *depth;
const char *threads;
const char *max_pack_size;
const char *filter;
int no_reuse_delta;
int no_reuse_object;
int quiet;
Expand Down Expand Up @@ -163,6 +164,8 @@ static void prepare_pack_objects(struct child_process *cmd,
strvec_pushf(&cmd->args, "--threads=%s", args->threads);
if (args->max_pack_size)
strvec_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
if (args->filter)
strvec_pushf(&cmd->args, "--filter=%s", args->filter);
if (args->no_reuse_delta)
strvec_pushf(&cmd->args, "--no-reuse-delta");
if (args->no_reuse_object)
Expand Down Expand Up @@ -234,6 +237,13 @@ static struct generated_pack_data *populate_pack_exts(const char *name)
return data;
}

/*
 * Write an empty ".promisor" file for the temporary pack identified by `p`.
 *
 * The file name is built as "<packtmp>-<p>.promisor". `p` is only read
 * here, so it is taken as a pointer to const; the callers in this file
 * pass the line they read from pack-objects. The path string obtained
 * from mkpathdup() is released before returning.
 */
static void write_promisor_file_1(const char *p)
{
	char *promisor_name = mkpathdup("%s-%s.promisor", packtmp, p);

	/* NULL/0: no entries are written, only the file itself is created. */
	write_promisor_file(promisor_name, NULL, 0);
	free(promisor_name);
}

static void repack_promisor_objects(const struct pack_objects_args *args,
struct string_list *names)
{
Expand Down Expand Up @@ -265,7 +275,6 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
struct string_list_item *item;
char *promisor_name;

if (line.len != the_hash_algo->hexsz)
die(_("repack: Expecting full hex object ID lines only from pack-objects."));
Expand All @@ -282,13 +291,8 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
* concatenate the contents of all .promisor files instead of
* just creating a new empty file.
*/
promisor_name = mkpathdup("%s-%s.promisor", packtmp,
line.buf);
write_promisor_file(promisor_name, NULL, 0);

write_promisor_file_1(line.buf);
item->util = populate_pack_exts(item->string);

free(promisor_name);
}
fclose(out);
if (finish_command(&cmd))
Expand Down Expand Up @@ -800,6 +804,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("limits the maximum number of threads")),
OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
N_("maximum size of each packfile")),
OPT_STRING(0, "filter", &po_args.filter, N_("args"),
N_("object filtering")),
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
N_("repack objects in packs marked with .keep")),
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
Expand Down Expand Up @@ -834,6 +840,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
die(_("options '%s' and '%s' cannot be used together"), "--cruft", "-k");
}

if (po_args.filter && !has_promisor_remote())
die("a promisor remote must be setup\n"
"Also please push all the objects "
"that might be filtered to that remote!\n"
"Otherwise they will be lost!");

if (write_bitmaps < 0) {
if (!write_midx &&
(!(pack_everything & ALL_INTO_ONE) || !is_bare_repository()))
Expand Down Expand Up @@ -971,6 +983,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (line.len != the_hash_algo->hexsz)
die(_("repack: Expecting full hex object ID lines only from pack-objects."));
item = string_list_append(&names, line.buf);
if (po_args.filter)
write_promisor_file_1(line.buf);
item->util = populate_pack_exts(item->string);
}
strbuf_release(&line);
Expand Down
15 changes: 15 additions & 0 deletions t/t7700-repack.sh
Expand Up @@ -253,6 +253,21 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' '
test_must_be_empty actual
'

test_expect_success 'repacking with a filter works' '
	test_when_finished "rm -rf server client" &&
	test_create_repo server &&
	git -C server config uploadpack.allowFilter true &&
	git -C server config uploadpack.allowAnySHA1InWant true &&
	test_commit -C server 1 &&
	git clone --bare --no-local server client &&
	git -C client config remote.origin.promisor true &&

	# Before the filtered repack, no line of the listing starts with "?".
	git -C client rev-list --objects --all --missing=print >objects &&
	! grep "^?" objects &&

	git -C client -c repack.writebitmaps=false repack -a -d --filter=blob:none &&

	# After it, exactly one line of the listing starts with "?".
	git -C client rev-list --objects --all --missing=print >objects &&
	grep "^?" objects >missing &&
	test_line_count = 1 missing
'

objdir=.git/objects
midx=$objdir/pack/multi-pack-index

Expand Down

0 comments on commit dcd4c55

Please sign in to comment.