diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt index a8e6437a903592..0678b4bcfef7e6 100644 --- a/Documentation/config/remote.txt +++ b/Documentation/config/remote.txt @@ -82,5 +82,7 @@ remote.<name>.promisor:: objects. remote.<name>.partialclonefilter:: - The filter that will be applied when fetching from this - promisor remote. + The filter that will be applied when fetching from this promisor remote. + Changing or clearing this value will only affect fetches for new commits. + To fetch associated objects for commits already present in the local object + database, use the `--refetch` option of linkgit:git-fetch[1]. diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt index 6cdd9d43c5abd2..622bd84768b056 100644 --- a/Documentation/fetch-options.txt +++ b/Documentation/fetch-options.txt @@ -163,6 +163,16 @@ endif::git-pull[] behavior for a remote may be specified with the remote.<name>.tagOpt setting. See linkgit:git-config[1]. +ifndef::git-pull[] +--refetch:: + Instead of negotiating with the server to avoid transferring commits and + associated objects that are already present locally, this option fetches + all objects as a fresh clone would. Use this to reapply a partial clone + filter from configuration or using `--filter=` when the filter + definition has changed. Automatic post-fetch maintenance will perform + object database pack consolidation to remove any duplicate objects. +endif::git-pull[] + --refmap=<refspec>:: When fetching refs listed on the command line, use the specified refspec (can be given more than once) to map the diff --git a/Documentation/git-fetch-pack.txt b/Documentation/git-fetch-pack.txt index c9758847937e7d..46747d5f429164 100644 --- a/Documentation/git-fetch-pack.txt +++ b/Documentation/git-fetch-pack.txt @@ -101,6 +101,10 @@ be in a separate packet, and the list must end with a flush packet. current shallow boundary instead of from the tip of each remote branch history. 
+--refetch:: + Skips negotiating commits with the server in order to fetch all matching + objects. Use to reapply a new partial clone blob/tree filter. + --no-progress:: Do not show the progress. diff --git a/Documentation/technical/partial-clone.txt b/Documentation/technical/partial-clone.txt index a0dd7c66f247d6..99f0eb304061ad 100644 --- a/Documentation/technical/partial-clone.txt +++ b/Documentation/technical/partial-clone.txt @@ -181,6 +181,9 @@ Fetching Missing Objects currently fetches all objects referred to by the requested objects, even though they are not necessary. +- Fetching with `--refetch` will request a complete new filtered packfile from + the remote, which can be used to change a filter without needing to + dynamically fetch missing objects. Using many promisor remotes --------------------------- diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index c4b9104f9b58ed..f045bbbe946dcb 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -153,6 +153,10 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) args.from_promisor = 1; continue; } + if (!strcmp("--refetch", arg)) { + args.refetch = 1; + continue; + } if (skip_prefix(arg, ("--filter="), &arg)) { parse_list_objects_filter(&args.filter_options, arg); continue; diff --git a/builtin/fetch.c b/builtin/fetch.c index 9b4018f62c4d3e..e3791f09ed51d0 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -59,7 +59,7 @@ static int prune_tags = -1; /* unspecified */ static int all, append, dry_run, force, keep, multiple, update_head_ok; static int write_fetch_head = 1; -static int verbosity, deepen_relative, set_upstream; +static int verbosity, deepen_relative, set_upstream, refetch; static int progress = -1; static int enable_auto_gc = 1; static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen; @@ -190,6 +190,9 @@ static struct option builtin_fetch_options[] = { OPT_SET_INT_F(0, "unshallow", &unshallow, N_("convert to a complete repository"), 1, 
PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "refetch", &refetch, + N_("re-fetch without negotiating common commits"), + 1, PARSE_OPT_NONEG), { OPTION_STRING, 0, "submodule-prefix", &submodule_prefix, N_("dir"), N_("prepend this to submodule path output"), PARSE_OPT_HIDDEN }, OPT_CALLBACK_F(0, "recurse-submodules-default", @@ -1304,6 +1307,14 @@ static int check_exist_and_connected(struct ref *ref_map) if (deepen) return -1; + /* + * Similarly, if we need to refetch, we always want to perform a full + * fetch ignoring existing objects. + */ + if (refetch) + return -1; + + /* * check_connected() allows objects to merely be promised, but * we need all direct targets to exist. @@ -1517,6 +1528,8 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes"); if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); + if (refetch) + set_option(transport, TRANS_OPT_REFETCH, "yes"); if (filter_options.choice) { const char *spec = expand_list_objects_filter_spec(&filter_options); @@ -2293,8 +2306,25 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) NULL); } - if (enable_auto_gc) + if (enable_auto_gc) { + if (refetch) { + /* + * Hint auto-maintenance strongly to encourage repacking, + * but respect config settings disabling it. 
+ */ + int opt_val; + if (git_config_get_int("gc.autopacklimit", &opt_val)) + opt_val = -1; + if (opt_val != 0) + git_config_push_parameter("gc.autoPackLimit=1"); + + if (git_config_get_int("maintenance.incremental-repack.auto", &opt_val)) + opt_val = -1; + if (opt_val != 0) + git_config_push_parameter("maintenance.incremental-repack.auto=-1"); + } run_auto_maintenance(verbosity < 0); + } cleanup: string_list_clear(&list, 0); diff --git a/fetch-negotiator.c b/fetch-negotiator.c index 874797d767bb1a..be383367f55a53 100644 --- a/fetch-negotiator.c +++ b/fetch-negotiator.c @@ -23,3 +23,8 @@ void fetch_negotiator_init(struct repository *r, return; } } + +void fetch_negotiator_init_noop(struct fetch_negotiator *negotiator) +{ + noop_negotiator_init(negotiator); +} diff --git a/fetch-negotiator.h b/fetch-negotiator.h index ea78868504bdcf..e348905a1f0008 100644 --- a/fetch-negotiator.h +++ b/fetch-negotiator.h @@ -53,7 +53,15 @@ struct fetch_negotiator { void *data; }; +/* + * Initialize a negotiator based on the repository settings. + */ void fetch_negotiator_init(struct repository *r, struct fetch_negotiator *negotiator); +/* + * Initialize a noop negotiator. + */ +void fetch_negotiator_init_noop(struct fetch_negotiator *negotiator); + #endif diff --git a/fetch-pack.c b/fetch-pack.c index 87657907e78d42..4e1e88eea097dd 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -312,19 +312,21 @@ static int find_common(struct fetch_negotiator *negotiator, const char *remote_hex; struct object *o; - /* - * If that object is complete (i.e. it is an ancestor of a - * local ref), we tell them we have it but do not have to - * tell them about its ancestors, which they already know - * about. - * - * We use lookup_object here because we are only - * interested in the case we *know* the object is - * reachable and we have already scanned it. 
- */ - if (((o = lookup_object(the_repository, remote)) != NULL) && - (o->flags & COMPLETE)) { - continue; + if (!args->refetch) { + /* + * If that object is complete (i.e. it is an ancestor of a + * local ref), we tell them we have it but do not have to + * tell them about its ancestors, which they already know + * about. + * + * We use lookup_object here because we are only + * interested in the case we *know* the object is + * reachable and we have already scanned it. + */ + if (((o = lookup_object(the_repository, remote)) != NULL) && + (o->flags & COMPLETE)) { + continue; + } } remote_hex = oid_to_hex(remote); @@ -692,6 +694,9 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator, int old_save_commit_buffer = save_commit_buffer; timestamp_t cutoff = 0; + if (args->refetch) + return; + save_commit_buffer = 0; trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL); @@ -1028,7 +1033,11 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, struct fetch_negotiator *negotiator; negotiator = &negotiator_alloc; - fetch_negotiator_init(r, negotiator); + if (args->refetch) { + fetch_negotiator_init_noop(negotiator); + } else { + fetch_negotiator_init(r, negotiator); + } sort_ref_list(&ref, ref_compare_name); QSORT(sought, nr_sought, cmp_ref_by_name); @@ -1121,7 +1130,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); - if (everything_local(args, &ref)) { + if (!args->refetch && everything_local(args, &ref)) { packet_flush(fd[1]); goto all_done; } @@ -1587,7 +1596,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct strvec index_pack_args = STRVEC_INIT; negotiator = &negotiator_alloc; - fetch_negotiator_init(r, negotiator); + if (args->refetch) + fetch_negotiator_init_noop(negotiator); + else + fetch_negotiator_init(r, negotiator); packet_reader_init(&reader, fd[0], NULL, 0, 
PACKET_READ_CHOMP_NEWLINE | @@ -1613,7 +1625,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, /* Filter 'ref' by 'sought' and those that aren't local */ mark_complete_and_common_ref(negotiator, args, &ref); filter_refs(args, &ref, sought, nr_sought); - if (everything_local(args, &ref)) + if (!args->refetch && everything_local(args, &ref)) state = FETCH_DONE; else state = FETCH_SEND_REQUEST; diff --git a/fetch-pack.h b/fetch-pack.h index 7f94a2a5831809..8c7752fc8212c7 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -42,6 +42,7 @@ struct fetch_pack_args { unsigned update_shallow:1; unsigned reject_shallow_remote:1; unsigned deepen:1; + unsigned refetch:1; /* * Indicate that the remote of this request is a promisor remote. The diff --git a/remote-curl.c b/remote-curl.c index ff44f41011e8a0..67f178b1120bd9 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -43,6 +43,7 @@ struct options { /* see documentation of corresponding flag in fetch-pack.h */ from_promisor : 1, + refetch : 1, atomic : 1, object_format : 1, force_if_includes : 1; @@ -198,6 +199,9 @@ static int set_option(const char *name, const char *value) } else if (!strcmp(name, "from-promisor")) { options.from_promisor = 1; return 0; + } else if (!strcmp(name, "refetch")) { + options.refetch = 1; + return 0; } else if (!strcmp(name, "filter")) { options.filter = xstrdup(value); return 0; @@ -1182,6 +1186,8 @@ static int fetch_git(struct discovery *heads, strvec_push(&args, "--deepen-relative"); if (options.from_promisor) strvec_push(&args, "--from-promisor"); + if (options.refetch) + strvec_push(&args, "--refetch"); if (options.filter) strvec_pushf(&args, "--filter=%s", options.filter); strvec_push(&args, url.buf); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 34469b6ac10fef..4a3778d04a82df 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -166,6 +166,85 @@ test_expect_success 'manual prefetch of missing objects' ' test_line_count = 0 
observed.oids ' +# create new commits in "src" repo to establish a history on file.4.txt +# and push to "srv.bare". +test_expect_success 'push new commits to server for file.4.txt' ' + for x in a b c d e f + do + echo "Mod file.4.txt $x" >src/file.4.txt && + if list_contains "a,b" "$x"; then + printf "%10000s" X >>src/file.4.txt + fi && + if list_contains "c,d" "$x"; then + printf "%20000s" X >>src/file.4.txt + fi && + git -C src add file.4.txt && + git -C src commit -m "mod $x" || return 1 + done && + git -C src push -u srv main +' + +# Do partial fetch to fetch smaller files; then verify that without --refetch +# applying a new filter does not refetch missing large objects. Then use +# --refetch to apply the new filter on existing commits. Test it under both +# protocol v2 & v0. +test_expect_success 'apply a different filter using --refetch' ' + git -C pc1 fetch --filter=blob:limit=999 origin && + git -C pc1 rev-list --quiet --objects --missing=print \ + main..origin/main >observed && + test_line_count = 4 observed && + + git -C pc1 fetch --filter=blob:limit=19999 --refetch origin && + git -C pc1 rev-list --quiet --objects --missing=print \ + main..origin/main >observed && + test_line_count = 2 observed && + + git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 \ + --refetch origin && + git -C pc1 rev-list --quiet --objects --missing=print \ + main..origin/main >observed && + test_line_count = 0 observed +' + +test_expect_success 'fetch --refetch works with a shallow clone' ' + git clone --no-checkout --depth=1 --filter=blob:none "file://$(pwd)/srv.bare" pc1s && + git -C pc1s rev-list --objects --missing=print HEAD >observed && + test_line_count = 6 observed && + + GIT_TRACE=1 git -C pc1s fetch --filter=blob:limit=999 --refetch origin && + git -C pc1s rev-list --objects --missing=print HEAD >observed && + test_line_count = 6 observed +' + +test_expect_success 'fetch --refetch triggers repacking' ' + 
GIT_TRACE2_CONFIG_PARAMS=gc.autoPackLimit,maintenance.incremental-repack.auto && + export GIT_TRACE2_CONFIG_PARAMS && + + GIT_TRACE2_EVENT="$PWD/trace1.event" \ + git -C pc1 fetch --refetch origin && + test_subcommand git maintenance run --auto --no-quiet fetched_objects && - test_line_count = 70 fetched_objects && + test_line_count = 88 fetched_objects && awk -f print_1.awk fetched_objects | xargs -n1 git -C dst cat-file -t >fetched_types && diff --git a/transport-helper.c b/transport-helper.c index a0297b0986c62e..b4dbbabb0c2ef6 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -715,6 +715,9 @@ static int fetch_refs(struct transport *transport, if (data->transport_options.update_shallow) set_helper_option(transport, "update-shallow", "true"); + if (data->transport_options.refetch) + set_helper_option(transport, "refetch", "true"); + if (data->transport_options.filter_options.choice) { const char *spec = expand_list_objects_filter_spec( &data->transport_options.filter_options); diff --git a/transport.c b/transport.c index 70e9840a90e4cc..3d64a43ab394b1 100644 --- a/transport.c +++ b/transport.c @@ -250,6 +250,9 @@ static int set_git_option(struct git_transport_options *opts, list_objects_filter_die_if_populated(&opts->filter_options); parse_list_objects_filter(&opts->filter_options, value); return 0; + } else if (!strcmp(name, TRANS_OPT_REFETCH)) { + opts->refetch = !!value; + return 0; } else if (!strcmp(name, TRANS_OPT_REJECT_SHALLOW)) { opts->reject_shallow = !!value; return 0; @@ -384,6 +387,7 @@ static int fetch_refs_via_pack(struct transport *transport, args.update_shallow = data->options.update_shallow; args.from_promisor = data->options.from_promisor; args.filter_options = data->options.filter_options; + args.refetch = data->options.refetch; args.stateless_rpc = transport->stateless_rpc; args.server_options = transport->server_options; args.negotiation_tips = data->options.negotiation_tips; diff --git a/transport.h b/transport.h index 
a0bc6a1e9eba8f..12bc08fc33949a 100644 --- a/transport.h +++ b/transport.h @@ -16,6 +16,7 @@ struct git_transport_options { unsigned update_shallow : 1; unsigned reject_shallow : 1; unsigned deepen_relative : 1; + unsigned refetch : 1; /* see documentation of corresponding flag in fetch-pack.h */ unsigned from_promisor : 1; @@ -216,6 +217,9 @@ void transport_check_allowed(const char *type); /* Filter objects for partial clone and fetch */ #define TRANS_OPT_LIST_OBJECTS_FILTER "filter" +/* Refetch all objects without negotiating */ +#define TRANS_OPT_REFETCH "refetch" + /* Request atomic (all-or-nothing) updates when pushing */ #define TRANS_OPT_ATOMIC "atomic"