Skip to content

Commit

Permalink
Merge branch 'rc/fetch-refetch' into jch
Browse files Browse the repository at this point in the history
"git fetch --refetch" learned to fetch everything without telling
the other side what we already have, which is useful when you
cannot trust what you have in the local object store.

* rc/fetch-refetch:
  docs: mention --refetch fetch option
  fetch: after refetch, encourage auto gc repacking
  t5615-partial-clone: add test for fetch --refetch
  fetch: add --refetch option
  builtin/fetch-pack: add --refetch option
  fetch-pack: add refetch
  fetch-negotiator: add specific noop initializer
  • Loading branch information
gitster committed Apr 1, 2022
2 parents 0fc5801 + 4963d3e commit 649b59b
Show file tree
Hide file tree
Showing 15 changed files with 197 additions and 22 deletions.
6 changes: 4 additions & 2 deletions Documentation/config/remote.txt
Expand Up @@ -82,5 +82,7 @@ remote.<name>.promisor::
objects.

remote.<name>.partialclonefilter::
The filter that will be applied when fetching from this
promisor remote.
The filter that will be applied when fetching from this promisor remote.
Changing or clearing this value will only affect fetches for new commits.
To fetch associated objects for commits already present in the local object
database, use the `--refetch` option of linkgit:git-fetch[1].
10 changes: 10 additions & 0 deletions Documentation/fetch-options.txt
Expand Up @@ -163,6 +163,16 @@ endif::git-pull[]
behavior for a remote may be specified with the remote.<name>.tagOpt
setting. See linkgit:git-config[1].

ifndef::git-pull[]
--refetch::
Instead of negotiating with the server to avoid transferring commits and
associated objects that are already present locally, this option fetches
all objects as a fresh clone would. Use this to reapply a partial clone
filter from configuration or using `--filter=` when the filter
definition has changed. Automatic post-fetch maintenance will perform
object database pack consolidation to remove any duplicate objects.
endif::git-pull[]

--refmap=<refspec>::
When fetching refs listed on the command line, use the
specified refspec (can be given more than once) to map the
Expand Down
4 changes: 4 additions & 0 deletions Documentation/git-fetch-pack.txt
Expand Up @@ -101,6 +101,10 @@ be in a separate packet, and the list must end with a flush packet.
current shallow boundary instead of from the tip of each
remote branch history.

--refetch::
Skips negotiating commits with the server in order to fetch all matching
objects. Use this to reapply a new partial clone blob/tree filter.

--no-progress::
Do not show the progress.

Expand Down
3 changes: 3 additions & 0 deletions Documentation/technical/partial-clone.txt
Expand Up @@ -181,6 +181,9 @@ Fetching Missing Objects
currently fetches all objects referred to by the requested objects, even
though they are not necessary.

- Fetching with `--refetch` will request a complete new filtered packfile from
the remote, which can be used to change a filter without needing to
dynamically fetch missing objects.

Using many promisor remotes
---------------------------
Expand Down
4 changes: 4 additions & 0 deletions builtin/fetch-pack.c
Expand Up @@ -153,6 +153,10 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
args.from_promisor = 1;
continue;
}
if (!strcmp("--refetch", arg)) {
args.refetch = 1;
continue;
}
if (skip_prefix(arg, ("--filter="), &arg)) {
parse_list_objects_filter(&args.filter_options, arg);
continue;
Expand Down
34 changes: 32 additions & 2 deletions builtin/fetch.c
Expand Up @@ -59,7 +59,7 @@ static int prune_tags = -1; /* unspecified */

static int all, append, dry_run, force, keep, multiple, update_head_ok;
static int write_fetch_head = 1;
static int verbosity, deepen_relative, set_upstream;
static int verbosity, deepen_relative, set_upstream, refetch;
static int progress = -1;
static int enable_auto_gc = 1;
static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen;
Expand Down Expand Up @@ -190,6 +190,9 @@ static struct option builtin_fetch_options[] = {
OPT_SET_INT_F(0, "unshallow", &unshallow,
N_("convert to a complete repository"),
1, PARSE_OPT_NONEG),
OPT_SET_INT_F(0, "refetch", &refetch,
N_("re-fetch without negotiating common commits"),
1, PARSE_OPT_NONEG),
{ OPTION_STRING, 0, "submodule-prefix", &submodule_prefix, N_("dir"),
N_("prepend this to submodule path output"), PARSE_OPT_HIDDEN },
OPT_CALLBACK_F(0, "recurse-submodules-default",
Expand Down Expand Up @@ -1304,6 +1307,14 @@ static int check_exist_and_connected(struct ref *ref_map)
if (deepen)
return -1;

/*
* Similarly, if we need to refetch, we always want to perform a full
* fetch ignoring existing objects.
*/
if (refetch)
return -1;


/*
* check_connected() allows objects to merely be promised, but
* we need all direct targets to exist.
Expand Down Expand Up @@ -1517,6 +1528,8 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
if (update_shallow)
set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
if (refetch)
set_option(transport, TRANS_OPT_REFETCH, "yes");
if (filter_options.choice) {
const char *spec =
expand_list_objects_filter_spec(&filter_options);
Expand Down Expand Up @@ -2293,8 +2306,25 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
NULL);
}

if (enable_auto_gc)
if (enable_auto_gc) {
if (refetch) {
/*
* Hint auto-maintenance strongly to encourage repacking,
* but respect config settings disabling it.
*/
int opt_val;
if (git_config_get_int("gc.autopacklimit", &opt_val))
opt_val = -1;
if (opt_val != 0)
git_config_push_parameter("gc.autoPackLimit=1");

if (git_config_get_int("maintenance.incremental-repack.auto", &opt_val))
opt_val = -1;
if (opt_val != 0)
git_config_push_parameter("maintenance.incremental-repack.auto=-1");
}
run_auto_maintenance(verbosity < 0);
}

cleanup:
string_list_clear(&list, 0);
Expand Down
5 changes: 5 additions & 0 deletions fetch-negotiator.c
Expand Up @@ -23,3 +23,8 @@ void fetch_negotiator_init(struct repository *r,
return;
}
}

/*
 * Initialize `negotiator` as a noop negotiator by delegating directly to
 * noop_negotiator_init(). Unlike fetch_negotiator_init(), this takes no
 * repository argument, so the choice of negotiator is made by the caller
 * rather than looked up here.
 */
void fetch_negotiator_init_noop(struct fetch_negotiator *negotiator)
{
noop_negotiator_init(negotiator);
}
8 changes: 8 additions & 0 deletions fetch-negotiator.h
Expand Up @@ -53,7 +53,15 @@ struct fetch_negotiator {
void *data;
};

/*
 * Initialize `negotiator` with the negotiation algorithm selected by the
 * settings of repository `r`.
 */
void fetch_negotiator_init(struct repository *r,
struct fetch_negotiator *negotiator);

/*
 * Initialize `negotiator` as a noop negotiator, regardless of any repository
 * settings. fetch-pack selects this initializer when a refetch is requested.
 * NOTE(review): presumably the noop negotiator never reports local commits
 * as common -- confirm against the noop negotiator implementation.
 */
void fetch_negotiator_init_noop(struct fetch_negotiator *negotiator);

#endif
46 changes: 29 additions & 17 deletions fetch-pack.c
Expand Up @@ -312,19 +312,21 @@ static int find_common(struct fetch_negotiator *negotiator,
const char *remote_hex;
struct object *o;

/*
* If that object is complete (i.e. it is an ancestor of a
* local ref), we tell them we have it but do not have to
* tell them about its ancestors, which they already know
* about.
*
* We use lookup_object here because we are only
* interested in the case we *know* the object is
* reachable and we have already scanned it.
*/
if (((o = lookup_object(the_repository, remote)) != NULL) &&
(o->flags & COMPLETE)) {
continue;
if (!args->refetch) {
/*
* If that object is complete (i.e. it is an ancestor of a
* local ref), we tell them we have it but do not have to
* tell them about its ancestors, which they already know
* about.
*
* We use lookup_object here because we are only
* interested in the case we *know* the object is
* reachable and we have already scanned it.
*/
if (((o = lookup_object(the_repository, remote)) != NULL) &&
(o->flags & COMPLETE)) {
continue;
}
}

remote_hex = oid_to_hex(remote);
Expand Down Expand Up @@ -692,6 +694,9 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
int old_save_commit_buffer = save_commit_buffer;
timestamp_t cutoff = 0;

if (args->refetch)
return;

save_commit_buffer = 0;

trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL);
Expand Down Expand Up @@ -1028,7 +1033,11 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
struct fetch_negotiator *negotiator;

negotiator = &negotiator_alloc;
fetch_negotiator_init(r, negotiator);
if (args->refetch) {
fetch_negotiator_init_noop(negotiator);
} else {
fetch_negotiator_init(r, negotiator);
}

sort_ref_list(&ref, ref_compare_name);
QSORT(sought, nr_sought, cmp_ref_by_name);
Expand Down Expand Up @@ -1121,7 +1130,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,

mark_complete_and_common_ref(negotiator, args, &ref);
filter_refs(args, &ref, sought, nr_sought);
if (everything_local(args, &ref)) {
if (!args->refetch && everything_local(args, &ref)) {
packet_flush(fd[1]);
goto all_done;
}
Expand Down Expand Up @@ -1587,7 +1596,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
struct strvec index_pack_args = STRVEC_INIT;

negotiator = &negotiator_alloc;
fetch_negotiator_init(r, negotiator);
if (args->refetch)
fetch_negotiator_init_noop(negotiator);
else
fetch_negotiator_init(r, negotiator);

packet_reader_init(&reader, fd[0], NULL, 0,
PACKET_READ_CHOMP_NEWLINE |
Expand All @@ -1613,7 +1625,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
/* Filter 'ref' by 'sought' and those that aren't local */
mark_complete_and_common_ref(negotiator, args, &ref);
filter_refs(args, &ref, sought, nr_sought);
if (everything_local(args, &ref))
if (!args->refetch && everything_local(args, &ref))
state = FETCH_DONE;
else
state = FETCH_SEND_REQUEST;
Expand Down
1 change: 1 addition & 0 deletions fetch-pack.h
Expand Up @@ -42,6 +42,7 @@ struct fetch_pack_args {
unsigned update_shallow:1;
unsigned reject_shallow_remote:1;
unsigned deepen:1;
unsigned refetch:1;

/*
* Indicate that the remote of this request is a promisor remote. The
Expand Down
6 changes: 6 additions & 0 deletions remote-curl.c
Expand Up @@ -43,6 +43,7 @@ struct options {
/* see documentation of corresponding flag in fetch-pack.h */
from_promisor : 1,

refetch : 1,
atomic : 1,
object_format : 1,
force_if_includes : 1;
Expand Down Expand Up @@ -198,6 +199,9 @@ static int set_option(const char *name, const char *value)
} else if (!strcmp(name, "from-promisor")) {
options.from_promisor = 1;
return 0;
} else if (!strcmp(name, "refetch")) {
options.refetch = 1;
return 0;
} else if (!strcmp(name, "filter")) {
options.filter = xstrdup(value);
return 0;
Expand Down Expand Up @@ -1182,6 +1186,8 @@ static int fetch_git(struct discovery *heads,
strvec_push(&args, "--deepen-relative");
if (options.from_promisor)
strvec_push(&args, "--from-promisor");
if (options.refetch)
strvec_push(&args, "--refetch");
if (options.filter)
strvec_pushf(&args, "--filter=%s", options.filter);
strvec_push(&args, url.buf);
Expand Down
81 changes: 80 additions & 1 deletion t/t5616-partial-clone.sh
Expand Up @@ -166,6 +166,85 @@ test_expect_success 'manual prefetch of missing objects' '
test_line_count = 0 observed.oids
'

# Create six new commits ("mod a" through "mod f") in the "src" repo, each
# rewriting file.4.txt, and push "main" to the "srv" remote ("srv.bare").
# Revisions a and b are padded with a 10000-character printf field and
# revisions c and d with a 20000-character field, while e and f stay small,
# so the new history holds blobs in three distinct size classes that the
# blob:limit filters in the following test can tell apart.
# NOTE(review): list_contains is defined outside this excerpt -- presumably it
# tests whether "$x" is a member of the comma-separated list; confirm.
test_expect_success 'push new commits to server for file.4.txt' '
for x in a b c d e f
do
echo "Mod file.4.txt $x" >src/file.4.txt &&
if list_contains "a,b" "$x"; then
printf "%10000s" X >>src/file.4.txt
fi &&
if list_contains "c,d" "$x"; then
printf "%20000s" X >>src/file.4.txt
fi &&
git -C src add file.4.txt &&
git -C src commit -m "mod $x" || return 1
done &&
git -C src push -u srv main
'

# Do a partial fetch with a small blob size limit (no --refetch) and expect
# rev-list --missing=print to report four entries in main..origin/main.
# Then raise the limit twice using --refetch and expect the number of reported
# entries to drop to two and then to zero, showing that --refetch applies the
# new filter to commits that are already present locally.
# The second --refetch forces protocol v0; the first one uses the default
# protocol (v2, per the original note), so both protocols are covered.
test_expect_success 'apply a different filter using --refetch' '
git -C pc1 fetch --filter=blob:limit=999 origin &&
git -C pc1 rev-list --quiet --objects --missing=print \
main..origin/main >observed &&
test_line_count = 4 observed &&
git -C pc1 fetch --filter=blob:limit=19999 --refetch origin &&
git -C pc1 rev-list --quiet --objects --missing=print \
main..origin/main >observed &&
test_line_count = 2 observed &&
git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 \
--refetch origin &&
git -C pc1 rev-list --quiet --objects --missing=print \
main..origin/main >observed &&
test_line_count = 0 observed
'

# Make a depth-1 clone of srv.bare with --filter=blob:none, then run a
# --refetch with --filter=blob:limit=999 in it. The rev-list output for HEAD
# must have six lines both before and after the refetch, i.e. the fetch must
# succeed and leave the listing for the shallow history unchanged in size.
# NOTE(review): GIT_TRACE=1 on the fetch is not checked by any assertion
# here -- it looks like a debugging aid; confirm whether it is still needed.
test_expect_success 'fetch --refetch works with a shallow clone' '
git clone --no-checkout --depth=1 --filter=blob:none "file://$(pwd)/srv.bare" pc1s &&
git -C pc1s rev-list --objects --missing=print HEAD >observed &&
test_line_count = 6 observed &&
GIT_TRACE=1 git -C pc1s fetch --filter=blob:limit=999 --refetch origin &&
git -C pc1s rev-list --objects --missing=print HEAD >observed &&
test_line_count = 6 observed
'

# Check, via trace2 event logs, that "fetch --refetch" runs
# "git maintenance run --auto --no-quiet" and which config values it sees.
# Three scenarios are covered:
#   1. No overrides: gc.autopacklimit is traced as 1 and
#      maintenance.incremental-repack.auto as -1.
#   2. gc.autoPackLimit=0 with maintenance.incremental-repack.auto=1234:
#      the 0 is left alone, the maintenance value becomes -1.
#   3. gc.autoPackLimit=1234 with maintenance.incremental-repack.auto=0:
#      the gc value becomes 1, the 0 is left alone.
# In other words, a value of 0 (disabled) is respected, anything else is
# replaced by the repack-encouraging value. Scenarios 2 and 3 also force
# protocol v0.
test_expect_success 'fetch --refetch triggers repacking' '
GIT_TRACE2_CONFIG_PARAMS=gc.autoPackLimit,maintenance.incremental-repack.auto &&
export GIT_TRACE2_CONFIG_PARAMS &&
GIT_TRACE2_EVENT="$PWD/trace1.event" \
git -C pc1 fetch --refetch origin &&
test_subcommand git maintenance run --auto --no-quiet <trace1.event &&
grep \"param\":\"gc.autopacklimit\",\"value\":\"1\" trace1.event &&
grep \"param\":\"maintenance.incremental-repack.auto\",\"value\":\"-1\" trace1.event &&
GIT_TRACE2_EVENT="$PWD/trace2.event" \
git -c protocol.version=0 \
-c gc.autoPackLimit=0 \
-c maintenance.incremental-repack.auto=1234 \
-C pc1 fetch --refetch origin &&
test_subcommand git maintenance run --auto --no-quiet <trace2.event &&
grep \"param\":\"gc.autopacklimit\",\"value\":\"0\" trace2.event &&
grep \"param\":\"maintenance.incremental-repack.auto\",\"value\":\"-1\" trace2.event &&
GIT_TRACE2_EVENT="$PWD/trace3.event" \
git -c protocol.version=0 \
-c gc.autoPackLimit=1234 \
-c maintenance.incremental-repack.auto=0 \
-C pc1 fetch --refetch origin &&
test_subcommand git maintenance run --auto --no-quiet <trace3.event &&
grep \"param\":\"gc.autopacklimit\",\"value\":\"1\" trace3.event &&
grep \"param\":\"maintenance.incremental-repack.auto\",\"value\":\"0\" trace3.event
'

test_expect_success 'partial clone with transfer.fsckobjects=1 works with submodules' '
test_create_repo submodule &&
test_commit -C submodule mycommit &&
Expand Down Expand Up @@ -225,7 +304,7 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr
# Auto-fetch all remaining trees and blobs with --missing=error
git -C dst rev-list --missing=error --objects main >fetched_objects &&
test_line_count = 70 fetched_objects &&
test_line_count = 88 fetched_objects &&
awk -f print_1.awk fetched_objects |
xargs -n1 git -C dst cat-file -t >fetched_types &&
Expand Down
3 changes: 3 additions & 0 deletions transport-helper.c
Expand Up @@ -715,6 +715,9 @@ static int fetch_refs(struct transport *transport,
if (data->transport_options.update_shallow)
set_helper_option(transport, "update-shallow", "true");

if (data->transport_options.refetch)
set_helper_option(transport, "refetch", "true");

if (data->transport_options.filter_options.choice) {
const char *spec = expand_list_objects_filter_spec(
&data->transport_options.filter_options);
Expand Down
4 changes: 4 additions & 0 deletions transport.c
Expand Up @@ -250,6 +250,9 @@ static int set_git_option(struct git_transport_options *opts,
list_objects_filter_die_if_populated(&opts->filter_options);
parse_list_objects_filter(&opts->filter_options, value);
return 0;
} else if (!strcmp(name, TRANS_OPT_REFETCH)) {
opts->refetch = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_REJECT_SHALLOW)) {
opts->reject_shallow = !!value;
return 0;
Expand Down Expand Up @@ -384,6 +387,7 @@ static int fetch_refs_via_pack(struct transport *transport,
args.update_shallow = data->options.update_shallow;
args.from_promisor = data->options.from_promisor;
args.filter_options = data->options.filter_options;
args.refetch = data->options.refetch;
args.stateless_rpc = transport->stateless_rpc;
args.server_options = transport->server_options;
args.negotiation_tips = data->options.negotiation_tips;
Expand Down

0 comments on commit 649b59b

Please sign in to comment.