From 2d2da172f37f5a406a0f5a099cd268bcb1bd7d42 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 28 Feb 2024 09:44:10 +0000 Subject: [PATCH] commit-reach(paint_down_to_common): prepare for handling shallow commits When `git fetch --update-shallow` needs to test for commit ancestry, it can naturally run into a missing object (e.g. if it is a parent of a shallow commit). For the purpose of `--update-shallow`, this needs to be treated as if the child commit did not even have that parent, i.e. the commit history needs to be clamped. For all other scenarios, clamping the commit history is actually a bug, as it would hide repository corruption (for an analysis regarding shallow and partial clones, see the analysis further down). Add a flag to optionally ask the function to ignore missing commits, as `--update-shallow` needs it to, while detecting missing objects as a repository corruption error by default. This flag is needed, and cannot be replaced by `is_repository_shallow()` to indicate that situation, because that function would return 0 in the `--update-shallow` scenario: There is not actually a `shallow` file in that scenario, as demonstrated e.g. by t5537.10 ("add new shallow root with receive.updateshallow on") and t5538.4 ("add new shallow root with receive.updateshallow on"). Note: shallow commits' parents are set to `NULL` internally already, therefore there is no need to special-case shallow repositories here, as the merge-base logic will not try to access parent commits of shallow commits. Likewise, partial clones aren't an issue either: If a commit is missing during the revision walk in the merge-base logic, it is fetched via `promisor_remote_get_direct()`. And not only the single missing commit object: Due to the way the "promised" objects are fetched (in `fetch_objects()` in `promisor-remote.c`, using `fetch --filter=blob:none`), there is no actual way to fetch a single commit object, as the remote side will pass that commit OID to `pack-objects --revs [...]` which in turn passes it to `rev-list` which interprets this as a commit _range_ instead of a single object. Therefore, in partial clones (unless they are shallow in addition), all commits reachable from a commit that is in the local object database are also present in that local database. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- commit-reach.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index 1c5121397879f1..616738fc4e18c6 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -52,7 +52,8 @@ static int queue_has_nonstale(struct prio_queue *queue) static struct commit_list *paint_down_to_common(struct repository *r, struct commit *one, int n, struct commit **twos, - timestamp_t min_generation) + timestamp_t min_generation, + int ignore_missing_commits) { struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; @@ -107,6 +108,13 @@ static struct commit_list *paint_down_to_common(struct repository *r, if (repo_parse_commit(r, p)) { clear_prio_queue(&queue); free_commit_list(result); + /* + * At this stage, we know that the commit is + * missing: `repo_parse_commit()` uses + * `OBJECT_INFO_DIE_IF_CORRUPT` and therefore + * corrupt commits would already have been + * dispatched with a `die()`. + */ return NULL; } p->object.flags |= flags; @@ -142,7 +150,7 @@ static struct commit_list *merge_bases_many(struct repository *r, return NULL; } - list = paint_down_to_common(r, one, n, twos, 0); + list = paint_down_to_common(r, one, n, twos, 0, 0); while (list) { struct commit *commit = pop_commit(&list); @@ -213,7 +221,7 @@ static int remove_redundant_no_gen(struct repository *r, min_generation = curr_generation; } common = paint_down_to_common(r, array[i], filled, - work, min_generation); + work, min_generation, 0); if (array[i]->object.flags & PARENT2) redundant[i] = 1; for (j = 0; j < filled; j++) @@ -503,7 +511,7 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit, bases = paint_down_to_common(r, commit, nr_reference, reference, - generation); + generation, ignore_missing_commits); if (commit->object.flags & PARENT2) ret = 1; clear_commit_marks(commit, all_flags);