diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 624470e198202a..1d1cc70de40583 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -51,6 +51,9 @@ or `--stdin-packs`.) + With the `--append` option, include all commits that are present in the existing commit-graph file. ++ +With the `--version=` option, specify the file format version. Used +only for testing. 'read':: diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index 16452a0504c8fa..e367aa94b15eed 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -31,13 +31,22 @@ and hash type. All 4-byte numbers are in network order. +There are two versions available, 1 and 2. These currently differ only in +the header. + HEADER: +All commit-graph files use the first five bytes for the same purpose. + 4-byte signature: The signature is: {'C', 'G', 'P', 'H'} 1-byte version number: - Currently, the only valid version is 1. + Currently, the valid version numbers are 1 and 2. + +The remainder of the header changes depending on the version. + +Version 1: 1-byte Hash Version (1 = SHA-1) We infer the hash length (H) from this value. @@ -47,6 +56,21 @@ HEADER: 1-byte (reserved for later use) Current clients should ignore this value. +Version 2: + + 1-byte number (C) of "chunks" + + 1-byte reachability index version number: + Currently, the only valid number is 1. + + 1-byte (reserved for later use) + Current clients expect this value to be zero, and will not + try to read the commit-graph file if it is non-zero. + + 4-byte format identifier for the hash algorithm: + If this identifier does not agree with the repository's current + hash algorithm, then the client will not read the commit graph. + CHUNK LOOKUP: (C + 1) * 12 bytes listing the table of contents for the chunks: diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 4ae502754c292d..28787d0c9cc457 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits] [--version=]"), NULL }; @@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits] [--version=]"), NULL }; @@ -35,6 +35,7 @@ static struct opts_commit_graph { int stdin_packs; int stdin_commits; int append; + int version; } opts; @@ -101,6 +102,11 @@ static int graph_read(int argc, const char **argv) *(unsigned char*)(graph->data + 5), *(unsigned char*)(graph->data + 6), *(unsigned char*)(graph->data + 7)); + + if (*(unsigned char *)(graph->data + 4) == 2) + printf("hash algorithm: %X\n", + get_be32(graph->data + 8)); + printf("num_commits: %u\n", graph->num_commits); printf("chunks:"); @@ -126,6 +132,8 @@ static int graph_write(int argc, const char **argv) struct string_list *pack_indexes = NULL; struct string_list *commit_hex = NULL; struct string_list lines; + int result; + int flags = COMMIT_GRAPH_PROGRESS; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -139,6 +147,8 @@ static int graph_write(int argc, const char **argv) N_("start walk at commits listed by stdin")), OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), + OPT_INTEGER(0, "version", &opts.version, + N_("specify the file format version")), OPT_END(), }; @@ -150,14 +160,24 @@ static int graph_write(int argc, const char **argv) die(_("use at most one of --reachable, --stdin-commits, or --stdin-packs")); if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + if (opts.append) + flags |= COMMIT_GRAPH_APPEND; - read_replace_refs = 0; + switch (opts.version) { + case 1: + flags |= COMMIT_GRAPH_VERSION_1; + break; - if (opts.reachable) { - write_commit_graph_reachable(opts.obj_dir, opts.append, 1); - return 0; + case 2: + flags |= COMMIT_GRAPH_VERSION_2; + break; } + read_replace_refs = 0; + + if (opts.reachable) + return write_commit_graph_reachable(opts.obj_dir, flags); + string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { struct strbuf buf = STRBUF_INIT; @@ -173,14 +193,13 @@ static int graph_write(int argc, const char **argv) UNLEAK(buf); } - write_commit_graph(opts.obj_dir, - pack_indexes, - commit_hex, - opts.append, - 1); + result = write_commit_graph(opts.obj_dir, + pack_indexes, + commit_hex, + flags); UNLEAK(lines); - return 0; + return result; } int cmd_commit_graph(int argc, const char **argv, const char *prefix) diff --git a/builtin/commit.c b/builtin/commit.c index 0e83eba6340317..8d0941159311dd 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1662,8 +1662,9 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "new_index file. Check that disk is not full and quota is\n" "not exceeded, and then \"git reset HEAD\" to recover.")); - if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0)) - write_commit_graph_reachable(get_object_directory(), 0, 0); + if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && + write_commit_graph_reachable(get_object_directory(), 0)) + return 1; repo_rerere(the_repository, 0); run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); diff --git a/builtin/gc.c b/builtin/gc.c index 020f725acc40f4..df2573f124d5fd 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -664,9 +664,10 @@ int cmd_gc(int argc, const char **argv, const char *prefix) clean_pack_garbage(); } - if (gc_write_commit_graph) - write_commit_graph_reachable(get_object_directory(), 0, - !quiet && !daemonized); + if (gc_write_commit_graph && + write_commit_graph_reachable(get_object_directory(), + !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0)) + return 1; if (auto_gc && too_many_loose_objects()) warning(_("There are too many unreachable loose objects; " diff --git a/commit-graph.c b/commit-graph.c index 47e9be0a3aad88..d54b8d3efc0e67 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -25,9 +25,6 @@ #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) -#define GRAPH_VERSION_1 0x1 -#define GRAPH_VERSION GRAPH_VERSION_1 - #define GRAPH_EXTRA_EDGES_NEEDED 0x80000000 #define GRAPH_EDGE_LAST_MASK 0x7fffffff #define GRAPH_PARENT_NONE 0x70000000 @@ -121,7 +118,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, uint64_t last_chunk_offset; uint32_t last_chunk_id; uint32_t graph_signature; - unsigned char graph_version, hash_version; + unsigned char graph_version, hash_version, reach_index_version; + uint32_t hash_id; if (!graph_map) return NULL; @@ -139,30 +137,59 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, } graph_version = *(unsigned char*)(data + 4); - if (graph_version != GRAPH_VERSION) { - error(_("graph version %X does not match version %X"), - graph_version, GRAPH_VERSION); - return NULL; - } - - hash_version = *(unsigned char*)(data + 5); - if (hash_version != oid_version()) { - error(_("hash version %X does not match version %X"), - hash_version, oid_version()); + if (!graph_version || graph_version > 2) { + error(_("unsupported graph version %X"), + graph_version); return NULL; } graph = alloc_commit_graph(); + switch (graph_version) { + case 1: + hash_version = *(unsigned char*)(data + 5); + if (hash_version != oid_version()) { + error(_("hash version %X does not match version %X"), + hash_version, oid_version()); + return NULL; + } + + graph->num_chunks = *(unsigned char*)(data + 6); + chunk_lookup = data + 8; + break; + + case 2: + graph->num_chunks = *(unsigned char *)(data + 5); + + reach_index_version = *(unsigned char *)(data + 6); + if (reach_index_version != 1) { + error(_("unsupported reachability index version %d"), + reach_index_version); + return NULL; + } + + if (*(unsigned char*)(data + 7)) { + error(_("unsupported value in commit-graph header")); + return NULL; + } + + hash_id = get_be32(data + 8); + if (hash_id != the_hash_algo->format_id) { + error(_("commit-graph hash algorithm does not match current algorithm")); + return NULL; + } + + chunk_lookup = data + 12; + break; + } + graph->hash_len = the_hash_algo->rawsz; - graph->num_chunks = *(unsigned char*)(data + 6); graph->graph_fd = fd; graph->data = graph_map; graph->data_len = graph_size; last_chunk_id = 0; last_chunk_offset = 8; - chunk_lookup = data + 8; for (i = 0; i < graph->num_chunks; i++) { uint32_t chunk_id; uint64_t chunk_offset; @@ -797,27 +824,29 @@ static int add_ref_to_list(const char *refname, return 0; } -void write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress) +int write_commit_graph_reachable(const char *obj_dir, int flags) { struct string_list list = STRING_LIST_INIT_DUP; + int result; for_each_ref(add_ref_to_list, &list); - write_commit_graph(obj_dir, NULL, &list, append, report_progress); + result = write_commit_graph(obj_dir, NULL, &list, + flags); string_list_clear(&list, 0); + return result; } -void write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - int append, int report_progress) +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + int flags) { struct packed_oid_list oids; struct packed_commit_list commits; struct hashfile *f; uint32_t i, count_distinct = 0; - char *graph_name; + char *graph_name = NULL; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; @@ -829,15 +858,33 @@ void write_commit_graph(const char *obj_dir, uint64_t progress_cnt = 0; struct strbuf progress_title = STRBUF_INIT; unsigned long approx_nr_objects; + int res = 0; + int append = flags & COMMIT_GRAPH_APPEND; + int report_progress = flags & COMMIT_GRAPH_PROGRESS; + int version = 0; + int header_size = 0; if (!commit_graph_compatible(the_repository)) - return; + return 0; + + if (flags & COMMIT_GRAPH_VERSION_1) + version = 1; + if (flags & COMMIT_GRAPH_VERSION_2) + version = 2; + if (!version) + version = 2; + if (version <= 0 || version > 2) { + error(_("unsupported commit-graph version %d"), + version); + return 1; + } oids.nr = 0; approx_nr_objects = approximate_object_count(); oids.alloc = approx_nr_objects / 32; oids.progress = NULL; oids.progress_done = 0; + commits.list = NULL; if (append) { prepare_commit_graph_one(the_repository, obj_dir); @@ -878,10 +925,16 @@ void write_commit_graph(const char *obj_dir, strbuf_setlen(&packname, dirlen); strbuf_addstr(&packname, pack_indexes->items[i].string); p = add_packed_git(packname.buf, packname.len, 1); - if (!p) - die(_("error adding pack %s"), packname.buf); - if (open_pack_index(p)) - die(_("error opening index for %s"), packname.buf); + if (!p) { + error(_("error adding pack %s"), packname.buf); + res = 1; + goto cleanup; + } + if (open_pack_index(p)) { + error(_("error opening index for %s"), packname.buf); + res = 1; + goto cleanup; + } for_each_object_in_pack(p, add_packed_commits, &oids, FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); @@ -952,8 +1005,11 @@ void write_commit_graph(const char *obj_dir, } stop_progress(&progress); - if (count_distinct >= GRAPH_EDGE_LAST_MASK) - die(_("the commit graph format cannot write %d commits"), count_distinct); + if (count_distinct >= GRAPH_EDGE_LAST_MASK) { + error(_("the commit graph format cannot write %d commits"), count_distinct); + res = 1; + goto cleanup; + } commits.nr = 0; commits.alloc = count_distinct; @@ -985,16 +1041,21 @@ void write_commit_graph(const char *obj_dir, num_chunks = num_extra_edges ? 4 : 3; stop_progress(&progress); - if (commits.nr >= GRAPH_EDGE_LAST_MASK) - die(_("too many commits to write graph")); + if (commits.nr >= GRAPH_EDGE_LAST_MASK) { + error(_("too many commits to write graph")); + res = 1; + goto cleanup; + } compute_generation_numbers(&commits, report_progress); graph_name = get_commit_graph_filename(obj_dir); if (safe_create_leading_directories(graph_name)) { UNLEAK(graph_name); - die_errno(_("unable to create leading directories of %s"), - graph_name); + error(_("unable to create leading directories of %s"), + graph_name); + res = errno; + goto cleanup; } hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); @@ -1002,10 +1063,24 @@ void write_commit_graph(const char *obj_dir, hashwrite_be32(f, GRAPH_SIGNATURE); - hashwrite_u8(f, GRAPH_VERSION); - hashwrite_u8(f, oid_version()); - hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); /* unused padding byte */ + hashwrite_u8(f, version); + + switch (version) { + case 1: + hashwrite_u8(f, oid_version()); + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 0); /* unused padding byte */ + header_size = 8; + break; + + case 2: + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 1); /* reachability index version */ + hashwrite_u8(f, 0); /* unused padding byte */ + hashwrite_be32(f, the_hash_algo->format_id); + header_size = 12; + break; + } chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; @@ -1016,7 +1091,7 @@ void write_commit_graph(const char *obj_dir, chunk_ids[3] = 0; chunk_ids[4] = 0; - chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; + chunk_offsets[0] = header_size + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; chunk_offsets[2] = chunk_offsets[1] + hashsz * commits.nr; chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * commits.nr; @@ -1053,9 +1128,12 @@ void write_commit_graph(const char *obj_dir, finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); +cleanup: free(graph_name); free(commits.list); free(oids.list); + + return res; } #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2 diff --git a/commit-graph.h b/commit-graph.h index 096d8bac340514..dd0bba3931b9f6 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -63,12 +63,16 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, */ int generation_numbers_enabled(struct repository *r); -void write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress); -void write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - int append, int report_progress); +#define COMMIT_GRAPH_APPEND (1 << 0) +#define COMMIT_GRAPH_PROGRESS (1 << 1) +#define COMMIT_GRAPH_VERSION_1 (1 << 2) +#define COMMIT_GRAPH_VERSION_2 (1 << 3) + +int write_commit_graph_reachable(const char *obj_dir, int flags); +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + int flags); int verify_commit_graph(struct repository *r, struct commit_graph *g); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 16d10ebce82fa1..1778be63a93d05 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -33,13 +33,13 @@ test_expect_success 'create commits and repack' ' git repack ' -graph_git_two_modes() { +graph_git_two_modes () { git -c core.commitGraph=true $1 >output git -c core.commitGraph=false $1 >expect test_cmp expect output } -graph_git_behavior() { +graph_git_behavior () { MSG=$1 DIR=$2 BRANCH=$3 @@ -56,7 +56,7 @@ graph_git_behavior() { graph_git_behavior 'no graph' full commits/3 commits/1 -graph_read_expect() { +graph_read_expect () { OPTIONAL="" NUM_CHUNKS=3 if test ! -z $2 @@ -65,7 +65,8 @@ graph_read_expect() { NUM_CHUNKS=$((3 + $(echo "$2" | wc -w))) fi cat >expect <<- EOF - header: 43475048 1 1 $NUM_CHUNKS 0 + header: 43475048 2 $NUM_CHUNKS 1 0 + hash algorithm: 73686131 num_commits: $1 chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL EOF @@ -328,7 +329,7 @@ test_expect_success 'replace-objects invalidates commit-graph' ' test_expect_success 'git commit-graph verify' ' cd "$TRASH_DIRECTORY/full" && - git rev-parse commits/8 | git commit-graph write --stdin-commits && + git rev-parse commits/8 | git commit-graph write --stdin-commits --version=1 && git commit-graph verify >output ' @@ -372,7 +373,7 @@ GRAPH_BYTE_FOOTER=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4 * $NUM_OCTOPUS_EDGES)) # starting at , then runs 'git commit-graph verify' # and places the output in the file 'err'. Test 'err' for # the given string. -corrupt_graph_and_verify() { +corrupt_graph_and_verify () { pos=$1 data="${2:-\0}" grepstr=$3 @@ -395,10 +396,14 @@ test_expect_success 'detect bad signature' ' ' test_expect_success 'detect bad version' ' - corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\02" \ + corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\03" \ "graph version" ' +test_expect_success 'detect version 2 with version 1 data' ' + corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\02" \ + "reachability index version" +' test_expect_success 'detect bad hash version' ' corrupt_graph_and_verify $GRAPH_BYTE_HASH "\02" \ "hash version" @@ -502,6 +507,37 @@ test_expect_success 'git fsck (checks commit-graph)' ' test_must_fail git fsck ' +test_expect_success 'rewrite commmit-graph with version 2' ' + rm -f .git/objects/info/commit-graph && + git commit-graph write --reachable --version=2 && + git commit-graph verify +' + +GRAPH_BYTE_CHUNK_COUNT=5 +GRAPH_BYTE_REACH_INDEX=6 +GRAPH_BYTE_UNUSED=7 +GRAPH_BYTE_HASH=8 + +test_expect_success 'detect low chunk count (v2)' ' + corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \ + "missing the .* chunk" +' + +test_expect_success 'detect incorrect reachability index' ' + corrupt_graph_and_verify $GRAPH_BYTE_REACH_INDEX "\03" \ + "reachability index version" +' + +test_expect_success 'detect non-zero unused byte' ' + corrupt_graph_and_verify $GRAPH_BYTE_UNUSED "\01" \ + "unsupported value" +' + +test_expect_success 'detect bad hash version (v2)' ' + corrupt_graph_and_verify $GRAPH_BYTE_HASH "\00" \ + "hash algorithm" +' + test_expect_success 'setup non-the_repository tests' ' rm -rf repo && git init repo &&