From 8e7e6c055e6754703145cb454b65582b15b059e5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 23 Jan 2019 13:59:15 -0800 Subject: [PATCH 1/7] commit-graph: return with errors during write The write_commit_graph() method uses die() to report failure and exit when confronted with an unexpected condition. This use of die() in a library function is incorrect and is now replaced by error() statements and an int return type. Now that we use 'goto cleanup' to jump to the terminal condition on an error, we have new paths that could lead to uninitialized values. New initializers are added to correct for this. The builtins 'commit-graph', 'gc', and 'commit' call these methods, so update them to check the return value. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 19 +++++++------ builtin/commit.c | 5 ++-- builtin/gc.c | 7 ++--- commit-graph.c | 60 +++++++++++++++++++++++++++++------------- commit-graph.h | 10 +++---- 5 files changed, 62 insertions(+), 39 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 4ae502754c292d..b12d46fdc88ca6 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -126,6 +126,7 @@ static int graph_write(int argc, const char **argv) struct string_list *pack_indexes = NULL; struct string_list *commit_hex = NULL; struct string_list lines; + int result; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -153,10 +154,8 @@ static int graph_write(int argc, const char **argv) read_replace_refs = 0; - if (opts.reachable) { - write_commit_graph_reachable(opts.obj_dir, opts.append, 1); - return 0; - } + if (opts.reachable) + return write_commit_graph_reachable(opts.obj_dir, opts.append, 1); string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { @@ -173,14 +172,14 @@ static int graph_write(int argc, const char **argv) UNLEAK(buf); } - write_commit_graph(opts.obj_dir, - pack_indexes, - commit_hex, - opts.append, - 1); + result = write_commit_graph(opts.obj_dir, + pack_indexes, + commit_hex, + opts.append, + 1); UNLEAK(lines); - return 0; + return result; } int cmd_commit_graph(int argc, const char **argv, const char *prefix) diff --git a/builtin/commit.c b/builtin/commit.c index 004b816635bf16..04b0717b3551bc 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1667,8 +1667,9 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "new_index file. Check that disk is not full and quota is\n" "not exceeded, and then \"git reset HEAD\" to recover.")); - if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0)) - write_commit_graph_reachable(get_object_directory(), 0, 0); + if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && + write_commit_graph_reachable(get_object_directory(), 0, 0)) + return 1; repo_rerere(the_repository, 0); run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); diff --git a/builtin/gc.c b/builtin/gc.c index 7696017cd4152e..9c6c9c90071d52 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -662,9 +662,10 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (pack_garbage.nr > 0) clean_pack_garbage(); - if (gc_write_commit_graph) - write_commit_graph_reachable(get_object_directory(), 0, - !quiet && !daemonized); + if (gc_write_commit_graph && + write_commit_graph_reachable(get_object_directory(), 0, + !quiet && !daemonized)) + return 1; if (auto_gc && too_many_loose_objects()) warning(_("There are too many unreachable loose objects; " diff --git a/commit-graph.c b/commit-graph.c index 3538ebff9c0186..80e7cc52749f9c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -755,27 +755,30 @@ static int add_ref_to_list(const char *refname, return 0; } -void write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress) +int write_commit_graph_reachable(const char *obj_dir, int append, + int report_progress) { struct string_list list = STRING_LIST_INIT_DUP; + int result; for_each_ref(add_ref_to_list, &list); - write_commit_graph(obj_dir, NULL, &list, append, report_progress); + result = write_commit_graph(obj_dir, NULL, &list, + append, report_progress); string_list_clear(&list, 0); + return result; } -void write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - int append, int report_progress) +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + int append, int report_progress) { struct packed_oid_list oids; struct packed_commit_list commits; struct hashfile *f; uint32_t i, count_distinct = 0; - char *graph_name; + char *graph_name = NULL; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; @@ -787,15 +790,17 @@ void write_commit_graph(const char *obj_dir, struct strbuf progress_title = STRBUF_INIT; unsigned long approx_nr_objects; const unsigned hashsz = the_hash_algo->rawsz; + int res = 0; if (!commit_graph_compatible(the_repository)) - return; + return 0; oids.nr = 0; approx_nr_objects = approximate_object_count(); oids.alloc = approx_nr_objects / 32; oids.progress = NULL; oids.progress_done = 0; + commits.list = NULL; if (append) { prepare_commit_graph_one(the_repository, obj_dir); @@ -836,10 +841,16 @@ void write_commit_graph(const char *obj_dir, strbuf_setlen(&packname, dirlen); strbuf_addstr(&packname, pack_indexes->items[i].string); p = add_packed_git(packname.buf, packname.len, 1); - if (!p) - die(_("error adding pack %s"), packname.buf); - if (open_pack_index(p)) - die(_("error opening index for %s"), packname.buf); + if (!p) { + error(_("error adding pack %s"), packname.buf); + res = 1; + goto cleanup; + } + if (open_pack_index(p)) { + error(_("error opening index for %s"), packname.buf); + res = 1; + goto cleanup; + } for_each_object_in_pack(p, add_packed_commits, &oids, FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); @@ -910,8 +921,11 @@ void write_commit_graph(const char *obj_dir, } stop_progress(&progress); - if (count_distinct >= GRAPH_PARENT_MISSING) - die(_("the commit graph format cannot write %d commits"), count_distinct); + if (count_distinct >= GRAPH_PARENT_MISSING) { + error(_("the commit graph format cannot write %d commits"), count_distinct); + res = 1; + goto cleanup; + } commits.nr = 0; commits.alloc = count_distinct; @@ -943,16 +957,21 @@ void write_commit_graph(const char *obj_dir, num_chunks = num_extra_edges ? 4 : 3; stop_progress(&progress); - if (commits.nr >= GRAPH_PARENT_MISSING) - die(_("too many commits to write graph")); + if (commits.nr >= GRAPH_PARENT_MISSING) { + error(_("too many commits to write graph")); + res = 1; + goto cleanup; + } compute_generation_numbers(&commits, report_progress); graph_name = get_commit_graph_filename(obj_dir); if (safe_create_leading_directories(graph_name)) { UNLEAK(graph_name); - die_errno(_("unable to create leading directories of %s"), - graph_name); + error(_("unable to create leading directories of %s"), + graph_name); + res = errno; + goto cleanup; } hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); @@ -1011,9 +1030,12 @@ void write_commit_graph(const char *obj_dir, finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); +cleanup: free(graph_name); free(commits.list); free(oids.list); + + return res; } #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2 diff --git a/commit-graph.h b/commit-graph.h index e6aff2c2e1042f..cd333a0cd0c3ef 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -60,12 +60,12 @@ struct commit_graph *load_commit_graph_one(const char *graph_file); */ int generation_numbers_enabled(struct repository *r); -void write_commit_graph_reachable(const char *obj_dir, int append, +int write_commit_graph_reachable(const char *obj_dir, int append, int report_progress); -void write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - int append, int report_progress); +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + int append, int report_progress); int verify_commit_graph(struct repository *r, struct commit_graph *g); From 615151bf5c1cd15ba7efa19629d6016a9b4e1923 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 23 Jan 2019 13:59:16 -0800 Subject: [PATCH 2/7] commit-graph: collapse parameters into flags The write_commit_graph() and write_commit_graph_reachable() methods currently take two boolean parameters: 'append' and 'report_progress'. We will soon expand the possible options to send to these methods, so instead of complicating the parameter list, first simplify it. Collapse these parameters into a 'flags' parameter, and adjust the callers to provide flags as necessary. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 8 +++++--- builtin/commit.c | 2 +- builtin/gc.c | 4 ++-- commit-graph.c | 9 +++++---- commit-graph.h | 8 +++++--- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index b12d46fdc88ca6..0c92421f759347 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -127,6 +127,7 @@ static int graph_write(int argc, const char **argv) struct string_list *commit_hex = NULL; struct string_list lines; int result; + int flags = COMMIT_GRAPH_PROGRESS; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -151,11 +152,13 @@ static int graph_write(int argc, const char **argv) die(_("use at most one of --reachable, --stdin-commits, or --stdin-packs")); if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + if (opts.append) + flags |= COMMIT_GRAPH_APPEND; read_replace_refs = 0; if (opts.reachable) - return write_commit_graph_reachable(opts.obj_dir, opts.append, 1); + return write_commit_graph_reachable(opts.obj_dir, flags); string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { @@ -175,8 +178,7 @@ static int graph_write(int argc, const char **argv) result = write_commit_graph(opts.obj_dir, pack_indexes, commit_hex, - opts.append, - 1); + flags); UNLEAK(lines); return result; diff --git a/builtin/commit.c b/builtin/commit.c index 04b0717b3551bc..3228de4e3c7f5c 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1668,7 +1668,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "not exceeded, and then \"git reset HEAD\" to recover.")); if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && - write_commit_graph_reachable(get_object_directory(), 0, 0)) + write_commit_graph_reachable(get_object_directory(), 0)) return 1; repo_rerere(the_repository, 0); diff --git a/builtin/gc.c b/builtin/gc.c index 9c6c9c90071d52..198872206be8ca 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -663,8 +663,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) clean_pack_garbage(); if (gc_write_commit_graph && - write_commit_graph_reachable(get_object_directory(), 0, - !quiet && !daemonized)) + write_commit_graph_reachable(get_object_directory(), + !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0)) return 1; if (auto_gc && too_many_loose_objects()) diff --git a/commit-graph.c b/commit-graph.c index 80e7cc52749f9c..c7f3d75f01a95c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -755,15 +755,14 @@ static int add_ref_to_list(const char *refname, return 0; } -int write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress) +int write_commit_graph_reachable(const char *obj_dir, int flags) { struct string_list list = STRING_LIST_INIT_DUP; int result; for_each_ref(add_ref_to_list, &list); result = write_commit_graph(obj_dir, NULL, &list, - append, report_progress); + flags); string_list_clear(&list, 0); return result; @@ -772,7 +771,7 @@ int write_commit_graph_reachable(const char *obj_dir, int append, int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - int append, int report_progress) + int flags) { struct packed_oid_list oids; struct packed_commit_list commits; @@ -791,6 +790,8 @@ int write_commit_graph(const char *obj_dir, unsigned long approx_nr_objects; const unsigned hashsz = the_hash_algo->rawsz; int res = 0; + int append = flags & COMMIT_GRAPH_APPEND; + int report_progress = flags & COMMIT_GRAPH_PROGRESS; if (!commit_graph_compatible(the_repository)) return 0; diff --git a/commit-graph.h b/commit-graph.h index cd333a0cd0c3ef..83fa5481380b32 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -60,12 +60,14 @@ struct commit_graph *load_commit_graph_one(const char *graph_file); */ int generation_numbers_enabled(struct repository *r); -int write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress); +#define COMMIT_GRAPH_APPEND (1 << 0) +#define COMMIT_GRAPH_PROGRESS (1 << 1) + +int write_commit_graph_reachable(const char *obj_dir, int flags); int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - int append, int report_progress); + int flags); int verify_commit_graph(struct repository *r, struct commit_graph *g); From b1beb0502e1bc5243236a177136de8929b42babb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 23 Jan 2019 13:59:17 -0800 Subject: [PATCH 3/7] commit-graph: create new version flags In anticipation of a new commit-graph file format version, create a flag for the write_commit_graph() and write_commit_graph_reachable() methods to take a version number. When there is no specified version, the implementation selects a default value. Currently, the only valid value is 1. The file format will change the header information, so place the existing header logic inside a switch statement with only one case. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 58 +++++++++++++++++++++++++++++++++----------------- commit-graph.h | 1 + 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index c7f3d75f01a95c..a087d7bda06aa9 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -25,9 +25,6 @@ #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) -#define GRAPH_VERSION_1 0x1 -#define GRAPH_VERSION GRAPH_VERSION_1 - #define GRAPH_EXTRA_EDGES_NEEDED 0x80000000 #define GRAPH_PARENT_MISSING 0x7fffffff #define GRAPH_EDGE_LAST_MASK 0x7fffffff @@ -118,30 +115,35 @@ struct commit_graph *load_commit_graph_one(const char *graph_file) } graph_version = *(unsigned char*)(data + 4); - if (graph_version != GRAPH_VERSION) { + if (graph_version != 1) { error(_("graph version %X does not match version %X"), - graph_version, GRAPH_VERSION); - goto cleanup_fail; - } - - hash_version = *(unsigned char*)(data + 5); - if (hash_version != oid_version()) { - error(_("hash version %X does not match version %X"), - hash_version, oid_version()); + graph_version, 1); goto cleanup_fail; } graph = alloc_commit_graph(); + switch (graph_version) { + case 1: + hash_version = *(unsigned char*)(data + 5); + if (hash_version != oid_version()) { + error(_("hash version %X does not match version %X"), + hash_version, oid_version()); + goto cleanup_fail; + } + + graph->num_chunks = *(unsigned char*)(data + 6); + chunk_lookup = data + 8; + break; + } + graph->hash_len = the_hash_algo->rawsz; - graph->num_chunks = *(unsigned char*)(data + 6); graph->graph_fd = fd; graph->data = graph_map; graph->data_len = graph_size; last_chunk_id = 0; last_chunk_offset = 8; - chunk_lookup = data + 8; for (i = 0; i < graph->num_chunks; i++) { uint32_t chunk_id = get_be32(chunk_lookup + 0); uint64_t chunk_offset = get_be64(chunk_lookup + 4); @@ -792,10 +794,22 @@ int write_commit_graph(const char *obj_dir, int res = 0; int append = flags & COMMIT_GRAPH_APPEND; int report_progress = flags & COMMIT_GRAPH_PROGRESS; + int version = 0; + int header_size = 0; if (!commit_graph_compatible(the_repository)) return 0; + if (flags & COMMIT_GRAPH_VERSION_1) + version = 1; + if (!version) + version = 1; + if (version != 1) { + error(_("unsupported commit-graph version %d"), + version); + return 1; + } + oids.nr = 0; approx_nr_objects = approximate_object_count(); oids.alloc = approx_nr_objects / 32; @@ -980,10 +994,16 @@ int write_commit_graph(const char *obj_dir, hashwrite_be32(f, GRAPH_SIGNATURE); - hashwrite_u8(f, GRAPH_VERSION); - hashwrite_u8(f, oid_version()); - hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); /* unused padding byte */ + hashwrite_u8(f, version); + + switch (version) { + case 1: + hashwrite_u8(f, oid_version()); + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 0); /* unused padding byte */ + header_size = 8; + break; + } chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; @@ -994,7 +1014,7 @@ int write_commit_graph(const char *obj_dir, chunk_ids[3] = 0; chunk_ids[4] = 0; - chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; + chunk_offsets[0] = header_size + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; chunk_offsets[2] = chunk_offsets[1] + hashsz * commits.nr; chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * commits.nr; diff --git a/commit-graph.h b/commit-graph.h index 83fa5481380b32..e03df54e33bf28 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -62,6 +62,7 @@ int generation_numbers_enabled(struct repository *r); #define COMMIT_GRAPH_APPEND (1 << 0) #define COMMIT_GRAPH_PROGRESS (1 << 1) +#define COMMIT_GRAPH_VERSION_1 (1 << 2) int write_commit_graph_reachable(const char *obj_dir, int flags); int write_commit_graph(const char *obj_dir, From 02fc77fc920dd917fd94f6d1bb849b8cb5987349 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 23 Jan 2019 13:59:18 -0800 Subject: [PATCH 4/7] commit-graph: add --version= option Allo the commit-graph builtin to specify the file format version using the '--version=' option. Specify the version exactly in the verification tests as using a different version would change the offsets used in those tests. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 3 +++ builtin/commit-graph.c | 13 +++++++++++-- t/t5318-commit-graph.sh | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 624470e198202a..1d1cc70de40583 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -51,6 +51,9 @@ or `--stdin-packs`.) + With the `--append` option, include all commits that are present in the existing commit-graph file. ++ +With the `--version=` option, specify the file format version. Used +only for testing. 'read':: diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 0c92421f759347..b1bed842602b28 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits] [--version=]"), NULL }; @@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits] [--version=]"), NULL }; @@ -35,6 +35,7 @@ static struct opts_commit_graph { int stdin_packs; int stdin_commits; int append; + int version; } opts; @@ -141,6 +142,8 @@ static int graph_write(int argc, const char **argv) N_("start walk at commits listed by stdin")), OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), + OPT_INTEGER(0, "version", &opts.version, + N_("specify the file format version")), OPT_END(), }; @@ -155,6 +158,12 @@ static int graph_write(int argc, const char **argv) if (opts.append) flags |= COMMIT_GRAPH_APPEND; + switch (opts.version) { + case 1: + flags |= COMMIT_GRAPH_VERSION_1; + break; + } + read_replace_refs = 0; if (opts.reachable) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index f4deb13b1da1ba..b79d6263e9cbb9 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -328,7 +328,7 @@ test_expect_success 'replace-objects invalidates commit-graph' ' test_expect_success 'git commit-graph verify' ' cd "$TRASH_DIRECTORY/full" && - git rev-parse commits/8 | git commit-graph write --stdin-commits && + git rev-parse commits/8 | git commit-graph write --stdin-commits --version=1 && git commit-graph verify >output ' From 1f39fd27b2eecabad73763cf20cb6c741ab8f97b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 23 Jan 2019 13:59:19 -0800 Subject: [PATCH 5/7] commit-graph: implement file format version 2 The commit-graph file format had some shortcomings which we now correct: 1. The hash algorithm was determined by a single byte, instead of the 4-byte format identifier. 2. There was no way to update the reachability index we used. We currently only support generation numbers, but that will change in the future. 3. Git did not fail with error if the unused eighth byte was non-zero, so we could not use that to indicate an incremental file format without breaking compatibility across versions. The new format modifies the header of the commit-graph to solve these problems. We use the 4-byte hash format id, freeing up a byte in our 32-bit alignment to introduce a reachability index version. We can also fail to read the commit-graph if the eighth byte is non-zero. The 'git commit-graph read' subcommand needs updating to show the new data. Set the default file format version to 2, and adjust the tests to expect the new 'git commit-graph read' output. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- .../technical/commit-graph-format.txt | 26 +++++++++- builtin/commit-graph.c | 9 ++++ commit-graph.c | 47 ++++++++++++++++--- commit-graph.h | 1 + t/t5318-commit-graph.sh | 9 +++- 5 files changed, 83 insertions(+), 9 deletions(-) diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index 16452a0504c8fa..e367aa94b15eed 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -31,13 +31,22 @@ and hash type. All 4-byte numbers are in network order. +There are two versions available, 1 and 2. These currently differ only in +the header. + HEADER: +All commit-graph files use the first five bytes for the same purpose. + 4-byte signature: The signature is: {'C', 'G', 'P', 'H'} 1-byte version number: - Currently, the only valid version is 1. + Currently, the valid version numbers are 1 and 2. + +The remainder of the header changes depending on the version. + +Version 1: 1-byte Hash Version (1 = SHA-1) We infer the hash length (H) from this value. @@ -47,6 +56,21 @@ HEADER: 1-byte (reserved for later use) Current clients should ignore this value. +Version 2: + + 1-byte number (C) of "chunks" + + 1-byte reachability index version number: + Currently, the only valid number is 1. + + 1-byte (reserved for later use) + Current clients expect this value to be zero, and will not + try to read the commit-graph file if it is non-zero. + + 4-byte format identifier for the hash algorithm: + If this identifier does not agree with the repository's current + hash algorithm, then the client will not read the commit graph. + CHUNK LOOKUP: (C + 1) * 12 bytes listing the table of contents for the chunks: diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index b1bed842602b28..28787d0c9cc457 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -102,6 +102,11 @@ static int graph_read(int argc, const char **argv) *(unsigned char*)(graph->data + 5), *(unsigned char*)(graph->data + 6), *(unsigned char*)(graph->data + 7)); + + if (*(unsigned char *)(graph->data + 4) == 2) + printf("hash algorithm: %X\n", + get_be32(graph->data + 8)); + printf("num_commits: %u\n", graph->num_commits); printf("chunks:"); @@ -162,6 +167,10 @@ static int graph_write(int argc, const char **argv) case 1: flags |= COMMIT_GRAPH_VERSION_1; break; + + case 2: + flags |= COMMIT_GRAPH_VERSION_2; + break; } read_replace_refs = 0; diff --git a/commit-graph.c b/commit-graph.c index a087d7bda06aa9..3c83263f11a1b8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -90,7 +90,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file) uint64_t last_chunk_offset; uint32_t last_chunk_id; uint32_t graph_signature; - unsigned char graph_version, hash_version; + unsigned char graph_version, hash_version, reach_index_version; + uint32_t hash_id; if (fd < 0) return NULL; @@ -115,9 +116,9 @@ struct commit_graph *load_commit_graph_one(const char *graph_file) } graph_version = *(unsigned char*)(data + 4); - if (graph_version != 1) { - error(_("graph version %X does not match version %X"), - graph_version, 1); + if (!graph_version || graph_version > 2) { + error(_("unsupported graph version %X"), + graph_version); goto cleanup_fail; } @@ -135,6 +136,30 @@ struct commit_graph *load_commit_graph_one(const char *graph_file) graph->num_chunks = *(unsigned char*)(data + 6); chunk_lookup = data + 8; break; + + case 2: + graph->num_chunks = *(unsigned char *)(data + 5); + + reach_index_version = *(unsigned char *)(data + 6); + if (reach_index_version != 1) { + error(_("unsupported reachability index version %d"), + reach_index_version); + goto cleanup_fail; + } + + if (*(unsigned char*)(data + 7)) { + error(_("unsupported value in commit-graph header")); + goto cleanup_fail; + } + + hash_id = get_be32(data + 8); + if (hash_id != the_hash_algo->format_id) { + error(_("commit-graph hash algorithm does not match current algorithm")); + goto cleanup_fail; + } + + chunk_lookup = data + 12; + break; } graph->hash_len = the_hash_algo->rawsz; @@ -802,9 +827,11 @@ int write_commit_graph(const char *obj_dir, if (flags & COMMIT_GRAPH_VERSION_1) version = 1; + if (flags & COMMIT_GRAPH_VERSION_2) + version = 2; if (!version) - version = 1; - if (version != 1) { + version = 2; + if (version <= 0 || version > 2) { error(_("unsupported commit-graph version %d"), version); return 1; @@ -1003,6 +1030,14 @@ int write_commit_graph(const char *obj_dir, hashwrite_u8(f, 0); /* unused padding byte */ header_size = 8; break; + + case 2: + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 1); /* reachability index version */ + hashwrite_u8(f, 0); /* unused padding byte */ + hashwrite_be32(f, the_hash_algo->format_id); + header_size = 12; + break; } chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; diff --git a/commit-graph.h b/commit-graph.h index e03df54e33bf28..050137063b4f0c 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -63,6 +63,7 @@ int generation_numbers_enabled(struct repository *r); #define COMMIT_GRAPH_APPEND (1 << 0) #define COMMIT_GRAPH_PROGRESS (1 << 1) #define COMMIT_GRAPH_VERSION_1 (1 << 2) +#define COMMIT_GRAPH_VERSION_2 (1 << 3) int write_commit_graph_reachable(const char *obj_dir, int flags); int write_commit_graph(const char *obj_dir, diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index b79d6263e9cbb9..3ff5e3b48d2024 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -65,7 +65,8 @@ graph_read_expect() { NUM_CHUNKS=$((3 + $(echo "$2" | wc -w))) fi cat >expect <<- EOF - header: 43475048 1 1 $NUM_CHUNKS 0 + header: 43475048 2 $NUM_CHUNKS 1 0 + hash algorithm: 73686131 num_commits: $1 chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL EOF @@ -390,10 +391,14 @@ test_expect_success 'detect bad signature' ' ' test_expect_success 'detect bad version' ' - corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\02" \ + corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\03" \ "graph version" ' +test_expect_success 'detect version 2 with version 1 data' ' + corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\02" \ + "reachability index version" +' test_expect_success 'detect bad hash version' ' corrupt_graph_and_verify $GRAPH_BYTE_HASH "\02" \ "hash version" From f7777349d6ac9e27eff1edb0d821fcace1accb98 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 23 Jan 2019 13:59:20 -0800 Subject: [PATCH 6/7] commit-graph: test verifying a corrupt v2 header The commit-graph file format v2 changes the v1 data only in the header information. Add tests that check the 'verify' subcommand catches corruption in the v2 header. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 3ff5e3b48d2024..be7bbf911a830c 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -497,6 +497,37 @@ test_expect_success 'git fsck (checks commit-graph)' ' test_must_fail git fsck ' +test_expect_success 'rewrite commmit-graph with version 2' ' + rm -f .git/objects/info/commit-graph && + git commit-graph write --reachable --version=2 && + git commit-graph verify +' + +GRAPH_BYTE_CHUNK_COUNT=5 +GRAPH_BYTE_REACH_INDEX=6 +GRAPH_BYTE_UNUSED=7 +GRAPH_BYTE_HASH=8 + +test_expect_success 'detect low chunk count (v2)' ' + corrupt_graph_and_verify $GRAPH_CHUNK_COUNT "\02" \ + "missing the .* chunk" +' + +test_expect_success 'detect incorrect reachability index' ' + corrupt_graph_and_verify $GRAPH_REACH_INDEX "\03" \ + "reachability index version" +' + +test_expect_success 'detect non-zero unused byte' ' + corrupt_graph_and_verify $GRAPH_BYTE_UNUSED "\01" \ + "unsupported value" +' + +test_expect_success 'detect bad hash version (v2)' ' + corrupt_graph_and_verify $GRAPH_BYTE_HASH "\00" \ + "hash algorithm" +' + test_expect_success 'setup non-the_repository tests' ' rm -rf repo && git init repo && From f851b4583c2ab45bbf5b2e7f8a49c53911527f46 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 29 Jan 2019 11:47:06 -0800 Subject: [PATCH 7/7] SQUASH : misnamed variables and style fix The need for this fix may indicate that the tests weren't even run before submission, which is not a good sign... --- t/t5318-commit-graph.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index be7bbf911a830c..4230df199b543b 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -33,13 +33,13 @@ test_expect_success 'create commits and repack' ' git repack ' -graph_git_two_modes() { +graph_git_two_modes () { git -c core.commitGraph=true $1 >output git -c core.commitGraph=false $1 >expect test_cmp expect output } -graph_git_behavior() { +graph_git_behavior () { MSG=$1 DIR=$2 BRANCH=$3 @@ -56,7 +56,7 @@ graph_git_behavior() { graph_git_behavior 'no graph' full commits/3 commits/1 -graph_read_expect() { +graph_read_expect () { OPTIONAL="" NUM_CHUNKS=3 if test ! -z $2 @@ -372,7 +372,7 @@ GRAPH_BYTE_FOOTER=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4 * $NUM_OCTOPUS_EDGES)) # by inserting the data, then runs 'git commit-graph verify' # and places the output in the file 'err'. Test 'err' for # the given string. -corrupt_graph_and_verify() { +corrupt_graph_and_verify () { pos=$1 data="${2:-\0}" grepstr=$3 @@ -509,12 +509,12 @@ GRAPH_BYTE_UNUSED=7 GRAPH_BYTE_HASH=8 test_expect_success 'detect low chunk count (v2)' ' - corrupt_graph_and_verify $GRAPH_CHUNK_COUNT "\02" \ + corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \ "missing the .* chunk" ' test_expect_success 'detect incorrect reachability index' ' - corrupt_graph_and_verify $GRAPH_REACH_INDEX "\03" \ + corrupt_graph_and_verify $GRAPH_BYTE_REACH_INDEX "\03" \ "reachability index version" '