Skip to content

Commit

Permalink
Merge branch 'jk/hash-object-fsck'
Browse files Browse the repository at this point in the history
"git hash-object" now checks that the resulting object is well
formed with the same code as "git fsck".

* jk/hash-object-fsck:
  fsck: do not assume NUL-termination of buffers
  hash-object: use fsck for object checks
  fsck: provide a function to fsck buffer without object struct
  t: use hash-object --literally when created malformed objects
  t7030: stop using invalid tag name
  t1006: stop using 0-padded timestamps
  t1007: modernize malformed object tests
  • Loading branch information
gitster committed Jan 30, 2023
2 parents 4ac326f + 8e43090 commit abf2bb8
Show file tree
Hide file tree
Showing 21 changed files with 294 additions and 96 deletions.
94 changes: 71 additions & 23 deletions fsck.c
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,23 @@ static int fsck_tree(const struct object_id *tree_oid,
return retval;
}

/*
* Confirm that the headers of a commit or tag object end in a reasonable way,
* either with the usual "\n\n" separator, or at least with a trailing newline
* on the final header line.
*
* This property is important for the memory safety of our callers. It allows
* them to scan the buffer linewise without constantly checking the remaining
* size as long as:
*
* - they check that there are bytes left in the buffer at the start of any
* line (i.e., that the last newline they saw was not the final one we
* found here)
*
* - any intra-line scanning they do will stop at a newline, which will worst
* case hit the newline we found here as the end-of-header. This makes it
* OK for them to use helpers like parse_oid_hex(), or even skip_prefix().
*/
static int verify_headers(const void *data, unsigned long size,
const struct object_id *oid, enum object_type type,
struct fsck_options *options)
Expand Down Expand Up @@ -808,6 +825,20 @@ static int fsck_ident(const char **ident,
if (*p != ' ')
return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
p++;
/*
* Our timestamp parser is based on the C strto*() functions, which
* will happily eat whitespace, including the newline that is supposed
* to prevent us walking past the end of the buffer. So do our own
* scan, skipping linear whitespace but not newlines, and then
* confirming we found a digit. We _could_ be even more strict here,
* as we really expect only a single space, but since we have
* traditionally allowed extra whitespace, we'll continue to do so.
*/
while (*p == ' ' || *p == '\t')
p++;
if (!isdigit(*p))
return report(options, oid, type, FSCK_MSG_BAD_DATE,
"invalid author/committer line - bad date");
if (*p == '0' && p[1] != ' ')
return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
if (date_overflows(parse_timestamp(p, &end, 10)))
Expand All @@ -834,20 +865,26 @@ static int fsck_commit(const struct object_id *oid,
unsigned author_count;
int err;
const char *buffer_begin = buffer;
const char *buffer_end = buffer + size;
const char *p;

/*
* We _must_ stop parsing immediately if this reports failure, as the
* memory safety of the rest of the function depends on it. See the
* comment above the definition of verify_headers() for more details.
*/
if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
return -1;

if (!skip_prefix(buffer, "tree ", &buffer))
if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
if (err)
return err;
}
buffer = p + 1;
while (skip_prefix(buffer, "parent ", &buffer)) {
while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
if (err)
Expand All @@ -856,7 +893,7 @@ static int fsck_commit(const struct object_id *oid,
buffer = p + 1;
}
author_count = 0;
while (skip_prefix(buffer, "author ", &buffer)) {
while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
author_count++;
err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
if (err)
Expand All @@ -868,7 +905,7 @@ static int fsck_commit(const struct object_id *oid,
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
if (err)
return err;
if (!skip_prefix(buffer, "committer ", &buffer))
if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
if (err)
Expand Down Expand Up @@ -899,13 +936,19 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
int ret = 0;
char *eol;
struct strbuf sb = STRBUF_INIT;
const char *buffer_end = buffer + size;
const char *p;

/*
* We _must_ stop parsing immediately if this reports failure, as the
* memory safety of the rest of the function depends on it. See the
* comment above the definition of verify_headers() for more details.
*/
ret = verify_headers(buffer, size, oid, OBJ_TAG, options);
if (ret)
goto done;

if (!skip_prefix(buffer, "object ", &buffer)) {
if (buffer >= buffer_end || !skip_prefix(buffer, "object ", &buffer)) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
goto done;
}
Expand All @@ -916,11 +959,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
}
buffer = p + 1;

if (!skip_prefix(buffer, "type ", &buffer)) {
if (buffer >= buffer_end || !skip_prefix(buffer, "type ", &buffer)) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
goto done;
}
eol = strchr(buffer, '\n');
eol = memchr(buffer, '\n', buffer_end - buffer);
if (!eol) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
goto done;
Expand All @@ -932,11 +975,11 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
goto done;
buffer = eol + 1;

if (!skip_prefix(buffer, "tag ", &buffer)) {
if (buffer >= buffer_end || !skip_prefix(buffer, "tag ", &buffer)) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
goto done;
}
eol = strchr(buffer, '\n');
eol = memchr(buffer, '\n', buffer_end - buffer);
if (!eol) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
goto done;
Expand All @@ -952,18 +995,16 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
}
buffer = eol + 1;

if (!skip_prefix(buffer, "tagger ", &buffer)) {
if (buffer >= buffer_end || !skip_prefix(buffer, "tagger ", &buffer)) {
/* early tags do not contain 'tagger' lines; warn only */
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
if (ret)
goto done;
}
else
ret = fsck_ident(&buffer, oid, OBJ_TAG, options);
if (!*buffer)
goto done;

if (!starts_with(buffer, "\n")) {
if (buffer < buffer_end && !starts_with(buffer, "\n")) {
/*
* The verify_headers() check will allow
* e.g. "[...]tagger <tagger>\nsome
Expand Down Expand Up @@ -1237,19 +1278,26 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
if (!obj)
return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");

if (obj->type == OBJ_BLOB)
return fsck_blob(&obj->oid, data, size, options);
if (obj->type == OBJ_TREE)
return fsck_tree(&obj->oid, data, size, options);
if (obj->type == OBJ_COMMIT)
return fsck_commit(&obj->oid, data, size, options);
if (obj->type == OBJ_TAG)
return fsck_tag(&obj->oid, data, size, options);
return fsck_buffer(&obj->oid, obj->type, data, size, options);
}

int fsck_buffer(const struct object_id *oid, enum object_type type,
void *data, unsigned long size,
struct fsck_options *options)
{
if (type == OBJ_BLOB)
return fsck_blob(oid, data, size, options);
if (type == OBJ_TREE)
return fsck_tree(oid, data, size, options);
if (type == OBJ_COMMIT)
return fsck_commit(oid, data, size, options);
if (type == OBJ_TAG)
return fsck_tag(oid, data, size, options);

return report(options, &obj->oid, obj->type,
return report(options, oid, type,
FSCK_MSG_UNKNOWN_TYPE,
"unknown type '%d' (internal fsck error)",
obj->type);
type);
}

int fsck_error_function(struct fsck_options *o,
Expand Down
8 changes: 8 additions & 0 deletions fsck.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,14 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
int fsck_object(struct object *obj, void *data, unsigned long size,
struct fsck_options *options);

/*
* Same as fsck_object(), but for when the caller doesn't have an object
* struct.
*/
int fsck_buffer(const struct object_id *oid, enum object_type,
void *data, unsigned long size,
struct fsck_options *options);

/*
* fsck a tag, and pass info about it back to the caller. This is
* exposed fsck_object() internals for git-mktag(1).
Expand Down
55 changes: 23 additions & 32 deletions object-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "object-store.h"
#include "promisor-remote.h"
#include "submodule.h"
#include "fsck.h"

/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
Expand Down Expand Up @@ -2284,32 +2285,21 @@ int repo_has_object_file(struct repository *r,
return repo_has_object_file_with_flags(r, oid, 0);
}

static void check_tree(const void *buf, size_t size)
{
struct tree_desc desc;
struct name_entry entry;

init_tree_desc(&desc, buf, size);
while (tree_entry(&desc, &entry))
/* do nothing
* tree_entry() will die() on malformed entries */
;
}

static void check_commit(const void *buf, size_t size)
{
struct commit c;
memset(&c, 0, sizeof(c));
if (parse_commit_buffer(the_repository, &c, buf, size, 0))
die(_("corrupt commit"));
}

static void check_tag(const void *buf, size_t size)
{
struct tag t;
memset(&t, 0, sizeof(t));
if (parse_tag_buffer(the_repository, &t, buf, size))
die(_("corrupt tag"));
/*
* We can't use the normal fsck_error_function() for index_mem(),
* because we don't yet have a valid oid for it to report. Instead,
* report the minimal fsck error here, and rely on the caller to
* give more context.
*/
static int hash_format_check_report(struct fsck_options *opts,
const struct object_id *oid,
enum object_type object_type,
enum fsck_msg_type msg_type,
enum fsck_msg_id msg_id,
const char *message)
{
error(_("object fails fsck: %s"), message);
return 1;
}

static int index_mem(struct index_state *istate,
Expand All @@ -2336,12 +2326,13 @@ static int index_mem(struct index_state *istate,
}
}
if (flags & HASH_FORMAT_CHECK) {
if (type == OBJ_TREE)
check_tree(buf, size);
if (type == OBJ_COMMIT)
check_commit(buf, size);
if (type == OBJ_TAG)
check_tag(buf, size);
struct fsck_options opts = FSCK_OPTIONS_DEFAULT;

opts.strict = 1;
opts.error_func = hash_format_check_report;
if (fsck_buffer(null_oid(), type, buf, size, &opts))
die(_("refusing to create malformed object"));
fsck_finish(&opts);
}

if (write_object)
Expand Down
6 changes: 3 additions & 3 deletions t/t1006-cat-file.sh
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,8 @@ commit_message="Initial commit"
commit_sha1=$(echo_without_newline "$commit_message" | git commit-tree $tree_sha1)
commit_size=$(($(test_oid hexsz) + 137))
commit_content="tree $tree_sha1
author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> 0000000000 +0000
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 0000000000 +0000
author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> 0 +0000
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> 0 +0000
$commit_message"

Expand All @@ -304,7 +304,7 @@ type blob
tag hellotag
tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>"
tag_description="This is a tag"
tag_content="$tag_header_without_timestamp 0000000000 +0000
tag_content="$tag_header_without_timestamp 0 +0000
$tag_description"

Expand Down
29 changes: 20 additions & 9 deletions t/t1007-hash-object.sh
Original file line number Diff line number Diff line change
Expand Up @@ -203,23 +203,34 @@ done
test_expect_success 'too-short tree' '
echo abc >malformed-tree &&
test_must_fail git hash-object -t tree malformed-tree 2>err &&
test_i18ngrep "too-short tree object" err
grep "too-short tree object" err
'

test_expect_success 'malformed mode in tree' '
hex_sha1=$(echo foo | git hash-object --stdin -w) &&
bin_sha1=$(echo $hex_sha1 | hex2oct) &&
printf "9100644 \0$bin_sha1" >tree-with-malformed-mode &&
hex_oid=$(echo foo | git hash-object --stdin -w) &&
bin_oid=$(echo $hex_oid | hex2oct) &&
printf "9100644 \0$bin_oid" >tree-with-malformed-mode &&
test_must_fail git hash-object -t tree tree-with-malformed-mode 2>err &&
test_i18ngrep "malformed mode in tree entry" err
grep "malformed mode in tree entry" err
'

test_expect_success 'empty filename in tree' '
hex_sha1=$(echo foo | git hash-object --stdin -w) &&
bin_sha1=$(echo $hex_sha1 | hex2oct) &&
printf "100644 \0$bin_sha1" >tree-with-empty-filename &&
hex_oid=$(echo foo | git hash-object --stdin -w) &&
bin_oid=$(echo $hex_oid | hex2oct) &&
printf "100644 \0$bin_oid" >tree-with-empty-filename &&
test_must_fail git hash-object -t tree tree-with-empty-filename 2>err &&
test_i18ngrep "empty filename in tree entry" err
grep "empty filename in tree entry" err
'

test_expect_success 'duplicate filename in tree' '
hex_oid=$(echo foo | git hash-object --stdin -w) &&
bin_oid=$(echo $hex_oid | hex2oct) &&
{
printf "100644 file\0$bin_oid" &&
printf "100644 file\0$bin_oid"
} >tree-with-duplicate-filename &&
test_must_fail git hash-object -t tree tree-with-duplicate-filename 2>err &&
grep "duplicateEntries" err
'

test_expect_success 'corrupt commit' '
Expand Down

0 comments on commit abf2bb8

Please sign in to comment.