diff --git a/convert.c b/convert.c index 0d6fb3410aecef..df7186bd813a2e 100644 --- a/convert.c +++ b/convert.c @@ -613,7 +613,7 @@ static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf, struct filter_params { const char *src; - unsigned long size; + size_t size; int fd; const char *cmd; const char *path; diff --git a/delta.h b/delta.h index 2df5fe13d954df..8a56ec07992c75 100644 --- a/delta.h +++ b/delta.h @@ -90,15 +90,15 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap, const unsigned char *top) { const unsigned char *data = *datap; - unsigned long cmd, size = 0; + size_t cmd, size = 0; int i = 0; do { cmd = *data++; - size |= (cmd & 0x7f) << i; + size |= st_left_shift(cmd & 0x7f, i); i += 7; } while (cmd & 0x80 && data < top); *datap = data; - return size; + return cast_size_t_to_ulong(size); } #endif diff --git a/entry.c b/entry.c index dfa7975f172cf3..0c6ba1052d3a88 100644 --- a/entry.c +++ b/entry.c @@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode) return open(path, O_WRONLY | O_CREAT | O_EXCL, mode); } -void *read_blob_entry(const struct cache_entry *ce, unsigned long *size) +void *read_blob_entry(const struct cache_entry *ce, size_t *size) { enum object_type type; - void *blob_data = read_object_file(&ce->oid, &type, size); + unsigned long ul; + void *blob_data = read_object_file(&ce->oid, &type, &ul); + *size = ul; if (blob_data) { if (type == OBJ_BLOB) return blob_data; @@ -271,7 +273,7 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca int fd, ret, fstat_done = 0; char *new_blob; struct strbuf buf = STRBUF_INIT; - unsigned long size; + size_t size; ssize_t wrote; size_t newsize = 0; struct stat st; diff --git a/entry.h b/entry.h index 2254c62727fdcf..252fd24c2ecad4 100644 --- a/entry.h +++ b/entry.h @@ -52,7 +52,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts, */ void unlink_entry(const struct cache_entry *ce); -void *read_blob_entry(const struct cache_entry *ce, unsigned long *size); +void *read_blob_entry(const struct cache_entry *ce, size_t *size); int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st); void update_ce_after_write(const struct checkout *state, struct cache_entry *ce, struct stat *st); diff --git a/git-compat-util.h b/git-compat-util.h index b18032ee55a4ea..db2d6d372e66d6 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -113,6 +113,14 @@ #define unsigned_mult_overflows(a, b) \ ((a) && (b) > maximum_unsigned_value_of_type(a) / (a)) +/* + * Returns true if the left shift of "a" by "shift" bits will + * overflow. The type of "a" must be unsigned. + */ +#define unsigned_left_shift_overflows(a, shift) \ + ((shift) < bitsizeof(a) && \ + (a) > maximum_unsigned_value_of_type(a) >> (shift)) + #ifdef __GNUC__ #define TYPEOF(x) (__typeof__(x)) #else @@ -883,6 +891,23 @@ static inline size_t st_sub(size_t a, size_t b) return a - b; } +static inline size_t st_left_shift(size_t a, unsigned shift) +{ + if (unsigned_left_shift_overflows(a, shift)) + die("size_t overflow: %"PRIuMAX" << %u", + (uintmax_t)a, shift); + return a << shift; +} + +static inline unsigned long cast_size_t_to_ulong(size_t a) +{ + if (a != (unsigned long)a) + die("object too large to read on this platform: %" + PRIuMAX" is cut off to %lu", + (uintmax_t)a, (unsigned long)a); + return (unsigned long)a; +} + #ifdef HAVE_ALLOCA_H # include # define xalloca(size) (alloca(size)) diff --git a/object-file.c b/object-file.c index 9d0aac792aee6c..1d4cfcbc78cbc8 100644 --- a/object-file.c +++ b/object-file.c @@ -1349,7 +1349,7 @@ static void *unpack_loose_rest(git_zstream *stream, int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; - unsigned long size; + size_t size; int type, type_len = 0; /* @@ -1384,12 +1384,12 @@ int parse_loose_header(const char *hdr, struct object_info *oi) if (c > 9) break; hdr++; - size = size * 10 + c; + size = st_add(st_mult(size, 10), c); } } if (oi->sizep) - *oi->sizep = size; + *oi->sizep = cast_size_t_to_ulong(size); /* * The length must be followed by a zero byte diff --git a/packfile.c b/packfile.c index 89402cfc69cd0f..6423d77faad46c 100644 --- a/packfile.c +++ b/packfile.c @@ -1060,7 +1060,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { unsigned shift; - unsigned long size, c; + size_t size, c; unsigned long used = 0; c = buf[used++]; @@ -1074,10 +1074,10 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf, break; } c = buf[used++]; - size += (c & 0x7f) << shift; + size = st_add(size, st_left_shift(c & 0x7f, shift)); shift += 7; } - *sizep = size; + *sizep = cast_size_t_to_ulong(size); return used; } diff --git a/parallel-checkout.c b/parallel-checkout.c index ed9c999520703b..8dd7e7bad40432 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -261,7 +261,7 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, struct stream_filter *filter; struct strbuf buf = STRBUF_INIT; char *blob; - unsigned long size; + size_t size; ssize_t wrote; /* Sanity check */ diff --git a/t/helper/test-genzeros.c b/t/helper/test-genzeros.c index 9532f5bac97687..8ca988d6216e78 100644 --- a/t/helper/test-genzeros.c +++ b/t/helper/test-genzeros.c @@ -3,18 +3,31 @@ int cmd__genzeros(int argc, const char **argv) { - long count; + /* static, so that it is NUL-initialized */ + static const char zeros[256 * 1024]; + intmax_t count; + ssize_t n; if (argc > 2) { fprintf(stderr, "usage: %s []\n", argv[0]); return 1; } - count = argc > 1 ? strtol(argv[1], NULL, 0) : -1L; + count = argc > 1 ? strtoimax(argv[1], NULL, 0) : -1; - while (count < 0 || count--) { - if (putchar(0) == EOF) + /* Writing out individual NUL bytes is slow... */ + while (count < 0) + if (write(1, zeros, ARRAY_SIZE(zeros)) < 0) return -1; + + while (count > 0) { + n = write(1, zeros, count < ARRAY_SIZE(zeros) ? + count : ARRAY_SIZE(zeros)); + + if (n < 0) + return -1; + + count -= n; } return 0; diff --git a/t/t1051-large-conversion.sh b/t/t1051-large-conversion.sh index 8b7640b3ba8a80..d4cfe8bf5ded1a 100755 --- a/t/t1051-large-conversion.sh +++ b/t/t1051-large-conversion.sh @@ -83,4 +83,29 @@ test_expect_success 'ident converts on output' ' test_cmp small.clean large.clean ' +# This smudge filter prepends 5GB of zeros to the file it checks out. This +# ensures that smudging doesn't mangle large files on 64-bit Windows. +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ + 'files over 4GB convert on output' ' + test_commit test small "a small file" && + test_config filter.makelarge.smudge \ + "test-tool genzeros $((5*1024*1024*1024)) && cat" && + echo "small filter=makelarge" >.gitattributes && + rm small && + git checkout -- small && + size=$(test_file_size small) && + test "$size" -ge $((5 * 1024 * 1024 * 1024)) +' + +# This clean filter writes down the size of input it receives. By checking against +# the actual size, we ensure that cleaning doesn't mangle large files on 64-bit Windows. +test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \ + 'files over 4GB convert on input' ' + test-tool genzeros $((5*1024*1024*1024)) >big && + test_config filter.checklarge.clean "wc -c >big.size" && + echo "big filter=checklarge" >.gitattributes && + git add big && + test $(test_file_size big) -eq $(cat big.size) +' + test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index 92f0ca56f89832..2dbb52669466fd 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1761,6 +1761,10 @@ build_option () { sed -ne "s/^$1: //p" } +test_lazy_prereq SIZE_T_IS_64BIT ' + test 8 -eq "$(build_option sizeof-size_t)" +' + test_lazy_prereq LONG_IS_64BIT ' test 8 -le "$(build_option sizeof-long)" '