Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #3487 from vtbassmatt/huge-file-smudge-clean
Teach Git to handle huge files in smudge/clean
  • Loading branch information
dscho authored and vdye committed Nov 15, 2021
2 parents 009b3ea + e2cf6ca commit 7dad2af
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 19 deletions.
2 changes: 1 addition & 1 deletion convert.c
Expand Up @@ -613,7 +613,7 @@ static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf,

struct filter_params {
const char *src;
unsigned long size;
size_t size;
int fd;
const char *cmd;
const char *path;
Expand Down
6 changes: 3 additions & 3 deletions delta.h
Expand Up @@ -90,15 +90,15 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
const unsigned char *top)
{
const unsigned char *data = *datap;
unsigned long cmd, size = 0;
size_t cmd, size = 0;
int i = 0;
do {
cmd = *data++;
size |= (cmd & 0x7f) << i;
size |= st_left_shift(cmd & 0x7f, i);
i += 7;
} while (cmd & 0x80 && data < top);
*datap = data;
return size;
return cast_size_t_to_ulong(size);
}

#endif
8 changes: 5 additions & 3 deletions entry.c
Expand Up @@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode)
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
}

void *read_blob_entry(const struct cache_entry *ce, unsigned long *size)
void *read_blob_entry(const struct cache_entry *ce, size_t *size)
{
enum object_type type;
void *blob_data = read_object_file(&ce->oid, &type, size);
unsigned long ul;
void *blob_data = read_object_file(&ce->oid, &type, &ul);

*size = ul;
if (blob_data) {
if (type == OBJ_BLOB)
return blob_data;
Expand Down Expand Up @@ -271,7 +273,7 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca
int fd, ret, fstat_done = 0;
char *new_blob;
struct strbuf buf = STRBUF_INIT;
unsigned long size;
size_t size;
ssize_t wrote;
size_t newsize = 0;
struct stat st;
Expand Down
2 changes: 1 addition & 1 deletion entry.h
Expand Up @@ -52,7 +52,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts,
*/
void unlink_entry(const struct cache_entry *ce);

void *read_blob_entry(const struct cache_entry *ce, unsigned long *size);
void *read_blob_entry(const struct cache_entry *ce, size_t *size);
int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st);
void update_ce_after_write(const struct checkout *state, struct cache_entry *ce,
struct stat *st);
Expand Down
25 changes: 25 additions & 0 deletions git-compat-util.h
Expand Up @@ -113,6 +113,14 @@
#define unsigned_mult_overflows(a, b) \
((a) && (b) > maximum_unsigned_value_of_type(a) / (a))

/*
 * Returns true if the left shift of "a" by "shift" bits will
 * overflow, i.e. if "a" is greater than
 * maximum_unsigned_value_of_type(a) shifted right by "shift".
 * The type of "a" must be unsigned.
 *
 * The comparison is only made when "shift" is smaller than
 * bitsizeof(a); for any larger shift count the expression is
 * false, so callers that can receive such counts have to reject
 * them on their own before shifting.
 *
 * Both arguments appear more than once in the expansion, so pass
 * expressions without side effects.
 */
#define unsigned_left_shift_overflows(a, shift) \
((shift) < bitsizeof(a) && \
(a) > maximum_unsigned_value_of_type(a) >> (shift))

#ifdef __GNUC__
#define TYPEOF(x) (__typeof__(x))
#else
Expand Down Expand Up @@ -883,6 +891,23 @@ static inline size_t st_sub(size_t a, size_t b)
return a - b;
}

/*
 * Shift "a" left by "shift" bits, dying instead of silently dropping
 * bits that do not fit into a size_t.
 *
 * A shift count that is at least as large as the width of size_t is
 * undefined behavior in C, and unsigned_left_shift_overflows() evaluates
 * to false for such counts. That case is therefore handled up front:
 * a zero operand stays zero, any other operand is reported as overflow.
 *
 * Returns a << shift when the complete result fits into a size_t.
 */
static inline size_t st_left_shift(size_t a, unsigned shift)
{
	if (shift >= CHAR_BIT * sizeof(a)) {
		/* Never execute "a << shift" with an out-of-range count. */
		if (a)
			die("size_t overflow: %"PRIuMAX" << %u",
			    (uintmax_t)a, shift);
		return 0;
	}
	if (unsigned_left_shift_overflows(a, shift))
		die("size_t overflow: %"PRIuMAX" << %u",
		    (uintmax_t)a, shift);
	return a << shift;
}

/*
 * Convert a size_t to unsigned long, refusing to truncate.
 *
 * If "a" does not survive the round trip through unsigned long, the
 * function dies with a message showing both the full and the cut-off
 * value; otherwise the converted value is returned.
 */
static inline unsigned long cast_size_t_to_ulong(size_t a)
{
	unsigned long narrowed = (unsigned long)a;

	if (narrowed != a)
		die("object too large to read on this platform: %"
		    PRIuMAX" is cut off to %lu",
		    (uintmax_t)a, narrowed);

	return narrowed;
}

#ifdef HAVE_ALLOCA_H
# include <alloca.h>
# define xalloca(size) (alloca(size))
Expand Down
6 changes: 3 additions & 3 deletions object-file.c
Expand Up @@ -1349,7 +1349,7 @@ static void *unpack_loose_rest(git_zstream *stream,
int parse_loose_header(const char *hdr, struct object_info *oi)
{
const char *type_buf = hdr;
unsigned long size;
size_t size;
int type, type_len = 0;

/*
Expand Down Expand Up @@ -1384,12 +1384,12 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
if (c > 9)
break;
hdr++;
size = size * 10 + c;
size = st_add(st_mult(size, 10), c);
}
}

if (oi->sizep)
*oi->sizep = size;
*oi->sizep = cast_size_t_to_ulong(size);

/*
* The length must be followed by a zero byte
Expand Down
6 changes: 3 additions & 3 deletions packfile.c
Expand Up @@ -1060,7 +1060,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
unsigned shift;
unsigned long size, c;
size_t size, c;
unsigned long used = 0;

c = buf[used++];
Expand All @@ -1074,10 +1074,10 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
break;
}
c = buf[used++];
size += (c & 0x7f) << shift;
size = st_add(size, st_left_shift(c & 0x7f, shift));
shift += 7;
}
*sizep = size;
*sizep = cast_size_t_to_ulong(size);
return used;
}

Expand Down
2 changes: 1 addition & 1 deletion parallel-checkout.c
Expand Up @@ -261,7 +261,7 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
struct stream_filter *filter;
struct strbuf buf = STRBUF_INIT;
char *blob;
unsigned long size;
size_t size;
ssize_t wrote;

/* Sanity check */
Expand Down
21 changes: 17 additions & 4 deletions t/helper/test-genzeros.c
Expand Up @@ -3,18 +3,31 @@

int cmd__genzeros(int argc, const char **argv)
{
long count;
/* static, so that it is NUL-initialized */
static const char zeros[256 * 1024];
intmax_t count;
ssize_t n;

if (argc > 2) {
fprintf(stderr, "usage: %s [<count>]\n", argv[0]);
return 1;
}

count = argc > 1 ? strtol(argv[1], NULL, 0) : -1L;
count = argc > 1 ? strtoimax(argv[1], NULL, 0) : -1;

while (count < 0 || count--) {
if (putchar(0) == EOF)
/* Writing out individual NUL bytes is slow... */
while (count < 0)
if (write(1, zeros, ARRAY_SIZE(zeros)) < 0)
return -1;

while (count > 0) {
n = write(1, zeros, count < ARRAY_SIZE(zeros) ?
count : ARRAY_SIZE(zeros));

if (n < 0)
return -1;

count -= n;
}

return 0;
Expand Down
25 changes: 25 additions & 0 deletions t/t1051-large-conversion.sh
Expand Up @@ -83,4 +83,29 @@ test_expect_success 'ident converts on output' '
test_cmp small.clean large.clean
'

# This smudge filter prepends 5GB of zeros to the file it checks out. This
# ensures that smudging doesn't mangle large files on 64-bit Windows.
#
# The filter runs "test-tool genzeros" for 5GB and then "cat"s its input; the
# test removes the committed file, checks it out again and passes when the
# resulting file is at least 5GB in size. It requires the EXPENSIVE and
# SIZE_T_IS_64BIT prerequisites and is skipped when LONG_IS_64BIT is set.
test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
'files over 4GB convert on output' '
test_commit test small "a small file" &&
test_config filter.makelarge.smudge \
"test-tool genzeros $((5*1024*1024*1024)) && cat" &&
echo "small filter=makelarge" >.gitattributes &&
rm small &&
git checkout -- small &&
size=$(test_file_size small) &&
test "$size" -ge $((5 * 1024 * 1024 * 1024))
'

# This clean filter writes down the size of input it receives. By checking against
# the actual size, we ensure that cleaning doesn't mangle large files on 64-bit Windows.
#
# The input is a 5GB file of zeros produced by "test-tool genzeros"; the filter
# stores the output of "wc -c" in big.size while "git add" processes the file,
# and the test passes when that number equals the size of "big". It requires
# the EXPENSIVE and SIZE_T_IS_64BIT prerequisites and is skipped when
# LONG_IS_64BIT is set.
test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
'files over 4GB convert on input' '
test-tool genzeros $((5*1024*1024*1024)) >big &&
test_config filter.checklarge.clean "wc -c >big.size" &&
echo "big filter=checklarge" >.gitattributes &&
git add big &&
test $(test_file_size big) -eq $(cat big.size)
'

test_done
4 changes: 4 additions & 0 deletions t/test-lib.sh
Expand Up @@ -1761,6 +1761,10 @@ build_option () {
sed -ne "s/^$1: //p"
}

# Satisfied when build_option yields exactly 8 for "sizeof-size_t"
# (presumably the size in bytes, hence the 64BIT in the name).
test_lazy_prereq SIZE_T_IS_64BIT '
test 8 -eq "$(build_option sizeof-size_t)"
'

test_lazy_prereq LONG_IS_64BIT '
test 8 -le "$(build_option sizeof-long)"
'
Expand Down

0 comments on commit 7dad2af

Please sign in to comment.