Skip to content

Commit

Permalink
archive-zip: streaming for deflated files
Browse files Browse the repository at this point in the history
After an entry has been streamed out, its CRC and sizes are written as
part of a data descriptor.

For simplicity, we make the buffer for the compressed chunks twice as
big as the one for the uncompressed chunks, to be sure the result fits
even if deflate makes them bigger.

t5000 verifies the output. t1050 makes sure the command always respects
core.bigfilethreshold.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
René Scharfe authored and gitster committed May 3, 2012
1 parent 2158f88 commit c743c21
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 1 deletion.
64 changes: 63 additions & 1 deletion archive-zip.c
Expand Up @@ -211,7 +211,7 @@ static int write_zip_entry(struct archiver_args *args,
compressed_size = size;

if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert &&
size > big_file_threshold && method == 0) {
size > big_file_threshold) {
stream = open_istream(sha1, &type, &size, NULL);
if (!stream)
return error("cannot stream blob %s",
Expand Down Expand Up @@ -307,6 +307,68 @@ static int write_zip_entry(struct archiver_args *args,
write_zip_data_desc(size, compressed_size, crc);
zip_offset += ZIP_DATA_DESC_SIZE;

set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
} else if (stream && method == 8) {
unsigned char buf[STREAM_BUFFER_SIZE];
ssize_t readlen;
git_zstream zstream;
int result;
size_t out_len;
unsigned char compressed[STREAM_BUFFER_SIZE * 2];

memset(&zstream, 0, sizeof(zstream));
git_deflate_init(&zstream, args->compression_level);

compressed_size = 0;
zstream.next_out = compressed;
zstream.avail_out = sizeof(compressed);

for (;;) {
readlen = read_istream(stream, buf, sizeof(buf));
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);

zstream.next_in = buf;
zstream.avail_in = readlen;
result = git_deflate(&zstream, 0);
if (result != Z_OK)
die("deflate error (%d)", result);
out = compressed;
if (!compressed_size)
out += 2;
out_len = zstream.next_out - out;

if (out_len > 0) {
write_or_die(1, out, out_len);
compressed_size += out_len;
zstream.next_out = compressed;
zstream.avail_out = sizeof(compressed);
}

}
close_istream(stream);
if (readlen)
return readlen;

zstream.next_in = buf;
zstream.avail_in = 0;
result = git_deflate(&zstream, Z_FINISH);
if (result != Z_STREAM_END)
die("deflate error (%d)", result);

git_deflate_end(&zstream);
out = compressed;
if (!compressed_size)
out += 2;
out_len = zstream.next_out - out - 4;
write_or_die(1, out, out_len);
compressed_size += out_len;
zip_offset += compressed_size;

write_zip_data_desc(size, compressed_size, crc);
zip_offset += ZIP_DATA_DESC_SIZE;

set_zip_dir_data_desc(&dirent, size, compressed_size, crc);
} else if (compressed_size > 0) {
write_or_die(1, out, compressed_size);
Expand Down
4 changes: 4 additions & 0 deletions t/t1050-large.sh
Expand Up @@ -142,4 +142,8 @@ test_expect_success 'zip achiving, store only' '
git archive --format=zip -0 HEAD >/dev/null
'

# Archive HEAD in zip format without the -0 option and discard the
# archive itself; the test passes if git archive exits successfully.
test_expect_success 'zip achiving, deflate' '
git archive --format=zip HEAD >/dev/null
'

test_done
7 changes: 7 additions & 0 deletions t/t5000-tar-tree.sh
Expand Up @@ -250,6 +250,13 @@ test_expect_success UNZIP 'git archive -0 --format=zip on large files' '
(mkdir large && cd large && $UNZIP ../large.zip)
'

# With core.bigfilethreshold set to 1, write a zip archive of HEAD to
# large-compressed.zip (no -0 option), unpack it with $UNZIP into
# large-compressed/, and compare the extracted a/bin/sh with the copy
# under large/.
# NOTE(review): large/ is presumably left behind by the preceding
# "-0 --format=zip on large files" test -- confirm the ordering dependency.
test_expect_success UNZIP 'git archive --format=zip on large files' '
test_config core.bigfilethreshold 1 &&
git archive --format=zip HEAD >large-compressed.zip &&
(mkdir large-compressed && cd large-compressed && $UNZIP ../large-compressed.zip) &&
test_cmp large-compressed/a/bin/sh large/a/bin/sh
'

test_expect_success \
'git archive --list outside of a git repo' \
'GIT_DIR=some/non-existing/directory git archive --list'
Expand Down

0 comments on commit c743c21

Please sign in to comment.