New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
I110/gunzip #1140
I110/gunzip #1140
Changes from 3 commits
f6611cc
7f4250a
019c70f
874a290
5cb73ed
6875145
0accf37
52a8175
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,15 +54,17 @@ static void expand_buf(iovec_vector_t *bufs) | |
bufs->entries[bufs->size++] = h2o_iovec_init(h2o_mem_alloc(BUF_SIZE), 0); | ||
} | ||
|
||
static size_t compress_chunk(struct st_gzip_context_t *self, const void *src, size_t len, int flush, size_t bufindex) | ||
typedef int (*processor)(z_streamp strm, int flush); | ||
|
||
static size_t process_chunk(struct st_gzip_context_t *self, const void *src, size_t len, int flush, size_t bufindex, processor proc) | ||
{ | ||
int ret; | ||
|
||
self->zs.next_in = (void *)src; | ||
self->zs.avail_in = (unsigned)len; | ||
|
||
/* man says: If deflate returns with avail_out == 0, this function must be called again with the same value of the flush | ||
* parameter and more output space (updated avail_out), until the flush is complete (deflate returns with non-zero avail_out). | ||
/* man says: If inflate/deflate returns with avail_out == 0, this function must be called again with the same value of the flush | ||
* parameter and more output space (updated avail_out), until the flush is complete (function returns with non-zero avail_out). | ||
*/ | ||
do { | ||
/* expand buffer (note: in case of Z_SYNC_FLUSH we need to supply at least 6 bytes of output buffer) */ | ||
|
@@ -74,16 +76,16 @@ static size_t compress_chunk(struct st_gzip_context_t *self, const void *src, si | |
} | ||
self->zs.next_out = (void *)(self->bufs.entries[bufindex].base + self->bufs.entries[bufindex].len); | ||
self->zs.avail_out = (unsigned)(BUF_SIZE - self->bufs.entries[bufindex].len); | ||
ret = deflate(&self->zs, flush); | ||
assert(ret == Z_OK || ret == Z_STREAM_END); | ||
ret = proc(&self->zs, flush); | ||
assert(ret == Z_OK || ret == Z_STREAM_END || ret == Z_BUF_ERROR); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we had the discussion before, but could you please once more explain why we need to take care of The intention of the original code was to always supply enough space to zlib so that it would never return Note that current design secures at least 32 bytes of room in the out buffer (see the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @kazuho The current implementation of zlib's
At the last of
So, I believe we should handle There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for looking into the issue. So to clarify, it seems that If that is the case, I have no objections to the change. OTOH, I would appreciate if you could add a in-line comment to the source code explaining why such change is necessary. Thank you in advance. |
||
self->bufs.entries[bufindex].len = BUF_SIZE - self->zs.avail_out; | ||
} while (self->zs.avail_out == 0 && ret != Z_STREAM_END); | ||
|
||
return bufindex; | ||
} | ||
|
||
static void do_compress(h2o_compress_context_t *_self, h2o_iovec_t *inbufs, size_t inbufcnt, h2o_send_state_t state, | ||
h2o_iovec_t **outbufs, size_t *outbufcnt) | ||
static void do_process(h2o_compress_context_t *_self, h2o_iovec_t *inbufs, size_t inbufcnt, h2o_send_state_t state, | ||
h2o_iovec_t **outbufs, size_t *outbufcnt, processor proc) | ||
{ | ||
struct st_gzip_context_t *self = (void *)_self; | ||
size_t outbufindex; | ||
|
@@ -95,50 +97,89 @@ static void do_compress(h2o_compress_context_t *_self, h2o_iovec_t *inbufs, size | |
if (inbufcnt != 0) { | ||
size_t i; | ||
for (i = 0; i != inbufcnt - 1; ++i) | ||
outbufindex = compress_chunk(self, inbufs[i].base, inbufs[i].len, Z_NO_FLUSH, outbufindex); | ||
outbufindex = process_chunk(self, inbufs[i].base, inbufs[i].len, Z_NO_FLUSH, outbufindex, proc); | ||
last_buf = inbufs[i]; | ||
} else { | ||
last_buf = h2o_iovec_init(NULL, 0); | ||
} | ||
outbufindex = compress_chunk(self, last_buf.base, last_buf.len, h2o_send_state_is_in_progress(state) ? Z_SYNC_FLUSH : Z_FINISH, | ||
outbufindex); | ||
outbufindex = process_chunk(self, last_buf.base, last_buf.len, h2o_send_state_is_in_progress(state) ? Z_SYNC_FLUSH : Z_FINISH, | ||
outbufindex, proc); | ||
|
||
*outbufs = self->bufs.entries; | ||
*outbufcnt = outbufindex + 1; | ||
|
||
if (!h2o_send_state_is_in_progress(state)) { | ||
deflateEnd(&self->zs); | ||
if (self->super.compress != NULL) { | ||
deflateEnd(&self->zs); | ||
} else { | ||
inflateEnd(&self->zs); | ||
} | ||
self->zs_is_open = 0; | ||
} | ||
} | ||
|
||
static void do_compress(h2o_compress_context_t *_self, h2o_iovec_t *inbufs, size_t inbufcnt, h2o_send_state_t state, | ||
h2o_iovec_t **outbufs, size_t *outbufcnt) | ||
{ | ||
do_process(_self, inbufs, inbufcnt, state, outbufs, outbufcnt, (processor)deflate); | ||
} | ||
|
||
static void do_decompress(h2o_compress_context_t *_self, h2o_iovec_t *inbufs, size_t inbufcnt, h2o_send_state_t state, | ||
h2o_iovec_t **outbufs, size_t *outbufcnt) | ||
{ | ||
do_process(_self, inbufs, inbufcnt, state, outbufs, outbufcnt, (processor)inflate); | ||
} | ||
|
||
static void do_free(void *_self) | ||
{ | ||
struct st_gzip_context_t *self = _self; | ||
size_t i; | ||
|
||
if (self->zs_is_open) | ||
deflateEnd(&self->zs); | ||
if (self->zs_is_open) { | ||
if (self->super.compress != NULL) { | ||
deflateEnd(&self->zs); | ||
} else { | ||
inflateEnd(&self->zs); | ||
} | ||
} | ||
|
||
for (i = 0; i != self->bufs.size; ++i) | ||
free(self->bufs.entries[i].base); | ||
free(self->bufs.entries); | ||
} | ||
|
||
h2o_compress_context_t *h2o_compress_gzip_open(h2o_mem_pool_t *pool, int quality) | ||
static struct st_gzip_context_t *gzip_open(h2o_mem_pool_t *pool) | ||
{ | ||
struct st_gzip_context_t *self = h2o_mem_alloc_shared(pool, sizeof(*self), do_free); | ||
|
||
self->super.name = h2o_iovec_init(H2O_STRLIT("gzip")); | ||
self->super.compress = do_compress; | ||
self->super.compress = NULL; | ||
self->super.decompress = NULL; | ||
self->zs.zalloc = alloc_cb; | ||
self->zs.zfree = free_cb; | ||
self->zs.opaque = NULL; | ||
/* Z_BEST_SPEED for on-the-fly compression, memlevel set to 8 as suggested by the manual */ | ||
deflateInit2(&self->zs, quality, Z_DEFLATED, WINDOW_BITS, 8, Z_DEFAULT_STRATEGY); | ||
self->zs_is_open = 1; | ||
self->bufs = (iovec_vector_t){NULL}; | ||
expand_buf(&self->bufs); | ||
|
||
return self; | ||
} | ||
|
||
h2o_compress_context_t *h2o_compress_gzip_open(h2o_mem_pool_t *pool, int quality) | ||
{ | ||
struct st_gzip_context_t *self = gzip_open(pool); | ||
self->super.compress = do_compress; | ||
/* Z_BEST_SPEED for on-the-fly compression, memlevel set to 8 as suggested by the manual */ | ||
deflateInit2(&self->zs, quality, Z_DEFLATED, WINDOW_BITS, 8, Z_DEFAULT_STRATEGY); | ||
|
||
return &self->super; | ||
} | ||
|
||
h2o_compress_context_t *h2o_decompress_gzip_open(h2o_mem_pool_t *pool) | ||
{ | ||
struct st_gzip_context_t *self = gzip_open(pool); | ||
self->super.decompress = do_decompress; | ||
inflateInit2(&self->zs, WINDOW_BITS); | ||
|
||
return &self->super; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,6 +49,7 @@ struct st_h2o_sendfile_generator_t { | |
h2o_iovec_t content_encoding; | ||
unsigned send_vary : 1; | ||
unsigned send_etag : 1; | ||
unsigned gunzip : 1; | ||
char *buf; | ||
struct { | ||
size_t filesize; | ||
|
@@ -81,6 +82,11 @@ struct st_h2o_specific_file_handler_t { | |
int flags; | ||
}; | ||
|
||
struct st_gzip_decompress_t { | ||
h2o_ostream_t super; | ||
h2o_compress_context_t *decompressor; | ||
}; | ||
|
||
static const char *default_index_files[] = {"index.html", "index.htm", "index.txt", NULL}; | ||
|
||
const char **h2o_file_default_index_files = default_index_files; | ||
|
@@ -231,7 +237,8 @@ static struct st_h2o_sendfile_generator_t *create_generator(h2o_req_t *req, cons | |
{ | ||
struct st_h2o_sendfile_generator_t *self; | ||
h2o_filecache_ref_t *fileref; | ||
h2o_iovec_t content_encoding; | ||
h2o_iovec_t content_encoding = (h2o_iovec_t){NULL}; | ||
unsigned gunzip = 0; | ||
|
||
*is_dir = 0; | ||
|
||
|
@@ -253,9 +260,19 @@ static struct st_h2o_sendfile_generator_t *create_generator(h2o_req_t *req, cons | |
#undef TRY_VARIANT | ||
} | ||
} | ||
if ((fileref = h2o_filecache_open_file(req->conn->ctx->filecache, path, O_RDONLY | O_CLOEXEC)) == NULL) | ||
return NULL; | ||
content_encoding = (h2o_iovec_t){NULL}; | ||
if ((fileref = h2o_filecache_open_file(req->conn->ctx->filecache, path, O_RDONLY | O_CLOEXEC)) != NULL) { | ||
goto Opened; | ||
} | ||
if ((flags & H2O_FILE_FLAG_GUNZIP) != 0){ | ||
char *variant_path = h2o_mem_alloc_pool(&req->pool, path_len + sizeof(".gz")); | ||
memcpy(variant_path, path, path_len); | ||
strcpy(variant_path + path_len, ".gz"); | ||
if ((fileref = h2o_filecache_open_file(req->conn->ctx->filecache, variant_path, O_RDONLY | O_CLOEXEC)) != NULL) { | ||
gunzip = 1; | ||
goto Opened; | ||
} | ||
} | ||
return NULL; | ||
|
||
Opened: | ||
if (S_ISDIR(fileref->st.st_mode)) { | ||
|
@@ -276,6 +293,7 @@ static struct st_h2o_sendfile_generator_t *create_generator(h2o_req_t *req, cons | |
self->content_encoding = content_encoding; | ||
self->send_vary = (flags & H2O_FILE_FLAG_SEND_COMPRESSED) != 0; | ||
self->send_etag = (flags & H2O_FILE_FLAG_NO_ETAG) == 0; | ||
self->gunzip = gunzip; | ||
|
||
return self; | ||
} | ||
|
@@ -294,6 +312,16 @@ static void add_headers_unconditional(struct st_h2o_sendfile_generator_t *self, | |
h2o_set_header_token(&req->pool, &req->res.headers, H2O_TOKEN_VARY, H2O_STRLIT("accept-encoding")); | ||
} | ||
|
||
static void send_decompressed(h2o_ostream_t *_self, h2o_req_t *req, h2o_iovec_t *inbufs, size_t inbufcnt, h2o_send_state_t state) | ||
{ | ||
struct st_gzip_decompress_t *self = (void *)_self; | ||
h2o_iovec_t *outbufs; | ||
size_t outbufcnt; | ||
|
||
self->decompressor->decompress(self->decompressor, inbufs, inbufcnt, state, &outbufs, &outbufcnt); | ||
h2o_ostream_send_next(&self->super, req, outbufs, outbufcnt, state); | ||
} | ||
|
||
static void do_send_file(struct st_h2o_sendfile_generator_t *self, h2o_req_t *req, int status, const char *reason, | ||
h2o_iovec_t mime_type, h2o_mime_attributes_t *mime_attr, int is_get) | ||
{ | ||
|
@@ -303,7 +331,7 @@ static void do_send_file(struct st_h2o_sendfile_generator_t *self, h2o_req_t *re | |
/* setup response */ | ||
req->res.status = status; | ||
req->res.reason = reason; | ||
req->res.content_length = self->bytesleft; | ||
req->res.content_length = self->gunzip ? SIZE_MAX : self->bytesleft; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe there is an optional attribute in a gzip file that contains the size of the original content, and that the attribute is accessible from libz. It would be beneficial to use the information, since the clients that lack gzip support would likely overlap with those that do not support chunked encoding (i.e. HTTP/1.0). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mean I read this spec and searched the web, then I found a few difficulties to use it:
and I couldn't found zlib functions which support that information. BTW, above nginx module also seems to use chunked encoding: How do you think? |
||
req->res.mime_attr = mime_attr; | ||
|
||
if (self->ranged.range_count > 1) { | ||
|
@@ -340,6 +368,13 @@ static void do_send_file(struct st_h2o_sendfile_generator_t *self, h2o_req_t *re | |
/* send data */ | ||
h2o_start_response(req, &self->super); | ||
|
||
/* dynamically setup gzip decompress ostream */ | ||
if (self->gunzip) { | ||
struct st_gzip_decompress_t *decoder = (void *)h2o_add_ostream(req, sizeof(struct st_gzip_decompress_t), &req->_ostr_top); | ||
decoder->decompressor = h2o_decompress_gzip_open(&req->pool); | ||
decoder->super.do_send = send_decompressed; | ||
} | ||
|
||
if (self->ranged.range_count == 1) | ||
self->file.off = self->ranged.range_infos[0]; | ||
if (req->_ostr_top->start_pull != NULL && self->ranged.range_count < 2) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ use strict; | |
use warnings; | ||
use Test::More; | ||
use t::Util; | ||
use Gzip::Faster qw/gunzip_file/; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use |
||
|
||
plan skip_all => 'curl not found' | ||
unless prog_exists('curl'); | ||
|
@@ -48,48 +49,55 @@ EOT | |
|
||
subtest 'send-compressed' => sub { | ||
my $doit = sub { | ||
my ($send_compressed, $curl_opts, $expected_length) = @_; | ||
my ($send_compressed, $curl_opts, $paths, $expected_length) = @_; | ||
my $server = spawn_h2o(<< "EOT"); | ||
hosts: | ||
default: | ||
paths: | ||
/: | ||
file.dir: t/assets/doc_root | ||
@{[ defined $send_compressed ? "file.send-compressed: $send_compressed" : "" ]} | ||
@{[ $send_compressed ? "file.send-compressed: $send_compressed" : "" ]} | ||
EOT | ||
my $fetch = sub { | ||
my $path = shift; | ||
subtest "send-compressed:@{[ $send_compressed || q(default) ]}, $curl_opts, $path" => sub { | ||
my $resp = `curl --silent --dump-header /dev/stderr $curl_opts http://127.0.0.1:$server->{port}$path 2>&1 > /dev/null`; | ||
like $resp, qr/^content-length:\s*$expected_length\r$/im, "length is as expected"; | ||
if (($send_compressed || '') eq 'ON') { | ||
if ($send_compressed ne 'gunzip') { | ||
like $resp, qr/^content-length:\s*$expected_length\r$/im, "length is as expected"; | ||
} | ||
if ($send_compressed eq 'ON' || $send_compressed eq 'gunzip') { | ||
like $resp, qr/^vary:\s*accept-encoding\r$/im, "has vary set"; | ||
} else { | ||
unlike $resp, qr/^vary:\s*accept-encoding\r$/im, "not has vary set"; | ||
} | ||
}; | ||
}; | ||
$fetch->('/index.txt'); | ||
$fetch->('/'); | ||
$fetch->($_) for @$paths; | ||
}; | ||
|
||
my $orig_len = (stat 't/assets/doc_root/index.txt')[7]; | ||
my $gz_len = (stat 't/assets/doc_root/index.txt.gz')[7]; | ||
my $br_len = (stat 't/assets/doc_root/index.txt.br')[7]; | ||
my $index_orig_len = (stat 't/assets/doc_root/index.txt')[7]; | ||
my $index_gz_len = (stat 't/assets/doc_root/index.txt.gz')[7]; | ||
my $index_br_len = (stat 't/assets/doc_root/index.txt.br')[7]; | ||
my $alice2_orig_len = length(gunzip_file('t/assets/doc_root/alice2.txt.gz')); | ||
my $alice2_gz_len = (stat 't/assets/doc_root/alice2.txt.gz')[7]; | ||
|
||
$doit->("", "", ['/index.txt', '/'], $index_orig_len); | ||
$doit->("", q{--header "Accept-Encoding: gzip"}, ['/index.txt', '/'], $index_orig_len); | ||
$doit->("OFF", q{--header "Accept-Encoding: gzip"}, ['/index.txt', '/'], $index_orig_len); | ||
$doit->("OFF", q{--header "Accept-Encoding: br, gzip"}, ['/index.txt', '/'], $index_orig_len); | ||
|
||
$doit->("ON", "", ['/index.txt', '/'], $index_orig_len); | ||
$doit->("ON", q{--header "Accept-Encoding: gzip"}, ['/index.txt', '/'], $index_gz_len); | ||
$doit->("ON", q{--header "Accept-Encoding: gzip, deflate"}, ['/index.txt', '/'], $index_gz_len); | ||
$doit->("ON", q{--header "Accept-Encoding: deflate, gzip"}, ['/index.txt', '/'], $index_gz_len); | ||
$doit->("ON", q{--header "Accept-Encoding: deflate"}, ['/index.txt', '/'], $index_orig_len); | ||
$doit->("ON", q{--header "Accept-Encoding: br, gzip"}, ['/index.txt', '/'], $index_br_len); | ||
$doit->("ON", q{--header "Accept-Encoding: gzip, br"}, ['/index.txt', '/'], $index_br_len); | ||
$doit->("ON", q{--header "Accept-Encoding: br"}, ['/index.txt', '/'], $index_br_len); | ||
|
||
$doit->(undef, "", $orig_len); | ||
$doit->(undef, q{--header "Accept-Encoding: gzip"}, $orig_len); | ||
$doit->("OFF", q{--header "Accept-Encoding: gzip"}, $orig_len); | ||
$doit->("OFF", q{--header "Accept-Encoding: br, gzip"}, $orig_len); | ||
$doit->("gunzip", "", ['/alice2.txt'], $alice2_orig_len); | ||
$doit->("gunzip", q{--header "Accept-Encoding: gzip"}, ['/alice2.txt'], $alice2_gz_len); | ||
|
||
$doit->("ON", "", $orig_len); | ||
$doit->("ON", q{--header "Accept-Encoding: gzip"}, $gz_len); | ||
$doit->("ON", q{--header "Accept-Encoding: gzip, deflate"}, $gz_len); | ||
$doit->("ON", q{--header "Accept-Encoding: deflate, gzip"}, $gz_len); | ||
$doit->("ON", q{--header "Accept-Encoding: deflate"}, $orig_len); | ||
$doit->("ON", q{--header "Accept-Encoding: br, gzip"}, $br_len); | ||
$doit->("ON", q{--header "Accept-Encoding: gzip, br"}, $br_len); | ||
$doit->("ON", q{--header "Accept-Encoding: br"}, $br_len); | ||
|
||
subtest 'MSIE-workaround' => sub { | ||
my $server = spawn_h2o(<< "EOT"); | ||
|
@@ -101,7 +109,7 @@ hosts: | |
file.send-gzip: ON | ||
EOT | ||
my $resp = `curl --silent --dump-header /dev/stderr --user-agent "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)" --header "Accept-Encoding: gzip" http://127.0.0.1:$server->{port}/ 2>&1 > /dev/null`; | ||
like $resp, qr/^content-length:\s*$gz_len\r$/im, "length is as expected"; | ||
like $resp, qr/^content-length:\s*$index_gz_len\r$/im, "length is as expected"; | ||
like $resp, qr/^cache-control:.*private.*\r$/im, "cache-control: private"; | ||
unlike $resp, qr/^vary:/im, "no vary"; | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Compression context is a context for transforming one representation (e.g. a gzip-ed file) to another (e.g. an decompressed file). A single context can only be used for one purpose; it cannot be used for compressing and decompressing.
Therefore, you should not add a
decompress
callback.You should either:
h2o_compress_context_t
that decompresses the input passed into thecompress
callbackThe above list reflects my preference of addressing the issue.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I preffer your first option too. The name may be misleading, but I believe it's most reasonable for this issue.