Only strip the content-encoding headers in ipro recorder if it is safe to
ignore them (e.g. don't drop Content-Encoding: gzip if the content is
actually gzipped).

fixes #1371

crowell committed Aug 3, 2016
1 parent 922b7e7 commit 02dfd36
Showing 6 changed files with 93 additions and 17 deletions.
22 changes: 22 additions & 0 deletions install/debug.conf.template
@@ -1006,6 +1006,28 @@ NameVirtualHost localhost:@@APACHE_SECONDARY_PORT@@
  AddOutputFilterByType DEFLATE text/css
</VirtualHost>

<VirtualHost localhost:@@APACHE_SECONDARY_PORT@@>
  ServerName compressedcache.example.com
  DocumentRoot "@@APACHE_DOC_ROOT@@"
  ModPagespeedFileCachePath "@@MOD_PAGESPEED_CACHE@@"
  ModPagespeed on
  ModPagespeedRewriteLevel PassThrough
  ModPagespeedEnableFilters rewrite_css
  ModPagespeedHttpCacheCompressionLevel 9
</VirtualHost>

<VirtualHost localhost:@@APACHE_SECONDARY_PORT@@>
  ServerName uncompressedcache.example.com
  DocumentRoot "@@APACHE_DOC_ROOT@@"
  ModPagespeedFileCachePath "@@MOD_PAGESPEED_CACHE@@"
  ModPagespeed on
  ModPagespeedRewriteLevel PassThrough
  ModPagespeedEnableFilters rewrite_css
  ModPagespeedHttpCacheCompressionLevel 0
  AddOutputFilterByType DEFLATE text/css
  DeflateCompressionLevel 1
</VirtualHost>

# Backend for ipro-proxy.example.com
Listen 127.0.0.1:@@APACHE_TERTIARY_PORT@@
NameVirtualHost 127.0.0.1:@@APACHE_TERTIARY_PORT@@
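The two virtual hosts above differ only in how the rewritten resource is
compressed on the way out: compressedcache stores gzip-level-9 entries in the
pagespeed HTTP cache, while uncompressedcache stores plain entries and lets
mod_deflate recompress them at level 1. A rough way to eyeball the difference
by hand (a sketch, not part of the commit: the host names and test path come
from this change, but curl and the literal port 8081, standing in for
@@APACHE_SECONDARY_PORT@@, are assumptions):

  # Once the resource has been optimized into the cache, compressedcache
  # should serve a noticeably smaller gzip body than uncompressedcache.
  for host in compressedcache.example.com uncompressedcache.example.com; do
    curl -so /dev/null -w "%{size_download}  $host\n" -H "Host: $host" \
        -H "Accept-Encoding: gzip" \
        "http://localhost:8081/mod_pagespeed_test/php_gzip.php"
  done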
21 changes: 21 additions & 0 deletions install/mod_pagespeed_test/php_gzip.php
@@ -0,0 +1,21 @@
<?php

header("Connection: close\r\n");
header("Cache-Control: max-age=86400");
header("Pragma: ", true);
header("Content-Type: text/css");
header("Expires: " . gmdate("D, d M Y H:i:s", time() + 86400) . " GMT");

$data = ".peachpuff {background-color: peachpuff;}" .
"\n" .
"." . str_pad("", 10000, uniqid()) . " {background-color: antiquewhite;}\n";

$output = "\x1f\x8b\x08\x00\x00\x00\x00\x00" .
substr(gzcompress($data, 2), 0, -4) .
pack('V', crc32($data)) .
pack('V', mb_strlen($data, "latin1"));

header("Content-Encoding: gzip\r\n");
header("Content-Length: " . mb_strlen($output, "latin1"));

echo $output;
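A quick sanity check that the hand-assembled stream is a well-formed gzip
member: run the script under the PHP CLI, where header() has no effect and
stdout carries only the body, and pipe it through gunzip. Illustrative only,
assuming php and gunzip are on PATH:

  php install/mod_pagespeed_test/php_gzip.php | gunzip -c | head -n 1
  # expected output: .peachpuff {background-color: peachpuff;}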
2 changes: 1 addition & 1 deletion pagespeed/automatic/system_test_helpers.sh
@@ -787,7 +787,7 @@ function scrape_header {
  # Extracts the value from wget's emitted headers. We use " " as a delimiter
  # here to avoid a leading space on the returned string. Note also that wget
  # always generates "name: value\r", never "name:value\r".
  grep -i "^$1:" | cut -d' ' -f2- | tr -d '\r'
  tr -s '\r\n' '\n' | egrep -i "^.?$1:" | rev | cut -d' ' -f 1 | rev
}

# Scrapes HTTP headers from stdin for Content-Length and returns the value.
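For context, a typical call site for this helper mirrors the wget invocations
in system_test.sh below; this is an illustrative sketch, not a line from the
commit:

  # Grab a single header value out of a dumped response.
  OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP --save-headers $URL)
  CONTENT_ENCODING=$(echo "$OUT" | scrape_header 'Content-Encoding')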
21 changes: 15 additions & 6 deletions pagespeed/system/in_place_resource_recorder.cc
@@ -27,6 +27,7 @@
#include "pagespeed/kernel/http/content_type.h"
#include "pagespeed/kernel/http/http_names.h"
#include "pagespeed/kernel/http/response_headers.h"
#include "pagespeed/kernel/util/gzip_inflater.h"

namespace net_instaweb {

@@ -249,12 +250,20 @@ void InPlaceResourceRecorder::DoneAndSetHeaders(
  if (failure_) {
    num_failed_->Add(1);
  } else {
    // We don't consider content-encoding to be valid here, since it can
    // be captured post-mod_deflate with pre-deflate content. Also note
    // that content-length doesn't have to be accurate either, since it can be
    // due to compression; we do still use it for quickly reject since
    // if gzip'd is too large uncompressed is likely too large, too.
    response_headers->RemoveAll(HttpAttributes::kContentEncoding);
    // We are skeptical of the correctness of the content-encoding here,
    // since it can be captured post-mod_deflate with pre-deflate content.
    // Also note that content-length doesn't have to be accurate either,
    // since it can reflect the compressed length; we still use it to quickly
    // reject, since if the gzip'd size is too large, the uncompressed size
    // is likely too large, too. We sniff the content to make sure that the
    // headers match the Content-Encoding.
    StringPiece contents;
    resource_value_.ExtractContents(&contents);
    // TODO(jcrowell): remove this sniffing fix, and do a proper fix by merging
    // the IPRO filters in mod_instaweb.cc and in ngx_pagespeed.
    if (!GzipInflater::HasGzipMagicBytes(contents)) {
      // Only remove these headers if the content is not gzipped.
      response_headers->RemoveAll(HttpAttributes::kContentEncoding);
    }
    response_headers->RemoveAll(HttpAttributes::kContentLength);

    if (cache_control_set_) {
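The sniff keys off the gzip magic number: every gzip member starts with the
bytes 0x1f 0x8b, followed by 0x08 for the deflate method, which is presumably
what GzipInflater::HasGzipMagicBytes checks; they are also the first bytes the
new php_gzip.php fixture emits. A shell illustration, assuming gzip and od are
available:

  printf 'hello' | gzip -c | od -An -N3 -tx1
  # -> 1f 8b 08   (ID1, ID2, CM = deflate)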
11 changes: 1 addition & 10 deletions pagespeed/system/in_place_resource_recorder_test.cc
@@ -108,20 +108,11 @@ class InPlaceResourceRecorderTest : public RewriteTestBase {
                                   &value_out, &headers_out));
    StringPiece contents;
    EXPECT_TRUE(value_out.ExtractContents(&contents));

    // gzip in prelim headers will cause us to decompress, gzip in final_headers
    // will not.
    EXPECT_EQ(header_time == kPrelimGzipHeader ? kUncompressedData : gzipped,
              contents);
    EXPECT_EQ(headers_out.IsGzipped() ? gzipped : kUncompressedData, contents);

    // We should not have a content-encoding header since we either decompressed
    // data ourselves or captured it before data was saved.
    EXPECT_FALSE(headers_out.Has(HttpAttributes::kContentEncoding));

    // We get content-length iff the cache was compressed.
    bool is_compressed_cache = (http_cache()->compression_level() != 0);
    int64 length;
    EXPECT_EQ(is_compressed_cache, headers_out.FindContentLength(&length));
    EXPECT_TRUE(headers_out.DetermineContentType()->IsCompressible());

    // TODO(jcrowell): Add test for non-compressible type.
33 changes: 33 additions & 0 deletions pagespeed/system/system_test.sh
@@ -1363,6 +1363,39 @@ if [ "$SECONDARY_HOSTNAME" != "" ]; then
  fetch_until -save -recursive $URL \
    'fgrep -c 150x100xOptPuzzle.jpg.pagespeed.ic.' 1

  if [ -z "${DISABLE_PHP_TESTS:-}" ]; then
    function php_ipro_record() {
      local url="$1"
      local max_cache_bytes="$2"
      local cache_bytes_op="$3"
      local cache_bytes_cmp="$4"
      echo http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP --save-headers $url
      OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP --save-headers $url)
      check_from "$OUT" egrep -iq 'Content-Encoding: gzip'
      # Now check that we receive the optimized content from cache (not double
      # zipped). When the resource is optimized, "peachpuff" is rewritten to its
      # hex equivalent.
      http_proxy=$SECONDARY_HOSTNAME fetch_until $url 'fgrep -c ffdab9' 1
      # The gzipped size is 175 with the default compression. Our compressed
      # cache uses gzip -9, which will compress even better (below 150).
      OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET -O - \
        --header="Accept-Encoding: gzip" $url)
      bytes_from_cache=$(echo "$OUT" | wc -c)
      check [ $bytes_from_cache -lt $max_cache_bytes ]
      check [ $bytes_from_cache $cache_bytes_op $cache_bytes_cmp ]
    }
    start_test Cache Compression On: PHP Pre-Gzipping does not get destroyed by the cache.
    URL="http://compressedcache.example.com/mod_pagespeed_test/php_gzip.php"
    # The gzipped size is 175 with the default compression. Our compressed
    # cache uses gzip -9, which will compress even better (below 150).
    php_ipro_record "$URL" 150 "-lt" 175
    start_test Cache Compression Off: PHP Pre-Gzipping does not get destroyed by the cache.
    # With cache compression off, we should see about 175 for both the pre and
    # post optimized resource.
    URL="http://uncompressedcache.example.com/mod_pagespeed_test/php_gzip.php"
    php_ipro_record "$URL" 200 "-gt" 150
  fi

  # Verify that downstream caches and rebeaconing interact correctly for images.
  start_test lazyload_images,rewrite_images with downstream cache rebeaconing
  HOST_NAME="http://downstreamcacherebeacon.example.com"
