Skip to content
This repository was archived by the owner on Apr 10, 2025. It is now read-only.

Commit 0bd84a1

Browse files
committed
Only strip the Content-Encoding headers in the IPRO recorder if it is safe
to ignore them (e.g. don't drop Content-Encoding: gzip if the content is actually gzipped). Fixes #1371. Adds two small checks to the IPRO gzipped-content recording tests.
1 parent 306eebe commit 0bd84a1

File tree

6 files changed

+113
-13
lines changed

6 files changed

+113
-13
lines changed

install/debug.conf.template

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,28 @@ NameVirtualHost localhost:@@APACHE_SECONDARY_PORT@@
925925
AddOutputFilterByType DEFLATE text/css
926926
</VirtualHost>
927927

928+
<VirtualHost localhost:@@APACHE_SECONDARY_PORT@@>
929+
ServerName compressedcache.example.com
930+
DocumentRoot "@@APACHE_DOC_ROOT@@"
931+
ModPagespeedFileCachePath "@@MOD_PAGESPEED_CACHE@@"
932+
ModPagespeed on
933+
ModPagespeedRewriteLevel PassThrough
934+
ModPagespeedEnableFilters rewrite_css
935+
ModPagespeedHttpCacheCompressionLevel 9
936+
</VirtualHost>
937+
938+
<VirtualHost localhost:@@APACHE_SECONDARY_PORT@@>
939+
ServerName uncompressedcache.example.com
940+
DocumentRoot "@@APACHE_DOC_ROOT@@"
941+
ModPagespeedFileCachePath "@@MOD_PAGESPEED_CACHE@@"
942+
ModPagespeed on
943+
ModPagespeedRewriteLevel PassThrough
944+
ModPagespeedEnableFilters rewrite_css
945+
ModPagespeedHttpCacheCompressionLevel 0
946+
AddOutputFilterByType DEFLATE text/css
947+
DeflateCompressionLevel 1
948+
</VirtualHost>
949+
928950
# Backend for ipro-proxy.example.com
929951
Listen 127.0.0.1:@@APACHE_TERTIARY_PORT@@
930952
NameVirtualHost 127.0.0.1:@@APACHE_TERTIARY_PORT@@
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<?php
2+
3+
header("Connection: close\r\n");
4+
header("Cache-Control: max-age=86400");
5+
header("Pragma: ", true);
6+
header("Content-Type: text/css");
7+
header("Expires: " . gmdate("D, d M Y H:i:s", time() + 86400) . " GMT");
8+
9+
$data = ".peachpuff {background-color: peachpuff;}" .
10+
"\n" .
11+
"." . str_pad("", 10000, uniqid()) . " {background-color: antiquewhite;}\n";
12+
13+
$output = "\x1f\x8b\x08\x00\x00\x00\x00\x00" .
14+
substr(gzcompress($data, 2), 0, -4) .
15+
pack('V', crc32($data)) .
16+
pack('V', mb_strlen($data, "latin1"));
17+
18+
header("Content-Encoding: gzip\r\n");
19+
header("Content-Length: " . mb_strlen($output, "latin1"));
20+
21+
echo $output;

pagespeed/automatic/system_test_helpers.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ function scrape_header {
743743
# Extracts the value from wget's emitted headers. We use " " as a delimiter
744744
# here to avoid a leading space on the returned string. Note also that wget
745745
# always generates "name: value\r", never "name:value\r".
746-
grep -i "^$1:" | cut -d' ' -f2- | tr -d '\r'
746+
tr -s '\r\n' '\n'| egrep -ia "^.?$1:" | rev | cut -d' ' -f 1 | rev
747747
}
748748

749749
# Scrapes HTTP headers from stdin for Content-Length and returns the value.

pagespeed/system/in_place_resource_recorder.cc

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "pagespeed/kernel/http/content_type.h"
2727
#include "pagespeed/kernel/http/http_names.h"
2828
#include "pagespeed/kernel/http/response_headers.h"
29+
#include "pagespeed/kernel/util/gzip_inflater.h"
2930

3031
namespace net_instaweb {
3132

@@ -240,12 +241,20 @@ void InPlaceResourceRecorder::DoneAndSetHeaders(
240241
if (failure_) {
241242
num_failed_->Add(1);
242243
} else {
243-
// We don't consider content-encoding to be valid here, since it can
244-
// be captured post-mod_deflate with pre-deflate content. Also note
245-
// that content-length doesn't have to be accurate either, since it can be
246-
// due to compression; we do still use it for quickly reject since
247-
// if gzip'd is too large uncompressed is likely too large, too.
248-
response_headers->RemoveAll(HttpAttributes::kContentEncoding);
244+
// We are skeptical of the correctness of the content-encoding here,
245+
// since it can be captured post-mod_deflate with pre-deflate content.
246+
// Also note that content-length doesn't have to be accurate either, since
247+
// it can be due to compression; we do still use it for quickly reject since
248+
// if gzip'd is too large uncompressed is likely too large, too. We sniff
249+
// the content to make sure that the headers match the Content-Encoding.
250+
StringPiece contents;
251+
resource_value_.ExtractContents(&contents);
252+
// TODO(jcrowell): remove this sniffing fix, and do a proper fix by merging
253+
// the IPRO filters in mod_instaweb.cc and in ngx_pagespeed.
254+
if (!GzipInflater::HasGzipMagicBytes(contents)) {
255+
// Only remove these headers if the content is not gzipped.
256+
response_headers->RemoveAll(HttpAttributes::kContentEncoding);
257+
}
249258
response_headers->RemoveAll(HttpAttributes::kContentLength);
250259
resource_value_.SetHeaders(response_headers);
251260
cache_->Put(url_, fragment_, request_properties_, http_options_,

pagespeed/system/in_place_resource_recorder_test.cc

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,15 @@ class InPlaceResourceRecorderTest : public RewriteTestBase {
107107
&value_out, &headers_out));
108108
StringPiece contents;
109109
EXPECT_TRUE(value_out.ExtractContents(&contents));
110-
111-
// gzip in prelim headers will cause us to decompress, gzip in final_headers
112-
// will not.
113-
EXPECT_EQ(header_time == kPrelimGzipHeader ? kUncompressedData : gzipped,
114-
contents);
110+
EXPECT_EQ(headers_out.IsGzipped() ? gzipped : kUncompressedData, contents);
115111

116112
// We should not have a content-encoding header since we either decompressed
117113
// data ourselves or captured it before data was saved. Also no
118114
// content-length since we may have done gunzip'ing.
119115
EXPECT_FALSE(headers_out.Has(HttpAttributes::kContentEncoding));
120-
EXPECT_FALSE(headers_out.Has(HttpAttributes::kContentLength));
116+
EXPECT_TRUE(headers_out.DetermineContentType()->IsCompressible());
117+
118+
// TODO(jcrowell): Add test for non-compressible type.
121119
}
122120

123121
void CheckCacheableContentType(const ContentType* content_type) {

pagespeed/system/system_test.sh

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,56 @@ if [ "$SECONDARY_HOSTNAME" != "" ]; then
13181318
fetch_until -save -recursive $URL \
13191319
'fgrep -c 150x100xOptPuzzle.jpg.pagespeed.ic.' 1
13201320

1321+
if [ -z "${DISABLE_PHP_TESTS:-}" ]; then
1322+
function php_ipro_record() {
1323+
local url="$1"
1324+
local max_cache_bytes="$2"
1325+
local cache_bytes_op="$3"
1326+
local cache_bytes_cmp="$4"
1327+
echo http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP --save-headers $url
1328+
OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP --save-headers $url)
1329+
check_from "$OUT" egrep -iq 'Content-Encoding: gzip'
1330+
# Now check that we receive the optimized content from cache (not double
1331+
# zipped). When the resource is optimized, "peachpuff" is rewritten to its
1332+
# hex equivalent.
1333+
http_proxy=$SECONDARY_HOSTNAME fetch_until $url 'fgrep -c ffdab9' 1
1334+
# The gzipped size is 175 with the default compression. Our compressed
1335+
# cache uses gzip -9, which will compress even better (below 150).
1336+
OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET -O - \
1337+
--header="Accept-Encoding: gzip" $url)
1338+
# TODO(jcrowell): clean up when ngx_pagespeed stops chunk-encoding static
1339+
# resources.
1340+
bytes_from_cache=$(echo "$OUT" | wc -c)
1341+
check [ $bytes_from_cache -lt $max_cache_bytes ]
1342+
check [ $bytes_from_cache $cache_bytes_op $cache_bytes_cmp ]
1343+
# Ensure that the Content-Encoding header matches the data that is sent.
1344+
OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP \
1345+
--header='Accept-Encoding: gzip' $url \
1346+
| scrape_header 'Content-Encoding')
1347+
check_from "$OUT" fgrep -q 'gzip'
1348+
OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET -O - $url)
1349+
bytes_from_cache_uncompressed=$(echo "$OUT" | wc -c)
1350+
# The data should be uncompressed, but minified at this point.
1351+
check [ $bytes_from_cache_uncompressed -gt 10000 ]
1352+
check_from "$OUT" grep -q "ffdab9"
1353+
# Ensure that the Content-Encoding header matches the data that is sent.
1354+
# In this case we didn't send the Accept-Encoding header, so we don't
1355+
# expect the data to have any Content-Encoding header.
1356+
OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET_DUMP $url)
1357+
check_not_from "$OUT" egrep -q 'Content-Encoding'
1358+
}
1359+
start_test Cache Compression On: PHP Pre-Gzipping does not get destroyed by the cache.
1360+
URL="http://compressedcache.example.com/mod_pagespeed_test/php_gzip.php"
1361+
# The gzipped size is 175 with the default compression. Our compressed
1362+
# cache uses gzip -9, which will compress even better (below 150).
1363+
php_ipro_record "$URL" 150 "-lt" 175
1364+
start_test Cache Compression Off: PHP Pre-Gzipping does not get destroyed by the cache.
1365+
# With cache compression off, we should see about 175 for both the pre and
1366+
# post optimized resource.
1367+
URL="http://uncompressedcache.example.com/mod_pagespeed_test/php_gzip.php"
1368+
php_ipro_record "$URL" 200 "-gt" 150
1369+
fi
1370+
13211371
# Verify that downstream caches and rebeaconing interact correctly for images.
13221372
start_test lazyload_images,rewrite_images with downstream cache rebeaconing
13231373
HOST_NAME="http://downstreamcacherebeacon.example.com"

0 commit comments

Comments
 (0)