Permalink
Browse files

inliners: don't inline gzipped text

If what we're going to inline into CSS or JS starts with the gzip magic
bytes (file signature) then it's very likely to be gzipped and very
unlikely to be valid CSS or JS, so we should abort the inlining.

Fixes the amount of #1307
that I could repro.  If we later see mangling files in-place (or combining)
then we can extend this.
  • Loading branch information...
jeffkaufman authored and crowell committed Jul 6, 2016
1 parent d05a8b6 commit 6dfe527c0f6f30bdf438de61e2cff1eb33817b0b
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,7 @@
<h1>gzip magic bytes test</h1>

<p>The files in this directory are gzipped, but served without
the correct content encoding header.</p>

<link rel=stylesheet href=compressed.css>
<script src=compressed.js></script>
@@ -34,6 +34,7 @@
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/util/gzip_inflater.h"

namespace net_instaweb {

@@ -161,16 +162,23 @@ bool CssInlineFilter::HasClosingStyleTag(StringPiece contents) {
bool CssInlineFilter::ShouldInline(const ResourcePtr& resource,
const StringPiece& attrs_charset,
GoogleString* reason) const {
// If the contents are bigger than our threshold or the contents contain
// "</style>" anywhere, don't inline. If we inline an external stylesheet
// containing a "</style>", the <style> tag will be ended early.
if (resource->UncompressedContentsSize() > size_threshold_bytes_) {
// If the contents are bigger than our threshold, don't inline.
StringPiece contents(resource->ExtractUncompressedContents());
if (contents.size() > size_threshold_bytes_) {
*reason = StrCat("CSS not inlined since it's bigger than ",
Integer64ToString(size_threshold_bytes_),
" bytes");
return false;
}
if (HasClosingStyleTag(resource->ExtractUncompressedContents())) {
// Also don't inline if it looks gzipped.
if (GzipInflater::HasGzipMagicBytes(contents)) {
*reason = "CSS not inlined because it appears to be gzip-encoded";
return false;
}
// And also not if the contents contain "</style>" anywhere. If we inline an
// external stylesheet containing a "</style>", the <style> tag will be ended
// early.
if (HasClosingStyleTag(contents)) {
*reason = "CSS not inlined since it contains style closing tag";
return false;
}
@@ -31,6 +31,7 @@
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/util/gzip_inflater.h"
#include "pagespeed/kernel/util/re2.h"

namespace net_instaweb {
@@ -113,17 +114,20 @@ void JsInlineFilter::EndElementImpl(HtmlElement* element) {

bool JsInlineFilter::ShouldInline(const ResourcePtr& resource,
GoogleString* reason) const {
// Don't inline if it's too big or looks like it's trying to get at its own
// url.

// Don't inline if it's too big.
StringPiece contents(resource->ExtractUncompressedContents());
if (contents.size() > size_threshold_bytes_) {
*reason = StrCat("JS not inlined since it's bigger than ",
Integer64ToString(size_threshold_bytes_),
" bytes");
return false;
}

// Or if it looks like it's gzip encoded.
if (GzipInflater::HasGzipMagicBytes(contents)) {
*reason = "JS not inlined because it appears to be gzip-encoded";
return false;
}
// Or if it looks like it's trying to get at its own url.
if (driver()->options()->avoid_renaming_introspective_javascript() &&
JavascriptCodeBlock::UnsafeToRename(contents)) {
*reason = "JS not inlined since it may be looking for its source";
@@ -53,3 +53,22 @@ fi

test_filter inline_javascript inlines a small JS file.
fetch_until $URL 'grep -c document.write' 1

start_test inlining gzip-encoded resources
# If a resource is double-gzipped, or gzipped once but missing the headers,
# we need to not inline the compressed (binary) version.
#
# compressed.css and compressed.js are gzipped on disk, but small enough that
# PageSpeed would inline them if it were allowed to. So fetch the page until we
# see two .pagespeed. resources, then verify that we see the debug comments we
# expect to see.
URL="$TEST_ROOT/gzip_precompressed/?PageSpeedFilters=+debug"
fetch_until -save $URL 'fgrep -c .pagespeed.' 2

OUT=$(cat $FETCH_UNTIL_OUTFILE)
# First verify that the inliners are actually enabled.
check_from "$OUT" fgrep "Inline Javascript"
check_from "$OUT" fgrep "Inline Css"
# Then check for the debug comments.
check_from "$OUT" fgrep "JS not inlined because it appears to be gzip-encoded"
check_from "$OUT" fgrep "CSS not inlined because it appears to be gzip-encoded"
@@ -427,4 +427,11 @@ bool GzipInflater::Inflate(StringPiece in, InflateType format, Writer* writer) {
return true;
}

// All gzip files start with a ten-byte header beginning with 0x1f8b.
bool GzipInflater::HasGzipMagicBytes(StringPiece in) {
return in.size() >= 10 &&
in[0] == static_cast<char>(0x1f) &&
in[1] == static_cast<char>(0x8b);
}

} // namespace net_instaweb
@@ -76,6 +76,9 @@ class GzipInflater {
// if there was some kind of failure, such as a corrupt input.
static bool Inflate(StringPiece in, InflateType format, Writer* writer);

// Checks whether in starts with the gzip file signature.
static bool HasGzipMagicBytes(StringPiece in);

private:
friend class GzipInflaterTestPeer;

0 comments on commit 6dfe527

Please sign in to comment.