Permalink
Browse files

strip-subresource-hints: respect preserve with rel=preload, use href …

…and not src (#1394)

* With rel=preload the hint tells us what type of resource it is, and if
  urls have been preserved for that type we should not strip it.
* If the rel=preload type isn't image, script, or style we shouldn't
  strip it, because those are the only urls we change.
* The filter was originally written to use src= when it should have used
  href=, which meant it removed hints it shouldn't have.

This is a minimal change for backporting for branch 33.  For the master branch
I have a draft of a more complex change that does additional cleanups.

Fixes: #1392
Fixes: #1393
  • Loading branch information...
jeffkaufman committed Sep 14, 2016
1 parent 18d5549 commit 95ef580465d9ac9dbbe95d0e3c8ae4fd1292aac6
@@ -1,5 +1,5 @@
<html>
<head><link rel="subresource" src="dontrewriteme_resource.jpg"/></head>
<head><link rel="subresource" href="dontrewriteme_resource.jpg"/></head>
<body>
</body>
</html>
@@ -1,5 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<head><link rel="subresource" href="test"/></head>
<body>
</body>
</html>
@@ -1,7 +1,7 @@
<html>
<head>
<link rel="subresource" src="test1"/>
<link rel="subresource" src="test2"/>
<link rel="subresource" href="test1"/>
<link rel="subresource" href="test2"/>
</head>
<body>
</body>
@@ -1,5 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<head><link rel="subresource" href="test"/></head>
<body>
</body>
</html>
@@ -1,5 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<head><link rel="subresource" href="test"/></head>
<body>
</body>
</html>
@@ -1,5 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<head><link rel="subresource" href="test"/></head>
<body>
</body>
</html>
@@ -43,7 +43,10 @@ class StripSubresourceHintsFilter : public EmptyHtmlFilter {
private:
RewriteDriver* driver_;
HtmlElement* delete_element_;
bool remove_;
bool remove_script_;
bool remove_style_;
bool remove_image_;
bool remove_any_;

DISALLOW_COPY_AND_ASSIGN(StripSubresourceHintsFilter);
};
@@ -32,32 +32,48 @@ namespace net_instaweb {
StripSubresourceHintsFilter::StripSubresourceHintsFilter(RewriteDriver* driver)
: driver_(driver),
delete_element_(NULL),
remove_(false) {
remove_script_(false),
remove_style_(false),
remove_image_(false),
remove_any_(false) {
}

StripSubresourceHintsFilter::~StripSubresourceHintsFilter() { }

void StripSubresourceHintsFilter::StartDocument() {
const RewriteOptions *options = driver_->options();
remove_ = (driver_->can_modify_urls() &&
(!(options->css_preserve_urls() &&
options->image_preserve_urls() &&
options->js_preserve_urls())));
remove_script_ = driver_->can_modify_urls() && !options->js_preserve_urls();
remove_style_ = driver_->can_modify_urls() && !options->css_preserve_urls();
remove_image_ = driver_->can_modify_urls() && !options->image_preserve_urls();
remove_any_ = remove_script_ || remove_style_ || remove_image_;
delete_element_ = NULL;
}

void StripSubresourceHintsFilter::StartElement(HtmlElement* element) {
if (!remove_ || delete_element_) return;
if (!remove_any_ || delete_element_) return;

if (element->keyword() == HtmlName::kLink) {
const char *value = element->AttributeValue(HtmlName::kRel);
if (value && (
StringCaseEqual(value, "subresource") ||
StringCaseEqual(value, "preload"))) {
// Strip:
// <link rel=subresource href=...> regardless
// <link rel=preload as=script href=...> unless preserve scripts
// <link rel=preload as=style href=...> unless preserve styles
// <link rel=preload as=image href=...> unless preserve images
//
// Don't strip other kinds of rel=preload hints, because we don't change
// their urls, so existing ones are still valid.
const char* rel_value;
const char* as_value;
if ((rel_value = element->AttributeValue(HtmlName::kRel)) != NULL &&
(StringCaseEqual(rel_value, "subresource") ||
(StringCaseEqual(rel_value, "preload") &&
(as_value = element->AttributeValue(HtmlName::kAs)) != NULL &&
((remove_script_ && StringCaseEqual(as_value, "script")) ||
(remove_style_ && StringCaseEqual(as_value, "style")) ||
(remove_image_ && StringCaseEqual(as_value, "image")))))) {
const RewriteOptions *options = driver_->options();
const char *resource_url = element->AttributeValue(HtmlName::kSrc);
const char *resource_url = element->AttributeValue(HtmlName::kHref);
if (!resource_url) {
// There's either no src attr, or one that we can't decode (utf8 etc).
// There's either no href attr, or one that we can't decode (utf8 etc).
// One way this could happen is if we have a url-encoded utf8 url in an
// img tag and a utf8 encoded url in the subresource tag. Delete the
// subresource link to be on the safe side.
@@ -88,8 +88,28 @@ class StripSubresourceHintsFilterTest :
public StripSubresourceHintsFilterTestBase {
};

class StripSubresourceHintsFilterTestPartialPreserve :
public StripSubresourceHintsFilterTestBase {
class StripSubresourceHintsFilterTestPreserveStyle :
public StripSubresourceHintsFilterTestBase {
protected:
virtual void CustomSetup() {
options()->set_css_preserve_urls(true);
options()->set_js_preserve_urls(false);
options()->set_image_preserve_urls(false);
}
};

class StripSubresourceHintsFilterTestPreserveScript :
public StripSubresourceHintsFilterTestBase {
protected:
virtual void CustomSetup() {
options()->set_css_preserve_urls(false);
options()->set_js_preserve_urls(true);
options()->set_image_preserve_urls(false);
}
};

class StripSubresourceHintsFilterTestPreserveImage :
public StripSubresourceHintsFilterTestBase {
protected:
virtual void CustomSetup() {
options()->set_css_preserve_urls(false);
@@ -148,7 +168,7 @@ TEST_F(StripSubresourceHintsFilterTest, SingleResourceNoLink) {

TEST_F(StripSubresourceHintsFilterTest, SingleResourceValidLink) {
const char *source =
"<head><link rel=\"subresource\" src=\"/test.gif\"/></head>"
"<head><link rel=\"subresource\" href=\"/test.gif\"/></head>"
"<body><img src=\"1.jpg\"/></body>";
const char *rewritten =
"<head></head>"
@@ -158,7 +178,7 @@ TEST_F(StripSubresourceHintsFilterTest, SingleResourceValidLink) {

TEST_F(StripSubresourceHintsFilterTest, SingleResourceValidPreloadLink) {
const char *source =
"<head><link rel=\"preload\" src=\"/test.gif\" as=\"image\"/></head>"
"<head><link rel=\"preload\" href=\"/test.gif\" as=\"image\"/></head>"
"<body><img src=\"1.jpg\"/></body>";
const char *rewritten =
"<head></head>"
@@ -169,7 +189,7 @@ TEST_F(StripSubresourceHintsFilterTest, SingleResourceValidPreloadLink) {
TEST_F(StripSubresourceHintsFilterTest, SingleResourceExternalLink) {
const char *source =
"<head>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
ValidateStripSubresourceHint(source, source);
@@ -178,21 +198,21 @@ TEST_F(StripSubresourceHintsFilterTest, SingleResourceExternalLink) {
TEST_F(StripSubresourceHintsFilterTest, MultiResourceMixedLinks) {
const char *source =
"<head>"
"<link rel=\"subresource\" src=\"/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
const char *rewritten =
"<head>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
ValidateStripSubresourceHint(source, rewritten);
}

TEST_F(StripSubresourceHintsFilterTest, SingleResourceRewriteDomain) {
const char *source =
"<head><link rel=\"subresource\" src=\"http://from1.test.com/test.gif\"/>"
"<head><link rel=\"subresource\" href=\"http://from1.test.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
const char *rewritten =
@@ -203,30 +223,32 @@ TEST_F(StripSubresourceHintsFilterTest, SingleResourceRewriteDomain) {

TEST_F(StripSubresourceHintsFilterTest, SingleResourceDisallow) {
const char *source =
"<head><link rel=\"subresource\" src=\"/dontdropme/test.gif\"/>"
"<head><link rel=\"subresource\" href=\"/dontdropme/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
const char *rewritten =
"<head><link rel=\"subresource\" src=\"/dontdropme/test.gif\"/>"
"<head><link rel=\"subresource\" href=\"/dontdropme/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
ValidateStripSubresourceHint(source, rewritten);
}

TEST_F(StripSubresourceHintsFilterTestPartialPreserve,
MultiResourcePreserveAll) {
// Even if you turn on preserve images, we still strip all rel=subresource hints
// because we don't know which ones are images.
TEST_F(StripSubresourceHintsFilterTestPreserveImage,
MultiSubresourcePreserveImages) {
const char *source =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
const char *rewritten =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
ValidateStripSubresourceHint(source, rewritten);
@@ -238,23 +260,59 @@ TEST_F(StripSubresourceHintsFilterTestFullPreserve,
MultiResourcePreserveAll) {
const char *source =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
const char *rewritten =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
ValidateStripSubresourceHint(source, rewritten);
}

// With rel=preload, if you have set preserve for a type we don't strip it.
TEST_F(StripSubresourceHintsFilterTestPreserveImage, ImagesPreserved) {
const char* source =
"<link rel=preload as=image href=a.jpg>"
"<link rel=preload as=script href=a.js>"
"<link rel=preload as=style href=a.css>";
const char* rewritten =
"<link rel=preload as=image href=a.jpg>";
ValidateStripSubresourceHint(source, rewritten);
}
TEST_F(StripSubresourceHintsFilterTestPreserveScript, ScriptsPreserved) {
const char* source =
"<link rel=preload as=image href=a.jpg>"
"<link rel=preload as=script href=a.js>"
"<link rel=preload as=style href=a.css>";
const char* rewritten =
"<link rel=preload as=script href=a.js>";
ValidateStripSubresourceHint(source, rewritten);
}
TEST_F(StripSubresourceHintsFilterTestPreserveStyle, StylesPreserved) {
const char* source =
"<link rel=preload as=image href=a.jpg>"
"<link rel=preload as=script href=a.js>"
"<link rel=preload as=style href=a.css>";
const char* rewritten =
"<link rel=preload as=style href=a.css>";
ValidateStripSubresourceHint(source, rewritten);
}

// With rel=preload we don't strip unknown types.
TEST_F(StripSubresourceHintsFilterTest, DontStripUnknownTypes) {
const char* source = "<link rel=preload as=font href=a.woff>";
ValidateStripSubresourceHint(source, source);
}


TEST_F(StripSubresourceHintsFilterTestDisabled,
PreserveSubResourceHintsIsTrue) {
can_modify_urls_filter_->set_can_modify_urls(true);
@@ -266,10 +324,10 @@ TEST_F(StripSubresourceHintsFilterTestDisabled,
can_modify_urls_filter_->set_can_modify_urls(true);
const char *source =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body><img src=\"1.jpg\"/></body>";
ValidateStripSubresourceHint(source, source);
@@ -279,18 +337,18 @@ TEST_F(StripSubresourceHintsFilterTestRewriteLevelPassthrough,
MultiResource) {
const char *source =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body></body>";
const char *rewritten =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body></body>";
ValidateExpected("multi_resource", source, rewritten);
@@ -300,16 +358,16 @@ TEST_F(StripSubresourceHintsFilterTestRewriteLevelCoreFilters,
MultiResource) {
const char *source =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://from1.test.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body></body>";
const char *rewritten =
"<head>"
"<link rel=\"subresource\" src=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" src=\"http://www.example.com/test.gif\"/>"
"<link rel=\"subresource\" href=\"/dontdropme.gif\"/>"
"<link rel=\"subresource\" href=\"http://www.example.com/test.gif\"/>"
"</head>"
"<body></body>";
ValidateExpected("multi_resource", source, rewritten);
@@ -33,6 +33,7 @@ struct KeywordMap {const char* name; net_instaweb::HtmlName::Keyword keyword;};
"alt", HtmlName::kAlt
"area", HtmlName::kArea
"article", HtmlName::kArticle
"as", HtmlName::kAs
"aside", HtmlName::kAside
"async", HtmlName::kAsync
"audio", HtmlName::kAudio
@@ -45,6 +45,7 @@ class HtmlName {
kAlt,
kArea,
kArticle,
kAs,
kAside,
kAsync,
kAudio,

0 comments on commit 95ef580

Please sign in to comment.