Permalink
Browse files

Strip subresource hints

Default behaviour is to strip subresource links which are in scope for
pagespeed, these are the resources that are not disallowed or are valid
domains in the domain laywer.

Added can_modify_url flag to HtmlParse and CanModifyUrl function to the
HtmlFilter which indicates whether urls can be rewritten by the parser
and thus should be removed.  This is tested now in the
strip_subresource_hints_filter_test.cc as this is only used by the strip
subresource hints feature right now. This should be moved to the
HtmlParse tests.

DetermineEnabledFilters has been rolled up into
DetermineFiltersBehaviour, which also determines can_modify_url for all
the filters and possible future "behaviors".

Added new option to explicitly prevent the default behaviour:
ModPreserveSubresourceHints on/off

For ubuntu a check for the new setup /var/www/html instead of /var/www
for the document root has been added.

Fixes Issue #973.

(Squash of 54983f4 and b651c78.)
  • Loading branch information...
keesspoelstra authored and jeffkaufman committed Dec 8, 2015
1 parent b8e27c8 commit d0bae6814aa54e7e59475903bee5b15fd430d219
Showing with 655 additions and 39 deletions.
  1. +24 −0 install/debug.conf.template
  2. +5 −0 install/mod_pagespeed_test/strip_subresource_hints/default/disallowtest.html
  3. +5 −0 install/mod_pagespeed_test/strip_subresource_hints/default/index.html
  4. +8 −0 install/mod_pagespeed_test/strip_subresource_hints/default/multiple_subresource_hints.html
  5. +5 −0 install/mod_pagespeed_test/strip_subresource_hints/default_passthrough/index.html
  6. +5 −0 install/mod_pagespeed_test/strip_subresource_hints/preserve_off/index.html
  7. +5 −0 install/mod_pagespeed_test/strip_subresource_hints/preserve_on/index.html
  8. +1 −0 install/setup_test_machine.sh
  9. +8 −0 install/ubuntu.sh
  10. +1 −0 net/instaweb/instaweb.gyp
  11. +1 −1 net/instaweb/rewriter/public/rewrite_driver.h
  12. +9 −6 net/instaweb/rewriter/public/rewrite_filter.h
  13. +12 −0 net/instaweb/rewriter/public/rewrite_options.h
  14. +53 −0 net/instaweb/rewriter/public/strip_subresource_hints_filter.h
  15. +12 −7 net/instaweb/rewriter/rewrite_driver.cc
  16. +8 −0 net/instaweb/rewriter/rewrite_options.cc
  17. +1 −0 net/instaweb/rewriter/rewrite_options_test.cc
  18. +91 −0 net/instaweb/rewriter/strip_subresource_hints_filter.cc
  19. +308 −0 net/instaweb/rewriter/strip_subresource_hints_filter_test.cc
  20. +1 −0 net/instaweb/test.gyp
  21. +4 −0 pagespeed/apache/mod_instaweb.cc
  22. +4 −0 pagespeed/kernel/html/empty_html_filter.h
  23. +4 −0 pagespeed/kernel/html/html_filter.h
  24. +20 −15 pagespeed/kernel/html/html_parse.cc
  25. +19 −9 pagespeed/kernel/html/html_parse.h
  26. +2 −0 pagespeed/kernel/html/html_parse_test.cc
  27. +2 −0 pagespeed/kernel/html/html_writer_filter.h
  28. +37 −1 pagespeed/system/system_test.sh
@@ -238,6 +238,29 @@ ModPagespeedLoadFromFile "http://@@APACHE_DOMAIN@@/mod_pagespeed_test/ipro/insta
Header append 'Cache-Control' 'no-transform'
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/default" >
ModPagespeedRewriteLevel CoreFilters
ModPagespeedDisableFilters add_instrumentation
ModPagespeedDisallow *dontrewriteme*
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/preserve_on/" >
ModPagespeedPreserveSubresourceHints on
ModPagespeedRewriteLevel CoreFilters
ModPagespeedDisableFilters add_instrumentation
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/preserve_off/" >
ModPagespeedPreserveSubresourceHints off
ModPagespeedRewriteLevel CoreFilters
ModPagespeedDisableFilters add_instrumentation
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/default_passthrough/" >
ModPagespeedRewriteLevel PassThrough
ModPagespeedDisableFilters add_instrumentation
</Directory>

# This Directory does not even exist, but by setting some options in that
# scope we test to make sure the options we claim are really settable in
# .htaccess. Note that <Directory> and .htaccess are enforced the same way.
@@ -1912,6 +1935,7 @@ Listen 127.0.0.2:@@APACHE_TERTIARY_PORT@@
#ALL_DIRECTIVES ModPagespeedNumExpensiveRewriteThreads 2
#ALL_DIRECTIVES ModPagespeedNumRewriteThreads 4
#ALL_DIRECTIVES ModPagespeedOptionCookiesDurationMs 12345
#ALL_DIRECTIVES ModPagespeedPreserveSubresourceHints on
#ALL_DIRECTIVES ModPagespeedPreserveUrlRelativity on
#ALL_DIRECTIVES ModPagespeedProgressiveJpegMinBytes 1000
#ALL_DIRECTIVES ModPagespeedRateLimitBackgroundFetches true
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="dontrewriteme_resource.jpg"/></head>
<body>
</body>
</html>
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<body>
</body>
</html>
@@ -0,0 +1,8 @@
<html>
<head>
<link rel="subresource" src="test1"/>
<link rel="subresource" src="test2"/>
</head>
<body>
</body>
</html>
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<body>
</body>
</html>
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<body>
</body>
</html>
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<body>
</body>
</html>
@@ -66,6 +66,7 @@ fi
# This sequence is described in /usr/share/doc/apache2.2-common/README.Debian.gz
sudo a2ensite default-ssl
sudo a2enmod ssl
sudo a2enmod headers
sudo make-ssl-cert generate-default-snakeoil --force-overwrite

# TODO(jefftk): We don't restart the test servers often enough for this to be
@@ -2,11 +2,19 @@

echo make $*

APACHE_DOC_ROOT=/var/www
# Test for new ubuntu setups where doc/root is in /var/www/html. We could run
# into a false positive here, but probably not for build systems.
if [ -e /var/www/html ]; then
APACHE_DOC_ROOT=/var/www/html
fi

exec make \
APACHE_CONTROL_PROGRAM=/etc/init.d/apache2 \
APACHE_LOG=/var/log/apache2/error.log \
APACHE_MODULES=/usr/lib/apache2/modules \
APACHE_CONF_FILE=/etc/apache2/apache2.conf \
APACHE_DOC_ROOT=$APACHE_DOC_ROOT \
APACHE_PIDFILE=/var/run/apache2.pid \
APACHE_PROGRAM=/usr/sbin/apache2 \
APACHE_ROOT=/etc/apache2 \
@@ -1832,6 +1832,7 @@
'rewriter/split_html_helper_filter.cc',
'rewriter/strip_non_cacheable_filter.cc',
'rewriter/strip_scripts_filter.cc',
'rewriter/strip_subresource_hints_filter.cc',
'rewriter/support_noscript_filter.cc',
'rewriter/suppress_prehead_filter.cc',
'rewriter/url_input_resource.cc',
@@ -1244,7 +1244,7 @@ class RewriteDriver : public HtmlParse {
bool Decode(StringPiece leaf, ResourceNamer* resource_namer) const;

protected:
virtual void DetermineEnabledFiltersImpl();
virtual void DetermineFiltersBehaviorImpl();

private:
friend class DistributedRewriteContextTest;
@@ -20,20 +20,18 @@
#define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_FILTER_H_

#include "net/instaweb/rewriter/public/common_filter.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/rewrite_context.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/util/url_segment_encoder.h"

namespace net_instaweb {

class Resource;
class ResourceContext;
class RewriteContext;
class RewriteDriver;
class UrlSegmentEncoder;

class RewriteFilter : public CommonFilter {
public:
explicit RewriteFilter(RewriteDriver* driver)
@@ -49,6 +47,11 @@ class RewriteFilter : public CommonFilter {
// UsePropertyCacheDomCohort to return true.
virtual void DetermineEnabled(GoogleString* disabled_reason);

// Returns whether this filter can modify urls. Because most filters do
// modify urls this defaults returning true, and filters that commit to never
// modifying urls should override it to return false.
virtual bool CanModifyUrls() { return true; }

// All RewriteFilters define how they encode URLs and other
// associated information needed for a rewrite into a URL.
// The default implementation handles a single URL with
@@ -364,6 +364,7 @@ class RewriteOptions {
static const char kObliviousPagespeedUrls[];
static const char kOptionCookiesDurationMs[];
static const char kOverrideCachingTtlMs[];
static const char kPreserveSubresourceHints[];
static const char kPreserveUrlRelativity[];
static const char kPrivateNotVaryForIE[];
static const char kProactiveResourceFreshening[];
@@ -1627,6 +1628,14 @@ class RewriteOptions {
set_option(x, &blink_blacklist_end_timestamp_ms_);
}

bool preserve_subresource_hints() const {
return preserve_subresource_hints_.value();
}
void set_preserve_subresource_hints(bool x) {
set_option(x, &preserve_subresource_hints_);
}


bool preserve_url_relativity() const {
return preserve_url_relativity_.value();
}
@@ -4134,6 +4143,9 @@ class RewriteOptions {
// The timestamp when blink blacklist expires.
Option<int64> blink_blacklist_end_timestamp_ms_;

// Keep the original subresource hints
Option<bool> preserve_subresource_hints_;

// Keep rewritten URLs as relative as the original resource URL was.
// TODO(sligocki): Remove this option once we know it's always safe.
Option<bool> preserve_url_relativity_;
@@ -0,0 +1,53 @@
/*
* Copyright 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Author: kspoelstra@we-amp.com (Kees Spoelstra)

#ifndef NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_

#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/html/empty_html_filter.h"

namespace net_instaweb {

class HtmlElement;
class RewriteDriver;

// Removes rel=subresource links.
class StripSubresourceHintsFilter : public EmptyHtmlFilter {
public:
explicit StripSubresourceHintsFilter(RewriteDriver* driver);
virtual ~StripSubresourceHintsFilter();

virtual void StartDocument();
virtual void StartElement(HtmlElement* element);
virtual void EndDocument();
virtual void EndElement(HtmlElement* element);
virtual void Flush();
virtual const char* Name() const { return "StripSubresourceHints"; }

private:
RewriteDriver* driver_;
HtmlElement* delete_element_;
bool remove_;

DISALLOW_COPY_AND_ASSIGN(StripSubresourceHintsFilter);
};

} // namespace net_instaweb

#endif // NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_
@@ -127,6 +127,7 @@
#include "net/instaweb/rewriter/public/split_html_helper_filter.h"
#include "net/instaweb/rewriter/public/strip_non_cacheable_filter.h"
#include "net/instaweb/rewriter/public/strip_scripts_filter.h"
#include "net/instaweb/rewriter/public/strip_subresource_hints_filter.h"
#include "net/instaweb/rewriter/public/support_noscript_filter.h"
#include "net/instaweb/rewriter/public/suppress_prehead_filter.h"
#include "net/instaweb/rewriter/public/url_input_resource.h"
@@ -656,7 +657,9 @@ void RewriteDriver::FlushAsync(Function* callback) {
}
flush_requested_ = false;

DetermineEnabledFilters();
// Figure out which filters should be enabled and whether any enabled filter
// can modify urls.
DetermineFiltersBehavior();

for (FilterList::iterator it = early_pre_render_filters_.begin();
it != early_pre_render_filters_.end(); ++it) {
@@ -1010,7 +1013,9 @@ void RewriteDriver::AddPreRenderFilters() {
dom_stats_filter_ = new DomStatsFilter(this);
AddOwnedEarlyPreRenderFilter(dom_stats_filter_);
}

if (!rewrite_options->preserve_subresource_hints()) {
AddOwnedEarlyPreRenderFilter(new StripSubresourceHintsFilter(this));
}
if (rewrite_options->Enabled(RewriteOptions::kDecodeRewrittenUrls)) {
AddOwnedEarlyPreRenderFilter(new DecodeRewrittenUrlsFilter(this));
}
@@ -3609,12 +3614,12 @@ bool RewriteDriver::Write(const ResourceVector& inputs,
return ret;
}

void RewriteDriver::DetermineEnabledFiltersImpl() {
DetermineEnabledFiltersInList(early_pre_render_filters_);
DetermineEnabledFiltersInList(pre_render_filters_);
void RewriteDriver::DetermineFiltersBehaviorImpl() {
DetermineFilterListBehavior(early_pre_render_filters_);
DetermineFilterListBehavior(pre_render_filters_);

// Call parent DetermineEnabled to setup post render filters.
HtmlParse::DetermineEnabledFiltersImpl();
// Call parent to set up post render filters.
HtmlParse::DetermineFiltersBehaviorImpl();
}

void RewriteDriver::ClearRequestProperties() {
@@ -267,6 +267,8 @@ const char RewriteOptions::kObliviousPagespeedUrls[] = "ObliviousPagespeedUrls";
const char RewriteOptions::kOptionCookiesDurationMs[] =
"OptionCookiesDurationMs";
const char RewriteOptions::kOverrideCachingTtlMs[] = "OverrideCachingTtlMs";
const char RewriteOptions::kPreserveSubresourceHints[] =
"PreserveSubresourceHints";
const char RewriteOptions::kPreserveUrlRelativity[] = "PreserveUrlRelativity";
const char RewriteOptions::kPrivateNotVaryForIE[] = "PrivateNotVaryForIE";
const char RewriteOptions::kPubliclyCacheMismatchedHashesExperimental[] =
@@ -2238,6 +2240,12 @@ void RewriteOptions::AddProperties() {
AddRequestProperty(
-1, &RewriteOptions::blink_blacklist_end_timestamp_ms_, "bbet", false);

AddBaseProperty(
false, &RewriteOptions::preserve_subresource_hints_, "psrh",
kPreserveSubresourceHints, kQueryScope,
"Keep original subresource hints in place.",
true);

AddBaseProperty(
true, &RewriteOptions::preserve_url_relativity_, "pur",
kPreserveUrlRelativity, kDirectoryScope,
@@ -1055,6 +1055,7 @@ TEST_F(RewriteOptionsTest, LookupOptionByNameTest) {
RewriteOptions::kObliviousPagespeedUrls,
RewriteOptions::kOptionCookiesDurationMs,
RewriteOptions::kOverrideCachingTtlMs,
RewriteOptions::kPreserveSubresourceHints,
RewriteOptions::kPreserveUrlRelativity,
RewriteOptions::kPrivateNotVaryForIE,
RewriteOptions::kProactiveResourceFreshening,
Oops, something went wrong.

0 comments on commit d0bae68

Please sign in to comment.