Skip to content
This repository has been archived by the owner on Apr 21, 2023. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Strip subresource hints
Default behaviour is to strip subresource links which are in scope for
pagespeed, these are the resources that are not disallowed or are valid
domains in the domain laywer.

Added can_modify_url flag to HtmlParse and CanModifyUrl function to the
HtmlFilter which indicates whether urls can be rewritten by the parser
and thus should be removed.  This is tested now in the
strip_subresource_hints_filter_test.cc as this is only used by the strip
subresource hints feature right now. This should be moved to the
HtmlParse tests.

DetermineEnabledFilters has been rolled up into
DetermineFiltersBehaviour, which also determines can_modify_url for all
the filters and possible future "behaviors".

Added new option to explicitly prevent the default behaviour:
ModPreserveSubresourceHints on/off

For ubuntu a check for the new setup /var/www/html instead of /var/www
for the document root has been added.

Fixes Issue #973.

(Squash of 54983f4 and b651c78.)
  • Loading branch information
keesspoelstra authored and jeffkaufman committed Mar 11, 2016
1 parent b8e27c8 commit d0bae68
Show file tree
Hide file tree
Showing 28 changed files with 655 additions and 39 deletions.
24 changes: 24 additions & 0 deletions install/debug.conf.template
Expand Up @@ -238,6 +238,29 @@ ModPagespeedLoadFromFile "http://@@APACHE_DOMAIN@@/mod_pagespeed_test/ipro/insta
Header append 'Cache-Control' 'no-transform'
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/default" >
ModPagespeedRewriteLevel CoreFilters
ModPagespeedDisableFilters add_instrumentation
ModPagespeedDisallow *dontrewriteme*
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/preserve_on/" >
ModPagespeedPreserveSubresourceHints on
ModPagespeedRewriteLevel CoreFilters
ModPagespeedDisableFilters add_instrumentation
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/preserve_off/" >
ModPagespeedPreserveSubresourceHints off
ModPagespeedRewriteLevel CoreFilters
ModPagespeedDisableFilters add_instrumentation
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/strip_subresource_hints/default_passthrough/" >
ModPagespeedRewriteLevel PassThrough
ModPagespeedDisableFilters add_instrumentation
</Directory>

# This Directory does not even exist, but by setting some options in that
# scope we test to make sure the options we claim are really settable in
# .htaccess. Note that <Directory> and .htaccess are enforced the same way.
Expand Down Expand Up @@ -1912,6 +1935,7 @@ Listen 127.0.0.2:@@APACHE_TERTIARY_PORT@@
#ALL_DIRECTIVES ModPagespeedNumExpensiveRewriteThreads 2
#ALL_DIRECTIVES ModPagespeedNumRewriteThreads 4
#ALL_DIRECTIVES ModPagespeedOptionCookiesDurationMs 12345
#ALL_DIRECTIVES ModPagespeedPreserveSubresourceHints on
#ALL_DIRECTIVES ModPagespeedPreserveUrlRelativity on
#ALL_DIRECTIVES ModPagespeedProgressiveJpegMinBytes 1000
#ALL_DIRECTIVES ModPagespeedRateLimitBackgroundFetches true
Expand Down
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="dontrewriteme_resource.jpg"/></head>
<body>
</body>
</html>
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<body>
</body>
</html>
@@ -0,0 +1,8 @@
<html>
<head>
<link rel="subresource" src="test1"/>
<link rel="subresource" src="test2"/>
</head>
<body>
</body>
</html>
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<body>
</body>
</html>
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<body>
</body>
</html>
@@ -0,0 +1,5 @@
<html>
<head><link rel="subresource" src="test"/></head>
<body>
</body>
</html>
1 change: 1 addition & 0 deletions install/setup_test_machine.sh
Expand Up @@ -66,6 +66,7 @@ fi
# This sequence is described in /usr/share/doc/apache2.2-common/README.Debian.gz
sudo a2ensite default-ssl
sudo a2enmod ssl
sudo a2enmod headers
sudo make-ssl-cert generate-default-snakeoil --force-overwrite

# TODO(jefftk): We don't restart the test servers often enough for this to be
Expand Down
8 changes: 8 additions & 0 deletions install/ubuntu.sh
Expand Up @@ -2,11 +2,19 @@

echo make $*

APACHE_DOC_ROOT=/var/www
# Test for new ubuntu setups where doc/root is in /var/www/html. We could run
# into a false positive here, but probably not for build systems.
if [ -e /var/www/html ]; then
APACHE_DOC_ROOT=/var/www/html
fi

exec make \
APACHE_CONTROL_PROGRAM=/etc/init.d/apache2 \
APACHE_LOG=/var/log/apache2/error.log \
APACHE_MODULES=/usr/lib/apache2/modules \
APACHE_CONF_FILE=/etc/apache2/apache2.conf \
APACHE_DOC_ROOT=$APACHE_DOC_ROOT \
APACHE_PIDFILE=/var/run/apache2.pid \
APACHE_PROGRAM=/usr/sbin/apache2 \
APACHE_ROOT=/etc/apache2 \
Expand Down
1 change: 1 addition & 0 deletions net/instaweb/instaweb.gyp
Expand Up @@ -1832,6 +1832,7 @@
'rewriter/split_html_helper_filter.cc',
'rewriter/strip_non_cacheable_filter.cc',
'rewriter/strip_scripts_filter.cc',
'rewriter/strip_subresource_hints_filter.cc',
'rewriter/support_noscript_filter.cc',
'rewriter/suppress_prehead_filter.cc',
'rewriter/url_input_resource.cc',
Expand Down
2 changes: 1 addition & 1 deletion net/instaweb/rewriter/public/rewrite_driver.h
Expand Up @@ -1244,7 +1244,7 @@ class RewriteDriver : public HtmlParse {
bool Decode(StringPiece leaf, ResourceNamer* resource_namer) const;

protected:
virtual void DetermineEnabledFiltersImpl();
virtual void DetermineFiltersBehaviorImpl();

private:
friend class DistributedRewriteContextTest;
Expand Down
15 changes: 9 additions & 6 deletions net/instaweb/rewriter/public/rewrite_filter.h
Expand Up @@ -20,20 +20,18 @@
#define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_FILTER_H_

#include "net/instaweb/rewriter/public/common_filter.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/rewrite_context.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/util/url_segment_encoder.h"

namespace net_instaweb {

class Resource;
class ResourceContext;
class RewriteContext;
class RewriteDriver;
class UrlSegmentEncoder;

class RewriteFilter : public CommonFilter {
public:
explicit RewriteFilter(RewriteDriver* driver)
Expand All @@ -49,6 +47,11 @@ class RewriteFilter : public CommonFilter {
// UsePropertyCacheDomCohort to return true.
virtual void DetermineEnabled(GoogleString* disabled_reason);

// Returns whether this filter can modify urls. Because most filters do
// modify urls this defaults returning true, and filters that commit to never
// modifying urls should override it to return false.
virtual bool CanModifyUrls() { return true; }

// All RewriteFilters define how they encode URLs and other
// associated information needed for a rewrite into a URL.
// The default implementation handles a single URL with
Expand Down
12 changes: 12 additions & 0 deletions net/instaweb/rewriter/public/rewrite_options.h
Expand Up @@ -364,6 +364,7 @@ class RewriteOptions {
static const char kObliviousPagespeedUrls[];
static const char kOptionCookiesDurationMs[];
static const char kOverrideCachingTtlMs[];
static const char kPreserveSubresourceHints[];
static const char kPreserveUrlRelativity[];
static const char kPrivateNotVaryForIE[];
static const char kProactiveResourceFreshening[];
Expand Down Expand Up @@ -1627,6 +1628,14 @@ class RewriteOptions {
set_option(x, &blink_blacklist_end_timestamp_ms_);
}

bool preserve_subresource_hints() const {
return preserve_subresource_hints_.value();
}
void set_preserve_subresource_hints(bool x) {
set_option(x, &preserve_subresource_hints_);
}


bool preserve_url_relativity() const {
return preserve_url_relativity_.value();
}
Expand Down Expand Up @@ -4134,6 +4143,9 @@ class RewriteOptions {
// The timestamp when blink blacklist expires.
Option<int64> blink_blacklist_end_timestamp_ms_;

// Keep the original subresource hints
Option<bool> preserve_subresource_hints_;

// Keep rewritten URLs as relative as the original resource URL was.
// TODO(sligocki): Remove this option once we know it's always safe.
Option<bool> preserve_url_relativity_;
Expand Down
53 changes: 53 additions & 0 deletions net/instaweb/rewriter/public/strip_subresource_hints_filter.h
@@ -0,0 +1,53 @@
/*
* Copyright 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Author: kspoelstra@we-amp.com (Kees Spoelstra)

#ifndef NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_

#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/html/empty_html_filter.h"

namespace net_instaweb {

class HtmlElement;
class RewriteDriver;

// Removes rel=subresource links.
class StripSubresourceHintsFilter : public EmptyHtmlFilter {
public:
explicit StripSubresourceHintsFilter(RewriteDriver* driver);
virtual ~StripSubresourceHintsFilter();

virtual void StartDocument();
virtual void StartElement(HtmlElement* element);
virtual void EndDocument();
virtual void EndElement(HtmlElement* element);
virtual void Flush();
virtual const char* Name() const { return "StripSubresourceHints"; }

private:
RewriteDriver* driver_;
HtmlElement* delete_element_;
bool remove_;

DISALLOW_COPY_AND_ASSIGN(StripSubresourceHintsFilter);
};

} // namespace net_instaweb

#endif // NET_INSTAWEB_REWRITER_PUBLIC_STRIP_SUBRESOURCE_HINTS_FILTER_H_
19 changes: 12 additions & 7 deletions net/instaweb/rewriter/rewrite_driver.cc
Expand Up @@ -127,6 +127,7 @@
#include "net/instaweb/rewriter/public/split_html_helper_filter.h"
#include "net/instaweb/rewriter/public/strip_non_cacheable_filter.h"
#include "net/instaweb/rewriter/public/strip_scripts_filter.h"
#include "net/instaweb/rewriter/public/strip_subresource_hints_filter.h"
#include "net/instaweb/rewriter/public/support_noscript_filter.h"
#include "net/instaweb/rewriter/public/suppress_prehead_filter.h"
#include "net/instaweb/rewriter/public/url_input_resource.h"
Expand Down Expand Up @@ -656,7 +657,9 @@ void RewriteDriver::FlushAsync(Function* callback) {
}
flush_requested_ = false;

DetermineEnabledFilters();
// Figure out which filters should be enabled and whether any enabled filter
// can modify urls.
DetermineFiltersBehavior();

for (FilterList::iterator it = early_pre_render_filters_.begin();
it != early_pre_render_filters_.end(); ++it) {
Expand Down Expand Up @@ -1010,7 +1013,9 @@ void RewriteDriver::AddPreRenderFilters() {
dom_stats_filter_ = new DomStatsFilter(this);
AddOwnedEarlyPreRenderFilter(dom_stats_filter_);
}

if (!rewrite_options->preserve_subresource_hints()) {
AddOwnedEarlyPreRenderFilter(new StripSubresourceHintsFilter(this));
}
if (rewrite_options->Enabled(RewriteOptions::kDecodeRewrittenUrls)) {
AddOwnedEarlyPreRenderFilter(new DecodeRewrittenUrlsFilter(this));
}
Expand Down Expand Up @@ -3609,12 +3614,12 @@ bool RewriteDriver::Write(const ResourceVector& inputs,
return ret;
}

void RewriteDriver::DetermineEnabledFiltersImpl() {
DetermineEnabledFiltersInList(early_pre_render_filters_);
DetermineEnabledFiltersInList(pre_render_filters_);
void RewriteDriver::DetermineFiltersBehaviorImpl() {
DetermineFilterListBehavior(early_pre_render_filters_);
DetermineFilterListBehavior(pre_render_filters_);

// Call parent DetermineEnabled to setup post render filters.
HtmlParse::DetermineEnabledFiltersImpl();
// Call parent to set up post render filters.
HtmlParse::DetermineFiltersBehaviorImpl();
}

void RewriteDriver::ClearRequestProperties() {
Expand Down
8 changes: 8 additions & 0 deletions net/instaweb/rewriter/rewrite_options.cc
Expand Up @@ -267,6 +267,8 @@ const char RewriteOptions::kObliviousPagespeedUrls[] = "ObliviousPagespeedUrls";
const char RewriteOptions::kOptionCookiesDurationMs[] =
"OptionCookiesDurationMs";
const char RewriteOptions::kOverrideCachingTtlMs[] = "OverrideCachingTtlMs";
const char RewriteOptions::kPreserveSubresourceHints[] =
"PreserveSubresourceHints";
const char RewriteOptions::kPreserveUrlRelativity[] = "PreserveUrlRelativity";
const char RewriteOptions::kPrivateNotVaryForIE[] = "PrivateNotVaryForIE";
const char RewriteOptions::kPubliclyCacheMismatchedHashesExperimental[] =
Expand Down Expand Up @@ -2238,6 +2240,12 @@ void RewriteOptions::AddProperties() {
AddRequestProperty(
-1, &RewriteOptions::blink_blacklist_end_timestamp_ms_, "bbet", false);

AddBaseProperty(
false, &RewriteOptions::preserve_subresource_hints_, "psrh",
kPreserveSubresourceHints, kQueryScope,
"Keep original subresource hints in place.",
true);

AddBaseProperty(
true, &RewriteOptions::preserve_url_relativity_, "pur",
kPreserveUrlRelativity, kDirectoryScope,
Expand Down
1 change: 1 addition & 0 deletions net/instaweb/rewriter/rewrite_options_test.cc
Expand Up @@ -1055,6 +1055,7 @@ TEST_F(RewriteOptionsTest, LookupOptionByNameTest) {
RewriteOptions::kObliviousPagespeedUrls,
RewriteOptions::kOptionCookiesDurationMs,
RewriteOptions::kOverrideCachingTtlMs,
RewriteOptions::kPreserveSubresourceHints,
RewriteOptions::kPreserveUrlRelativity,
RewriteOptions::kPrivateNotVaryForIE,
RewriteOptions::kProactiveResourceFreshening,
Expand Down

0 comments on commit d0bae68

Please sign in to comment.