Permalink
Browse files

Do not rewrite nor cache empty resources.

While there is nothing technically wrong with caching and rewriting empty resources, there's not much value and we have run into situations where we unexpectedly produce empty resources.

Fixes issue 1050.
  • Loading branch information...
sligocki authored and crowell committed May 1, 2015
1 parent 0a90bb1 commit 741a86580dd6bf5b2a2f4bb24fca6d3bae919152
@@ -123,8 +123,14 @@ class CachePutFetch : public SharedAsyncFetch {

virtual void HandleDone(bool success) {
DCHECK_EQ(request_headers()->method(), RequestHeaders::kGet);
// We do not cache empty 200 responses. (Empty 404, 500 are fine.)
// https://github.com/pagespeed/mod_pagespeed/issues/1050
const bool empty_200 =
(response_headers()->status_code() == HttpStatus::kOK &&
cache_value_.contents_size() == 0);
const bool insert_into_cache = (success &&
cacheable_ &&
!empty_200 &&
cache_value_writer_.has_buffered());

if (insert_into_cache) {
@@ -155,6 +161,10 @@ class CachePutFetch : public SharedAsyncFetch {
cache_->Put(url_, fragment_, req_properties_, http_options_,
&cache_value_, handler_);
}
// Note: We explicitly do not remember fetch failure, uncacheable nor
// empty resources here since we still want to proxy those through every
// time they are requested.
// TODO(sligocki): Maybe we should be remembering failures.
delete this;
}

@@ -329,7 +339,8 @@ class CacheFindCallback : public HTTPCache::Callback {
// TODO(sligocki): Should we mark resources as such in this class?
case HTTPCache::kRecentFetchFailed:
case HTTPCache::kRecentFetchNotCacheable:
VLOG(1) << "RecentFetchFailedOrNotCacheable: "
case HTTPCache::kRecentFetchEmpty:
VLOG(1) << "RecentFetchFailed, NotCacheable or Empty: "
<< url_ << " (" << fragment_ << ")";
if (!ignore_recent_fetch_failed_) {
base_fetch_->Done(false);
@@ -1438,15 +1438,55 @@ TEST_F(CacheUrlAsyncFetcherTest, FetchFailedIgnore) {
EXPECT_EQ(0, http_cache_->cache_inserts()->Get());
}

TEST_F(CacheUrlAsyncFetcherTest, NoCacheEmpty) {
const char url[] = "http://www.example.com/empty.html";
ResponseHeaders response_headers;
SetDefaultHeaders(kContentTypeHtml, &response_headers);
int ttl_ms = 5 * Timer::kMinuteMs;
response_headers.SetDateAndCaching(timer_.NowMs(), ttl_ms);

GoogleString empty_contents = "";
mock_fetcher_.SetResponse(url, response_headers, empty_contents);
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
empty_contents, kBackendFetch, false);

GoogleString non_empty_contents = "foobar";
mock_fetcher_.SetResponse(url, response_headers, non_empty_contents);
// cache_url_fetcher did not remember the empty contents.
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
non_empty_contents, kBackendFetch, true);
}

TEST_F(CacheUrlAsyncFetcherTest, CacheNonEmpty) {
// Companion test to NoCacheEmpty to make sure we are caching non-empty
// through the same flow.
const char url[] = "http://www.example.com/non_empty.html";
ResponseHeaders response_headers;
SetDefaultHeaders(kContentTypeHtml, &response_headers);
int ttl_ms = 5 * Timer::kMinuteMs;
response_headers.SetDateAndCaching(timer_.NowMs(), ttl_ms);

GoogleString original_contents = "foo";
mock_fetcher_.SetResponse(url, response_headers, original_contents);
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
original_contents, kBackendFetch, true);

GoogleString new_contents = "foobar";
mock_fetcher_.SetResponse(url, response_headers, new_contents);
// cache_url_fetcher did remember the original content.
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
original_contents, kBackendFetch, true);
}

TEST_F(CacheUrlAsyncFetcherTest, NoCacheHtmlOnEmptyHeader) {
ResponseHeaders response_headers;
SetDefaultHeaders(kContentTypeHtml, &response_headers);
response_headers.SetDate(timer_.NowMs());
response_headers.RemoveAll(HttpAttributes::kCacheControl);
const char url[] = "http://www.example.com/foo.html";
mock_fetcher_.SetResponse(url, response_headers, "");
mock_fetcher_.SetResponse(url, response_headers, "foo");

ExpectNoCache(url, "");
ExpectNoCache(url, "foo");
}

TEST_F(CacheUrlAsyncFetcherTest, DoCacheHtmlOnEmptyHeader) {
@@ -1458,9 +1498,9 @@ TEST_F(CacheUrlAsyncFetcherTest, DoCacheHtmlOnEmptyHeader) {
response_headers.SetDate(timer_.NowMs());
response_headers.RemoveAll(HttpAttributes::kCacheControl);
const char url[] = "http://www.example.com/foo.html";
mock_fetcher_.SetResponse(url, response_headers, "");
mock_fetcher_.SetResponse(url, response_headers, "foo");

ExpectCache(url, "");
ExpectCache(url, "foo");
}

// Even when set_default_cache_html(true), we still don't cache responses
@@ -1474,9 +1514,9 @@ TEST_F(CacheUrlAsyncFetcherTest, NoCacheSetCookie) {
response_headers.RemoveAll(HttpAttributes::kCacheControl);
response_headers.Add(HttpAttributes::kSetCookie, "foo=bar");
const char url[] = "http://www.example.com/foo.html";
mock_fetcher_.SetResponse(url, response_headers, "");
mock_fetcher_.SetResponse(url, response_headers, "foo");

ExpectNoCache(url, "");
ExpectNoCache(url, "foo");
}

TEST_F(CacheUrlAsyncFetcherTest, CachePublicSansTtl) {
@@ -1488,9 +1528,9 @@ TEST_F(CacheUrlAsyncFetcherTest, CachePublicSansTtl) {
response_headers.SetDate(timer_.NowMs());
response_headers.Replace(HttpAttributes::kCacheControl, "public");
const char url[] = "http://www.example.com/foo.html";
mock_fetcher_.SetResponse(url, response_headers, "");
mock_fetcher_.SetResponse(url, response_headers, "foo");

ExpectCache(url, "");
ExpectCache(url, "foo");
}

TEST_F(CacheUrlAsyncFetcherTest, CacheVaryForNonHtml) {
@@ -45,6 +45,7 @@ namespace {
// in this case we could arguably remember it using the original cc-private ttl.
const int kRememberNotCacheableTtlSec = 300;
const int kRememberFetchFailedTtlSec = 300;
const int kRememberEmptyTtlSec = 300;

// We use an extremely low TTL for load-shed resources since we don't
// want this to get in the way of debugging, or letting a page with
@@ -97,6 +98,7 @@ HTTPCache::HTTPCache(CacheInterface* cache, Timer* timer, Hasher* hasher,
remember_not_cacheable_ttl_seconds_ = kRememberNotCacheableTtlSec;
remember_fetch_failed_ttl_seconds_ = kRememberFetchFailedTtlSec;
remember_fetch_dropped_ttl_seconds_ = kRememberFetchDroppedTtlSec;
remember_empty_ttl_seconds_ = kRememberEmptyTtlSec;
max_cacheable_response_content_length_ = kCacheSizeUnlimited;
}

@@ -192,7 +194,8 @@ class HTTPCacheCallback : public CacheInterface::Callback {

if (http_status == HttpStatus::kRememberNotCacheableStatusCode ||
http_status == HttpStatus::kRememberNotCacheableAnd200StatusCode ||
http_status == HttpStatus::kRememberFetchFailedStatusCode) {
http_status == HttpStatus::kRememberFetchFailedStatusCode ||
http_status == HttpStatus::kRememberEmptyStatusCode) {
// If the response was stored as uncacheable and a 200, it may since
// have since been added to the override caching group. Hence, we
// consider it invalid if override_cache_ttl_ms > 0.
@@ -209,9 +212,14 @@ class HTTPCacheCallback : public CacheInterface::Callback {
HttpStatus::kRememberNotCacheableAnd200StatusCode) {
status = "not-cacheable";
result = HTTPCache::kRecentFetchNotCacheable;
} else {
} else if (http_status ==
HttpStatus::kRememberFetchFailedStatusCode) {
status = "not-found";
result = HTTPCache::kRecentFetchFailed;
} else {
DCHECK(http_status == HttpStatus::kRememberEmptyStatusCode);
status = "empty";
result = HTTPCache::kRecentFetchEmpty;
}
if (handler_ != NULL) {
handler_->Message(kInfo,
@@ -313,7 +321,7 @@ void HTTPCache::RememberNotCacheable(const GoogleString& key,
const GoogleString& fragment,
bool is_200_status_code,
MessageHandler* handler) {
RememberFetchFailedorNotCacheableHelper(
RememberFetchFailedOrNotCacheableHelper(
key, fragment, handler,
is_200_status_code ? HttpStatus::kRememberNotCacheableAnd200StatusCode :
HttpStatus::kRememberNotCacheableStatusCode,
@@ -323,27 +331,35 @@ void HTTPCache::RememberNotCacheable(const GoogleString& key,
void HTTPCache::RememberFetchFailed(const GoogleString& key,
const GoogleString& fragment,
MessageHandler* handler) {
RememberFetchFailedorNotCacheableHelper(key, fragment, handler,
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
HttpStatus::kRememberFetchFailedStatusCode,
remember_fetch_failed_ttl_seconds_);
}

void HTTPCache::RememberFetchDropped(const GoogleString& key,
const GoogleString& fragment,
MessageHandler* handler) {
RememberFetchFailedorNotCacheableHelper(key, fragment, handler,
MessageHandler* handler) {
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
HttpStatus::kRememberFetchFailedStatusCode,
remember_fetch_dropped_ttl_seconds_);
}

void HTTPCache::RememberEmpty(const GoogleString& key,
const GoogleString& fragment,
MessageHandler* handler) {
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
HttpStatus::kRememberEmptyStatusCode,
remember_empty_ttl_seconds_);
}

void HTTPCache::set_max_cacheable_response_content_length(int64 value) {
DCHECK(value >= kCacheSizeUnlimited);
if (value >= kCacheSizeUnlimited) {
max_cacheable_response_content_length_ = value;
}
}

void HTTPCache::RememberFetchFailedorNotCacheableHelper(
void HTTPCache::RememberFetchFailedOrNotCacheableHelper(
const GoogleString& key, const GoogleString& fragment,
MessageHandler* handler, HttpStatus::Code code, int64 ttl_sec) {
ResponseHeaders headers;
@@ -21,6 +21,7 @@
#include "net/instaweb/http/public/http_cache.h"

#include <cstddef> // for size_t

#include "net/instaweb/http/public/content_type.h"
#include "net/instaweb/http/public/http_value.h"
#include "net/instaweb/http/public/meta_data.h"
@@ -29,6 +30,7 @@
#include "net/instaweb/util/public/google_message_handler.h"
#include "net/instaweb/util/public/gtest.h"
#include "net/instaweb/util/public/lru_cache.h"
#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/mock_hasher.h"
#include "net/instaweb/util/public/mock_timer.h"
#include "net/instaweb/util/public/platform.h"
@@ -53,8 +55,6 @@ const char kFragment2[] = "www.other.com";

namespace net_instaweb {

class MessageHandler;

class HTTPCacheTest : public testing::Test {
protected:
// Helper class for calling Get and Query methods on cache implementations
@@ -484,6 +484,29 @@ TEST_F(HTTPCacheTest, RememberDropped) {
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));
}

// Remember empty resources.
TEST_F(HTTPCacheTest, RememberEmpty) {
ResponseHeaders meta_data_out;
http_cache_->RememberEmpty(kUrl, kFragment, &message_handler_);
HTTPValue value;
EXPECT_EQ(HTTPCache::kRecentFetchEmpty,
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));

// Now advance time 301 seconds; the cache should allow us to try fetching
// again.
mock_timer_.AdvanceMs(301 * 1000);
EXPECT_EQ(HTTPCache::kNotFound,
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));

http_cache_->set_remember_empty_ttl_seconds(600);
http_cache_->RememberEmpty(kUrl, kFragment, &message_handler_);
// Now advance time 301 seconds; the cache should remember that the resource
// is empty.
mock_timer_.AdvanceMs(301 * 1000);
EXPECT_EQ(HTTPCache::kRecentFetchEmpty,
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));
}

// Make sure we don't remember 'non-cacheable' once we've put it into
// non-recording of failures mode (but do before that), and that we
// remember successful results even when in SetIgnoreFailurePuts() mode.
@@ -75,6 +75,7 @@ class HTTPCache {
// codes or are not cacheable.
kRecentFetchFailed,
kRecentFetchNotCacheable,
kRecentFetchEmpty, // We do not cache empty resources.
};

virtual void set_hasher(Hasher* hasher) { hasher_ = hasher; }
@@ -287,6 +288,14 @@ class HTTPCache {
const GoogleString& fragment,
MessageHandler* handler);

// Tell the HTTP Cache to remember that a particular URL shouldn't be cached
// because it was an empty resource. We defensively avoid caching empty input
// resources.
// https://github.com/pagespeed/mod_pagespeed/issues/1050
virtual void RememberEmpty(const GoogleString& key,
const GoogleString& fragment,
MessageHandler* handler);

// Indicates if the response is within the cacheable size limit. Clients of
// HTTPCache must check if they will be eventually able to cache their entries
// before buffering them in memory. If the content length header is not found
@@ -353,6 +362,17 @@ class HTTPCache {
}
}

int64 remember_empty_ttl_seconds() {
return remember_empty_ttl_seconds_;
}

virtual void set_remember_empty_ttl_seconds(int64 value) {
DCHECK_LE(0, value);
if (value >= 0) {
remember_empty_ttl_seconds_ = value;
}
}

int max_cacheable_response_content_length() {
return max_cacheable_response_content_length_;
}
@@ -391,7 +411,7 @@ class HTTPCache {
void UpdateStats(const GoogleString& key, const GoogleString& fragment,
CacheInterface::KeyState backend_state, FindResult result,
bool has_fallback, bool is_expired, MessageHandler* handler);
void RememberFetchFailedorNotCacheableHelper(
void RememberFetchFailedOrNotCacheableHelper(
const GoogleString& key, const GoogleString& fragment,
MessageHandler* handler, HttpStatus::Code code, int64 ttl_sec);

@@ -422,6 +442,7 @@ class HTTPCache {
int64 remember_not_cacheable_ttl_seconds_;
int64 remember_fetch_failed_ttl_seconds_;
int64 remember_fetch_dropped_ttl_seconds_;
int64 remember_empty_ttl_seconds_;
int64 max_cacheable_response_content_length_;
AtomicBool ignore_failure_puts_;

@@ -23,15 +23,14 @@

#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/cache_interface.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string.h"
#include "pagespeed/kernel/base/string_util.h"

namespace net_instaweb {

class CacheInterface;
class Hasher;
class HTTPValue;
class MessageHandler;
class Statistics;
class Timer;
@@ -80,6 +79,9 @@ class WriteThroughHTTPCache : public HTTPCache {
// Implements HTTPCache::set_remember_fetch_dropped_ttl_seconds();
virtual void set_remember_fetch_dropped_ttl_seconds(int64 value);

// Implements HTTPCache::set_remember_empty_ttl_seconds();
virtual void set_remember_empty_ttl_seconds(int64 value);

// Implements HTTPCache::set_max_cacheable_response_content_length().
virtual void set_max_cacheable_response_content_length(int64 value);

@@ -99,6 +101,11 @@ class WriteThroughHTTPCache : public HTTPCache {
const GoogleString& fragment,
MessageHandler * handler);

// Implements HTTPCache::RememberEmpty().
virtual void RememberEmpty(const GoogleString& key,
const GoogleString& fragment,
MessageHandler * handler);

// By default, all data goes into both cache1 and cache2. But
// if you only want to put small items in cache1, you can set the
// size limit. Note that both the key and value will count
@@ -273,6 +273,12 @@ void WriteThroughHTTPCache::set_remember_fetch_dropped_ttl_seconds(
cache2_->set_remember_fetch_dropped_ttl_seconds(value);
}

void WriteThroughHTTPCache::set_remember_empty_ttl_seconds(int64 value) {
HTTPCache::set_remember_empty_ttl_seconds(value);
cache1_->set_remember_empty_ttl_seconds(value);
cache2_->set_remember_empty_ttl_seconds(value);
}

void WriteThroughHTTPCache::set_max_cacheable_response_content_length(
int64 value) {
HTTPCache::set_max_cacheable_response_content_length(value);
@@ -302,4 +308,11 @@ void WriteThroughHTTPCache::RememberFetchDropped(const GoogleString& key,
cache2_->RememberFetchDropped(key, fragment, handler);
}

void WriteThroughHTTPCache::RememberEmpty(const GoogleString& key,
const GoogleString& fragment,
MessageHandler * handler) {
cache1_->RememberEmpty(key, fragment, handler);
cache2_->RememberEmpty(key, fragment, handler);
}

} // namespace net_instaweb
Oops, something went wrong.

0 comments on commit 741a865

Please sign in to comment.