Skip to content
This repository has been archived by the owner on Apr 21, 2023. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Do not rewrite nor cache empty resources.
While there is nothing technically wrong with caching and rewriting empty resources, there's not much value and we have run into situations where we unexpectedly produce empty resources.

Fixes issue 1050.
  • Loading branch information
sligocki authored and crowell committed May 14, 2015
1 parent 0a90bb1 commit 741a865
Show file tree
Hide file tree
Showing 22 changed files with 374 additions and 77 deletions.
13 changes: 12 additions & 1 deletion net/instaweb/http/cache_url_async_fetcher.cc
Expand Up @@ -123,8 +123,14 @@ class CachePutFetch : public SharedAsyncFetch {

virtual void HandleDone(bool success) {
DCHECK_EQ(request_headers()->method(), RequestHeaders::kGet);
// We do not cache empty 200 responses. (Empty 404, 500 are fine.)
// https://github.com/pagespeed/mod_pagespeed/issues/1050
const bool empty_200 =
(response_headers()->status_code() == HttpStatus::kOK &&
cache_value_.contents_size() == 0);
const bool insert_into_cache = (success &&
cacheable_ &&
!empty_200 &&
cache_value_writer_.has_buffered());

if (insert_into_cache) {
Expand Down Expand Up @@ -155,6 +161,10 @@ class CachePutFetch : public SharedAsyncFetch {
cache_->Put(url_, fragment_, req_properties_, http_options_,
&cache_value_, handler_);
}
// Note: We explicitly do not remember fetch failure, uncacheable nor
// empty resources here since we still want to proxy those through every
// time they are requested.
// TODO(sligocki): Maybe we should be remembering failures.
delete this;
}

Expand Down Expand Up @@ -329,7 +339,8 @@ class CacheFindCallback : public HTTPCache::Callback {
// TODO(sligocki): Should we mark resources as such in this class?
case HTTPCache::kRecentFetchFailed:
case HTTPCache::kRecentFetchNotCacheable:
VLOG(1) << "RecentFetchFailedOrNotCacheable: "
case HTTPCache::kRecentFetchEmpty:
VLOG(1) << "RecentFetchFailed, NotCacheable or Empty: "
<< url_ << " (" << fragment_ << ")";
if (!ignore_recent_fetch_failed_) {
base_fetch_->Done(false);
Expand Down
56 changes: 48 additions & 8 deletions net/instaweb/http/cache_url_async_fetcher_test.cc
Expand Up @@ -1438,15 +1438,55 @@ TEST_F(CacheUrlAsyncFetcherTest, FetchFailedIgnore) {
EXPECT_EQ(0, http_cache_->cache_inserts()->Get());
}

TEST_F(CacheUrlAsyncFetcherTest, NoCacheEmpty) {
const char url[] = "http://www.example.com/empty.html";
ResponseHeaders response_headers;
SetDefaultHeaders(kContentTypeHtml, &response_headers);
int ttl_ms = 5 * Timer::kMinuteMs;
response_headers.SetDateAndCaching(timer_.NowMs(), ttl_ms);

GoogleString empty_contents = "";
mock_fetcher_.SetResponse(url, response_headers, empty_contents);
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
empty_contents, kBackendFetch, false);

GoogleString non_empty_contents = "foobar";
mock_fetcher_.SetResponse(url, response_headers, non_empty_contents);
// cache_url_fetcher did not remember the empty contents.
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
non_empty_contents, kBackendFetch, true);
}

TEST_F(CacheUrlAsyncFetcherTest, CacheNonEmpty) {
// Companion test to NoCacheEmpty to make sure we are caching non-empty
// through the same flow.
const char url[] = "http://www.example.com/non_empty.html";
ResponseHeaders response_headers;
SetDefaultHeaders(kContentTypeHtml, &response_headers);
int ttl_ms = 5 * Timer::kMinuteMs;
response_headers.SetDateAndCaching(timer_.NowMs(), ttl_ms);

GoogleString original_contents = "foo";
mock_fetcher_.SetResponse(url, response_headers, original_contents);
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
original_contents, kBackendFetch, true);

GoogleString new_contents = "foobar";
mock_fetcher_.SetResponse(url, response_headers, new_contents);
// cache_url_fetcher did remember the original content.
FetchAndValidate(url, empty_request_headers_, true, HttpStatus::kOK,
original_contents, kBackendFetch, true);
}

TEST_F(CacheUrlAsyncFetcherTest, NoCacheHtmlOnEmptyHeader) {
ResponseHeaders response_headers;
SetDefaultHeaders(kContentTypeHtml, &response_headers);
response_headers.SetDate(timer_.NowMs());
response_headers.RemoveAll(HttpAttributes::kCacheControl);
const char url[] = "http://www.example.com/foo.html";
mock_fetcher_.SetResponse(url, response_headers, "");
mock_fetcher_.SetResponse(url, response_headers, "foo");

ExpectNoCache(url, "");
ExpectNoCache(url, "foo");
}

TEST_F(CacheUrlAsyncFetcherTest, DoCacheHtmlOnEmptyHeader) {
Expand All @@ -1458,9 +1498,9 @@ TEST_F(CacheUrlAsyncFetcherTest, DoCacheHtmlOnEmptyHeader) {
response_headers.SetDate(timer_.NowMs());
response_headers.RemoveAll(HttpAttributes::kCacheControl);
const char url[] = "http://www.example.com/foo.html";
mock_fetcher_.SetResponse(url, response_headers, "");
mock_fetcher_.SetResponse(url, response_headers, "foo");

ExpectCache(url, "");
ExpectCache(url, "foo");
}

// Even when set_default_cache_html(true), we still don't cache responses
Expand All @@ -1474,9 +1514,9 @@ TEST_F(CacheUrlAsyncFetcherTest, NoCacheSetCookie) {
response_headers.RemoveAll(HttpAttributes::kCacheControl);
response_headers.Add(HttpAttributes::kSetCookie, "foo=bar");
const char url[] = "http://www.example.com/foo.html";
mock_fetcher_.SetResponse(url, response_headers, "");
mock_fetcher_.SetResponse(url, response_headers, "foo");

ExpectNoCache(url, "");
ExpectNoCache(url, "foo");
}

TEST_F(CacheUrlAsyncFetcherTest, CachePublicSansTtl) {
Expand All @@ -1488,9 +1528,9 @@ TEST_F(CacheUrlAsyncFetcherTest, CachePublicSansTtl) {
response_headers.SetDate(timer_.NowMs());
response_headers.Replace(HttpAttributes::kCacheControl, "public");
const char url[] = "http://www.example.com/foo.html";
mock_fetcher_.SetResponse(url, response_headers, "");
mock_fetcher_.SetResponse(url, response_headers, "foo");

ExpectCache(url, "");
ExpectCache(url, "foo");
}

TEST_F(CacheUrlAsyncFetcherTest, CacheVaryForNonHtml) {
Expand Down
30 changes: 23 additions & 7 deletions net/instaweb/http/http_cache.cc
Expand Up @@ -45,6 +45,7 @@ namespace {
// in this case we could arguably remember it using the original cc-private ttl.
const int kRememberNotCacheableTtlSec = 300;
const int kRememberFetchFailedTtlSec = 300;
const int kRememberEmptyTtlSec = 300;

// We use an extremely low TTL for load-shed resources since we don't
// want this to get in the way of debugging, or letting a page with
Expand Down Expand Up @@ -97,6 +98,7 @@ HTTPCache::HTTPCache(CacheInterface* cache, Timer* timer, Hasher* hasher,
remember_not_cacheable_ttl_seconds_ = kRememberNotCacheableTtlSec;
remember_fetch_failed_ttl_seconds_ = kRememberFetchFailedTtlSec;
remember_fetch_dropped_ttl_seconds_ = kRememberFetchDroppedTtlSec;
remember_empty_ttl_seconds_ = kRememberEmptyTtlSec;
max_cacheable_response_content_length_ = kCacheSizeUnlimited;
}

Expand Down Expand Up @@ -192,7 +194,8 @@ class HTTPCacheCallback : public CacheInterface::Callback {

if (http_status == HttpStatus::kRememberNotCacheableStatusCode ||
http_status == HttpStatus::kRememberNotCacheableAnd200StatusCode ||
http_status == HttpStatus::kRememberFetchFailedStatusCode) {
http_status == HttpStatus::kRememberFetchFailedStatusCode ||
http_status == HttpStatus::kRememberEmptyStatusCode) {
// If the response was stored as uncacheable and a 200, it may since
// have since been added to the override caching group. Hence, we
// consider it invalid if override_cache_ttl_ms > 0.
Expand All @@ -209,9 +212,14 @@ class HTTPCacheCallback : public CacheInterface::Callback {
HttpStatus::kRememberNotCacheableAnd200StatusCode) {
status = "not-cacheable";
result = HTTPCache::kRecentFetchNotCacheable;
} else {
} else if (http_status ==
HttpStatus::kRememberFetchFailedStatusCode) {
status = "not-found";
result = HTTPCache::kRecentFetchFailed;
} else {
DCHECK(http_status == HttpStatus::kRememberEmptyStatusCode);
status = "empty";
result = HTTPCache::kRecentFetchEmpty;
}
if (handler_ != NULL) {
handler_->Message(kInfo,
Expand Down Expand Up @@ -313,7 +321,7 @@ void HTTPCache::RememberNotCacheable(const GoogleString& key,
const GoogleString& fragment,
bool is_200_status_code,
MessageHandler* handler) {
RememberFetchFailedorNotCacheableHelper(
RememberFetchFailedOrNotCacheableHelper(
key, fragment, handler,
is_200_status_code ? HttpStatus::kRememberNotCacheableAnd200StatusCode :
HttpStatus::kRememberNotCacheableStatusCode,
Expand All @@ -323,27 +331,35 @@ void HTTPCache::RememberNotCacheable(const GoogleString& key,
void HTTPCache::RememberFetchFailed(const GoogleString& key,
const GoogleString& fragment,
MessageHandler* handler) {
RememberFetchFailedorNotCacheableHelper(key, fragment, handler,
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
HttpStatus::kRememberFetchFailedStatusCode,
remember_fetch_failed_ttl_seconds_);
}

void HTTPCache::RememberFetchDropped(const GoogleString& key,
const GoogleString& fragment,
MessageHandler* handler) {
RememberFetchFailedorNotCacheableHelper(key, fragment, handler,
MessageHandler* handler) {
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
HttpStatus::kRememberFetchFailedStatusCode,
remember_fetch_dropped_ttl_seconds_);
}

void HTTPCache::RememberEmpty(const GoogleString& key,
const GoogleString& fragment,
MessageHandler* handler) {
RememberFetchFailedOrNotCacheableHelper(key, fragment, handler,
HttpStatus::kRememberEmptyStatusCode,
remember_empty_ttl_seconds_);
}

void HTTPCache::set_max_cacheable_response_content_length(int64 value) {
DCHECK(value >= kCacheSizeUnlimited);
if (value >= kCacheSizeUnlimited) {
max_cacheable_response_content_length_ = value;
}
}

void HTTPCache::RememberFetchFailedorNotCacheableHelper(
void HTTPCache::RememberFetchFailedOrNotCacheableHelper(
const GoogleString& key, const GoogleString& fragment,
MessageHandler* handler, HttpStatus::Code code, int64 ttl_sec) {
ResponseHeaders headers;
Expand Down
27 changes: 25 additions & 2 deletions net/instaweb/http/http_cache_test.cc
Expand Up @@ -21,6 +21,7 @@
#include "net/instaweb/http/public/http_cache.h"

#include <cstddef> // for size_t

#include "net/instaweb/http/public/content_type.h"
#include "net/instaweb/http/public/http_value.h"
#include "net/instaweb/http/public/meta_data.h"
Expand All @@ -29,6 +30,7 @@
#include "net/instaweb/util/public/google_message_handler.h"
#include "net/instaweb/util/public/gtest.h"
#include "net/instaweb/util/public/lru_cache.h"
#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/mock_hasher.h"
#include "net/instaweb/util/public/mock_timer.h"
#include "net/instaweb/util/public/platform.h"
Expand All @@ -53,8 +55,6 @@ const char kFragment2[] = "www.other.com";

namespace net_instaweb {

class MessageHandler;

class HTTPCacheTest : public testing::Test {
protected:
// Helper class for calling Get and Query methods on cache implementations
Expand Down Expand Up @@ -484,6 +484,29 @@ TEST_F(HTTPCacheTest, RememberDropped) {
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));
}

// Remember empty resources.
TEST_F(HTTPCacheTest, RememberEmpty) {
ResponseHeaders meta_data_out;
http_cache_->RememberEmpty(kUrl, kFragment, &message_handler_);
HTTPValue value;
EXPECT_EQ(HTTPCache::kRecentFetchEmpty,
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));

// Now advance time 301 seconds; the cache should allow us to try fetching
// again.
mock_timer_.AdvanceMs(301 * 1000);
EXPECT_EQ(HTTPCache::kNotFound,
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));

http_cache_->set_remember_empty_ttl_seconds(600);
http_cache_->RememberEmpty(kUrl, kFragment, &message_handler_);
// Now advance time 301 seconds; the cache should remember that the resource
// is empty.
mock_timer_.AdvanceMs(301 * 1000);
EXPECT_EQ(HTTPCache::kRecentFetchEmpty,
Find(kUrl, kFragment, &value, &meta_data_out, &message_handler_));
}

// Make sure we don't remember 'non-cacheable' once we've put it into
// non-recording of failures mode (but do before that), and that we
// remember successful results even when in SetIgnoreFailurePuts() mode.
Expand Down
23 changes: 22 additions & 1 deletion net/instaweb/http/public/http_cache.h
Expand Up @@ -75,6 +75,7 @@ class HTTPCache {
// codes or are not cacheable.
kRecentFetchFailed,
kRecentFetchNotCacheable,
kRecentFetchEmpty, // We do not cache empty resources.
};

virtual void set_hasher(Hasher* hasher) { hasher_ = hasher; }
Expand Down Expand Up @@ -287,6 +288,14 @@ class HTTPCache {
const GoogleString& fragment,
MessageHandler* handler);

// Tell the HTTP Cache to remember that a particular URL shouldn't be cached
// because it was an empty resource. We defensively avoid caching empty input
// resources.
// https://github.com/pagespeed/mod_pagespeed/issues/1050
virtual void RememberEmpty(const GoogleString& key,
const GoogleString& fragment,
MessageHandler* handler);

// Indicates if the response is within the cacheable size limit. Clients of
// HTTPCache must check if they will be eventually able to cache their entries
// before buffering them in memory. If the content length header is not found
Expand Down Expand Up @@ -353,6 +362,17 @@ class HTTPCache {
}
}

int64 remember_empty_ttl_seconds() {
return remember_empty_ttl_seconds_;
}

virtual void set_remember_empty_ttl_seconds(int64 value) {
DCHECK_LE(0, value);
if (value >= 0) {
remember_empty_ttl_seconds_ = value;
}
}

int max_cacheable_response_content_length() {
return max_cacheable_response_content_length_;
}
Expand Down Expand Up @@ -391,7 +411,7 @@ class HTTPCache {
void UpdateStats(const GoogleString& key, const GoogleString& fragment,
CacheInterface::KeyState backend_state, FindResult result,
bool has_fallback, bool is_expired, MessageHandler* handler);
void RememberFetchFailedorNotCacheableHelper(
void RememberFetchFailedOrNotCacheableHelper(
const GoogleString& key, const GoogleString& fragment,
MessageHandler* handler, HttpStatus::Code code, int64 ttl_sec);

Expand Down Expand Up @@ -422,6 +442,7 @@ class HTTPCache {
int64 remember_not_cacheable_ttl_seconds_;
int64 remember_fetch_failed_ttl_seconds_;
int64 remember_fetch_dropped_ttl_seconds_;
int64 remember_empty_ttl_seconds_;
int64 max_cacheable_response_content_length_;
AtomicBool ignore_failure_puts_;

Expand Down
11 changes: 9 additions & 2 deletions net/instaweb/http/public/write_through_http_cache.h
Expand Up @@ -23,15 +23,14 @@

#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/util/public/basictypes.h"
#include "net/instaweb/util/public/cache_interface.h"
#include "net/instaweb/util/public/scoped_ptr.h"
#include "net/instaweb/util/public/string.h"
#include "pagespeed/kernel/base/string_util.h"

namespace net_instaweb {

class CacheInterface;
class Hasher;
class HTTPValue;
class MessageHandler;
class Statistics;
class Timer;
Expand Down Expand Up @@ -80,6 +79,9 @@ class WriteThroughHTTPCache : public HTTPCache {
// Implements HTTPCache::set_remember_fetch_dropped_ttl_seconds();
virtual void set_remember_fetch_dropped_ttl_seconds(int64 value);

// Implements HTTPCache::set_remember_empty_ttl_seconds();
virtual void set_remember_empty_ttl_seconds(int64 value);

// Implements HTTPCache::set_max_cacheable_response_content_length().
virtual void set_max_cacheable_response_content_length(int64 value);

Expand All @@ -99,6 +101,11 @@ class WriteThroughHTTPCache : public HTTPCache {
const GoogleString& fragment,
MessageHandler * handler);

// Implements HTTPCache::RememberEmpty().
virtual void RememberEmpty(const GoogleString& key,
const GoogleString& fragment,
MessageHandler * handler);

// By default, all data goes into both cache1 and cache2. But
// if you only want to put small items in cache1, you can set the
// size limit. Note that both the key and value will count
Expand Down
13 changes: 13 additions & 0 deletions net/instaweb/http/write_through_http_cache.cc
Expand Up @@ -273,6 +273,12 @@ void WriteThroughHTTPCache::set_remember_fetch_dropped_ttl_seconds(
cache2_->set_remember_fetch_dropped_ttl_seconds(value);
}

void WriteThroughHTTPCache::set_remember_empty_ttl_seconds(int64 value) {
HTTPCache::set_remember_empty_ttl_seconds(value);
cache1_->set_remember_empty_ttl_seconds(value);
cache2_->set_remember_empty_ttl_seconds(value);
}

void WriteThroughHTTPCache::set_max_cacheable_response_content_length(
int64 value) {
HTTPCache::set_max_cacheable_response_content_length(value);
Expand Down Expand Up @@ -302,4 +308,11 @@ void WriteThroughHTTPCache::RememberFetchDropped(const GoogleString& key,
cache2_->RememberFetchDropped(key, fragment, handler);
}

void WriteThroughHTTPCache::RememberEmpty(const GoogleString& key,
const GoogleString& fragment,
MessageHandler * handler) {
cache1_->RememberEmpty(key, fragment, handler);
cache2_->RememberEmpty(key, fragment, handler);
}

} // namespace net_instaweb

0 comments on commit 741a865

Please sign in to comment.