Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Fixed #580 -- Added mega support for generating Vary headers, includi…

…ng some view decorators, and changed the CacheMiddleware to account for the Vary header. Also added GZipMiddleware and ConditionalGetMiddleware, which are no longer handled by CacheMiddleware itself. Also updated the cache.txt and middleware.txt docs. Thanks to Hugo and Sune for the excellent patches

git-svn-id: http://code.djangoproject.com/svn/django/trunk@810 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit d65526d6886067a8ef368e5b02fce80e1e4c4903 1 parent a5a89b5
@adrianholovaty adrianholovaty authored
View
116 django/middleware/cache.py
@@ -1,88 +1,70 @@
+import copy
from django.conf import settings
from django.core.cache import cache
+from django.utils.cache import get_cache_key, learn_cache_key, patch_response_headers
from django.utils.httpwrappers import HttpResponseNotModified
-from django.utils.text import compress_string
-import datetime, md5
class CacheMiddleware:
"""
Cache middleware. If this is enabled, each Django-powered page will be
- cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs. Pages
- with GET or POST parameters are not cached.
+ cached for CACHE_MIDDLEWARE_SECONDS seconds. Cache is based on URLs.
- If the cache is shared across multiple sites using the same Django
- installation, set the CACHE_MIDDLEWARE_KEY_PREFIX to the name of the site,
- or some other string that is unique to this Django instance, to prevent key
- collisions.
+ Only parameter-less GET or HEAD-requests with status code 200 are cached.
- This middleware will also make the following optimizations:
+ This middleware expects that a HEAD request is answered with a response
+ exactly like the corresponding GET request.
- * If the CACHE_MIDDLEWARE_GZIP setting is True, the content will be
- gzipped.
+ When a hit occurs, a shallow copy of the original response object is
+ returned from process_request.
- * ETags will be added, using a simple MD5 hash of the page's content.
+ Pages will be cached based on the contents of the request headers
+ listed in the response's "Vary" header. This means that pages shouldn't
+ change their "Vary" header.
+
+ This middleware also sets ETag, Last-Modified, Expires and Cache-Control
+ headers on the response object.
"""
+ def __init__(self, cache_timeout=None, key_prefix=None):
+ self.cache_timeout = cache_timeout
+ if cache_timeout is None:
+ self.cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
+ self.key_prefix = key_prefix
+ if key_prefix is None:
+ self.key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
+
def process_request(self, request):
- """
- Checks whether the page is already cached. If it is, returns the cached
- version. Also handles ETag stuff.
- """
- if request.GET or request.POST:
- request._cache_middleware_set_cache = False
+ "Checks whether the page is already cached and returns the cached version if available."
+ if not request.META['REQUEST_METHOD'] in ('GET', 'HEAD') or request.GET:
+ request._cache_update_cache = False
return None # Don't bother checking the cache.
- accept_encoding = ''
- if settings.CACHE_MIDDLEWARE_GZIP:
- try:
- accept_encoding = request.META['HTTP_ACCEPT_ENCODING']
- except KeyError:
- pass
- accepts_gzip = 'gzip' in accept_encoding
- request._cache_middleware_accepts_gzip = accepts_gzip
-
- # This uses the same cache_key as views.decorators.cache.cache_page,
- # so the cache can be shared.
- cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % \
- (settings.CACHE_MIDDLEWARE_KEY_PREFIX, request.path, accepts_gzip)
- request._cache_middleware_key = cache_key
+ cache_key = get_cache_key(request, self.key_prefix)
+ if cache_key is None:
+ request._cache_update_cache = True
+ return None # No cache information available, need to rebuild.
response = cache.get(cache_key, None)
if response is None:
- request._cache_middleware_set_cache = True
- return None
- else:
- request._cache_middleware_set_cache = False
- # Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet
- try:
- if_none_match = request.META['HTTP_IF_NONE_MATCH']
- except KeyError:
- if_none_match = None
- try:
- if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
- except KeyError:
- if_modified_since = None
- if if_none_match is None and if_modified_since is None:
- pass
- elif if_none_match is not None and response['ETag'] != if_none_match:
- pass
- elif if_modified_since is not None and response['Last-Modified'] != if_modified_since:
- pass
- else:
- return HttpResponseNotModified()
- return response
+ request._cache_update_cache = True
+ return None # No cache information available, need to rebuild.
+
+ request._cache_update_cache = False
+ return copy.copy(response)
def process_response(self, request, response):
- """
- Sets the cache, if needed.
- """
- if request._cache_middleware_set_cache:
- content = response.get_content_as_string(settings.DEFAULT_CHARSET)
- if request._cache_middleware_accepts_gzip:
- content = compress_string(content)
- response.content = content
- response['Content-Encoding'] = 'gzip'
- response['ETag'] = md5.new(content).hexdigest()
- response['Content-Length'] = '%d' % len(content)
- response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
- cache.set(request._cache_middleware_key, response, settings.CACHE_MIDDLEWARE_SECONDS)
+ "Sets the cache, if needed."
+ if not request._cache_update_cache:
+ # We don't need to update the cache, just return.
+ return response
+ if not request.META['REQUEST_METHOD'] == 'GET':
+ # This is a stronger requirement than above. It is needed
+ # because of interactions between this middleware and the
+ # HTTPMiddleware, which throws the body of a HEAD-request
+ # away before this middleware gets a chance to cache it.
+ return response
+ if not response.status_code == 200:
+ return response
+ patch_response_headers(response, self.cache_timeout)
+ cache_key = learn_cache_key(request, response, self.cache_timeout, self.key_prefix)
+ cache.set(cache_key, response, self.cache_timeout)
return response
View
24 django/middleware/gzip.py
@@ -0,0 +1,24 @@
+import re
+from django.utils.text import compress_string
+from django.utils.cache import patch_vary_headers
+
+re_accepts_gzip = re.compile(r'\bgzip\b')
+
+class GZipMiddleware:
+ """
+ This middleware compresses content if the browser allows gzip compression.
+ It sets the Vary header accordingly, so that caches will base their storage
+ on the Accept-Encoding header.
+ """
+ def process_response(self, request, response):
+ patch_vary_headers(response, ('Accept-Encoding',))
+ if response.has_header('Content-Encoding'):
+ return response
+
+ ae = request.META.get('HTTP_ACCEPT_ENCODING', '')
+ if not re_accepts_gzip.search(ae):
+ return response
+
+ response.content = compress_string(response.content)
+ response['Content-Encoding'] = 'gzip'
+ return response
View
37 django/middleware/http.py
@@ -0,0 +1,37 @@
+import datetime
+
+class ConditionalGetMiddleware:
+ """
+ Handles conditional GET operations. If the response has a ETag or
+ Last-Modified header, and the request has If-None-Match or
+ If-Modified-Since, the response is replaced by an HttpNotModified.
+
+ Removes the content from any response to a HEAD request.
+
+ Also sets the Date and Content-Length response-headers.
+ """
+ def process_response(self, request, response):
+ now = datetime.datetime.utcnow()
+ response['Date'] = now.strftime('%a, %d %b %Y %H:%M:%S GMT')
+ if not response.has_header('Content-Length'):
+ response['Content-Length'] = str(len(response.content))
+
+ if response.has_header('ETag'):
+ if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None)
+ if if_none_match == response['ETag']:
+ response.status_code = 304
+ response.content = ''
+ response['Content-Length'] = '0'
+
+ if response.has_header('Last-Modified'):
+ last_mod = response['Last-Modified']
+ if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None)
+ if if_modified_since == response['Last-Modified']:
+ response.status_code = 304
+ response.content = ''
+ response['Content-Length'] = '0'
+
+ if request.META['REQUEST_METHOD'] == 'HEAD':
+ response.content = ''
+
+ return response
View
2  django/middleware/sessions.py
@@ -1,5 +1,6 @@
from django.conf.settings import SESSION_COOKIE_NAME, SESSION_COOKIE_AGE, SESSION_COOKIE_DOMAIN
from django.models.core import sessions
+from django.utils.cache import patch_vary_headers
import datetime
TEST_COOKIE_NAME = 'testcookie'
@@ -61,6 +62,7 @@ def process_request(self, request):
def process_response(self, request, response):
# If request.session was modified, or if response.session was set, save
# those changes and set a session cookie.
+ patch_vary_headers(response, ('Cookie',))
try:
modified = request.session.modified
except AttributeError:
View
70 django/views/decorators/cache.py
@@ -1,57 +1,17 @@
-from django.core.cache import cache
-from django.utils.httpwrappers import HttpResponseNotModified
-from django.utils.text import compress_string
-from django.conf.settings import DEFAULT_CHARSET
-import datetime, md5
+"""
+Decorator for views that tries getting the page from the cache and
+populates the cache if the page isn't in the cache yet.
-def cache_page(view_func, cache_timeout, key_prefix=''):
- """
- Decorator for views that tries getting the page from the cache and
- populates the cache if the page isn't in the cache yet. Also takes care
- of ETags and gzips the page if the client supports it.
+The cache is keyed by the URL and some data from the headers. Additionally
+there is the key prefix that is used to distinguish different cache areas
+in a multi-site setup. You could use the sites.get_current().domain, for
+example, as that is unique across a Django project.
- The cache is keyed off of the page's URL plus the optional key_prefix
- variable. Use key_prefix if your Django setup has multiple sites that
- use cache; otherwise the cache for one site would affect the other. A good
- example of key_prefix is to use sites.get_current().domain, because that's
- unique across all Django instances on a particular server.
- """
- def _check_cache(request, *args, **kwargs):
- try:
- accept_encoding = request.META['HTTP_ACCEPT_ENCODING']
- except KeyError:
- accept_encoding = ''
- accepts_gzip = 'gzip' in accept_encoding
- cache_key = 'views.decorators.cache.cache_page.%s.%s.%s' % (key_prefix, request.path, accepts_gzip)
- response = cache.get(cache_key, None)
- if response is None:
- response = view_func(request, *args, **kwargs)
- content = response.get_content_as_string(DEFAULT_CHARSET)
- if accepts_gzip:
- content = compress_string(content)
- response.content = content
- response['Content-Encoding'] = 'gzip'
- response['ETag'] = md5.new(content).hexdigest()
- response['Content-Length'] = '%d' % len(content)
- response['Last-Modified'] = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
- cache.set(cache_key, response, cache_timeout)
- else:
- # Logic is from http://simon.incutio.com/archive/2003/04/23/conditionalGet
- try:
- if_none_match = request.META['HTTP_IF_NONE_MATCH']
- except KeyError:
- if_none_match = None
- try:
- if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
- except KeyError:
- if_modified_since = None
- if if_none_match is None and if_modified_since is None:
- pass
- elif if_none_match is not None and response['ETag'] != if_none_match:
- pass
- elif if_modified_since is not None and response['Last-Modified'] != if_modified_since:
- pass
- else:
- return HttpResponseNotModified()
- return response
- return _check_cache
+Additionally, all headers from the response's Vary header will be taken into
+account on caching -- just like the middleware does.
+"""
+
+from django.utils.decorators import decorator_from_middleware
+from django.middleware.cache import CacheMiddleware
+
+cache_page = decorator_from_middleware(CacheMiddleware)
View
6 django/views/decorators/gzip.py
@@ -0,0 +1,6 @@
+"Decorator for views that gzips pages if the client supports it."
+
+from django.utils.decorators import decorator_from_middleware
+from django.middleware.gzip import GZipMiddleware
+
+gzip_page = decorator_from_middleware(GZipMiddleware)
View
9 django/views/decorators/http.py
@@ -0,0 +1,9 @@
+"""
+Decorator for views that supports conditional get on ETag and Last-Modified
+headers.
+"""
+
+from django.utils.decorators import decorator_from_middleware
+from django.middleware.http import ConditionalGetMiddleware
+
+conditional_page = decorator_from_middleware(ConditionalGetMiddleware)
View
174 docs/cache.txt
@@ -2,25 +2,27 @@
Django's cache framework
========================
-So, you got slashdotted. Now what?
+So, you got slashdotted_. Now what?
Django's cache framework gives you three methods of caching dynamic pages in
memory or in a database. You can cache the output of entire pages, you can
cache only the pieces that are difficult to produce, or you can cache your
entire site.
+.. _slashdotted: http://en.wikipedia.org/wiki/Slashdot_effect
+
Setting up the cache
====================
-The cache framework is split into a set of "backends" that provide different
-methods of caching data. There's a simple single-process memory cache (mostly
-useful as a fallback) and a memcached_ backend (the fastest option, by far, if
-you've got the RAM).
+The cache framework allows for different "backends" -- different methods of
+caching data. There's a simple single-process memory cache (mostly useful as a
+fallback) and a memcached_ backend (the fastest option, by far, if you've got
+the RAM).
Before using the cache, you'll need to tell Django which cache backend you'd
like to use. Do this by setting the ``CACHE_BACKEND`` in your settings file.
-The CACHE_BACKEND setting is a "fake" URI (really an unregistered scheme).
+The ``CACHE_BACKEND`` setting is a "fake" URI (really an unregistered scheme).
Examples:
============================== ===========================================
@@ -39,7 +41,7 @@ Examples:
simple:/// A simple single-process memory cache; you
probably don't want to use this except for
testing. Note that this cache backend is
- NOT threadsafe!
+ NOT thread-safe!
locmem:/// A more sophisticated local memory cache;
this is multi-process- and thread-safe.
@@ -72,22 +74,24 @@ For example::
Invalid arguments are silently ignored, as are invalid values of known
arguments.
+.. _memcached: http://www.danga.com/memcached/
+
The per-site cache
==================
-Once the cache is set up, the simplest way to use the cache is to simply
-cache your entire site. Just add ``django.middleware.cache.CacheMiddleware``
-to your ``MIDDLEWARE_CLASSES`` setting, as in this example::
+Once the cache is set up, the simplest way to use the cache is to cache your
+entire site. Just add ``django.middleware.cache.CacheMiddleware`` to your
+``MIDDLEWARE_CLASSES`` setting, as in this example::
MIDDLEWARE_CLASSES = (
"django.middleware.cache.CacheMiddleware",
"django.middleware.common.CommonMiddleware",
)
-Make sure it's the first entry in ``MIDDLEWARE_CLASSES``. (The order of
-``MIDDLEWARE_CLASSES`` matters.)
+(The order of ``MIDDLEWARE_CLASSES`` matters. See "Order of MIDDLEWARE_CLASSES"
+below.)
-Then, add the following three required settings:
+Then, add the following three required settings to your Django settings file:
* ``CACHE_MIDDLEWARE_SECONDS`` -- The number of seconds each page should be
cached.
@@ -102,16 +106,20 @@ Then, add the following three required settings:
in the cache. That means subsequent requests won't have the overhead of
zipping, and the cache will hold more pages because each one is smaller.
-Pages with GET or POST parameters won't be cached.
+The cache middleware caches every page that doesn't have GET or POST
+parameters. Additionally, ``CacheMiddleware`` automatically sets a few headers
+in each ``HttpResponse``:
-The cache middleware also makes a few more optimizations:
-
-* Sets and deals with ``ETag`` headers.
-* Sets the ``Content-Length`` header.
* Sets the ``Last-Modified`` header to the current date/time when a fresh
(uncached) version of the page is requested.
+* Sets the ``Expires`` header to the current date/time plus the defined
+ ``CACHE_MIDDLEWARE_SECONDS``.
+* Sets the ``Cache-Control`` header to give a max age for the page -- again,
+ from the ``CACHE_MIDDLEWARE_SECONDS`` setting.
+
+See the `middleware documentation`_ for more on middleware.
-It doesn't matter where in the middleware stack you put the cache middleware.
+.. _`middleware documentation`: http://www.djangoproject.com/documentation/middleware/
The per-page cache
==================
@@ -134,25 +142,25 @@ Or, using Python 2.4's decorator syntax::
def slashdot_this(request):
...
-This will cache the result of that view for 15 minutes. (The cache timeout is
-in seconds.)
+``cache_page`` takes a single argument: the cache timeout, in seconds. In the
+above example, the result of the ``slashdot_this()`` view will be cached for 15
+minutes.
The low-level cache API
=======================
-There are times, however, that caching an entire rendered page doesn't gain
-you very much. The Django developers have found it's only necessary to cache a
-list of object IDs from an intensive database query, for example. In cases like
-these, you can use the cache API to store objects in the cache with any level
-of granularity you like.
+Sometimes, however, caching an entire rendered page doesn't gain you very much.
+For example, you may find it's only necessary to cache the result of an
+intensive database query. In cases like this, you can use the low-level cache
+API to store objects in the cache with any level of granularity you like.
The cache API is simple::
- # the cache module exports a cache object that's automatically
- # created from the CACHE_BACKEND setting
+ # The cache module exports a cache object that's automatically
+ # created from the CACHE_BACKEND setting.
>>> from django.core.cache import cache
- # The basic interface is set(key, value, timeout_seconds) and get(key)
+ # The basic interface is set(key, value, timeout_seconds) and get(key).
>>> cache.set('my_key', 'hello, world!', 30)
>>> cache.get('my_key')
'hello, world!'
@@ -161,7 +169,7 @@ The cache API is simple::
>>> cache.get('my_key')
None
- # get() can take a default argument
+ # get() can take a default argument.
>>> cache.get('my_key', 'has_expired')
'has_expired'
@@ -183,4 +191,108 @@ The cache API is simple::
That's it. The cache has very few restrictions: You can cache any object that
can be pickled safely, although keys must be strings.
-.. _memcached: http://www.danga.com/memcached/
+Controlling cache: Using Vary headers
+=====================================
+
+The Django cache framework works with `HTTP Vary headers`_ to allow developers
+to instruct caching mechanisms to vary their cache contents depending on
+request HTTP headers.
+
+Essentially, the ``Vary`` response HTTP header defines which request headers a
+cache mechanism should take into account when building its cache key.
+
+By default, Django's cache system creates its cache keys using the requested
+path -- e.g., ``"/stories/2005/jun/23/bank_robbed/"``. This means every request
+to that URL will use the same cached version, regardless of user-agent
+differences such as cookies or language preferences.
+
+That's where ``Vary`` comes in.
+
+If your Django-powered page outputs different content based on some difference
+in request headers -- such as a cookie, or language, or user-agent -- you'll
+need to use the ``Vary`` header to tell caching mechanisms that the page output
+depends on those things.
+
+To do this in Django, use the convenient ``vary_on_headers`` view decorator,
+like so::
+
+ from django.views.decorators.vary import vary_on_headers
+
+ # Python 2.3 syntax.
+ def my_view(request):
+ ...
+ my_view = vary_on_headers(my_view, 'User-Agent')
+
+ # Python 2.4 decorator syntax.
+ @vary_on_headers('User-Agent')
+ def my_view(request):
+ ...
+
+In this case, a caching mechanism (such as Django's own cache middleware) will
+cache a separate version of the page for each unique user-agent.
+
+The advantage to using the ``vary_on_headers`` decorator rather than manually
+setting the ``Vary`` header (using something like
+``response['Vary'] = 'user-agent'``) is that the decorator adds to the ``Vary``
+header (which may already exist) rather than setting it from scratch.
+
+Note that you can pass multiple headers to ``vary_on_headers()``::
+
+ @vary_on_headers('User-Agent', 'Cookie')
+ def my_view(request):
+ ...
+
+Because varying on cookie is such a common case, there's a ``vary_on_cookie``
+decorator. These two views are equivalent::
+
+ @vary_on_cookie
+ def my_view(request):
+ ...
+
+ @vary_on_headers('Cookie')
+ def my_view(request):
+ ...
+
+Also note that the headers you pass to ``vary_on_headers`` are not case
+sensitive. ``"User-Agent"`` is the same thing as ``"user-agent"``.
+
+You can also use a helper function, ``patch_vary_headers()``, directly::
+
+ from django.utils.cache import patch_vary_headers
+ def my_view(request):
+ ...
+ response = render_to_response('template_name', context)
+ patch_vary_headers(response, ['Cookie'])
+ return response
+
+``patch_vary_headers`` takes an ``HttpResponse`` instance as its first argument
+and a list/tuple of header names as its second argument.
+
+.. _`HTTP Vary headers`: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.44
+
+Other optimizations
+===================
+
+Django comes with a few other pieces of middleware that can help optimize your
+apps' performance:
+
+ * ``django.middleware.http.ConditionalGetMiddleware`` adds support for
+ conditional GET. This makes use of ``ETag`` and ``Last-Modified``
+ headers.
+
+ * ``django.middleware.gzip.GZipMiddleware`` compresses content for browsers
+ that understand gzip compression (all modern browsers).
+
+Order of MIDDLEWARE_CLASSES
+===========================
+
+If you use ``CacheMiddleware``, it's important to put it in the right place
+within the ``MIDDLEWARE_CLASSES`` setting, because the cache middleware needs
+to know which headers to vary the cache storage by. Middleware always adds
+something to the ``Vary`` response header when it can.
+
+Put the ``CacheMiddleware`` after any middlewares that might add something to
+the ``Vary`` header. The following middlewares do so:
+
+ * ``SessionMiddleware`` adds ``Cookie``
+ * ``GZipMiddleware`` adds ``Accept-Encoding``
View
12 docs/middleware.txt
@@ -88,6 +88,18 @@ Available middleware
addresses defined in the ``INTERNAL_IPS`` setting. This is used by Django's
automatic documentation system.
+``django.middleware.gzip.GZipMiddleware``
+ Compresses content for browsers that understand gzip compression (all
+ modern browsers).
+
+``django.middleware.http.ConditionalGetMiddleware``
+ Handles conditional GET operations. If the response has an ``ETag`` or
+ ``Last-Modified`` header, and the request has a matching ``If-None-Match`` or
+ ``If-Modified-Since``, the response is replaced by an empty 304 (Not Modified) response.
+
+ Also removes the content from any response to a HEAD request and sets the
+ ``Date`` and ``Content-Length`` response-headers.
+
``django.middleware.sessions.SessionMiddleware``
Enables session support. See the `session documentation`_.
Please sign in to comment.
Something went wrong with that request. Please try again.