Skip to content
This repository has been archived by the owner on Aug 1, 2019. It is now read-only.

Commit

Permalink
Set up caching/etag for detail/archive views
Browse files Browse the repository at this point in the history
  • Loading branch information
tsauerwein committed Aug 11, 2016
1 parent 32f6dce commit 57e1cea
Show file tree
Hide file tree
Showing 14 changed files with 429 additions and 83 deletions.
13 changes: 9 additions & 4 deletions c2corg_ui/__init__.py
@@ -1,11 +1,13 @@
import requests
from pyramid.config import Configurator
from pyramid_mako import add_mako_renderer
from c2corg_ui.lib.cacheversion import version_cache_buster, CACHE_PATH
from pyramid.httpexceptions import (
HTTPBadRequest, HTTPInternalServerError)
from pyramid.view import view_config
from pyramid.view import notfound_view_config
from pyramid.view import view_config
from pyramid_mako import add_mako_renderer

from c2corg_ui.caching import configure_caches
from c2corg_ui.caching.cacheversion import version_cache_buster, CACHE_PATH


def main(global_config, **settings):
Expand All @@ -15,6 +17,9 @@ def main(global_config, **settings):
config = Configurator(settings=settings)
add_mako_renderer(config, '.html')

# set up redis cache
configure_caches(settings)

# configure connection pool for http requests
max_connections = int(settings.get('http_request_connection_pool_size'))
http_requests.session = requests.Session()
Expand All @@ -27,7 +32,7 @@ def main(global_config, **settings):
http_requests.session.mount('http://', adapter)

# Register a tween to get back the cache buster path.
config.add_tween("c2corg_ui.lib.cacheversion.CachebusterTween")
config.add_tween("c2corg_ui.caching.cacheversion.CachebusterTween")

_add_static_view(config, 'static', 'c2corg_ui:static')
config.add_static_view('node_modules', settings.get('node_modules_path'),
Expand Down
70 changes: 70 additions & 0 deletions c2corg_ui/caching/__init__.py
@@ -0,0 +1,70 @@
import logging

import time
from dogpile.cache import make_region
from redis.connection import BlockingConnectionPool

log = logging.getLogger(__name__)

# Prefix for all cache keys. This is only the initial value:
# `configure_caches` overwrites it with the `redis.cache_key_prefix` setting.
KEY_PREFIX = 'c2corg_ui'

# Cache version (for production the current git revision, for development
# the git revision and a timestamp). Stays `None` until `configure_caches`
# has been called.
CACHE_VERSION = None


def create_region(name):
    """ Create an (unconfigured) dogpile cache region.

    All keys of the region are rewritten to `<KEY_PREFIX>:<name>:<key>`,
    e.g. 'c2corg_ui_main:detail:3575-1-c796286'.
    """
    def prefix_key(key):
        # `KEY_PREFIX` is read on every call, so a prefix set later by
        # `configure_caches` is picked up by regions created earlier.
        return '{0}:{1}:{2}'.format(KEY_PREFIX, name, key)

    return make_region(key_mangler=prefix_key)

# Regions are created at import time without a backend; the backend is
# attached to every region listed in `caches` by `configure_caches`.
cache_document_detail = create_region('detail')
cache_document_archive = create_region('archive')

# all regions that `configure_caches` has to configure
caches = [
    cache_document_detail,
    cache_document_archive
]


def configure_caches(settings):
    """ Set up the Redis backend for all cache regions.

    Reads `redis.cache_key_prefix`, `cache_version`,
    `cache_version_timestamp`, `redis.url` and `redis.cache_pool` from
    `settings`, updates the module globals `KEY_PREFIX` and `CACHE_VERSION`
    and configures every region in `caches` with a shared connection pool.
    """
    global KEY_PREFIX
    global CACHE_VERSION
    KEY_PREFIX = settings['redis.cache_key_prefix']

    # In dev. mode a timestamp is appended to the cache version, so that
    # cached values are invalidated when the dev. server reloads after a
    # code change.
    version = settings['cache_version']
    if settings['cache_version_timestamp'] == 'True':
        version = '{0}-{1}'.format(version, int(time.time()))
    CACHE_VERSION = version

    redis_url = settings['redis.url']
    log.debug('Cache version {0}'.format(CACHE_VERSION))
    log.debug('Cache Redis: {0}'.format(redis_url))

    pool_size = int(settings['redis.cache_pool'])
    connection_pool = BlockingConnectionPool.from_url(
        redis_url,
        max_connections=pool_size,
        timeout=3,  # 3 seconds (waiting for connection)
        socket_timeout=3  # 3 seconds (timeout on open socket)
    )

    for region in caches:
        # a fresh arguments dict per region, all sharing the same pool
        backend_arguments = {
            'connection_pool': connection_pool,
            'distributed_lock': True,
            'lock_timeout': 5  # 5 seconds (dogpile lock)
        }
        region.configure(
            'dogpile.cache.redis',
            arguments=backend_arguments,
            replace_existing_backend=True
        )


class CachedPage(object):
    """ Value stored in the page caches.

    Holds the rendered HTML of a page together with the API cache key
    (taken from the API's ETag) the page was rendered for.
    """

    def __init__(self, api_cache_key, page_html):
        self.api_cache_key, self.page_html = api_cache_key, page_html
Expand Up @@ -30,6 +30,10 @@ def is_hexa(str):
@staticmethod
def is_git_hash(str):
    """ Tell whether `str` looks like a short git hash, optionally
    followed by `-<suffix>`.
    """
    # 592d5db = git rev-parse --short HEAD
    # When in dev. mode, the cache key is appended with a timestamp,
    # e.g. "592d5db-123456789": only the part before the first '-' is
    # checked (without a '-' this is the whole string).
    revision = str.split('-')[0]
    if len(revision) != 7:
        return False
    return CachebusterTween.is_hexa(revision)

def __call__(self, request):
Expand Down
38 changes: 38 additions & 0 deletions c2corg_ui/views/__init__.py
@@ -0,0 +1,38 @@
import logging

from pyramid.httpexceptions import HTTPNotModified

log = logging.getLogger(__name__)


def etag_cache(request, etag_key):
    """Handle browser-side caching with HTTP entity tags (ETags).

    A weak ETag is built from ``etag_key``. If the request has an
    ``If-None-Match`` header that matches ``etag_key``, an
    ``HTTPNotModified`` exception (``304``) carrying the ETag is raised to
    tell the browser that it should use its cached copy of the page.
    Otherwise the ETag header is added to the headers of
    ``request.response``.

    Suggested use is within a view like so:

    .. code-block:: python

        def view(request):
            etag_cache(request, etag_key=1)
            return render('/splash.mako')

    .. note::
        This works because etag_cache raises an HTTPNotModified exception
        if the ETag received matches the key provided.

    Implementation adapted from:
    https://github.com/Pylons/pylons/blob/799c310/pylons/controllers/util.py#L148 # noqa
    """
    # only weak ETag validators are emitted
    weak_etag = 'W/"%s"' % etag_key
    client_etags = request.if_none_match

    if str(etag_key) not in client_etags:
        request.response.headers['ETag'] = weak_etag
        log.debug("ETag didn't match, returning response object")
        return

    log.debug("ETag match, returning 304 HTTP Not Modified Response")
    raise HTTPNotModified(headers=[('ETag', weak_etag)])
174 changes: 153 additions & 21 deletions c2corg_ui/views/document.py
@@ -1,4 +1,10 @@
import re

from dogpile.cache.api import NO_VALUE

from c2corg_ui import http_requests
from c2corg_ui.caching import cache_document_detail, CachedPage, \
cache_document_archive, CACHE_VERSION
from c2corg_ui.diff.differ import diff_documents
from shapely.geometry import asShape
from shapely.ops import transform
Expand All @@ -12,13 +18,17 @@
from pyramid.httpexceptions import (
HTTPBadRequest, HTTPNotFound, HTTPInternalServerError)

from c2corg_ui.views import etag_cache

log = logging.getLogger(__name__)

# Matches one entity tag (optionally weak, i.e. prefixed with `W/`) and
# captures the text between the double quotes. A raw string is used so that
# `\s` reaches the regex engine without being an invalid string escape.
IF_NONE_MATCH = re.compile(r'(?:W/)?(?:"([^"]*)",?\s*)')


class Document(object):

# FIXME Is a "documents" route available/relevant in the API?
_API_ROUTE = 'documents'
# set in inheriting classes
_API_ROUTE = None

# FIXME sync with API => use a CONSTANT in c2corg_common?
_DEFAULT_FILTERS = {
Expand All @@ -28,15 +38,86 @@ class Document(object):
def __init__(self, request):
self.request = request
self.settings = request.registry.settings
self.debug = 'debug' in self.request.params
self.template_input = {
'debug': 'debug' in self.request.params,
'debug': self.debug,
'api_url': self.settings['api_url'],
'ign_api_key': self.settings['ign_api_key'],
'bing_api_key': self.settings['bing_api_key'],
'image_backend_url': self.settings['image_backend_url'],
'image_url': self.settings['image_url']
}

def _get_or_create_detail(self, id, lang, render_page):
    """ Returns the detail page of a document.

    The page is served from the cache as long as neither the document
    version in the API nor the code version has changed; otherwise it is
    rendered with `render_page` and cached. If the request includes an
    ETag that equals the current version, "304 Not Modified" is returned.
    """
    def load_data(old_api_cache_key=None):
        # -> (not_modified, api_cache_key, (document, locale))
        return self._get_document(id, lang, old_api_cache_key)

    request_data = (id, lang)
    return self._get_or_create(
        request_data, cache_document_detail, load_data, render_page,
        self._get_cache_key)

def _get_or_create_archive(self, id, lang, version_id, render_page):
    """ Returns the page of an archived version of a document.

    Like the detail page, the response is cached and ETags are handled.
    """
    def load_data(old_api_cache_key=None):
        # -> (not_modified, api_cache_key, (document, locale, version))
        return self._get_archived_document(
            id, lang, version_id, old_api_cache_key)

    request_data = (id, lang, version_id)
    return self._get_or_create(
        request_data, cache_document_archive, load_data, render_page,
        self._get_cache_key_archive)

def _get_or_create(
        self, request_data, cache, load_data, render_page, get_cache_key):
    """ Serve a page from `cache` or render and cache it.

    :param request_data: tuple of arguments for `get_cache_key`.
    :param cache: the cache region in which rendered pages are stored.
    :param load_data: callable taking the API cache key of the cached page
        (or `None`) and returning `(not_modified, api_cache_key, data)`.
    :param render_page: callable called with `*data`, returns the page
        HTML.
    :param get_cache_key: callable returning the cache key for
        `request_data`.
    :return: the response object with the page as body. `etag_cache` may
        raise `HTTPNotModified` instead when the browser already has the
        current version.
    """
    if self.debug:
        # do not cache when in debug mode
        _, _, loaded_data = load_data()
        return self._get_response(render_page(*loaded_data))

    cache_key = get_cache_key(*request_data)

    # try to get a rendered page from the cache
    cached_page = cache.get(cache_key, ignore_expiration=True)

    old_api_cache_key = cached_page.api_cache_key \
        if cached_page is not NO_VALUE else None

    # request the document from the api. if there was an entry in the
    # cache, set the `If-None-Match` header with the last ETag. if the
    # document version on the api has not changed, `not modified` will
    # be returned.
    not_modified, api_cache_key, loaded_data = load_data(old_api_cache_key)

    if not_modified:
        # the cached page is still valid
        log.debug('Serving from cache {0}'.format(cache_key))
        if not api_cache_key:
            # the "not modified" answer came without an ETag: the key
            # the cached page was stored with is still the current one
            api_cache_key = old_api_cache_key
        page_html = cached_page.page_html
    else:
        # there is a new version from the api, render the page
        page_html = render_page(*loaded_data)
        cache.set(cache_key, CachedPage(api_cache_key, page_html))

    if api_cache_key:
        etag_cache(self.request, self._get_etag_key(api_cache_key))
    # Without an API cache key no ETag is sent: a validator built from
    # `None` would be the same for every version of the document, and
    # browsers would get "304 Not Modified" for changed content.

    return self._get_response(page_html)

def _call_api(self, url, headers=None):
settings = self.settings
if 'api_url_internal' in settings and settings['api_url_internal']:
Expand All @@ -51,7 +132,12 @@ def _call_api(self, url, headers=None):
log.debug('API: %s %s', 'GET', url)
try:
resp = http_requests.session.get(url, headers=headers)
return resp, resp.json()

if resp.status_code == 304:
# no content for 'not modified'
return resp, {}
else:
return resp, resp.json()
except Exception:
log.error('Request failed: {0}'.format(url), exc_info=1)
return resp, {}
Expand All @@ -78,37 +164,64 @@ def _validate_int(self, field):
except Exception:
raise HTTPBadRequest("Incorrect " + field)

def _get_document(self, id, lang, old_api_cache_key=None):
    """ Load a document in the given lang from the API.

    :param id: document id (formatted with `%d`).
    :param lang: requested lang of the document.
    :param old_api_cache_key: API cache key of an already cached page, if
        any; passed on to `_get_with_etag`.
    :return: `(not_modified, api_cache_key, data)`. `data` is `None` when
        the document was not modified, otherwise `(document, locale)`.
    :raises HTTPNotFound: if the requested lang does not exist (other
        errors are raised by `_get_with_etag`).
    """
    url = '%s/%d?l=%s' % (self._API_ROUTE, id, lang)
    not_modified, api_cache_key, document = self._get_with_etag(
        url, old_api_cache_key)

    if not_modified:
        return not_modified, api_cache_key, None

    # When requesting a lang that does not exist yet, the API sends
    # back an empty list as 'locales'
    if not document['locales']:
        raise HTTPNotFound('Requested lang does not exist')

    # We need to pass locale data to Mako as a dedicated object to make it
    # available to the parent templates:
    locale = document['locales'][0]

    return False, api_cache_key, (document, locale)

def _get_archived_document(
        self, id, lang, version_id, old_api_cache_key=None):
    """ Load an archived version of a document from the API.

    :param id: document id (formatted with `%d`).
    :param lang: lang of the archived version.
    :param version_id: id of the version (formatted with `%d`).
    :param old_api_cache_key: API cache key of an already cached page, if
        any; passed on to `_get_with_etag`.
    :return: `(not_modified, api_cache_key, data)`. `data` is `None` when
        the version was not modified, otherwise
        `(document, locale, version)`.
    """
    url = '%s/%d/%s/%d' % (self._API_ROUTE, id, lang, version_id)
    # a single API request; status handling is done by `_get_with_etag`
    not_modified, api_cache_key, content = self._get_with_etag(
        url, old_api_cache_key)

    if not_modified:
        return not_modified, api_cache_key, None

    document = content['document']
    version = content['version']
    # the locale is returned as a separate value next to the document
    locale = document['locales'][0]

    return False, api_cache_key, (document, locale, version)

def _get_with_etag(self, url, old_api_cache_key=None):
    """ Request `url` from the API, using ETags for conditional requests.

    :param url: API url passed to `_call_api`.
    :param old_api_cache_key: if given, it is sent as weak ETag in an
        `If-None-Match` header.
    :return: `(not_modified, api_cache_key, content)`. For a `304`
        response `not_modified` is `True` and `content` is `None`.
        `api_cache_key` is taken from the `ETag` response header and is
        `None` if there is no usable header.
    :raises HTTPNotFound: if the API answers with `404`.
    :raises HTTPInternalServerError: for any other status than `200` or
        `304`.
    """
    headers = None
    if old_api_cache_key:
        headers = {'If-None-Match': 'W/"{0}"'.format(old_api_cache_key)}

    resp, document = self._call_api(url, headers)

    api_cache_key = None
    etag = resp.headers.get('ETag')
    if etag:
        api_cache_key = self._get_api_cache_key_from_etag(etag)

    if resp.status_code in [200, 304] and not api_cache_key:
        log.warning('no etag found for {0}'.format(url))

    if resp.status_code == 404:
        raise HTTPNotFound()
    elif resp.status_code == 304:
        return True, api_cache_key, None
    elif resp.status_code != 200:
        raise HTTPInternalServerError(
            "An error occurred while loading the document")

    return False, api_cache_key, document

def _get_documents(self):
params = []
Expand Down Expand Up @@ -199,3 +312,22 @@ def _diff(self):
return self.template_input

raise HTTPNotFound()

@staticmethod
def _current_cache_version():
    """ Returns the cache version currently set in `c2corg_ui.caching`.

    The value is read from the module on every call: `configure_caches`
    rebinds `caching.CACHE_VERSION`, which a name imported with
    `from c2corg_ui.caching import CACHE_VERSION` would not reflect.
    """
    from c2corg_ui import caching
    return caching.CACHE_VERSION

def _get_cache_key(self, id, lang):
    """ Cache key of a detail page: `<id>-<lang>-<cache version>`. """
    return '{0}-{1}-{2}'.format(id, lang, self._current_cache_version())

def _get_cache_key_archive(self, id, lang, version_id):
    """ Cache key of an archive page:
    `<id>-<lang>-<version_id>-<cache version>`.
    """
    return '{0}-{1}-{2}-{3}'.format(
        id, lang, version_id, self._current_cache_version())

def _get_etag_key(self, api_cache_key):
    """ Key for the ETag sent to the browser:
    `<api_cache_key>-<cache version>`.
    """
    return '{0}-{1}'.format(api_cache_key, self._current_cache_version())

def _get_api_cache_key_from_etag(self, etag):
    """ Returns the text between the quotes of the first entity tag found
    in `etag`, or `None` if `IF_NONE_MATCH` finds no tag.
    """
    found_tags = IF_NONE_MATCH.findall(etag)
    return found_tags[0] if found_tags else None

def _get_response(self, page_html):
self.request.response.text = page_html
return self.request.response

0 comments on commit 57e1cea

Please sign in to comment.