Permalink
Browse files

A lot of things. Read further.

* Activated App Engine "Datastore Administration"
* Rewrote the "sitemap.xml" generation to take into account information such as:
** last modification timestamp
** priority
** update schedule
* sitemap.xml now accounts for pages, and assigns a different priority based on the kind of 'static content'
* Some extra 'indexes' to improve Datastore performance
* Better scheduling of the 'sitemap' generation:
** 1) Max 1 sitemap generation per hour
** 2) The job is started with a configurable delay - this is done to allow the rest of the content to be generated first
* Added proper links to the new footer
  • Loading branch information...
1 parent 2f0bf8d commit 2cb875cf048abbea127d6aac2a39fa63f66cd7dd @detro committed Mar 14, 2011
Showing with 102 additions and 41 deletions.
  1. +3 −0 app.yaml
  2. +5 −0 config.py
  3. +8 −5 generators.py
  4. +20 −0 index.yaml
  5. +6 −4 post_deploy.py
  6. +35 −16 static.py
  7. +6 −3 themes/default/sitemap.xml
  8. +4 −4 themes/squared/base.html
  9. +15 −9 utils.py
View
@@ -3,6 +3,9 @@ version: master
runtime: python
api_version: 1
+builtins:
+- datastore_admin: on
+
handlers:
- url: /remote_api
script: $PYTHON_LIB/google/appengine/ext/remote_api/handler.py
View
@@ -119,6 +119,11 @@
# see: http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=34609 for more information
google_sitemap_ping = True
+# Content in Bloggart is generated using the App Engine Deferred Task API.
+# For websites with a lot of content, it is preferable to generate the sitemap with a delay, to
+# ensure that the rest of the content is ready before the 'sitemap.xml' is generated
+sitemap_generation_delay_sec = 900 # 15min
+
# If you want to use Google Site verification, go to
# https://www.google.com/webmasters/tools/ , add your site, choose the 'upload
# an html file' method, then set the NAME of the file below.
View
@@ -109,7 +109,7 @@ def generate_resource(cls, post, resource, action='post'):
if next is not None:
template_vals['next'] = next;
rendered = utils.render_template("post.html", template_vals);
- static.set(post.path, rendered, config.html_mime_type);
+ static.set(post.path, rendered, config.html_mime_type, last_modified=post.updated, type=static.TYPE_POST);
generator_list.append(PostContentGenerator)
@@ -156,7 +156,7 @@ def generate_resource(cls, page, resource, action='post'):
'breadcrumb_stack' : breadcrumb_stack
}
rendered = utils.render_template("page.html", template_vals)
- static.set(curr_page.path, rendered, config.html_mime_type)
+ static.set(curr_page.path, rendered, config.html_mime_type, last_modified=curr_page.updated, type=static.TYPE_PAGE)
generator_list.append(PageContentGenerator)
@@ -186,7 +186,7 @@ def generate_resource(cls, post, resource):
if next is not None:
template_vals['next']=next
rendered = utils.render_template("post.html", template_vals)
- static.set(post.path, rendered, config.html_mime_type)
+ static.set(post.path, rendered, config.html_mime_type, last_modified=post.updated, type=static.TYPE_POST)
generator_list.append(PostPrevNextContentGenerator)
@@ -226,8 +226,11 @@ def generate_resource(cls, post, resource, pagenum=1, start_ts=None):
path_args = {
'resource': resource,
}
+
+ # Lambda Function used later to get the right path
_get_path = lambda: \
- cls.first_page_path if path_args['pagenum'] == 1 else cls.path
+ cls.first_page_path if path_args['pagenum'] == 1 else cls.path
+
path_args['pagenum'] = pagenum - 1
prev_page = _get_path() % path_args
path_args['pagenum'] = pagenum + 1
@@ -241,7 +244,7 @@ def generate_resource(cls, post, resource, pagenum=1, start_ts=None):
rendered = utils.render_template("listing.html", template_vals)
path_args['pagenum'] = pagenum
- static.set(_get_path() % path_args, rendered, config.html_mime_type, type=static.TYPE_INDEX);
+ static.set(_get_path() % path_args, rendered, config.html_mime_type, type = static.TYPE_INDEX)
if more_posts:
deferred.defer(cls.generate_resource, None, resource, pagenum + 1,
posts[-2].published)
View
@@ -1,5 +1,25 @@
indexes:
+- kind: StaticContent
+ properties:
+ - name: indexed
+ - name: __key__
+ direction: asc
+
+- kind: BlogPost
+ properties:
+ - name: __key__
+
+- kind: BlogPost
+ properties:
+ - name: published
+ direction: asc
+
+- kind: BlogPost
+ properties:
+ - name: published
+ direction: desc
+
- kind: BlogPost
properties:
- name: normalized_tags
View
@@ -39,8 +39,8 @@ def regenerate(self, batch_size=30, start_ts=None, content_model=models.BlogPost
try:
# (try to) regenerate dependency
generator_class.generate_resource(None, dep)
- except:
- logging.error("Dependency regeneration failed:")
+ except Exception as e:
+ logging.error("Dependency regeneration failed: " + e)
logging.error(dep)
# Remember not to process this dependency again
@@ -60,7 +60,7 @@ def generate_static_pages(pages):
def generate(previous_version):
for path, template, indexed, type in pages:
rendered = utils.render_template(template)
- static.set(path, rendered, config.html_mime_type, indexed, type=type);
+ static.set(path, rendered, config.html_mime_type, indexed=indexed, type=type);
return generate
post_deploy_tasks.append(generate_static_pages([
@@ -79,14 +79,16 @@ def regenerate_all(previous_version=None, force=False):
deferred.defer(ContentRegenerator().regenerate, content_model=models.BlogPost)
# Defer all Page regeneration
deferred.defer(ContentRegenerator().regenerate, content_model=models.Page)
+ # Regenerate the Sitemap
+ static.regenerate_sitemap()
post_deploy_tasks.append(regenerate_all);
def site_verification(previous_version):
static.set('/' + config.google_site_verification,
utils.render_template('site_verification.html'),
- config.html_mime_type, False)
+ config.html_mime_type, indexed=False)
if config.google_site_verification:
post_deploy_tasks.append(site_verification)
View
@@ -18,10 +18,19 @@
HTTP_DATE_FMT = "%a, %d %b %Y %H:%M:%S GMT"
-TYPE_POST = 0x0001; # 'Post'
-TYPE_PAGE = 0x0002; # 'Page'
-TYPE_INDEX = 0x0004; # 'Index' (i.e. Listing, Pagination, Tag, Archive)
-TYPE_OTHER = 0x0008; # 'Other' (i.e. atom feed, robots.txt, ...)
+TYPE_HOME = 0x0001 # 'Homepage'
+TYPE_POST = 0x0002 # 'Post'
+TYPE_PAGE = 0x0004 # 'Page'
+TYPE_INDEX = 0x008 # 'Index' (i.e. Listing, Pagination, Tag, Archive, Search)
+TYPE_OTHER = 0x0010 # 'Other' (i.e. atom feed, robots.txt, ...)
+
+SITEMAP_DATA_MAPPING = {
+ TYPE_HOME : { "priority" : 1, "changefreq" : "daily" },
+ TYPE_POST : { "priority" : 0.8, "changefreq" : "weekly" },
+ TYPE_PAGE : { "priority" : 0.7, "changefreq" : "monthly" },
+ TYPE_INDEX : { "priority" : 0.3, "changefreq" : "weekly" },
+ TYPE_OTHER : { "priority" : 0.3, "changefreq" : "yearly" }
+}
if config.google_site_verification is not None:
ROOT_ONLY_FILES = ['/robots.txt','/' + config.google_site_verification]
@@ -40,7 +49,7 @@ class StaticContent(db.Model):
etag = aetycoon.DerivedProperty(lambda x: hashlib.sha1(x.body).hexdigest());
indexed = db.BooleanProperty(required=True, default=True);
headers = db.StringListProperty();
- type = db.IntegerProperty(choices=(TYPE_POST, TYPE_PAGE, TYPE_INDEX, TYPE_OTHER), default=TYPE_POST);
+ type = db.IntegerProperty(choices=(TYPE_HOME, TYPE_POST, TYPE_PAGE, TYPE_INDEX, TYPE_OTHER), default=TYPE_POST);
def get(path):
@@ -65,7 +74,8 @@ def get(path):
return entity
-def set(path, body, content_type, indexed=True, type=TYPE_POST, **kwargs):
+def set(path, body, content_type, last_modified=None, indexed=True, type=TYPE_POST, **kwargs):
+ import static
"""Sets the StaticContent for the provided path.
Args:
@@ -78,30 +88,39 @@ def set(path, body, content_type, indexed=True, type=TYPE_POST, **kwargs):
Returns:
A StaticContent object.
"""
- now = datetime.datetime.now(utils.tzinfo()).replace(second=0, microsecond=0)
+ if last_modified is None:
+ last_modified = datetime.datetime.now(utils.tzinfo()).replace(second=0, microsecond=0)
defaults = {
- "last_modified": now,
+ "last_modified": last_modified,
}
defaults.update(kwargs)
content = StaticContent(
key_name = path,
body = body,
content_type = content_type,
indexed = indexed,
- type = type,
+ type = static.TYPE_HOME if path == '/' else type,
**defaults);
content.put()
memcache.replace(path, db.model_to_protobuf(content).Encode())
+
+ if indexed:
+ regenerate_sitemap()
+
+ return content
+
+def regenerate_sitemap():
try:
- eta = now.replace(second=0, microsecond=0) + datetime.timedelta(seconds=65)
- if indexed:
- deferred.defer(
- utils._regenerate_sitemap,
- _name='sitemap-%s' % (now.strftime('%Y%m%d%H%M'),),
- _eta=eta)
+ now = datetime.datetime.now(utils.tzinfo()).replace(second=0, microsecond=0)
+ eta = now.replace(second=0, microsecond=0) + datetime.timedelta(seconds=config.sitemap_generation_delay_sec)
+
+ # Queue a Deferred Task to regenerate the 'sitemap.xml' after the configured delay (config.sitemap_generation_delay_sec)
+ deferred.defer(
+ utils._regenerate_sitemap,
+ _name='sitemap-%s' % (now.strftime('%Y%m%d%H'),), # Run max 1 per hour
+ _eta=eta)
except (taskqueue.TaskAlreadyExistsError, taskqueue.TombstonedTaskError), e:
pass
- return content
def add(path, body, content_type, indexed=True, **kwargs):
"""Adds a new StaticContent and returns it.
@@ -1,8 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
- {% for path in paths %}
+ {% for content in static_contents %}
<url>
- <loc>http://{{config.host}}{{config.url_prefix}}{{path}}</loc>
+ <loc>http://{{ config.host }}{{ config.url_prefix }}{{ content.loc }}</loc>
+ <lastmod>{{ content.lastmod }}</lastmod>
+ <changefreq>{{ content.changefreq }}</changefreq>
+ <priority>{{ content.priority }}</priority>
</url>
- {% endfor %}
+ {% endfor %}
</urlset>
View
@@ -84,10 +84,10 @@
</footer>
<footer id="credits" class="fancy">
- Hosted by <a href="#">App Engine</a> &amp;
- Powered by <a href="#">Bloggart</a> &amp;
- Themed by <a href="#">Squared</a> &amp;
- Licensed by <a href="#">CC ASA 3.0</a>
+ Hosted by <a href="http://code.google.com/appengine/" target="_blank">App Engine</a> &amp;
+ Powered by <a href="https://github.com/detro/bloggart" target="_blank">detro's Bloggart fork</a> &amp;
+ Themed by <a href="http://blog.ivandemarino.me/Projects/Squared" target="_blank">Squared</a> &amp;
+ Licensed by <a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/" target="_blank">CC ASA 3.0</a>
<br />
<a href="http://www.w3.org/html/logo/" alt="HTML5 Powered with CSS3 / Styling, and Semantics" target="_blank">
<img src="/static/{{ config.theme }}/html5_semantics.png" title="HTML5 Semantics" />
View
@@ -1,6 +1,7 @@
import os
import re
import unicodedata
+from datetime import datetime
from google.appengine.api import memcache
from google.appengine.ext import webapp
@@ -48,7 +49,7 @@ def format_post_path(post, num):
'slug': slug,
'year': date.year,
'month': date.month,
- 'day': date.day,
+ 'day': date.day
}
@@ -78,33 +79,38 @@ def render_template(template_name, template_vals=None, theme=None):
return rendered
-def _get_all_paths():
+def _get_all_static_content_data():
import static
keys = []
- q = static.StaticContent.all(keys_only=True).filter('indexed', True)
+ q = static.StaticContent.all().filter('indexed', True).order('__key__')
cur = q.fetch(1000)
while len(cur) == 1000:
keys.extend(cur)
- q = static.StaticContent.all(keys_only=True)
+ q = static.StaticContent.all()
q.filter('indexed', True)
q.filter('__key__ >', cur[-1])
+ q.order('__key__')
cur = q.fetch(1000)
keys.extend(cur)
- return [x.name() for x in keys]
+ return [{ "loc" : k.key().name(),
+ "lastmod" : k.last_modified.strftime("%Y-%m-%dT%H:%M:%S%z"),
+ "priority" : static.SITEMAP_DATA_MAPPING[k.type]["priority"],
+ "changefreq" : static.SITEMAP_DATA_MAPPING[k.type]["changefreq"]} for k in keys]
def _regenerate_sitemap():
import static
import gzip
from StringIO import StringIO
- paths = _get_all_paths()
- rendered = render_template('sitemap.xml', {'paths': paths})
- static.set('/sitemap.xml', rendered, 'application/xml', False, type=static.TYPE_OTHER)
+
+ static_contents = _get_all_static_content_data()
+ rendered = render_template('sitemap.xml', {'static_contents': static_contents})
+ static.set('/sitemap.xml', rendered, 'application/xml', indexed=False, type=static.TYPE_OTHER)
s = StringIO()
gzip.GzipFile(fileobj=s,mode='wb').write(rendered)
s.seek(0)
renderedgz = s.read()
- static.set('/sitemap.xml.gz',renderedgz, 'application/x-gzip', False, type=static.TYPE_OTHER)
+ static.set('/sitemap.xml.gz',renderedgz, 'application/x-gzip', indexed=False, type=static.TYPE_OTHER)
# Ping Google only if configured to do so and NOT on localhost
if ( config.google_sitemap_ping and not (config.host.find("localhost") > -1) ):
ping_googlesitemap();

0 comments on commit 2cb875c

Please sign in to comment.