Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Fixed #2628 -- Added django.contrib.sitemap. Thanks for the patch, Da…

…n Watson

git-svn-id: http://code.djangoproject.com/svn/django/trunk@3694 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit 44dabecf6e4a009ff1a34ba12032ba07476baad2 1 parent 0a7f218
@adrianholovaty adrianholovaty authored
View
1  AUTHORS
@@ -139,6 +139,7 @@ answer newbie questions, and generally made Django that much better:
Amit Upadhyay
Geert Vanderkelen
Milton Waddams
+ Dan Watson <http://theidioteque.net/>
Rachel Willmer <http://www.willmer.com/kb/>
wojtek
ye7cakf02@sneakemail.com
View
90 django/contrib/sitemap/__init__.py
@@ -0,0 +1,90 @@
+from django.core import urlresolvers
+import urllib
+
+PING_URL = "http://www.google.com/webmasters/sitemaps/ping"
+
class SitemapNotFound(Exception):
    """
    Raised when no sitemap URL was supplied and none could be auto-detected.
    """
+
def ping_google(sitemap_url=None, ping_url=PING_URL):
    """
    Alerts Google that the sitemap for the current site has been updated.

    If sitemap_url is provided, it should be an absolute path to the sitemap
    for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
    function will attempt to deduce it by using urlresolvers.reverse().

    ping_url is the endpoint that receives the notification; the full sitemap
    URL is passed to it as the ``sitemap`` query-string parameter.

    Raises SitemapNotFound if sitemap_url was not given and neither sitemap
    view could be reversed.
    """
    if sitemap_url is None:
        # Prefer the "index" sitemap URL, then fall back to the "global" one.
        candidate_views = (
            'django.contrib.sitemap.views.index',
            'django.contrib.sitemap.views.sitemap',
        )
        for view_name in candidate_views:
            try:
                sitemap_url = urlresolvers.reverse(view_name)
            except urlresolvers.NoReverseMatch:
                continue
            break

    if sitemap_url is None:
        raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")

    # Imported here rather than at module level, as in the rest of this module.
    from django.contrib.sites.models import Site
    current_site = Site.objects.get_current()
    # The domain carries no scheme, so add one to build a full URL.
    url = "http://%s%s" % (current_site.domain, sitemap_url)
    params = urllib.urlencode({'sitemap': url})
    response = urllib.urlopen("%s?%s" % (ping_url, params))
    # Only the request itself matters; release the connection right away.
    response.close()
+
class Sitemap(object):
    """
    Base class describing one section of a sitemap.

    Subclasses override items() and may define ``location``, ``lastmod``,
    ``changefreq`` and ``priority`` either as plain attributes or as methods
    taking a single item. This base class only defines ``location``; the
    other three resolve to None unless a subclass provides them.
    """

    def __get(self, name, obj, default=None):
        """
        Resolves the sitemap property ``name`` for ``obj``.

        Returns ``default`` if this object has no such attribute, the result
        of calling the attribute with ``obj`` if it is callable, and the
        attribute value itself otherwise.
        """
        try:
            attr = getattr(self, name)
        except AttributeError:
            return default
        if callable(attr):
            return attr(obj)
        return attr

    def items(self):
        """
        Returns the objects to list in this sitemap. Empty by default.
        """
        return []

    def location(self, obj):
        """
        Returns the URL path for ``obj``, taken from its get_absolute_url().
        """
        return obj.get_absolute_url()

    def get_urls(self):
        """
        Returns one dictionary per item from items(), with the keys
        'location', 'lastmod', 'changefreq' and 'priority'.

        'location' is "http://" plus the current site's domain plus the
        item's resolved location.
        """
        from django.contrib.sites.models import Site
        current_site = Site.objects.get_current()
        urls = []
        for item in self.items():
            loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
            urls.append({
                'location': loc,
                'lastmod': self.__get('lastmod', item, None),
                'changefreq': self.__get('changefreq', item, None),
                'priority': self.__get('priority', item, None),
            })
        return urls
+
class FlatpageSitemap(Sitemap):
    """
    Sitemap section listing every flatpage attached to the current site.
    """

    def items(self):
        from django.contrib.sites.models import Site
        return Site.objects.get_current().flatpage_set.all()
+
class GenericSitemap(Sitemap):
    """
    Sitemap section built from an ``info_dict`` holding a 'queryset' entry
    and, optionally, a 'date_field' entry naming the attribute used for
    lastmod. ``priority`` and ``changefreq`` apply to every item.
    """
    priority = None
    changefreq = None

    def __init__(self, info_dict, priority=None, changefreq=None):
        self.queryset = info_dict['queryset']
        self.date_field = info_dict.get('date_field')
        self.priority, self.changefreq = priority, changefreq

    def items(self):
        # Hand back a clone so the stored queryset isn't evaluated early.
        return self.queryset.filter()

    def lastmod(self, item):
        # Without a configured date field there is nothing to report.
        if self.date_field is None:
            return None
        return getattr(item, self.date_field)
View
11 django/contrib/sitemap/templates/sitemap.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
+{% for url in urlset %}
+ <url>
+ <loc>{{ url.location|escape }}</loc>
+ {% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %}
+ {% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %}
+ {% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %}
+ </url>
+{% endfor %}
+</urlset>
View
8 django/contrib/sitemap/templates/sitemap_index.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.google.com/schemas/sitemap/0.84">
+{% for location in sitemaps %}
+ <sitemap>
+ <loc>{{ location|escape }}</loc>
+ </sitemap>
+{% endfor %}
+</sitemapindex>
View
30 django/contrib/sitemap/views.py
@@ -0,0 +1,30 @@
+from django.http import HttpResponse, Http404
+from django.template import loader
+from django.contrib.sites.models import Site
+from django.core import urlresolvers
+
def index(request, sitemaps):
    """
    Renders 'sitemap_index.xml' with one absolute URL per section in
    ``sitemaps``, each pointing at that section's sitemap view, and returns
    it as an application/xml response.
    """
    site = Site.objects.get_current()
    # Use the same scheme the index itself was requested with.
    if request.is_secure():
        scheme = 'https'
    else:
        scheme = 'http'
    locations = []
    for label in sitemaps:
        path = urlresolvers.reverse('django.contrib.sitemap.views.sitemap', kwargs={'section': label})
        locations.append('%s://%s%s' % (scheme, site.domain, path))
    body = loader.render_to_string('sitemap_index.xml', {'sitemaps': locations})
    return HttpResponse(body, mimetype='application/xml')
+
def sitemap(request, sitemaps, section=None):
    """
    Renders 'sitemap.xml' and returns it as an application/xml response.

    ``sitemaps`` maps section labels to Sitemap classes or instances. If
    ``section`` is given, only that section's URLs are included; otherwise
    the URLs of every section are combined.

    Raises Http404 if ``section`` is given but is not a key of ``sitemaps``.
    """
    if section is None:
        maps = sitemaps.values()
    else:
        if section not in sitemaps:
            raise Http404("No sitemap available for section: %r" % section)
        maps = [sitemaps[section]]

    urls = []
    for site in maps:
        # Values may be Sitemap classes (or other callables); call those
        # to obtain the object whose get_urls() is used.
        if callable(site):
            site = site()
        urls.extend(site.get_urls())
    xml = loader.render_to_string('sitemap.xml', {'urlset': urls})
    return HttpResponse(xml, mimetype='application/xml')
View
318 docs/sitemaps.txt
@@ -0,0 +1,318 @@
+=====================
+The sitemap framework
+=====================
+
+Django comes with a high-level sitemap-generating framework that makes
+creating `Google Sitemap`_ XML files easy.
+
+.. _Google Sitemap: http://www.google.com/webmasters/sitemaps/docs/en/protocol.html
+
+Overview
+========
+
+A sitemap is an XML file on your Web site that tells search-engine indexers how
+frequently your pages change and how "important" certain pages are in relation
+to other pages on your site. This information helps search engines index your
+site.
+
+The Django sitemap framework automates the creation of this XML file by letting
+you express this information in Python code.
+
+It works much like Django's `syndication framework`_. To create a sitemap, just
+write a ``Sitemap`` class and point to it in your URLconf_.
+
+.. _syndication framework: http://www.djangoproject.com/documentation/syndication/
+.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/
+
+Installation
+============
+
+To install the sitemap app, follow these steps:
+
+ 1. Add ``'django.contrib.sitemap'`` to your INSTALLED_APPS_ setting.
+ 2. Make sure ``'django.template.loaders.app_directories.load_template_source'``
+ is in your TEMPLATE_LOADERS_ setting. It's in there by default, so
+ you'll only need to change this if you've changed that setting.
+ 3. Make sure you've installed the `sites framework`_.
+
+(Note: The sitemap application doesn't install any database tables. The only
+reason it needs to go into ``INSTALLED_APPS`` is so that the
+``load_template_source`` template loader can find the default templates.)
+
+.. _INSTALLED_APPS: http://www.djangoproject.com/documentation/settings/#installed-apps
+.. _TEMPLATE_LOADERS: http://www.djangoproject.com/documentation/settings/#template-loaders
+.. _sites framework: http://www.djangoproject.com/documentation/sites/
+
+Initialization
+==============
+
+To activate sitemap generation on your Django site, add this line to your
+URLconf_:
+
+ (r'^sitemap\.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps})
+
+This tells Django to build a sitemap when a client accesses ``/sitemap.xml``.
+
+The name of the sitemap file is not important, but the location is. Google will
+only index links in your sitemap for the current URL level and below. For
+instance, if ``sitemap.xml`` lives in your root directory, it may reference any
+URL in your site. However, if your sitemap lives at ``/content/sitemap.xml``,
+it may only reference URLs that begin with ``/content/``.
+
+The sitemap view takes an extra, required argument: ``{'sitemaps': sitemaps}``.
+``sitemaps`` should be a dictionary that maps a short section label (e.g.,
+``blog`` or ``news``) to its ``Sitemap`` class (e.g., ``BlogSitemap`` or
+``NewsSitemap``). It may also map to an *instance* of a ``Sitemap`` class
+(e.g., ``BlogSitemap(some_var)``).
+
+.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/
+
+Sitemap classes
+===============
+
+A ``Sitemap`` class is a simple Python class that represents a "section" of
+entries in your sitemap. For example, one ``Sitemap`` class could represent all
+the entries of your weblog, while another could represent all of the events in
+your events calendar.
+
+In the simplest case, all these sections get lumped together into one
+``sitemap.xml``, but it's also possible to use the framework to generate a
+sitemap index that references individual sitemap files, one per section. (See
+`Creating a sitemap index`_ below.)
+
+``Sitemap`` classes must subclass ``django.contrib.sitemap.Sitemap``. They can
+live anywhere in your codebase.
+
+A simple example
+================
+
+Let's assume you have a blog system, with an ``Entry`` model, and you want your
+sitemap to include all the links to your individual blog entries. Here's how
+your sitemap class might look::
+
+ from django.contrib.sitemap import Sitemap
+ from mysite.blog.models import Entry
+
+ class BlogSitemap(Sitemap):
+ changefreq = "never"
+ priority = 0.5
+
+ def items(self):
+ return Entry.objects.filter(is_draft=False)
+
+ def lastmod(self, obj):
+ return obj.pub_date
+
+Note:
+
+ * ``changefreq`` and ``priority`` are class attributes corresponding to
+ ``<changefreq>`` and ``<priority>`` elements, respectively. They can be
+ made callable as functions, as ``lastmod`` was in the example.
+ * ``items()`` is simply a method that returns a list of objects. The objects
+ returned will get passed to any callable methods corresponding to a
+ sitemap property (``location``, ``lastmod``, ``changefreq``, and
+ ``priority``).
+ * ``lastmod`` should return a Python ``datetime`` object.
+ * There is no ``location`` method in this example, but you can provide it
+ in order to specify the URL for your object. By default, ``location()``
+ calls ``get_absolute_url()`` on each object and returns the result.
+
+Sitemap class reference
+=======================
+
+A ``Sitemap`` class can define the following methods/attributes:
+
+``items``
+---------
+
+**Required.** A method that returns a list of objects. The framework doesn't
+care what *type* of objects they are; all that matters is that these objects
+get passed to the ``location()``, ``lastmod()``, ``changefreq()`` and
+``priority()`` methods.
+
+``location``
+------------
+
+**Optional.** Either a method or attribute.
+
+If it's a method, it should return the absolute URL for a given object as
+returned by ``items()``.
+
+If it's an attribute, its value should be a string representing an absolute URL
+to use for *every* object returned by ``items()``.
+
+In both cases, "absolute URL" means a URL that doesn't include the protocol or
+domain. Examples:
+
+ * Good: ``'/foo/bar/'``
+ * Bad: ``'example.com/foo/bar/'``
+ * Bad: ``'http://example.com/foo/bar/'``
+
+If ``location`` isn't provided, the framework will call the
+``get_absolute_url()`` method on each object as returned by ``items()``.
+
+``lastmod``
+-----------
+
+**Optional.** Either a method or attribute.
+
+If it's a method, it should take one argument -- an object as returned by
+``items()`` -- and return that object's last-modified date/time, as a Python
+``datetime.datetime`` object.
+
+If it's an attribute, its value should be a Python ``datetime.datetime`` object
+representing the last-modified date/time for *every* object returned by
+``items()``.
+
+``changefreq``
+--------------
+
+**Optional.** Either a method or attribute.
+
+If it's a method, it should take one argument -- an object as returned by
+``items()`` -- and return that object's change frequency, as a Python string.
+
+If it's an attribute, its value should be a string representing the change
+frequency of *every* object returned by ``items()``.
+
+Possible values for ``changefreq``, whether you use a method or attribute, are:
+
+ * ``'always'``
+ * ``'hourly'``
+ * ``'daily'``
+ * ``'weekly'``
+ * ``'monthly'``
+ * ``'yearly'``
+ * ``'never'``
+
+``priority``
+------------
+
+**Optional.** Either a method or attribute.
+
+If it's a method, it should take one argument -- an object as returned by
+``items()`` -- and return that object's priority, as either a string or float.
+
+If it's an attribute, its value should be either a string or float representing
+the priority of *every* object returned by ``items()``.
+
+Example values for ``priority``: ``0.4``, ``1.0``. The default priority of a
+page is ``0.5``. See `Google's documentation`_ for more information.
+
+.. _Google's documentation: http://www.google.com/webmasters/sitemaps/docs/en/protocol.html
+
+Shortcuts
+=========
+
+The sitemap framework provides a couple of convenience classes for common cases:
+
+``FlatpageSitemap``
+-------------------
+
+The ``FlatpageSitemap`` class looks at all flatpages_ defined for the current
+``SITE_ID`` (see the `sites documentation`_) and creates an entry in the
+sitemap. These entries include only the ``location`` attribute -- not
+``lastmod``, ``changefreq`` or ``priority``.
+
+.. _flatpages: http://www.djangoproject.com/documentation/flatpages/
+.. _sites documentation: http://www.djangoproject.com/documentation/sites/
+
+``GenericSitemap``
+------------------
+
+The ``GenericSitemap`` class works with any `generic views`_ you already have.
+To use it, create an instance, passing in the same ``info_dict`` you pass to
+the generic views. The only requirement is that the dictionary have a
+``queryset`` entry. It may also have a ``date_field`` entry that specifies a
+date field for objects retrieved from the ``queryset``. This will be used for
+the ``lastmod`` attribute in the generated sitemap. You may also pass
+``priority`` and ``changefreq`` keyword arguments to the ``GenericSitemap``
+constructor to specify these attributes for all URLs.
+
+.. _generic views: http://www.djangoproject.com/documentation/generic_views/
+
+Example
+-------
+
+Here's an example of a URLconf_ using both::
+
+ from django.conf.urls.defaults import *
+ from django.contrib.sitemap import FlatpageSitemap, GenericSitemap
+ from mysite.blog.models import Entry
+
+ info_dict = {
+ 'queryset': Entry.objects.all(),
+ 'date_field': 'pub_date',
+ }
+
+ sitemaps = {
+ 'flatpages': FlatpageSitemap,
+ 'blog': GenericSitemap(info_dict, priority=0.6),
+ }
+
+ urlpatterns = patterns('',
+ # some generic view using info_dict
+ # ...
+
+ # the sitemap
+ (r'^sitemap\.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps})
+ )
+
+.. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/
+
+Creating a sitemap index
+========================
+
+The sitemap framework also has the ability to create a sitemap index that
+references individual sitemap files, one per each section defined in your
+``sitemaps`` dictionary. The only differences in usage are:
+
+ * You use two views in your URLconf: ``django.contrib.sitemap.views.index``
+ and ``django.contrib.sitemap.views.sitemap``.
+ * The ``django.contrib.sitemap.views.sitemap`` view should take a
+ ``section`` keyword argument.
+
+Here is what the relevant URLconf lines would look like for the example above::
+
+ (r'^sitemap\.xml$', 'django.contrib.sitemap.views.index', {'sitemaps': sitemaps})
+ (r'^sitemap-(?P<section>.+)\.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps})
+
+This will automatically generate a ``sitemap.xml`` file that references
+both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
+classes and the ``sitemaps`` dict don't change at all.
+
+Pinging Google
+==============
+
+You may want to "ping" Google when your sitemap changes, to let it know to
+reindex your site. The framework provides a function to do just that:
+``django.contrib.sitemap.ping_google()``.
+
+``ping_google()`` takes an optional argument, ``sitemap_url``, which should be
+the absolute URL of your site's sitemap (e.g., ``'/sitemap.xml'``). If this
+argument isn't provided, ``ping_google()`` will attempt to figure out your
+sitemap by performing a reverse lookup in your URLconf.
+
+``ping_google()`` raises the exception
+``django.contrib.sitemap.SitemapNotFound`` if it cannot determine your sitemap
+URL.
+
+One useful way to call ``ping_google()`` is from a model's ``save()`` method::
+
+ from django.contrib.sitemap import ping_google
+
+ class Entry(models.Model):
+ # ...
+ def save(self):
+ super(Entry, self).save()
+ try:
+ ping_google()
+ except Exception:
+ # Catch every exception because we could get a variety
+ # of HTTP-related exceptions.
+ pass
+
+A more efficient solution, however, would be to call ``ping_google()`` from a
+cron script, or some other scheduled task. The function makes an HTTP request
+to Google's servers, so you may not want to introduce that network overhead
+each time you call ``save()``.
Please sign in to comment.
Something went wrong with that request. Please try again.