Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using pagination). Patch from Julian Bez.

The docs patch here could probably do with some rewording.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@8088 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit badde8a7e5090347feea0b39221dbdea428582b8 1 parent a26ba33
@malcolmt malcolmt authored
View
3  AUTHORS
@@ -71,7 +71,7 @@ answer newbie questions, and generally made Django that much better:
Esdras Beleza <linux@esdrasbeleza.com>
Chris Bennett <chrisrbennett@yahoo.com>
James Bennett
- Ben Godfrey <http://aftnn.org>
+ Julian Bez
Arvis Bickovskis <viestards.lists@gmail.com>
Paul Bissex <http://e-scribe.com/>
Simon Blanchard
@@ -166,6 +166,7 @@ answer newbie questions, and generally made Django that much better:
glin@seznam.cz
martin.glueck@gmail.com
Artyom Gnilov <boobsd@gmail.com>
+ Ben Godfrey <http://aftnn.org>
GomoX <gomo@datafull.com>
Guilherme Mesquita Gondim <semente@taurinus.org>
Mario Gonzalez <gonzalemario@gmail.com>
View
16 django/contrib/sitemaps/__init__.py
@@ -1,4 +1,4 @@
-from django.core import urlresolvers
+from django.core import urlresolvers, paginator
import urllib
PING_URL = "http://www.google.com/webmasters/tools/ping"
@@ -34,6 +34,10 @@ def ping_google(sitemap_url=None, ping_url=PING_URL):
urllib.urlopen("%s?%s" % (ping_url, params))
class Sitemap:
+ # This limit is defined by Google. See the index documentation at
+ # http://sitemaps.org/protocol.php#index.
+ limit = 50000
+
def __get(self, name, obj, default=None):
try:
attr = getattr(self, name)
@@ -49,11 +53,17 @@ def items(self):
def location(self, obj):
return obj.get_absolute_url()
- def get_urls(self):
+ def _get_paginator(self):
+ if not hasattr(self, "paginator"):
+ self.paginator = paginator.Paginator(self.items(), self.limit)
+ return self.paginator
+ paginator = property(_get_paginator)
+
+ def get_urls(self, page=1):
from django.contrib.sites.models import Site
current_site = Site.objects.get_current()
urls = []
- for item in self.items():
+ for item in self.paginator.page(page).object_list:
loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
url_info = {
'location': loc,
View
24 django/contrib/sitemaps/views.py
@@ -3,14 +3,22 @@
from django.contrib.sites.models import Site
from django.core import urlresolvers
from django.utils.encoding import smart_str
+from django.core.paginator import EmptyPage, PageNotAnInteger
def index(request, sitemaps):
current_site = Site.objects.get_current()
sites = []
protocol = request.is_secure() and 'https' or 'http'
- for section in sitemaps.keys():
+ for section, site in sitemaps.items():
+ if callable(site):
+ pages = site().paginator.num_pages
+ else:
+ pages = site.paginator.num_pages
sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
+ if pages > 1:
+ for page in range(2, pages+1):
+ sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page))
xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
return HttpResponse(xml, mimetype='application/xml')
@@ -22,10 +30,16 @@ def sitemap(request, sitemaps, section=None):
maps.append(sitemaps[section])
else:
maps = sitemaps.values()
+ page = request.GET.get("p", 1)
for site in maps:
- if callable(site):
- urls.extend(site().get_urls())
- else:
- urls.extend(site.get_urls())
+ try:
+ if callable(site):
+ urls.extend(site().get_urls(page))
+ else:
+ urls.extend(site.get_urls(page))
+ except EmptyPage:
+ raise Http404("Page %s empty" % page)
+ except PageNotAnInteger:
+ raise Http404("No page '%s'" % page)
xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
return HttpResponse(xml, mimetype='application/xml')
View
4 docs/sitemaps.txt
@@ -282,6 +282,10 @@ This will automatically generate a ``sitemap.xml`` file that references
both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
classes and the ``sitemaps`` dict don't change at all.
+If one of your sitemaps is going to have more than 50,000 URLs you should
+create an index file. Your sitemap will be paginated and the index will
+reflect that.
+
Pinging Google
==============

0 comments on commit badde8a

Please sign in to comment.
Something went wrong with that request. Please try again.