Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using pagination). Patch from Julian Bez.

The docs patch here could probably do with some rewording.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@8088 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit badde8a7e5090347feea0b39221dbdea428582b8 1 parent a26ba33
Malcolm Tredinnick authored
3  AUTHORS
@@ -71,7 +71,7 @@ answer newbie questions, and generally made Django that much better:
71 71
     Esdras Beleza <linux@esdrasbeleza.com>
72 72
     Chris Bennett <chrisrbennett@yahoo.com>
73 73
     James Bennett
74  
-    Ben Godfrey <http://aftnn.org>
  74
+    Julian Bez
75 75
     Arvis Bickovskis <viestards.lists@gmail.com>
76 76
     Paul Bissex <http://e-scribe.com/>
77 77
     Simon Blanchard
@@ -166,6 +166,7 @@ answer newbie questions, and generally made Django that much better:
166 166
     glin@seznam.cz
167 167
     martin.glueck@gmail.com
168 168
     Artyom Gnilov <boobsd@gmail.com>
  169
+    Ben Godfrey <http://aftnn.org>
169 170
     GomoX <gomo@datafull.com>
170 171
     Guilherme Mesquita Gondim <semente@taurinus.org>
171 172
     Mario Gonzalez <gonzalemario@gmail.com>
16  django/contrib/sitemaps/__init__.py
... ...
@@ -1,4 +1,4 @@
1  
-from django.core import urlresolvers
  1
+from django.core import urlresolvers, paginator
2 2
 import urllib
3 3
 
4 4
 PING_URL = "http://www.google.com/webmasters/tools/ping"
@@ -34,6 +34,10 @@ def ping_google(sitemap_url=None, ping_url=PING_URL):
34 34
     urllib.urlopen("%s?%s" % (ping_url, params))
35 35
 
36 36
 class Sitemap:
  37
+    # This limit is defined by Google. See the index documentation at
  38
+    # http://sitemaps.org/protocol.php#index.
  39
+    limit = 50000
  40
+
37 41
     def __get(self, name, obj, default=None):
38 42
         try:
39 43
             attr = getattr(self, name)
@@ -49,11 +53,17 @@ def items(self):
49 53
     def location(self, obj):
50 54
         return obj.get_absolute_url()
51 55
 
52  
-    def get_urls(self):
  56
+    def _get_paginator(self):
  57
+        if not hasattr(self, "paginator"):
  58
+            self.paginator = paginator.Paginator(self.items(), self.limit)
  59
+        return self.paginator
  60
+    paginator = property(_get_paginator)
  61
+
  62
+    def get_urls(self, page=1):
53 63
         from django.contrib.sites.models import Site
54 64
         current_site = Site.objects.get_current()
55 65
         urls = []
56  
-        for item in self.items():
  66
+        for item in self.paginator.page(page).object_list:
57 67
             loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
58 68
             url_info = {
59 69
                 'location':   loc,
24  django/contrib/sitemaps/views.py
@@ -3,14 +3,22 @@
3 3
 from django.contrib.sites.models import Site
4 4
 from django.core import urlresolvers
5 5
 from django.utils.encoding import smart_str
  6
+from django.core.paginator import EmptyPage, PageNotAnInteger
6 7
 
7 8
 def index(request, sitemaps):
8 9
     current_site = Site.objects.get_current()
9 10
     sites = []
10 11
     protocol = request.is_secure() and 'https' or 'http'
11  
-    for section in sitemaps.keys():
  12
+    for section, site in sitemaps.items():
  13
+        if callable(site):
  14
+            pages = site().paginator.num_pages
  15
+        else:
  16
+            pages = site.paginator.num_pages
12 17
         sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section})
13 18
         sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url))
  19
+        if pages > 1:
  20
+            for page in range(2, pages+1):
  21
+                sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page))
14 22
     xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})
15 23
     return HttpResponse(xml, mimetype='application/xml')
16 24
 
@@ -22,10 +30,16 @@ def sitemap(request, sitemaps, section=None):
22 30
         maps.append(sitemaps[section])
23 31
     else:
24 32
         maps = sitemaps.values()
  33
+    page = request.GET.get("p", 1)
25 34
     for site in maps:
26  
-        if callable(site):
27  
-            urls.extend(site().get_urls())
28  
-        else:
29  
-            urls.extend(site.get_urls())
  35
+        try:
  36
+            if callable(site):
  37
+                urls.extend(site().get_urls(page))
  38
+            else:
  39
+                urls.extend(site.get_urls(page))
  40
+        except EmptyPage:
  41
+            raise Http404("Page %s empty" % page)
  42
+        except PageNotAnInteger:
  43
+            raise Http404("No page '%s'" % page)
30 44
     xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls}))
31 45
     return HttpResponse(xml, mimetype='application/xml')
4  docs/sitemaps.txt
@@ -282,6 +282,10 @@ This will automatically generate a ``sitemap.xml`` file that references
282 282
 both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap``
283 283
 classes and the ``sitemaps`` dict don't change at all.
284 284
 
  285
+If any one of your sitemaps will contain more than 50,000 URLs, you should
  286
+create an index file. That sitemap is then split into pages, and the index
  287
+lists each of those pages.
  288
+
285 289
 Pinging Google
286 290
 ==============
287 291
 

0 notes on commit badde8a

Please sign in to comment.
Something went wrong with that request. Please try again.