Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #20099 -- Eased subclassing of BrokenLinkEmailsMiddleware

Thanks Ram Rachum for the report and the initial patch, and Simon
Charette for the review.
  • Loading branch information...
commit f940e564e4623d531eb97a2cf1b116851003f9fd 1 parent 6de81d6
Claude Paroz authored May 24, 2013
1  AUTHORS
@@ -472,6 +472,7 @@ answer newbie questions, and generally made Django that much better:
472 472
     Jyrki Pulliainen <jyrki.pulliainen@gmail.com>
473 473
     Thejaswi Puthraya <thejaswi.puthraya@gmail.com>
474 474
     Johann Queuniet <johann.queuniet@adh.naellia.eu>
  475
+    Ram Rachum <ram@rachum.com>
475 476
     Jan Rademaker
476 477
     Michael Radziej <mir@noris.de>
477 478
     Laurent Rahuel <laurent.rahuel@gmail.com>
24  django/middleware/common.py
@@ -142,15 +142,17 @@ def process_response(self, request, response):
142 142
             domain = request.get_host()
143 143
             path = request.get_full_path()
144 144
             referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace')
145  
-            is_internal = self.is_internal_request(domain, referer)
146  
-            is_not_search_engine = '?' not in referer
147  
-            is_ignorable = self.is_ignorable_404(path)
148  
-            if referer and (is_internal or is_not_search_engine) and not is_ignorable:
  145
+
  146
+            if not self.is_ignorable_request(request, path, domain, referer):
149 147
                 ua = request.META.get('HTTP_USER_AGENT', '<none>')
150 148
                 ip = request.META.get('REMOTE_ADDR', '<none>')
151 149
                 mail_managers(
152  
-                    "Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain),
153  
-                    "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip),
  150
+                    "Broken %slink on %s" % (
  151
+                        ('INTERNAL ' if self.is_internal_request(domain, referer) else ''),
  152
+                        domain
  153
+                    ),
  154
+                    "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
  155
+                    "IP address: %s\n" % (referer, path, ua, ip),
154 156
                     fail_silently=True)
155 157
         return response
156 158
 
@@ -159,10 +161,14 @@ def is_internal_request(self, domain, referer):
159 161
         Returns True if the referring URL is the same domain as the current request.
160 162
         """
161 163
         # Different subdomains are treated as different domains.
162  
-        return re.match("^https?://%s/" % re.escape(domain), referer)
  164
+        return bool(re.match("^https?://%s/" % re.escape(domain), referer))
163 165
 
164  
-    def is_ignorable_404(self, uri):
  166
+    def is_ignorable_request(self, request, uri, domain, referer):
165 167
         """
166  
-        Returns True if a 404 at the given URL *shouldn't* notify the site managers.
  168
+        Returns True if the given request *shouldn't* notify the site managers.
167 169
         """
  170
+        # A '?' in an external referer is assumed to indicate a search engine.
  171
+        if (not referer or
  172
+                (not self.is_internal_request(domain, referer) and '?' in referer)):
  173
+            return True
168 174
         return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
5  docs/howto/error-reporting.txt
@@ -98,6 +98,11 @@ crawlers often request::
98 98
 (Note that these are regular expressions, so we put a backslash in front of
99 99
 periods to escape them.)
100 100
 
  101
+If you'd like to customize the behavior of
  102
+:class:`django.middleware.common.BrokenLinkEmailsMiddleware` further (for
  103
+example, to ignore requests coming from web crawlers), you should subclass it
  104
+and override its methods.
  105
+
101 106
 .. seealso::
102 107
 
103 108
    404 errors are logged using the logging framework. By default, these log
19  tests/middleware/tests.py
@@ -326,6 +326,25 @@ def test_404_error_nonascii_referrer(self):
326 326
         BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
327 327
         self.assertEqual(len(mail.outbox), 1)
328 328
 
  329
+    def test_custom_request_checker(self):
  330
+        class SubclassedMiddleware(BrokenLinkEmailsMiddleware):
  331
+            ignored_user_agent_patterns = (re.compile(r'Spider.*'),
  332
+                                           re.compile(r'Robot.*'))
  333
+            def is_ignorable_request(self, request, uri, domain, referer):
  334
+                '''Check user-agent in addition to normal checks.'''
  335
+                if super(SubclassedMiddleware, self).is_ignorable_request(request, uri, domain, referer):
  336
+                    return True
  337
+                user_agent = request.META['HTTP_USER_AGENT']
  338
+                return any(pattern.search(user_agent) for pattern in
  339
+                               self.ignored_user_agent_patterns)
  340
+
  341
+        self.req.META['HTTP_REFERER'] = '/another/url/'
  342
+        self.req.META['HTTP_USER_AGENT'] = 'Spider machine 3.4'
  343
+        SubclassedMiddleware().process_response(self.req, self.resp)
  344
+        self.assertEqual(len(mail.outbox), 0)
  345
+        self.req.META['HTTP_USER_AGENT'] = 'My user agent'
  346
+        SubclassedMiddleware().process_response(self.req, self.resp)
  347
+        self.assertEqual(len(mail.outbox), 1)
329 348
 
330 349
 class ConditionalGetMiddlewareTest(TestCase):
331 350
     urls = 'middleware.cond_get_urls'

0 notes on commit f940e56

Please sign in to comment.
Something went wrong with that request. Please try again.