Skip to content

Commit

Permalink
Fixed #19099 -- Split broken link emails out of common middleware.
Browse files Browse the repository at this point in the history
  • Loading branch information
aaugustin committed Jan 15, 2013
1 parent 83d0cc5 commit 50a985b
Show file tree
Hide file tree
Showing 8 changed files with 147 additions and 61 deletions.
4 changes: 2 additions & 2 deletions django/conf/global_settings.py
Expand Up @@ -146,7 +146,7 @@
# Email address that error messages come from. # Email address that error messages come from.
SERVER_EMAIL = 'root@localhost' SERVER_EMAIL = 'root@localhost'


# Whether to send broken-link emails. # Whether to send broken-link emails. Deprecated, must be removed in 1.8.
SEND_BROKEN_LINK_EMAILS = False SEND_BROKEN_LINK_EMAILS = False


# Database connection info. If left empty, will default to the dummy backend. # Database connection info. If left empty, will default to the dummy backend.
Expand Down Expand Up @@ -245,7 +245,7 @@
ADMIN_FOR = () ADMIN_FOR = ()


# List of compiled regular expression objects representing URLs that need not # List of compiled regular expression objects representing URLs that need not
# be reported when SEND_BROKEN_LINK_EMAILS is True. Here are a few examples: # be reported by BrokenLinkEmailsMiddleware. Here are a few examples:
# import re # import re
# IGNORABLE_404_URLS = ( # IGNORABLE_404_URLS = (
# re.compile(r'^/apple-touch-icon.*\.png$'), # re.compile(r'^/apple-touch-icon.*\.png$'),
Expand Down
78 changes: 46 additions & 32 deletions django/middleware/common.py
@@ -1,13 +1,14 @@
import hashlib import hashlib
import logging import logging
import re import re
import warnings


from django.conf import settings from django.conf import settings
from django import http
from django.core.mail import mail_managers from django.core.mail import mail_managers
from django.core import urlresolvers
from django import http
from django.utils.http import urlquote from django.utils.http import urlquote
from django.utils import six from django.utils import six
from django.core import urlresolvers




logger = logging.getLogger('django.request') logger = logging.getLogger('django.request')
Expand Down Expand Up @@ -102,25 +103,15 @@ def process_request(self, request):
return http.HttpResponsePermanentRedirect(newurl) return http.HttpResponsePermanentRedirect(newurl)


def process_response(self, request, response): def process_response(self, request, response):
"Send broken link emails and calculate the Etag, if needed." """
if response.status_code == 404: Calculate the ETag, if needed.
if settings.SEND_BROKEN_LINK_EMAILS and not settings.DEBUG: """
# If the referrer was from an internal link or a non-search-engine site, if settings.SEND_BROKEN_LINK_EMAILS:
# send a note to the managers. warnings.warn("SEND_BROKEN_LINK_EMAILS is deprecated. "
domain = request.get_host() "Use BrokenLinkEmailsMiddleware instead.",
referer = request.META.get('HTTP_REFERER', None) PendingDeprecationWarning, stacklevel=2)
is_internal = _is_internal_request(domain, referer) BrokenLinkEmailsMiddleware().process_response(request, response)
path = request.get_full_path()
if referer and not _is_ignorable_404(path) and (is_internal or '?' not in referer):
ua = request.META.get('HTTP_USER_AGENT', '<none>')
ip = request.META.get('REMOTE_ADDR', '<none>')
mail_managers("Broken %slink on %s" % ((is_internal and 'INTERNAL ' or ''), domain),
"Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" \
% (referer, request.get_full_path(), ua, ip),
fail_silently=True)
return response

# Use ETags, if requested.
if settings.USE_ETAGS: if settings.USE_ETAGS:
if response.has_header('ETag'): if response.has_header('ETag'):
etag = response['ETag'] etag = response['ETag']
Expand All @@ -139,15 +130,38 @@ def process_response(self, request, response):


return response return response


def _is_ignorable_404(uri):
"""
Returns True if a 404 at the given URL *shouldn't* notify the site managers.
"""
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)


def _is_internal_request(domain, referer): class BrokenLinkEmailsMiddleware(object):
"""
Returns true if the referring URL is the same domain as the current request. def process_response(self, request, response):
""" """
# Different subdomains are treated as different domains. Send broken link emails for relevant 404 NOT FOUND responses.
return referer is not None and re.match("^https?://%s/" % re.escape(domain), referer) """
if response.status_code == 404 and not settings.DEBUG:
domain = request.get_host()
path = request.get_full_path()
referer = request.META.get('HTTP_REFERER', '')
is_internal = self.is_internal_request(domain, referer)
is_not_search_engine = '?' not in referer
is_ignorable = self.is_ignorable_404(path)
if referer and (is_internal or is_not_search_engine) and not is_ignorable:
ua = request.META.get('HTTP_USER_AGENT', '<none>')
ip = request.META.get('REMOTE_ADDR', '<none>')
mail_managers(
"Broken %slink on %s" % (('INTERNAL ' if is_internal else ''), domain),
"Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" % (referer, path, ua, ip),
fail_silently=True)
return response

def is_internal_request(self, domain, referer):
"""
Returns True if the referring URL is the same domain as the current request.
"""
# Different subdomains are treated as different domains.
return re.match("^https?://%s/" % re.escape(domain), referer)

def is_ignorable_404(self, uri):
"""
Returns True if a 404 at the given URL *shouldn't* notify the site managers.
"""
return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
19 changes: 11 additions & 8 deletions docs/howto/error-reporting.txt
Expand Up @@ -54,18 +54,24 @@ setting.
Django can also be configured to email errors about broken links (404 "page Django can also be configured to email errors about broken links (404 "page
not found" errors). Django sends emails about 404 errors when: not found" errors). Django sends emails about 404 errors when:


* :setting:`DEBUG` is ``False`` * :setting:`DEBUG` is ``False``;


* :setting:`SEND_BROKEN_LINK_EMAILS` is ``True`` * Your :setting:`MIDDLEWARE_CLASSES` setting includes

:class:`django.middleware.common.BrokenLinkEmailsMiddleware`.
* Your :setting:`MIDDLEWARE_CLASSES` setting includes ``CommonMiddleware``
(which it does by default).


If those conditions are met, Django will email the users listed in the If those conditions are met, Django will email the users listed in the
:setting:`MANAGERS` setting whenever your code raises a 404 and the request has :setting:`MANAGERS` setting whenever your code raises a 404 and the request has
a referer. (It doesn't bother to email for 404s that don't have a referer -- a referer. (It doesn't bother to email for 404s that don't have a referer --
those are usually just people typing in broken URLs or broken Web 'bots). those are usually just people typing in broken URLs or broken Web 'bots).


.. note::

:class:`~django.middleware.common.BrokenLinkEmailsMiddleware` must appear
before other middleware that intercepts 404 errors, such as
:class:`~django.middleware.locale.LocaleMiddleware` or
:class:`~django.contrib.flatpages.middleware.FlatpageFallbackMiddleware`.
Put it towards the top of your :setting:`MIDDLEWARE_CLASSES` setting.

You can tell Django to stop reporting particular 404s by tweaking the You can tell Django to stop reporting particular 404s by tweaking the
:setting:`IGNORABLE_404_URLS` setting. It should be a tuple of compiled :setting:`IGNORABLE_404_URLS` setting. It should be a tuple of compiled
regular expression objects. For example:: regular expression objects. For example::
Expand All @@ -92,9 +98,6 @@ crawlers often request::
(Note that these are regular expressions, so we put a backslash in front of (Note that these are regular expressions, so we put a backslash in front of
periods to escape them.) periods to escape them.)


The best way to disable this behavior is to set
:setting:`SEND_BROKEN_LINK_EMAILS` to ``False``.

.. seealso:: .. seealso::


404 errors are logged using the logging framework. By default, these log 404 errors are logged using the logging framework. By default, these log
Expand Down
7 changes: 7 additions & 0 deletions docs/internals/deprecation.txt
Expand Up @@ -308,6 +308,13 @@ these changes.
* The ``depth`` keyword argument will be removed from * The ``depth`` keyword argument will be removed from
:meth:`~django.db.models.query.QuerySet.select_related`. :meth:`~django.db.models.query.QuerySet.select_related`.


1.8
---

* The ``SEND_BROKEN_LINK_EMAILS`` setting will be removed. Add the
:class:`django.middleware.common.BrokenLinkEmailsMiddleware` middleware to
your :setting:`MIDDLEWARE_CLASSES` setting instead.

2.0 2.0
--- ---


Expand Down
8 changes: 5 additions & 3 deletions docs/ref/middleware.txt
Expand Up @@ -61,14 +61,16 @@ Adds a few conveniences for perfectionists:
indexer would treat them as separate URLs -- so it's best practice to indexer would treat them as separate URLs -- so it's best practice to
normalize URLs. normalize URLs.


* Sends broken link notification emails to :setting:`MANAGERS` if
:setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True``.

* Handles ETags based on the :setting:`USE_ETAGS` setting. If * Handles ETags based on the :setting:`USE_ETAGS` setting. If
:setting:`USE_ETAGS` is set to ``True``, Django will calculate an ETag :setting:`USE_ETAGS` is set to ``True``, Django will calculate an ETag
for each request by MD5-hashing the page content, and it'll take care of for each request by MD5-hashing the page content, and it'll take care of
sending ``Not Modified`` responses, if appropriate. sending ``Not Modified`` responses, if appropriate.


.. class:: BrokenLinkEmailsMiddleware

* Sends broken link notification emails to :setting:`MANAGERS` (see
:doc:`/howto/error-reporting`).

View metadata middleware View metadata middleware
------------------------ ------------------------


Expand Down
13 changes: 10 additions & 3 deletions docs/ref/settings.txt
Expand Up @@ -1090,8 +1090,9 @@ query string, if any). Use this if your site does not provide a commonly
requested file such as ``favicon.ico`` or ``robots.txt``, or if it gets requested file such as ``favicon.ico`` or ``robots.txt``, or if it gets
hammered by script kiddies. hammered by script kiddies.


This is only used if :setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True`` and This is only used if
``CommonMiddleware`` is installed (see :doc:`/topics/http/middleware`). :class:`~django.middleware.common.BrokenLinkEmailsMiddleware` is enabled (see
:doc:`/topics/http/middleware`).


.. setting:: INSTALLED_APPS .. setting:: INSTALLED_APPS


Expand Down Expand Up @@ -1250,7 +1251,8 @@ MANAGERS
Default: ``()`` (Empty tuple) Default: ``()`` (Empty tuple)


A tuple in the same format as :setting:`ADMINS` that specifies who should get A tuple in the same format as :setting:`ADMINS` that specifies who should get
broken-link notifications when :setting:`SEND_BROKEN_LINK_EMAILS` is ``True``. broken link notifications when
:class:`~django.middleware.common.BrokenLinkEmailsMiddleware` is enabled.


.. setting:: MEDIA_ROOT .. setting:: MEDIA_ROOT


Expand Down Expand Up @@ -1448,6 +1450,11 @@ available in ``request.META``.)
SEND_BROKEN_LINK_EMAILS SEND_BROKEN_LINK_EMAILS
----------------------- -----------------------


.. deprecated:: 1.6
Since :class:`~django.middleware.common.BrokenLinkEmailsMiddleware`
was split from :class:`~django.middleware.common.CommonMiddleware`,
this setting no longer serves a purpose.

Default: ``False`` Default: ``False``


Whether to send an email to the :setting:`MANAGERS` each time somebody visits Whether to send an email to the :setting:`MANAGERS` each time somebody visits
Expand Down
18 changes: 18 additions & 0 deletions docs/releases/1.6.txt
Expand Up @@ -46,3 +46,21 @@ Backwards incompatible changes in 1.6


Features deprecated in 1.6 Features deprecated in 1.6
========================== ==========================

``SEND_BROKEN_LINK_EMAILS`` setting
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:class:`~django.middleware.common.CommonMiddleware` used to provide basic
reporting of broken links by email when ``SEND_BROKEN_LINK_EMAILS`` is set to
``True``.

Because of intractable ordering problems between
:class:`~django.middleware.common.CommonMiddleware` and
:class:`~django.middleware.locale.LocaleMiddleware`, this feature was split
out into a new middleware:
:class:`~django.middleware.common.BrokenLinkEmailsMiddleware`.

If you're relying on this feature, you should add
``'django.middleware.common.BrokenLinkEmailsMiddleware'`` to your
:setting:`MIDDLEWARE_CLASSES` setting and remove ``SEND_BROKEN_LINK_EMAILS``
from your settings.
61 changes: 48 additions & 13 deletions tests/regressiontests/middleware/tests.py
@@ -1,16 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-


import gzip import gzip
import re
import random
from io import BytesIO from io import BytesIO
import random
import re
import warnings


from django.conf import settings from django.conf import settings
from django.core import mail from django.core import mail
from django.http import HttpRequest from django.http import HttpRequest
from django.http import HttpResponse, StreamingHttpResponse from django.http import HttpResponse, StreamingHttpResponse
from django.middleware.clickjacking import XFrameOptionsMiddleware from django.middleware.clickjacking import XFrameOptionsMiddleware
from django.middleware.common import CommonMiddleware from django.middleware.common import CommonMiddleware, BrokenLinkEmailsMiddleware
from django.middleware.http import ConditionalGetMiddleware from django.middleware.http import ConditionalGetMiddleware
from django.middleware.gzip import GZipMiddleware from django.middleware.gzip import GZipMiddleware
from django.test import TestCase, RequestFactory from django.test import TestCase, RequestFactory
Expand Down Expand Up @@ -232,33 +233,39 @@ def test_prepend_www_append_slash_slashless_custom_urlconf(self):
self.assertEqual(r['Location'], self.assertEqual(r['Location'],
'http://www.testserver/middleware/customurlconf/slash/') 'http://www.testserver/middleware/customurlconf/slash/')


# Tests for the 404 error reporting via email # Legacy tests for the 404 error reporting via email (to be removed in 1.8)


@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),), @override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),),
SEND_BROKEN_LINK_EMAILS = True) SEND_BROKEN_LINK_EMAILS=True)
def test_404_error_reporting(self): def test_404_error_reporting(self):
request = self._get_request('regular_url/that/does/not/exist') request = self._get_request('regular_url/that/does/not/exist')
request.META['HTTP_REFERER'] = '/another/url/' request.META['HTTP_REFERER'] = '/another/url/'
response = self.client.get(request.path) with warnings.catch_warnings():
CommonMiddleware().process_response(request, response) warnings.simplefilter("ignore", PendingDeprecationWarning)
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
self.assertEqual(len(mail.outbox), 1) self.assertEqual(len(mail.outbox), 1)
self.assertIn('Broken', mail.outbox[0].subject) self.assertIn('Broken', mail.outbox[0].subject)


@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),), @override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),),
SEND_BROKEN_LINK_EMAILS = True) SEND_BROKEN_LINK_EMAILS=True)
def test_404_error_reporting_no_referer(self): def test_404_error_reporting_no_referer(self):
request = self._get_request('regular_url/that/does/not/exist') request = self._get_request('regular_url/that/does/not/exist')
response = self.client.get(request.path) with warnings.catch_warnings():
CommonMiddleware().process_response(request, response) warnings.simplefilter("ignore", PendingDeprecationWarning)
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
self.assertEqual(len(mail.outbox), 0) self.assertEqual(len(mail.outbox), 0)


@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),), @override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),),
SEND_BROKEN_LINK_EMAILS = True) SEND_BROKEN_LINK_EMAILS=True)
def test_404_error_reporting_ignored_url(self): def test_404_error_reporting_ignored_url(self):
request = self._get_request('foo_url/that/does/not/exist/either') request = self._get_request('foo_url/that/does/not/exist/either')
request.META['HTTP_REFERER'] = '/another/url/' request.META['HTTP_REFERER'] = '/another/url/'
response = self.client.get(request.path) with warnings.catch_warnings():
CommonMiddleware().process_response(request, response) warnings.simplefilter("ignore", PendingDeprecationWarning)
response = self.client.get(request.path)
CommonMiddleware().process_response(request, response)
self.assertEqual(len(mail.outbox), 0) self.assertEqual(len(mail.outbox), 0)


# Other tests # Other tests
Expand All @@ -271,6 +278,34 @@ def test_non_ascii_query_string_does_not_crash(self):
self.assertEqual(response.status_code, 301) self.assertEqual(response.status_code, 301)




@override_settings(IGNORABLE_404_URLS=(re.compile(r'foo'),))
class BrokenLinkEmailsMiddlewareTest(TestCase):

def setUp(self):
self.req = HttpRequest()
self.req.META = {
'SERVER_NAME': 'testserver',
'SERVER_PORT': 80,
}
self.req.path = self.req.path_info = 'regular_url/that/does/not/exist'
self.resp = self.client.get(self.req.path)

def test_404_error_reporting(self):
self.req.META['HTTP_REFERER'] = '/another/url/'
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 1)
self.assertIn('Broken', mail.outbox[0].subject)

def test_404_error_reporting_no_referer(self):
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0)

def test_404_error_reporting_ignored_url(self):
self.req.path = self.req.path_info = 'foo_url/that/does/not/exist'
BrokenLinkEmailsMiddleware().process_response(self.req, self.resp)
self.assertEqual(len(mail.outbox), 0)


class ConditionalGetMiddlewareTest(TestCase): class ConditionalGetMiddlewareTest(TestCase):
urls = 'regressiontests.middleware.cond_get_urls' urls = 'regressiontests.middleware.cond_get_urls'
def setUp(self): def setUp(self):
Expand Down

0 comments on commit 50a985b

Please sign in to comment.