Skip to content

Fixed #24496 -- Checked CSRF Referer against CSRF_COOKIE_DOMAIN. #4337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions django/http/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from django.utils.encoding import (
escape_uri_path, force_bytes, force_str, force_text, iri_to_uri,
)
from django.utils.http import is_same_domain
from django.utils.six.moves.urllib.parse import (
parse_qsl, quote, urlencode, urljoin, urlsplit,
)
Expand Down Expand Up @@ -527,15 +528,7 @@ def validate_host(host, allowed_hosts):
host = host[:-1] if host.endswith('.') else host

for pattern in allowed_hosts:
pattern = pattern.lower()
match = (
pattern == '*' or
pattern.startswith('.') and (
host.endswith(pattern) or host == pattern[1:]
) or
pattern == host
)
if match:
if pattern == '*' or is_same_domain(host, pattern):
return True

return False
30 changes: 26 additions & 4 deletions django/middleware/csrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,17 @@
from django.utils.cache import patch_vary_headers
from django.utils.crypto import constant_time_compare, get_random_string
from django.utils.encoding import force_text
from django.utils.http import same_origin
from django.utils.http import is_same_domain
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth noting: same_origin was literally only used inside here, for all of the codebase.

But since it's technically sorta part of the public API of the django.utils.http package, I left it. Not sure if it's just safe to remove this function or not.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be fine to remove as it's not documented and GitHub search didn't reveal any uses in the wild.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some uses on GH: https://github.com/search?q=from+django.utils.http+import+same_origin&ref=reposearch&type=Code&utf8=%E2%9C%93

But it's definitely not documented in the Django docs.

I assume we can remove it and it'd be worth adding a note into the documentation that it was removed and was undocumented?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I looked, all those uses are copies of Django itself.

from django.utils.six.moves.urllib.parse import urlparse

logger = logging.getLogger('django.request')

REASON_NO_REFERER = "Referer checking failed - no Referer."
REASON_BAD_REFERER = "Referer checking failed - %s does not match %s."
REASON_NO_CSRF_COOKIE = "CSRF cookie not set."
REASON_BAD_TOKEN = "CSRF token missing or incorrect."
REASON_MALFORMED_REFERER = "Referer checking failed - Referer is malformed."
REASON_INSECURE_REFERER = "Referer checking failed - Referer is insecure while host is secure."

CSRF_KEY_LENGTH = 32

Expand Down Expand Up @@ -154,9 +157,28 @@ def process_view(self, request, callback, callback_args, callback_kwargs):
if referer is None:
return self._reject(request, REASON_NO_REFERER)

# Note that request.get_host() includes the port.
good_referer = 'https://%s/' % request.get_host()
if not same_origin(referer, good_referer):
referer = urlparse(referer)

# Make sure we have a valid URL for Referer.
if '' in (referer.scheme, referer.netloc):
return self._reject(request, REASON_MALFORMED_REFERER)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing tests for the two self._reject branches?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The MALFORMED_REFERER branch was already covered by test_https_malformed_referer. Added a test to cover the INSECURE_REFERER branch

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For lols, I added expanded the tests for the MALFORMED_REFERER branch.


# Ensure that our Referer is also secure.
if referer.scheme != 'https':
return self._reject(request, REASON_INSECURE_REFERER)

# If there isn't a CSRF_COOKIE_DOMAIN, assume we need an exact
# match on host:port. If not, obey the cookie rules.
if settings.CSRF_COOKIE_DOMAIN is None:
# request.get_host() includes the port.
good_referer = request.get_host()
else:
good_referer = settings.CSRF_COOKIE_DOMAIN
server_port = request.META['SERVER_PORT']
if server_port not in ('443', '80'):
good_referer = '%s:%s' % (good_referer, server_port)

if not is_same_domain(referer.netloc, good_referer):
reason = REASON_BAD_REFERER % (referer, good_referer)
return self._reject(request, reason)

Expand Down
22 changes: 14 additions & 8 deletions django/utils/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,18 +253,24 @@ def quote_etag(etag):
return '"%s"' % etag.replace('\\', '\\\\').replace('"', '\\"')


def same_origin(url1, url2):
def is_same_domain(host, pattern):
"""
Checks if two URLs are 'same-origin'
Return ``True`` if the host is either an exact match or a match
to the wildcard pattern.

Any pattern beginning with a period matches a domain and all of its
subdomains. (e.g. ``.example.com`` matches ``example.com`` and
``foo.example.com``). Anything else is an exact string match.
"""
p1, p2 = urlparse(url1), urlparse(url2)
try:
o1 = (p1.scheme, p1.hostname, p1.port or PROTOCOL_TO_PORT[p1.scheme])
o2 = (p2.scheme, p2.hostname, p2.port or PROTOCOL_TO_PORT[p2.scheme])
return o1 == o2
except (ValueError, KeyError):
if not pattern:
return False

pattern = pattern.lower()
return (
pattern[0] == '.' and (host.endswith(pattern) or host == pattern[1:]) or
pattern == host
)


def is_safe_url(url, host=None):
"""
Expand Down
11 changes: 8 additions & 3 deletions docs/ref/csrf.txt
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,14 @@ The CSRF protection is based on the following things:
due to the fact that HTTP 'Set-Cookie' headers are (unfortunately) accepted
by clients that are talking to a site under HTTPS. (Referer checking is not
done for HTTP requests because the presence of the Referer header is not
reliable enough under HTTP.)

This ensures that only forms that have originated from your Web site can be used
reliable enough under HTTP.) The referer is compared against the
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one space between sentences please

:setting:`CSRF_COOKIE_DOMAIN` setting. The :setting:`CSRF_COOKIE_DOMAIN`
setting supports subdomains. For example, ".example.com" will allow
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you could add double backticks around the URL strings, that'll fix the spelling error for "api".

post requests from "www.example.com" and "api.example.com." If
:setting:`CSRF_COOKIE_DOMAIN` is not set, the referer must match the
``Host`` header.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need a ``.. versionchanged:: 1.9` block below this paragraph to describe what changed as well as a mention in the 1.9 release notes.


This ensures that only forms that have originated from trusted domains can be used
to POST data back.

It deliberately ignores GET requests (and other requests that are defined as
Expand Down
66 changes: 66 additions & 0 deletions tests/csrf_tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ def test_https_bad_referer(self):
req._is_secure_override = True
req.META['HTTP_HOST'] = 'www.example.com'
req.META['HTTP_REFERER'] = 'https://www.evil.org/somepage'
req.META['SERVER_PORT'] = '443'
req2 = CsrfViewMiddleware().process_view(req, post_form_view, (), {})
self.assertIsNotNone(req2)
self.assertEqual(403, req2.status_code)
Expand All @@ -325,6 +326,20 @@ def test_https_malformed_referer(self):
req2 = CsrfViewMiddleware().process_view(req, post_form_view, (), {})
self.assertIsNotNone(req2)
self.assertEqual(403, req2.status_code)
# missing scheme
# >>> urlparse('//example.com/')
# ParseResult(scheme='', netloc='example.com', path='/', params='', query='', fragment='')
req.META['HTTP_REFERER'] = '//example.com/'
req2 = CsrfViewMiddleware().process_view(req, post_form_view, (), {})
self.assertIsNotNone(req2)
self.assertEqual(403, req2.status_code)
# missing netloc
# >>> urlparse('https://')
# ParseResult(scheme='https', netloc='', path='', params='', query='', fragment='')
req.META['HTTP_REFERER'] = 'https://'
req2 = CsrfViewMiddleware().process_view(req, post_form_view, (), {})
self.assertIsNotNone(req2)
self.assertEqual(403, req2.status_code)

@override_settings(ALLOWED_HOSTS=['www.example.com'])
def test_https_good_referer(self):
Expand Down Expand Up @@ -352,6 +367,57 @@ def test_https_good_referer_2(self):
req2 = CsrfViewMiddleware().process_view(req, post_form_view, (), {})
self.assertIsNone(req2)

@override_settings(
ALLOWED_HOSTS=['www.example.com'],
CSRF_COOKIE_DOMAIN='.example.com',
)
def test_https_good_referer_matches_cookie_domain(self):
"""
A POST HTTPS request with a good referer should be accepted from a
subdomain that's allowed by CSRF_COOKIE_DOMAIN.
"""
req = self._get_POST_request_with_token()
req._is_secure_override = True
req.META['HTTP_HOST'] = 'www.example.com'
req.META['HTTP_REFERER'] = 'https://foo.example.com/'
req.META['SERVER_PORT'] = '443'
req2 = CsrfViewMiddleware().process_view(req, post_form_view, (), {})
self.assertIsNone(req2)

@override_settings(
ALLOWED_HOSTS=['www.example.com'],
CSRF_COOKIE_DOMAIN='.example.com',
)
def test_https_good_referer_matches_cookie_domain_with_different_port(self):
"""
A POST HTTPS request with a good referer should be accepted from a
subdomain that's allowed by CSRF_COOKIE_DOMAIN and a non-443 port.
"""
req = self._get_POST_request_with_token()
req._is_secure_override = True
req.META['HTTP_HOST'] = 'www.example.com'
req.META['HTTP_REFERER'] = 'https://foo.example.com:4443/'
req.META['SERVER_PORT'] = '4443'
req2 = CsrfViewMiddleware().process_view(req, post_form_view, (), {})
self.assertIsNone(req2)

@override_settings(
ALLOWED_HOSTS=['www.example.com'],
CSRF_COOKIE_DOMAIN='.example.com',
)
def test_https_reject_insecure_referer(self):
"""
A POST HTTPS request from an insecure referer should be rejected.
"""
req = self._get_POST_request_with_token()
req._is_secure_override = True
req.META['HTTP_HOST'] = 'www.example.com'
req.META['HTTP_REFERER'] = 'http://example.com/'
req.META['SERVER_PORT'] = '443'
req2 = CsrfViewMiddleware().process_view(req, post_form_view, (), {})
self.assertIsNotNone(req2)
self.assertEqual(403, req2.status_code)

def test_ensures_csrf_cookie_no_middleware(self):
"""
Tests that ensures_csrf_cookie decorator fulfils its promise
Expand Down
44 changes: 19 additions & 25 deletions tests/utils_tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,6 @@

class TestUtilsHttp(unittest.TestCase):

def test_same_origin_true(self):
# Identical
self.assertTrue(http.same_origin('http://foo.com/', 'http://foo.com/'))
# One with trailing slash - see #15617
self.assertTrue(http.same_origin('http://foo.com', 'http://foo.com/'))
self.assertTrue(http.same_origin('http://foo.com/', 'http://foo.com'))
# With port
self.assertTrue(http.same_origin('https://foo.com:8000', 'https://foo.com:8000/'))
# No port given but according to RFC6454 still the same origin
self.assertTrue(http.same_origin('http://foo.com', 'http://foo.com:80/'))
self.assertTrue(http.same_origin('https://foo.com', 'https://foo.com:443/'))

def test_same_origin_false(self):
# Different scheme
self.assertFalse(http.same_origin('http://foo.com', 'https://foo.com'))
# Different host
self.assertFalse(http.same_origin('http://foo.com', 'http://goo.com'))
# Different host again
self.assertFalse(http.same_origin('http://foo.com', 'http://foo.com.evil.com'))
# Different port
self.assertFalse(http.same_origin('http://foo.com:8000', 'http://foo.com:8001'))
# No port given
self.assertFalse(http.same_origin('http://foo.com', 'http://foo.com:8000/'))
self.assertFalse(http.same_origin('https://foo.com', 'https://foo.com:8000/'))

def test_urlencode(self):
# 2-tuples (the norm)
result = http.urlencode((('a', 1), ('b', 2), ('c', 3)))
Expand Down Expand Up @@ -157,6 +132,25 @@ def test_urlquote(self):
http.urlunquote_plus('Paris+&+Orl%C3%A9ans'),
'Paris & Orl\xe9ans')

def test_is_same_domain_good(self):
for pair in (
('example.com', 'example.com'),
('example.com', '.example.com'),
('foo.example.com', '.example.com'),
('example.com:8888', 'example.com:8888'),
('example.com:8888', '.example.com:8888'),
('foo.example.com:8888', '.example.com:8888'),
):
self.assertTrue(http.is_same_domain(*pair))

def test_is_same_domain_bad(self):
for pair in (
('example2.com', 'example.com'),
('foo.example.com', 'example.com'),
('example.com:9999', 'example.com:8888'),
):
self.assertFalse(http.is_same_domain(*pair))


class ETagProcessingTests(unittest.TestCase):
def test_parsing(self):
Expand Down