Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fixed #717 - If-Modified-Since handling should compare dates according to RFC 2616

Thanks to Maniac for the report, julienb for the initial patch, and
especially to aaugustin for the final patch and tests.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@15696 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
commit dbe6ced0d6911386d731a045e00b0d4c005b8e45 1 parent 3f38a99
Luke Plant authored March 01, 2011
18  django/middleware/http.py
... ...
@@ -1,5 +1,5 @@
1 1
 from django.core.exceptions import MiddlewareNotUsed
2  
-from django.utils.http import http_date
  2
+from django.utils.http import http_date, parse_http_date_safe
3 3
 
4 4
 class ConditionalGetMiddleware(object):
5 5
     """
@@ -15,7 +15,7 @@ def process_response(self, request, response):
15 15
             response['Content-Length'] = str(len(response.content))
16 16
 
17 17
         if response.has_header('ETag'):
18  
-            if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None)
  18
+            if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
19 19
             if if_none_match == response['ETag']:
20 20
                 # Setting the status is enough here. The response handling path
21 21
                 # automatically removes content for this status code (in
@@ -23,10 +23,14 @@ def process_response(self, request, response):
23 23
                 response.status_code = 304
24 24
 
25 25
         if response.has_header('Last-Modified'):
26  
-            if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE', None)
27  
-            if if_modified_since == response['Last-Modified']:
28  
-                # Setting the status code is enough here (same reasons as
29  
-                # above).
30  
-                response.status_code = 304
  26
+            if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
  27
+            if if_modified_since is not None:
  28
+                if_modified_since = parse_http_date_safe(if_modified_since)
  29
+            if if_modified_since is not None:
  30
+                last_modified = parse_http_date_safe(response['Last-Modified'])
  31
+                if last_modified is not None and last_modified <= if_modified_since:
  32
+                    # Setting the status code is enough here (same reasons as
  33
+                    # above).
  34
+                    response.status_code = 304
31 35
 
32 36
         return response
55  django/utils/http.py
... ...
@@ -1,3 +1,5 @@
  1
+import calendar
  2
+import datetime
1 3
 import re
2 4
 import sys
3 5
 import urllib
@@ -8,6 +10,17 @@
8 10
 
9 11
 ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')
10 12
 
  13
+MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
  14
+__D = r'(?P<day>\d{2})'
  15
+__D2 = r'(?P<day>[ \d]\d)'
  16
+__M = r'(?P<mon>\w{3})'
  17
+__Y = r'(?P<year>\d{4})'
  18
+__Y2 = r'(?P<year>\d{2})'
  19
+__T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})'
  20
+RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T))
  21
+RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T))
  22
+ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y))
  23
+
11 24
 def urlquote(url, safe='/'):
12 25
     """
13 26
     A version of Python's urllib.quote() function that can operate on unicode
@@ -70,6 +83,48 @@ def http_date(epoch_seconds=None):
70 83
     rfcdate = formatdate(epoch_seconds)
71 84
     return '%s GMT' % rfcdate[:25]
72 85
 
  86
+def parse_http_date(date):
  87
+    """
  88
+    Parses a date format as specified by HTTP RFC2616 section 3.3.1.
  89
+
  90
+    The three formats allowed by the RFC are accepted, even if only the first
  91
+    one is still in widespread use.
  92
+
  93
+    Returns an integer expressed in seconds since the epoch, in
  94
+    UTC.
  95
+    """
  96
+    # email.Utils.parsedate does the job for RFC1123 dates; unfortunately
  97
+    # RFC2616 makes it mandatory to support RFC850 dates too. So we roll
  98
+    # our own RFC-compliant parsing.
  99
+    for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
  100
+        m = regex.match(date)
  101
+        if m is not None:
  102
+            break
  103
+    else:
  104
+        raise ValueError("%r is not in a valid HTTP date format" % date)
  105
+    try:
  106
+        year = int(m.group('year'))
  107
+        if year < 100:
  108
+            year += 2000 if year < 70 else 1900
  109
+        month = MONTHS.index(m.group('mon').lower()) + 1
  110
+        day = int(m.group('day'))
  111
+        hour = int(m.group('hour'))
  112
+        min = int(m.group('min'))
  113
+        sec = int(m.group('sec'))
  114
+        result = datetime.datetime(year, month, day, hour, min, sec)
  115
+        return calendar.timegm(result.utctimetuple())
  116
+    except Exception:
  117
+        raise ValueError("%r is not a valid date" % date)
  118
+
  119
+def parse_http_date_safe(date):
  120
+    """
  121
+    Same as parse_http_date, but returns None if the input is invalid.
  122
+    """
  123
+    try:
  124
+        return parse_http_date(date)
  125
+    except Exception:
  126
+        pass
  127
+
73 128
 # Base 36 functions: useful for generating compact URLs
74 129
 
75 130
 def base36_to_int(s):
18  django/views/decorators/http.py
@@ -9,10 +9,9 @@
9 9
 
10 10
 from calendar import timegm
11 11
 from datetime import timedelta
12  
-from email.Utils import formatdate
13 12
 
14 13
 from django.utils.decorators import decorator_from_middleware, available_attrs
15  
-from django.utils.http import parse_etags, quote_etag
  14
+from django.utils.http import http_date, parse_http_date_safe, parse_etags, quote_etag
16 15
 from django.utils.log import getLogger
17 16
 from django.middleware.http import ConditionalGetMiddleware
18 17
 from django.http import HttpResponseNotAllowed, HttpResponseNotModified, HttpResponse
@@ -79,6 +78,8 @@ def decorator(func):
79 78
         def inner(request, *args, **kwargs):
80 79
             # Get HTTP request headers
81 80
             if_modified_since = request.META.get("HTTP_IF_MODIFIED_SINCE")
  81
+            if if_modified_since:
  82
+                if_modified_since = parse_http_date_safe(if_modified_since)
82 83
             if_none_match = request.META.get("HTTP_IF_NONE_MATCH")
83 84
             if_match = request.META.get("HTTP_IF_MATCH")
84 85
             if if_none_match or if_match:
@@ -102,7 +103,7 @@ def inner(request, *args, **kwargs):
102 103
             if last_modified_func:
103 104
                 dt = last_modified_func(request, *args, **kwargs)
104 105
                 if dt:
105  
-                    res_last_modified = formatdate(timegm(dt.utctimetuple()))[:26] + 'GMT'
  106
+                    res_last_modified = timegm(dt.utctimetuple())
106 107
                 else:
107 108
                     res_last_modified = None
108 109
             else:
@@ -116,7 +117,8 @@ def inner(request, *args, **kwargs):
116 117
                 if ((if_none_match and (res_etag in etags or
117 118
                         "*" in etags and res_etag)) and
118 119
                         (not if_modified_since or
119  
-                            res_last_modified == if_modified_since)):
  120
+                            (res_last_modified and if_modified_since and
  121
+                            res_last_modified <= if_modified_since))):
120 122
                     if request.method in ("GET", "HEAD"):
121 123
                         response = HttpResponseNotModified()
122 124
                     else:
@@ -136,9 +138,9 @@ def inner(request, *args, **kwargs):
136 138
                         }
137 139
                     )
138 140
                     response = HttpResponse(status=412)
139  
-                elif (not if_none_match and if_modified_since and
140  
-                        request.method == "GET" and
141  
-                        res_last_modified == if_modified_since):
  141
+                elif (not if_none_match and request.method == "GET" and
  142
+                        res_last_modified and if_modified_since and
  143
+                        res_last_modified <= if_modified_since):
142 144
                     response = HttpResponseNotModified()
143 145
 
144 146
             if response is None:
@@ -146,7 +148,7 @@ def inner(request, *args, **kwargs):
146 148
 
147 149
             # Set relevant headers on the response if they don't already exist.
148 150
             if res_last_modified and not response.has_header('Last-Modified'):
149  
-                response['Last-Modified'] = res_last_modified
  151
+                response['Last-Modified'] = http_date(res_last_modified)
150 152
             if res_etag and not response.has_header('ETag'):
151 153
                 response['ETag'] = quote_etag(res_etag)
152 154
 
8  django/views/static.py
@@ -9,12 +9,11 @@
9 9
 import re
10 10
 import stat
11 11
 import urllib
12  
-from email.Utils import parsedate_tz, mktime_tz
13 12
 
14 13
 from django.template import loader
15 14
 from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseNotModified
16 15
 from django.template import Template, Context, TemplateDoesNotExist
17  
-from django.utils.http import http_date
  16
+from django.utils.http import http_date, parse_http_date
18 17
 
19 18
 def serve(request, path, document_root=None, show_indexes=False):
20 19
     """
@@ -128,10 +127,7 @@ def was_modified_since(header=None, mtime=0, size=0):
128 127
             raise ValueError
129 128
         matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
130 129
                            re.IGNORECASE)
131  
-        header_date = parsedate_tz(matches.group(1))
132  
-        if header_date is None:
133  
-            raise ValueError
134  
-        header_mtime = mktime_tz(header_date)
  130
+        header_mtime = parse_http_date(matches.group(1))
135 131
         header_len = matches.group(3)
136 132
         if header_len and int(header_len) != size:
137 133
             raise ValueError
34  tests/regressiontests/conditional_processing/models.py
... ...
@@ -1,17 +1,20 @@
1 1
 # -*- coding:utf-8 -*-
2  
-from datetime import datetime, timedelta
3  
-from calendar import timegm
  2
+from datetime import datetime
4 3
 
5 4
 from django.test import TestCase
6  
-from django.utils.http import parse_etags, quote_etag
  5
+from django.utils import unittest
  6
+from django.utils.http import parse_etags, quote_etag, parse_http_date
7 7
 
8 8
 FULL_RESPONSE = 'Test conditional get response'
9 9
 LAST_MODIFIED = datetime(2007, 10, 21, 23, 21, 47)
10 10
 LAST_MODIFIED_STR = 'Sun, 21 Oct 2007 23:21:47 GMT'
  11
+LAST_MODIFIED_NEWER_STR = 'Mon, 18 Oct 2010 16:56:23 GMT'
  12
+LAST_MODIFIED_INVALID_STR = 'Mon, 32 Oct 2010 16:56:23 GMT'
11 13
 EXPIRED_LAST_MODIFIED_STR = 'Sat, 20 Oct 2007 23:21:47 GMT'
12 14
 ETAG = 'b4246ffc4f62314ca13147c9d4f76974'
13 15
 EXPIRED_ETAG = '7fae4cd4b0f81e7d2914700043aa8ed6'
14 16
 
  17
+
15 18
 class ConditionalGet(TestCase):
16 19
     def assertFullResponse(self, response, check_last_modified=True, check_etag=True):
17 20
         self.assertEquals(response.status_code, 200)
@@ -33,6 +36,12 @@ def testIfModifiedSince(self):
33 36
         self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_STR
34 37
         response = self.client.get('/condition/')
35 38
         self.assertNotModified(response)
  39
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_NEWER_STR
  40
+        response = self.client.get('/condition/')
  41
+        self.assertNotModified(response)
  42
+        self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = LAST_MODIFIED_INVALID_STR
  43
+        response = self.client.get('/condition/')
  44
+        self.assertFullResponse(response)
36 45
         self.client.defaults['HTTP_IF_MODIFIED_SINCE'] = EXPIRED_LAST_MODIFIED_STR
37 46
         response = self.client.get('/condition/')
38 47
         self.assertFullResponse(response)
@@ -118,7 +127,7 @@ def testInvalidETag(self):
118 127
         self.assertFullResponse(response, check_last_modified=False)
119 128
 
120 129
 
121  
-class ETagProcesing(TestCase):
  130
+class ETagProcessing(unittest.TestCase):
122 131
     def testParsing(self):
123 132
         etags = parse_etags(r'"", "etag", "e\"t\"ag", "e\\tag", W/"weak"')
124 133
         self.assertEquals(etags, ['', 'etag', 'e"t"ag', r'e\tag', 'weak'])
@@ -126,3 +135,20 @@ def testParsing(self):
126 135
     def testQuoting(self):
127 136
         quoted_etag = quote_etag(r'e\t"ag')
128 137
         self.assertEquals(quoted_etag, r'"e\\t\"ag"')
  138
+
  139
+
  140
+class HttpDateProcessing(unittest.TestCase):
  141
+    def testParsingRfc1123(self):
  142
+        parsed = parse_http_date('Sun, 06 Nov 1994 08:49:37 GMT')
  143
+        self.assertEqual(datetime.utcfromtimestamp(parsed),
  144
+                         datetime(1994, 11, 06, 8, 49, 37))
  145
+
  146
+    def testParsingRfc850(self):
  147
+        parsed = parse_http_date('Sunday, 06-Nov-94 08:49:37 GMT')
  148
+        self.assertEqual(datetime.utcfromtimestamp(parsed),
  149
+                         datetime(1994, 11, 06, 8, 49, 37))
  150
+
  151
+    def testParsingAsctime(self):
  152
+        parsed = parse_http_date('Sun Nov  6 08:49:37 1994')
  153
+        self.assertEqual(datetime.utcfromtimestamp(parsed),
  154
+                         datetime(1994, 11, 06, 8, 49, 37))
87  tests/regressiontests/middleware/tests.py
@@ -3,6 +3,7 @@
3 3
 from django.conf import settings
4 4
 from django.http import HttpRequest
5 5
 from django.middleware.common import CommonMiddleware
  6
+from django.middleware.http import ConditionalGetMiddleware
6 7
 from django.test import TestCase
7 8
 
8 9
 
@@ -247,3 +248,89 @@ def test_prepend_www_append_slash_slashless_custom_urlconf(self):
247 248
       self.assertEquals(r.status_code, 301)
248 249
       self.assertEquals(r['Location'],
249 250
                         'http://www.testserver/middleware/customurlconf/slash/')
  251
+
  252
+class ConditionalGetMiddlewareTest(TestCase):
  253
+    urls = 'regressiontests.middleware.cond_get_urls'
  254
+    def setUp(self):
  255
+        self.req = HttpRequest()
  256
+        self.req.META = {
  257
+            'SERVER_NAME': 'testserver',
  258
+            'SERVER_PORT': 80,
  259
+        }
  260
+        self.req.path = self.req.path_info = "/"
  261
+        self.resp = self.client.get(self.req.path)
  262
+
  263
+    # Tests for the Date header
  264
+
  265
+    def test_date_header_added(self):
  266
+        self.assertFalse('Date' in self.resp)
  267
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  268
+        self.assertTrue('Date' in self.resp)
  269
+
  270
+    # Tests for the Content-Length header
  271
+
  272
+    def test_content_length_header_added(self):
  273
+        content_length = len(self.resp.content)
  274
+        self.assertFalse('Content-Length' in self.resp)
  275
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  276
+        self.assertTrue('Content-Length' in self.resp)
  277
+        self.assertEqual(int(self.resp['Content-Length']), content_length)
  278
+
  279
+    def test_content_length_header_not_changed(self):
  280
+        bad_content_length = len(self.resp.content) + 10
  281
+        self.resp['Content-Length'] = bad_content_length
  282
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  283
+        self.assertEqual(int(self.resp['Content-Length']), bad_content_length)
  284
+
  285
+    # Tests for the ETag header
  286
+
  287
+    def test_if_none_match_and_no_etag(self):
  288
+        self.req.META['HTTP_IF_NONE_MATCH'] = 'spam'
  289
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  290
+        self.assertEquals(self.resp.status_code, 200)
  291
+
  292
+    def test_no_if_none_match_and_etag(self):
  293
+        self.resp['ETag'] = 'eggs'
  294
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  295
+        self.assertEquals(self.resp.status_code, 200)
  296
+
  297
+    def test_if_none_match_and_same_etag(self):
  298
+        self.req.META['HTTP_IF_NONE_MATCH'] = self.resp['ETag'] = 'spam'
  299
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  300
+        self.assertEquals(self.resp.status_code, 304)
  301
+
  302
+    def test_if_none_match_and_different_etag(self):
  303
+        self.req.META['HTTP_IF_NONE_MATCH'] = 'spam'
  304
+        self.resp['ETag'] = 'eggs'
  305
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  306
+        self.assertEquals(self.resp.status_code, 200)
  307
+
  308
+    # Tests for the Last-Modified header
  309
+
  310
+    def test_if_modified_since_and_no_last_modified(self):
  311
+        self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
  312
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  313
+        self.assertEquals(self.resp.status_code, 200)
  314
+
  315
+    def test_no_if_modified_since_and_last_modified(self):
  316
+        self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
  317
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  318
+        self.assertEquals(self.resp.status_code, 200)
  319
+
  320
+    def test_if_modified_since_and_same_last_modified(self):
  321
+        self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
  322
+        self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
  323
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  324
+        self.assertEquals(self.resp.status_code, 304)
  325
+
  326
+    def test_if_modified_since_and_last_modified_in_the_past(self):
  327
+        self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
  328
+        self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:35:44 GMT'
  329
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  330
+        self.assertEquals(self.resp.status_code, 304)
  331
+
  332
+    def test_if_modified_since_and_last_modified_in_the_future(self):
  333
+        self.req.META['HTTP_IF_MODIFIED_SINCE'] = 'Sat, 12 Feb 2011 17:38:44 GMT'
  334
+        self.resp['Last-Modified'] = 'Sat, 12 Feb 2011 17:41:44 GMT'
  335
+        self.resp = ConditionalGetMiddleware().process_response(self.req, self.resp)
  336
+        self.assertEquals(self.resp.status_code, 200)
2  tests/regressiontests/views/tests/static.py
@@ -51,7 +51,7 @@ def test_not_modified_since(self):
51 51
         file_name = 'file.txt'
52 52
         response = self.client.get(
53 53
             '/views/%s/%s' % (self.prefix, file_name),
54  
-            HTTP_IF_MODIFIED_SINCE='Mon, 18 Jan 2038 05:14:07 UTC'
  54
+            HTTP_IF_MODIFIED_SINCE='Mon, 18 Jan 2038 05:14:07 GMT'
55 55
             # This is 24h before max Unix time. Remember to fix Django and
56 56
             # update this test well before 2038 :)
57 57
             )

0 notes on commit dbe6ced

Please sign in to comment.
Something went wrong with that request. Please try again.