From 37101a3d45350fae4dd219c16e320857b14634ee Mon Sep 17 00:00:00 2001 From: Ian Murray Date: Wed, 2 May 2012 12:47:02 +0100 Subject: [PATCH] [2314] Fix broken parse_rfc_2822 helper function. --- ckan/lib/helpers.py | 63 ++++++++++++++++++++++++++++------ ckan/tests/lib/test_helpers.py | 28 ++++++++------- 2 files changed, 68 insertions(+), 23 deletions(-) diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index b84636ce32f..1e2b5bbbfeb 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -605,26 +605,69 @@ def date_str_to_datetime(date_str): # a strptime. Also avoids problem with Python 2.5 not having %f. return datetime.datetime(*map(int, re.split('[^\d]', date_str))) -def parse_rfc_2822_date(date_str, tz_aware=True): +def parse_rfc_2822_date(date_str, assume_utc=True): """ Parse a date string of the form specified in RFC 2822, and return a datetime. - RFC 2822 is the date format used in HTTP headers. - - If the date string contains a timezone indication, and tz_aware is True, - then the associated tzinfo is attached to the returned datetime object. - - Returns None if the string cannot be parse as a valid datetime. + RFC 2822 is the date format used in HTTP headers. It should contain timezone + information, but that cannot be relied upon. + + If date_str doesn't contain timezone information, then the 'assume_utc' flag + determines whether we assume this string is local (with respect to the + server running this code), or UTC. In practice, what this means is that if + assume_utc is True, then the returned datetime is 'aware', with an associated + tzinfo of offset zero. Otherwise, the returned datetime is 'naive'. + + If timezone information is available in date_str, then the returned datetime + is 'aware', i.e. it has an associated tzinfo object. + + Returns None if the string cannot be parsed as a valid datetime. 
""" time_tuple = email.utils.parsedate_tz(date_str) + # Not parsable if not time_tuple: return None - if not tz_aware: - time_tuple = time_tuple[:-1] + (None,) + # No timezone information available in the string + if time_tuple[-1] is None and not assume_utc: + return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple)) + else: + offset = 0 if time_tuple[-1] is None else time_tuple[-1] + tz_info = _RFC2282TzInfo(offset) + return datetime.datetime(*time_tuple[:6], microsecond=0, tzinfo=tz_info) + +class _RFC2282TzInfo(datetime.tzinfo): + """ + A datetime.tzinfo implementation used by parse_rfc_2822_date() function. + + In order to return timezone information, a concrete implementation of + datetime.tzinfo is required. This class represents tzinfo that knows + about its offset from UTC, has no knowledge of daylight savings time, and + no knowledge of the timezone name. + + """ + + def __init__(self, offset): + """ + offset from UTC in seconds. + """ + self.offset = datetime.timedelta(seconds=offset) + + def utcoffset(self, dt): + return self.offset + + def dst(self, dt): + """ + Dates parsed from an RFC 2822 string conflate timezone and dst, and so + it's not possible to determine whether we're in DST or not, hence + returning None. + """ + return None + + def tzname(self, dt): + return None - return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple)) def time_ago_in_words_from_str(date_str, granularity='month'): if date_str: diff --git a/ckan/tests/lib/test_helpers.py b/ckan/tests/lib/test_helpers.py index b7eeacbf02b..5233d853e37 100644 --- a/ckan/tests/lib/test_helpers.py +++ b/ckan/tests/lib/test_helpers.py @@ -92,15 +92,25 @@ def test_gravatar_encodes_url_correctly(self): for e in expected: assert e in res, (e,res) - def test_parse_rfc_2822_simple_case(self): + def test_parse_rfc_2822_no_timezone_specified(self): """ Parse "Tue, 15 Nov 1994 12:45:26" successfully. - No zone info. + Assuming it's UTC. 
""" dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26') - assert_equal(dt.isoformat(), '1994-11-15T12:45:26') + assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00') + def test_parse_rfc_2822_no_timezone_specified_assuming_local(self): + """ + Parse "Tue, 15 Nov 1994 12:45:26" successfully. + + Assuming it's local. + """ + dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26', assume_utc=False) + assert_equal(dt.isoformat(), '1994-11-15T12:45:26') + assert_equal(dt.tzinfo, None) + def test_parse_rfc_2822_gmt_case(self): """ Parse "Tue, 15 Nov 1994 12:45:26 GMT" successfully. @@ -108,20 +118,12 @@ def test_parse_rfc_2822_gmt_case(self): GMT obs-zone specified """ dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 GMT') - assert_equal(dt.isoformat(), '1994-11-15T12:45:26') + assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00') def test_parse_rfc_2822_with_offset(self): """ Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully. """ dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700') - assert_equal(dt.isoformat(), '1994-11-15T05:45:26') - - def test_parse_rfc_2822_ignoring_offset(self): - """ - Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully. - """ - dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700', tz_aware=False) - assert_equal(dt.isoformat(), '1994-11-15T12:45:26') - + assert_equal(dt.isoformat(), '1994-11-15T12:45:26+07:00')