Skip to content

Commit

Permalink
Merge pull request #30 from pganssle/parser-default-change
Browse files Browse the repository at this point in the history
Add smart defaults and fallback options to parser.
  • Loading branch information
pganssle committed Apr 12, 2015
2 parents 18b3fe7 + 5994bfb commit f7a1d4e
Show file tree
Hide file tree
Showing 2 changed files with 304 additions and 10 deletions.
153 changes: 144 additions & 9 deletions dateutil/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,32 @@
This module offers a generic date/time string parser which is able to parse
most known formats to represent a date and/or time.
This module attempts to be forgiving with regards to unlikely input formats,
returning a datetime object even for dates which are ambiguous. If an element of
a date/time stamp is omitted, the following rules are applied:
- If AM or PM is left unspecified, a 24-hour clock is assumed; however, an hour
on a 12-hour clock (`0 <= hour <= 12`) *must* be specified if AM or PM is
specified.
- If a time zone is omitted, it is assumed to be UTC.
If any other elements are missing, they are taken from the `datetime.datetime`
object passed to the parameter `default`. If this results in a day number
exceeding the valid number of days per month, one can fall back to the last
day of the month by setting `fallback_on_invalid_day` parameter to `True`.
Also provided is the `smart_defaults` option, which attempts to fill in the
missing elements from context. If specified, the logic is:
- If the omitted element is smaller than the largest specified element, select
the *earliest* time matching the specified conditions; so `"June 2010"` is
interpreted as `June 1, 2010 0:00:00` and the (somewhat strange)
`"Feb 1997 3:15 PM"` is interpreted as `February 1, 1997 15:15:00`.
- If the element is larger than the largest specified element, select the
*most recent* time matching the specified conditions (e.g. parsing `"May"`
in June 2015 returns the date May 1st, 2015, whereas parsing it in April 2015
returns May 1st, 2014). If using the `date_in_future` flag, this logic is
inverted, and instead the *next* time matching the specified conditions is
returned.
Additional resources about date/time string formats can be found below:
- `A summary of the international standard date and time notation
Expand All @@ -21,6 +47,7 @@
import time
import collections
from io import StringIO
from calendar import monthrange, isleap

from six import text_type, binary_type, integer_types

Expand Down Expand Up @@ -246,7 +273,7 @@ class parserinfo(object):
PERTAIN = ["of"]
TZOFFSET = {}

def __init__(self, dayfirst=False, yearfirst=False):
def __init__(self, dayfirst=False, yearfirst=False, smart_defaults=False):
self._jump = self._convert(self.JUMP)
self._weekdays = self._convert(self.WEEKDAYS)
self._months = self._convert(self.MONTHS)
Expand All @@ -257,6 +284,7 @@ def __init__(self, dayfirst=False, yearfirst=False):

self.dayfirst = dayfirst
self.yearfirst = yearfirst
self.smart_defaults = smart_defaults

self._year = time.localtime().tm_year
self._century = self._year // 100*100
Expand Down Expand Up @@ -338,12 +366,12 @@ def validate(self, res):


class parser(object):

def __init__(self, info=None):
    # Use the supplied parser configuration; when none (or a falsy value)
    # is given, build a default-configured parserinfo instead.
    if not info:
        info = parserinfo()
    self.info = info

def parse(self, timestr, default=None, ignoretz=False, tzinfos=None,
**kwargs):
smart_defaults=None, date_in_future=False,
fallback_on_invalid_day=None, **kwargs):
"""
Parse the date/time string into a datetime object.
Expand All @@ -353,7 +381,35 @@ def parse(self, timestr, default=None, ignoretz=False, tzinfos=None,
:param default:
The default datetime object, if this is a datetime object and not
`None`, elements specified in `timestr` replace elements in the
default object.
default object, unless `smart_defaults` is set to `True`, in which
case to the extent necessary, timestamps are calculated relative to
this date.
:param smart_defaults:
If using smart defaults, the `default` parameter is treated as the
effective parsing date/time, and the context of the datetime string
is determined relative to `default`. If `None`, this parameter is
inherited from the :class:`parserinfo` object.
:param date_in_future:
If `smart_defaults` is `True`, the parser assumes by default that
the timestamp refers to a date in the past, and will return the
beginning of the most recent timespan which matches the time string
(e.g. if `default` is March 3rd, 2013, "Feb" parses to
"Feb 1, 2013" and "May 3" parses to May 3rd, 2012). Setting this
parameter to `True` inverts this assumption, and returns the
beginning of the *next* matching timespan.
:param fallback_on_invalid_day:
If specified `True`, an otherwise invalid date such as "Feb 30" or
"June 32" falls back to the last day of the month. If specified as
"False", the parser is strict about parsing otherwise valid dates
that would turn up as invalid because of the fallback rules (e.g.
"Feb 2010" run with a default of January 30, 2010 and `smart_defaults`
set to `False` would throw an error, rather than falling
back to the end of February). If `None` or unspecified, the date
falls back to the most recent valid date only if the invalid date
is created as a result of an unspecified day in the time string.
:param ignoretz:
Whether or not to ignore the time zone.
Expand Down Expand Up @@ -382,11 +438,15 @@ def parse(self, timestr, default=None, ignoretz=False, tzinfos=None,
your system.
"""

default_specified = default is not None
if smart_defaults is None:
smart_defaults = self.info.smart_defaults

if not default_specified:
if default is None:
effective_dt = datetime.datetime.now()
default = datetime.datetime.now().replace(hour=0, minute=0,
second=0, microsecond=0)
second=0, microsecond=0)
else:
effective_dt = default

if kwargs.get('fuzzy_with_tokens', False):
res, skipped_tokens = self._parse(timestr, **kwargs)
Expand All @@ -397,12 +457,87 @@ def parse(self, timestr, default=None, ignoretz=False, tzinfos=None,
raise ValueError("Unknown string format")

repl = {}
for attr in ["year", "month", "day", "hour",
"minute", "second", "microsecond"]:
for attr in ("year", "month", "day", "hour",
"minute", "second", "microsecond"):
value = getattr(res, attr)
if value is not None:
repl[attr] = value

# Choose the correct fallback position if requested by the
# `smart_defaults` parameter.
if smart_defaults:
# Determine if it refers to this year, last year or next year
if res.year is None:
if res.month is not None:
# Explicitly deal with leap year problems
if res.month == 2 and (res.day is not None and
res.day == 29):

ly_offset = 4 if date_in_future else -4
next_year = 4 * (default.year // 4)

if date_in_future:
next_year += ly_offset

if not isleap(next_year):
next_year += ly_offset

if not isleap(default.year):
default = default.replace(year=next_year)
elif date_in_future:
next_year = default.year + 1
else:
next_year = default.year - 1

if ((res.month == default.month and res.day is not None and
((res.day < default.day and date_in_future) or
(res.day > default.day and not date_in_future))) or
((res.month < default.month and date_in_future) or
(res.month > default.month and not date_in_future))):

default = default.replace(year=next_year)

# Select a proper month
if res.month is None:
if res.year is not None:
default = default.replace(month=1)

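# I'm not sure if this is even possible. NOTE(review): if it is reachable,
# the two adjustments below raise TypeError, because datetime.timedelta
# accepts no `months` keyword; shifting by a month needs explicit
# year/month arithmetic instead.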
if res.day is not None:
if res.day < default.day and date_in_future:
default += datetime.timedelta(months=1)
elif res.day > default.day and not date_in_future:
default -= datetime.timedelta(months=1)

if res.day is None:
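# Determine if it's today, tomorrow or yesterday.
# NOTE(review): dict.iteritems() used below exists only on Python 2; on
# Python 3 this raises AttributeError -- use .items() (or six.iteritems).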
if res.year is None and res.month is None:
t_repl = {}
for key, val in repl.iteritems():
if key in ('hour', 'minute', 'second', 'microsecond'):
t_repl[key] = val

stime = effective_dt.replace(**t_repl)

if stime < effective_dt and date_in_future:
default += datetime.timedelta(days=1)
elif stime > effective_dt and not date_in_future:
default -= datetime.timedelta(days=1)
else:
# Otherwise it's the beginning of the month
default = default.replace(day=1)

if fallback_on_invalid_day or (fallback_on_invalid_day is None and
'day' not in repl):
# If the default day exceeds the last day of the month, fall back to
# the end of the month.
cyear = default.year if res.year is None else res.year
cmonth = default.month if res.month is None else res.month
cday = default.day if res.day is None else res.day

if cday > monthrange(cyear, cmonth)[1]:
repl['day'] = monthrange(cyear, cmonth)[1]

ret = default.replace(**repl)

if res.weekday is not None and not res.day:
Expand Down
161 changes: 160 additions & 1 deletion dateutil/test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5336,9 +5336,168 @@ def testRandomFormat35(self):
self.assertEqual(parse("2004 10 Apr 11h30m", default=self.default),
datetime(2004, 4, 10, 11, 30))

# Test that if a year is omitted, we use the most recent matching value
def testSmartDefaultsNoYearMonthEarlier(self):
    # The reference date is in an earlier month than "August 3", so the
    # most recent match is expected in the previous year.
    reference = datetime(2014, 5, 1)
    result = parse("August 3", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2013, 8, 3))

def testSmartDefaultsNoYearDayEarlier(self):
    # Same month, but the reference day precedes the 3rd, so the most
    # recent August 3 is expected in the previous year.
    reference = datetime(2014, 8, 1)
    result = parse("August 3", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2013, 8, 3))

def testSmartDefaultsNoYearSameDay(self):
    # The reference date is exactly August 3, so the reference year is
    # expected to be kept.
    reference = datetime(2014, 8, 3)
    result = parse("August 3", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 8, 3))

def testSmartDefaultsNoYearDayLater(self):
    # Same month, reference day after the 3rd: the most recent August 3
    # is expected in the reference year.
    reference = datetime(2014, 8, 4)
    result = parse("August 3", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 8, 3))

def testSmartDefaultsNoYearMonthLater(self):
    # The reference date is in a later month than August, so the most
    # recent August 3 is expected in the reference year.
    reference = datetime(2014, 12, 19)
    result = parse("August 3", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 8, 3))

def testSmartDefaultsNoYearFeb29(self):
    # 2014 is not a leap year; the most recent February 29 before the
    # reference date is expected to be in 2012.
    reference = datetime(2014, 12, 19)
    result = parse("February 29", default=reference,
                   date_in_future=False, smart_defaults=True)
    self.assertEqual(result, datetime(2012, 2, 29))

def testSmartDefaultsNoYearFeb29Y2100(self):
    # Year 2100 is not a leap year (divisible by 100 but not by 400), so
    # the most recent February 29 before the reference date is in 2096.
    reference = datetime(2100, 12, 19)
    result = parse("February 29", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2096, 2, 29))

# Test that if a year is omitted and `date_in_future` is set, we use the next
# matching value
def testSmartDefaultsNoYearFutureDayEarlier(self):
    # Looking forward from May 1, the next August 3 is expected in the
    # reference year.
    # NOTE(review): despite the name, the reference date here is in an
    # earlier *month* than the parsed date, not just an earlier day.
    reference = datetime(2014, 5, 1)
    result = parse("August 3", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 8, 3))

def testSmartDefaultsNoYearFutureMonthEarlier(self):
    # Looking forward from August 1, the next August 3 is expected in the
    # reference year.
    # NOTE(review): despite the name, the reference date here is in the
    # same month as the parsed date and only its *day* is earlier.
    reference = datetime(2014, 8, 1)
    result = parse("August 3", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 8, 3))

def testSmartDefaultsNoYearFutureSameDay(self):
    # The reference date is exactly August 3; even when looking forward
    # the reference year is expected to be kept.
    reference = datetime(2014, 8, 3)
    result = parse("August 3", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 8, 3))

def testSmartDefaultsNoYearFutureDayLater(self):
    # August 3 has already passed on the reference date (August 4), so
    # the next match is expected in the following year.
    reference = datetime(2014, 8, 4)
    result = parse("August 3", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2015, 8, 3))

def testSmartDefaultsNoYearFutureMonthLater(self):
    # The reference date is in a later month than August, so the next
    # August 3 is expected in the following year.
    reference = datetime(2014, 12, 19)
    result = parse("August 3", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2015, 8, 3))

def testSmartDefaultsNoYearFutureFeb29Y2100(self):
    # 2100 is not a leap year, so the next February 29 after the
    # reference date (December 2098) is expected in 2104.
    reference = datetime(2098, 12, 19)
    result = parse("February 29", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2104, 2, 29))

# Test that if only a month is provided, we select the beginning of the most recent
# occurrence of the specified month
def testSmartDefaultsMonthOnlyMonthEarlier(self):
    # The reference date is in an earlier month than September, so the
    # start of the previous year's September is expected.
    reference = datetime(2014, 5, 1)
    result = parse("September", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2013, 9, 1))

def testSmartDefaultsMonthOnlySameMonthFirstDay(self):
    # The reference date is the first day of September; the same date is
    # expected back.
    reference = datetime(2014, 9, 1)
    result = parse("September", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 9, 1))

def testSmartDefaultsMonthOnlySameMonthLastDay(self):
    # The reference date is the last day of September; the start of that
    # same month is expected.
    reference = datetime(2014, 9, 30)
    result = parse("September", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 9, 1))

def testSmartDefaultMonthOnlyMonthLater(self):
    # The reference date is in a later month than September, so the
    # start of the reference year's September is expected.
    reference = datetime(2014, 11, 1)
    result = parse("September", default=reference, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 9, 1))

# Test that if only a month is provided and `date_in_future` is set, we select
# the beginning of the current or next occurrence of the specified month
def testSmartDefaultsMonthOnlyFutureMonthEarlier(self):
    # Looking forward from May, the start of the reference year's
    # September is expected.
    reference = datetime(2014, 5, 1)
    result = parse("September", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 9, 1))

def testSmartDefaultsMonthOnlyFutureSameMonthFirstDay(self):
    # The reference date is the first day of September; the same date is
    # expected back even when looking forward.
    reference = datetime(2014, 9, 1)
    result = parse("September", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 9, 1))

def testSmartDefaultsMonthOnlyFutureSameMonthLastDay(self):
    # The reference date is the last day of September; the start of that
    # same month is still expected when looking forward.
    reference = datetime(2014, 9, 30)
    result = parse("September", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2014, 9, 1))

def testSmartDefaultsMonthOnlyFutureMonthLater(self):
    # September has already passed on the reference date (November), so
    # the start of the following year's September is expected.
    reference = datetime(2014, 11, 1)
    result = parse("September", default=reference,
                   date_in_future=True, smart_defaults=True)
    self.assertEqual(result, datetime(2015, 9, 1))

# Test to ensure that if only a year is specified, January 1st of that year is
# returned.
def testSmartDefaultsYearOnly(self):
    # A bare year with smart defaults is expected to resolve to
    # January 1st of that year.
    result = parse("2009", smart_defaults=True)
    self.assertEqual(result, datetime(2009, 1, 1))

def testSmartDefaultsYearOnlyFuture(self):
    # A bare year is expected to resolve to January 1st of that year
    # even when `date_in_future` is set.
    result = parse("2009", smart_defaults=True, date_in_future=True)
    self.assertEqual(result, datetime(2009, 1, 1))

# Tests that invalid days fall back to the end of the month if that's
# the desired behavior.
def testInvalidDayNoFallback(self):
    # With the fallback explicitly disabled, an out-of-range day is
    # expected to raise ValueError.
    self.assertRaises(ValueError, parse, "Feb 30, 2007",
                      fallback_on_invalid_day=False)

def testInvalidDayFallbackFebNoLeapYear(self):
    # With the fallback enabled, an out-of-range February day in a
    # non-leap year is expected to clamp to the 28th.
    result = parse("Feb 31, 2007", fallback_on_invalid_day=True)
    self.assertEqual(result, datetime(2007, 2, 28))

def testInvalidDayFallbackFebLeapYear(self):
    # With the fallback enabled, an out-of-range February day in a leap
    # year is expected to clamp to the 29th.
    result = parse("Feb 31, 2008", fallback_on_invalid_day=True)
    self.assertEqual(result, datetime(2008, 2, 29))

def testUnspecifiedDayNoFallback(self):
    # The day is taken from the default (the 31st), which April does not
    # have; with the fallback disabled this is expected to raise.
    reference = datetime(2010, 1, 31)
    self.assertRaises(ValueError, parse, "April 2009",
                      fallback_on_invalid_day=False, default=reference)

def testUnspecifiedDayUnspecifiedFallback(self):
    # Neither the day nor `fallback_on_invalid_day` is given: the default
    # day (31) is expected to clamp to the last day of April.
    reference = datetime(2010, 1, 31)
    result = parse("April 2009", default=reference)
    self.assertEqual(result, datetime(2009, 4, 30))

def testUnspecifiedDaySpecifiedFallback(self):
    # Renamed: this method previously reused the name
    # `testUnspecifiedDayUnspecifiedFallback`, which rebinds that name in
    # the class body so only one of the two tests is ever collected.
    #
    # The day is omitted and `fallback_on_invalid_day=True` is passed
    # explicitly: the default day (31) is expected to clamp to the last
    # day of April.
    reference = datetime(2010, 1, 31)
    result = parse("April 2009", fallback_on_invalid_day=True,
                   default=reference)
    self.assertEqual(result, datetime(2009, 4, 30))

def testUnspecifiedDayUnspecifiedFallbackFebNoLeapYear(self):
    # Day omitted, fallback unspecified: the default day (31) is expected
    # to clamp to February 28 in a non-leap year.
    reference = datetime(2010, 1, 31)
    result = parse("Feb 2007", default=reference)
    self.assertEqual(result, datetime(2007, 2, 28))

def testUnspecifiedDayUnspecifiedFallbackFebLeapYear(self):
    # Day omitted, fallback unspecified: the default day (31) is expected
    # to clamp to February 29 in a leap year.
    reference = datetime(2010, 1, 31)
    result = parse("Feb 2008", default=reference)
    self.assertEqual(result, datetime(2008, 2, 29))

def testErrorType01(self):
self.assertRaises(ValueError,
parse,'shouldfail')
parse, 'shouldfail')

def testIncreasingCTime(self):
# This test will check 200 different years, every month, every day,
Expand Down

0 comments on commit f7a1d4e

Please sign in to comment.