Skip to content

Commit

Permalink
Merge pull request #38 from rawouter/master
Browse files Browse the repository at this point in the history
Fix for #37
  • Loading branch information
akoumjian committed Feb 17, 2017
2 parents 6fb7621 + e6fdd1b commit e648129
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 18 deletions.
39 changes: 22 additions & 17 deletions datefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class DateFinder(object):
\:
(?P<minutes>\d{{1,2}})
(\:(?<seconds>\d{{1,2}}))?
([\.\,](?<microseconds>\d{{1,6}}))?
\s*
(?P<time_periods>{time_periods})?
\s*
Expand Down Expand Up @@ -196,24 +197,29 @@ def _add_tzinfo(self, datetime_obj, tz_string):
return datetime_obj.replace(tzinfo=tzinfo_match)

def parse_date_string(self, date_string, captures):
    """Turn a matched date string into a datetime, or None if unparseable.

    The raw string is handed to dateutil first. Only when that fails is
    the string cleaned with ``self._find_and_replace`` and parsed again,
    because the cleanup can corrupt strings that were already well formed.

    :param date_string: the text matched as a candidate date
    :param captures: regex captures for the match, forwarded unchanged to
        ``self._find_and_replace``
    :return: the parsed datetime, or ``None`` when the cleaned string is
        shorter than 3 characters or dateutil cannot parse it
    """
    # For well formatted strings, we can already let dateutil parse them;
    # otherwise self._find_and_replace might corrupt them.
    try:
        as_dt = parser.parse(date_string, default=self.base_date)
    except (ValueError, OverflowError):
        # dateutil signals unparseable input with ValueError and
        # out-of-range numbers with OverflowError; both mean "fall back".

        # Replace tokens that are problematic for dateutil.
        date_string, tz_string = self._find_and_replace(date_string, captures)

        # One last sweep after removing tokens.
        date_string = date_string.strip(self.STRIP_CHARS)

        # Match strings must be at least 3 characters long;
        # anything shorter tends to be garbage.
        if len(date_string) < 3:
            return None

        try:
            logger.debug('Parsing {0} with dateutil'.format(date_string))
            as_dt = parser.parse(date_string, default=self.base_date)
        except Exception as e:
            # Best effort: an unparseable candidate is reported as None
            # rather than aborting the whole search.
            logger.debug(e)
            as_dt = None

        # Only attach the extracted timezone when there is a datetime to
        # attach it to.
        if tz_string and as_dt is not None:
            as_dt = self._add_tzinfo(as_dt, tz_string)
    return as_dt

def extract_date_strings(self, text, strict=False):
Expand All @@ -237,7 +243,6 @@ def extract_date_strings(self, text, strict=False):
months = captures.get('months')
timezones = captures.get('timezones')
delimiters = captures.get('delimiters')
time = captures.get('time)')
time_periods = captures.get('time_periods')
extra_tokens = captures.get('extra_tokens')

Expand Down
6 changes: 5 additions & 1 deletion tests/test_find_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
datetime(1994, 10, 27),
datetime(1995, 6, 1)
]),
# Z dates with and without millis, from https://github.com/akoumjian/datefinder/issues/37
("2017-02-03T09:04:08.001Z", datetime(2017, 2, 3, 9, 4, 8, 1000, tzinfo=pytz.utc)),
("2017-02-03T09:04:08,00123Z", datetime(2017, 2, 3, 9, 4, 8, 1230, tzinfo=pytz.utc)),
("2017-02-03T09:04:08Z", datetime(2017, 2, 3, 9, 4, 8, tzinfo=pytz.utc)),
])
def test_find_date_strings(input_text, expected_date):
if isinstance(expected_date,list):
Expand All @@ -53,4 +57,4 @@ def test_find_date_strings(input_text, expected_date):
return_date = None
for return_date in datefinder.find_dates(input_text):
assert return_date == expected_date
assert return_date is not None # handles dates that were never matched
assert return_date is not None, 'Did not find date for test line: "{}"'.format(input_text) # handles dates that were never matched

0 comments on commit e648129

Please sign in to comment.