Skip to content

Commit

Permalink
added coverage target, handle ranges behind-the-scenes
Browse files Browse the repository at this point in the history
  • Loading branch information
alvinwan committed Aug 5, 2018
1 parent d8959c5 commit d13ed06
Show file tree
Hide file tree
Showing 6 changed files with 512 additions and 330 deletions.
4 changes: 4 additions & 0 deletions Makefile
@@ -0,0 +1,4 @@
coverage:
py.test --cov
coverage html
open htmlcov/index.html
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -28,7 +28,7 @@ Parse lists of dates and times with more complex relationships. **(coming soon)*
```
>>> timefhuman('7/17, 7/18, 7/19 at 2')
>>> timefhuman('7/17, 7/18, 7/19 at 2')
>>> timefhuman('7/17 to 7/19 at 2')
```

Use the vernacular to describe ranges or days. **(coming soon)**
Expand Down
325 changes: 325 additions & 0 deletions timefhuman/categorize.py
@@ -0,0 +1,325 @@
from .constants import MONTHS
from .constants import DAYS_OF_WEEK
from .tree import DayToken
from .tree import TimeToken
from .tree import DayRangeToken
from .tree import TimeRangeToken
from .tree import AmbiguousToken
from .tree import Token

import datetime


def categorize(tokens, now):
    """Run every categorization pass over ``tokens`` and return the result.

    The passes run in a fixed order: day-of-week vernacular, time-of-day
    vernacular, explicit hour/minute, punctuated dates, month names, and
    finally leftover integers. Each pass receives the output of the
    previous one.

    :param tokens: iterable of tokens; it is copied into a list first.
    :param now: reference datetime forwarded to the passes that accept one.
    :return: the token list produced by the last pass.
    """
    result = list(tokens)
    passes = (
        lambda current: convert_day_of_week(current, now),
        convert_time_of_day,
        maybe_extract_hour_minute,
        lambda current: maybe_extract_using_date(current, now),
        lambda current: maybe_extract_using_month(current, now),
        lambda current: extract_hour_minute_from_remaining(current, now),
    )
    for run_pass in passes:
        result = run_pass(result)
    return result


# TODO: "monday next week"
def convert_day_of_week(tokens, now=None):
    """Convert day-of-week vernacular into day tokens.

    Each of the seven days starting at ``now`` is looked up in ``tokens`` by
    its full name, its first three letters, or its first two letters. The
    first spelling found is replaced by a ``DayToken`` for that date, shifted
    by whole weeks according to any modifiers directly in front of it (see
    ``extract_weeks_offset``); those modifiers are removed.

    WARNING: assumes that 'upcoming', and (no specification) implies
    the same day. e.g., 'upcoming Monday', and 'Monday' are both
    the same day. However, it assumes that 'next Monday' is the one *after*.
    Also assumes that 'last', 'past', and 'previous' are the same.

    :param tokens: list of tokens; the input list is not mutated.
    :param now: reference datetime. When omitted, the current time is read
        at call time (not at import time).
    :return: a new list with matched day names replaced.

    >>> now = datetime.datetime(year=2018, month=8, day=4)
    >>> convert_day_of_week(['Monday', 'at', '3'], now=now)
    [8/6/2018, 'at', '3']
    >>> convert_day_of_week(['next', 'Monday', 'at', '3'], now=now)
    [8/13/2018, 'at', '3']
    >>> convert_day_of_week(['past', 'Monday', 'at', '3'], now=now)
    [7/30/2018, 'at', '3']
    """
    # A default of ``datetime.datetime.now()`` in the signature would be
    # evaluated once, when the module is imported, and then go stale.
    if now is None:
        now = datetime.datetime.now()

    tokens = list(tokens)
    for i in range(7):
        day = now + datetime.timedelta(i)
        day_of_week = DAYS_OF_WEEK[day.weekday()]

        for string in (day_of_week, day_of_week[:3], day_of_week[:2]):
            if string in tokens:
                index = tokens.index(string)
                new_index, tokens, weeks = extract_weeks_offset(tokens, end=index)
                day = now + datetime.timedelta(weeks*7 + i)
                tokens[new_index] = DayToken(day.month, day.day, day.year)
                # Only the first spelling found for this weekday is converted.
                break
    return tokens


def extract_weeks_offset(tokens, end=None, key_tokens=(
        'next', 'previous', 'last', 'upcoming', 'past', 'prev')):
    """Extract the number of week offsets needed.

    Walks backwards from the token just before ``end`` for as long as the
    tokens are in ``key_tokens``. Each 'next' adds one week; each of
    'previous', 'prev', 'last' and 'past' subtracts one. Reaching 'upcoming'
    stops the walk and forces the offset to 0. All consumed modifiers are
    removed from the returned token list.

    :param tokens: list of tokens.
    :param end: index of the anchor token (e.g. the day name); defaults to
        the last token.
    :param key_tokens: tokens recognised as modifiers.
    :return: ``(index, tokens, weeks)`` where ``index`` is the anchor's
        position in the returned ``tokens`` and ``weeks`` is the signed
        offset in weeks.

    >>> extract_weeks_offset(['next', 'next', 'week'])
    (0, ['week'], 2)
    >>> extract_weeks_offset(['upcoming', 'Monday'])
    (0, ['Monday'], 0)
    >>> extract_weeks_offset(['last', 'Monday'])
    (0, ['Monday'], -1)
    >>> extract_weeks_offset(['past', 'Tuesday'])
    (0, ['Tuesday'], -1)
    >>> extract_weeks_offset(['past', 'Wed', 'next', 'week'], end=1)
    (0, ['Wed', 'next', 'week'], -1)
    >>> extract_weeks_offset(['upcoming', 'next', 'Monday'])
    (0, ['Monday'], 0)
    """
    offset = 0
    end = len(tokens) - 1 if end is None else end
    start = end - 1
    if start < 0 or start >= len(tokens):
        # Nothing precedes the anchor (or ``end`` is out of range).
        return 0, tokens, 0

    # ``start`` only decreases from a value already known to be in range,
    # so only the lower bound needs checking.
    while start >= 0 and tokens[start] in key_tokens:
        candidate = tokens[start]
        if candidate == 'upcoming':
            # Drop every modifier from ``start`` up to the anchor so that the
            # returned index really points at the anchor token.
            return start, tokens[:start] + tokens[end:], 0
        if candidate == 'next':
            offset += 1
        elif candidate in ('previous', 'prev', 'last', 'past'):
            offset -= 1
        start -= 1
    return start + 1, tokens[:start + 1] + tokens[end:], offset


def convert_time_of_day(tokens):
    """Replace a time-of-day word with a concrete time token.

    String tokens are compared case-insensitively. Keywords are tried in the
    order morning, noon, afternoon, evening, night, midnight; the first
    keyword that appears in ``tokens`` is replaced (at its first occurrence)
    and the result is returned immediately, so at most one token changes.

    >>> convert_time_of_day(['Monday', 'noon', 'huehue'])
    ['Monday', 12 pm, 'huehue']
    >>> convert_time_of_day(['Monday', 'afternoon'])
    ['Monday', 3 pm]
    >>> convert_time_of_day(['Tu', 'evening'])
    ['Tu', 6 pm]
    >>> convert_time_of_day(['Wed', 'morning'])
    ['Wed', 9 am]
    >>> convert_time_of_day(['Thu', 'midnight'])
    ['Thu', 12 am]
    """
    lowered = [
        entry.lower() if isinstance(entry, str) else entry
        for entry in tokens
    ]
    vernacular = (
        ('morning', TimeToken(9, 'am')),
        ('noon', TimeToken(12, 'pm')),
        ('afternoon', TimeToken(3, 'pm')),
        ('evening', TimeToken(6, 'pm')),
        ('night', TimeToken(9, 'pm')),
        ('midnight', TimeToken(12, 'am')),
    )
    for keyword, replacement in vernacular:
        try:
            position = lowered.index(keyword)
        except ValueError:
            # Keyword absent; try the next one.
            continue
        return tokens[:position] + [replacement] + tokens[position + 1:]
    return tokens


def maybe_extract_using_month(tokens, now=None):
    """Attempt to extract a date anchored on a month name.

    Looks for a month (full name or its first three letters, compared
    case-insensitively) and inspects the tokens that follow it:

    * an ``AmbiguousToken`` holding a day range -> that range, with the month
      and ``now.year`` applied;
    * nothing, or something that is not an integer string -> the month alone
      (day is ``now.day`` when the month is the current month, otherwise 1);
    * an integer above 31 -> treated as the year;
    * an integer up to 31 -> treated as the day, optionally followed by an
      integer year.

    Only the first month found (in calendar order) is processed.

    :param tokens: list of tokens.
    :param now: reference datetime. When omitted, the current time is read
        at call time (not at import time).
    :return: a new list with the date replaced, or ``tokens`` unchanged if
        no month was found.

    >>> now = datetime.datetime(year=2018, month=7, day=7)
    >>> maybe_extract_using_month(['July', '17', '2018', 'at'])
    [7/17/2018, 'at']
    >>> maybe_extract_using_month(['Jul', '17', 'at'], now=now)
    [7/17/2018, 'at']
    >>> maybe_extract_using_month(['July', 'at'], now=now)
    [7/7/2018, 'at']
    >>> maybe_extract_using_month(['August', '17'], now=now)
    [8/17/2018]
    >>> maybe_extract_using_month(['Aug', 'at'], now=now)
    [8/1/2018, 'at']
    >>> maybe_extract_using_month(['in', 'July'], now=now)
    ['in', 7/7/2018]
    >>> maybe_extract_using_month(['gibberish'], now=now)
    ['gibberish']
    >>> time_range = TimeRangeToken(TimeToken(3, 'pm'), TimeToken(5, 'pm'))
    >>> day_range = DayRangeToken(DayToken(None, 3, None), DayToken(None, 5, None))
    >>> day = DayToken(3, 5, 2018)
    >>> ambiguous_token = AmbiguousToken(time_range, day, day_range)
    >>> maybe_extract_using_month(['May', ambiguous_token], now=now)
    [5/3/2018 - 5/5/2018]
    """
    # A default of ``datetime.datetime.now()`` in the signature would be
    # evaluated once, when the module is imported, and then go stale.
    if now is None:
        now = datetime.datetime.now()

    def is_integer_string(candidate):
        # Token objects and a missing neighbour (None) are never numbers.
        # ``isdecimal`` is used because it accepts exactly what ``int()``
        # accepts; ``isnumeric`` also accepts characters such as '½'.
        return isinstance(candidate, str) and candidate.isdecimal()

    temp_tokens = [token.lower() if isinstance(token, str) else token for token in tokens]
    for mo, month in enumerate(MONTHS, start=1):

        index = None
        month = month.lower()
        if month in temp_tokens:
            index = temp_tokens.index(month)
        if month[:3] in temp_tokens:
            index = temp_tokens.index(month[:3])

        if index is None:
            continue

        default_day = now.day if now.month == mo else 1
        # The month may be the final token, in which case nothing follows it.
        next_candidate = tokens[index+1] if len(tokens) > index+1 else None

        if isinstance(next_candidate, AmbiguousToken) and \
                next_candidate.has_day_range_token():
            day_range = next_candidate.get_day_range_token()
            day_range.apply_month(mo)
            day_range.apply_year(now.year)  # TODO: fails on July 3-5, 2018
            return tokens[:index] + [day_range] + tokens[index+2:]

        if not is_integer_string(next_candidate):
            day = DayToken(month=mo, day=default_day, year=now.year)
            return tokens[:index] + [day] + tokens[index+1:]

        next_candidate = int(next_candidate)
        if next_candidate > 31:
            # Too large for a day of the month, so read it as a year.
            day = DayToken(month=mo, day=default_day, year=next_candidate)
            return tokens[:index] + [day] + tokens[index+2:]

        next_next_candidate = tokens[index+2] if len(tokens) > index+2 else None
        if not is_integer_string(next_next_candidate):
            day = DayToken(month=mo, day=next_candidate, year=now.year)
            return tokens[:index] + [day] + tokens[index+2:]

        day = DayToken(month=mo, day=next_candidate, year=int(next_next_candidate))
        return tokens[:index] + [day] + tokens[index+3:]
    return tokens


def maybe_extract_using_date(tokens, now=None):
    """Attempt to extract dates.

    Look for dates in the form of the following:

    (month)/(day)
    (month).(day)
    (month)-(day)
    (month)/(day)/(year)
    (month).(day).(year)
    (month)-(day)-(year)

    Two-part dates use ``now.year`` and are all converted. A two-part token
    joined by '-' whose second part is at most 24 could also be a time range,
    so it becomes an ``AmbiguousToken`` holding both readings. The first
    three-part date found ends the scan and is returned immediately.

    Two-digit years below 50 are read as 20xx, the rest as 19xx; years with
    three or more digits are used as written. String tokens that contain one
    of the separators but are not made of integers (e.g. ``'Dr.'``,
    ``'follow-up'``) or have more than three parts are left untouched.

    :param tokens: list of tokens; non-string entries are skipped.
    :param now: reference datetime. When omitted, the current time is read
        at call time (not at import time).
    :return: a list with recognised dates replaced by tokens.

    >>> now = datetime.datetime(year=2018, month=7, day=7)
    >>> maybe_extract_using_date(['7/17/18'])
    [7/17/2018]
    >>> maybe_extract_using_date(['7-17-18'])
    [7/17/2018]
    >>> maybe_extract_using_date(['7/17/98'])
    [7/17/1998]
    >>> maybe_extract_using_date(['3', 'on', '7.17.18'])
    ['3', 'on', 7/17/2018]
    >>> maybe_extract_using_date(['7-25', '3-4', 'pm'], now=now)
    [7/25/2018, 3/4/2018 OR 3:00 - 4:00, 'pm']
    >>> maybe_extract_using_date(['7/4', '-', '7/6'], now=now)
    [7/4/2018, '-', 7/6/2018]
    >>> maybe_extract_using_date(['follow-up', 'with', 'Dr.'], now=now)
    ['follow-up', 'with', 'Dr.']
    """
    # A default of ``datetime.datetime.now()`` in the signature would be
    # evaluated once, when the module is imported, and then go stale.
    if now is None:
        now = datetime.datetime.now()

    for i, token in enumerate(tokens):
        if not isinstance(token, str):
            # Already-categorized tokens are left alone.
            continue
        for punctuation in ('/', '.', '-'):
            if punctuation == token:  # dash joins other tokens, skip parsing
                continue
            if punctuation not in token:
                continue
            try:
                parts = tuple(map(int, token.split(punctuation)))
            except ValueError:
                # Not purely numeric around this separator; not a date.
                continue

            if len(parts) == 2:
                day = DayToken(month=parts[0], day=parts[1], year=now.year)
                if punctuation == '-' and parts[1] <= 24:
                    day = AmbiguousToken(
                        day,
                        extract_hour_minute_from_time(token)
                    )
                # One-for-one replacement keeps ``i`` aligned with the list
                # still being enumerated.
                tokens = tokens[:i] + [day] + tokens[i+1:]
                continue

            if len(parts) != 3:
                continue

            month, day, year = parts
            if year < 100:
                # Expand two-digit years around a pivot of 50.
                year += 2000 if year < 50 else 1900
            day = DayToken(month=month, day=day, year=year)
            return tokens[:i] + [day] + tokens[i+1:]
    return tokens


def extract_hour_minute_from_time(string, time_of_day=None):
    """Build a time token, or a time range token, from a time-like value.

    An ``AmbiguousToken`` yields its time range token when it has one and
    ``None`` otherwise. A string containing '-' is split on the dash and its
    first two pieces are parsed recursively into a ``TimeRangeToken``; both
    ends receive the same ``time_of_day``. Any other string is read as
    ``hour`` or ``hour:minute``, with the minute defaulting to 0.

    >>> extract_hour_minute_from_time('3:00')
    3:00
    >>> extract_hour_minute_from_time('3:00', 'pm')
    3 pm
    >>> extract_hour_minute_from_time('3')
    3:00
    >>> extract_hour_minute_from_time('3:30-4', 'pm')
    3:30 pm - 4 pm
    >>> time_range = TimeRangeToken(TimeToken(3, 'pm'), TimeToken(5, 'pm'))
    >>> day_range = DayRangeToken(DayToken(None, 3, None), DayToken(None, 5, None))
    >>> day = DayToken(3, 5, 2018)
    >>> ambiguous_token = AmbiguousToken(time_range, day, day_range)
    >>> extract_hour_minute_from_time(ambiguous_token)
    3 pm - 5 pm
    >>> extract_hour_minute_from_time(AmbiguousToken(day))
    """
    if isinstance(string, AmbiguousToken):
        if not string.has_time_range_token():
            return None
        return string.get_time_range_token()

    if '-' not in string:
        pieces = string.split(':')
        hour = int(pieces[0])
        minute = 0 if len(pieces) < 2 else int(pieces[1])
        return TimeToken(hour=hour, minute=minute, time_of_day=time_of_day)

    # TODO: yuck! return a range
    first, second = string.split('-')[:2]
    range_start = extract_hour_minute_from_time(first, time_of_day)
    range_end = extract_hour_minute_from_time(second, time_of_day)
    return TimeRangeToken(range_start, range_end)


def maybe_extract_hour_minute(tokens):
    """Attempt to extract hour and minute.

    If am and pm are found, grab the hour and minute before it. If colon, use
    that as time.

    'am' is looked for before 'pm' (dots and case are ignored, so 'P.M.'
    matches). The first marker that has a parsable time directly in front of
    it is merged with that time into one token and the result is returned. A
    marker with nothing in front of it, or with something that is not a time
    in front of it, is left alone. If no marker was used, every string token
    containing ':' that parses as a time is converted on its own.

    :param tokens: list of tokens; the input list is not mutated.
    :return: a list with the recognised times replaced by tokens.

    >>> maybe_extract_hour_minute(['7/17/18', '3', 'PM'])
    ['7/17/18', 3 pm]
    >>> maybe_extract_hour_minute(['7/17/18', '3:00', 'p.m.'])
    ['7/17/18', 3 pm]
    >>> maybe_extract_hour_minute(['July', '17', '2018', 'at', '3', 'p.m.'])
    ['July', '17', '2018', 'at', 3 pm]
    >>> maybe_extract_hour_minute(['July', '17', '2018', '3', 'p.m.'])
    ['July', '17', '2018', 3 pm]
    >>> maybe_extract_hour_minute(['3', 'PM', 'on', 'July', '17'])
    [3 pm, 'on', 'July', '17']
    >>> maybe_extract_hour_minute(['July', 'at', '3'])
    ['July', 'at', '3']
    >>> maybe_extract_hour_minute(['7/17/18', '15:00'])
    ['7/17/18', 15:00]
    >>> maybe_extract_hour_minute(['7/17/18', TimeToken(3, 'pm')])
    ['7/17/18', 3 pm]
    >>> maybe_extract_hour_minute(['pm', 'works', 'best'])
    ['pm', 'works', 'best']
    """
    def parse_or_none(candidate, time_of_day):
        # Only strings and ambiguous tokens can be read as a time; anything
        # unparsable yields None instead of raising.
        if not isinstance(candidate, (str, AmbiguousToken)):
            return None
        try:
            return extract_hour_minute_from_time(candidate, time_of_day)
        except ValueError:
            return None

    temp_tokens = [token.replace('.', '').lower() if isinstance(token, str) else token for token in tokens]

    for time_of_day in ('am', 'pm'):
        if time_of_day not in temp_tokens:
            continue
        index = temp_tokens.index(time_of_day)
        if index == 0:
            # Nothing precedes the marker; index - 1 would wrap around to
            # the last token.
            continue
        time_token = parse_or_none(temp_tokens[index-1], time_of_day)
        if time_token is None:
            continue
        return tokens[:index-1] + [time_token] + tokens[index+1:]

    converted = []
    for token in tokens:
        time_token = None
        if isinstance(token, str) and ':' in token:
            # Each colon token is parsed from its own text.
            time_token = parse_or_none(token, None)
        converted.append(token if time_token is None else time_token)
    return converted


def extract_hour_minute_from_remaining(tokens, now=None):
    """Sketch collector for leftover integers.

    The first string token made only of decimal digits is read as an hour
    and replaced by a time token; nothing else is touched.

    :param tokens: list of tokens; non-string entries are skipped.
    :param now: not used by this function; kept so existing callers that
        pass it keep working.
    :return: a new list with the first integer replaced, or ``tokens``
        unchanged when there is none.

    >>> extract_hour_minute_from_remaining(['gibberish'])
    ['gibberish']
    """
    for i, token in enumerate(tokens):
        # ``isdecimal`` accepts exactly what ``int()`` can parse;
        # ``isnumeric`` also accepts characters such as '½', which would make
        # the conversion below raise ValueError.
        if isinstance(token, str) and token.isdecimal():
            time_token = extract_hour_minute_from_time(token)
            return tokens[:i] + [time_token] + tokens[i+1:]
    return tokens

0 comments on commit d13ed06

Please sign in to comment.