Skip to content

Commit

Permalink
Merge pull request #139 from ms-boom/master
Browse files Browse the repository at this point in the history
Add flexibility in subclassing timelex
  • Loading branch information
pganssle committed Oct 31, 2015
2 parents 9e54f8d + 70d2b1d commit ed975b1
Showing 1 changed file with 25 additions and 10 deletions.
35 changes: 25 additions & 10 deletions dateutil/parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -115,11 +115,11 @@ def get_token(self):
# First character of the token - determines if we're starting
# to parse a word, a number or something else.
token = nextchar
- if nextchar.isalpha():
+ if self.isword(nextchar):
state = 'a'
- elif nextchar.isdigit():
+ elif self.isnum(nextchar):
state = '0'
- elif nextchar.isspace():
+ elif self.isspace(nextchar):
token = ' '
break # emit token
else:
Expand All @@ -128,7 +128,7 @@ def get_token(self):
# If we've already started reading a word, we keep reading
# letters until we find something that's not part of a word.
seenletters = True
- if nextchar.isalpha():
+ if self.isword(nextchar):
token += nextchar
elif nextchar == '.':
token += nextchar
Expand All @@ -139,7 +139,7 @@ def get_token(self):
elif state == '0':
# If we've already started reading a number, we keep reading
# numbers until we find something that doesn't fit.
- if nextchar.isdigit():
+ if self.isnum(nextchar):
token += nextchar
elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
token += nextchar
Expand All @@ -151,9 +151,9 @@ def get_token(self):
# If we've seen some letters and a dot separator, continue
# parsing, and the tokens will be broken up later.
seenletters = True
- if nextchar == '.' or nextchar.isalpha():
+ if nextchar == '.' or self.isword(nextchar):
token += nextchar
- elif nextchar.isdigit() and token[-1] == '.':
+ elif self.isnum(nextchar) and token[-1] == '.':
token += nextchar
state = '0.'
else:
Expand All @@ -162,9 +162,9 @@ def get_token(self):
elif state == '0.':
# If we've seen at least one dot separator, keep going, we'll
# break up the tokens later.
- if nextchar == '.' or nextchar.isdigit():
+ if nextchar == '.' or self.isnum(nextchar):
token += nextchar
- elif nextchar.isalpha() and token[-1] == '.':
+ elif self.isword(nextchar) and token[-1] == '.':
token += nextchar
state = 'a.'
else:
Expand Down Expand Up @@ -197,9 +197,24 @@ def __next__(self):
def next(self):
return self.__next__() # Python 2.x support

+ @classmethod
  def split(cls, s):
      return list(cls(s))
- split = classmethod(split)

+ @classmethod
+ def isword(cls, nextchar):
+     """ Whether or not the next character is part of a word """
+     return nextchar.isalpha()
+
+ @classmethod
+ def isnum(cls, nextchar):
+     """ Whether the next character is part of a number """
+     return nextchar.isdigit()
+
+ @classmethod
+ def isspace(cls, nextchar):
+     """ Whether the next character is whitespace """
+     return nextchar.isspace()


class _resultbase(object):
Expand Down

0 comments on commit ed975b1

Please sign in to comment.