Merge pull request #139 from ms-boom/master

Add flexibility in subclassing timelex
dateutil · Oct 31, 2015 · ed975b1
2 parents 9e54f8d + 70d2b1d
commit ed975b1
Showing 1 changed file with 25 additions and 10 deletions.
diff --git a/dateutil/parser.py b/dateutil/parser.py
@@ -115,11 +115,11 @@ def get_token(self):
                 # First character of the token - determines if we're starting
                 # to parse a word, a number or something else.
                 token = nextchar
-                if nextchar.isalpha():
+                if self.isword(nextchar):
                     state = 'a'
-                elif nextchar.isdigit():
+                elif self.isnum(nextchar):
                     state = '0'
-                elif nextchar.isspace():
+                elif self.isspace(nextchar):
                     token = ' '
                     break  # emit token
                 else:
@@ -128,7 +128,7 @@ def get_token(self):
                 # If we've already started reading a word, we keep reading
                 # letters until we find something that's not part of a word.
                 seenletters = True
-                if nextchar.isalpha():
+                if self.isword(nextchar):
                     token += nextchar
                 elif nextchar == '.':
                     token += nextchar
@@ -139,7 +139,7 @@ def get_token(self):
             elif state == '0':
                 # If we've already started reading a number, we keep reading
                 # numbers until we find something that doesn't fit.
-                if nextchar.isdigit():
+                if self.isnum(nextchar):
                     token += nextchar
                 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
                     token += nextchar
@@ -151,9 +151,9 @@ def get_token(self):
                 # If we've seen some letters and a dot separator, continue
                 # parsing, and the tokens will be broken up later.
                 seenletters = True
-                if nextchar == '.' or nextchar.isalpha():
+                if nextchar == '.' or self.isword(nextchar):
                     token += nextchar
-                elif nextchar.isdigit() and token[-1] == '.':
+                elif self.isnum(nextchar) and token[-1] == '.':
                     token += nextchar
                     state = '0.'
                 else:
@@ -162,9 +162,9 @@ def get_token(self):
             elif state == '0.':
                 # If we've seen at least one dot separator, keep going, we'll
                 # break up the tokens later.
-                if nextchar == '.' or nextchar.isdigit():
+                if nextchar == '.' or self.isnum(nextchar):
                     token += nextchar
-                elif nextchar.isalpha() and token[-1] == '.':
+                elif self.isword(nextchar) and token[-1] == '.':
                     token += nextchar
                     state = 'a.'
                 else:
@@ -197,9 +197,24 @@ def __next__(self):
     def next(self):
         return self.__next__()  # Python 2.x support
 
+    @classmethod
     def split(cls, s):
         return list(cls(s))
-    split = classmethod(split)
+
+    @classmethod
+    def isword(cls, nextchar):
+        """ Whether or not the next character is part of a word """
+        return nextchar.isalpha()
+
+    @classmethod
+    def isnum(cls, nextchar):
+        """ Whether the next character is part of a number """
+        return nextchar.isdigit()
+
+    @classmethod
+    def isspace(cls, nextchar):
+        """ Whether the next character is whitespace """
+        return nextchar.isspace()
 
 
 class _resultbase(object):