grantjenks · davidpaulmcintyre · Apr 26, 2019
diff --git a/tests/test_coverage.py b/tests/test_coverage.py
@@ -93,6 +93,30 @@ def test_segment_12():
     ]
     assert segment(''.join(result)) == result
 
+# test ignore_digits param (passed positionally as True): bare numbers between words
+def test_segment_13():
+    result = [
+        'in', '1864', 'lincoln', 'wrote', '4', 'score', 'and', '7', 'years', 'ago', 'our', 'fathers', 'brought'
+    ]
+    # join with single spaces so each numeric token stays whitespace-delimited
+    assert segment(' '.join(result), True) == result
+
+# test ignore_digits param: a currency amount, a year, and words ending in a digit
+def test_segment_14():
+    result = [
+        'this', '$5,000', 'is', 'a', '2019', 'test', 'test1', 'asdf1'
+    ]
+    # join with single spaces so each digit-bearing token stays whitespace-delimited
+    assert segment(' '.join(result), True) == result
+
+# test ignore_digits param: currency and percentage tokens ('$55', '23.8%')
+def test_segment_15():
+    result = [
+        'increased', '$55', 'million', 'or', '23.8%', 'for'
+    ]
+    # join with single spaces so each digit-bearing token stays whitespace-delimited
+    assert segment(' '.join(result), True) == result
+
 def test_main():
     main(['tests/test.txt'])
     result = os.linesep.join(('choose spain', 'this is a test')) + os.linesep

diff --git a/wordsegment/__init__.py b/wordsegment/__init__.py
@@ -30,6 +30,8 @@
 import math
 import os.path as op
 import sys
+import re 
+import string
 
 
 class Segmenter(object):
@@ -161,10 +163,27 @@ def candidates():
         for word in prefix_words:
             yield word
 
-
-    def segment(self, text):
+    def segment_ignore_digits(self, text):
+        "Return list of words segmenting `text`, keeping tokens with digits as-is."
+        # The capture group makes re.split return the matched tokens too: each
+        # non-space run holding a digit or '.'; only runs with a digit are kept.
+        segments = re.split(r'((?=\S*[\d.])\S*)', text)
+        digit_checker = re.compile(r'\d')
+        results = []
+        for substring in segments:
+            if digit_checker.search(substring):
+                # has digits, so append substring w/out modification
+                results.append(substring)
+            else:
+                results.extend(self.isegment(substring))
+        return results
+
+    def segment(self, text, ignore_digits=False):
-        "Return list of words that is the best segmenation of `text`."
-        return list(self.isegment(text))
+        "Return list of words that is the best segmentation of `text`."
+        # With `ignore_digits`, tokens containing digits are kept unsegmented.
+        if ignore_digits:
+            return self.segment_ignore_digits(text)
+        return list(self.isegment(text))
 
 
     def divide(self, text):