Updated all susceptible regexes to treat Unicode letters as word characters

Fixes #135
idpaterson · Sep 14, 2016 · 184960c
1 parent 619acef
commit 184960c
Show file tree

Hide file tree

Showing 5 changed files with 16 additions and 16 deletions.
diff --git a/src/wunderlist/handlers/login.py b/src/wunderlist/handlers/login.py
@@ -10,7 +10,7 @@ def filter(args):
     getting_help = False
 
     if len(args) > 0:
-        action = re.sub(r'^\W+', '', args[0])
+        action = re.sub(r'^\W+', '', args[0], flags=re.UNICODE)
         getting_help = action and 'help'.find(action) == 0
 
     if not getting_help:

diff --git a/src/wunderlist/handlers/route.py b/src/wunderlist/handlers/route.py
@@ -25,11 +25,11 @@ def route(args):
     elif args:
         command_string = args[0]
 
-    command_string = re.sub(r'^[^\w\s]+', '', command_string)
+    command_string = re.sub(r'^[^\w\s]+', '', command_string, flags=re.UNICODE)
     command = re.split(r' +', command_string)
 
     if command:
-        action = re.sub(r'^\W+', '', command[0]) or 'none'
+        action = re.sub(r'^\W+', '', command[0], flags=re.UNICODE) or 'none'
 
     if 'about'.find(action) == 0:
         from wunderlist.handlers import about

diff --git a/src/wunderlist/handlers/search.py b/src/wunderlist/handlers/search.py
@@ -23,7 +23,7 @@ def filter(args):
     if not query:
         wf.add_item('Begin typing to search tasks', '', icon=icons.SEARCH)
 
-    hashtag_match = re.search(_hashtag_prompt_pattern, query)
+    hashtag_match = re.search(_hashtag_prompt_pattern, query, flags=re.UNICODE)
     if hashtag_match:
         from wunderlist.models.hashtag import Hashtag
 

diff --git a/src/wunderlist/models/hashtag.py b/src/wunderlist/models/hashtag.py
@@ -36,4 +36,4 @@ def sync(cls):
 
     @classmethod
     def hashtags_in_task(cls, task):
-        return set(re.findall(_hashtag_pattern, ' ' + task.title))
+        return set(re.findall(_hashtag_pattern, ' ' + task.title, flags=re.UNICODE))
diff --git a/src/wunderlist/models/task_parser.py b/src/wunderlist/models/task_parser.py
@@ -86,23 +86,23 @@ def _parse(self):
         prefs = Preferences.current_prefs()
         ignore_due_date = False
 
-        match = re.search(HASHTAG_PROMPT_PATTERN, phrase)
+        match = re.search(HASHTAG_PROMPT_PATTERN, phrase, flags=re.UNICODE)
         if match:
             self.hashtag_prompt = match.group(1)
             self.has_hashtag_prompt = True
 
-        match = re.search(STAR_PATTERN, phrase)
+        match = re.search(STAR_PATTERN, phrase, flags=re.UNICODE)
         if match:
             self.starred = True
             self._starred_phrase = match.group()
             phrase = phrase[:match.start()] + phrase[match.end():]
 
-        match = re.search(NOT_DUE_PATTERN, phrase)
+        match = re.search(NOT_DUE_PATTERN, phrase, flags=re.UNICODE)
         if match:
             ignore_due_date = True
             phrase = phrase[:match.start()] + phrase[match.end():]
 
-        match = re.search(LIST_TITLE_PATTERN, phrase, re.IGNORECASE)
+        match = re.search(LIST_TITLE_PATTERN, phrase, flags=re.UNICODE | re.IGNORECASE)
         if lists and match:
             if match.group(1):
                 matching_lists = wf.filter(
@@ -127,7 +127,7 @@ def _parse(self):
 
         # Parse and remove the recurrence phrase first so that any dates do
         # not interfere with the due date
-        match = re.search(RECURRENCE_PATTERN, phrase, re.IGNORECASE)
+        match = re.search(RECURRENCE_PATTERN, phrase, flags=re.UNICODE | re.IGNORECASE)
         if match:
             type_phrase = match.group(2) if match.group(2) else match.group(3)
             if type_phrase:
@@ -136,7 +136,7 @@ def _parse(self):
                 self.recurrence_type = RECURRENCE_TYPES[type_phrase[0].lower()]
                 self.recurrence_count = int(match.group(1) or 1)
             else:
-                match = re.search(RECURRENCE_BY_DATE_PATTERN, phrase, re.IGNORECASE)
+                match = re.search(RECURRENCE_BY_DATE_PATTERN, phrase, flags=re.UNICODE | re.IGNORECASE)
                 if match:
                     recurrence_phrase = match.group()
                     dates = cal.nlp(match.group(1), version=2)
@@ -172,7 +172,7 @@ def _parse(self):
                             date_pattern = r'.*?' + date_pattern
 
                             # Prepare to set the recurrence phrase below
-                            match = re.search(date_pattern, recurrence_phrase, re.IGNORECASE)
+                            match = re.search(date_pattern, recurrence_phrase, flags=re.UNICODE | re.IGNORECASE)
 
             # This is just the "every" keyword with no date following
             if not self.recurrence_type:
@@ -183,7 +183,7 @@ def _parse(self):
 
 
         reminder_info = None
-        match = re.search(REMINDER_PATTERN, phrase, re.IGNORECASE)
+        match = re.search(REMINDER_PATTERN, phrase, flags=re.UNICODE | re.IGNORECASE)
         if match:
             datetimes = cal.nlp(match.group(2), version=2)
 
@@ -211,7 +211,7 @@ def _parse(self):
         due_keyword = None
         potential_date_phrase = None
         if not ignore_due_date:
-            match = re.search(DUE_PATTERN, phrase, re.IGNORECASE)
+            match = re.search(DUE_PATTERN, phrase, flags=re.UNICODE | re.IGNORECASE)
             # Search for the due date only following the `due` keyword
             if match:
                 due_keyword = match.group(1)
@@ -242,7 +242,7 @@ def _parse(self):
                     if due_keyword:
                         date_pattern = re.escape(due_keyword) + r'.*?' + date_pattern
 
-                    due_date_phrase_match = re.search(date_pattern, phrase, re.IGNORECASE)
+                    due_date_phrase_match = re.search(date_pattern, phrase, flags=re.UNICODE | re.IGNORECASE)
 
                     if due_date_phrase_match:
                         self._due_date_phrase = due_date_phrase_match.group()
@@ -297,7 +297,7 @@ def _parse(self):
         # Look for a list title at the end of the remaining phrase, like
         # "in list Office"
         if not self.list_title:
-            matches = re.finditer(INFIX_LIST_KEYWORD_PATTERN, phrase, re.IGNORECASE)
+            matches = re.finditer(INFIX_LIST_KEYWORD_PATTERN, phrase, flags=re.UNICODE | re.IGNORECASE)
             for match in matches:
                 subphrase = phrase[match.end():]