Make categorize also return applicable rule tags

Summary: This changes the categorize API to return a tuple of (matched, applicable) tag sets, so that we can distinguish a categorization failure (rules apply to the project, but none matched) from a project that has no categorization rules defined. To keep the API simple, the unused default_tag parameter was removed, as default tags make it harder to reason about applicability.

Test Plan: Unit

Reviewers: jukka

Reviewed By: jukka

Subscribers: changesbot, wwu

Differential Revision: https://tails.corp.dropbox.com/D96384
dropbox · Mar 18, 2015 · af3487c · af3487c
1 parent 69cac7c
commit af3487c
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 32 deletions.
diff --git a/changes/experimental/categorize.py b/changes/experimental/categorize.py
@@ -77,24 +77,25 @@ def _parse_regexp(regexp):
     return regexp
 
 
-def categorize(project, rules, output, default_tag=None):
+def categorize(project, rules, output):
     """Categorize test output based on rules.
 
-    Arguments:
+    Args:
       project (str): name of the project
       rules (iterable of (str, str, str) tuples):
           each rule is a tuple (tag, project, regexp) that is matched against output
       output (str): output of a (partial) test run / build
-      default_tag: if not None, consider this tag matched if no rule matches
 
-    Returns a set of matched tags.
+    Returns:
+      A tuple of sets with (matched_categories, applicable_categories), where
+      applicable_categories are the names of rules that apply to the provided project.
+      applicable_categories is a superset of matched_categories.
     """
     output = output.replace('\r\n', '\n')
-    tags = set()
+    matched, applicable = set(), set()
     for tag, rule_project, regexp in rules:
-        if (not rule_project or rule_project == project) and (
-                re.search(regexp, output, re.MULTILINE | re.DOTALL)):
-            tags.add(tag)
-    if not tags and default_tag:
-        tags.add(default_tag)
-    return tags
+        if not rule_project or rule_project == project:
+            applicable.add(tag)
+            if re.search(regexp, output, re.MULTILINE | re.DOTALL):
+                matched.add(tag)
+    return (matched, applicable)
diff --git a/tests/changes/utils/test_categorize.py b/tests/changes/utils/test_categorize.py
@@ -38,43 +38,37 @@ def test_parse_rules(self):
 
     def test_categorize_general_rule(self):
         rules = [('tag', '', 'error')]
-        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag'})
-        self.assertEqual(categorize('proj', rules, '.. Error ..'), set())
+        self.assertEqual(categorize('proj', rules, '.. error ..'), ({'tag'}, {'tag'}))
+        self.assertEqual(categorize('proj', rules, '.. Error ..'), (set(), {'tag'}))
 
     def test_categorize_general_rule_two_tags(self):
         rules = [('tag', '', 'error'),
                  ('tag2', '', 'fail')]
-        self.assertEqual(categorize('proj', rules, '.. error .. fail'), {'tag', 'tag2'})
-        self.assertEqual(categorize('proj', rules, '.. fail ..'), {'tag2'})
-        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag'})
-        self.assertEqual(categorize('proj', rules, '.. ok ..'), set())
+        tags = {'tag', 'tag2'}
+        self.assertEqual(categorize('proj', rules, '.. error .. fail'), ({'tag', 'tag2'}, tags))
+        self.assertEqual(categorize('proj', rules, '.. fail ..'), ({'tag2'}, tags))
+        self.assertEqual(categorize('proj', rules, '.. error ..'), ({'tag'}, tags))
+        self.assertEqual(categorize('proj', rules, '.. ok ..'), (set(), tags))
 
     def test_categorize_project_rule(self):
         rules = [('tag2', 'proj', 'error')]
-        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag2'})
-        self.assertEqual(categorize('proj2', rules, '.. error ..'), set())
+        self.assertEqual(categorize('proj', rules, '.. error ..'), ({'tag2'}, {'tag2'}))
+        self.assertEqual(categorize('proj2', rules, '.. error ..'), (set(), set()))
 
     def test_categorize_full_line_regexp(self):
         rules = [('tag2', 'proj', '^error$')]
-        self.assertEqual(categorize('proj', rules, 'error'), {'tag2'})
-        self.assertEqual(categorize('proj', rules, '\nerror\n'), {'tag2'})
-        self.assertEqual(categorize('proj', rules, 'xerror'), set())
-        self.assertEqual(categorize('proj', rules, '\nerrorx\n'), set())
+        self.assertEqual(categorize('proj', rules, 'error'), ({'tag2'}, {'tag2'}))
+        self.assertEqual(categorize('proj', rules, '\nerror\n'), ({'tag2'}, {'tag2'}))
+        self.assertEqual(categorize('proj', rules, 'xerror'), (set(), {'tag2'}))
+        self.assertEqual(categorize('proj', rules, '\nerrorx\n'), (set(), {'tag2'}))
 
     def test_categorize_full_line_regexp_cr_lf(self):
         rules = [('tag', 'proj', '^error$')]
-        self.assertEqual(categorize('proj', rules, '\r\nerror\r\n'), {'tag'})
+        self.assertEqual(categorize('proj', rules, '\r\nerror\r\n'), ({'tag'}, {'tag'}))
 
     def test_categorize_match_newline(self):
         rules = [('atag', 'aproj', 'line1.*line2')]
-        self.assertEqual(categorize('aproj', rules, 'line1\n\nline2'), {'atag'})
-
-    def test_categorize_default_tag(self):
-        rules = [('tag-x', '', 'an error')]
-        self.assertEqual(categorize('proj', rules, '.. an error ..', default_tag='def'),
-                         {'tag-x'})
-        self.assertEqual(categorize('proj', rules, '.. an Error ..', default_tag='def'),
-                         {'def'})
+        self.assertEqual(categorize('aproj', rules, 'line1\n\nline2'), ({'atag'}, {'atag'}))
 
     def test_parse_error(self):
         with self.assertRaisesRegexp(ParseError, 'file.ext, line 2: syntax error'):