Add test log output classifier

Summary: This implements a simple classifier that isn't used anywhere yet.
Test Plan: Unit tests.
Reviewers: kylec
Reviewed By: kylec
Subscribers: changesbot, wwu
Differential Revision: https://tails.corp.dropbox.com/D90349
dropbox · Feb 19, 2015 · a83991a · a83991a
1 parent 4f34ea2
commit a83991a
Show file tree

Hide file tree

Showing 2 changed files with 212 additions and 0 deletions.
diff --git a/changes/experimental/categorize.py b/changes/experimental/categorize.py
@@ -0,0 +1,100 @@
+"""Tools for tagging test outputs based on regexp based rules."""
+
+import ast
+import re
+
+
+class ParseError(Exception):
+    """Signals that a rule (or the regular expression in it) could not be parsed."""
+
+
+def load_rules(path):
+    """Load rules from a file, a rule per line.
+
+    Empty lines and lines containing only a comment starting with # are ignored.
+
+    A rule is of form "tag:project:regexp" (whitespace around fields is ignored, project
+    may be empty => applies to all projects). Regular expressions can be bare strings
+    or quoted using Python string literal syntax (triple-quoted and raw string literals
+    are supported, but unicode string literals are not valid).
+
+    Arguments:
+      path (str): path of the rule file; also used as the file name in error messages
+
+    Return a list of (tag, project, regexp) tuples (all items are strings).
+
+    Raise ParseError if a line is not a valid rule.
+    """
+    # Do not call the handle 'file': that name shadows the Python 2 builtin.
+    with open(path) as rule_file:
+        return parse_rules(rule_file.read(), path)
+
+
+def parse_rules(data, path='file'):
+    """Parse rule definitions from a string, one rule per line.
+
+    Arguments:
+      data (str): text to parse, split into lines with splitlines()
+      path (str): name used to identify the source in error messages
+
+    Return a list with the result of _parse_rule for every line that holds a
+    rule; lines for which _parse_rule returns a false value are skipped.
+
+    Raise ParseError, prefixed with the path and the 1-based line number, if a
+    line cannot be parsed.
+    """
+    parsed = []
+    for lineno, text in enumerate(data.splitlines(), 1):
+        try:
+            entry = _parse_rule(text)
+        except ParseError as exc:
+            raise ParseError('%s, line %d: %s' % (path, lineno, str(exc)))
+        if entry:
+            parsed.append(entry)
+    return parsed
+
+
+def _parse_rule(line):
+    """Turn a single line of rule text into a rule tuple.
+
+    Blank lines and comment lines (first non-blank character is #) yield None.
+    Any other line is split at its first two colons into tag, project and
+    regexp, and the result is the tuple (tag, project, regular expression
+    string) with whitespace stripped around tag and project.
+
+    Raise ParseError if the line has fewer than three fields or if
+    _parse_regexp rejects the regular expression.
+    """
+    stripped = line.strip()
+    is_comment = stripped.startswith('#')
+    if is_comment or not stripped:
+        return None
+    # maxsplit=2 keeps any further colons as part of the regexp field.
+    fields = stripped.split(':', 2)
+    if len(fields) != 3:
+        raise ParseError("syntax error")
+    tag, project, raw_regexp = fields
+    return tag.strip(), project.strip(), _parse_regexp(raw_regexp)
+
+
+def _parse_regexp(regexp):
+    """Validate the regular expression field of a rule and return it as a string.
+
+    Surrounding whitespace is stripped. A field that ends with a quote character is
+    evaluated as a Python string literal; any other field is used as is, except that
+    a field starting with a quote but not ending with one is rejected.
+
+    Return the regular expression string.
+
+    Raise ParseError if the quoting is invalid, if the literal does not evaluate to
+    a str, or if the result does not compile as a regular expression.
+    """
+    regexp = regexp.strip()
+    # Parse quoted regular expressions as Python string literals.
+    if regexp.endswith(('"', "'")):
+        try:
+            parsed = ast.literal_eval(regexp)
+        except (SyntaxError, ValueError):
+            # literal_eval raises ValueError (not SyntaxError) for input that is
+            # valid Python but not a literal, such as "name + 'x'"; report both
+            # as ParseError so callers get the file/line context.
+            raise ParseError("invalid Python string literal")
+        # We don't want unicode regexps for now.
+        if not isinstance(parsed, str):
+            raise ParseError("syntax error")
+        regexp = parsed
+    elif regexp.startswith(('"', "'")):
+        raise ParseError("mismatched quotes")
+    # Make sure that the regexp is valid.
+    try:
+        re.compile(regexp)
+    except re.error as exc:
+        raise ParseError(str(exc))
+    return regexp
+
+
+def categorize(project, rules, output, default_tag=None):
+    """Collect the tags of all rules that match a piece of test output.
+
+    Arguments:
+      project (str): name of the project
+      rules (iterable of (str, str, str) tuples):
+          (tag, project, regexp) tuples; a rule with an empty project applies to
+          every project, otherwise its project must equal the project argument
+      output (str): output of a (partial) test run / build
+      default_tag: used as the only tag when no rule matches; ignored if it is
+          a false value (None, empty string, ...)
+
+    Each regexp is searched for in the output with CR LF pairs converted to LF,
+    using the MULTILINE and DOTALL flags.
+
+    Returns a set of matched tags.
+    """
+    flags = re.MULTILINE | re.DOTALL
+    normalized = output.replace('\r\n', '\n')
+    matched = set()
+    for tag, rule_project, regexp in rules:
+        # Skip rules that are bound to some other project.
+        if rule_project and rule_project != project:
+            continue
+        if re.search(regexp, normalized, flags):
+            matched.add(tag)
+    if matched or not default_tag:
+        return matched
+    return {default_tag}
diff --git a/tests/changes/utils/test_categorize.py b/tests/changes/utils/test_categorize.py
@@ -0,0 +1,112 @@
+import textwrap
+import unittest
+
+from changes.experimental.categorize import parse_rules, _parse_rule, categorize, ParseError
+
+
+class TestCategorize(unittest.TestCase):
+    """Unit tests for rule parsing (_parse_rule, parse_rules) and categorize."""
+
+    # Blank and comment-only lines are not rules.
+    def test_parse_empty_rule(self):
+        self.assertEqual(_parse_rule(''), None)
+        self.assertEqual(_parse_rule(' \n'), None)
+        self.assertEqual(_parse_rule('# foo \n'), None)
+        self.assertEqual(_parse_rule(' # foo \n'), None)
+
+    # Fields are stripped; colons after the second one belong to the regexp.
+    def test_parse_rule(self):
+        self.assertEqual(_parse_rule('tag::regex'), ('tag', '', 'regex'))
+        self.assertEqual(_parse_rule(' tag : proj : regex '), ('tag', 'proj', 'regex'))
+        self.assertEqual(_parse_rule(' tag : proj : regex :: '), ('tag', 'proj', 'regex ::'))
+
+    # This method used to be a second 'test_parse_rule', which replaced the one
+    # above in the class namespace so that it never ran; it needs its own name.
+    def test_parse_rule_with_hyphens(self):
+        self.assertEqual(_parse_rule('tag::regex'), ('tag', '', 'regex'))
+        self.assertEqual(_parse_rule(' x-fail : proj : test error '),
+                         ('x-fail', 'proj', 'test error'))
+        self.assertEqual(_parse_rule(' tag : proj-2 : regex :: '), ('tag', 'proj-2', 'regex ::'))
+
+    # Quoted regexps are evaluated as Python string literals (including raw ones).
+    def test_parse_rule_with_quoted_regexp(self):
+        self.assertEqual(_parse_rule('tag::"reg\'ex"'), ('tag', '', "reg'ex"))
+        self.assertEqual(_parse_rule("tag:: 'reg\"ex' "), ('tag', '', 'reg"ex'))
+        self.assertEqual(_parse_rule("tag:: 'reg\\'ex' "), ('tag', '', "reg'ex"))
+        self.assertEqual(_parse_rule("tag:: r'reg\\'ex' "), ('tag', '', "reg\\'ex"))
+
+    # Multi-line input: comments and blank lines are dropped, rules keep order.
+    def test_parse_rules(self):
+        self.assertEqual(parse_rules(''), [])
+        data = dedent('''\
+            # comment
+
+            tag::^ERROR$
+
+            tag2:project:\\[error\\]
+            ''')
+        self.assertEqual(parse_rules(data),
+                         [('tag', '', '^ERROR$'),
+                          ('tag2', 'project', r'\[error\]')])
+
+    # A rule with an empty project applies to any project; matching is case-sensitive.
+    def test_categorize_general_rule(self):
+        rules = [('tag', '', 'error')]
+        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag'})
+        self.assertEqual(categorize('proj', rules, '.. Error ..'), set())
+
+    # Every matching rule contributes its tag.
+    def test_categorize_general_rule_two_tags(self):
+        rules = [('tag', '', 'error'),
+                 ('tag2', '', 'fail')]
+        self.assertEqual(categorize('proj', rules, '.. error .. fail'), {'tag', 'tag2'})
+        self.assertEqual(categorize('proj', rules, '.. fail ..'), {'tag2'})
+        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag'})
+        self.assertEqual(categorize('proj', rules, '.. ok ..'), set())
+
+    # A rule with a project only applies to that project.
+    def test_categorize_project_rule(self):
+        rules = [('tag2', 'proj', 'error')]
+        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag2'})
+        self.assertEqual(categorize('proj2', rules, '.. error ..'), set())
+
+    # ^ and $ anchor at line boundaries, not only at the ends of the output.
+    def test_categorize_full_line_regexp(self):
+        rules = [('tag2', 'proj', '^error$')]
+        self.assertEqual(categorize('proj', rules, 'error'), {'tag2'})
+        self.assertEqual(categorize('proj', rules, '\nerror\n'), {'tag2'})
+        self.assertEqual(categorize('proj', rules, 'xerror'), set())
+        self.assertEqual(categorize('proj', rules, '\nerrorx\n'), set())
+
+    # CR LF line endings must not prevent $ from matching at the end of a line.
+    def test_categorize_full_line_regexp_cr_lf(self):
+        rules = [('tag', 'proj', '^error$')]
+        self.assertEqual(categorize('proj', rules, '\r\nerror\r\n'), {'tag'})
+
+    # '.' also matches newlines, so a regexp can span several lines.
+    def test_categorize_match_newline(self):
+        rules = [('atag', 'aproj', 'line1.*line2')]
+        self.assertEqual(categorize('aproj', rules, 'line1\n\nline2'), {'atag'})
+
+    # The default tag is used only when no rule matches.
+    def test_categorize_default_tag(self):
+        rules = [('tag-x', '', 'an error')]
+        self.assertEqual(categorize('proj', rules, '.. an error ..', default_tag='def'),
+                         {'tag-x'})
+        self.assertEqual(categorize('proj', rules, '.. an Error ..', default_tag='def'),
+                         {'def'})
+
+    # Errors name the source path and the 1-based line number.
+    def test_parse_error(self):
+        with self.assertRaisesRegexp(ParseError, 'file.ext, line 2: syntax error'):
+            parse_rules('foo::bar\n'
+                        'foo:bar', path='file.ext')
+
+    # Opening quote without a closing quote.
+    def test_quotes_parse_error(self):
+        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: mismatched quotes'):
+            parse_rules('foo::"bar\n', path='file.ext')
+
+    # Closing quote without an opening quote.
+    def test_quotes_parse_error_2(self):
+        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: invalid Python string literal'):
+            parse_rules("foo::bar' \n", path='file.ext')
+
+    # Unescaped quote in the middle of a quoted regexp.
+    def test_quotes_parse_error_3(self):
+        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: invalid Python string literal'):
+            parse_rules("foo::'b'ar' \n", path='file.ext')
+
+    # Unicode string literals are rejected.
+    def test_unicode_regexp(self):
+        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: syntax error'):
+            parse_rules("foo::u'foo'", path='file.ext')
+
+    # The message of an invalid regular expression is passed through.
+    def test_regex_parse_error(self):
+        with self.assertRaisesRegexp(ParseError,
+                                     'file.ext, line 1: unexpected end of regular expression'):
+            parse_rules('foo::[x', path='file.ext')
+
+
+def dedent(string):
+    """Return string with the common leading whitespace removed (textwrap.dedent)."""
+    dedented = textwrap.dedent(string)
+    return dedented