Skip to content
This repository has been archived by the owner on Dec 15, 2018. It is now read-only.

Commit

Permalink
Add test log output classifier
Browse files Browse the repository at this point in the history
Summary: This implements a simple classifier that isn't used anywhere yet.

Test Plan: Unit tests.

Reviewers: kylec

Reviewed By: kylec

Subscribers: changesbot, wwu

Differential Revision: https://tails.corp.dropbox.com/D90349
  • Loading branch information
JukkaL committed Feb 19, 2015
1 parent 4f34ea2 commit a83991a
Show file tree
Hide file tree
Showing 2 changed files with 212 additions and 0 deletions.
100 changes: 100 additions & 0 deletions changes/experimental/categorize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""Tools for tagging test outputs based on regexp based rules."""

import ast
import re


class ParseError(Exception):
    """Signal that a rule line could not be parsed.

    The message describes the problem (bad field count, malformed quoting
    or an invalid regular expression).
    """


def load_rules(path):
    """Load rules from a file, a rule per line.

    Empty lines and lines containing only a comment starting with # are
    ignored.

    A rule is of form "tag:project:regexp" (whitespace around fields is
    ignored, project may be empty => applies to all projects). Regular
    expressions can be bare strings or quoted using Python string literal
    syntax (triple-quoted and raw string literals are supported, but unicode
    string literals are not valid).

    Arguments:
        path (str): location of the rules file; also used as the file name
            in error messages.

    Returns a list of (tag, project, regexp) tuples (all items are strings).

    Raises ParseError if a line is not a valid rule.
    """
    # Avoid the name "file": it shadows a builtin on Python 2.
    with open(path) as rules_file:
        return parse_rules(rules_file.read(), path)


def parse_rules(data, path='file'):
    """Parse rule definitions held in a string, one rule per line.

    Arguments:
        data (str): full text of the rules, one rule per line
        path (str): name reported in error messages

    Returns a list of (tag, project, regexp) tuples; blank lines and
    comment lines contribute nothing.

    Raises ParseError, prefixed with the path and the 1-based line number,
    when a line cannot be parsed.
    """
    parsed = []
    for lineno, text in enumerate(data.splitlines(), 1):
        try:
            rule = _parse_rule(text)
        except ParseError as err:
            # Re-raise with the location so the user can find the bad line.
            raise ParseError('%s, line %d: %s' % (path, lineno, str(err)))
        if rule:
            parsed.append(rule)
    return parsed


def _parse_rule(line):
"""Parse line of text that represents a rule.
Return None if the line is empty or a comment. Otherwise, return tuple
(tag, project, regular expression string).
Raise ParseError on error.
"""
line = line.strip()
if not line or line.startswith('#'):
return None
try:
tag, project, regexp = line.split(':', 2)
except ValueError:
raise ParseError("syntax error")
regexp = _parse_regexp(regexp)
return tag.strip(), project.strip(), regexp


def _parse_regexp(regexp):
regexp = regexp.strip()
# Parse quoted regular expressions as Python string literals.
if regexp.endswith(('"', "'")):
try:
parsed = ast.literal_eval(regexp)
except SyntaxError as exc:
raise ParseError("invalid Python string literal")
# We don't want unicode regexps for now.
if not isinstance(parsed, str):
raise ParseError("syntax error")
regexp = parsed
elif regexp.startswith(('"', "'")):
raise ParseError("mismatched quotes")
# Make sure that the regexp is valid.
try:
re.compile(regexp)
except re.error as exc:
raise ParseError(str(exc))
return regexp


def categorize(project, rules, output, default_tag=None):
    """Tag test output according to regexp based rules.

    Arguments:
        project (str): name of the project
        rules (iterable of (str, str, str) tuples):
            each rule is a tuple (tag, project, regexp); a rule with an
            empty project applies to every project
        output (str): output of a (partial) test run / build
        default_tag: if truthy, this tag is used when no rule matches

    Returns a set of matched tags.
    """
    # Normalize CR LF line endings so that ^ and $ behave as expected.
    text = output.replace('\r\n', '\n')
    # ^/$ match at every line boundary and . also matches newlines.
    flags = re.MULTILINE | re.DOTALL
    matched = {
        tag
        for tag, rule_project, regexp in rules
        if rule_project in ('', project) or not rule_project
        if re.search(regexp, text, flags)
    }
    if default_tag and not matched:
        matched.add(default_tag)
    return matched
112 changes: 112 additions & 0 deletions tests/changes/utils/test_categorize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import textwrap
import unittest

from changes.experimental.categorize import parse_rules, _parse_rule, categorize, ParseError


class TestCategorize(unittest.TestCase):
    """Unit tests for rule parsing and test output categorization."""

    def test_parse_empty_rule(self):
        # Blank lines and comment lines do not produce a rule.
        self.assertEqual(_parse_rule(''), None)
        self.assertEqual(_parse_rule(' \n'), None)
        self.assertEqual(_parse_rule('# foo \n'), None)
        self.assertEqual(_parse_rule(' # foo \n'), None)

    def test_parse_rule(self):
        self.assertEqual(_parse_rule('tag::regex'), ('tag', '', 'regex'))
        self.assertEqual(_parse_rule(' tag : proj : regex '), ('tag', 'proj', 'regex'))
        # Colons after the second one belong to the regexp.
        self.assertEqual(_parse_rule(' tag : proj : regex :: '), ('tag', 'proj', 'regex ::'))

    # This method used to be a second definition of test_parse_rule, which
    # replaced the one above in the class namespace so that it never ran.
    def test_parse_rule_with_hyphenated_names(self):
        self.assertEqual(_parse_rule('tag::regex'), ('tag', '', 'regex'))
        self.assertEqual(_parse_rule(' x-fail : proj : test error '),
                         ('x-fail', 'proj', 'test error'))
        self.assertEqual(_parse_rule(' tag : proj-2 : regex :: '), ('tag', 'proj-2', 'regex ::'))

    def test_parse_rule_with_quoted_regexp(self):
        self.assertEqual(_parse_rule('tag::"reg\'ex"'), ('tag', '', "reg'ex"))
        self.assertEqual(_parse_rule("tag:: 'reg\"ex' "), ('tag', '', 'reg"ex'))
        self.assertEqual(_parse_rule("tag:: 'reg\\'ex' "), ('tag', '', "reg'ex"))
        # Raw string literals keep the backslash.
        self.assertEqual(_parse_rule("tag:: r'reg\\'ex' "), ('tag', '', "reg\\'ex"))

    def test_parse_rules(self):
        self.assertEqual(parse_rules(''), [])
        data = dedent('''\
            # comment
            tag::^ERROR$
            tag2:project:\\[error\\]
            ''')
        self.assertEqual(parse_rules(data),
                         [('tag', '', '^ERROR$'),
                          ('tag2', 'project', r'\[error\]')])

    def test_categorize_general_rule(self):
        rules = [('tag', '', 'error')]
        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag'})
        # Matching is case sensitive.
        self.assertEqual(categorize('proj', rules, '.. Error ..'), set())

    def test_categorize_general_rule_two_tags(self):
        rules = [('tag', '', 'error'),
                 ('tag2', '', 'fail')]
        self.assertEqual(categorize('proj', rules, '.. error .. fail'), {'tag', 'tag2'})
        self.assertEqual(categorize('proj', rules, '.. fail ..'), {'tag2'})
        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag'})
        self.assertEqual(categorize('proj', rules, '.. ok ..'), set())

    def test_categorize_project_rule(self):
        rules = [('tag2', 'proj', 'error')]
        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag2'})
        self.assertEqual(categorize('proj2', rules, '.. error ..'), set())

    def test_categorize_full_line_regexp(self):
        rules = [('tag2', 'proj', '^error$')]
        self.assertEqual(categorize('proj', rules, 'error'), {'tag2'})
        self.assertEqual(categorize('proj', rules, '\nerror\n'), {'tag2'})
        self.assertEqual(categorize('proj', rules, 'xerror'), set())
        self.assertEqual(categorize('proj', rules, '\nerrorx\n'), set())

    def test_categorize_full_line_regexp_cr_lf(self):
        rules = [('tag', 'proj', '^error$')]
        self.assertEqual(categorize('proj', rules, '\r\nerror\r\n'), {'tag'})

    def test_categorize_match_newline(self):
        rules = [('atag', 'aproj', 'line1.*line2')]
        self.assertEqual(categorize('aproj', rules, 'line1\n\nline2'), {'atag'})

    def test_categorize_default_tag(self):
        rules = [('tag-x', '', 'an error')]
        self.assertEqual(categorize('proj', rules, '.. an error ..', default_tag='def'),
                         {'tag-x'})
        self.assertEqual(categorize('proj', rules, '.. an Error ..', default_tag='def'),
                         {'def'})

    def test_parse_error(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 2: syntax error'):
            parse_rules('foo::bar\n'
                        'foo:bar', path='file.ext')

    def test_quotes_parse_error(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: mismatched quotes'):
            parse_rules('foo::"bar\n', path='file.ext')

    def test_quotes_parse_error_2(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: invalid Python string literal'):
            parse_rules("foo::bar' \n", path='file.ext')

    def test_quotes_parse_error_3(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: invalid Python string literal'):
            parse_rules("foo::'b'ar' \n", path='file.ext')

    def test_unicode_regexp(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: syntax error'):
            parse_rules("foo::u'foo'", path='file.ext')

    def test_regex_parse_error(self):
        with self.assertRaisesRegexp(ParseError,
                                     'file.ext, line 1: unexpected end of regular expression'):
            parse_rules('foo::[x', path='file.ext')


def dedent(string):
    """Return string with the common leading whitespace of its lines removed.

    Thin wrapper around textwrap.dedent.
    """
    unindented = textwrap.dedent(string)
    return unindented

0 comments on commit a83991a

Please sign in to comment.