This repository has been archived by the owner on Dec 15, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: This implements a simple classifier that isn't used anywhere yet. Test Plan: Unit tests. Reviewers: kylec Reviewed By: kylec Subscribers: changesbot, wwu Differential Revision: https://tails.corp.dropbox.com/D90349
- Loading branch information
Showing
2 changed files
with
212 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
"""Tools for tagging test outputs based on regexp based rules.""" | ||
|
||
import ast | ||
import re | ||
|
||
|
||
class ParseError(Exception):
    """Signals that a rule could not be parsed.

    The exception message describes the problem (for example a syntax error
    in the rule or an invalid regular expression).
    """
|
||
|
||
def load_rules(path):
    """Read the rule file at `path` and parse it, one rule per line.

    Blank lines and lines that contain only a comment (starting with #) are
    skipped. Every other line must look like "tag:project:regexp". Whitespace
    around each field is ignored, and an empty project means that the rule
    applies to all projects. The regexp may be written bare or quoted using
    Python string literal syntax (triple-quoted and raw string literals are
    supported, but unicode string literals are not valid).

    Return a list of (tag, project, regexp) tuples (all items are strings).
    Raise ParseError, prefixed with the path and line number, if a line
    cannot be parsed.
    """
    with open(path) as rules_file:
        contents = rules_file.read()
    return parse_rules(contents, path)
|
||
|
||
def parse_rules(data, path='file'):
    """Parse rules from the string `data`, one rule per line.

    `path` is only used to label error messages. Lines for which _parse_rule
    returns nothing (empty lines and comments) are skipped.

    Return a list of (tag, project, regexp) tuples in input order.
    Raise ParseError with a "<path>, line <n>: <reason>" message if any line
    is invalid; line numbers start at 1.
    """
    parsed_rules = []
    for lineno, text in enumerate(data.splitlines(), 1):
        try:
            parsed = _parse_rule(text)
        except ParseError as err:
            raise ParseError('%s, line %d: %s' % (path, lineno, str(err)))
        if not parsed:
            continue
        parsed_rules.append(parsed)
    return parsed_rules
|
||
|
||
def _parse_rule(line): | ||
"""Parse line of text that represents a rule. | ||
Return None if the line is empty or a comment. Otherwise, return tuple | ||
(tag, project, regular expression string). | ||
Raise ParseError on error. | ||
""" | ||
line = line.strip() | ||
if not line or line.startswith('#'): | ||
return None | ||
try: | ||
tag, project, regexp = line.split(':', 2) | ||
except ValueError: | ||
raise ParseError("syntax error") | ||
regexp = _parse_regexp(regexp) | ||
return tag.strip(), project.strip(), regexp | ||
|
||
|
||
def _parse_regexp(regexp): | ||
regexp = regexp.strip() | ||
# Parse quoted regular expressions as Python string literals. | ||
if regexp.endswith(('"', "'")): | ||
try: | ||
parsed = ast.literal_eval(regexp) | ||
except SyntaxError as exc: | ||
raise ParseError("invalid Python string literal") | ||
# We don't want unicode regexps for now. | ||
if not isinstance(parsed, str): | ||
raise ParseError("syntax error") | ||
regexp = parsed | ||
elif regexp.startswith(('"', "'")): | ||
raise ParseError("mismatched quotes") | ||
# Make sure that the regexp is valid. | ||
try: | ||
re.compile(regexp) | ||
except re.error as exc: | ||
raise ParseError(str(exc)) | ||
return regexp | ||
|
||
|
||
def categorize(project, rules, output, default_tag=None):
    """Return the set of tags whose rules match a piece of test output.

    Arguments:
      project (str): name of the project the output belongs to
      rules (iterable of (str, str, str) tuples):
        (tag, project, regexp) tuples; a rule is considered only if its
        project is empty or equal to `project`
      output (str): output of a (partial) test run / build
      default_tag: tag to report when no rule matches; ignored if it is
        None (or otherwise falsy)

    CR LF line endings in the output are converted to LF before matching.
    Regexps are searched with re.MULTILINE and re.DOTALL, so ^ and $ match
    at line boundaries and . also matches newlines.

    Returns a set of matched tags.
    """
    normalized = output.replace('\r\n', '\n')
    search_flags = re.MULTILINE | re.DOTALL
    matched = set()
    for tag, rule_project, regexp in rules:
        # Skip rules that are bound to some other project.
        if rule_project and rule_project != project:
            continue
        if re.search(regexp, normalized, search_flags):
            matched.add(tag)
    if matched or not default_tag:
        return matched
    matched.add(default_tag)
    return matched
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import textwrap | ||
import unittest | ||
|
||
from changes.experimental.categorize import parse_rules, _parse_rule, categorize, ParseError | ||
|
||
|
||
class TestCategorize(unittest.TestCase):
    """Unit tests for rule parsing and output categorization."""

    # --- Parsing of individual rule lines ---

    def test_parse_empty_rule(self):
        self.assertEqual(_parse_rule(''), None)
        self.assertEqual(_parse_rule(' \n'), None)
        self.assertEqual(_parse_rule('# foo \n'), None)
        self.assertEqual(_parse_rule(' # foo \n'), None)

    def test_parse_rule(self):
        self.assertEqual(_parse_rule('tag::regex'), ('tag', '', 'regex'))
        self.assertEqual(_parse_rule(' tag : proj : regex '), ('tag', 'proj', 'regex'))
        self.assertEqual(_parse_rule(' tag : proj : regex :: '), ('tag', 'proj', 'regex ::'))

    # This test used to be named test_parse_rule as well, which silently
    # replaced the test above so that it never ran. It has a distinct name
    # now so that both tests are collected.
    def test_parse_rule_with_hyphens_and_spaces(self):
        self.assertEqual(_parse_rule('tag::regex'), ('tag', '', 'regex'))
        self.assertEqual(_parse_rule(' x-fail : proj : test error '),
                         ('x-fail', 'proj', 'test error'))
        self.assertEqual(_parse_rule(' tag : proj-2 : regex :: '), ('tag', 'proj-2', 'regex ::'))

    def test_parse_rule_with_quoted_regexp(self):
        self.assertEqual(_parse_rule('tag::"reg\'ex"'), ('tag', '', "reg'ex"))
        self.assertEqual(_parse_rule("tag:: 'reg\"ex' "), ('tag', '', 'reg"ex'))
        self.assertEqual(_parse_rule("tag:: 'reg\\'ex' "), ('tag', '', "reg'ex"))
        self.assertEqual(_parse_rule("tag:: r'reg\\'ex' "), ('tag', '', "reg\\'ex"))

    # --- Parsing of whole rule files ---

    def test_parse_rules(self):
        self.assertEqual(parse_rules(''), [])
        data = dedent('''\
            # comment
            tag::^ERROR$
            tag2:project:\\[error\\]
            ''')
        self.assertEqual(parse_rules(data),
                         [('tag', '', '^ERROR$'),
                          ('tag2', 'project', r'\[error\]')])

    # --- Categorization ---

    def test_categorize_general_rule(self):
        rules = [('tag', '', 'error')]
        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag'})
        self.assertEqual(categorize('proj', rules, '.. Error ..'), set())

    def test_categorize_general_rule_two_tags(self):
        rules = [('tag', '', 'error'),
                 ('tag2', '', 'fail')]
        self.assertEqual(categorize('proj', rules, '.. error .. fail'), {'tag', 'tag2'})
        self.assertEqual(categorize('proj', rules, '.. fail ..'), {'tag2'})
        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag'})
        self.assertEqual(categorize('proj', rules, '.. ok ..'), set())

    def test_categorize_project_rule(self):
        rules = [('tag2', 'proj', 'error')]
        self.assertEqual(categorize('proj', rules, '.. error ..'), {'tag2'})
        self.assertEqual(categorize('proj2', rules, '.. error ..'), set())

    def test_categorize_full_line_regexp(self):
        rules = [('tag2', 'proj', '^error$')]
        self.assertEqual(categorize('proj', rules, 'error'), {'tag2'})
        self.assertEqual(categorize('proj', rules, '\nerror\n'), {'tag2'})
        self.assertEqual(categorize('proj', rules, 'xerror'), set())
        self.assertEqual(categorize('proj', rules, '\nerrorx\n'), set())

    def test_categorize_full_line_regexp_cr_lf(self):
        rules = [('tag', 'proj', '^error$')]
        self.assertEqual(categorize('proj', rules, '\r\nerror\r\n'), {'tag'})

    def test_categorize_match_newline(self):
        rules = [('atag', 'aproj', 'line1.*line2')]
        self.assertEqual(categorize('aproj', rules, 'line1\n\nline2'), {'atag'})

    def test_categorize_default_tag(self):
        rules = [('tag-x', '', 'an error')]
        self.assertEqual(categorize('proj', rules, '.. an error ..', default_tag='def'),
                         {'tag-x'})
        self.assertEqual(categorize('proj', rules, '.. an Error ..', default_tag='def'),
                         {'def'})

    # --- Error reporting ---

    def test_parse_error(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 2: syntax error'):
            parse_rules('foo::bar\n'
                        'foo:bar', path='file.ext')

    def test_quotes_parse_error(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: mismatched quotes'):
            parse_rules('foo::"bar\n', path='file.ext')

    def test_quotes_parse_error_2(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: invalid Python string literal'):
            parse_rules("foo::bar' \n", path='file.ext')

    def test_quotes_parse_error_3(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: invalid Python string literal'):
            parse_rules("foo::'b'ar' \n", path='file.ext')

    def test_unicode_regexp(self):
        with self.assertRaisesRegexp(ParseError, 'file.ext, line 1: syntax error'):
            parse_rules("foo::u'foo'", path='file.ext')

    def test_regex_parse_error(self):
        with self.assertRaisesRegexp(ParseError,
                                     'file.ext, line 1: unexpected end of regular expression'):
            parse_rules('foo::[x', path='file.ext')
|
||
|
||
def dedent(string):
    """Return `string` with the common leading whitespace of its lines removed.

    Thin wrapper around textwrap.dedent, used to keep multi-line test data
    indented along with the surrounding code.
    """
    dedented = textwrap.dedent(string)
    return dedented