This repository has been archived by the owner on Dec 15, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add log_processing job listener to report on classifier results for failed job logs. Summary: This adds the first use of our classification code to track how it does when running against failure logs. No consequences other than monitoring at the moment, but it'll give us some confidence in our rules. Rules are loaded for each log, but that seems insignificant relative to other costs and allows us to iterate on rules quickly if we need to. Arguable. Test Plan: Unit Reviewers: josiah Reviewed By: josiah Subscribers: changesbot, wwu, jukka Differential Revision: https://tails.corp.dropbox.com/D95667
- Loading branch information
Showing
3 changed files
with
168 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
"""Runs a classifier on logs of failed jobs and reports the classifications. | ||
The rules for the classifier are loaded from the string config variable CATEGORIZE_RULES_FILE. | ||
If that variable isn't specified, no classification is attempted. | ||
""" | ||
import logging | ||
from flask import current_app | ||
|
||
from changes.config import statsreporter | ||
from changes.constants import Result | ||
from changes.models import Job, LogSource, LogChunk, JobStep | ||
from changes.experimental import categorize | ||
|
||
|
||
logger = logging.getLogger('log_processing') | ||
|
||
|
||
def _get_failing_log_sources(job):
    """Collect the LogSources belonging to this job's failed steps.

    Args:
        job (Job): Job whose failing logs should be found.
    Returns:
        list of LogSource, one per log of a failed JobStep, ordered by
        the step's creation time (oldest first).
    """
    query = LogSource.query.filter(
        LogSource.job_id == job.id,
    )
    # Join through JobStep so we can restrict to failed steps and order
    # by when each step was created.
    query = query.join(JobStep, LogSource.step_id == JobStep.id)
    query = query.filter(JobStep.result == Result.failed)
    return list(query.order_by(JobStep.date_created))
|
||
|
||
def _get_log_data(source):
    """Reassemble the full text of a log from its stored chunks.

    Args:
        source (LogSource): The log whose chunks should be concatenated.
    Returns:
        str: the complete log text, with chunks joined in offset order.
    """
    chunks = LogChunk.query.filter(
        LogChunk.source_id == source.id,
    ).order_by(LogChunk.offset.asc())
    pieces = [chunk.text for chunk in chunks]
    return ''.join(pieces)
|
||
|
||
def _get_rules():
    """Return the current rules to be used with categorize.categorize.

    NB: Reloads the rules file at each call so rule changes take effect
    without a restart.

    Returns:
        The loaded rules, or None when CATEGORIZE_RULES_FILE isn't configured.
    """
    path = current_app.config.get('CATEGORIZE_RULES_FILE')
    if path:
        return categorize.load_rules(path)
    return None
|
||
|
||
def _log_uri(logsource):
    """Build the relative web URI for viewing a log.

    Args:
        logsource (LogSource): The LogSource to return URI for.
    Returns:
        str with relative URI of the provided LogSource.
    """
    job = logsource.job
    build = job.build
    ids = (build.project.id.hex, build.id.hex, job.id.hex, logsource.id.hex)
    return "/projects/{}/builds/{}/jobs/{}/logs/{}/".format(*ids)
|
||
|
||
def _incr(name):
    """Helper to increment a stats counter.

    Mostly exists to ease mocking in tests.

    Args:
        name (str): Name of counter to increment.
    """
    statsreporter.stats().incr(name)
|
||
|
||
def job_finished_handler(job_id, **kwargs):
    """Listener invoked when a job finishes; classifies its failure logs.

    Loads the categorization rules (skipping all work when none are
    configured), then runs the classifier over every failing log source
    of the job, reporting per-log and per-tag counters. Logs where some
    rule was applicable but nothing matched are counted and warned about
    so uncategorized failures can be investigated.

    Args:
        job_id: id of the finished Job; unknown ids are ignored.
    """
    job = Job.query.get(job_id)
    if job is None:
        return

    rules = _get_rules()
    if not rules:
        # No rules file configured; classification is disabled.
        return

    for source in _get_failing_log_sources(job):
        text = _get_log_data(source)
        tags, applicable = categorize.categorize(job.project.slug, rules, text)
        _incr("failing-log-processed")
        if tags:
            for tag in tags:
                _incr("failing-log-category-{}".format(tag))
        elif applicable:
            _incr("failing-log-uncategorized")
            logger.warning("Uncategorized log: {} ({})".format(source.id.hex, _log_uri(source)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
from __future__ import absolute_import | ||
|
||
import mock | ||
|
||
from changes.constants import Result, Status | ||
from changes.testutils import TestCase | ||
from changes.listeners.log_processing import job_finished_handler | ||
|
||
|
||
class LogProcessingTest(TestCase):
    """Tests for the job_finished_handler log-classification listener.

    NOTE: the redundant no-op setUp override (it only called super())
    has been removed; TestCase.setUp runs regardless.
    """

    @mock.patch('changes.listeners.log_processing.categorize.categorize')
    @mock.patch('changes.listeners.log_processing._get_rules')
    def test_tagged_log(self, get_rules_fn, categorize_fn):
        """A categorized log bumps the processed and per-tag counters."""
        project = self.create_project(name='test', slug='project-slug')

        build = self.create_build(project, result=Result.failed, target='D1',
                                  label='Some sweet diff')
        job = self.create_job(build=build, result=Result.failed, status=Status.finished)
        phase = self.create_jobphase(job=job)
        step = self.create_jobstep(phase=phase)
        logsource = self.create_logsource(step=step, name='loglog')
        # Store the log in two chunks to verify they're reassembled in order.
        chunks = ['Some log text\n', 'Hey, some more log text.']
        offset = 0
        for c in chunks:
            self.create_logchunk(source=logsource, text=c, offset=offset)
            offset += len(c)

        fake_rules = object()
        get_rules_fn.return_value = fake_rules

        categorize_fn.return_value = ({"tag1", "tag2"}, {'tag1', 'tag2'})

        with mock.patch('changes.listeners.log_processing._incr') as incr:
            job_finished_handler(job_id=job.id.hex)
            incr.assert_any_call("failing-log-processed")
            incr.assert_any_call("failing-log-category-tag1")
            incr.assert_any_call("failing-log-category-tag2")

        # The classifier must see the project slug, the loaded rules, and
        # the full concatenated log text.
        categorize_fn.assert_called_with('project-slug', fake_rules, ''.join(chunks))

    @mock.patch('changes.listeners.log_processing.categorize.categorize')
    @mock.patch('changes.listeners.log_processing._get_rules')
    def test_no_tags(self, get_rules_fn, categorize_fn):
        """An applicable-but-unmatched log warns and counts as uncategorized."""
        project = self.create_project(name='test', slug='project-slug')

        build = self.create_build(project, result=Result.failed, target='D1',
                                  label='Some sweet diff')
        job = self.create_job(build=build, result=Result.failed, status=Status.finished)
        phase = self.create_jobphase(job=job)
        step = self.create_jobstep(phase=phase)
        logsource = self.create_logsource(step=step, name='loglog')
        self.create_logchunk(source=logsource, text='Some log text')

        fake_rules = object()
        get_rules_fn.return_value = fake_rules

        # one tag was applicable, but none matched.
        categorize_fn.return_value = (set(), {'tag1'})

        with mock.patch('changes.listeners.log_processing._incr') as incr:
            with mock.patch('changes.listeners.log_processing.logger.warning') as warn:
                job_finished_handler(job_id=job.id.hex)
                warn.assert_any_call(mock.ANY)
                incr.assert_any_call("failing-log-uncategorized")

        categorize_fn.assert_called_with('project-slug', fake_rules, 'Some log text')