Skip to content
This repository has been archived by the owner on Dec 15, 2018. It is now read-only.

Commit

Permalink
Add log_processing job listener to report on classifier results for failed job logs
Browse files Browse the repository at this point in the history

Summary:
This adds the first use of our classification code to track how it does when running against
failure logs. No consequences other than monitoring at the moment, but it'll give us some confidence
in our rules.
Rules are loaded for each log, but that seems insignificant relative to other costs and allows us to
iterate on rules quickly if we need to. Arguable.

Test Plan: Unit

Reviewers: josiah

Reviewed By: josiah

Subscribers: changesbot, wwu, jukka

Differential Revision: https://tails.corp.dropbox.com/D95667
  • Loading branch information
kylec1 committed Mar 24, 2015
1 parent e6ad3ba commit a860c3c
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 2 deletions.
14 changes: 12 additions & 2 deletions changes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from changes.constants import PROJECT_ROOT
from changes.api.controller import APIController, APICatchall
from changes.experimental import categorize
from changes.ext.celery import Celery
from changes.ext.redis import Redis
from changes.ext.statsreporter import StatsReporter
Expand Down Expand Up @@ -158,6 +159,7 @@ def create_app(_read_config=True, **config):
('changes.listeners.build_revision.revision_created_handler', 'revision.created'),
('changes.listeners.phabricator_listener.build_finished_handler', 'build.finished'),
('changes.listeners.analytics_notifier.build_finished_handler', 'build.finished'),
('changes.listeners.log_processing.job_finished_handler', 'job.finished'),
)

# restrict outbound notifications to the given domains
Expand Down Expand Up @@ -278,6 +280,11 @@ def capture_user(*args, **kwargs):

configure_jobs(app)

rules_file = app.config.get('CATEGORIZE_RULES_FILE')
if rules_file:
# Fail at startup if we have a bad rules file.
categorize.load_rules(rules_file)

return app


Expand Down Expand Up @@ -410,7 +417,8 @@ def configure_api_routes(app):
api.add_resource(ProjectIndexAPIView, '/projects/')
api.add_resource(ProjectDetailsAPIView, '/projects/<project_id>/')
api.add_resource(ProjectBuildIndexAPIView, '/projects/<project_id>/builds/')
api.add_resource(ProjectBuildIndexAPIView, '/projects/<project_id>/builds/search/', endpoint='projectbuildsearchapiview')
api.add_resource(ProjectBuildIndexAPIView, '/projects/<project_id>/builds/search/',
endpoint='projectbuildsearchapiview')
api.add_resource(ProjectLatestGreenBuildsAPIView, '/projects/<project_id>/latest_green_builds/')
api.add_resource(ProjectCommitIndexAPIView, '/projects/<project_id>/commits/')
api.add_resource(ProjectCommitDetailsAPIView, '/projects/<project_id>/commits/<commit_id>/')
Expand Down Expand Up @@ -469,7 +477,9 @@ def configure_web_routes(app):
app.add_url_rule(
'/auth/logout/', view_func=LogoutView.as_view('logout', complete_url='index'))
app.add_url_rule(
'/auth/complete/', view_func=AuthorizedView.as_view('authorized', authorized_url='authorized', complete_url='index'))
'/auth/complete/', view_func=AuthorizedView.as_view('authorized',
authorized_url='authorized',
complete_url='index'))

app.add_url_rule(
'/<path:path>', view_func=IndexView.as_view('index-path'))
Expand Down
86 changes: 86 additions & 0 deletions changes/listeners/log_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Runs a classifier on logs of failed jobs and reports the classifications.
The rules for the classifier are loaded from the string config variable CATEGORIZE_RULES_FILE.
If that variable isn't specified, no classification is attempted.
"""
import logging
from flask import current_app

from changes.config import statsreporter
from changes.constants import Result
from changes.models import Job, LogSource, LogChunk, JobStep
from changes.experimental import categorize


# Module-level logger; deliberately named 'log_processing' rather than __name__.
logger = logging.getLogger('log_processing')


def _get_failing_log_sources(job):
    """Return the LogSources of *job* whose JobStep failed.

    Results are ordered by the step's creation time.
    """
    query = LogSource.query.filter(
        LogSource.job_id == job.id,
    )
    query = query.join(
        JobStep, LogSource.step_id == JobStep.id,
    )
    query = query.filter(JobStep.result == Result.failed)
    return list(query.order_by(JobStep.date_created))


def _get_log_data(source):
    """Concatenate every LogChunk of *source*, in offset order, into one string."""
    chunks = LogChunk.query.filter(
        LogChunk.source_id == source.id,
    ).order_by(LogChunk.offset.asc())
    parts = [chunk.text for chunk in chunks]
    return ''.join(parts)


def _get_rules():
    """Return the current rules to be used with categorize.categorize.

    NB: Reloads the rules file at each call. Returns None when no
    CATEGORIZE_RULES_FILE is configured.
    """
    rules_file = current_app.config.get('CATEGORIZE_RULES_FILE')
    if rules_file:
        return categorize.load_rules(rules_file)
    return None


def _log_uri(logsource):
"""
Args:
logsource (LogSource): The LogSource to return URI for.
Returns:
str with relative URI of the provided LogSource.
"""
job = logsource.job
build = job.build
project = build.project
return "/projects/{}/builds/{}/jobs/{}/logs/{}/".format(
project.id.hex, build.id.hex, job.id.hex, logsource.id.hex)


def _incr(name):
    """Increment a stats counter.

    Mostly exists as a thin wrapper to ease mocking in tests.
    Args:
        name (str): Name of counter to increment.
    """
    reporter = statsreporter.stats()
    reporter.incr(name)


def job_finished_handler(job_id, **kwargs):
    """Handle 'job.finished': categorize failing logs and report the results.

    Runs the classifier over every failing log source of the job and
    increments stats counters for each tag found; logs a warning for an
    applicable log that matched no tag. No-op when the job is unknown or
    no rules are configured.
    """
    job = Job.query.get(job_id)
    if job is None:
        return

    rules = _get_rules()
    if not rules:
        return

    for source in _get_failing_log_sources(job):
        text = _get_log_data(source)
        tags, applicable = categorize.categorize(job.project.slug, rules, text)
        _incr("failing-log-processed")
        if tags:
            for tag in tags:
                _incr("failing-log-category-{}".format(tag))
        elif applicable:
            # Applicable rules existed but none matched -- worth surfacing.
            _incr("failing-log-uncategorized")
            logger.warning("Uncategorized log: {} ({})".format(source.id.hex, _log_uri(source)))
70 changes: 70 additions & 0 deletions tests/changes/listeners/test_log_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from __future__ import absolute_import

import mock

from changes.constants import Result, Status
from changes.testutils import TestCase
from changes.listeners.log_processing import job_finished_handler


class LogProcessingTest(TestCase):
    """Tests for the job.finished listener in changes.listeners.log_processing."""
    # NOTE: the redundant setUp override (which only called super().setUp())
    # was removed; the inherited TestCase.setUp runs regardless.
    # mock.patch decorators apply bottom-up, so the innermost patch
    # (_get_rules) is injected first after self.

    @mock.patch('changes.listeners.log_processing.categorize.categorize')
    @mock.patch('changes.listeners.log_processing._get_rules')
    def test_tagged_log(self, get_rules_fn, categorize_fn):
        """Every tag the categorizer returns is reported as a stats counter."""
        project = self.create_project(name='test', slug='project-slug')

        build = self.create_build(project, result=Result.failed, target='D1',
                                  label='Some sweet diff')
        job = self.create_job(build=build, result=Result.failed, status=Status.finished)
        phase = self.create_jobphase(job=job)
        step = self.create_jobstep(phase=phase)
        logsource = self.create_logsource(step=step, name='loglog')
        # Split the log across chunks to verify they are joined before categorizing.
        chunks = ['Some log text\n', 'Hey, some more log text.']
        offset = 0
        for c in chunks:
            self.create_logchunk(source=logsource, text=c, offset=offset)
            offset += len(c)

        fake_rules = object()
        get_rules_fn.return_value = fake_rules

        categorize_fn.return_value = ({"tag1", "tag2"}, {'tag1', 'tag2'})

        with mock.patch('changes.listeners.log_processing._incr') as incr:
            job_finished_handler(job_id=job.id.hex)
            incr.assert_any_call("failing-log-processed")
            incr.assert_any_call("failing-log-category-tag1")
            incr.assert_any_call("failing-log-category-tag2")

        categorize_fn.assert_called_with('project-slug', fake_rules, ''.join(chunks))

    @mock.patch('changes.listeners.log_processing.categorize.categorize')
    @mock.patch('changes.listeners.log_processing._get_rules')
    def test_no_tags(self, get_rules_fn, categorize_fn):
        """An applicable log with no matching tag is counted and warned about."""
        project = self.create_project(name='test', slug='project-slug')

        build = self.create_build(project, result=Result.failed, target='D1',
                                  label='Some sweet diff')
        job = self.create_job(build=build, result=Result.failed, status=Status.finished)
        phase = self.create_jobphase(job=job)
        step = self.create_jobstep(phase=phase)
        logsource = self.create_logsource(step=step, name='loglog')
        self.create_logchunk(source=logsource, text='Some log text')

        fake_rules = object()
        get_rules_fn.return_value = fake_rules

        # one tag was applicable, but none matched.
        categorize_fn.return_value = (set(), {'tag1'})

        with mock.patch('changes.listeners.log_processing._incr') as incr:
            with mock.patch('changes.listeners.log_processing.logger.warning') as warn:
                job_finished_handler(job_id=job.id.hex)
                warn.assert_any_call(mock.ANY)
                incr.assert_any_call("failing-log-uncategorized")

        categorize_fn.assert_called_with('project-slug', fake_rules, 'Some log text')

0 comments on commit a860c3c

Please sign in to comment.