Skip to content

Commit

Permalink
[blinkpy] Report failure reasons for blink web tests.
Browse files Browse the repository at this point in the history
Report failure reasons for two categories of failures:
- Testharness.js failures. Here the new failure(s) (if any)
  will be reported as the reason.
- Other text diff-based failures. A snippet of diff will be
  supplied as the reason.

Failure reasons will be used in Weetbix to perform failure
clustering and to assist users link bugs to known failures using
failure association rules. They will also be surfaced in MILO.

BUG=1243174
TEST=third_party/blink/tools/run_blinkpy_tests.py

Change-Id: Id4a1f61527cfea52c9a78e0179ac3070e40beb85
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3539721
Reviewed-by: Rakib Hasan <rmhasan@google.com>
Commit-Queue: Patrick Meiring <meiring@google.com>
Cr-Commit-Position: refs/heads/main@{#984756}
  • Loading branch information
patrickmeiring authored and Chromium LUCI CQ committed Mar 24, 2022
1 parent dd5ef8a commit 989707b
Show file tree
Hide file tree
Showing 8 changed files with 264 additions and 21 deletions.
1 change: 1 addition & 0 deletions testing/scripts/run_android_wpt.pydeps
Expand Up @@ -67,6 +67,7 @@
//third_party/blink/tools/blinkpy/web_tests/layout_package/bot_test_expectations.py
//third_party/blink/tools/blinkpy/web_tests/layout_package/json_results_generator.py
//third_party/blink/tools/blinkpy/web_tests/models/__init__.py
//third_party/blink/tools/blinkpy/web_tests/models/failure_reason.py
//third_party/blink/tools/blinkpy/web_tests/models/test_configuration.py
//third_party/blink/tools/blinkpy/web_tests/models/test_expectations.py
//third_party/blink/tools/blinkpy/web_tests/models/test_failures.py
Expand Down
Expand Up @@ -247,6 +247,13 @@ def sink(self, expected, result, expectations):
if summaries:
r['summaryHtml'] = '\n'.join(summaries)

if result.failure_reason:
primary_error_message = _truncate_to_utf8_bytes(
result.failure_reason.primary_error_message, 1024)
r['failureReason'] = {
'primaryErrorMessage': primary_error_message,
}

self._send({'testResults': [r]})

def close(self):
Expand All @@ -257,3 +264,31 @@ def close(self):
if not self.is_closed:
self.is_closed = True
self._session.close()


def _truncate_to_utf8_bytes(s, length):
""" Truncates a string to a given number of bytes when encoded as UTF-8.
Ensures the given string does not take more than length bytes when encoded
as UTF-8. Adds trailing ellipsis (...) if truncation occurred. A truncated
string may end up encoding to a length slightly shorter than length
because only whole Unicode codepoints are dropped.
Args:
s: The string to truncate.
length: the length (in bytes) to truncate to.
"""
try:
encoded = s.encode('utf-8')
# When encode throws UnicodeDecodeError in py2, it usually means the str is
# already encoded and has non-ascii chars. So skip re-encoding it.
except UnicodeDecodeError:
encoded = s
if len(encoded) > length:
# Truncate, leaving space for trailing ellipsis (...).
encoded = encoded[:length - 3]
# Truncating the string encoded as UTF-8 may have left the final
# codepoint only partially present. Pass 'ignore' to acknowledge
# and ensure this is dropped.
return encoded.decode('utf-8', 'ignore') + "..."
return s
Expand Up @@ -15,7 +15,7 @@
from blinkpy.common.path_finder import RELATIVE_WEB_TESTS
from blinkpy.web_tests.controllers.test_result_sink import CreateTestResultSink
from blinkpy.web_tests.controllers.test_result_sink import TestResultSink
from blinkpy.web_tests.models import test_results
from blinkpy.web_tests.models import test_results, failure_reason
from blinkpy.web_tests.models.typ_types import ResultType
from blinkpy.web_tests.port.test import add_manifest_to_mock_filesystem
from blinkpy.web_tests.port.test import TestPort
Expand Down Expand Up @@ -347,3 +347,32 @@ def test_wpt_location_filename(self):
'virtual/virtual_wpt/external/wpt/dom/ranges/Range-attributes.html',
'external/wpt/dom/ranges/Range-attributes.html',
)

def test_failure_reason(self):
tr = test_results.TestResult(test_name='test-name')
tr.failure_reason = failure_reason.FailureReason(
'primary error message')
sent_data = self.sink(True, tr)
self.assertDictEqual(sent_data['failureReason'], {
'primaryErrorMessage': 'primary error message',
})

def test_failure_reason_truncated(self):
# Swedish "Place of interest symbol", which encodes as 3 bytes in
# UTF-8. This is one Unicode code point.
poi = b'\xE2\x8C\x98'.decode('utf-8')
primary_error_message = poi * 350

# Test that the primary error message is truncated to 1K bytes in
# UTF-8 encoding.
tr = test_results.TestResult(test_name='test-name')
tr.failure_reason = failure_reason.FailureReason(primary_error_message)
sent_data = self.sink(True, tr)

# Ensure truncation has left only whole unicode code points.
# In this case, the output ends up being 1023 bytes, which is one
# byte less than the allowed size of 1024 bytes, as we do not want
# part of a unicode code point to be included in the output.
self.assertDictEqual(sent_data['failureReason'], {
'primaryErrorMessage': (poi * 340) + '...',
})
20 changes: 20 additions & 0 deletions third_party/blink/tools/blinkpy/web_tests/models/failure_reason.py
@@ -0,0 +1,20 @@
# Copyright 2022 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


# Information about why a test failed.
# Modelled after https://source.chromium.org/chromium/infra/infra/+/master:go/src/go.chromium.org/luci/resultdb/proto/v1/failure_reason.proto
class FailureReason(object):
def __init__(self, primary_error_message):
"""Initialises a new failure reason.
Args:
primary_error_message: The error message that ultimately caused
the test to fail. This should/ only be the error message
and should not include any stack traces. In the case that
a test failed due to multiple expectation failures, any
immediately fatal failure should be chosen, or otherwise
the first expectation failure.
"""
self.primary_error_message = primary_error_message
124 changes: 106 additions & 18 deletions third_party/blink/tools/blinkpy/web_tests/models/test_failures.py
Expand Up @@ -25,11 +25,13 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re
import six
from six.moves import cPickle

from blinkpy.web_tests.controllers import repaint_overlay
from blinkpy.web_tests.models.typ_types import ResultType
from blinkpy.web_tests.models.failure_reason import FailureReason
from blinkpy.common.html_diff import html_diff
from blinkpy.common.unified_diff import unified_diff

Expand Down Expand Up @@ -101,6 +103,9 @@

_ext_to_file_type = {'.txt': 'text', '.png': 'image', '.wav': 'audio'}

# Matches new failures in TestHarness.js tests.
FAILURE_RE = re.compile(r'\+(?:FAIL|Harness Error\.) (.*)$')


def has_failure_type(failure_type, failure_list):
return any(isinstance(failure, failure_type) for failure in failure_list)
Expand Down Expand Up @@ -174,6 +179,13 @@ def message(self):
"""Returns a string describing the failure in more detail."""
raise NotImplementedError

def failure_reason(self):
"""Returns a FailureReason object describing why the test failed, if
the test failed and suitable reasons are available.
The information is used in LUCI result viewing UIs and will be
used to cluster similar failures together."""
return None

def __eq__(self, other):
return self.__class__.__name__ == other.__class__.__name__

Expand Down Expand Up @@ -309,38 +321,48 @@ def __init__(self, actual_driver_output, expected_driver_output):
expected_driver_output)
self.has_repaint_overlay = (
repaint_overlay.result_contains_repaint_rects(
actual_driver_output.text)
self._actual_text())
or repaint_overlay.result_contains_repaint_rects(
expected_driver_output.text))
self._expected_text()))
self.file_ext = '.txt'

def create_artifacts(self, typ_artifacts, force_overwrite=False):
# TODO (rmhasan): See if you can can only output diff files for
# non empty text.
super(FailureText, self).create_artifacts(typ_artifacts,
force_overwrite)

actual_text = ''
expected_text = ''
if self.expected_driver_output.text is not None:
def _actual_text(self):
if self.actual_driver_output and\
self.actual_driver_output.text is not None:
if six.PY3:
# TODO(crbug/1197331): We should not decode here looks like.
# html_diff expects it to be bytes for comparing to account
# various types of encodings.
# html_diff.py and unified_diff.py use str types during
# diff fixup. Will handle it later.
expected_text = self.expected_driver_output.text.decode(
'utf8', 'replace')
return self.actual_driver_output.text.decode('utf8', 'replace')
else:
expected_text = self.expected_driver_output.text
return self.actual_driver_output.text
return ''

if self.actual_driver_output.text is not None:
def _expected_text(self):
if self.expected_driver_output and\
self.expected_driver_output.text is not None:
if six.PY3:
# TODO(crbug/1197331): ditto as in the case of expected_text above.
actual_text = self.actual_driver_output.text.decode(
# TODO(crbug/1197331): We should not decode here looks like.
# html_diff expects it to be bytes for comparing to account
# various types of encodings.
# html_diff.py and unified_diff.py use str types during
# diff fixup. Will handle it later.
return self.expected_driver_output.text.decode(
'utf8', 'replace')
else:
actual_text = self.actual_driver_output.text
return self.expected_driver_output.text
return ''

def create_artifacts(self, typ_artifacts, force_overwrite=False):
# TODO (rmhasan): See if you can can only output diff files for
# non empty text.
super(FailureText, self).create_artifacts(typ_artifacts,
force_overwrite)

actual_text = self._actual_text()
expected_text = self._expected_text()

artifacts_abs_path = self.filesystem.join(
self.result_directory, typ_artifacts.ArtifactsSubDirectory())
Expand Down Expand Up @@ -374,6 +396,72 @@ def message(self):
def text_mismatch_category(self):
raise NotImplementedError

def failure_reason(self):
actual_text = self._actual_text()
expected_text = self._expected_text()
diff_content = unified_diff(expected_text, actual_text, "expected",
"actual")
diff_lines = diff_content.splitlines()

# Drop the standard diff header with the following lines:
# --- expected
# +++ actual
diff_lines = diff_lines[2:]

# Find the first block of additions and/or deletions in the diff.
# E.g.
# Unchanged line
# -Old line 1 <-- Start of block
# -Old line 2
# +New line 1
# +New line 2 <-- End of block
# Unchanged line
deleted_lines = []
added_lines = []
match_state = ''
for i, line in enumerate(diff_lines):
# A block of diffs starts with removals (-)
# and then additions (+). Any variation from this
# pattern indicates an end of this block of diffs.
if ((line.startswith(' ') and match_state != '')
or (line.startswith('-') and match_state == '+')):
# End of block of additions and deletions.
break
if line.startswith('-'):
match_state = '-'
deleted_lines.append(line)
if line.startswith('+'):
match_state = '+'
added_lines.append(line)

primary_error = None

if 'This is a testharness.js-based test.' in actual_text:
# Testharness.js test. Find the first new failure (if any) and
# report it as the failure reason.
for i, line in enumerate(added_lines):
match = FAILURE_RE.match(line)
if match:
primary_error = match.group(1)
break
else:
# Reconstitute the first diff block, but put added lines before
# the deleted lines as they are usually more interesting.
# (New actual output more interesting than missing expected
# output, as it is likely to contain errors.)
first_diff_block = '\n'.join(added_lines + deleted_lines)

if len(first_diff_block) >= 30:
# Only use the diff if it is not tiny. If it is only the
# addition of an empty line at the end of the file or
# similar, it is unlikely to be useful.
primary_error = ('Unexpected Diff (+got, -want):\n' +
first_diff_block)

if primary_error:
return FailureReason(primary_error)
return None


class FailureMissingResult(FailureText):
def message(self):
Expand Down
Expand Up @@ -36,7 +36,7 @@
PassWithStderr,
FailureCrash,
FailureTimeout,
TestFailure)
TestFailure, FailureText)


class TestFailuresTest(unittest.TestCase):
Expand Down Expand Up @@ -131,3 +131,61 @@ def init_test_failure(test_failure):
pass_with_stderr.create_artifacts(artifacts)
self.assertEqual('timeout with stderr',
host.filesystem.read_text_file('/dir/foo-stderr.txt'))

def test_failure_text_failure_reason_testharness_js(self):
expected_text = ''
actual_text = """Content-Type: text/plain
This is a testharness.js-based test.
FAIL Tests that the document gets overscroll event with right deltaX/Y attributes. promise_test: Unhandled rejection with value: "Document did not receive scrollend event."
Harness: the test ran to completion."""

self._actual_output.text = actual_text.encode('utf8')
self._expected_output.text = expected_text.encode('utf8')

failure_text = FailureText(self._actual_output, self._expected_output)
failure_reason = failure_text.failure_reason()
self.assertIsNotNone(failure_reason)
self.assertEqual(
failure_reason.primary_error_message,
'Tests that the document gets overscroll event with right'
' deltaX/Y attributes. promise_test: Unhandled rejection with'
' value: "Document did not receive scrollend event."')

def test_failure_text_failure_reason_other(self):
expected_text = """retained line 1
deleted line 1
deleted line 2
retained line 2
"""

actual_text = """retained line 1
new line 1
retained line 2
new line 2
"""

self._actual_output.text = actual_text.encode('utf8')
self._expected_output.text = expected_text.encode('utf8')

failure_text = FailureText(self._actual_output, self._expected_output)
failure_reason = failure_text.failure_reason()
self.assertIsNotNone(failure_reason)
self.assertEqual(
failure_reason.primary_error_message,
'Unexpected Diff (+got, -want):\n'
'+new line 1\n'
'-deleted line 1\n'
'-deleted line 2')

def test_failure_text_failure_reason_empty(self):
# Construct a scenario in which the difference between the actual
# and expected text does not provide a useful failure reason.
expected_text = ''
actual_text = '\n'

self._actual_output.text = actual_text.encode('utf8')
self._expected_output.text = expected_text.encode('utf8')

failure_text = FailureText(self._actual_output, self._expected_output)
failure_reason = failure_text.failure_reason()
self.assertIsNone(failure_reason)
11 changes: 11 additions & 0 deletions third_party/blink/tools/blinkpy/web_tests/models/test_results.py
Expand Up @@ -113,6 +113,17 @@ def __init__(self,
# FIXME: Setting this in the constructor makes this class hard to mutate.
self.type = results.pop()

self.failure_reason = None
for failure in self.failures:
# Take the failure reason from any failure that has one.
# At time of writing, only one type of failure defines failure
# reasons, if this changes, we may want to change this to be
# more deterministic.
failure_reason = failure.failure_reason()
if failure_reason:
self.failure_reason = failure_reason
break

# These are set by the worker, not by the driver, so they are not passed to the constructor.
self.worker_name = ''
self.shard_name = ''
Expand Down
Expand Up @@ -58,7 +58,8 @@ def test_results_has_stderr(self):
self.assertTrue(result.has_stderr)

def test_results_has_repaint_overlay(self):
driver_output = DriverOutput('"invalidations": [', None, None, None)
text = '"invalidations": ['.encode('utf8')
driver_output = DriverOutput(text, None, None, None)
failures = [test_failures.FailureTextMismatch(driver_output, None)]
result = TestResult('foo', failures=failures)
self.assertTrue(result.has_repaint_overlay)
Expand Down

0 comments on commit 989707b

Please sign in to comment.