[webkitpy] The actual results reported for a flaky test shouldn't include the expectation

https://bugs.webkit.org/show_bug.cgi?id=231241

Reviewed by Jonathan Bedard.

When a test is marked as flaky and fails its expectations on the
first run but passes on the second run (the retry), the current code
was adding the expectations to the actual results.
This is misleading and makes it really hard to detect when a test
stops producing a specific result.

Instead of doing that, report the actual results of the test on both runs.

* Scripts/webkitpy/layout_tests/models/test_run_results.py:
(TestRunResults.__init__):
(TestRunResults.add):
(TestRunResults.merge):
(summarize_results):
* Scripts/webkitpy/layout_tests/views/buildbot_results.py:
(BuildBotPrinter.print_unexpected_results):


Canonical link: https://commits.webkit.org/243012@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@284198 268f45cc-cd09-0410-ab3c-d52691b4dbfc
clopez committed Oct 14, 2021
1 parent 2fcb9af commit 11141165b1b28eb71d2bc182a9bfcb99a80ec278
Showing 3 changed files with 32 additions and 8 deletions.
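
A minimal sketch of the behavioral difference, using made-up values (illustrative only, not webkitpy code; the expectation keywords are assumptions for the example):

# A test with expectations [ Pass ImageOnlyFailure ] fails with TEXT on the
# first run and passes on the retry.
expectations = ["PASS", "IMAGE"]
first_run_actual = "TEXT"   # unexpected result on the initial run
retry_actual = "PASS"       # expected result on the retry

# Before this change: the whole expectation string was appended to "actual",
# so IMAGE showed up there even though no run ever produced it.
old_actual = [first_run_actual] + expectations   # ['TEXT', 'PASS', 'IMAGE']

# After this change: only what the test really produced on each run is reported.
new_actual = [first_run_actual, retry_actual]    # ['TEXT', 'PASS']
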
@@ -1,3 +1,26 @@
2021-10-14 Carlos Alberto Lopez Perez <clopez@igalia.com>

[webkitpy] The actual results reported for a flaky test shouldn't include the expectation
https://bugs.webkit.org/show_bug.cgi?id=231241

Reviewed by Jonathan Bedard.

When a test is marked as flaky and fails its expectations on the
first run but passes on the second run (the retry), the current code
was adding the expectations to the actual results.
This is misleading and makes it really hard to detect when a test
stops producing a specific result.

Instead of doing that, report the actual results of the test on both runs.

* Scripts/webkitpy/layout_tests/models/test_run_results.py:
(TestRunResults.__init__):
(TestRunResults.add):
(TestRunResults.merge):
(summarize_results):
* Scripts/webkitpy/layout_tests/views/buildbot_results.py:
(BuildBotPrinter.print_unexpected_results):

2021-10-14 Myles C. Maxfield <mmaxfield@apple.com>

All the SDKVariant.xcconfig files should match
@@ -55,6 +55,7 @@ def __init__(self, expectations, num_tests):
self.tests_by_timeline = {}
self.results_by_name = {} # Map of test name to the last result for the test.
self.all_results = [] # All results from a run, including every iteration of every test.
self.expected_results_by_name = {}
self.unexpected_results_by_name = {}
self.failures_by_name = {}
self.total_failures = 0
@@ -79,6 +80,7 @@ def add(self, test_result, expected):
self.total_failures += 1
self.failures_by_name[test_result.test_name] = test_result.failures
if expected:
self.expected_results_by_name[test_result.test_name] = test_result
self.expected += 1
if test_result.type == test_expectations.SKIP:
self.expected_skips += 1
@@ -158,6 +160,7 @@ def merge_dict_sets(a, b):
self.tests_by_timeline = merge_dict_sets(self.tests_by_timeline, test_run_results.tests_by_timeline)
self.results_by_name.update(test_run_results.results_by_name)
self.all_results += test_run_results.all_results
self.expected_results_by_name.update(test_run_results.expected_results_by_name)
self.unexpected_results_by_name.update(test_run_results.unexpected_results_by_name)
self.failures_by_name.update(test_run_results.failures_by_name)
self.total_failures += test_run_results.total_failures
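
In rough outline, the bookkeeping added to TestRunResults works as in the simplified sketch below (an assumed, heavily reduced version; the real class tracks many more fields):

class TestRunResults(object):
    def __init__(self):
        # New: remember the last expected result per test name, alongside the
        # existing map of unexpected results.
        self.expected_results_by_name = {}
        self.unexpected_results_by_name = {}

    def add(self, test_result, expected):
        if expected:
            # Lets summarize_results() later report what the retry actually
            # produced for a flaky test instead of its expectation string.
            self.expected_results_by_name[test_result.test_name] = test_result
        else:
            self.unexpected_results_by_name[test_result.test_name] = test_result

    def merge(self, other):
        self.expected_results_by_name.update(other.expected_results_by_name)
        self.unexpected_results_by_name.update(other.unexpected_results_by_name)
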
@@ -296,11 +299,7 @@ def summarize_results(port_obj, expectations_by_type, initial_results, retry_res
num_missing += 1
test_dict['report'] = 'MISSING'
elif test_name in initial_results.unexpected_results_by_name:
if retry_results and test_name not in retry_results.unexpected_results_by_name:
actual.extend(expectations.model().get_expectations_string(test_name).split(" "))
num_flaky += 1
test_dict['report'] = 'FLAKY'
elif retry_results:
if retry_results and test_name in retry_results.unexpected_results_by_name:
retry_result_type = retry_results.unexpected_results_by_name[test_name].type
if result_type != retry_result_type:
if enabled_pixel_tests_in_retry and result_type == test_expectations.TEXT and (retry_result_type == test_expectations.IMAGE_PLUS_TEXT or retry_result_type == test_expectations.MISSING):
@@ -315,6 +314,10 @@ def summarize_results(port_obj, expectations_by_type, initial_results, retry_res
else:
num_regressions += 1
test_dict['report'] = 'REGRESSION'
elif retry_results and test_name in retry_results.expected_results_by_name:
actual.append(keywords[retry_results.expected_results_by_name[test_name].type])
num_flaky += 1
test_dict['report'] = 'FLAKY'
else:
num_regressions += 1
test_dict['report'] = 'REGRESSION'
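
Roughly, the updated classification in summarize_results() follows the sketch below (an assumed simplification of the hunks above; 'keywords' maps a result type to its string, as in the diff):

def classify_unexpected_result(test_name, retry_results, actual, keywords):
    if retry_results and test_name in retry_results.unexpected_results_by_name:
        # Still unexpected on the retry: a regression, or a flake if the
        # failure type changed between the two runs (handled earlier in the diff).
        return 'REGRESSION'
    elif retry_results and test_name in retry_results.expected_results_by_name:
        # The retry produced an expected result: append that actual result
        # (e.g. 'PASS') rather than the expectation string, and call it flaky.
        retry_result = retry_results.expected_results_by_name[test_name]
        actual.append(keywords[retry_result.type])
        return 'FLAKY'
    else:
        return 'REGRESSION'
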
@@ -146,9 +146,7 @@ def is_expected(result):
for test in tests:
result = resultsjsonparser.result_for_test(summarized_results['tests'], test)
actual = result['actual'].split(" ")
expected = result['expected'].split(" ")
# FIXME: clean this up once the old syntax is gone
new_expectations_list = [TestExpectationParser._inverted_expectation_tokens[exp] for exp in list(set(actual) | set(expected))]
new_expectations_list = [TestExpectationParser._inverted_expectation_tokens[exp] for exp in list(set(actual))]
self._print(" %s [ %s ]" % (test, " ".join(new_expectations_list)))
self._print("")
self._print("")
