Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better messages formatting in the CI Slack bot #48712

Merged
merged 1 commit into from
Apr 12, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 37 additions & 11 deletions utils/ci-slack-bot/ci-slack-bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,22 @@

DRY_RUN_MARK = "<no url, dry run>"

MAX_FAILURES_DEFAULT = 50
MAX_FAILURES_DEFAULT = 40
SLACK_URL_DEFAULT = DRY_RUN_MARK

EXTENDED_CHECK_PERIOD_MUL = 3
FLAKY_ALERT_PROBABILITY = 0.20

# Find tests that failed in master during the last check_period * 12 hours,
# Slack limits message size: a longer message is automatically split into several ones, which breaks its formatting
MESSAGE_LENGTH_LIMIT = 4000

# Find tests that failed in master during the last check_period * 24 hours,
# but did not fail during the last 2 weeks. Assuming these tests were broken recently.
# Counts number of failures in check_period and check_period * 12 time windows
# Counts number of failures in check_period and check_period * 24 time windows
# to distinguish rare flaky tests from completely broken tests
NEW_BROKEN_TESTS_QUERY = """
WITH
1 AS check_period,
check_period * 12 AS extended_check_period,
check_period * 24 AS extended_check_period,
now() as now
SELECT
test_name,
Expand Down Expand Up @@ -155,7 +157,7 @@ def format_failed_tests_list(failed_tests, failure_type):

for name, report in failed_tests:
cidb_url = get_play_url(ALL_RECENT_FAILURES_QUERY.format(name))
res += " - *{}* - <{}|Report> - <{}|CI DB> \n".format(
res += "- *{}* - <{}|Report> - <{}|CI DB> \n".format(
name, report, cidb_url
)
return res
Expand All @@ -173,11 +175,14 @@ def get_new_broken_tests_message(failed_tests):
if len(broken_tests) > 0:
msg += format_failed_tests_list(broken_tests, "*BROKEN*")
elif random.random() > FLAKY_ALERT_PROBABILITY:
# Should we report fuzzers unconditionally?
print("Will not report flaky tests to avoid noise: ", flaky_tests)
return None
looks_like_fuzzer = [x[0].count(" ") > 2 for x in flaky_tests]
if not any(looks_like_fuzzer):
print("Will not report flaky tests to avoid noise: ", flaky_tests)
return None

if len(flaky_tests) > 0:
if len(msg) > 0:
msg += "\n"
msg += format_failed_tests_list(flaky_tests, "flaky")

return msg
Expand All @@ -187,7 +192,7 @@ def get_too_many_failures_message_impl(failures_count):
MAX_FAILURES = int(os.environ.get("MAX_FAILURES", MAX_FAILURES_DEFAULT))
curr_failures = int(failures_count[0][0])
prev_failures = int(failures_count[0][1])
if curr_failures == 0:
if curr_failures == 0 and prev_failures != 0:
return (
"Looks like CI is completely broken: there are *no failures* at all... 0_o"
)
Expand All @@ -213,7 +218,22 @@ def get_too_many_failures_message(failures_count):
return msg


def send_to_slack(message):
def split_slack_message(long_message, limit=None):
    """Split a long message into chunks that each fit into one Slack message.

    The text is only ever broken at line boundaries, so a single line is
    never cut in the middle. Consecutive lines are packed greedily into a
    chunk while its total length (including the joining newlines) does not
    exceed ``limit``.

    Args:
        long_message: Full message text, lines separated by "\n".
        limit: Maximum length of one chunk in characters. Defaults to the
            module-level ``MESSAGE_LENGTH_LIMIT``.

    Returns:
        A list of non-blank message chunks in their original order. A single
        line that is longer than ``limit`` is returned as its own chunk
        without being truncated. Blank input produces an empty list.
    """
    if limit is None:
        limit = MESSAGE_LENGTH_LIMIT

    messages = []
    chunk_lines = []
    chunk_len = 0

    for line in long_message.split("\n"):
        # Joining with "\n" costs one extra character for every line
        # except the first one in a chunk.
        added_len = len(line) + (1 if chunk_lines else 0)
        if chunk_lines and chunk_len + added_len > limit:
            messages.append("\n".join(chunk_lines))
            chunk_lines = [line]
            chunk_len = len(line)
        else:
            chunk_lines.append(line)
            chunk_len += added_len

    if chunk_lines:
        messages.append("\n".join(chunk_lines))

    # Never hand an empty or whitespace-only chunk to the sender.
    return [msg for msg in messages if msg.strip()]


def send_to_slack_impl(message):
SLACK_URL = os.environ.get("SLACK_URL", SLACK_URL_DEFAULT)
if SLACK_URL == DRY_RUN_MARK:
return
Expand All @@ -230,6 +250,12 @@ def send_to_slack(message):
)


def send_to_slack(message):
    """Send ``message`` to Slack, one chunk at a time.

    The message is first broken into chunks by ``split_slack_message`` and
    every chunk is then passed, in order, to ``send_to_slack_impl``.
    """
    for part in split_slack_message(message):
        send_to_slack_impl(part)


def query_and_alert_if_needed(query, get_message_func):
query_res = run_clickhouse_query(query)
print("Got result {} for query {}", query_res, query)
Expand Down