# Percent of Failing Tests Fixed

This notebook is an addition to the series of KPI notebooks in which we calculate key performance indicators for CI processes. In this notebook, we will calculate the KPI "Percent of failing tests fixed in each run/timestamp". Essentially, we will determine what percent of the tests that were failing in the previous test run got fixed in the current test run.

For OpenShift managers, this information can potentially help quantify the agility and efficiency of their team. If this number is high, it means they are able to quickly identify the root causes of all failing tests in the previous run and fix them. Conversely, if this number is low, it means only a small percent of previously failing tests get fixed in each new run, which in turn implies that their CI process is likely not as efficient as it could be.

In [1]:
import os
import sys
import gzip
import json
import datetime as dt

import numpy as np
import pandas as pd

sys.path.append("../../..")

module_path_1 = os.path.abspath(os.path.join("../../../data-sources/TestGrid"))
if module_path_1 not in sys.path:
    sys.path.append(module_path_1)

from ipynb.fs.defs.number_of_flakes import (  # noqa: E402
    testgrid_labelwise_encoding,
)  # noqa: E402

from ipynb.fs.defs.testgrid_EDA import decode_run_length  # noqa: E402

In [2]:
# Read the gzip-compressed TestGrid JSON dump and parse it into `data`
raw_testgrid_path = "../../../../data/raw/testgrid_810.json.gz"
with gzip.open(raw_testgrid_path, "rb") as read_file:
    data = json.loads(read_file.read())

In [3]:
# NOTE: this loop is a modified version of the testgrid_labelwise_encoding function.
# Instead of indicating "is_flake" or "is_pass" etc., each record carries a flag that
# is True when the test "is passing now but was failing before" aka "is_flip".
PASSING_STATUS = 1  # status code the current run must have
FAILING_STATUS = 12  # status code the previous run must have

# One flat record per (tab, grid, test, timestamp), built directly so that no
# nested list has to be rewritten in place and flattened afterwards:
#   (timestamp, tab, grid, test name, did_get_fixed)
flatter_list = []

for tab in data.keys():
    print(tab)

    for grid in data[tab].keys():
        current_grid = data[tab][grid]

        # get all timestamps for this grid (x-axis of grid)
        timestamps = [
            dt.datetime.fromtimestamp(t // 1000) for t in current_grid["timestamps"]
        ]

        for current_test in current_grid["grid"]:
            test_name = current_test["name"]
            # NOTE: decoded statuses go from most recent to least recent, so the
            # element right after a status is the run that preceded it
            # (assumes decode_run_length returns a sliceable sequence)
            statuses_decoded = decode_run_length(current_test["statuses"])

            did_get_fixed = [
                current == PASSING_STATUS and previous == FAILING_STATUS
                for current, previous in zip(statuses_decoded, statuses_decoded[1:])
            ]

            # the least recent run cannot be "True", assuming it wasn't failing before
            did_get_fixed.append(False)

            # zip stops at the shorter of the two sequences, pairing each flag
            # with the timestamp of its run
            flatter_list.extend(
                (timestamp, tab, grid, test_name, fixed)
                for timestamp, fixed in zip(timestamps, did_get_fixed)
            )

"redhat-openshift-informing"
"redhat-openshift-ocp-release-3.11-informing"
"redhat-openshift-ocp-release-4.1-blocking"
"redhat-openshift-ocp-release-4.1-informing"
"redhat-openshift-ocp-release-4.2-blocking"
"redhat-openshift-ocp-release-4.2-informing"
"redhat-openshift-ocp-release-4.3-blocking"
"redhat-openshift-ocp-release-4.3-broken"
"redhat-openshift-ocp-release-4.3-informing"
"redhat-openshift-ocp-release-4.4-blocking"
"redhat-openshift-ocp-release-4.4-broken"
"redhat-openshift-ocp-release-4.4-informing"
"redhat-openshift-ocp-release-4.5-blocking"
"redhat-openshift-ocp-release-4.5-broken"
"redhat-openshift-ocp-release-4.5-informing"
"redhat-openshift-ocp-release-4.6-blocking"
"redhat-openshift-ocp-release-4.6-broken"
"redhat-openshift-ocp-release-4.6-informing"
"redhat-openshift-ocp-release-4.7-blocking"
"redhat-openshift-ocp-release-4.7-broken"
"redhat-openshift-ocp-release-4.7-informing"
"redhat-openshift-okd-release-4.3-informing"
"redhat-openshift-okd-release-4.4-informing"
"r

In [4]:
# Inspect one record: (timestamp, tab, grid, test, did_get_fixed)
sample_record = flatter_list[0]
sample_record

(datetime.datetime(2020, 10, 8, 20, 48, 5),
 '"redhat-openshift-informing"',
 'release-openshift-okd-installer-e2e-aws-upgrade',
 'Application behind service load balancer with PDB is not disrupted',
 False)

In [5]:
# Tabulate the records: each row says whether a test got fixed at a given
# timestamp (as compared to the previous one)
fixed_columns = ["timestamp", "tab", "grid", "test", "did_get_fixed"]
df_csv = pd.DataFrame(data=flatter_list, columns=fixed_columns)
df_csv.head()

Unnamed: 0,timestamp,tab,grid,test,did_get_fixed
0,2020-10-08 20:48:05,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
1,2020-10-08 19:12:01,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
2,2020-10-08 14:18:13,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
3,2020-10-08 11:15:28,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
4,2020-10-08 08:27:53,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False


In [6]:
# Count the tests that got fixed in every run; the result is a series
# multiindexed by (tab, grid, timestamp)
fixed_run_keys = ["tab", "grid", "timestamp"]
num_fixed_per_ts = df_csv.groupby(fixed_run_keys)["did_get_fixed"].sum()
num_fixed_per_ts

tab                           grid                                             timestamp          
"redhat-openshift-informing"  release-openshift-okd-installer-e2e-aws-upgrade  2020-09-23 22:16:02    0
                                                                               2020-09-24 00:04:39    1
                                                                               2020-09-24 01:57:00    0
                                                                               2020-09-24 03:48:47    6
                                                                               2020-09-24 05:36:09    0
                                                                                                     ..
"redhat-osde2e-stage-moa"     osde2e-stage-moa-e2e-upgrade-default-next        2020-10-07 08:01:13    0
                                                                               2020-10-07 16:01:54    0
                                                                     

In [7]:
# Encode, per test and timestamp, whether the test was failing (status code 12)
failure_records = testgrid_labelwise_encoding(data, 12)
failure_columns = ["timestamp", "tab", "job", "test", "failure"]
failures_df = pd.DataFrame(data=failure_records, columns=failure_columns)
failures_df.head()

Unnamed: 0,timestamp,tab,job,test,failure
0,2020-10-08 20:48:05,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
1,2020-10-08 19:12:01,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,True
2,2020-10-08 14:18:13,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
3,2020-10-08 11:15:28,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
4,2020-10-08 08:27:53,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False


In [8]:
# Count the tests that failed in every run; the result is a series
# multiindexed by (tab, job, timestamp)
failure_run_keys = ["tab", "job", "timestamp"]
num_failures_per_ts = failures_df.groupby(failure_run_keys)["failure"].sum()
num_failures_per_ts

tab                           job                                              timestamp          
"redhat-openshift-informing"  release-openshift-okd-installer-e2e-aws-upgrade  2020-09-23 22:16:02    1
                                                                               2020-09-24 00:04:39    0
                                                                               2020-09-24 01:57:00    7
                                                                               2020-09-24 03:48:47    0
                                                                               2020-09-24 05:36:09    0
                                                                                                     ..
"redhat-osde2e-stage-moa"     osde2e-stage-moa-e2e-upgrade-default-next        2020-10-07 08:01:13    0
                                                                               2020-10-07 16:01:54    0
                                                                     

In [9]:
# Percent of failing tests that got fixed at each run/timestamp:
#   (# tests fixed in this run) / (# tests failing in the previous run of the same grid)

# The failures series names its second index level "job" while the fixed series
# names it "grid"; give both the same level names so they share one index schema.
num_failures_aligned = num_failures_per_ts.rename_axis(
    list(num_fixed_per_ts.index.names)
)

# Shift within each (tab, grid) group: a plain .shift() would pair the first run
# of a grid with the failure count of the last run of the preceding grid.
prev_failures_per_ts = num_failures_aligned.groupby(level=["tab", "grid"]).shift()

# NaN appears when a run has no previous run, or for 0 fixed / 0 failing;
# both are reported as 0 percent fixed
pct_fixed_per_ts = (num_fixed_per_ts / prev_failures_per_ts).fillna(0)
pct_fixed_per_ts

tab                           grid                                             timestamp          
"redhat-openshift-informing"  release-openshift-okd-installer-e2e-aws-upgrade  2020-09-23 22:16:02    0.000000
                                                                               2020-09-24 00:04:39    1.000000
                                                                               2020-09-24 01:57:00    0.000000
                                                                               2020-09-24 03:48:47    0.857143
                                                                               2020-09-24 05:36:09    0.000000
                                                                                                        ...   
"redhat-osde2e-stage-moa"     osde2e-stage-moa-e2e-upgrade-default-next        2020-10-07 08:01:13    0.000000
                                                                               2020-10-07 16:01:54    0.000000
             

In [10]:
# Save the metric (takes about 67 MB in csv data)
file = "pct_fixed_per_run.csv"
folder = "../../../../data/processed/metrics/percent_tests_fixed"

# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that could race with another process creating it
os.makedirs(folder, exist_ok=True)

# written without a header row: index levels followed by the metric value
fullpath = os.path.join(folder, file)
pct_fixed_per_ts.to_csv(fullpath, header=False)