# Tests Blocked or Timed Out

This notebook is an extension to the [number_of_flakes](number_of_flakes.ipynb) notebook. In this notebook, the key performance indicators that we would like to create greater visibility into and track over time are the number and percent of tests that got blocked or timed out. By observing the metrics listed below and tracking them with respect to time, we will be able to quantify the efficiency of our testing platforms.

* number and percent of tests blocked
* number and percent of tests timed out

In [2]:
import gzip
import json
import os
import sys
import pandas as pd

# Extend the import search path: the directory three levels up is always
# appended; the absolute TestGrid data-source directory is appended only
# when it is not already listed.
# NOTE(review): presumably needed so the `ipynb.fs` import below can resolve
# the sibling notebook and its helpers - confirm.
sys.path.append("../../..")

module_path_1 = os.path.abspath("../../../data-sources/TestGrid")
if module_path_1 not in sys.path:
    sys.path.append(module_path_1)

from ipynb.fs.defs.number_of_flakes import (  # noqa: E402
    testgrid_labelwise_encoding,
)  # noqa: E402

In [3]:
# Read the gzip-compressed TestGrid JSON dump and parse it into memory
testgrid_archive = "../../../../data/raw/testgrid_810.json.gz"
with gzip.open(testgrid_archive, "rb") as compressed_json:
    testgrid_data = json.loads(compressed_json.read())

In [4]:
# Encode the TestGrid data against label 8 and keep only the first
# 1000000 records of the result.
# NOTE(review): 8 is presumably the TestGrid status code for "blocked"
# (the resulting column is later named `test_blocked`) - confirm.
blocked_tests_list = testgrid_labelwise_encoding(
    testgrid_data,
    8,
)[:1000000]

In [5]:
# Sanity check: number of encoded records kept in the list
len(blocked_tests_list)

1000000

In [6]:
# Peek at the first record to see the layout of one encoded entry
blocked_tests_list[0]

(datetime.datetime(2020, 10, 8, 20, 48, 5),
 '"redhat-openshift-informing"',
 'release-openshift-okd-installer-e2e-aws-upgrade',
 'Application behind service load balancer with PDB is not disrupted',
 False)

In [7]:
# Build a data frame from the encoded records, one named column per field
blocked_columns = ["timestamp", "tab", "job", "test", "test_blocked"]
blocked_tests_df = pd.DataFrame(data=blocked_tests_list, columns=blocked_columns)
blocked_tests_df.head()

Unnamed: 0,timestamp,tab,job,test,test_blocked
0,2020-10-08 20:48:05,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
1,2020-10-08 19:12:01,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
2,2020-10-08 14:18:13,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
3,2020-10-08 11:15:28,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
4,2020-10-08 08:27:53,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False


In [8]:
# Persist a sample of the blocked-tests frame to CSV.
# NOTE(review): only the first 1000 rows are written here (`head(1000)`),
# not the 1000000 rows this comment previously stated - confirm which row
# count is intended. The size estimate recorded for the pvc limit was:
# 250mb = 1 million --> 4750 mb = 19 million
# `header=False` leaves the column-name row out of the file.
blocked_tests_df.head(1000).to_csv(
    "../../../../data/processed/blocked_tests.csv",
    header=False,
)

In [9]:
# Encode the TestGrid data against label 9 and keep only the first
# 1000000 records of the result.
# NOTE(review): 9 is presumably the TestGrid status code for "timed out"
# (the resulting column is later named `test_timed_out`) - confirm.
timed_out_tests_list = testgrid_labelwise_encoding(
    testgrid_data,
    9,
)[:1000000]

In [10]:
# Convert to dataframe
timed_out_tests_df = pd.DataFrame(
    timed_out_tests_list, columns=["timestamp", "tab", "job", "test", "test_timed_out"]
)
timed_out_tests_df.head()

Unnamed: 0,timestamp,tab,job,test,test_timed_out
0,2020-10-08 20:48:05,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
1,2020-10-08 19:12:01,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
2,2020-10-08 14:18:13,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
3,2020-10-08 11:15:28,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
4,2020-10-08 08:27:53,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False


In [11]:
# Persist a sample of the timed-out-tests frame to CSV.
# NOTE(review): only the first 1000 rows are written here (`head(1000)`),
# not the 1000000 rows this comment previously stated - confirm which row
# count is intended. The size estimate recorded for the pvc limit was:
# 250mb = 1 million --> 4750 mb = 19 million
# `header=False` leaves the column-name row out of the file.
timed_out_tests_df.head(1000).to_csv(
    "../../../../data/processed/timed_out_tests.csv",
    header=False,
)

In [12]:
# Metrics
# Each aggregate is computed once on a single slice of its frame and then
# reused for the percentage, instead of re-slicing and re-aggregating the
# same data for every figure.

# --- blocked tests ---
blocked_subset = blocked_tests_df.head(1000000)
no_tests = blocked_subset["test"].count()
print("Total number of tests: %i" % (no_tests))
no_blocked = blocked_subset["test_blocked"].sum()
print("Total number of tests blocked: %i" % (no_blocked))
# Guard the division so an empty frame reports 0% rather than NaN.
test_blocked_percentage = (no_blocked / no_tests) * 100 if no_tests else 0.0
print("Tests blocked percentage: %f" % (test_blocked_percentage))

# --- timed out tests ---
timed_out_subset = timed_out_tests_df.head(1000000)
no_timed_out = timed_out_subset["test_timed_out"].sum()
print("Total number of timed out tests: %i" % (no_timed_out))
# The denominator is taken from the timed-out frame itself, so the
# percentage stays correct even if the two frames differ in length.
no_tests_in_timed_out_frame = timed_out_subset["test"].count()
test_timed_out_percentage = (
    (no_timed_out / no_tests_in_timed_out_frame) * 100
    if no_tests_in_timed_out_frame
    else 0.0
)
print("Test timed out percentage: %f" % (test_timed_out_percentage))

Total number of tests: 1000000
Total number of tests blocked: 0
Tests blocked percentage: 0.000000
Total number of timed out tests: 0
Test timed out percentage: 0.000000
