# Quantify Builds

This notebook is an extension to the [number_of_flakes](number_of_flakes.ipynb) notebook. In this notebook, the key performance indicators that we would like to create greater visibility into and track over time are the number and percent of builds that passed/failed. These can be used to capture the build success rate, i.e., the number of successful builds / deployments relative to the total number of builds / deployments.

* number and percent of builds passed
* number and percent of builds failed

In [2]:
import gzip
import json
import os
import sys
import pandas as pd

# Extend the import search path with locations given relative to the current
# working directory, so the notebook must be run from its own folder.
# NOTE(review): "../../.." is presumably the repository root — confirm.
sys.path.append("../../..")

# Add the TestGrid data-sources folder (as an absolute path) only if it is not
# already on sys.path, so re-running this cell does not add duplicates.
module_path_1 = os.path.abspath(os.path.join("../../../data-sources/TestGrid"))
if module_path_1 not in sys.path:
    sys.path.append(module_path_1)

from ipynb.fs.defs.number_of_flakes import (  # noqa: E402
    testgrid_labelwise_encoding,
)  # noqa: E402

In [3]:
# Read the gzip-compressed JSON file and parse it into `testgrid_data`.
raw_testgrid_path = "../../../../data/raw/testgrid_810.json.gz"
with gzip.open(raw_testgrid_path, "rb") as gz_handle:
    testgrid_data = json.load(gz_handle)

In [4]:
# Encode the loaded data for status label 11 and keep only the first 1000000
# entries of the result (the slice is applied directly to the returned value so
# the full result is not kept around under a second name).
# NOTE(review): 11 is presumably TestGrid's BUILD_FAIL status code — confirm
# against testgrid_labelwise_encoding in the number_of_flakes notebook.
build_failures_list = testgrid_labelwise_encoding(testgrid_data, 11)[0:1000000]

In [5]:
# Sanity check: number of entries kept after the truncation above.
len(build_failures_list)

1000000

In [6]:
# Inspect the first encoded entry to see the layout of a single record.
build_failures_list[0]

(datetime.datetime(2020, 10, 8, 20, 48, 5),
 '"redhat-openshift-informing"',
 'release-openshift-okd-installer-e2e-aws-upgrade',
 'Application behind service load balancer with PDB is not disrupted',
 False)

In [7]:
# Turn the encoded tuples into a DataFrame, naming each tuple position.
failure_columns = ["timestamp", "tab", "job", "test", "build_failure"]
build_failures_df = pd.DataFrame(build_failures_list, columns=failure_columns)
build_failures_df.head()

Unnamed: 0,timestamp,tab,job,test,build_failure
0,2020-10-08 20:48:05,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
1,2020-10-08 19:12:01,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
2,2020-10-08 14:18:13,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
3,2020-10-08 11:15:28,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
4,2020-10-08 08:27:53,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False


In [8]:
# Persist only a small sample: .head(1000) writes the first 1000 rows of the
# frame, which was itself built from the first 1000000 of roughly 19 million
# encoded rows because of PVC storage limits.
# Size estimate: 250 MB per 1 million rows --> 4750 MB for all 19 million.
# NOTE(review): an earlier version of this comment said the first 1000000 rows
# are saved; confirm whether 1000 or 1000000 rows is the intended sample size.
# header=False omits the column names; the DataFrame index is still written as
# the first CSV column (pandas default index=True).
build_failures_df.head(1000).to_csv(
    "../../../../data/processed/build_failures.csv",
    header=False,
)

In [9]:
# Encode the loaded data for status label 15 and keep only the first 1000000
# entries of the result (the slice is applied directly to the returned value so
# the full result is not kept around under a second name).
# NOTE(review): 15 is presumably TestGrid's BUILD_PASSED status code — confirm
# against testgrid_labelwise_encoding in the number_of_flakes notebook.
build_passing_list = testgrid_labelwise_encoding(testgrid_data, 15)[0:1000000]

In [10]:
# Turn the encoded tuples into a DataFrame, naming each tuple position.
passing_columns = ["timestamp", "tab", "job", "test", "build_passing"]
build_passing_df = pd.DataFrame(build_passing_list, columns=passing_columns)
build_passing_df.head()

Unnamed: 0,timestamp,tab,job,test,build_passing
0,2020-10-08 20:48:05,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
1,2020-10-08 19:12:01,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
2,2020-10-08 14:18:13,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
3,2020-10-08 11:15:28,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False
4,2020-10-08 08:27:53,"""redhat-openshift-informing""",release-openshift-okd-installer-e2e-aws-upgrade,Application behind service load balancer with ...,False


In [11]:
# Persist only a small sample: .head(1000) writes the first 1000 rows of the
# frame, which was itself built from the first 1000000 of roughly 19 million
# encoded rows because of PVC storage limits.
# Size estimate: 250 MB per 1 million rows --> 4750 MB for all 19 million.
# NOTE(review): an earlier version of this comment said the first 1000000 rows
# are saved; confirm whether 1000 or 1000000 rows is the intended sample size.
# header=False omits the column names; the DataFrame index is still written as
# the first CSV column (pandas default index=True).
build_passing_df.head(1000).to_csv(
    "../../../../data/processed/build_pass.csv",
    header=False,
)

In [12]:
# Metrics
# Take (at most) the first 1000000 rows of each frame once and reuse the slices
# for every figure below.
passing_sample = build_passing_df.head(1000000)
failures_sample = build_failures_df.head(1000000)

# Number of non-null entries in the "test" column of the passing frame.
no_tests = passing_sample.test.count()
print("Total number of tests: %i" % (no_tests))

# Summing a boolean column counts the rows flagged True.
# NOTE(review): these sums count flagged rows (one per encoded record), not
# distinct builds — confirm this is the intended definition of the KPI.
no_failures = failures_sample.build_failure.sum()
print("Total number of failing builds: %i" % (no_failures))
build_failures_percentage = (no_failures / failures_sample.test.count()) * 100
print("Build failure percentage: %f" % (build_failures_percentage))

no_pass = passing_sample.build_passing.sum()
print("Total number of passing builds: %i" % (no_pass))
build_pass_percentage = (no_pass / no_tests) * 100
print("Build pass percentage: %f" % (build_pass_percentage))

Total number of tests: 1000000
Total number of failing builds: 0
Build failure percentage: 0.000000
Total number of passing builds: 0
Build pass percentage: 0.000000
