# Data Source: Bugzilla

In [1]:
import pdb
import bugzilla
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [2]:
# tqdm extensions for pandas functions
tqdm.pandas()

## Get All Linked and Associated Bugs

In [3]:
# get the red hat dashboard names
response = requests.get(
    "https://testgrid.k8s.io/redhat-openshift-informing?id=dashboard-group-bar"
)
html = BeautifulSoup(response.content)
testgrid_script = html.findAll("script")[3]
testgrid_script = testgrid_script.text.split()[5].split(",")
dashboard_names = [x.split(":")[1] for x in testgrid_script if "name" in x]
dashboard_names

['"redhat-assisted-installer"',
 '"redhat-openshift-informing"',
 '"redhat-openshift-ocp-release-4.1-blocking"',
 '"redhat-openshift-ocp-release-4.1-informing"',
 '"redhat-openshift-ocp-release-4.2-blocking"',
 '"redhat-openshift-ocp-release-4.2-informing"',
 '"redhat-openshift-ocp-release-4.3-blocking"',
 '"redhat-openshift-ocp-release-4.3-broken"',
 '"redhat-openshift-ocp-release-4.3-informing"',
 '"redhat-openshift-ocp-release-4.4-blocking"',
 '"redhat-openshift-ocp-release-4.4-broken"',
 '"redhat-openshift-ocp-release-4.4-informing"',
 '"redhat-openshift-ocp-release-4.5-blocking"',
 '"redhat-openshift-ocp-release-4.5-broken"',
 '"redhat-openshift-ocp-release-4.5-informing"',
 '"redhat-openshift-ocp-release-4.6-blocking"',
 '"redhat-openshift-ocp-release-4.6-broken"',
 '"redhat-openshift-ocp-release-4.6-informing"',
 '"redhat-openshift-ocp-release-4.7-blocking"',
 '"redhat-openshift-ocp-release-4.7-broken"',
 '"redhat-openshift-ocp-release-4.7-informing"',
 '"redhat-openshift-ocp-re

**NOTE** Since other notebooks also look at only one dashboard and job, we'll do the same. at the end we'll save the dataset for further analysis.

In [4]:
# bugs linked at timestamps up to this amount of time before today will be returned
max_age = '336h'

# ci details search url
url = 'https://search.ci.openshift.org/'

dashboard = '"redhat-openshift-ocp-release-4.2-informing"'

In [5]:
# get all linked bugs
associated_bugs = list()
all_linked_bugs = set()

# get all jobs in this dashboard
response = requests.get(f"https://testgrid.k8s.io/{dashboard}/summary")
job_names = response.json().keys()

for job in tqdm(job_names):
    # get all tests in this job
    response = requests.get(f"https://testgrid.k8s.io/{dashboard}/table?&show-stale-tests=&tab={job}")

    # params to send to openshift ci search for tests under this job
    args = {
        'type': 'bug+junit',
        'context': '-1',
        'name': job,
        'maxAge': max_age,
        # DO NOT REMOVE THESE KEYS. THIS HACK PREVENTS REQUESTS FROM TIMING OUT.
        # read more here - https://stackoverflow.com/a/63377265/9743348
        'ajax': 'true',
        'mobile': 'false',
    }

    for test in response.json().get('tests', []):
        testname = test['name'].split('.', maxsplit=1)[-1]

        # use test name as the search phrase
        args['search'] = testname.replace('[', '\[').replace(']', '\]')

        # search for linked and associated bugs for this test
        response = requests.post(url, data=args)
        soup = BeautifulSoup(response.content)

        # the "em" objects in soup have information that can tell us
        # whether or not this test had a linked bug for the given job name
        em_objects = soup.find_all('em')
        pct_affected = 0
        for em in em_objects:
            if 'Found' in em.text:
                pct_affected = float(em.text.split()[2][:-1])
                break

        # init to empty for this test result / reset to empty from previous test result
        test_bugs = []

        # if percent jobs affected is 0 then the linked bugs correspond to another job
        if pct_affected > 0:
            result_rows = soup.find('table').find('tbody').find_all('tr')
            for row in result_rows:
                column_values = row.find_all('td')

                # if there is only 1 column then the result is a junit, not bug
                if len(column_values) > 1:
                    # check the second column to make sure it is a bug and not junit details result
                    if column_values[1].text == 'bug':
                        test_bugs.append(column_values[0].text[1:])
                        all_linked_bugs.add(column_values[0].text[1:])

        associated_bugs.append((dashboard, job, test, test_bugs))

100%|██████████| 31/31 [17:14<00:00, 33.37s/it] 


In [6]:
linked_and_associated_bugs = pd.DataFrame(associated_bugs, columns=['dashboard', 'job', 'test_name', 'bug_ids'])
linked_and_associated_bugs.head()

Unnamed: 0,dashboard,job,test_name,bug_ids
0,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,"{'name': 'Overall', 'original-name': 'Overall'...",[]
1,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]
2,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'Operator results.operator conditions...,[]
3,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]
4,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]


In [7]:
# todo: distribution of bugs across jobs

## Get Bugzilla Details

In [8]:
bzapi = bugzilla.Bugzilla("bugzilla.redhat.com")

In [9]:
samplebug = bzapi.getbug(1883345)
dir(samplebug)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 '__weakref__',
 '_aliases',
 '_rawdata',
 '_translate_dict',
 '_update_dict',
 'addcc',
 'addcomment',
 'alias',
 'assigned_to',
 'assigned_to_detail',
 'autorefresh',
 'blocks',
 'bugzilla',
 'cc',
 'cc_detail',
 'cf_clone_of',
 'cf_doc_type',
 'cf_environment',
 'cf_last_closed',
 'cf_release_notes',
 'cf_target_upstream_version',
 'classification',
 'close',
 'comments',
 'component',
 'components',
 'creation_time',
 'creator',
 'creator_detail',
 'deletecc',
 'depends_on',
 'description',
 'docs_contact',
 'external_bugs',
 'fixed_in',
 'flags',
 'get_attachment_ids',

In [10]:
# lets peek at some of the available bug metadata
samplebug._rawdata

{'priority': 'low',
 'cf_last_closed': <DateTime '20201105T12:46:56' at 0x7fefe177e430>,
 'creator': 'Cesar Wong',
 'blocks': [1883348],
 'assigned_to_detail': {'real_name': 'Cesar Wong',
  'email': 'cewong',
  'name': 'cewong',
  'id': 368980},
 'last_change_time': <DateTime '20201105T12:47:17' at 0x7fefe177eca0>,
 'comments': [{'is_private': False,
   'count': 0,
   'creator': 'cewong',
   'time': <DateTime '20200928T20:47:20' at 0x7fefe177eb80>,
   'bug_id': 1883345,
   'tags': [],
   'text': '+++ This bug was initially created as a clone of Bug #1883343 +++\n\nRe-enabling selected tests now that associated bugs have been fixed and are in the Red Hat OpenShift on IBM Cloud v4.3 builds\n\n[Feature:Prometheus][Conformance] Prometheus when installed on the cluster should provide ingress metrics\n[Conformance][Area:Networking][Feature:Router] The HAProxy router should enable openshift-monitoring to pull metrics\n[k8s.io] [sig-node] Pods Extended [k8s.io] Pod Container Status should neve

In [11]:
samplebug.bugzilla

<bugzilla.oldclasses.RHBugzilla at 0x7fefe0d33a60>

In [12]:
samplebug._aliases

[('summary', 'short_desc'),
 ('description', 'comment'),
 ('platform', 'rep_platform'),
 ('severity', 'bug_severity'),
 ('status', 'bug_status'),
 ('id', 'bug_id'),
 ('blocks', 'blockedby'),
 ('blocks', 'blocked'),
 ('depends_on', 'dependson'),
 ('creator', 'reporter'),
 ('url', 'bug_file_loc'),
 ('dupe_of', 'dupe_id'),
 ('dupe_of', 'dup_id'),
 ('comments', 'longdescs'),
 ('creation_time', 'opendate'),
 ('creation_time', 'creation_ts'),
 ('whiteboard', 'status_whiteboard'),
 ('last_change_time', 'delta_ts'),
 ('fixed_in', 'cf_fixed_in'),
 ('qa_whiteboard', 'cf_qa_whiteboard'),
 ('devel_whiteboard', 'cf_devel_whiteboard'),
 ('internal_whiteboard', 'cf_internal_whiteboard'),
 ('flags', 'flag_types')]

**NOTE** `_rawdata` seems redundant, the information is already captured in other fields. And `bugzilla` attribute is depracated / old representation.

In [13]:
list(vars(samplebug).keys()).remove('bugzilla')

In [14]:
# get all the available fields, except the depracated and duplicate ones
bug_details_to_get = list(vars(samplebug).keys())
bug_details_to_get.remove('_rawdata')
bug_details_to_get.remove('bugzilla')
bug_details_to_get.remove('_aliases')

# these two keys are msissing for a lot of bugs
bug_details_to_get.remove('qa_contact_detail')
bug_details_to_get.remove('cf_last_closed')
bug_details_to_get.remove('cf_clone_of')

bug_details_to_get

['autorefresh',
 'priority',
 'creator',
 'blocks',
 'assigned_to_detail',
 'last_change_time',
 'comments',
 'is_cc_accessible',
 'keywords',
 'creator_detail',
 'cc',
 'see_also',
 'groups',
 'assigned_to',
 'url',
 'qa_contact',
 'creation_time',
 'whiteboard',
 'id',
 'depends_on',
 'cf_target_upstream_version',
 'docs_contact',
 'description',
 'resolution',
 'classification',
 'cf_doc_type',
 'alias',
 'op_sys',
 'target_release',
 'status',
 'cc_detail',
 'external_bugs',
 'summary',
 'is_open',
 'platform',
 'severity',
 'cf_environment',
 'flags',
 'version',
 'tags',
 'component',
 'sub_components',
 'is_creator_accessible',
 'cf_release_notes',
 'product',
 'target_milestone',
 'is_confirmed',
 'components',
 'versions',
 'sub_component',
 'fixed_in',
 'weburl']

In [15]:
# create a df containing details of all linked and associated bugs
bugs_df = pd.DataFrame(columns=['bug_id'] + bug_details_to_get, index=range(len(all_linked_bugs)))
bugs_df = bugs_df.assign(bug_id=all_linked_bugs)
bugs_df.head()

Unnamed: 0,bug_id,autorefresh,priority,creator,blocks,assigned_to_detail,last_change_time,comments,is_cc_accessible,keywords,...,is_creator_accessible,cf_release_notes,product,target_milestone,is_confirmed,components,versions,sub_component,fixed_in,weburl
0,1934731,,,,,,,,,,...,,,,,,,,,,
1,1882505,,,,,,,,,,...,,,,,,,,,,
2,1809892,,,,,,,,,,...,,,,,,,,,,
3,1861498,,,,,,,,,,...,,,,,,,,,,
4,1948535,,,,,,,,,,...,,,,,,,,,,


In [None]:
def fill_bug_details(bug_row):
    global bzapi
    
    try:
        bug = bzapi.getbug(bug_row.bug_id)
    except Exception:
        return bug_row
    
    for detail in bug_row.index:
        try:
            bug_row[detail] = getattr(bug, detail)
        except AttributeError:
            print(detail)

    return bug_row

bugs_df.progress_apply(fill_bug_details, axis=1)
bugs_df

 99%|█████████▉| 2755/2772 [30:25<00:09,  1.84it/s] 

In [None]:
for i in bugs_df.columns:
    print(i)

## Bug fields

### Whiteboard
A free-form text area for adding short notes and tags to a bug.

In [None]:
print("Length of unique values of whiteboard : ", len(bugs_df.whiteboard.unique()))
bugs_df.whiteboard.value_counts().to_frame().head()

In [None]:
plt.figure(figsize=(16,5))
bugs_df.whiteboard.value_counts().plot.bar()
plt.xticks(rotation = 90)
plt.title("WHITEBOARD TEXT")
plt.xlabel("Whiteboard text")
plt.ylabel("Value counts for Whiteboard text")
plt.show()

From the above graph, we can conclude that even though we have 58 unique values for `whiteboard` , most of them are empty. 

### cf_target_upstream_version

In [None]:
bugs_df.cf_target_upstream_version.unique()

### docs_contact_value and qa_contact 
The people responsible for contacting and fixing the bug

In [None]:
bugs_df.docs_contact.value_counts()

In [None]:
bugs_df.qa_contact.value_counts()

Most of the tickets have no contacts assigned, but this could be useful so as to automatically identify the bug associated and assign the authorized person to the ticket.

### description 
This conatins descriptions for each bugzilla ticket.

In [None]:
bugs_df.description

This section contains information for all the values, and can be used for identifying more details about a given bug.

### resolution

In [None]:
print("Length of resolution: ", len(bugs_df.resolution.unique()))
display(bugs_df.resolution.value_counts().to_frame().head())

In [None]:
plt.figure(figsize=(16,5))
bugs_df.resolution.value_counts().plot.bar()
plt.xticks(rotation = 90)
plt.title("Resolution")
plt.xlabel("Resolution")
plt.ylabel("Value counts for Resolution")
plt.show()

From the above graph, we can infer that we have most values available for resolution, even though we have many values as empty, this looks like a promising parameter.

### classification : 

In [None]:
bugs_df.classification.unique()

### cf_doc_type

In [None]:
print("",len(bugs_df.cf_doc_type.unique()))
display(bugs_df.cf_doc_type.value_counts().to_frame().head())

In [None]:
plt.figure(figsize=(16,5))
bugs_df.cf_doc_type.value_counts().plot.bar()
plt.xticks(rotation = 90)
plt.title("Doc Type")
plt.xlabel("Doc Type")
plt.ylabel("Value counts for Doc Type")
plt.show()

From the above graph, we see that most of the tickets have the value for `doc_type`. This could be used to classify the tickets according to the doc type.

### op_sys : Operating Systems

In [None]:
print("Various Operating Systems : ", len(bugs_df.op_sys.unique()))
display(bugs_df.op_sys.value_counts().to_frame().head())

In [None]:
plt.figure(figsize=(16,5))
bugs_df.op_sys.value_counts().plot.bar()
plt.xticks(rotation = 90)
plt.title("Operating System")
plt.xlabel("Operating System")
plt.ylabel("Value counts for Operating System")
plt.show()

From the above graph, we can see that we have four OS(s) across the bugs.

### target_release

In [None]:
plt.figure(figsize=(16,5))
bugs_df.target_release.value_counts().plot.bar()
plt.xticks(rotation = 90)
plt.title("Target Release")
plt.xlabel("Target Release")
plt.ylabel("Value counts for Target Release")
plt.show()

From the above graph, we see the various target releases frequency. This value also is mostly not assigned but we still have many observations.

### status

In [None]:
print("Various Statuses : ", len(bugs_df.status.unique()))
display(bugs_df.status.value_counts().to_frame())

In [None]:
plt.figure(figsize=(16,5))
bugs_df.status.value_counts().plot.bar()
plt.xticks(rotation = 90)
plt.title("Status")
plt.xlabel("Status")
plt.ylabel("Value counts for Status")
plt.show()

The above graph, shows various status across tickets.

### cc_detail

In [None]:
display(bugs_df.cc_detail.value_counts().to_frame().head())

In [None]:
plt.figure(figsize=(16,5))
bugs_df.cc_detail.value_counts().to_frame().plot()
plt.xticks(rotation = 90)
plt.title("Details")
plt.xlabel("Details")
plt.ylabel("Value counts for Details")
plt.show()

### External Bugs

In [None]:
display(bugs_df.external_bugs.value_counts().to_frame().head())

In [None]:
plt.figure(figsize=(16,5))
bugs_df.external_bugs.value_counts().to_frame().plot()
plt.xticks(rotation = 90)
plt.title("External Bugs")
plt.xlabel("External Bugs")
plt.ylabel("Value counts for External Bugs")
plt.show()

### summary

In [None]:
bugs_df.summary.head()

In [None]:
bugs_df.summary[0]

The bug `summary` is a short sentence which succinctly describes what the bug is about.

### is_open

In [None]:
bugs_df.is_open.head()

The bug `is_open` field returns a boolean value indicating if the bug is currently open or not

### platform

In [None]:
bugs_df.platform.head()

`platform` field indicates the hardware platform the bug was observed on.

In [None]:
platforms = bugs_df.platform.unique()
print(platforms)

In [None]:
platforms_count = []
for i in range(len(platforms)):
    platforms_count.append((bugs_df.platform == platforms[i]).sum())
platforms_count

In [None]:
plt.figure(figsize = (15,10))
sns.barplot(platforms, platforms_count)
plt.xlabel("Platform")
plt.ylabel("Bug Count")
plt.title("Bug Platform Distribution")
plt.show()

### severity

In [None]:
bugs_df.severity.head()

The `severity` field categorzies the severity level of each bug. Let's see the different severity levels defined.

In [None]:
severity = bugs_df.severity.unique()
print(severity)

In [None]:
severity_count = []
for i in range(len(severity)):
    severity_count.append((bugs_df.severity == severity[i]).sum())
severity_count

Let's plot a simple graph to visualize the distribution of bug severities

In [None]:
plt.figure(figsize = (15,10))
sns.barplot(severity, severity_count)
plt.xlabel("Severity Level")
plt.ylabel("Bug Count")
plt.title("Bug Severity Distribution")
plt.show()

### cf_environment

In [None]:
bugs_df.cf_environment.head()

In [None]:
cf_env = bugs_df.cf_environment.unique()
len(cf_env)

In [None]:
cf_env[1]

Not too sure what `cf_environment` is supposed to return

### flags

In [None]:
bugs_df['flags'][:10]

In [None]:
bugs_df['flags'][6]

The `flags` field seems to return empty for most bugs. For thos bugs which have this field set, it seems to have redundant information which are already available in other bug fields so we can probably ignore this field.

### version

In [None]:
bugs_df.version.head()

In [None]:
versions = bugs_df.version.unique()
versions

In [None]:
version_count = []
for i in range(len(versions)):
    version_count.append((bugs_df.version == versions[i]).sum())
version_count

The `version` field indicates the version of the software the bug was found in. Let's plot a simple graph to visualize the distribution of bugs across different software versions.

In [None]:
plt.figure(figsize = (15,10))
sns.barplot(version_count, versions)
plt.xlabel("Bug Count")
plt.ylabel("Software Versions")
plt.title("Bug distrbution across different software versions")
plt.show()

### tags

In [None]:
bugs_df.tags.head()

The `tags` field seems to be empty for most bugs so we can probably ignore this field.

### component

In [None]:
bugs_df.component.head()

In [None]:
components = bugs_df.component.unique()
components

In [None]:
component_count = []
for i in range(len(components)):
    component_count.append((bugs_df.component == components[i]).sum())

Bugs are categorised into Product and Component. Components are second-level categories and the `component` field indicates which component is affected by the bug.

### sub_component

In [None]:
bugs_df.sub_component.head()

In [None]:
sub_component = bugs_df.sub_component.unique()
len(sub_component)

In [None]:
sub_component

In [None]:
sub_component_count = []
for i in range(len(sub_component)):
    sub_component_count.append((bugs_df.sub_component == sub_component[i]).sum())
sub_component_count

The `sub_component` field indicates the sub-component of a specifc component the bug affects.

### is_creator_accessible

In [None]:
bugs_df.is_creator_accessible.head()

The `is_creator_accessible` field returns a boolean value, but doesn't seem to be useful for our analysis. 

### cf_release_notes

In [None]:
bugs_df.cf_release_notes.tail()

The `cf_release_notes` is the basis of the errata or release note for the bug. It can also be used for change logs. However, it seems to be empty for most bugs and can be excluded from our analysis. 

### product

In [None]:
bugs_df['product'][:10]

In [None]:
products = bugs_df['product'].unique()
products

The `product` field indicates the software product affected by the bug.

In [None]:
product_count = []
for i in range(len(products)):
    product_count.append((bugs_df['product'] == products[i]).sum())
product_count

Let's plot a simple graph to visualize the distribution of bugs across different products

In [None]:
plt.figure(figsize = (15,10))
sns.barplot(product_count, products)
plt.xlabel("Bug Count")
plt.ylabel("Software Products")
plt.title("Bug distrbution across different software products")
plt.show()

### target_milestone

In [None]:
bugs_df.target_milestone.head()

In [None]:
bugs_df.target_milestone.unique()

The `target_milestone` is used to define when the engineer the bug is assigned to expects to fix it. However, it doesn't seem to be applicable for most bugs.

### is_confirmed

In [None]:
bugs_df.is_confirmed.head()

The `is_confirmed` field seems to return a boolean value (not sure what it indicates) and doesn't seem to be useful for our analysis.

### components

In [None]:
bugs_df.components.head()

The `components` field returns the same values as the `component` field, but in a list format.

### sub_components

In [None]:
bugs_df.sub_components.head()

The `sub_components` field is similar to the `sub_component` field, but returns both the component and sub-component affected by the bug in a dictionary format.

### versions

In [None]:
bugs_df.versions.head()

The `versions` field returns the same values as the `version` field, but in a list format.

### fixed_in

In [None]:
bugs_df.fixed_in[:15]

In [None]:
bugs_df.fixed_in.unique()

The `fixed_in` field seems to indicate the software version the bug was fixed in. However, it doesn't seem to be applicable to all bugs as some bugs may still be open and not yet resolved.

### weburl

In [None]:
bugs_df.weburl.head()

In [None]:
bugs_df['bug_id'][0]

In [None]:
bugs_df['weburl'][0]

The `weburl` field contains the bugzilla URL for the respective bug ID

## Merge Bug Details + Affected Jobs/Test DF

In [None]:
linked_and_associated_bugs.head()

In [None]:
firstdf = linked_and_associated_bugs.explode('bug_ids')
firstdf = firstdf.rename(columns={'bug_ids': 'bug_id'})
firstdf.head()

In [None]:
res = bugs_df.merge(
    firstdf,
#     how='left',
    left_on='bug_id',
    right_on='bug_id',
)
res.head()

## Get Linked Bug Data for All Dashboards

In [None]:
# # get all linked bugs
# associated_bugs = list()
# all_linked_bugs = set()
# for dashboard in tqdm(dashboard_names):
#     # get all jobs in this dashboard
#     response = requests.get(f"https://testgrid.k8s.io/{dashboard}/summary")
#     job_names = response.json().keys()
    
#     for job in job_names:
#         # get all tests in this job
#         response = requests.get(f"https://testgrid.k8s.io/{dashboard}/table?&show-stale-tests=&tab={job}")
        
#         # params to send to openshift ci search for tests under this job
#         args = {
#             'type': 'bug+junit',
#             'context': '-1',
#             'name': job,
#             'maxAge': max_age,
#             # DO NOT REMOVE THESE KEYS. THIS HACK PREVENTS REQUESTS FROM TIMING OUT.
#             # read more here - https://stackoverflow.com/a/63377265/9743348
#             'ajax': 'true',
#             'mobile': 'false',
#         }
        
#         for test in response.json().get('tests', []):
#             testname = test['name'].split('.', maxsplit=1)[-1]
            
#             # use test name as the search phrase
#             args['search'] = testname.replace('[', '\[').replace(']', '\]')

#             # search for linked and associated bugs for this test
#             response = requests.post(url, data=args)
#             soup = BeautifulSoup(response.content)

#             # the "em" objects in soup have information that can tell us
#             # whether or not this test had a linked bug for the given job name
#             em_objects = soup.find_all('em')
#             pct_affected = 0
#             for em in em_objects:
#                 if 'Found' in em.text:
#                     pct_affected = float(em.text.split()[2][:-1])
#                     break

#             # init to empty for this test result / reset to empty from previous test result
#             test_bugs = []
            
#             # if percent jobs affected is 0 then the linked bugs correspond to another job
#             if pct_affected > 0:
#                 result_rows = soup.find('table').find('tbody').find_all('tr')
#                 for row in result_rows:
#                     column_values = row.find_all('td')
                    
#                     # if there is only 1 column then the result is a junit, not bug
#                     if len(column_values) > 1:
#                         # check the second column to make sure it is a bug and not junit details result
#                         if column_values[1].text == 'bug':
#                             test_bugs.append(column_values[0].text[1:])
#                             all_linked_bugs.add(column_values[0].text[1:])

#             associated_bugs.append((dashboard, job, test, test_bugs))

## todo

combine it into a dataframe s.t. each row has one bugzilla id, and columns are status, component, etc + affected tests/jobs.