# Data Source: Bugzilla

In [37]:
import pdb
import bugzilla
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup

import pandas as pd

In [3]:
# tqdm extensions for pandas functions
tqdm.pandas()

## Get All Linked and Associated Bugs

In [4]:
# get the red hat dashboard names
response = requests.get(
    "https://testgrid.k8s.io/redhat-openshift-informing?id=dashboard-group-bar"
)
html = BeautifulSoup(response.content)
testgrid_script = html.findAll("script")[3]
testgrid_script = testgrid_script.text.split()[5].split(",")
dashboard_names = [x.split(":")[1] for x in testgrid_script if "name" in x]
dashboard_names

['"redhat-assisted-installer"',
 '"redhat-openshift-informing"',
 '"redhat-openshift-ocp-release-4.1-blocking"',
 '"redhat-openshift-ocp-release-4.1-informing"',
 '"redhat-openshift-ocp-release-4.2-blocking"',
 '"redhat-openshift-ocp-release-4.2-informing"',
 '"redhat-openshift-ocp-release-4.3-blocking"',
 '"redhat-openshift-ocp-release-4.3-broken"',
 '"redhat-openshift-ocp-release-4.3-informing"',
 '"redhat-openshift-ocp-release-4.4-blocking"',
 '"redhat-openshift-ocp-release-4.4-broken"',
 '"redhat-openshift-ocp-release-4.4-informing"',
 '"redhat-openshift-ocp-release-4.5-blocking"',
 '"redhat-openshift-ocp-release-4.5-broken"',
 '"redhat-openshift-ocp-release-4.5-informing"',
 '"redhat-openshift-ocp-release-4.6-blocking"',
 '"redhat-openshift-ocp-release-4.6-broken"',
 '"redhat-openshift-ocp-release-4.6-informing"',
 '"redhat-openshift-ocp-release-4.7-blocking"',
 '"redhat-openshift-ocp-release-4.7-broken"',
 '"redhat-openshift-ocp-release-4.7-informing"',
 '"redhat-openshift-ocp-re

**NOTE** Since other notebooks also look at only one dashboard and job, we'll do teh same. at the end we'll save the dataset for further analysis.

In [39]:
# bugs linked at timestamps up to this amount of time before today will be returned
max_age = '336h'

# ci details search url
url = 'https://search.ci.openshift.org/'

dashboard = '"redhat-openshift-ocp-release-4.2-informing"'

In [40]:
# get all linked bugs
associated_bugs = list()
all_linked_bugs = set()

# get all jobs in this dashboard
response = requests.get(f"https://testgrid.k8s.io/{dashboard}/summary")
job_names = response.json().keys()

for job in tqdm(job_names):
    # get all tests in this job
    response = requests.get(f"https://testgrid.k8s.io/{dashboard}/table?&show-stale-tests=&tab={job}")

    # params to send to openshift ci search for tests under this job
    args = {
        'type': 'bug+junit',
        'context': '-1',
        'name': job,
        'maxAge': max_age,
        # DO NOT REMOVE THESE KEYS. THIS HACK PREVENTS REQUESTS FROM TIMING OUT.
        # read more here - https://stackoverflow.com/a/63377265/9743348
        'ajax': 'true',
        'mobile': 'false',
    }

    for test in response.json().get('tests', []):
        testname = test['name'].split('.', maxsplit=1)[-1]

        # use test name as the search phrase
        args['search'] = testname.replace('[', '\[').replace(']', '\]')

        # search for linked and associated bugs for this test
        response = requests.post(url, data=args)
        soup = BeautifulSoup(response.content)

        # the "em" objects in soup have information that can tell us
        # whether or not this test had a linked bug for the given job name
        em_objects = soup.find_all('em')
        pct_affected = 0
        for em in em_objects:
            if 'Found' in em.text:
                pct_affected = float(em.text.split()[2][:-1])
                break

        # init to empty for this test result / reset to empty from previous test result
        test_bugs = []

        # if percent jobs affected is 0 then the linked bugs correspond to another job
        if pct_affected > 0:
            result_rows = soup.find('table').find('tbody').find_all('tr')
            for row in result_rows:
                column_values = row.find_all('td')

                # if there is only 1 column then the result is a junit, not bug
                if len(column_values) > 1:
                    # check the second column to make sure it is a bug and not junit details result
                    if column_values[1].text == 'bug':
                        test_bugs.append(column_values[0].text[1:])
                        all_linked_bugs.add(column_values[0].text[1:])

        associated_bugs.append((dashboard, job, test, test_bugs))

100%|██████████| 31/31 [19:48<00:00, 38.34s/it] 


SyntaxError: 'break' outside loop (<ipython-input-40-41d3402b5994>, line 61)

In [41]:
linked_and_associated_bugs = pd.DataFrame(associated_bugs, columns=['dashboard', 'job', 'test_name', 'bug_ids'])
linked_and_associated_bugs.head()

Unnamed: 0,dashboard,job,test_name,bug_ids
0,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,"{'name': 'Overall', 'original-name': 'Overall'...",[]
1,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]
2,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'Operator results.operator conditions...,[1936859]
3,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]
4,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]


In [43]:
# todo: distribution of bugs across jobs

## Get Bugzilla Details

In [8]:
bzapi = bugzilla.Bugzilla("bugzilla.redhat.com")

In [9]:
samplebug = bzapi.getbug(1883345)
dir(samplebug)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 '__weakref__',
 '_aliases',
 '_rawdata',
 '_translate_dict',
 '_update_dict',
 'addcc',
 'addcomment',
 'alias',
 'assigned_to',
 'assigned_to_detail',
 'autorefresh',
 'blocks',
 'bugzilla',
 'cc',
 'cc_detail',
 'cf_clone_of',
 'cf_doc_type',
 'cf_environment',
 'cf_last_closed',
 'cf_release_notes',
 'cf_target_upstream_version',
 'classification',
 'close',
 'comments',
 'component',
 'components',
 'creation_time',
 'creator',
 'creator_detail',
 'deletecc',
 'depends_on',
 'description',
 'docs_contact',
 'external_bugs',
 'fixed_in',
 'flags',
 'get_attachment_ids',

In [12]:
# lets peek at some of the available bug metadata
samplebug._rawdata

{'priority': 'low',
 'cf_last_closed': <DateTime '20201105T12:46:56' at 0x7fcf48939d30>,
 'creator': 'Cesar Wong',
 'blocks': [1883348],
 'assigned_to_detail': {'real_name': 'Cesar Wong',
  'email': 'cewong',
  'name': 'cewong',
  'id': 368980},
 'last_change_time': <DateTime '20201105T12:47:17' at 0x7fcf48939e20>,
 'comments': [{'is_private': False,
   'count': 0,
   'creator': 'cewong',
   'time': <DateTime '20200928T20:47:20' at 0x7fcf489216d0>,
   'bug_id': 1883345,
   'tags': [],
   'text': '+++ This bug was initially created as a clone of Bug #1883343 +++\n\nRe-enabling selected tests now that associated bugs have been fixed and are in the Red Hat OpenShift on IBM Cloud v4.3 builds\n\n[Feature:Prometheus][Conformance] Prometheus when installed on the cluster should provide ingress metrics\n[Conformance][Area:Networking][Feature:Router] The HAProxy router should enable openshift-monitoring to pull metrics\n[k8s.io] [sig-node] Pods Extended [k8s.io] Pod Container Status should neve

In [13]:
samplebug.bugzilla

<bugzilla.oldclasses.RHBugzilla at 0x7fcf49ce1370>

In [18]:
samplebug._aliases

[('summary', 'short_desc'),
 ('description', 'comment'),
 ('platform', 'rep_platform'),
 ('severity', 'bug_severity'),
 ('status', 'bug_status'),
 ('id', 'bug_id'),
 ('blocks', 'blockedby'),
 ('blocks', 'blocked'),
 ('depends_on', 'dependson'),
 ('creator', 'reporter'),
 ('url', 'bug_file_loc'),
 ('dupe_of', 'dupe_id'),
 ('dupe_of', 'dup_id'),
 ('comments', 'longdescs'),
 ('creation_time', 'opendate'),
 ('creation_time', 'creation_ts'),
 ('whiteboard', 'status_whiteboard'),
 ('last_change_time', 'delta_ts'),
 ('fixed_in', 'cf_fixed_in'),
 ('qa_whiteboard', 'cf_qa_whiteboard'),
 ('devel_whiteboard', 'cf_devel_whiteboard'),
 ('internal_whiteboard', 'cf_internal_whiteboard'),
 ('flags', 'flag_types')]

**NOTE** `_rawdata` seems redundant, the information is already captured in other fields. And `bugzilla` attribute is depracated / old representation.

In [16]:
list(vars(samplebug).keys()).remove('bugzilla')

None


In [34]:
# get all the available fields, except the depracated and duplicate ones
bug_details_to_get = list(vars(samplebug).keys())
bug_details_to_get.remove('_rawdata')
bug_details_to_get.remove('bugzilla')
bug_details_to_get.remove('_aliases')

# these two keys are msissing for a lot of bugs
bug_details_to_get.remove('qa_contact_detail')
bug_details_to_get.remove('cf_last_closed')
bug_details_to_get.remove('cf_clone_of')

bug_details_to_get

['autorefresh',
 'priority',
 'creator',
 'blocks',
 'assigned_to_detail',
 'last_change_time',
 'comments',
 'is_cc_accessible',
 'keywords',
 'creator_detail',
 'cc',
 'see_also',
 'groups',
 'assigned_to',
 'url',
 'qa_contact',
 'creation_time',
 'whiteboard',
 'id',
 'depends_on',
 'cf_target_upstream_version',
 'docs_contact',
 'description',
 'qa_contact_detail',
 'resolution',
 'classification',
 'cf_doc_type',
 'alias',
 'op_sys',
 'target_release',
 'status',
 'cc_detail',
 'external_bugs',
 'summary',
 'is_open',
 'platform',
 'severity',
 'cf_environment',
 'flags',
 'version',
 'tags',
 'component',
 'sub_components',
 'is_creator_accessible',
 'cf_release_notes',
 'product',
 'target_milestone',
 'is_confirmed',
 'components',
 'versions',
 'sub_component',
 'fixed_in',
 'weburl']

In [35]:
# create a df containing details of all linked and associated bugs
bugs_df = pd.DataFrame(columns=['bug_id'] + bug_details_to_get, index=range(len(all_linked_bugs)))
bugs_df = bugs_df.assign(bug_id=all_linked_bugs)
bugs_df.head()

Unnamed: 0,bug_id,autorefresh,priority,creator,blocks,assigned_to_detail,last_change_time,comments,is_cc_accessible,keywords,...,is_creator_accessible,cf_release_notes,product,target_milestone,is_confirmed,components,versions,sub_component,fixed_in,weburl
0,1862158,,,,,,,,,,...,,,,,,,,,,
1,1890423,,,,,,,,,,...,,,,,,,,,,
2,1918651,,,,,,,,,,...,,,,,,,,,,
3,1862182,,,,,,,,,,...,,,,,,,,,,
4,1906298,,,,,,,,,,...,,,,,,,,,,


In [36]:
def fill_bug_details(bug_row):
    global bzapi
    
    try:
        bug = bzapi.getbug(bug_row.bug_id)
    except Exception:
        return bug_row
    
    for detail in bug_row.index:
        try:
            bug_row[detail] = getattr(bug, detail)
        except AttributeError:
            print(detail)

    return bug_row

bugs_df.progress_apply(fill_bug_details, axis=1)
bugs_df

 31%|███▏      | 409/1300 [04:28<08:30,  1.74it/s]

qa_contact_detail


 54%|█████▍    | 700/1300 [07:31<07:52,  1.27it/s]

qa_contact_detail


 64%|██████▍   | 837/1300 [09:04<04:12,  1.83it/s]

qa_contact_detail


100%|██████████| 1300/1300 [14:22<00:00,  1.51it/s]


Unnamed: 0,bug_id,autorefresh,priority,creator,blocks,assigned_to_detail,last_change_time,comments,is_cc_accessible,keywords,...,is_creator_accessible,cf_release_notes,product,target_milestone,is_confirmed,components,versions,sub_component,fixed_in,weburl
0,1862158,False,low,David Sundqvist,[],"{'real_name': 'Rastislav Wagner', 'email': 'ra...",20210413T13:02:28,"[{'is_private': False, 'count': 0, 'creator': ...",True,[],...,True,,OpenShift Container Platform,---,True,[Management Console],[4.4],,,https://bugzilla.redhat.com/show_bug.cgi?id=18...
1,1890423,False,low,Guohua Ouyang,[],"{'real_name': 'Adam Litke', 'email': 'alitke',...",20210317T13:28:08,"[{'is_private': False, 'count': 0, 'creator': ...",True,[Reopened],...,True,,Container Native Virtualization (CNV),---,True,[Storage],[2.5.0],,,https://bugzilla.redhat.com/show_bug.cgi?id=18...
2,1918651,False,medium,Hongan Li,[],"{'real_name': 'Miciah Dashiel Butler Masters',...",20210323T12:54:42,"[{'is_private': False, 'count': 0, 'creator': ...",True,[],...,True,,OpenShift Container Platform,---,True,[Routing],[4.7],,,https://bugzilla.redhat.com/show_bug.cgi?id=19...
3,1862182,False,low,Eric Matysek,[],"{'real_name': 'Luis Sanchez', 'email': 'sanche...",20200909T14:37:52,"[{'is_private': False, 'count': 0, 'creator': ...",True,[],...,True,,OpenShift Container Platform,---,True,[kube-apiserver],[4.4],,,https://bugzilla.redhat.com/show_bug.cgi?id=18...
4,1906298,False,medium,OpenShift BugZilla Robot,[],"{'real_name': 'Kirsten Garrison', 'email': 'kg...",20210309T20:16:27,"[{'is_private': False, 'count': 3, 'creator': ...",True,[],...,True,,OpenShift Container Platform,---,True,[Machine Config Operator],[4.5],,,https://bugzilla.redhat.com/show_bug.cgi?id=19...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1295,1898672,False,high,Douglas Smith,[1898675],"{'real_name': 'Douglas Smith', 'email': 'dosmi...",20210208T13:51:06,"[{'is_private': False, 'count': 0, 'creator': ...",True,[UpcomingSprint],...,True,,OpenShift Container Platform,---,True,[Networking],[4.6],multus,,https://bugzilla.redhat.com/show_bug.cgi?id=18...
1296,1894040,False,urgent,Andre Costa,[],"{'real_name': 'Jan Safranek', 'email': 'jsafra...",20201217T09:53:04,"[{'is_private': False, 'count': 0, 'creator': ...",True,[],...,True,,OpenShift Container Platform,---,True,[Storage],[4.5],Kubernetes External Components,,https://bugzilla.redhat.com/show_bug.cgi?id=18...
1297,1940297,False,high,Matthew Robson,[],"{'real_name': 'Dan Winship', 'email': 'danw', ...",20210325T14:20:35,"[{'is_private': False, 'count': 0, 'creator': ...",True,[],...,True,,OpenShift Container Platform,---,True,[Networking],[4.5],openshift-sdn,,https://bugzilla.redhat.com/show_bug.cgi?id=19...
1298,1891551,False,medium,aaleman,[],"{'real_name': 'Joel Speed', 'email': 'jspeed',...",20210224T15:29:04,"[{'is_private': False, 'count': 0, 'creator': ...",True,[],...,True,Cause: The cluster autoscaler would use a temp...,OpenShift Container Platform,---,True,[Cloud Compute],[4.6],Other Providers,,https://bugzilla.redhat.com/show_bug.cgi?id=18...


In [48]:
for i in bugs_df.columns:
    print(i)

bug_id
autorefresh
priority
creator
blocks
assigned_to_detail
last_change_time
comments
is_cc_accessible
keywords
creator_detail
cc
see_also
groups
assigned_to
url
qa_contact
creation_time
whiteboard
id
depends_on
cf_target_upstream_version
docs_contact
description
qa_contact_detail
resolution
classification
cf_doc_type
alias
op_sys
target_release
status
cc_detail
external_bugs
summary
is_open
platform
severity
cf_environment
flags
version
tags
component
sub_components
is_creator_accessible
cf_release_notes
product
target_milestone
is_confirmed
components
versions
sub_component
fixed_in
weburl


In [44]:
# time elapsed

In [45]:
# creator / fixer analysis

In [46]:
# priority

## Merge Bug Details + Affected Jobs/Test DF

In [60]:
linked_and_associated_bugs.head()

Unnamed: 0,dashboard,job,test_name,bug_ids
0,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,"{'name': 'Overall', 'original-name': 'Overall'...",[]
1,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]
2,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'Operator results.operator conditions...,[1936859]
3,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]
4,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,[]


In [61]:
firstdf = linked_and_associated_bugs.explode('bug_ids')
firstdf = firstdf.rename(columns={'bug_ids': 'bug_id'})
firstdf.head()

Unnamed: 0,dashboard,job,test_name,bug_id
0,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,"{'name': 'Overall', 'original-name': 'Overall'...",
1,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,
2,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'Operator results.operator conditions...,1936859.0
3,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,
4,"""redhat-openshift-ocp-release-4.2-informing""",periodic-ci-openshift-release-master-ci-4.2-e2...,{'name': 'operator.Run multi-stage test e2e-aw...,


In [64]:
res = bugs_df.merge(
    firstdf,
#     how='left',
    left_on='bug_id',
    right_on='bug_id',
)
res.head()

Unnamed: 0,bug_id,autorefresh,priority,creator,blocks,assigned_to_detail,last_change_time,comments,is_cc_accessible,keywords,...,target_milestone,is_confirmed,components,versions,sub_component,fixed_in,weburl,dashboard,job,test_name
0,1941999,,,,,,,,,,...,,,,,,,,"""redhat-openshift-ocp-release-4.2-informing""",promote-release-openshift-machine-os-content-e...,"{'name': 'Pod', 'original-name': 'Pod', 'alert..."
1,1941999,,,,,,,,,,...,,,,,,,,"""redhat-openshift-ocp-release-4.2-informing""",release-openshift-ocp-installer-e2e-aws-mirror...,"{'name': 'Pod', 'original-name': 'Pod', 'alert..."
2,1941999,,,,,,,,,,...,,,,,,,,"""redhat-openshift-ocp-release-4.2-informing""",release-openshift-ocp-installer-e2e-azure-seri...,"{'name': 'Pod', 'original-name': 'Pod', 'alert..."
3,1941999,,,,,,,,,,...,,,,,,,,"""redhat-openshift-ocp-release-4.2-informing""",release-openshift-origin-installer-e2e-aws-upg...,"{'name': 'Cluster upgrade.upgrade', 'original-..."
4,1941999,,,,,,,,,,...,,,,,,,,"""redhat-openshift-ocp-release-4.2-informing""",release-openshift-origin-installer-e2e-aws-upg...,"{'name': 'Pod', 'original-name': 'Pod', 'alert..."


## Get Linked Bug Data for All Dashboards

In [6]:
# get all linked bugs
associated_bugs = list()
all_linked_bugs = set()
for dashboard in tqdm(dashboard_names):
    # get all jobs in this dashboard
    response = requests.get(f"https://testgrid.k8s.io/{dashboard}/summary")
    job_names = response.json().keys()
    
    for job in job_names:
        # get all tests in this job
        response = requests.get(f"https://testgrid.k8s.io/{dashboard}/table?&show-stale-tests=&tab={job}")
        
        # params to send to openshift ci search for tests under this job
        args = {
            'type': 'bug+junit',
            'context': '-1',
            'name': job,
            'maxAge': max_age,
            # DO NOT REMOVE THESE KEYS. THIS HACK PREVENTS REQUESTS FROM TIMING OUT.
            # read more here - https://stackoverflow.com/a/63377265/9743348
            'ajax': 'true',
            'mobile': 'false',
        }
        
        for test in response.json().get('tests', []):
            testname = test['name'].split('.', maxsplit=1)[-1]
            
            # use test name as the search phrase
            args['search'] = testname.replace('[', '\[').replace(']', '\]')

            # search for linked and associated bugs for this test
            response = requests.post(url, data=args)
            soup = BeautifulSoup(response.content)

            # the "em" objects in soup have information that can tell us
            # whether or not this test had a linked bug for the given job name
            em_objects = soup.find_all('em')
            pct_affected = 0
            for em in em_objects:
                if 'Found' in em.text:
                    pct_affected = float(em.text.split()[2][:-1])
                    break

            # init to empty for this test result / reset to empty from previous test result
            test_bugs = []
            
            # if percent jobs affected is 0 then the linked bugs correspond to another job
            if pct_affected > 0:
                result_rows = soup.find('table').find('tbody').find_all('tr')
                for row in result_rows:
                    column_values = row.find_all('td')
                    
                    # if there is only 1 column then the result is a junit, not bug
                    if len(column_values) > 1:
                        # check the second column to make sure it is a bug and not junit details result
                        if column_values[1].text == 'bug':
                            test_bugs.append(column_values[0].text[1:])
                            all_linked_bugs.add(column_values[0].text[1:])

            associated_bugs.append((dashboard, job, test, test_bugs))

  0%|          | 0/39 [01:16<?, ?it/s]


## todo

combine it into a dataframe s.t. each row has one bugzilla id, and columns are status, component, etc + affected tests/jobs.