 # Analysis of Robot reports from OSM Jenkins

In [None]:
import os
import xml.etree.ElementTree as et
import pandas as pd
import requests
import jenkins
import getpass

 ## 0. Input parameters

In [None]:
# Local folder and file name where the downloaded Robot report is saved
inputs_folder = 'inputs'
input_robot_file = 'output.xml'
# Jenkins job whose builds and Robot results are analysed
job_name = 'osm-stage_3-merge/v9.0'
# Base URL of the Jenkins server to connect to
url_jenkins_server = 'https://osm.etsi.org/jenkins'

 Credentials:

In [None]:
def load_env_file(path='.env'):
    """Load ``KEY=VALUE`` pairs from *path* into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without an ``=``
    separator (or with an empty key) are skipped. Only the first ``=``
    splits a line, so values may themselves contain ``=`` (common in
    tokens and passwords). Whitespace around keys and values is stripped.

    If the file does not exist, a notice is printed and nothing is loaded.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                key, sep, value = line.partition('=')
                key = key.strip()
                if not sep or not key:
                    continue
                os.environ[key] = value.strip()
    except FileNotFoundError:
        print(f"Environment file ('{path}') does not exist. Skipping...")


# If the '.env' file exists, loads the environment variables
load_env_file()

In [None]:
# Credentials are taken from the environment when set (and non-empty);
# otherwise the user is prompted interactively.
username = os.environ.get('JENKINS_USER') or input('Username: ')
password = os.environ.get('JENKINS_PASS') or getpass.getpass()

## 1. Retrieval of Jenkins jobs info and Robot reports

 Opens session with the Jenkins server:

In [None]:
# Client handle for the Jenkins server, reused by all the helper functions below
server = jenkins.Jenkins(url_jenkins_server, username=username, password=password)

Tests the connection to the Jenkins server:

In [None]:
def test_jenkins_connection(server):
    """Print a greeting with the logged-in user's full name and the Jenkins version.

    Args:
        server: Jenkins client exposing ``get_whoami()`` and ``get_version()``.
    """
    whoami = server.get_whoami()
    jenkins_version = server.get_version()
    full_name = whoami["fullName"]
    print(f'Hello {full_name} from Jenkins {jenkins_version}')

In [None]:
# Prints a greeting with the user name and the Jenkins version
test_jenkins_connection(server)

 ### 1.1 Jobs in the Jenkins server

Retrieves the list of jobs that exist in the Jenkins server:

In [None]:
def get_all_jenkins_jobs_as_df(server):
    """Return the result of ``server.get_all_jobs()`` as a DataFrame.

    Args:
        server: Jenkins client exposing ``get_all_jobs()``.

    Returns:
        A ``pandas.DataFrame`` built directly from the returned job records.
    """
    return pd.DataFrame(server.get_all_jobs())

In [None]:
# Displays the table of jobs returned by the server
get_all_jenkins_jobs_as_df(server)

### 1.2 Analysis of specific jobs: e.g. v9.0 testing job (`job_name`)

In [None]:
def get_job_summary(server, job_name):
    """Build a flat summary dictionary of a Jenkins job.

    All top-level fields of the job info are copied except those holding
    nested structures. For each reference build (first, last, last
    completed, ...) that is present and non-empty, two extra keys are added:
    ``<field>_number`` and ``<field>_url``.

    Args:
        server: Jenkins client exposing ``get_job_info()``.
        job_name: Name of the job to summarise.

    Returns:
        dict mapping field names to values. Keys for a reference build are
        absent when the job has no such build.
    """
    # Obtains all the raw information about the job.
    # NOTE(review): the positional arguments are presumably depth=0 and
    # fetch_all_builds=True in python-jenkins — confirm against the API docs.
    my_job = server.get_job_info(job_name, 0, True)

    # Fields pointing to a single build, flattened below into *_number / *_url
    reference_builds_of_job = [
        'firstBuild', 'lastBuild', 'lastCompletedBuild', 'lastFailedBuild',
        'lastStableBuild', 'lastSuccessfulBuild', 'lastUnstableBuild',
        'lastUnsuccessfulBuild',
    ]

    # Fields that embed complex structures in the JSON and are left out of the summary
    composite_fields = {'actions', 'builds', 'healthReport', 'property', *reference_builds_of_job}

    my_job_status = {k: v for k, v in my_job.items() if k not in composite_fields}

    # Adds the build number and URL that were nested in the JSON
    for k in reference_builds_of_job:
        item = my_job.get(k)
        if item:
            my_job_status[f'{k}_number'] = item.get('number')
            my_job_status[f'{k}_url'] = item.get('url')

    return my_job_status

In [None]:
# Summary of the reference job; reused later to pick the build to analyse
my_job_status = get_job_summary(server, job_name)
my_job_status

Health report of the job:

In [None]:
def get_job_health(server, job_name):
    """Return the ``healthReport`` entry of a job's info, or ``None`` if absent.

    Args:
        server: Jenkins client exposing ``get_job_info()``.
        job_name: Name of the job to query.
    """
    job_info = server.get_job_info(job_name, 0, True)
    health_report = job_info.get('healthReport')
    return health_report

In [None]:
# Health report of the reference job
health = get_job_health(server, job_name)
health

### 1.3 Analysis of builds of the reference job (v9.0 testing job)

List of historical builds of the job:

In [None]:
def get_all_job_builds(server, job_name):
    """Return the builds listed in a job's info as a DataFrame.

    The ``_class`` column is dropped when present. A job with no builds
    (or whose build records carry no ``_class`` field) yields a DataFrame
    without raising.

    Args:
        server: Jenkins client exposing ``get_job_info()``.
        job_name: Name of the job to query.

    Returns:
        A ``pandas.DataFrame`` with one row per build record.
    """
    my_job = server.get_job_info(job_name, 0, True)
    df_builds = pd.DataFrame(my_job.get('builds'))
    # errors='ignore' avoids a KeyError when the column is missing (e.g. no builds)
    return df_builds.drop(columns='_class', errors='ignore')

In [None]:
# Table of the builds of the reference job
df_builds_of_job = get_all_job_builds(server, job_name)
df_builds_of_job

In [None]:
# Prints the build numbers as a plain list
print(df_builds_of_job.number.tolist())

Retrieves all the information about a specific build:

In [None]:
def get_build_summary(server, job_name, build_number):
    """Return the key fields of a single build.

    Args:
        server: Jenkins client exposing ``get_build_info()``.
        job_name: Name of the job the build belongs to.
        build_number: Number of the build to query.

    Returns:
        dict with the keys ``id``, ``number``, ``result``, ``duration``,
        ``estimatedDuration``, ``timestamp`` and ``url``; a key missing from
        the raw build data maps to ``None``.
    """
    raw_build = server.get_build_info(job_name, build_number)

    summary = {}
    for field in ('id', 'number', 'result', 'duration', 'estimatedDuration', 'timestamp', 'url'):
        summary[field] = raw_build.get(field)
    return summary

In [None]:
# We want the info of the latest completed build.
# The key below only exists if the job summary found a 'lastCompletedBuild'.
build_number = my_job_status["lastCompletedBuild_number"]
my_build_summary = get_build_summary(server, job_name, build_number)
my_build_summary

### 1.4 Retrieval of Robot results of latest completed build of v9.0 testing job

In [None]:
def get_robot_report(server, job_name, build_number):
    """Retrieve the contents of the Robot report file of a build.

    The report URL is the build URL followed by ``robot/report/output.xml``.

    Args:
        server: Jenkins client exposing ``get_build_info()`` and ``jenkins_open()``.
        job_name: Name of the job the build belongs to.
        build_number: Number of the build whose report is fetched.

    Returns:
        Whatever ``server.jenkins_open()`` returns for the report request.
    """
    build_url = get_build_summary(server, job_name, build_number)['url']
    report_url = build_url + 'robot/report/output.xml'
    # NOTE(review): the request uses POST although it only reads a file — confirm GET is not preferable
    request = requests.Request('POST',  report_url)
    return server.jenkins_open(request)

In [None]:
# Downloads the Robot report of the selected build
robot_report_contents = get_robot_report(server, job_name, build_number)

## 2. Imports info from Robot test report and cleans data

In [None]:
# Saves the downloaded Robot report to disk so that it can be parsed as XML
robot_report = os.path.join(inputs_folder, input_robot_file)
# Creates the destination folder if needed; open() would otherwise raise FileNotFoundError
if inputs_folder:
    os.makedirs(inputs_folder, exist_ok=True)
with open(robot_report, 'w', encoding='utf-8') as f:
    # write() stores the contents verbatim (print() would append an extra newline)
    f.write(robot_report_contents)

### 2.1 Numerical statistics

In [None]:
def get_stats_from_report(robot_report):
    """Return the pass/fail statistics per test suite from a Robot report.

    Reads the children of ``<statistics>/<suite>`` in the XML report. The
    first entry is discarded, since it just summarizes the stats of all the
    test suites.

    Args:
        robot_report: Path (or file object) of the Robot ``output.xml`` report.

    Returns:
        A ``pandas.DataFrame`` with the columns ``id``, ``name``, ``pass``
        (int64), ``fail`` (int64) and ``status`` (category: ``'FAIL'`` when
        ``fail > 0``, otherwise ``'PASS'``).
    """
    xroot = et.parse(robot_report).getroot()

    # Section of numerical statistics, with the number of passed/failed tests per test suite
    stat_suites = xroot.find('statistics').find('suite')
    fields = ['id', 'name', 'pass', 'fail']
    rows = [{f: stat.attrib[f] for f in fields} for stat in stat_suites]
    # Explicit columns keep the frame well-formed even when there are no entries
    df_test_stats = pd.DataFrame(rows, columns=fields)

    # XML attributes are strings: fixes the types of the counters
    df_test_stats = df_test_stats.astype({'pass': 'int64', 'fail': 'int64'})

    # Removes the first row, which is redundant (overall summary)
    df_test_stats = df_test_stats.loc[1:].reset_index(drop=True)

    # Adds a new column with the overall result of the test suite
    df_test_stats['status'] = 'PASS'
    df_test_stats.loc[df_test_stats['fail'] > 0, 'status'] = 'FAIL'
    df_test_stats['status'] = df_test_stats['status'].astype('category')

    return df_test_stats

In [None]:
# Pass/fail statistics per test suite of the saved report
df_test_stats = get_stats_from_report(robot_report)
df_test_stats

In [None]:
# Summary of the DataFrame, to check that the column types are as expected
df_test_stats.info()

### 2.2 Results per test suite

In [None]:
def get_results_from_report(robot_report):
    # Finds the root of the XML tree:
    xtree = et.parse(robot_report)
    xroot = xtree.getroot()
    timestamp = xroot.attrib['generated']

    # Dataframe of results of the test suites of the tests of the day
    all_suites = xroot.find('suite')

    suite_rows = []
    status_rows = []
    for suite in all_suites.findall('suite'):
        # suite
        suite_rows.append(suite.attrib)

        ## suite --> status
        status_rows.append(suite.find('status').attrib)

    df_test_suites = pd.concat([pd.DataFrame(suite_rows), pd.DataFrame(status_rows)], axis=1)
    df_test_suites['status'] = df_test_suites.status.astype('category')
    df_test_suites['starttime'] = pd.to_datetime(df_test_suites.starttime)
    df_test_suites['endtime'] = pd.to_datetime(df_test_suites.endtime)

    return df_test_suites

In [None]:
# Results per test suite of the saved report
df_test_suites = get_results_from_report(robot_report)
df_test_suites

In [None]:
# Summary of the DataFrame, to check that the column types are as expected
df_test_suites.info()

### 2.3 Details of the test suites up to the level of keyword

In [None]:
def get_detailed_results_from_report(robot_report):
    # Finds the root of the XML tree:
    xtree = et.parse(robot_report)
    xroot = xtree.getroot()
    timestamp = xroot.attrib['generated']

    # Dataframe with details of each keyword run in the test
    all_suites = xroot.find('suite')

    rows = []
    for suite in all_suites.findall('suite'):
        # suite
        suite_id = suite.attrib['id']
        suite_name = suite.attrib['name']

        ## tests in the suite
        for test in suite.findall('test'):
            test_id = test.attrib['id']
            test_name = test.attrib['name']

            for kw in test.findall('kw'):
                keyword_name = kw.attrib['name']
                resultado = kw.find('status').attrib

                line = {'suite_id': suite_id, 'suite_name': suite_name, 'test_id': test_id, 'test_name': test_name, 'keyword_name': keyword_name, **resultado}
                rows.append(line)

    df_tests_and_keywords = pd.DataFrame(rows)

    # Fixes the dtype of some columns
    df_tests_and_keywords['status'] = df_tests_and_keywords.status.astype('category')
    df_tests_and_keywords['starttime'] = pd.to_datetime(df_tests_and_keywords.starttime)
    df_tests_and_keywords['endtime'] = pd.to_datetime(df_tests_and_keywords.endtime)

    return df_tests_and_keywords

In [None]:
# Keyword-level results of the saved report
df_tests_and_keywords = get_detailed_results_from_report(robot_report)
df_tests_and_keywords

In [None]:
# Summary of the DataFrame, to check that the column types are as expected
df_tests_and_keywords.info()

 Finds the first failure per test suite (which is the most likely root cause):

In [None]:
# Keeps only the failed keywords and takes, per suite, the first entry of each column.
# NOTE(review): GroupBy.first() skips nulls column by column, so a row could mix values
# from several failed keywords if some columns have missing values — confirm this is acceptable.
df_root_cause_errors = df_tests_and_keywords.loc[df_tests_and_keywords.status=='FAIL'].groupby('suite_id').first()
df_root_cause_errors

In [None]:
# Summary of the DataFrame of first failures per suite
df_root_cause_errors.info()