# Analysis of latest Robot reports per branch


In [None]:
import os
import pandas as pd
import numpy as np
#import getpass
from sqlalchemy import create_engine
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
######################## Input parameters ########################

In [None]:
# Folder names: the SQLite database is read from `inputs_folder`;
# `outputs_folder` names the folder for the generated report.
inputs_folder, outputs_folder = 'etl_outputs', 'report_outputs'

# SQLAlchemy URI of the SQLite database file located inside `inputs_folder`.
database_uri = 'sqlite:///' + inputs_folder + '/test_executions.db'

# TODO: To remove at the end of the cleanup process
table_known_builds = 'builds_info'
# TODO: To remove at the end of the cleanup process
table_robot_reports = 'robot_reports'
# TODO: To remove at the end of the cleanup process
table_robot_reports_extended = 'robot_reports_extended'

# Cut-off date handed to the loaders: older builds are left out of the report.
too_old_builds = "2020-12-15"

In [None]:
# Current date as an ISO `YYYY-MM-DD` string, printed so that the report shows
# when it was generated.
today = pd.to_datetime("today").date().isoformat()
print(f'Date:  {today}')

In [None]:
###################################################################

In [None]:
############################ Load data ############################

In [None]:
def load_latest_builds_all_jobs(engine, too_old_builds='1980-12-15'):
    '''
    From each of the known jobs, retrieves their latest build.
    Returns a dataframe with a row per job, holding all the columns of the
    `builds_info` table, with `timestamp` converted to datetime. Builds that
    share the latest timestamp of a job would each produce a row.

    Usage:

    load_latest_builds_all_jobs(engine, too_old_builds='1980-12-15')

    - `engine`: Database engine to use for the connection.
    - `too_old_builds`: Limits the query to builds not older than a date. By default, it does not limit in practice (1980!).
    '''

    table_known_builds = 'builds_info'

    # The cut-off date travels as a bound parameter instead of being pasted
    # into the SQL text: no dependency on SQLite's legacy acceptance of
    # double-quoted string literals, and no breakage on odd input values.
    query_latest_builds = f'''
    SELECT main.*
    FROM {table_known_builds} AS main
    INNER JOIN (
        SELECT job, MAX(timestamp) AS ts
        FROM {table_known_builds}
        WHERE timestamp > DATETIME(:too_old_builds)
        GROUP BY job
    ) AS latest_build
    ON main.job = latest_build.job AND main.timestamp = latest_build.ts
    '''
    # str() keeps accepting anything that the former f-string formatting accepted.
    query_params = {'too_old_builds': str(too_old_builds)}

    with engine.begin() as conn:
        df_latest_builds = pd.read_sql(query_latest_builds, con=conn, params=query_params)

    df_latest_builds['timestamp'] = pd.to_datetime(df_latest_builds['timestamp'])

    return df_latest_builds

In [None]:
def load_latest_report_all_jobs(engine, too_old_builds='1980-12-15'):
    '''
    From each of the known jobs, retrieves the report from their latest build.
    Returns a dataframe with a row per suite per job (in case the latest build of the job generated a report),
    holding all the columns of the `robot_reports` table, with `starttime` and
    `endtime` converted to datetime.

    Usage:

    load_latest_report_all_jobs(engine, too_old_builds='1980-12-15')

    - `engine`: Database engine to use for the connection.
    - `too_old_builds`: Limits the query to builds not older than a date. By default, it does not limit in practice (1980!).
    '''

    table = 'robot_reports'
    table_known_builds = 'builds_info'

    # The cut-off date travels as a bound parameter instead of being pasted
    # into the SQL text: no dependency on SQLite's legacy acceptance of
    # double-quoted string literals, and no breakage on odd input values.
    query_robot_reports = f'''
    SELECT details.*
    FROM {table} AS details
    INNER JOIN {table_known_builds} AS main
    ON details.job = main.job AND details.build = main.build
    INNER JOIN (
        SELECT job, MAX(timestamp) AS ts
        FROM {table_known_builds}
        WHERE timestamp > DATETIME(:too_old_builds)
        GROUP BY job
    ) AS latest_build
    ON main.job = latest_build.job AND main.timestamp = latest_build.ts
    '''
    # str() keeps accepting anything that the former f-string formatting accepted.
    query_params = {'too_old_builds': str(too_old_builds)}

    with engine.begin() as conn:
        df_robot_reports = pd.read_sql(query_robot_reports, con=conn, params=query_params)

    for time_column in ('starttime', 'endtime'):
        df_robot_reports[time_column] = pd.to_datetime(df_robot_reports[time_column])

    return df_robot_reports

In [None]:
def load_latest_extended_report_all_jobs(engine, too_old_builds='1980-12-15'):
    '''
    From each of the known jobs, retrieves the extended report from their latest build.
    Returns a dataframe with a row per test per suite per job (in case the latest build of the job generated a report),
    holding all the columns of the `robot_reports_extended` table, with
    `starttime` and `endtime` converted to datetime.

    Usage:

    load_latest_extended_report_all_jobs(engine, too_old_builds='1980-12-15')

    - `engine`: Database engine to use for the connection.
    - `too_old_builds`: Limits the query to builds not older than a date. By default, it does not limit in practice (1980!).
    '''

    table = 'robot_reports_extended'
    table_known_builds = 'builds_info'

    # The cut-off date travels as a bound parameter instead of being pasted
    # into the SQL text: no dependency on SQLite's legacy acceptance of
    # double-quoted string literals, and no breakage on odd input values.
    query_robot_reports = f'''
    SELECT details.*
    FROM {table} AS details
    INNER JOIN {table_known_builds} AS main
    ON details.job = main.job AND details.build = main.build
    INNER JOIN (
        SELECT job, MAX(timestamp) AS ts
        FROM {table_known_builds}
        WHERE timestamp > DATETIME(:too_old_builds)
        GROUP BY job
    ) AS latest_build
    ON main.job = latest_build.job AND main.timestamp = latest_build.ts
    '''
    # str() keeps accepting anything that the former f-string formatting accepted.
    query_params = {'too_old_builds': str(too_old_builds)}

    with engine.begin() as conn:
        df_robot_reports_extended = pd.read_sql(query_robot_reports, con=conn, params=query_params)

    for time_column in ('starttime', 'endtime'):
        df_robot_reports_extended[time_column] = pd.to_datetime(df_robot_reports_extended[time_column])

    return df_robot_reports_extended

In [None]:
engine = create_engine(database_uri)

# The three loaders share the same signature, so they are called in turn with
# the same engine and cut-off date: latest builds, then suite-level reports,
# then test-level (extended) reports.
(
    df_latest_builds_all_jobs,
    df_latest_report_all_jobs,
    df_latest_extended_report_all_jobs,
) = [
    load(engine, too_old_builds=too_old_builds)
    for load in (
        load_latest_builds_all_jobs,
        load_latest_report_all_jobs,
        load_latest_extended_report_all_jobs,
    )
]

In [None]:
###################################################################

### Latest build of each job

In [None]:
# Render the dataframe returned by `load_latest_builds_all_jobs` (latest build of each job).
display(df_latest_builds_all_jobs)

### Failed tests per job (if they exist):

In [None]:
# Jobs that get their own section in the report, in display order.
relevant_jobs = [
    f'osm-stage_3-merge/{branch}'
    for branch in ('v10.0', 'master', 'v9.0')
]

In [None]:
# Rows of the latest reports whose status is FAIL.
df_failed = df_latest_report_all_jobs.loc[
    lambda report: report['status'] == 'FAIL'
]

# One table per relevant job, preceded by the job name; the identifier columns
# are dropped before displaying.
for job_name in relevant_jobs:
    display(job_name)
    failed_in_job = df_failed.loc[df_failed['job'] == job_name]
    display(failed_in_job.drop(columns=['build', 'source', 'job', 'id', 'failed_test_id']))

### Details of the failed tests within the failing test suites (if any):

In [None]:
# Pair every row of the extended report with the failed rows of `df_failed`
# of the same job and build whose `name` equals the row's `suite_name`.
details_in_failed_suites = df_latest_extended_report_all_jobs.merge(
    df_failed[['job', 'build', 'name']],
    how='inner',
    left_on=['job', 'build', 'suite_name'],
    right_on=['job', 'build', 'name'],
).drop(columns=['suite_id', 'test_id', 'name'])

# Out of those rows, keep only the ones whose own status is FAIL.
df_details_failed = details_in_failed_suites.loc[
    details_in_failed_suites['status'] == "FAIL"
]
del details_in_failed_suites  # temporary name only; keep the notebook namespace unchanged

# One table per relevant job, preceded by the job name.
for job_name in relevant_jobs:
    display(job_name)
    details_in_job = df_details_failed.loc[df_details_failed['job'] == job_name]
    display(details_in_job.drop(columns=['job', 'build']))

In [None]:
!jupyter nbconvert --to html --output report_outputs/analysis_latest_build.html --TemplateExporter.exclude_input=True 001-analysis_latest_build.ipynb