# Exploration and visualization of our collected metrics 
In this notebook, we will read the collected metrics from Ceph and use interactive widgets to explore and visualize our data.

In [2]:
# !pip install fsspec s3fs ipywidgets

In [3]:
# imports
import os
import pdb
import ipywidgets as widgets
from functools import partial
from collections import defaultdict

from matplotlib import pyplot as plt
import plotly.graph_objects as go
import seaborn as sns

from ipynb.fs.defs.metric_template import CephCommunication

from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

True

In [4]:
## Specify variables


def env_flag(name, default=False):
    """Read a boolean flag from the environment.

    Environment variables are always strings, so a plain truthiness check
    would treat "False" or "0" as enabled. Only the usual affirmative
    spellings count as True here.

    Args:
        name: name of the environment variable.
        default: value returned when the variable is not set at all.

    Returns:
        bool: the parsed flag, or `default` if the variable is unset.
    """
    raw_value = os.getenv(name)
    if raw_value is None:
        return default
    return raw_value.strip().lower() in {"1", "true", "t", "yes", "y", "on"}


## CEPH Bucket variables
## Create a .env file on your local with the correct configs
s3_endpoint_url = os.getenv(
    "S3_ENDPOINT", "https://s3-openshift-storage.apps.zero.massopen.cloud/"
)
# Credentials are read from the environment (.env file) only and are None when
# unset. They must never be committed as fallback values in the notebook.
# NOTE(review): a key pair was previously hard-coded here; treat it as leaked
# and rotate it.
s3_access_key = os.getenv("S3_ACCESS_KEY")
s3_secret_key = os.getenv("S3_SECRET_KEY")
s3_bucket = os.getenv("S3_BUCKET", "opf-datacatalog")
s3_path = os.getenv("S3_PROJECT_KEY", "metrics")

# Specify whether or not we are running this as a notebook or part of an automation pipeline.
# Defaults to True when IN_AUTOMATION is unset; "false"/"0"/"no"/"" disable it.
AUTOMATION = env_flag("IN_AUTOMATION", default=True)

In [5]:
# ceph connector, built from the endpoint, credentials and bucket configured above
cc = CephCommunication(s3_endpoint_url, s3_access_key, s3_secret_key, s3_bucket)

# bucket where the KPI metrics are stored
metrics_bucket = cc.s3_resource.Bucket(s3_bucket)

# get all objects (files) corresponding to all metrics, i.e. every object
# whose key starts with `s3_path`
available_metric_objs = metrics_bucket.objects.filter(Prefix=s3_path)

In [6]:
# extract the names of unique KPI metric files, and the KPIs available in them
kpis_in_metric_objs = defaultdict(list)
dates_available_for_metric_objs = defaultdict(list)

for obj in available_metric_objs:
    # Expected key layout: "<prefix>/<metric name>/<metric name>-<date>.parquet".
    # Keys with any other depth (e.g. the loose "metrics/flake.parquet" file)
    # or with an empty last component (a "directory" placeholder key ending
    # in "/") are skipped rather than crashing the three-way unpacking below.
    key_parts = obj.key.split("/")
    if len(key_parts) != 3 or not key_parts[-1]:
        continue

    # split key into prefix "directory", metric name, and filename
    dir_name, metric_obj_name, file_name = key_parts
    print(file_name)

    # `in` does not insert into a defaultdict, so this is true exactly once
    # per metric name: only the first file of each metric is downloaded.
    if metric_obj_name not in kpis_in_metric_objs:
        # read a sample file to see what kpis are contained in this file
        sample_file = cc.read_from_ceph(f"{dir_name}/{metric_obj_name}", file_name)

        # get these kpis names: every column that is not an identifier column
        kpis = sample_file.columns.drop(
            ["timestamp", "tab", "job", "grid", "test"], errors="ignore"
        ).tolist()

        # update mapping of "filename": "kpi"
        kpis_in_metric_objs[metric_obj_name] = kpis

    # update mapping of "metric name": "dates for which data is available"
    # "<metric name>-<date>.<ext>" -> "<date>" (text between the first "-"
    # and the first "." after it)
    date = file_name.split('-', maxsplit=1)[-1].split('.', maxsplit=1)[0]
    dates_available_for_metric_objs[metric_obj_name].append(date)

avg_correlation-2021-4-14.parquet
avg_correlation-2021-4-16.parquet
avg_correlation-2021-4-19.parquet
blocked_timed_out-2021-4-14.parquet
blocked_timed_out-2021-4-16.parquet
blocked_timed_out-2021-4-19.parquet
build_pass_failure-2021-4-14.parquet
build_pass_failure-2021-4-16.parquet
build_pass_failure-2021-4-19.parquet
correlation-2021-4-14.parquet
correlation-2021-4-16.parquet
correlation-2021-4-19.parquet
number_of_flakes-2021-4-14.parquet
number_of_flakes-2021-4-16.parquet
number_of_flakes-2021-4-19.parquet
pct_fixed_each_ts-2021-4-14.parquet
pct_fixed_each_ts-2021-4-16.parquet
pct_fixed_each_ts-2021-4-19.parquet
persistent_failures-2021-4-14.parquet
persistent_failures-2021-4-16.parquet
persistent_failures-2021-4-19.parquet
test_pass_failures-2021-4-14.parquet
test_pass_failures-2021-4-16.parquet
test_pass_failures-2021-4-19.parquet
time_to_test-2021-4-14.parquet
time_to_test-2021-4-16.parquet
time_to_test-2021-4-19.parquet


In [7]:
# select metric objects and the kpis in them to visualize
def on_metric_obj_selected(metric_obj, kpis_widget=None, dates_widget=None):
    """Refresh the KPI and date selectors after the metric selection changes.

    Args:
        metric_obj: change notification; its `new` attribute is iterated as
            the collection of currently selected metric object names.
        kpis_widget: widget whose `options` are replaced with the KPIs of the
            selected metric objects. Left untouched when None.
        dates_widget: widget whose `options` are replaced with the dates
            available for the selected metric objects. Left untouched when None.
    """
    selected_objs = metric_obj.new

    def _merged_options(mapping):
        # Several selected metric objects usually share the same dates (and
        # may share KPI names); dict.fromkeys drops the repeats while keeping
        # first-seen order so each option is listed once.
        return list(
            dict.fromkeys(
                option
                for selected_obj in selected_objs
                for option in mapping[selected_obj]
            )
        )

    if kpis_widget is not None:
        kpis_widget.options = _merged_options(kpis_in_metric_objs)

    if dates_widget is not None:
        dates_widget.options = _merged_options(dates_available_for_metric_objs)


# Top-level selector: one entry per metric object found in the bucket.
metric_obj_widget = widgets.SelectMultiple(
    options=kpis_in_metric_objs.keys(),
    disabled=False,
)

# Dependent selectors start out empty; they are filled by the observer below
# once at least one metric object is selected.
kpis_widget = widgets.SelectMultiple(options=[], disabled=False)
dates_widget = widgets.SelectMultiple(options=[], disabled=False)

# Bind the two dependent widgets into the callback and run it whenever the
# metric selection ("value" trait) changes.
selection_handler = partial(
    on_metric_obj_selected,
    kpis_widget=kpis_widget,
    dates_widget=dates_widget,
)
metric_obj_widget.observe(selection_handler, names="value")

display(metric_obj_widget, kpis_widget, dates_widget)

SelectMultiple(options=('avg_correlation', 'blocked_timed_out', 'build_pass_failure', 'correlation', 'number_o…

SelectMultiple(options=(), value=())

SelectMultiple(options=(), value=())

In [8]:
# Show the current selections of the three widgets above as a tuple. The
# values depend on what was clicked interactively, so they are all empty on a
# fresh Restart & Run All.
metric_obj_widget.value, kpis_widget.value, dates_widget.value

(('avg_correlation', 'blocked_timed_out', 'correlation'),
 ('test_blocked', 'test_timed_out'),
 ('2021-4-14', '2021-4-16', '2021-4-19'))

In [9]:
# Preview the prefix and the file name derived from the first selected metric
# object. Nothing is selected on a fresh Restart & Run All, so guard the
# index instead of raising IndexError; the file name is None in that case.
selected_metric_objs = metric_obj_widget.value
s3_path, (f"{selected_metric_objs[0]}.parquet" if selected_metric_objs else None)

('metrics', 'avg_correlation.parquet')

## todo
- right now this will fail for some cases because not all metrics are under "metrics" prefix (see https://github.com/aicoe-aiops/ocp-ci-analysis/issues/217#issuecomment-819624029)
- need to know what columns exist in which df when indexing
- some dfs have a column "grid" vs some others have "job"?

In [10]:
# Load one day's "number_of_flakes" metric file from ceph; it serves as the
# sample dataframe for the plots below.
metric = cc.read_from_ceph(
    s3_path, "number_of_flakes/number_of_flakes-2021-4-14.parquet"
)

# Keep only the rows whose test is "Overall".
is_overall = metric["test"] == "Overall"
overall_metric = metric[is_overall]
overall_metric.head()

Unnamed: 0,timestamp,tab,grid,test,test_duration,flake
0,2021-04-14 16:12:04,"""redhat-assisted-installer""",periodic-ci-openshift-release-master-nightly-4...,Overall,105.583333,False
1,2021-04-14 15:05:09,"""redhat-assisted-installer""",periodic-ci-openshift-release-master-nightly-4...,Overall,87.216667,False
2,2021-04-14 13:13:36,"""redhat-assisted-installer""",periodic-ci-openshift-release-master-nightly-4...,Overall,99.1,False
3,2021-04-14 02:02:46,"""redhat-assisted-installer""",periodic-ci-openshift-release-master-nightly-4...,Overall,107.316667,False
4,2021-04-13 19:12:42,"""redhat-assisted-installer""",periodic-ci-openshift-release-master-nightly-4...,Overall,85.583333,False


## Next step 1

assuming there is a metric df that looks like above, plot something like this:

Given a dashboard (e.g. “redhat-openshift-ocp-release-4.6-informing”), plot the `Overall` KPI values for jobs within the dashboard (e.g. plot the Overall test duration for “periodic-ci-openshift-release-master-ci-4.6-e2e-aws” vs “periodic-ci-openshift-release-master-ci-4.6-e2e-gcp” vs …)

In [11]:
# dropdown listing every tab present in the "Overall" rows; starts on the first one
tabs = overall_metric["tab"].unique()
tabs_widget = widgets.Dropdown(options=tabs, value=tabs[0])

# rows belonging to the initially selected tab, and the grids found in them
initial_tab_df = overall_metric[overall_metric["tab"] == tabs_widget.value]
grids = initial_tab_df["grid"].unique()

# one test-duration line per grid of that tab
traces = []
for grid_name in grids:
    grid_rows = initial_tab_df[initial_tab_df["grid"] == grid_name]
    traces.append(
        go.Scatter(
            x=grid_rows["timestamp"],
            y=grid_rows["test_duration"],
            mode="lines+markers",
            name=grid_name,
        )
    )

# figure widget holding the traces; the legend is anchored by its bottom-left
# corner at (0.2, -0.8) in plot coordinates
legend_position = {
    "yanchor": "bottom",
    "y": -0.8,
    "xanchor": "left",
    "x": 0.2,
}
figwig = go.FigureWidget(
    data=traces,
    layout=go.Layout(
        xaxis_title="Timestamp",
        yaxis_title="Test Duration",
        legend=legend_position,
    ),
)

In [12]:
def update_traces_plot(change):
    """Redraw `figwig` with one line per grid of the currently selected tab.

    Meant to be registered as an observer on `tabs_widget`. The `change`
    argument is not inspected; the selected tab is read from the widget
    itself. Relies on the notebook-level `figwig`, `tabs_widget` and
    `overall_metric`.
    """
    # rows of the newly selected tab
    curr_tab = tabs_widget.value
    tab_rows = overall_metric[overall_metric["tab"] == curr_tab]

    # build one test-duration trace per grid in that tab
    new_traces = []
    for grid_name in tab_rows["grid"].unique():
        grid_rows = tab_rows[tab_rows["grid"] == grid_name]
        new_traces.append(
            go.Scatter(
                x=grid_rows["timestamp"],
                y=grid_rows["test_duration"],
                mode="lines+markers",
                name=grid_name,
            )
        )

    # swap the old traces for the new ones inside a single batched update
    with figwig.batch_update():
        figwig.data = []
        figwig.add_traces(new_traces)

# redraw the figure whenever the dropdown's value changes, then show the
# dropdown stacked above the figure
tabs_widget.observe(update_traces_plot, names="value")
display(widgets.VBox([tabs_widget, figwig]))

VBox(children=(Dropdown(options=('"redhat-assisted-installer"', '"redhat-openshift-informing"', '"redhat-opens…

## Next step 2

assuming there is a metric df that looks like above, plot something like this:

Given a job (e.g. “periodic-ci-openshift-release-master-ci-4.6-e2e-gcp”), plot the KPI values for tests within the job (e.g. plot duration for “operator conditions cloud-credential” vs “operator conditions cluster-autoscaler” vs ...)

In [None]:
def plot_builds_tab_grid(tab, job, df, metric_name):
    """
    Plot how `metric_name` changes over time for one job within one tab.

    Args:
        tab: dashboard tab name; surrounding double quotes are ignored on
            both this value and the dataframe's "tab" column.
        job: job (grid) name to select within the tab.
        df: metric dataframe with "timestamp", "tab", `metric_name` and
            either a "job" or a "grid" column.
        metric_name: name of the column plotted on the y axis.

    Raises:
        KeyError: if `df` has neither a "job" nor a "grid" column, or lacks
            one of the other required columns.
    """
    # Some metric dataframes name the job column "job", others "grid".
    if "job" in df.columns:
        job_col = "job"
    elif "grid" in df.columns:
        job_col = "grid"
    else:
        raise KeyError("df must contain either a 'job' or a 'grid' column")

    # Tab names appear with literal surrounding double quotes in the
    # number_of_flakes sample shown in this notebook, so compare on the
    # unquoted names.
    tab_names = df["tab"].astype(str).str.strip('"')

    # Both conditions must hold: the rows of this job *within* this tab.
    df = df[(tab_names == str(tab).strip('"')) & (df[job_col] == job)]

    sns.set(rc={"figure.figsize": (15, 5)})
    sns.lineplot(x="timestamp", y=metric_name, data=df)
    plt.xlabel("Timestamps")
    plt.ylabel(metric_name)
    # title follows the plotted metric instead of always saying "build pass/fail"
    plt.title(f"Change in {metric_name} over time")
    plt.show()

In [None]:
# test duration over time for one job of the "redhat-openshift-informing" tab
plot_builds_tab_grid(
    tab="redhat-openshift-informing",
    job="release-openshift-okd-installer-e2e-aws-upgrade",
    df=metric,
    metric_name="test_duration",
)

In [None]:
# build status over time for the same tab and job
# NOTE(review): the `metric` frame displayed earlier (number_of_flakes) shows
# no "build_status" column, so this call presumably needs a different metric
# file to be loaded first - confirm.
plot_builds_tab_grid(
    tab="redhat-openshift-informing",
    job="release-openshift-okd-installer-e2e-aws-upgrade",
    df=metric,
    metric_name="build_status",
)