# ALI Performance Tests on Blake
Performance comparison dashboard. Must be run in a Jupyter notebook to interact with the plots.

In [1]:
# https://stackoverflow.com/questions/27934885/how-to-hide-code-from-cells-in-ipython-notebook-visualized-with-nbviewer
from IPython.display import HTML
# HTML snippet holding a script plus a submit button. The script's
# code_toggle() hides or shows every 'div.input' element and then flips the
# code_show flag; it is registered to run once when the document is ready,
# and the form's button calls it again on every click.
# NOTE(review): the script relies on the `$` function, presumably jQuery
# supplied by the notebook front end - confirm it exists where this renders.
button = HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Show/hide code blocks"></form>''')

In [2]:
button

In [3]:
import datetime as dt
import glob
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import os
import json

import plotly.graph_objects as go

# Import scripts
from json2timeline import json2dataframe
from utils import *
from models import findChangePts

In [4]:
# Cases to plot:
nproc = 384
# Metadata columns carried along with every timing (shown in plot hover text)
metadata = ['Albany cxx compiler',
            'Albany git commit id',
            'Trilinos git commit id']
cases = ['ant-2-20km_ml_ls',
         'ant-2-20km_mu_ls',
         'ant-2-20km_mu_dls',
         'green-1-7km_fea_1ws',
         'green-1-7km_ml_ls_1ws',
         'green-1-7km_mu_ls_1ws',
         'green-1-7km_mu_dls_1ws',
         'green-1-7km_fea_mem',
         'green-1-7km_ml_ls_mem',
         'green-1-7km_mu_ls_mem',
         'green-1-7km_mu_dls_mem']
# Display names used as dataframe columns and dropdown entries.
# NOTE(review): names and timers appear to be paired by position - confirm
# against json2dataframe.
names =  ('Total Time',
          'Setup Time',
          'Total Fill Time',
          'Residual Fill',
          'Residual Fill Evaluate',
          'Residual Fill Export',
          'Jacobian Fill',
          'Jacobian Fill Evaluate',
          'Jacobian Fill Export',
          'NOX Total Preconditioner Construction',
          'NOX Total Linear Solve')
# Raw timer labels handed to json2dataframe. (An earlier one-element `timers`
# list was dead code: it was overwritten here before ever being read.)
timers = ('Albany Total Time:',
          'Albany: Setup Time:',
          'Albany: Total Fill Time:',
          'Albany Fill: Residual:',
          'Albany Residual Fill: Evaluate:',
          'Albany Residual Fill: Export:',
          'Albany Fill: Jacobian:',
          'Albany Jacobian Fill: Evaluate:',
          'Albany Jacobian Fill: Export:',
          'NOX Total Preconditioner Construction:',
          'NOX Total Linear Solve:')
assert len(names) == len(timers), 'names and timers must have the same length'

# Extract file names and collect data. glob returns paths in arbitrary order,
# so sort them to make the input list reproducible between runs.
files = sorted(glob.glob('ctest-*'))
df = json2dataframe(files, cases, nproc, names, timers, metadata)

# # Filter data by date if desired
# import datetime as dt
# df = df[df['date'] < dt.datetime.strptime('20191231', '%Y%m%d')]

In [5]:
# Detection threshold passed to findChangePts. For normally distributed
# data this would be the probability of a false positive when no change
# has occurred; real timings are not normal, so false positives can
# occur much more often than this value suggests.
threshold = 0.0001

# Window, in days counted back from the most recent run, used when
# reporting detected changepoints.
# NOTE(review): the exact window semantics live in printEvents - confirm.
recency = 10

In [6]:
# Optional diagnostics (currently disabled): list the configured test cases
# and timer display names.
# print('Test cases:')
# [print('  '+c) for c in cases]
# print('Timers:')
# [print('  '+n) for n in names]
# Report the changepoint-detection threshold in effect for this run
print("Model threshold: %f" % threshold)

Model threshold: 0.000100


In [7]:
# Find changepoints and format data to work nicely with plots.
#   seqs[case][name] : dataframe with columns date, time, the metadata
#                      columns, and the per-regime mean / upper / lower bands
#   events[date]     : {case: timer name} for changepoints found on that date
seqs = {case: {} for case in cases}
mostRecent = df['date'].max()
events = {}
for case in cases:
    # The rows belonging to a case do not depend on the timer: select once
    case_rows = df.loc[df['case'] == case]
    for name in names:
        # Detect changepoints
        observed = case_rows[['date', name]].dropna()
        pts, _ = findChangePts(observed[name].to_numpy(), threshold=threshold,
                               method='mean')

        # Record the date of every detected changepoint.
        # NOTE(review): if several timers of one case change on the same
        # date, only the last timer name is kept for that case.
        for d in observed['date'].iloc[pts]:
            events.setdefault(d, {})[case] = name

        # Calculate mean/std between changepoints and save in dataframe
        cols = ['date', name] + list(metadata)
        data = (case_rows[cols]
                .dropna(subset=[name])
                .reset_index(drop=True)
                .rename(columns={name: 'time'}))
        mean, std = regimeTimeseries(data['time'].to_numpy(), pts)
        # Bands drawn on the plot: regime mean +/- two standard deviations
        temp = {'mean': mean, 'upper': mean+2*std, 'lower': mean-2*std}
        seqs[case][name] = pd.concat((data, pd.DataFrame(temp)), axis=1)

# Sort and print recent events
events = {day: events[day] for day in sorted(events)}
print('Events in the most recent %d days:' % recency)
recentEvents = printEvents(events, mostRecent, recency)

Events in the most recent 10 days:
02/01/2020:
    green-1-7km_mu_dls_1ws: Total Time
02/02/2020:
    green-1-7km_mu_ls_1ws: Total Fill Time
    green-1-7km_mu_dls_1ws: Residual Fill
    green-1-7km_fea_mem: Residual Fill
    green-1-7km_mu_ls_mem: Residual Fill
    green-1-7km_mu_dls_mem: Residual Fill


In [14]:
# Plot characteristics: one entry per plotted series (raw timings, regime
# mean, and the upper/lower bands) plus one color per compared case
lines = ['time', 'mean', 'upper', 'lower']
colors = ['darkred', 'midnightblue']
modes = ['markers', 'lines', 'lines', 'lines']
dashes = ['solid', 'solid', 'dot', 'dot']

fig = go.FigureWidget()
# Traces are added series-major, case-minor (time/case1, time/case2,
# mean/case1, ...), so consecutive traces alternate between the two cases.
for series, draw_mode, dash_style in zip(lines, modes, dashes):
    for case_name, case_color in zip(cases[:2], colors):
        is_first = case_name == cases[0]
        seq = seqs[case_name][names[0]]
        # Arguments shared by every trace of this case
        trace_args = dict(
            x=seq['date'],
            y=seq[series],
            mode=draw_mode,
            line=dict(color=case_color, dash=dash_style, width=1.5),
            legendgroup='g1' if is_first else 'g2',
        )
        if series != 'time':
            # Mean and band lines are labelled by series and have no hover
            trace_args.update(name=series, hoverinfo='skip')
        else:
            # Raw timings carry the metadata for the hover text;
            # customdata[1] (the compiler column) is not shown.
            trace_args.update(
                name=case_name,
                marker_symbol='square' if is_first else 'circle',
                customdata=seq.loc[:, ['date']+list(metadata)],
                hovertemplate=(
                    "Date: %{customdata[0]}<br>"
                    "Albany commit: %{customdata[2]}<br>"
                    "Trilinos commit: %{customdata[3]}"
                    "<extra></extra>"
                ),
            )
        fig.add_trace(go.Scatter(**trace_args))

fig = fig.update_layout(
    title='Comparison of nightly test performance',
    xaxis_title='Simulation Date',
    yaxis_title='Wall-clock Time (s)'
)


In [15]:
def latest_stats(c, n):
    """Summarize the most recent regime of timer ``n`` for test case ``c``.

    Reads the frame stored at ``seqs[c][n]`` and takes the mean and band of
    its last row as the current regime.

    Returns:
        tuple: (times, mean, std) where ``times`` is a numpy array of the
        'time' values of every row whose 'mean' equals the last row's mean,
        ordered from newest to oldest; ``mean`` is that last-row mean; and
        ``std`` is half the gap between the last row's 'upper' and its mean.
    """
    frame = seqs[c][n]
    last = frame.shape[0] - 1
    regime_mean = frame['mean'][last]
    # 'upper' is stored as mean + 2*std, so half the gap recovers std
    regime_std = .5*(frame['upper'][last] - regime_mean)
    # Walk backwards so the newest observation comes first
    regime_times = [frame['time'][row]
                    for row in range(last, -1, -1)
                    if frame['mean'][row] == regime_mean]
    return np.array(regime_times), regime_mean, regime_std

In [30]:
# Overlay one histogram per compared case, each built from that case's
# observations since its latest changepoint (first timer by default)
hist = go.FigureWidget()
for case_name, bar_color in zip(cases[:2], colors):
    recent_times = latest_stats(case_name, names[0])[0]
    hist.add_trace(go.Histogram(x=recent_times, name=case_name,
                                marker_color=bar_color, opacity=0.7))
hist = hist.update_layout(
    title='Observations since latest changepoints',
    xaxis_title='Wall-clock Time (s)',
    yaxis_title='Count',
    barmode='overlay',
    legend_orientation='h',
    legend=dict(x=0, y=1.11, bgcolor=None)
)

In [31]:
from ipywidgets import Output
from scipy.stats import ttest_ind
# Create a textbox that will show t-test results, and update it
textbox = Output(layout={'border': '1px solid black'})
def update_textbox(c1=cases[0], c2=cases[1], n=names[0]):
    """Refresh the text box with a t-test comparing two cases on one timer.

    For each case, the observations since its latest changepoint are taken
    from latest_stats; the two samples are passed through trim() and compared
    with ttest_ind. The box shows the sample size (before trimming), mean and
    standard deviation of each case, then the t statistic and p-value.

    Args:
        c1: name of the first test case.
        c2: name of the second test case.
        n: display name of the timer to compare.
    """
    # One, two and three asterisks mark p-values below these levels
    signif_levels = (0.05, 0.01, 0.001)
    with textbox:
        textbox.clear_output()
        p1, m1, s1 = latest_stats(c1, n)
        p2, m2, s2 = latest_stats(c2, n)
        tstat, pval = ttest_ind(trim(p1), trim(p2))
        signif = '*' * sum(int(pval < level) for level in signif_levels)
        print('Data since latest changepoints')
        # Label each summary with the case actually selected, not the defaults
        for label, pts, mean, std in ((c1, p1, m1, s1), (c2, p2, m2, s2)):
            print(label)
            print('  N   : %d\n  mean: %.2f\n  std : %.2f' % (len(pts), mean, std))
        print('t-test results:')
        print('  t-stat : %.2f' % tstat)
        print('  p-value: %.4f%s' % (pval, signif))

update_textbox()

In [32]:
# Function that will update all chart elements based on dropdowns
def update(Case1=cases[0], Case2=cases[1], Timer=names[0]):
    """Redraw the time-series figure, histogram and t-test box for a selection.

    The parameter names double as the dropdown labels created by
    ipywidgets.interactive, so they must not be renamed.

    Args:
        Case1: name of the first test case to display.
        Case2: name of the second test case to display.
        Timer: display name of the timer to display.
    """
    selected = (Case1, Case2)
    custom_cols = ['date'] + list(metadata)
    with fig.batch_update():
        # Traces were added series-major, case-minor, so the trace for
        # (series, case) sits at series_idx * 2 + case_idx.
        for series_idx, series in enumerate(lines):
            for case_idx, case in enumerate(selected):
                trace = fig.data[series_idx*len(selected) + case_idx]
                seq = seqs[case][Timer]
                trace['x'] = seq['date']
                trace['y'] = seq[series]
                # Only the first series (raw timings) is labelled by case
                trace['name'] = case if series_idx == 0 else series
                # Each trace gets its OWN case's metadata for the hover text
                trace['customdata'] = seq.loc[:, custom_cols]
    with hist.batch_update():
        for trace, case in zip(hist.data, selected):
            trace['x'] = latest_stats(case, Timer)[0]
            # Keep the histogram legend in step with the selected case
            trace['name'] = case
    update_textbox(Case1, Case2, Timer)

In [33]:
# Create dashboard: time-series figure on top; below it the histogram next to
# a column holding the dropdown controls, a spacer and the t-test text box
from ipywidgets import interactive, HBox, VBox, HTML
widget = interactive(update, Case1=list(cases), Case2=list(cases), Timer=list(names))
controls_column = VBox([widget, HTML('<br />'), textbox])
bottom_row = HBox([hist, controls_column])
VBox([fig, bottom_row])

VBox(children=(FigureWidget({
    'data': [{'customdata': array([[Timestamp('2019-10-17 00:00:00'), None, None…

### Plot of wall-clock times for nightly runs
Changepoints are estimated using a generalized likelihood ratio method on each timer. 
* Markers: recorded wall-clock time
* Solid line: average wall-clock time between changepoints
* Dotted lines: average wall-clock time $\pm$ two standard deviations

### Histogram of recent observations
Using data since the most recently detected changepoint, we plot histograms of the two cases being considered.

### T-test for difference in average wall-clock time
Data since the most recent changepoint is trimmed to remove outliers, and then we use a t-test to compare the average performance of the two test cases. One, two, and three asterisks indicate significance levels of 0.05, 0.01, and 0.001 respectively.