In [1]:
from glob import glob
from os import path
import json
import numbers
from pprint import pprint

import numpy as np

glob('target-nightly-*')

def getIndividualBenchmark(targetDir, benchName):
    newDir = path.join(targetDir, 'criterion', benchName, 'new')
    mainEstimates = path.join(newDir, 'estimates.json')
    metricsEstimates = path.join(newDir, 'metrics-estimates.json')
    
    with open(metricsEstimates) as f:
        metrics = json.loads(f.read())
    with open(mainEstimates) as f:
        metrics['nanoseconds'] = json.loads(f.read())
    
    return metrics

def parseNightlyResult(targetDir):
    benches = glob(path.join(targetDir, 'criterion/*'))
    benchNames = [path.basename(b) for b in benches]
    allMetrics = {benchName: getIndividualBenchmark(targetDir, benchName) for benchName in benchNames}
    
    return allMetrics

rawResults = {tDir: parseNightlyResult(tDir) for tDir in glob('target-nightly-*')}

In [2]:
# transform results to be a series keyed by benchmark name
from datetime import datetime

results = {datetime.strptime(d.replace('target-nightly-', ''), '%Y-%m-%d'): v for d, v in rawResults.items()}

dates = set()
benches = set()
metricNames = set()
for date, metrics in results.items():
    dates.add(date)
    benches.update(metrics.keys())
    for _, metricName in metrics.items():
        metricNames.update(k for k in metricName.keys())
        
# these metrics should always be *very* close to 0, so lets not investigate them right now
dummyVariableMetrics = { 'align-faults', 'context-switches', 'cpu-migrations', 'emulation-faults', 'page-faults-major'}
metricNames = metricNames.difference(dummyVariableMetrics)

dates = sorted(list(dates))
benches = sorted(list(benches))
metricNames = set(sorted(list(metricNames)))


In [3]:
def descStatForGraphing(stat, daysNeeded=10):
    forGraphing = {}
    for bench in benches:
        forBench = {}
        maxDaysAvailable = 0
        for metricName in metricNames:
            estimatesAndErrors = []
            daysAvailable = 0
            for date in dates:
                try:
                    measure = results[date][bench][metricName]
                    statistic = measure[stat]
                    ci95 = statistic['confidence_interval']
                    pointEstimate = statistic['point_estimate']
                    error = (ci95['upper_bound'] - ci95['lower_bound']) / 2

                    estimatesAndErrors.append((pointEstimate, error))
                    daysAvailable += 1
                except KeyError:
                    estimatesAndErrors.append((float('nan'), float('nan')))

            if daysAvailable > maxDaysAvailable:
                maxDaysAvailable = daysAvailable
            
            estimates, errors= zip(*estimatesAndErrors)
            forBench[metricName] = { 
                'estimate': np.array(estimates), 
                'errors': np.array(errors) 
            }
        
        if maxDaysAvailable >= daysNeeded:
            forGraphing[bench] = forBench
            
    # with open('allresults-{}.json'.format(stat), 'w') as f:
    #     f.write(json.dumps(forGraphing))
    
    return forGraphing

meanForGraphing = descStatForGraphing('Mean')

In [4]:
from copy import copy

def ratios(forGraphing):
    accessSuffix = '-access'
    missSuffix = '-miss'
    newForGraphing = copy(forGraphing)
    for bench, newForBench in newForGraphing.items():
        accessMetrics = [n for n in newForBench.keys() if n.endswith(accessSuffix)]
            
        for accessName in accessMetrics:
            missName = accessName.replace(accessSuffix, missSuffix)
            ratioName = accessName.replace(accessSuffix, '-ratio')

            if accessName not in newForBench or missName not in newForBench:
                continue
            accesses = newForBench[accessName]['estimate']
            misses = newForBench[missName]['estimate']

            ratios = []
            for access, miss in zip(accesses, misses):
                if access == float('nan'):
                    ratios.append(access)
                elif access > 0:
                    ratios.append((access - miss) / access)
                else:
                    ratios.append(1)

            metricNames.add(ratioName)
            newForBench[ratioName] = { 'estimate': np.array(ratios) }
    return newForGraphing

meanWithRatiosForGraphing = ratios(meanForGraphing)

In [5]:
def caches(forGraphing):
    newForGraphing = copy(forGraphing)
    for bench, newForBench in newForGraphing.items():
        cacheRefs = newForBench['cache-references']['estimate']
        cacheMisses = newForBench['cache-misses']['estimate']
        cacheRatios = []
        for ref, miss in zip(cacheRefs, cacheMisses):
            if ref > 0:
                cacheRatios.append((ref - miss) / ref)
            else:
                cacheRatios.append(ref)
                
        newForBench['cache-hit-ratio'] = { 'estimate': np.array(cacheRatios) }
        metricNames.add('cache-hit-ratio')
    return newForGraphing

meanWithRatiosAndCachesForGraphing = caches(meanWithRatiosForGraphing)

In [6]:
def ipc(forGraphing):
    newForGraphing = copy(forGraphing)
    for bench, newForBench in newForGraphing.items():
        instructionCounts = newForBench['instructions']['estimate']
        cycleCounts = newForBench['ref-cpu-cycles']['estimate']
        ipcEstimates = []
        for i, c in zip(instructionCounts, cycleCounts):
            if c > 0:
                ipcEstimates.append(i / c)
            else:
                ipcEstimates.append(float('nan'))

        newForBench['instructions-per-cycle'] = { 'estimate': np.array(ipcEstimates) }
        metricNames.add('instructions-per-cycle')
    return newForGraphing
        
meanForGraphingWithOthers = ipc(meanWithRatiosAndCachesForGraphing)

In [7]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.style.use('seaborn')

In [19]:
%matplotlib osx
def drawAsTimeSeries(bench, leftX=None, rightX=None):
    print('starting graphing')
    metrics = [
        ('nanoseconds', { 'title': 'Wall Time (ns)' }), 
        ('instructions', { 'title': 'CPU Instructions' }),
        ('ref-cpu-cycles', { 'title': 'CPU Cycles' }),
        ('cache-hit-ratio', { 'title': 'Cache Hit Ratio' })
    ]
    for i in range(0,5):
        
    
        fig, axes = plt.subplots(nrows=1, ncols=len(meanForGraphingWithOthers[bench].keys()))
        fig.suptitle(bench)

        #for axis, (metricName, graphing) in zip(axes, metrics):
        for axis, metricName in zip(axes, meanForGraphingWithOthers[bench].keys()):
            print('graphing {}'.format(metricName))

            means = meanForGraphingWithOthers[bench][metricName]['estimate']
            meanErrors = meanForGraphingWithOthers[bench][metricName].get('errors', None)

            axis.set_title(metricName)

            #if 'ylabel' in graphing:
            #    axis.set_ylabel(graphing['ylabel'])

            # figure out how much vertical space to devote, and also decide on
            # an interval 
            highest = max(means)
            lowest = min(means)
            middle = np.median(means)
            stdev = np.std(means)
            midToLim = stdev

            if midToLim < lowest:
                bottom = middle - midToLim
            else:
                bottom = lowest - (0.15 * lowest)

            if midToLim > highest:
                top = middle + midToLim
            else:
                top = highest + (0.15 * highest)

            axis.set_ylim(bottom=bottom, top=top)

            if meanErrors is not None and leftX is not None and rightX is not None:
                axis.set_xlim(left=leftX, right=rightX)
                axis.errorbar(x=dates, y=means, yerr=meanErrors, fmt='o')
            else:
                axis.plot(dates, means, 'o')

        fig.tight_layout()
        fig.subplots_adjust(left=0.1, bottom=0.06, right=0.9, top=0.93, hspace=0.28)
        fig.show()
    
#display(interactive(
drawAsTimeSeries(bench='rayon_1_0_0::factorial::factorial_iterator')
#))

starting graphing
graphing itlb-read-miss
graphing cache-references
graphing nanoseconds
graphing l1d-read-access
graphing node-write-access
graphing cpu-clock
graphing bpu-read-ratio
graphing ll-read-ratio
graphing branch-instructions
graphing ll-write-miss
graphing cpu-cycles
graphing instructions-per-cycle
graphing branch-misses
graphing dtlb-write-ratio
graphing dtlb-read-access
graphing task-clock
graphing itlb-read-access
graphing cache-hit-ratio
graphing node-write-ratio
graphing ll-read-miss
graphing cache-misses
graphing bpu-read-access
graphing page-fault
graphing bus-cycles
graphing node-read-ratio
graphing ll-read-access
graphing ll-write-access
graphing node-write-miss
graphing dtlb-write-miss
graphing l1d-read-miss
graphing page-fault-minor
graphing ll-write-ratio
graphing instructions
graphing bpu-read-miss
graphing l1d-read-ratio
graphing itlb-read-ratio
graphing l1i-read-miss
graphing node-read-access
graphing ref-cpu-cycles
graphing dtlb-read-miss
graphing dtlb-write-

In [9]:
import jenkspy

def drawAsNumberLine(bench):
    print('starting graphing')
    metrics = [
        ('nanoseconds', { 'title': 'Wall Time (ns)', 'better': '-' }), 
        ('instructions', { 'title': 'CPU Instructions', 'better': '-' }),
        ('ref-cpu-cycles', { 'title': 'CPU Cycles', 'better': '-' }),
        ('cache-hit-ratio', { 'title': 'Cache Hit Ratio', 'better': '+' })
    ]
    
    fig, axes = plt.subplots(nrows=1, ncols=len(metrics))
    fig.suptitle(bench)
    
    for ax, (metricName, graphing) in zip(axes, metrics):
        print('graphing {}'.format(metricName))
        means = meanForGraphingWithOthers[bench][metricName]['estimate']
        errors = meanForGraphingWithOthers[bench][metricName].get('errors', None)
        ax.set_title(graphing['title'])
        
        meansWithoutNan = np.array(means[~np.isnan(means)])
        
        if errors is not None:
            errorsWithoutNan = np.array(errors[~np.isnan(means)])
        else:
            errorsWithoutNan = None
            
        highest = max(means)
        lowest = min(means)
        middle = np.median(means)
        stdev = np.std(means)
        midToLim = stdev * 3
        
        if midToLim < lowest:
            bottom = middle - midToLim
        else:
            bottom = lowest - (0.15 * lowest)
        
        if midToLim > highest:
            top = middle + midToLim
        else:
            top = highest + (0.15 * highest)
       
        ax.set_ylim(bottom=bottom, top=top)
        
        breakBoundaries = jenkspy.jenks_breaks(meansWithoutNan, nb_class=3)
        breaks = []
        for i, bottom in enumerate(breakBoundaries[:-1]):
            breaks.append((bottom, breakBoundaries[i+1]))
        
        if graphing['better'] == '+':
            breaks = reversed(breaks)
        
        for bottom, top in breaks:
            withinRangeIndices = np.where(np.logical_and(meansWithoutNan>=bottom, meansWithoutNan<=top))
            print(withinRangeIndices)
            cluster = np.array(meansWithoutNan[withinRangeIndices])
            
            if errorsWithoutNan is not None:
                clusterErrors = np.array(errorsWithoutNan[withinRangeIndices])
                print(cluster)
                print(clusterErrors)
                ax.errorbar(x=np.zeros_like(cluster), y=cluster, yerr=clusterErrors, fmt='o')
            else:
                ax.plot(np.zeros_like(cluster), cluster, 'o')
            #ax.boxplot(cluster, vert=False)
                
    fig.tight_layout()
    fig.subplots_adjust(left=0.1, bottom=0.06, right=0.9, top=0.93, hspace=0.28)
    fig.show()
    
#display(interactive(drawAsNumberLine, bench=sorted(list(meanForGraphingWithOthers.keys()))))
drawAsNumberLine(bench='doom_9e197d7::freedoom1')

starting graphing
graphing nanoseconds
(array([ 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
       27, 28, 29, 30, 31, 32, 33, 56]),)
[2.90401870e+08 2.88146551e+08 2.91838180e+08 2.90109712e+08
 2.92192681e+08 2.91140921e+08 2.91712092e+08 2.89261821e+08
 2.90398789e+08 2.90564858e+08 2.89365373e+08 2.90189776e+08
 2.88698107e+08 2.91152895e+08 2.88860582e+08 2.90294639e+08
 2.91316158e+08 2.92664940e+08 2.91228037e+08 2.90870496e+08
 2.92359090e+08 2.89985014e+08 2.91733576e+08 2.92201644e+08
 2.93772912e+08]
[ 346439.34913698  189081.16417411  380686.80000514  295092.64631876
  351374.53681266  604670.0676432   243422.76095796  288879.89511639
  275485.10034719  434099.78904578  334430.81346518  385818.03211099
  190303.82398865  258571.1913802   182016.37456948  180249.84173334
  229735.56681094  645785.25647444  351817.32771966  211120.35866007
  698305.70197868  302428.93713182  230386.84887153 1060148.68983579
  696173.58982086]
(array([14, 34, 35, 36, 37,