In [None]:
from glob import glob
from os import path
import json
import numbers
from pprint import pprint

import numpy as np

# NOTE(review): bare expression in the middle of the cell -- its value is neither
# stored nor the cell's last expression. The same pattern is globbed again where
# rawResults is built.
glob('target-nightly-*')

def getIndividualBenchmark(targetDir, benchName):
    """Load the saved estimates of one benchmark from one target directory.

    Reads ``metrics-estimates.json`` and then ``estimates.json`` from
    ``<targetDir>/criterion/<benchName>/new``.

    Returns the parsed ``metrics-estimates.json`` content with the parsed
    ``estimates.json`` content stored under the additional key
    ``'nanoseconds'``.
    """
    def readJson(jsonPath):
        # Parse a single JSON file, closing it as soon as it has been read.
        with open(jsonPath) as handle:
            return json.load(handle)

    resultDir = path.join(targetDir, 'criterion', benchName, 'new')

    combined = readJson(path.join(resultDir, 'metrics-estimates.json'))
    combined['nanoseconds'] = readJson(path.join(resultDir, 'estimates.json'))
    return combined

def parseNightlyResult(targetDir):
    """Collect the estimates of every benchmark found under one target directory.

    Every entry matching ``<targetDir>/criterion/*`` is treated as a benchmark
    named after the entry's basename.

    Returns a dict mapping each benchmark name to the value that
    getIndividualBenchmark returns for it.
    """
    parsed = {}
    for benchPath in glob(path.join(targetDir, 'criterion/*')):
        name = path.basename(benchPath)
        parsed[name] = getIndividualBenchmark(targetDir, name)
    return parsed

# Parse every path matching 'target-nightly-*' (relative to the working
# directory); the dict is keyed by the matched path.
rawResults = {tDir: parseNightlyResult(tDir) for tDir in glob('target-nightly-*')}

In [114]:
# Re-key the raw results by nightly date and collect the axes of the data set:
# every date, every benchmark name and every metric name that occurs.
from datetime import datetime

results = {}
for dirName, benchResults in rawResults.items():
    nightly = datetime.strptime(dirName.replace('target-nightly-', ''), '%Y-%m-%d')
    results[nightly] = benchResults

dates = set(results.keys())
benches = set()
metricNames = set()
for benchResults in results.values():
    benches.update(benchResults.keys())
    for benchMetrics in benchResults.values():
        metricNames.update(benchMetrics.keys())

# these metrics should always be *very* close to 0, so let's not investigate them right now
dummyVariableMetrics = { 'align-faults', 'context-switches', 'cpu-migrations', 'emulation-faults', 'page-faults-major'}
metricNames = metricNames - dummyVariableMetrics

dates = sorted(dates)
benches = sorted(benches)
# The sort has no lasting effect because a set discards ordering; metricNames
# stays a set because later cells call metricNames.add(...).
metricNames = set(sorted(metricNames))


In [116]:
def descStatForGraphing(stat, daysNeeded=10):
    """Build per-benchmark, per-metric series of one descriptive statistic.

    Reads the module-level ``results``, ``benches``, ``metricNames`` and
    ``dates``.

    stat: key of the statistic to extract from each measurement (e.g. 'Mean').
    daysNeeded: a benchmark is kept only if at least one of its metrics has
        this many dates with a complete measurement.

    Returns ``{bench: {metricName: {'estimate': array, 'errors': array}}}``
    with one array element per entry of ``dates``. ``errors`` is half the
    width of the confidence interval. Dates where any required key is missing
    hold NaN in both arrays.
    """
    missing = (float('nan'), float('nan'))

    def pointAndError(date, bench, metricName):
        # Returns (point estimate, half CI width), or None when any key is absent.
        try:
            statistic = results[date][bench][metricName][stat]
            interval = statistic['confidence_interval']
            point = statistic['point_estimate']
            halfWidth = (interval['upper_bound'] - interval['lower_bound']) / 2
        except KeyError:
            return None
        return (point, halfWidth)

    forGraphing = {}
    for bench in benches:
        seriesByMetric = {}
        daysPerMetric = [0]
        for metricName in metricNames:
            found = [pointAndError(date, bench, metricName) for date in dates]
            daysPerMetric.append(sum(1 for pair in found if pair is not None))

            estimates, errors = zip(*[missing if pair is None else pair for pair in found])
            seriesByMetric[metricName] = {
                'estimate': np.array(estimates),
                'errors': np.array(errors),
            }

        # Keep the benchmark when its best-covered metric has enough days.
        if max(daysPerMetric) >= daysNeeded:
            forGraphing[bench] = seriesByMetric

    return forGraphing

# Series of the 'Mean' statistic, using the default daysNeeded threshold.
meanForGraphing = descStatForGraphing('Mean')

In [117]:
from copy import copy

def ratios(forGraphing):
    """Add a hit-ratio series for every ``*-access`` / ``*-miss`` metric pair.

    For each benchmark, every metric named ``<prefix>-access`` that has a
    matching ``<prefix>-miss`` metric gains a ``<prefix>-ratio`` entry. Its
    ``'estimate'`` array holds, per data point:

    * NaN when the access count is NaN (no measurement for that day),
    * ``(access - miss) / access`` when the access count is positive,
    * 1.0 otherwise (no accesses, hence nothing missed).

    Ratio entries carry no ``'errors'`` array.

    forGraphing: ``{bench: {metricName: {'estimate': array, ...}}}``. It is not
        modified; the per-benchmark dicts are copied before entries are added.

    Returns the new mapping. Side effect: every ratio name produced is added
    to the module-level ``metricNames`` set.
    """
    accessSuffix = '-access'
    missSuffix = '-miss'
    ratioSuffix = '-ratio'

    # copy() would only duplicate the outer dict, so new entries would land in
    # the caller's per-benchmark dicts. Copy one level deeper instead.
    newForGraphing = {bench: dict(forBench) for bench, forBench in forGraphing.items()}

    for newForBench in newForGraphing.values():
        # Snapshot the names first: the dict grows while we work through it.
        accessMetrics = [n for n in newForBench if n.endswith(accessSuffix)]

        for accessName in accessMetrics:
            prefix = accessName[:-len(accessSuffix)]
            missName = prefix + missSuffix
            ratioName = prefix + ratioSuffix

            if missName not in newForBench:
                continue
            accesses = newForBench[accessName]['estimate']
            misses = newForBench[missName]['estimate']

            hitRatios = []
            for access, miss in zip(accesses, misses):
                # NaN never compares equal to anything (itself included), so
                # it has to be detected with isnan rather than ==.
                if np.isnan(access):
                    hitRatios.append(access)
                elif access > 0:
                    hitRatios.append((access - miss) / access)
                else:
                    hitRatios.append(1.0)

            metricNames.add(ratioName)
            newForBench[ratioName] = { 'estimate': np.array(hitRatios) }
    return newForGraphing

# Add the '-ratio' series derived from the '-access' / '-miss' metric pairs.
meanWithRatiosForGraphing = ratios(meanForGraphing)

In [118]:
def caches(forGraphing):
    """Add a ``'cache-hit-ratio'`` series to every benchmark.

    The ratio is ``(references - misses) / references`` per data point, taken
    from the ``'cache-references'`` and ``'cache-misses'`` estimates. Where the
    reference count is not positive (zero or NaN) that value is passed through
    unchanged, so a missing day stays NaN and a zero stays 0.

    forGraphing: ``{bench: {metricName: {'estimate': array, ...}}}``. It is not
        modified; the per-benchmark dicts are copied before entries are added.

    Returns the new mapping. Raises KeyError if a benchmark lacks either cache
    metric. Side effect: adds ``'cache-hit-ratio'`` to the module-level
    ``metricNames`` set when at least one benchmark is processed.
    """
    # copy() would only duplicate the outer dict, so the new entry would land
    # in the caller's per-benchmark dicts. Copy one level deeper instead.
    newForGraphing = {bench: dict(forBench) for bench, forBench in forGraphing.items()}

    for newForBench in newForGraphing.values():
        cacheRefs = newForBench['cache-references']['estimate']
        cacheMisses = newForBench['cache-misses']['estimate']

        # `ref > 0` is False for both 0 and NaN; those values are kept as-is.
        cacheRatios = [
            (ref - miss) / ref if ref > 0 else ref
            for ref, miss in zip(cacheRefs, cacheMisses)
        ]

        newForBench['cache-hit-ratio'] = { 'estimate': np.array(cacheRatios) }
        metricNames.add('cache-hit-ratio')
    return newForGraphing

# Add the 'cache-hit-ratio' series.
meanWithRatiosAndCachesForGraphing = caches(meanWithRatiosForGraphing)

In [119]:
def ipc(forGraphing):
    """Add an ``'instructions-per-cycle'`` series to every benchmark.

    Each data point is ``instructions / ref-cpu-cycles``. Where the cycle
    count is not positive (zero or NaN) the result is NaN.

    forGraphing: ``{bench: {metricName: {'estimate': array, ...}}}``. It is not
        modified; the per-benchmark dicts are copied before entries are added.

    Returns the new mapping. Raises KeyError if a benchmark lacks
    ``'instructions'`` or ``'ref-cpu-cycles'``. Side effect: adds
    ``'instructions-per-cycle'`` to the module-level ``metricNames`` set when
    at least one benchmark is processed.
    """
    # copy() would only duplicate the outer dict, so the new entry would land
    # in the caller's per-benchmark dicts. Copy one level deeper instead.
    newForGraphing = {bench: dict(forBench) for bench, forBench in forGraphing.items()}

    for newForBench in newForGraphing.values():
        instructionCounts = newForBench['instructions']['estimate']
        cycleCounts = newForBench['ref-cpu-cycles']['estimate']

        # `cycles > 0` is False for both 0 and NaN, which both map to NaN.
        ipcEstimates = [
            count / cycles if cycles > 0 else float('nan')
            for count, cycles in zip(instructionCounts, cycleCounts)
        ]

        newForBench['instructions-per-cycle'] = { 'estimate': np.array(ipcEstimates) }
        metricNames.add('instructions-per-cycle')
    return newForGraphing
        
# Add the 'instructions-per-cycle' series, then show the metric names known so far.
meanForGraphingWithOthers = ipc(meanWithRatiosAndCachesForGraphing)
metricNames

{u'bpu-read-access',
 u'bpu-read-miss',
 u'bpu-read-ratio',
 u'branch-instructions',
 u'branch-misses',
 u'bus-cycles',
 'cache-hit-ratio',
 u'cache-misses',
 u'cache-references',
 u'cpu-clock',
 u'cpu-cycles',
 u'dtlb-read-access',
 u'dtlb-read-miss',
 u'dtlb-read-ratio',
 u'dtlb-write-access',
 u'dtlb-write-miss',
 u'dtlb-write-ratio',
 u'instructions',
 'instructions-per-cycle',
 u'itlb-read-access',
 u'itlb-read-miss',
 u'itlb-read-ratio',
 u'l1d-read-access',
 u'l1d-read-miss',
 u'l1d-read-ratio',
 u'l1d-write-access',
 u'l1i-read-miss',
 u'll-read-access',
 u'll-read-miss',
 u'll-read-ratio',
 u'll-write-access',
 u'll-write-miss',
 u'll-write-ratio',
 'nanoseconds',
 u'node-read-access',
 u'node-read-miss',
 u'node-read-ratio',
 u'node-write-access',
 u'node-write-miss',
 u'node-write-ratio',
 u'page-fault',
 u'page-fault-minor',
 u'ref-cpu-cycles',
 u'task-clock'}

In [128]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

%matplotlib osx

import matplotlib.pyplot as plt

def _yAxisLimits(means):
    """Choose (bottom, top) y-limits for a series, ignoring NaN gaps.

    Returns None when the series has no non-NaN value or the computed limits
    are not finite, in which case the caller should leave autoscaling alone.
    """
    values = np.asarray(means, dtype=float)
    # Days without a measurement are stored as NaN. Left in, they would turn
    # the median/std (and therefore both limits) into NaN.
    values = values[~np.isnan(values)]
    if values.size == 0:
        return None

    highest = values.max()
    lowest = values.min()
    middle = np.median(values)
    midToLim = np.std(values) * 3

    # Either span three standard deviations around the median, or pad the
    # extreme value by 15%.
    if midToLim < lowest:
        bottom = middle - midToLim
    else:
        bottom = lowest - (0.15 * lowest)

    if midToLim > highest:
        top = middle + midToLim
    else:
        top = highest + (0.15 * highest)

    if not (np.isfinite(bottom) and np.isfinite(top)):
        return None
    return bottom, top


def drawGraph(bench):
    """Plot wall time, instructions, cycles and cache hit ratio for one benchmark.

    Reads the module-level ``meanForGraphingWithOthers`` and ``dates``. Draws
    one subplot per metric with ``dates`` on the x axis; error bars are shown
    when the metric has an ``'errors'`` array. Prints progress messages and
    shows the figure; returns nothing.

    bench: benchmark name, a key of ``meanForGraphingWithOthers``.
    """
    print('starting graphing')
    metrics = [
        ('nanoseconds', { 'title': 'Wall Time (ns)' }), 
        ('instructions', { 'title': 'CPU Instructions' }),
        ('ref-cpu-cycles', { 'title': 'CPU Cycles' }),
        ('cache-hit-ratio', { 'title': 'Cache Hit Ratio' })
    ]
    
    fig, axes = plt.subplots(nrows=len(metrics), ncols=1)
    fig.suptitle(bench)
    
    for axis, (metricName, graphing) in zip(axes, metrics):
        print('graphing {}'.format(metricName))
        
        series = meanForGraphingWithOthers[bench][metricName]
        means = series['estimate']
        meanErrors = series.get('errors', None)
        
        axis.set_title(graphing['title'])
        
        if 'ylabel' in graphing:
            axis.set_ylabel(graphing['ylabel'])
            
        # Only pin the vertical range when usable limits exist; otherwise
        # keep matplotlib's autoscaling instead of passing NaN to set_ylim.
        limits = _yAxisLimits(means)
        if limits is not None:
            bottom, top = limits
            axis.set_ylim(bottom=bottom, top=top)
        
        if meanErrors is not None:
            axis.errorbar(x=dates, y=means, yerr=meanErrors, fmt='o')
        else:
            axis.plot(dates, means, 'o')
        
    fig.tight_layout()
    fig.subplots_adjust(left=0.1, bottom=0.06, right=0.9, top=0.93, hspace=0.28)
    fig.show()

# Show an interactive widget that calls drawGraph with the chosen benchmark name;
# the choices are the sorted keys of meanForGraphingWithOthers.
# NOTE(review): `display` is not imported in this file -- presumably provided by
# the notebook kernel; confirm.
display(interactive(drawGraph, bench=sorted(list(meanForGraphingWithOthers.keys()))))

aW50ZXJhY3RpdmUoY2hpbGRyZW49KERyb3Bkb3duKGRlc2NyaXB0aW9uPXUnYmVuY2gnLCBvcHRpb25zPSgnYnJvdGxpXzFfMV8zOjpiZW5jaF9lMmVfZGVjb2RlX3E5XzVfMTAyNGsnLCAnYnLigKY=
