scaffold performance testing with vbench
adamgreenhall committed Dec 11, 2012
1 parent 9502215 commit b678795
Showing 7 changed files with 338 additions and 0 deletions.
1 change: 1 addition & 0 deletions speed_check/.gitignore
@@ -0,0 +1 @@
benchmarks.db
Empty file added speed_check/data_in_out.py
Empty file.
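A hedged sketch: data_in_out.py is committed empty as a placeholder. One way it might later be populated, following the same Benchmark pattern as unit_commitment.py below; the csv=True flag and the benchmark name here are illustrative assumptions, not part of this commit:

from vbench.benchmark import Benchmark

SECTION = 'Data input/output'

common_setup = """
from minpower_benchmark_utils import *
"""

setup = common_setup + """
directory = '~/minpower/minpower/tests/uc'
"""

# hypothetical: enable csv output so the run exercises the write path
statement = """
solve_problem(directory,
    shell=False,
    problemfile=False,
    csv=True)
"""

bm_uc_csv_out = Benchmark(statement, setup, ncalls=1,
                          name='uc_csv_out')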
8 changes: 8 additions & 0 deletions speed_check/minpower_benchmark_utils.py
@@ -0,0 +1,8 @@
import pandas as pd
import numpy as np

from minpower.powersystem import PowerSystem
from minpower.generators import Generator

from minpower.config import user_config
from minpower.solve import solve_problem
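
# note: these names are star-imported by each benchmark's setup string
# (via common_setup in unit_commitment.py), so benchmark statements can
# call solve_problem et al. directly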
14 changes: 14 additions & 0 deletions speed_check/run_suite.py
@@ -0,0 +1,14 @@
#!/usr/bin/env python
from vbench.api import BenchmarkRunner
from suite import *

def run_process(run_option='eod'):
    # run_option: 'eod' benchmarks one commit per day, 'all' every commit,
    # 'last' only the most recent (per vbench's BenchmarkRunner)
    runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_URL,
                             BUILD, DB_PATH, TMP_DIR, PREPARE,
                             always_clean=True,
                             run_option=run_option, start_date=START_DATE,
                             module_dependencies=dependencies)
    runner.run()

if __name__ == '__main__':
    run_process('last')
81 changes: 81 additions & 0 deletions speed_check/suite.py
@@ -0,0 +1,81 @@
from vbench.api import Benchmark, GitRepo
from datetime import datetime

import os

modules = [
    'unit_commitment',
    'data_in_out'
]

by_module = {}
benchmarks = []

# import each benchmark module and collect the Benchmark instances it defines
for modname in modules:
    ref = __import__(modname)
    by_module[modname] = [v for v in ref.__dict__.values()
                          if isinstance(v, Benchmark)]
    benchmarks.extend(by_module[modname])

for bm in benchmarks:
    assert(bm.name is not None)

import getpass
import sys

USERNAME = getpass.getuser()

if sys.platform == 'darwin':
    HOME = '/Users/%s' % USERNAME
else:
    HOME = '/home/%s' % USERNAME


REPO_PATH = '/home/adam/minpower'
REPO_URL = 'git@github.com:adamgreenhall/minpower.git'
DB_PATH = os.path.join(REPO_PATH, 'speed_check/benchmarks.db')
TMP_DIR = os.path.join(REPO_PATH, 'tmp_build')

PREPARE = """
python setup.py clean
"""
BUILD = """
python setup.py build_ext --inplace
"""
START_DATE = datetime(2012, 12, 11)

dependencies = ['minpower_benchmark_utils.py']

repo = GitRepo(REPO_PATH)

def make_plots(benchmarks):
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt

    vb_path = os.path.join(REPO_PATH, 'speed_check')
    fig_base_path = os.path.join(vb_path, 'figures')

    if not os.path.exists(fig_base_path):
        print 'creating %s' % fig_base_path
        os.makedirs(fig_base_path)

    for bmk in benchmarks:
        fig_full_path = os.path.join(fig_base_path, '%s.png' % bmk.name)

        # make the figure
        plt.figure(figsize=(10, 6))
        ax = plt.gca()
        bmk.plot(DB_PATH, ax=ax)

        # pad the date axis by 30 days on either side
        start, end = ax.get_xlim()
        plt.xlim([start - 30, end + 30])
        plt.savefig(fig_full_path, bbox_inches='tight')
        plt.close('all')

if __name__ == '__main__':
    for bm in benchmarks:
        print bm.name
        print bm.get_results(DB_PATH)
    make_plots(benchmarks)
213 changes: 213 additions & 0 deletions speed_check/test_perf.py
@@ -0,0 +1,213 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
What
----
vbench is a library which can be used to benchmark the performance
of a codebase over time.
Although vbench can collect data over many commites, generate plots
and other niceties, for Pull-Requests the important thing is the
performance of the HEAD commit against a known-good baseline.
This script tries to automate the process of comparing these
two commits, and is meant to run out of the box on a fresh
clone.
How
---
These are the steps taken:
1) create a temp directory into which vbench will clone the temporary repo.
2) instantiate a vbench runner, using the local repo as the source repo.
3) perform a vbench run for the baseline commit, then the target commit.
4) pull the results for both commits from the db. use pandas to align
everything and calculate a ration for the timing information.
5) print the results to the log file and to stdout.
"""

import shutil
import os
import argparse
import tempfile
import time

DEFAULT_MIN_DURATION = 0.01
BASELINE_COMMIT = '2149c50' # 0.9.1 + regression fix + vb fixes # TODO: detect upstream/master

parser = argparse.ArgumentParser(description='Use vbench to generate a report comparing performance between two commits.')
parser.add_argument('-a', '--auto',
                    help='Execute a run using the defaults for the base and target commits.',
                    action='store_true',
                    default=False)
parser.add_argument('-b', '--base-commit',
                    help='The commit serving as performance baseline (default: %s).' % BASELINE_COMMIT,
                    type=str)
parser.add_argument('-t', '--target-commit',
                    help='The commit to compare against the baseline (default: HEAD).',
                    type=str)
parser.add_argument('-m', '--min-duration',
                    help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
                    type=float,
                    default=DEFAULT_MIN_DURATION)
parser.add_argument('-o', '--output',
                    metavar="<file>",
                    dest='log_file',
                    help='path of file in which to save the report (default: vb_suite.log).')
args = parser.parse_args()

def get_results_df(db, rev):
    """Takes a git commit hash and returns a DataFrame of benchmark results
    """
    from pandas import DataFrame
    bench = DataFrame(db.get_benchmarks())
    results = DataFrame(db.get_rev_results(rev).values())

    # Since db.get_rev_results returns an unlabeled dict,
    # we have to break encapsulation a bit.
    results.columns = db._results.c.keys()
    results = results.join(bench['name'], on='checksum').set_index("checksum")
    return results

def prprint(s):
    print("*** %s" % s)

def main():
    from pandas import DataFrame
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from suite import REPO_PATH, BUILD, DB_PATH, PREPARE, dependencies, benchmarks

    if not args.base_commit:
        args.base_commit = BASELINE_COMMIT

    # GitRepo wants exactly 7 character hash?
    args.base_commit = args.base_commit[:7]
    if args.target_commit:
        args.target_commit = args.target_commit[:7]

    if not args.log_file:
        args.log_file = os.path.abspath(os.path.join(REPO_PATH, 'vb_suite.log'))

    TMP_DIR = tempfile.mkdtemp()
    prprint("TMP_DIR = %s" % TMP_DIR)
    prprint("LOG_FILE = %s\n" % args.log_file)

    # open the logfile before the try block so the finally clause
    # can safely close it
    logfile = open(args.log_file, 'w')

    try:
        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(benchmarks, REPO_PATH, REPO_PATH, BUILD, DB_PATH,
                                 TMP_DIR, PREPARE, always_clean=True,
                                 # run_option='eod', start_date=START_DATE,
                                 module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint("Slaughtering kittens...")
        (repo.shas, repo.messages,
         repo.timestamps, repo.authors) = _parse_commit_log(REPO_PATH)

        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' % (h_baseline, repo.messages.get(h_baseline, "")))

        prprint("removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        totals = totals.ix[totals.t_head > args.min_duration]  # ignore below threshold
        totals = totals.dropna().sort("ratio").set_index('name')  # sort in ascending order

        s = "\n\nResults:\n"
        s += totals.to_string(float_format=lambda x: "{:4.4f}".format(x).rjust(10))
        s += "\n\n"
        s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n"
        s += "- a ratio of 1.30 means the target commit is 30% slower than the baseline.\n\n"

        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline, repo.messages.get(h_baseline, ""))

        logfile.write(s)

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n" % args.log_file)

    finally:
        # print("Disposing of TMP_DIR: %s" % TMP_DIR)
        shutil.rmtree(TMP_DIR)
        logfile.close()


# hack: vbench.git ignores some commits, but we
# need to be able to reference any commit.
# modified from vbench.git
def _parse_commit_log(repo_path):
    from vbench.git import parser, _convert_timezones
    from pandas import Series
    git_cmd = 'git --git-dir=%s/.git --work-tree=%s ' % (repo_path, repo_path)
    githist = git_cmd + ('log --graph --pretty=format:'
                         '\"::%h::%cd::%s::%an\" > githist.txt')
    os.system(githist)
    githist = open('githist.txt').read()
    os.remove('githist.txt')

    shas = []
    timestamps = []
    messages = []
    authors = []
    for line in githist.split('\n'):
        if '*' not in line.split("::")[0]:  # skip non-commit lines
            continue

        _, sha, stamp, message, author = line.split('::', 4)

        # parse timestamp into datetime object
        stamp = parser.parse(stamp)

        shas.append(sha)
        timestamps.append(stamp)
        messages.append(message)
        authors.append(author)

    # to UTC for now
    timestamps = _convert_timezones(timestamps)

    shas = Series(shas, timestamps)
    messages = Series(messages, shas)
    timestamps = Series(timestamps, shas)
    authors = Series(authors, shas)
    return shas[::-1], messages[::-1], timestamps[::-1], authors[::-1]


if __name__ == '__main__':
    if not args.auto and not args.base_commit and not args.target_commit:
        parser.print_help()
    else:
        main()
21 changes: 21 additions & 0 deletions speed_check/unit_commitment.py
@@ -0,0 +1,21 @@
from vbench.benchmark import Benchmark

SECTION = 'Unit commitment'

common_setup = """
from minpower_benchmark_utils import *
"""


setup = common_setup + """
directory = '~/minpower/minpower/tests/uc'
"""
statement = """
solve_problem(directory,
shell=False,
problemfile=False,
csv=False)
"""

bm_simple_uc = Benchmark(statement, setup, ncalls=1,
name='simple_uc')
