Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add benchmark framework for Chainer #4458

Closed
wants to merge 15 commits into from
6 changes: 6 additions & 0 deletions benchmarks/.gitignore
@@ -0,0 +1,6 @@
html/
results/
env/
chainer/
cupy/
.asv-machine.json
53 changes: 53 additions & 0 deletions benchmarks/README.rst
@@ -0,0 +1,53 @@
Chainer Benchmarks
==================

Benchmarking Chainer with Airspeed Velocity.

Note that CuPy versions earlier than v3.1.0 or v4.0.0b1 are not supported.

Requirements
------------

* ``asv``
* ``Cython`` (to build CuPy)

Usage
-----

.. code-block:: sh

# Enable ccache for performance (optional).
export PATH="/usr/lib/ccache:${PATH}"
export NVCC="ccache nvcc"

# Run benchmark against target commit-ish of Chainer and CuPy.
# Note that specified versions must be a compatible combination.
# You can use `find_cupy_version.py` helper tool to get appropriate CuPy
# version for the given Chainer version.
./run.sh master master
./run.sh v4.0.0b4 v4.0.0b4

# Compare the benchmark results between two commits to see regressions
# and/or performance improvements on the command line.
alias git_commit='git show --format="%H"'
asv compare $(git_commit v4.0.0b4) $(git_commit master)

# Convert the results into HTML.
# The result will be in the `html` directory.
asv publish

# Start the HTTP server to browse HTML.
asv preview

Alternatively you can use Docker.

.. code-block:: sh

# Build docker image for benchmark.
docker build -t chainer-benchmark docker

# Create a machine configuration file (`.asv-machine.json`) in this directory (first time only).
nvidia-docker run --rm -it -u ${UID}:${GID} -v ${PWD}:/benchmarks -w /benchmarks -e HOME=/benchmarks chainer-benchmark asv machine --machine $(hostname)

# Run benchmark.
nvidia-docker run --rm -it -u ${UID}:${GID} -v ${PWD}:/benchmarks -w /benchmarks -e HOME=/benchmarks chainer-benchmark ./run.sh master master --machine $(hostname)
153 changes: 153 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,153 @@
{
// The version of the config file format. Do not change, unless
// you know what you are doing.
"version": 1,

// The name of the project being benchmarked
"project": "chainer",

// The project's homepage
"project_url": "https://chainer.org/",

// The URL or local path of the source code repository for the
// project being benchmarked
"repo": "https://github.com/chainer/chainer.git",

// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["master", "v3"], // for git
// "branches": ["default"], // for mercurial

// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
// "dvcs": "git",

// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "virtualenv",

// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
//"install_timeout": 600,

// the base URL to show a commit for the project.
"show_commit_url": "https://github.com/chainer/chainer/commit/",

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
// "pythons": ["2.7", "3.3"],

// The list of conda channel names to be searched for benchmark
// dependency packages in the specified order
// "conda_channels": ["conda-forge", "defaults"]

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list or empty string indicates to just test against the default
// (latest) version. null indicates that the package is not to be
// installed. If the package to be tested is only available from
// PyPI, and the 'environment_type' is conda, then you can preface
// the package name by 'pip+', and the package will be installed via
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
//
// "matrix": {
// "numpy": ["1.6", "1.7"],
// "six": ["", null], // test with and without six installed
// "pip+emcee": [""], // emcee is only available for install with pip.
// },
"matrix": {
// CuPy dependencies.
"numpy": [],
"six": [],
"fastrlock": [],
// Optional dependencies required for benchmark.
"ideep4py": [],
},

// Combinations of libraries/python versions can be excluded/included
// from the set to test. Each entry is a dictionary containing additional
// key-value pairs to include/exclude.
//
// An exclude entry excludes entries where all values match. The
// values are regexps that should match the whole string.
//
// An include entry adds an environment. Only the packages listed
// are installed. The 'python' key is required. The exclude rules
// do not apply to includes.
//
// In addition to package names, the following keys are available:
//
// - python
// Python version, as in the *pythons* variable above.
// - environment_type
// Environment type, as above.
// - sys_platform
// Platform, as in sys.platform. Possible values for the common
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
//
// "exclude": [
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
// {"environment_type": "conda", "six": null}, // don't run without six on conda
// ],
//
// "include": [
// // additional env for python2.7
// {"python": "2.7", "numpy": "1.8"},
// // additional env if run on windows+conda
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
// ],

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
// "benchmark_dir": "benchmarks",

// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
// "env_dir": "env",

// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
// "results_dir": "results",

// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
// "html_dir": "html",

// The number of characters to retain in the commit hashes.
// "hash_length": 8,

// `asv` will cache wheels of the recent builds in each
// environment, making them faster to install next time. This is the
// number of builds to keep, per environment.
// "wheel_cache_size": 0

// The commits after which the regression search in `asv publish`
// should start looking for regressions. Dictionary whose keys are
// regexps matching to benchmark names, and values corresponding to
// the commit (exclusive) after which to start looking for
// regressions. The default is to start from the first commit
// with results. If the commit is `null`, regression detection is
// skipped for the matching benchmark.
//
// "regressions_first_commits": {
// "some_benchmark": "352cdf", // Consider regressions only after this commit
// "another_benchmark": null, // Skip regression detection altogether
// }

// The thresholds for relative change in results, after which `asv
// publish` starts reporting regressions. Dictionary of the same
// form as in ``regressions_first_commits``, with values
// indicating the thresholds. If multiple entries match, the
// maximum is taken. If no entry matches, the default is 5%.
//
// "regressions_thresholds": {
// "some_benchmark": 0.01, // Threshold of 1%
// "another_benchmark": 0.5, // Threshold of 50%
// }
}
40 changes: 40 additions & 0 deletions benchmarks/benchmarks/__init__.py
@@ -0,0 +1,40 @@
import inspect

# Ensure that CuPy and cuDNN are available.
import cupy # NOQA
import cupy.cudnn # NOQA


class BenchmarkBase(object):
    """Common parent of every benchmark class.

    Instantiating a benchmark attaches a shortened ``pretty_name`` attribute
    to each function/method found on the class. The ``setup``,
    ``setup_cache`` and ``teardown`` hooks are no-ops that subclasses may
    override.

    See also: http://asv.readthedocs.io/en/v0.2.1/writing_benchmarks.html
    """

    # Allow up to 10 minutes, instead of the default (60 seconds).
    timeout = 600

    def __init__(self, *args, **kwargs):
        """Label every method of the class with ``<class>.<function_name>``.

        The default label is ``<module>.<class>.<function_name>``, which is
        often too verbose to display in the result HTML. A leading ``time_``
        is stripped from the function name. All arguments are ignored.

        This is a workaround needed until ASV 0.3 release.
        """
        def _is_function_like(member):
            return inspect.ismethod(member) or inspect.isfunction(member)

        class_name = type(self).__name__
        prefix = 'time_'
        for attr_name, member in inspect.getmembers(
                self.__class__, predicate=_is_function_like):
            # For Python 2: the attribute must be set on the underlying
            # function object rather than on the method wrapper.
            target = getattr(member, '__func__', member)
            if attr_name.startswith(prefix):
                short_name = attr_name[len(prefix):]
            else:
                short_name = attr_name
            target.pretty_name = '{}.{}'.format(class_name, short_name)

    def setup(self, *args, **kwargs):
        """Do nothing; subclasses override this to prepare a benchmark."""
        pass

    def setup_cache(self, *args, **kwargs):
        """Do nothing; subclasses override this to prepare cached state."""
        pass

    def teardown(self, *args, **kwargs):
        """Do nothing; subclasses override this to clean up."""
        pass
Empty file.
93 changes: 93 additions & 0 deletions benchmarks/benchmarks/convnet/benchmark.py
@@ -0,0 +1,93 @@
import chainer
from chainer import optimizers

from benchmarks import BenchmarkBase
from benchmarks.utils import backends
from benchmarks.utils import is_backend_gpu
from benchmarks.utils import is_backend_ideep
from benchmarks.utils import parameterize


class _ConvnetBase(BenchmarkBase):
    """Forward/backward timing of the convnet-benchmarks models.

    Benchmark code from convnet-benchmark:
    https://github.com/soumith/convnet-benchmarks/tree/master/chainer
    """

    timeout = 600
    # asv attribute; see the asv documentation for its exact meaning.
    number = 1

    def setup(self, arch, batchsize):
        """Build the model for ``arch`` and run one forward pass.

        The input batch, the model and the forward output are stored on the
        instance so that the timing methods can reuse them.

        Args:
            arch: One of ``'alexnet'``, ``'googlenet'``, ``'vgga'`` or
                ``'overfeat'``.
            batchsize: Number of samples in the input batch.

        Raises:
            ValueError: If ``arch`` is not a known architecture name.
        """
        # Array module of the active backend (``self.xp`` is provided
        # outside this class).
        array_module = self.xp

        # Import lazily so that only the requested network is loaded.
        if arch == 'alexnet':
            from benchmarks.convnet.nets import alex
            net = alex.Alex()
        elif arch == 'googlenet':
            from benchmarks.convnet.nets import googlenet
            net = googlenet.GoogLeNet()
        elif arch == 'vgga':
            from benchmarks.convnet.nets import vgga
            net = vgga.vgga()
        elif arch == 'overfeat':
            from benchmarks.convnet.nets import overfeat
            net = overfeat.overfeat()
        else:
            raise ValueError('Invalid architecture name')

        # Move the parameters to the device of the active backend, if any.
        if is_backend_gpu():
            net.to_gpu()
        elif is_backend_ideep():
            net.to_intel64()

        # Setup optimizer
        sgd = optimizers.SGD(lr=0.01)
        sgd.setup(net)

        # Set cuDNN workspace size (1 GiB)
        chainer.cuda.set_max_workspace_size(int(1 * 2 ** 30))

        chainer.config.train = True

        # Square 3-channel input batch filled with a constant value.
        input_shape = (batchsize, 3, net.insize, net.insize)
        batch = array_module.ndarray(input_shape, dtype=array_module.float32)
        batch.fill(33333)

        if arch == 'googlenet':
            # GoogLeNet returns three outputs; sum them into a single one.
            head_a, head_b, head_c = net.forward(batch)
            result = head_a + head_b + head_c
        else:
            result = net.forward(batch)

        # Give the output a constant gradient to start the backward pass
        # from, and drop any gradients held by the model parameters.
        result.zerograd()
        result.grad.fill(3)
        net.cleargrads()

        self._x = batch
        self._model = net
        self._out = result

    def time_forward(self, arch, batchsize):
        """Time one forward pass over the prepared input batch."""
        self._model.forward(self._x)

    def time_backward(self, arch, batchsize):
        """Time one backward pass from the output computed in ``setup``."""
        self._out.backward()


@backends('gpu', 'gpu-cudnn')
@parameterize([('arch', ['vgga']), ('batchsize', [32])])
class ConvnetVGGA(_ConvnetBase):
    """Convnet benchmark for the ``vgga`` architecture at batch size 32.

    Declared for the ``'gpu'`` and ``'gpu-cudnn'`` backends.
    """


@backends('gpu', 'gpu-cudnn')
@parameterize([
    ('arch', [
        'alexnet',
        'googlenet',
        'overfeat',
    ]),
    ('batchsize', [128]),
])
class ConvnetOthers(_ConvnetBase):
    """Convnet benchmark for the remaining architectures at batch size 128.

    Covers ``alexnet``, ``googlenet`` and ``overfeat``; declared for the
    ``'gpu'`` and ``'gpu-cudnn'`` backends.
    """
Empty file.
29 changes: 29 additions & 0 deletions benchmarks/benchmarks/convnet/nets/alex.py
@@ -0,0 +1,29 @@
import chainer
import chainer.functions as F
import chainer.links as L


class Alex(chainer.Chain):
    """AlexNet-style chain: five convolutions followed by three linear layers.

    ``forward`` returns the output of the final linear layer (1000 units).
    """

    # Side length of the input image.
    insize = 224

    def __init__(self):
        super(Alex, self).__init__()
        with self.init_scope():
            # Convolutional feature extractor.
            self.conv1 = L.Convolution2D(3, 64, 11, stride=4, pad=2)
            self.conv2 = L.Convolution2D(64, 192, 5, pad=2)
            self.conv3 = L.Convolution2D(192, 384, 3, pad=1)
            self.conv4 = L.Convolution2D(384, 256, 3, pad=1)
            self.conv5 = L.Convolution2D(256, 256, 3, pad=1)
            # Fully connected classifier.
            self.fc6 = L.Linear(256 * 6 * 6, 4096)
            self.fc7 = L.Linear(4096, 4096)
            self.fc8 = L.Linear(4096, 1000)

    def forward(self, x):
        """Apply the network to ``x`` and return the ``fc8`` output."""
        def pool(features):
            # 3x3 max pooling with stride 2, used after conv1, conv2, conv5.
            return F.max_pooling_2d(features, 3, stride=2)

        act = pool(F.relu(self.conv1(x)))
        act = pool(F.relu(self.conv2(act)))
        for conv in (self.conv3, self.conv4):
            act = F.relu(conv(act))
        act = pool(F.relu(self.conv5(act)))
        for fc in (self.fc6, self.fc7):
            act = F.relu(fc(act))
        return self.fc8(act)