chainer · kmaehashi · Mar 12, 2018 · Mar 12, 2018 · Mar 12, 2018 · Mar 12, 2018
diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
@@ -0,0 +1,6 @@
+html/
+results/
+env/
+chainer/
+cupy/
+.asv-machine.json
diff --git a/benchmarks/README.rst b/benchmarks/README.rst
@@ -0,0 +1,53 @@
+Chainer Benchmarks
+==================
+
+Benchmarking Chainer with Airspeed Velocity.
+
+Note that CuPy versions earlier than v3.1.0 or v4.0.0b1 are not supported.
+
+Requirements
+------------
+
+* ``asv``
+* ``Cython`` (to build CuPy)
+
+Usage
+-----
+
+.. code-block:: sh
+
+    # Enable ccache for performance (optional).
+    export PATH="/usr/lib/ccache:${PATH}"
+    export NVCC="ccache nvcc"
+
+    # Run benchmark against target commit-ish of Chainer and CuPy.
+    # Note that specified versions must be a compatible combination.
+    # You can use the `find_cupy_version.py` helper tool to get the appropriate
+    # CuPy version for the given Chainer version.
+    ./run.sh master master
+    ./run.sh v4.0.0b4 v4.0.0b4
+
+    # Compare the benchmark results between two commits to see regressions
+    # and/or performance improvements on the command line.
+    alias git_commit='git show --no-patch --format="%H"'
+    asv compare $(git_commit v4.0.0b4) $(git_commit master)
+
+    # Convert the results into HTML.
+    # The result will be in `html` directory.
+    asv publish
+
+    # Start the HTTP server to browse HTML.
+    asv preview
+
+Alternatively, you can use Docker.
+
+.. code-block:: sh
+
+    # Build docker image for benchmark.
+    docker build -t chainer-benchmark docker
+
+    # Create a machine configuration file (`.asv-machine.json`) in this directory (first time only).
+    nvidia-docker run --rm -it -u ${UID}:${GID} -v ${PWD}:/benchmarks -w /benchmarks -e HOME=/benchmarks chainer-benchmark asv machine --machine $(hostname)
+
+    # Run benchmark.
+    nvidia-docker run --rm -it -u ${UID}:${GID} -v ${PWD}:/benchmarks -w /benchmarks -e HOME=/benchmarks chainer-benchmark ./run.sh master master --machine $(hostname)
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
@@ -0,0 +1,153 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "chainer",
+
+    // The project's homepage
+    "project_url": "https://chainer.org/",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "https://github.com/chainer/chainer.git",
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": ["master", "v3"], // for git
+    // "branches": ["default"],    // for mercurial
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    // "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+
+    // Timeout in seconds for installing any dependencies in the environment.
+    // Defaults to 10 minutes (600 seconds).
+    //"install_timeout": 600,
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/chainer/chainer/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    // "pythons": ["2.7", "3.3"],
+
+    // The list of conda channel names to be searched for benchmark
+    // dependency packages in the specified order
+    // "conda_channels": ["conda-forge", "defaults"]
+
+    // The matrix of dependencies to test.  Each key is the name of a
+    // package (in PyPI) and the values are version numbers.  An empty
+    // list or empty string indicates to just test against the default
+    // (latest) version. null indicates that the package is not to be
+    // installed. If the package to be tested is only available from
+    // PyPi, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed via
+    // pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // "matrix": {
+    //     "numpy": ["1.6", "1.7"],
+    //     "six": ["", null],        // test with and without six installed
+    //     "pip+emcee": [""],   // emcee is only available for install with pip.
+    // },
+    "matrix": {
+        // CuPy dependencies.
+        "numpy": [],
+        "six": [],
+        "fastrlock": [],
+        // Optional dependencies required for benchmark.
+        "ideep4py": [],
+    },
+
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "numpy": "1.8"},
+    //     // additional env if run on windows+conda
+    //     {"sys_platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
+    // ],
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    // "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    // "env_dir": "env",
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    // "results_dir": "results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    // "html_dir": "html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache wheels of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    // "wheel_cache_size": 0
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // }
+
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds.  If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+    //
+    // "regressions_thresholds": {
+    //    "some_benchmark": 0.01,     // Threshold of 1%
+    //    "another_benchmark": 0.5,   // Threshold of 50%
+    // }
+}
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
@@ -0,0 +1,40 @@
+import inspect
+
+# Ensure that CuPy and cuDNN are available.
+import cupy  # NOQA
+import cupy.cudnn  # NOQA
+
+
+class BenchmarkBase(object):
+    """Base class for all benchmarks.
+
+    See also: http://asv.readthedocs.io/en/v0.2.1/writing_benchmarks.html
+    """
+
+    # Allow up to 10 minutes, instead of the default (60 seconds).
+    timeout = 600
+
+    def __init__(self, *args, **kwargs):
+        # Set pretty_name to ``<class>.<function_name>`` instead of the default
+        # ``<module>.<class>.<function_name>``. This is because it is often too
+        # verbose to display module name in result HTML.
+        # This is a workaround needed until ASV 0.3 release.
+        members = inspect.getmembers(
+            self.__class__,
+            predicate=lambda x: inspect.ismethod(x) or inspect.isfunction(x))
+        for (name, func) in members:
+            if hasattr(func, '__func__'):
+                # For Python 2
+                func = func.__func__
+            if name.startswith('time_'):
+                name = name[5:]
+            func.pretty_name = '{}.{}'.format(type(self).__name__, name)
+
+    def setup(self, *args, **kwargs):
+        pass
+
+    def setup_cache(self, *args, **kwargs):
+        pass
+
+    def teardown(self, *args, **kwargs):
+        pass
diff --git a/benchmarks/benchmarks/convnet/__init__.py b/benchmarks/benchmarks/convnet/__init__.py
diff --git a/benchmarks/benchmarks/convnet/benchmark.py b/benchmarks/benchmarks/convnet/benchmark.py
@@ -0,0 +1,93 @@
+import chainer
+from chainer import optimizers
+
+from benchmarks import BenchmarkBase
+from benchmarks.utils import backends
+from benchmarks.utils import is_backend_gpu
+from benchmarks.utils import is_backend_ideep
+from benchmarks.utils import parameterize
+
+
+class _ConvnetBase(BenchmarkBase):
+    """Benchmark code from convnet-benchmark.
+
+    https://github.com/soumith/convnet-benchmarks/tree/master/chainer
+    """
+
+    timeout = 600
+    number = 1
+
+    def setup(self, arch, batchsize):
+        xp = self.xp
+
+        if arch == 'alexnet':
+            from benchmarks.convnet.nets import alex
+            model = alex.Alex()
+        elif arch == 'googlenet':
+            from benchmarks.convnet.nets import googlenet
+            model = googlenet.GoogLeNet()
+        elif arch == 'vgga':
+            from benchmarks.convnet.nets import vgga
+            model = vgga.vgga()
+        elif arch == 'overfeat':
+            from benchmarks.convnet.nets import overfeat
+            model = overfeat.overfeat()
+        else:
+            raise ValueError('Invalid architecture name')
+
+        if is_backend_gpu():
+            model.to_gpu()
+        elif is_backend_ideep():
+            model.to_intel64()
+
+        # Setup optimizer
+        optimizer = optimizers.SGD(lr=0.01)
+        optimizer.setup(model)
+
+        # Set cuDNN workspace size
+        workspace_size = int(1 * 2**30)
+        chainer.cuda.set_max_workspace_size(workspace_size)
+
+        chainer.config.train = True
+
+        x = xp.ndarray((batchsize, 3, model.insize,
+                        model.insize), dtype=xp.float32)
+        x.fill(33333)
+
+        if arch == 'googlenet':
+            out1, out2, out3 = model.forward(x)
+            out = out1 + out2 + out3
+        else:
+            out = model.forward(x)
+
+        out.zerograd()
+        out.grad.fill(3)
+        model.cleargrads()
+
+        self._x = x
+        self._model = model
+        self._out = out
+
+    def time_forward(self, arch, batchsize):
+        self._model.forward(self._x)
+
+    def time_backward(self, arch, batchsize):
+        self._out.backward()
+
+
+@backends('gpu', 'gpu-cudnn')
+@parameterize([
+    ('arch', ['vgga']),
+    ('batchsize', [32]),
+])
+class ConvnetVGGA(_ConvnetBase):
+    pass
+
+
+@backends('gpu', 'gpu-cudnn')
+@parameterize([
+    ('arch', ['alexnet', 'googlenet', 'overfeat']),
+    ('batchsize', [128]),
+])
+class ConvnetOthers(_ConvnetBase):
+    pass
diff --git a/benchmarks/benchmarks/convnet/nets/__init__.py b/benchmarks/benchmarks/convnet/nets/__init__.py
diff --git a/benchmarks/benchmarks/convnet/nets/alex.py b/benchmarks/benchmarks/convnet/nets/alex.py
@@ -0,0 +1,29 @@
+import chainer
+import chainer.functions as F
+import chainer.links as L
+
+
+class Alex(chainer.Chain):
+    insize = 224
+
+    def __init__(self):
+        super(Alex, self).__init__()
+        with self.init_scope():
+            self.conv1 = L.Convolution2D(3,  64, 11, stride=4, pad=2)
+            self.conv2 = L.Convolution2D(64, 192,  5, pad=2)
+            self.conv3 = L.Convolution2D(192, 384,  3, pad=1)
+            self.conv4 = L.Convolution2D(384, 256,  3, pad=1)
+            self.conv5 = L.Convolution2D(256, 256,  3, pad=1)
+            self.fc6 = L.Linear(256 * 6 * 6, 4096)
+            self.fc7 = L.Linear(4096, 4096)
+            self.fc8 = L.Linear(4096, 1000)
+
+    def forward(self, x):
+        h = F.max_pooling_2d(F.relu(self.conv1(x)), 3, stride=2)
+        h = F.max_pooling_2d(F.relu(self.conv2(h)), 3, stride=2)
+        h = F.relu(self.conv3(h))
+        h = F.relu(self.conv4(h))
+        h = F.max_pooling_2d(F.relu(self.conv5(h)), 3, stride=2)
+        h = F.relu(self.fc6(h))
+        h = F.relu(self.fc7(h))
+        return self.fc8(h)