Commit b4a89f7

Merge d3e4c0d into 3c87398

prabhatnagarajan committed Dec 27, 2019
2 parents 3c87398 + d3e4c0d
Showing 22 changed files with 974 additions and 25 deletions.
10 changes: 9 additions & 1 deletion .pfnci/script.sh
@@ -34,7 +34,7 @@ main() {
wait

# Prepare docker args.
docker_args=(docker run --rm --volume="$(pwd):/src:ro")
docker_args=(docker run --rm --volume="$(pwd):/src:ro" --volume="/root/.chainer:/root/.chainer/")
if [ "${GPU:-0}" != '0' ]; then
docker_args+=(--ipc=host --privileged --env="GPU=${GPU}" --runtime=nvidia)
fi
@@ -67,6 +67,14 @@ main() {
py2.* ) docker_args+=(--env="PYTHON=python");;
esac

for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip
do
gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} .
mkdir -p ~/.chainer/dataset/pfnet/chainerrl/models
unzip ${ZIP} -d ~/.chainer/dataset/pfnet/chainerrl/models/
rm ${ZIP}
done

run "${docker_args[@]}" "${docker_image}" bash /src/.pfnci/run.sh "${TARGET}"
}

2 changes: 1 addition & 1 deletion .travis.yml
@@ -48,7 +48,7 @@ script:
  - flake8 examples
  - autopep8 -r chainerrl tests examples --diff | tee check_autopep8
  - test ! -s check_autopep8
-  - pytest -m "not gpu and not slow" -x tests --cov=chainerrl
+  - pytest -m "not gpu and not slow" -x tests --cov=chainerrl --ignore tests/misc_tests/test_pretrained_models.py
  - ./test_examples.sh -1
  - if [[ $TRAVIS_PYTHON_VERSION == 3.6 && $CHAINER_VERSION == stable ]]; then
      jupyter nbconvert --to notebook --execute examples/quickstart/quickstart.ipynb --ExecutePreprocessor.timeout=600;
1 change: 1 addition & 0 deletions chainerrl/misc/__init__.py
@@ -7,3 +7,4 @@
from chainerrl.misc.namedpersistent import namedpersistent # NOQA
from chainerrl.misc.is_return_code_zero import is_return_code_zero # NOQA
from chainerrl.misc.random_seed import set_random_seed # NOQA
from chainerrl.misc.pretrained_models import download_model # NOQA
151 changes: 151 additions & 0 deletions chainerrl/misc/pretrained_models.py
@@ -0,0 +1,151 @@
"""This file is a fork from ChainerCV, an MIT-licensed project,
https://github.com/chainer/chainercv/blob/master/chainercv/utils/download.py
"""

from distutils.util import strtobool
import filelock
import hashlib
import os
import shutil
import tempfile
import time
import sys
import zipfile
from six.moves.urllib import request

from chainer.dataset.download import get_dataset_directory
from chainer.dataset.download import get_dataset_root


MODELS = {
    "DQN": ["best", "final"],
    "IQN": ["best", "final"],
    "Rainbow": ["best", "final"],
    "A3C": ["final"],
    "DDPG": ["best", "final"],
    "TRPO": ["best", "final"],
    "PPO": ["final"],
    "TD3": ["best", "final"],
    "SAC": ["best", "final"]
}

download_url = "https://chainer-assets.preferred.jp/chainerrl/"


def _reporthook(count, block_size, total_size):
    global start_time
    if count == 0:
        start_time = time.time()
        print(' % Total Recv Speed Time left')
        return
    duration = time.time() - start_time
    progress_size = count * block_size
    try:
        speed = progress_size / duration
    except ZeroDivisionError:
        speed = float('inf')
    percent = progress_size / total_size * 100
    eta = int((total_size - progress_size) / speed)
    sys.stdout.write(
        '\r{:3.0f} {:4.0f}MiB {:4.0f}MiB {:6.0f}KiB/s {:4d}:{:02d}:{:02d}'
        .format(
            percent, total_size / (1 << 20), progress_size / (1 << 20),
            speed / (1 << 10), eta // 60 // 60, (eta // 60) % 60, eta % 60))
    sys.stdout.flush()


def cached_download(url):
    """Downloads a file and caches it.

    This is different from the original
    :func:`~chainer.dataset.cached_download` in that the download
    progress is reported.

    It downloads a file from the URL if there is no corresponding cache.
    If there is already a cache for the given URL, it just returns the
    path to the cache without downloading the same file.

    Args:
        url (string): URL to download from.

    Returns:
        string: Path to the downloaded file.
    """
    cache_root = os.path.join(get_dataset_root(), '_dl_cache')
    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.exists(cache_root):
            raise
    lock_path = os.path.join(cache_root, '_dl_lock')
    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    cache_path = os.path.join(cache_root, urlhash)

    with filelock.FileLock(lock_path):
        if os.path.exists(cache_path):
            return cache_path

    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'dl')
        if strtobool(os.getenv('CHAINERRL_DOWNLOAD_REPORT', 'ON')):
            print('Downloading ...')
            print('From: {:s}'.format(url))
            print('To: {:s}'.format(cache_path))
            request.urlretrieve(url, temp_path, _reporthook)
        else:
            request.urlretrieve(url, temp_path)
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cache_path)
    finally:
        shutil.rmtree(temp_root)

    return cache_path


def download_and_store_model(alg, url, env, model_type):
    """Downloads a model file and puts it under the model directory.

    It downloads a file from the URL and puts it under the model
    directory. If there is already a file at the destination path,
    it just returns the path without downloading the same file.

    Args:
        alg (string): Algorithm name, e.g. "DQN".
        url (string): Base URL to download from.
        env (string): Environment name.
        model_type (string): Either "best" or "final".

    Returns:
        string: Path to the model directory.
        bool: Whether the model was already cached.
    """
    with filelock.FileLock(os.path.join(
            get_dataset_directory(os.path.join('pfnet', 'chainerrl', '.lock')),
            'models.lock')):
        root = get_dataset_directory(
            os.path.join('pfnet', 'chainerrl', 'models', alg, env))
        url_basepath = os.path.join(url, alg, env)
        file = model_type + ".zip"
        path = os.path.join(root, file)
        is_cached = os.path.exists(path)
        if not is_cached:
            cache_path = cached_download(os.path.join(url_basepath, file))
            os.rename(cache_path, path)
            with zipfile.ZipFile(path, 'r') as zip_ref:
                zip_ref.extractall(root)
        return os.path.join(root, model_type), is_cached


def download_model(alg, env, model_type="best"):
"""Downloads and returns pretrained model.
Args:
alg (string): URL to download from.
env (string): Gym Environment name.
model_type (string): Either `best` or `final`.
Returns:
str: Path to the downloaded file.
bool: whether the model was already cached.
"""
assert alg in MODELS, \
"No pretrained models for " + alg + "."
assert model_type in MODELS[alg], \
"Model type \"" + model_type + "\" is not supported."
env = env.replace("NoFrameskip-v4", "")
model_path, is_cached = download_and_store_model(alg,
download_url,
env, model_type)
return model_path, is_cached
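
Since the commit also exports this function as `chainerrl.misc.download_model`, here is a minimal usage sketch (the environment name and the printed path are illustrative; the path assumes the default Chainer dataset root):

```
from chainerrl import misc

# Fetch the "best" pretrained DQN model for Breakout. Returns the extracted
# model directory and whether the archive was already in the local cache.
model_dir, was_cached = misc.download_model(
    "DQN", "BreakoutNoFrameskip-v4", model_type="best")
print(model_dir)   # e.g. ~/.chainer/dataset/pfnet/chainerrl/models/DQN/Breakout/best
print(was_cached)

# Set CHAINERRL_DOWNLOAD_REPORT=OFF to silence the progress report.
# The returned directory can then be passed to agent.load(model_dir).
```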
13 changes: 13 additions & 0 deletions examples/atari/reproduction/a3c/README.md
@@ -20,6 +20,19 @@ python train_a3c.py [options]

To view the full list of options, either view the code or run the example with the `--help` option.

## Running our Pretrained models

We provide pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4
```

### Useful Options
- `--env`. Specifies the environment.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained`. Loads the pretrained model. `--load` and `--load-pretrained` cannot be used together.

## Results
These results reflect ChainerRL `v0.7.0`. The reported results are compared against the scores from the [Noisy Networks Paper](https://arxiv.org/abs/1706.10295), since the original paper does not report scores for the no-op evaluation protocol.

18 changes: 13 additions & 5 deletions examples/atari/reproduction/a3c/train_a3c.py
@@ -57,6 +57,8 @@ def main():
    parser.add_argument('--eval-n-steps', type=int, default=125000)
    parser.add_argument('--weight-decay', type=float, default=0.0)
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load-pretrained', action='store_true',
                        default=False)
    parser.add_argument('--load', type=str, default='')
    parser.add_argument('--logging-level', type=int, default=20,
                        help='Logging level. 10:DEBUG, 20:INFO etc.')
@@ -111,8 +113,14 @@ def phi(x):
    agent = a3c.A3C(model, opt, t_max=args.t_max, gamma=0.99,
                    beta=args.beta, phi=phi)

-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(misc.download_model("A3C", args.env,
+                                           model_type="final")[0])

    def make_env(process_idx, test):
        # Use different random seeds for train and test envs
@@ -137,9 +145,9 @@ def make_env(process_idx, test):
            env=env,
            agent=agent,
            n_steps=None,
-            n_episodes=args.eval_n_runs)
-        print('n_runs: {} mean: {} median: {} stdev: {}'.format(
-            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
+            n_episodes=args.eval_n_steps)
+        print('n_steps: {} mean: {} median: {} stdev: {}'.format(
+            args.eval_n_steps, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
    else:

16 changes: 16 additions & 0 deletions examples/atari/reproduction/dqn/README.md
@@ -21,6 +21,22 @@ python train_dqn.py [options]

To view the full list of options, either view the code or run the example with the `--help` option.


## Running our Pretrained models

We provide pretrained models from this script for all the domains listed in the [results](#Results) section. Note that while we may have run multiple seeds, each pretrained model represents a single run from this script and may not be representative of the [results](#Results). To load a pretrained model:

```
python train_dqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`.
- `--env`. Specifies the environment.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained`. Loads the pretrained model. `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

## Results
These results reflect ChainerRL `v0.6.0`.

14 changes: 12 additions & 2 deletions examples/atari/reproduction/dqn/train_dqn.py
@@ -30,6 +30,10 @@ def main():
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load-pretrained', action='store_true',
                        default=False)
    parser.add_argument('--pretrained-type', type=str, default="best",
                        choices=['best', 'final'])
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--logging-level', type=int, default=20,
                        help='Logging level. 10:DEBUG, 20:INFO etc.')
@@ -120,8 +124,14 @@ def phi(x):
        batch_accumulator='sum',
        phi=phi)

-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(misc.download_model("DQN", args.env,
+                                           model_type=args.pretrained_type)[0])

    if args.demo:
        eval_stats = experiments.eval_performance(
18 changes: 17 additions & 1 deletion examples/atari/reproduction/iqn/README.md
Expand Up @@ -21,8 +21,24 @@ python train_iqn.py [options]

To view the full list of options, either view the code or run the example with the `--help` option.


## Running our Pretrained models

We provide pretrained models from this script for all the domains listed in the [results](#Results) section. Note that while we may have run multiple seeds, each pretrained model represents a single run from this script and may not be representative of the [results](#Results). To load a pretrained model:

```
python train_iqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`.
- `--env`. Specifies the environment.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained`. Loads the pretrained model. `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

## Results
-These results reflect ChainerRL `v0.6.0`. The ChainerRL score currently consists of a single run. The reported results from the IQN paper are also from a single run. We use the same evaluation protocol used in the IQN paper.
+These results reflect ChainerRL `v0.6.0`. We use the same evaluation protocol used in the IQN paper.


| Results Summary ||
14 changes: 12 additions & 2 deletions examples/atari/reproduction/iqn/train_iqn.py
@@ -26,6 +26,10 @@ def main():
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load-pretrained', action='store_true',
                        default=False)
    parser.add_argument('--pretrained-type', type=str, default="best",
                        choices=['best', 'final'])
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--final-exploration-frames',
                        type=int, default=10 ** 6)
@@ -148,8 +152,14 @@ def phi(x):
        quantile_thresholds_K=args.quantile_thresholds_K,
    )

-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(misc.download_model("IQN", args.env,
+                                           model_type=args.pretrained_type)[0])

    if args.demo:
        eval_stats = experiments.eval_performance(
15 changes: 15 additions & 0 deletions examples/atari/reproduction/rainbow/README.md
@@ -21,6 +21,21 @@ python train_rainbow.py [options]

To view the full list of options, either view the code or run the example with the `--help` option.

## Running our Pretrained models

We provide pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_rainbow.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`.
- `--env`. Specifies the environment.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained`. Loads the pretrained model. `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

## Results
These results reflect ChainerRL `v0.7.0`.

