Commit b4a89f7

Merge d3e4c0d into 3c87398

prabhatnagarajan committed Dec 27, 2019
2 parents 3c87398 + d3e4c0d
Showing 22 changed files with 974 additions and 25 deletions.
10 changes: 9 additions & 1 deletion .pfnci/script.sh
@@ -34,7 +34,7 @@ main() {
wait

# Prepare docker args.
docker_args=(docker run --rm --volume="$(pwd):/src:ro")
docker_args=(docker run --rm --volume="$(pwd):/src:ro" --volume="/root/.chainer:/root/.chainer/")
if [ "${GPU:-0}" != '0' ]; then
docker_args+=(--ipc=host --privileged --env="GPU=${GPU}" --runtime=nvidia)
fi
@@ -67,6 +67,14 @@ main() {
py2.* ) docker_args+=(--env="PYTHON=python");;
esac

for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip
do
gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} .
mkdir -p ~/.chainer/dataset/pfnet/chainerrl/models
unzip ${ZIP} -d ~/.chainer/dataset/pfnet/chainerrl/models/
rm ${ZIP}
done

run "${docker_args[@]}" "${docker_image}" bash /src/.pfnci/run.sh "${TARGET}"
}

2 changes: 1 addition & 1 deletion .travis.yml
@@ -48,7 +48,7 @@ script:
  - flake8 examples
  - autopep8 -r chainerrl tests examples --diff | tee check_autopep8
  - test ! -s check_autopep8
-  - pytest -m "not gpu and not slow" -x tests --cov=chainerrl
+  - pytest -m "not gpu and not slow" -x tests --cov=chainerrl --ignore tests/misc_tests/test_pretrained_models.py
  - ./test_examples.sh -1
  - if [[ $TRAVIS_PYTHON_VERSION == 3.6 && $CHAINER_VERSION == stable ]]; then
      jupyter nbconvert --to notebook --execute examples/quickstart/quickstart.ipynb --ExecutePreprocessor.timeout=600;
1 change: 1 addition & 0 deletions chainerrl/misc/__init__.py
@@ -7,3 +7,4 @@
from chainerrl.misc.namedpersistent import namedpersistent # NOQA
from chainerrl.misc.is_return_code_zero import is_return_code_zero # NOQA
from chainerrl.misc.random_seed import set_random_seed # NOQA
from chainerrl.misc.pretrained_models import download_model # NOQA
151 changes: 151 additions & 0 deletions chainerrl/misc/pretrained_models.py
@@ -0,0 +1,151 @@
"""This file is a fork from ChainerCV, an MIT-licensed project,
https://github.com/chainer/chainercv/blob/master/chainercv/utils/download.py
"""

from distutils.util import strtobool
import filelock
import hashlib
import os
import shutil
import tempfile
import time
import sys
import zipfile
from six.moves.urllib import request

from chainer.dataset.download import get_dataset_directory
from chainer.dataset.download import get_dataset_root


MODELS = {
    "DQN": ["best", "final"],
    "IQN": ["best", "final"],
    "Rainbow": ["best", "final"],
    "A3C": ["final"],
    "DDPG": ["best", "final"],
    "TRPO": ["best", "final"],
    "PPO": ["final"],
    "TD3": ["best", "final"],
    "SAC": ["best", "final"]
}

download_url = "https://chainer-assets.preferred.jp/chainerrl/"


def _reporthook(count, block_size, total_size):
    global start_time
    if count == 0:
        start_time = time.time()
        print(' % Total Recv Speed Time left')
        return
    duration = time.time() - start_time
    progress_size = count * block_size
    try:
        speed = progress_size / duration
    except ZeroDivisionError:
        speed = float('inf')
    percent = progress_size / total_size * 100
    eta = int((total_size - progress_size) / speed)
    sys.stdout.write(
        '\r{:3.0f} {:4.0f}MiB {:4.0f}MiB {:6.0f}KiB/s {:4d}:{:02d}:{:02d}'
        .format(
            percent, total_size / (1 << 20), progress_size / (1 << 20),
            speed / (1 << 10), eta // 60 // 60, (eta // 60) % 60, eta % 60))
    sys.stdout.flush()


def cached_download(url):
    """Downloads a file and caches it.

    This is different from the original
    :func:`~chainer.dataset.cached_download` in that the download
    progress is reported.

    It downloads a file from the URL if there is no corresponding cache.
    If there is already a cache for the given URL, it just returns the
    path to the cache without downloading the same file.

    Args:
        url (string): URL to download from.

    Returns:
        string: Path to the downloaded file.
    """
    cache_root = os.path.join(get_dataset_root(), '_dl_cache')
    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.exists(cache_root):
            raise
    lock_path = os.path.join(cache_root, '_dl_lock')
    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    cache_path = os.path.join(cache_root, urlhash)

    with filelock.FileLock(lock_path):
        if os.path.exists(cache_path):
            return cache_path

    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'dl')
        if strtobool(os.getenv('CHAINERRL_DOWNLOAD_REPORT', 'ON')):
            print('Downloading ...')
            print('From: {:s}'.format(url))
            print('To: {:s}'.format(cache_path))
            request.urlretrieve(url, temp_path, _reporthook)
        else:
            request.urlretrieve(url, temp_path)
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cache_path)
    finally:
        shutil.rmtree(temp_root)

    return cache_path


def download_and_store_model(alg, url, env, model_type):
    """Downloads a model file and puts it under the model directory.

    It downloads a file from the URL and puts it under the model
    directory. If there is already a file at the destination path,
    it just returns the path without downloading the same file.

    Args:
        alg (string): Algorithm name, e.g. "DQN".
        url (string): Base URL to download from.
        env (string): Environment name.
        model_type (string): Either "best" or "final".

    Returns:
        string: Path to the model directory.
        bool: Whether the model was already cached.
    """
    with filelock.FileLock(os.path.join(
            get_dataset_directory(os.path.join('pfnet', 'chainerrl', '.lock')),
            'models.lock')):
        root = get_dataset_directory(
            os.path.join('pfnet', 'chainerrl', 'models', alg, env))
        url_basepath = os.path.join(url, alg, env)
        file = model_type + ".zip"
        path = os.path.join(root, file)
        is_cached = os.path.exists(path)
        if not is_cached:
            cache_path = cached_download(os.path.join(url_basepath, file))
            os.rename(cache_path, path)
            with zipfile.ZipFile(path, 'r') as zip_ref:
                zip_ref.extractall(root)
        return os.path.join(root, model_type), is_cached


def download_model(alg, env, model_type="best"):
"""Downloads and returns pretrained model.
Args:
alg (string): URL to download from.
env (string): Gym Environment name.
model_type (string): Either `best` or `final`.
Returns:
str: Path to the downloaded file.
bool: whether the model was already cached.
"""
assert alg in MODELS, \
"No pretrained models for " + alg + "."
assert model_type in MODELS[alg], \
"Model type \"" + model_type + "\" is not supported."
env = env.replace("NoFrameskip-v4", "")
model_path, is_cached = download_and_store_model(alg,
download_url,
env, model_type)
return model_path, is_cached
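
Since the commit also exports this function as `chainerrl.misc.download_model`, here is a minimal usage sketch (the environment name and the printed path are illustrative; the path assumes the default Chainer dataset root):

```
from chainerrl import misc

# Fetch the "best" pretrained DQN model for Breakout. Returns the extracted
# model directory and whether the archive was already in the local cache.
model_dir, was_cached = misc.download_model(
    "DQN", "BreakoutNoFrameskip-v4", model_type="best")
print(model_dir)   # e.g. ~/.chainer/dataset/pfnet/chainerrl/models/DQN/Breakout/best
print(was_cached)

# Set CHAINERRL_DOWNLOAD_REPORT=OFF to silence the progress report.
# The returned directory can then be passed to agent.load(model_dir).
```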
13 changes: 13 additions & 0 deletions examples/atari/reproduction/a3c/README.md
@@ -20,6 +20,19 @@ python train_a3c.py [options]

To view the full list of options, either view the code or run the example with the `--help` option.

## Running our Pretrained models

We provide pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4
```

### Useful Options
- `--env`. Specifies the environment.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained`. Loads the pretrained model. `--load` and `--load-pretrained` cannot be used together.

## Results
These results reflect ChainerRL `v0.7.0`. The reported results are compared against the scores from the [Noisy Networks Paper](https://arxiv.org/abs/1706.10295), since the original paper does not report scores for the no-op evaluation protocol.

18 changes: 13 additions & 5 deletions examples/atari/reproduction/a3c/train_a3c.py
@@ -57,6 +57,8 @@ def main():
    parser.add_argument('--eval-n-steps', type=int, default=125000)
    parser.add_argument('--weight-decay', type=float, default=0.0)
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load-pretrained', action='store_true',
                        default=False)
    parser.add_argument('--load', type=str, default='')
    parser.add_argument('--logging-level', type=int, default=20,
                        help='Logging level. 10:DEBUG, 20:INFO etc.')
@@ -111,8 +113,14 @@ def phi(x):
    agent = a3c.A3C(model, opt, t_max=args.t_max, gamma=0.99,
                    beta=args.beta, phi=phi)

-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(misc.download_model("A3C", args.env,
+                                           model_type="final")[0])

    def make_env(process_idx, test):
        # Use different random seeds for train and test envs
@@ -137,9 +145,9 @@ def make_env(process_idx, test):
            env=env,
            agent=agent,
            n_steps=None,
-            n_episodes=args.eval_n_runs)
-        print('n_runs: {} mean: {} median: {} stdev: {}'.format(
-            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
+            n_episodes=args.eval_n_steps)
+        print('n_steps: {} mean: {} median: {} stdev: {}'.format(
+            args.eval_n_steps, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
    else:

16 changes: 16 additions & 0 deletions examples/atari/reproduction/dqn/README.md
@@ -21,6 +21,22 @@ python train_dqn.py [options]

To view the full list of options, either view the code or run the example with the `--help` option.


## Running our Pretrained models

We provide pretrained models from this script for all the domains listed in the [results](#Results) section. Note that while we may have run multiple seeds, each pretrained model represents a single run from this script and may not be representative of the [results](#Results). To load a pretrained model:

```
python train_dqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`.
- `--env`. Specifies the environment.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained`. Loads the pretrained model. `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

## Results
These results reflect ChainerRL `v0.6.0`.

14 changes: 12 additions & 2 deletions examples/atari/reproduction/dqn/train_dqn.py
@@ -30,6 +30,10 @@ def main():
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load-pretrained', action='store_true',
                        default=False)
    parser.add_argument('--pretrained-type', type=str, default="best",
                        choices=['best', 'final'])
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--logging-level', type=int, default=20,
                        help='Logging level. 10:DEBUG, 20:INFO etc.')
@@ -120,8 +124,14 @@ def phi(x):
        batch_accumulator='sum',
        phi=phi)

-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(misc.download_model("DQN", args.env,
+                                           model_type=args.pretrained_type)[0])

    if args.demo:
        eval_stats = experiments.eval_performance(
18 changes: 17 additions & 1 deletion examples/atari/reproduction/iqn/README.md
Expand Up @@ -21,8 +21,24 @@ python train_iqn.py [options]

To view the full list of options, either view the code or run the example with the `--help` option.


## Running our Pretrained models

We provide pretrained models from this script for all the domains listed in the [results](#Results) section. Note that while we may have run multiple seeds, each pretrained model represents a single run from this script and may not be representative of the [results](#Results). To load a pretrained model:

```
python train_iqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`.
- `--env`. Specifies the environment.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained`. Loads the pretrained model. `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

## Results
-These results reflect ChainerRL `v0.6.0`. The ChainerRL score currently consists of a single run. The reported results from the IQN paper are also from a single run. We use the same evaluation protocol used in the IQN paper.
+These results reflect ChainerRL `v0.6.0`. We use the same evaluation protocol used in the IQN paper.


| Results Summary ||
14 changes: 12 additions & 2 deletions examples/atari/reproduction/iqn/train_iqn.py
@@ -26,6 +26,10 @@ def main():
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load-pretrained', action='store_true',
                        default=False)
    parser.add_argument('--pretrained-type', type=str, default="best",
                        choices=['best', 'final'])
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--final-exploration-frames',
                        type=int, default=10 ** 6)
@@ -148,8 +152,14 @@ def phi(x):
        quantile_thresholds_K=args.quantile_thresholds_K,
    )

-    if args.load:
-        agent.load(args.load)
+    if args.load or args.load_pretrained:
+        # either load or load_pretrained must be false
+        assert not args.load or not args.load_pretrained
+        if args.load:
+            agent.load(args.load)
+        else:
+            agent.load(misc.download_model("IQN", args.env,
+                                           model_type=args.pretrained_type)[0])

    if args.demo:
        eval_stats = experiments.eval_performance(
15 changes: 15 additions & 0 deletions examples/atari/reproduction/rainbow/README.md
@@ -21,6 +21,21 @@ python train_rainbow.py [options]

To view the full list of options, either view the code or run the example with the `--help` option.

## Running our Pretrained models

We provide pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model:

```
python train_rainbow.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1
```

### Useful Options
- `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`.
- `--env`. Specifies the environment.
- `--demo`. Runs an evaluation, instead of training the agent.
- `--load-pretrained`. Loads the pretrained model. `--load` and `--load-pretrained` cannot be used together.
- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training).

## Results
These results reflect ChainerRL `v0.7.0`.

