In [1]:
# Mount Google Drive at /gdrive so the project directory under
# /gdrive/MyDrive can be entered in the next cell.
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [2]:
cd /gdrive/MyDrive/182/procgen/christian/train_procgen_pfrl/

/gdrive/MyDrive/182/procgen/christian/train_procgen_pfrl


In [3]:
! pip install -r requirements.txt



In [4]:
! pip install pfrl



In [5]:
from procgen import ProcgenEnv
from vec_env import VecExtractDictObs
from vec_env import VecMonitor
from vec_env import VecNormalize
import argparse
import os

In [20]:
def _build_parser():
    """Declare every command-line flag used by the procgen training run.

    Flags are registered in three groups -- experiment, PPO, A3C -- in that
    order, each as a (flag, add_argument keyword options) pair.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    arg_parser = argparse.ArgumentParser(
        description='Process procgen training arguments.')

    # Experiment parameters.
    experiment_flags = (
        ('--distribution-mode', dict(
            type=str, default='easy',
            choices=['easy', 'hard', 'exploration', 'memory', 'extreme'])),
        ('--env-name', dict(type=str, default='fruitbot')),
        ('--num-envs', dict(type=int, default=64)),
        ('--num-levels', dict(type=int, default=0)),
        ('--start-level', dict(type=int, default=0)),
        ('--num-threads', dict(type=int, default=4)),
        ('--exp-name', dict(type=str, default='trial01')),
        ('--log-dir', dict(type=str, default='./log')),
        ('--model-file', dict(type=str, default=None)),
        ('--method-label', dict(type=str, default='vanilla')),
    )

    # PPO parameters.
    # The PPO-style --lr flag (default 5e-4) is intentionally not registered
    # here; --lr is declared once, with the A3C parameters below.
    ppo_flags = (
        ('--gpu', dict(type=int, default=0)),
        ('--ent-coef', dict(type=float, default=0.01)),
        ('--vf-coef', dict(type=float, default=0.5)),
        ('--gamma', dict(type=float, default=0.999)),
        ('--lam', dict(type=float, default=0.95)),
        ('--clip-range', dict(type=float, default=0.2)),
        ('--max-grad-norm', dict(type=float, default=0.5)),
        ('--nsteps', dict(type=int, default=256)),
        ('--batch-size', dict(type=int, default=8)),
        ('--nepochs', dict(type=int, default=3)),
        ('--max-steps', dict(type=int, default=25_000_000)),
        ('--save-interval', dict(type=int, default=100)),
    )

    # A3C parameters.
    a3c_flags = (
        ('--lr', dict(type=float, default=7e-4)),
        ('--t-max', dict(type=int, default=5)),
        ('--beta', dict(type=float, default=1e-2)),
        ('--profile', dict(action='store_true')),
        ('--processes', dict(type=int, default=1)),
        ('--max-frames', dict(
            type=int,
            default=30 * 60 * 60,  # 30 minutes with 60 fps
            help='Maximum number of frames for each episode.')),
        ('--eval-interval', dict(type=int, default=2500)),
        ('--eval-n-steps', dict(type=int, default=125000)),
        ('--eval-n-episodes', dict(type=int, default=100)),
        ('--out-dir', dict(type=str, default='./exp_async_results/a3c')),
    )

    for flag, options in experiment_flags + ppo_flags + a3c_flags:
        arg_parser.add_argument(flag, **options)
    return arg_parser


parser = _build_parser()

# Parse an explicit argument list (rather than sys.argv) so the notebook
# controls the settings; every flag not listed keeps its default.
configs = parser.parse_args(args=[
    '--num-envs', '64',
    '--num-levels', '500',
    '--start-level', '100',
    '--num-threads', '8',
    '--exp-name', 'a3c-trail01',
    '--method-label', 'vanilla',
    '--max-steps', '5_000_000',
])
# Expose the step budget under the name `steps` as well; later cells read
# configs.steps.
configs.steps = configs.max_steps

In [7]:
# Display the parsed namespace to check the effective settings.
configs

Namespace(batch_size=8, beta=0.01, clip_range=0.2, distribution_mode='easy', ent_coef=0.01, env_name='fruitbot', eval_interval=2500, eval_n_episodes=100, eval_n_steps=125000, exp_name='a3c-trail01', gamma=0.999, gpu=0, lam=0.95, log_dir='./log', lr=0.0007, max_frames=108000, max_grad_norm=0.5, max_steps=5000000, method_label='vanilla', model_file=None, nepochs=3, nsteps=256, num_envs=64, num_levels=500, num_threads=8, out_dir='./exp_async_results/a3c', processes=1, profile=False, save_interval=100, start_level=100, steps=5000000, t_max=5, vf_coef=0.5)

In [8]:
def create_venv(config, is_valid=False):
    """Build a wrapped, vectorized Procgen environment.

    Args:
        config: Object providing num_envs, env_name, num_levels, start_level,
            distribution_mode and num_threads attributes.
        is_valid: When True, num_levels and start_level are both forced to 0
            instead of being taken from ``config``.
            NOTE(review): presumably 0 levels selects Procgen's unrestricted
            level set for validation -- confirm against the Procgen docs.

    Returns:
        The ProcgenEnv wrapped, in order, by VecExtractDictObs (key "rgb"),
        VecMonitor (no log file, keep_buf=100) and VecNormalize (ob=False).
    """
    if is_valid:
        num_levels, start_level = 0, 0
    else:
        num_levels, start_level = config.num_levels, config.start_level

    env_kwargs = dict(
        num_envs=config.num_envs,
        env_name=config.env_name,
        num_levels=num_levels,
        start_level=start_level,
        distribution_mode=config.distribution_mode,
        num_threads=config.num_threads,
    )
    procgen_env = ProcgenEnv(**env_kwargs)
    rgb_env = VecExtractDictObs(procgen_env, "rgb")
    monitored_env = VecMonitor(venv=rgb_env, filename=None, keep_buf=100)
    return VecNormalize(venv=monitored_env, ob=False)

In [9]:
# Create the two vectorized environments: the training one uses the level
# range from configs, the validation one is built with is_valid=True
# (num_levels and start_level forced to 0 inside create_venv).
train_venv = create_venv(configs, is_valid=False)
valid_venv = create_venv(configs, is_valid=True)

# Setting up the model

In [10]:
from torch import nn
import pfrl
from pfrl import experiments, utils
from pfrl.agents import a3c
from pfrl.optimizers import SharedRMSpropEpsInsideSqrt
from pfrl.policies import SoftmaxCategoricalHead

In [11]:
# Length of the first axis of the observation space's lower-bound array.
# It is passed as the first (in_channels) argument of the model's first
# Conv2d layer.
# NOTE(review): the recorded value is 64, which looks like an image side
# length rather than a channel count -- confirm the observation layout
# (channel-first vs. channel-last) before using this as in_channels.
obs_size = train_venv.observation_space.low.shape[0]
obs_size

64

In [12]:
# Number of discrete actions reported by the environment's action space;
# used as the output width of the policy head's Linear layer.
n_actions = train_venv.action_space.n
n_actions

15

In [13]:
# Actor-critic network: a small convolutional trunk, a 256-unit hidden layer,
# then two heads wrapped in pfrl.nn.Branched.
# NOTE(review): the hard-coded 2592 equals 32 * 9 * 9, i.e. the flattened
# output of these two convolutions for an 84x84 input; confirm it matches the
# spatial size of the observations actually fed to the model.
model = nn.Sequential(
    # Convolutional trunk.
    nn.Conv2d(in_channels=obs_size, out_channels=16, kernel_size=8, stride=4),
    nn.ReLU(),
    nn.Conv2d(in_channels=16, out_channels=32, kernel_size=4, stride=2),
    nn.ReLU(),
    nn.Flatten(),
    # Fully connected hidden layer.
    nn.Linear(in_features=2592, out_features=256),
    nn.ReLU(),
    pfrl.nn.Branched(
        # Head 1: n_actions outputs turned into a categorical distribution.
        nn.Sequential(
            nn.Linear(in_features=256, out_features=n_actions),
            SoftmaxCategoricalHead(),
        ),
        # Head 2: a single scalar output.
        nn.Linear(in_features=256, out_features=1),
    ),
)

In [14]:
# RMSprop optimizer over the model parameters. The initial learning rate is
# read from configs.lr (default 7e-4) instead of a separately hard-coded copy,
# so it always agrees with the start value used by the learning-rate decay
# hook, which also reads configs.lr.
opt = SharedRMSpropEpsInsideSqrt(
    model.parameters(), lr=configs.lr, eps=1e-1, alpha=0.99
)

In [15]:
# Fail early if the optimizer's per-parameter state is still empty.
assert opt.state_dict()["state"], (
    "To share optimizer state across processes, "
    "the state must be initialized before training."
)

In [16]:
# Build the A3C agent around the model and optimizer defined above.
# The settings originally come from an Atari configuration: t_max and beta are
# read from configs, gamma and max_grad_norm are fixed values, and that
# configuration's `phi` preprocessing argument is not passed here.
# NOTE(review): configs also defines --gamma (0.999) and --max-grad-norm (0.5),
# which are not used by this call -- confirm that is intended.
agent = a3c.A3C(
    model,
    opt,
    t_max=configs.t_max,
    gamma=0.99,
    beta=configs.beta,
    max_grad_norm=40.0,
)

In [17]:
# Linearly decay the learning rate to zero
def lr_setter(env, agent, value):
    """Write ``value`` into the "lr" entry of every optimizer parameter group.

    Args:
        env: Unused; present so the function matches the setter signature
            it is called with.
        agent: Object whose ``optimizer.param_groups`` is an iterable of
            dict-like parameter groups.
        value: New learning rate to store in each group.

    Raises:
        AssertionError: If a parameter group has no "lr" key. Groups visited
            before the offending one have already been updated.
    """
    for group in agent.optimizer.param_groups:
        assert "lr" in group
        group.update(lr=value)

# Hook that interpolates the learning rate linearly from configs.lr down to 0
# over configs.steps steps, applying each value through lr_setter.
lr_decay_hook = experiments.LinearInterpolationHook(
    total_steps=configs.steps,
    start_value=configs.lr,
    stop_value=0,
    setter=lr_setter,
)

In [18]:
# from util.logger import get_current

# get_current()

Logging to /tmp/openai-2021-05-02-05-16-08-490958


<util.logger.Logger at 0x7f6a43740f90>

In [21]:
# from util import logger

# # Configure logger.
# log_dir = os.path.join(
#     configs.log_dir,
#     configs.env_name,
#     'nlev_{}_{}'.format(configs.num_levels, configs.distribution_mode),
#     configs.method_label,
#     configs.exp_name,
# )
# os.makedirs(configs.out_dir, exist_ok=True)
# logger.configure(dir=log_dir, format_strs=['csv', 'stdout'])

# experiments.train_agent_async(
#     agent=agent,
#     outdir=configs.out_dir,
#     processes=configs.processes,
#     make_env=lambda pidx, test: create_venv(configs, is_valid=test),
#     profile=configs.profile,
#     steps=configs.steps,
#     eval_n_steps=None, # configs.eval_n_steps,
#     eval_n_episodes=configs.eval_n_episodes,
#     eval_interval=configs.eval_interval,
#     global_step_hooks=[lr_decay_hook],
#     save_best_so_far_agent=True,
#     logger=logger.get_current()
# )

In [22]:
1! nvidia-smi

Sun May  2 05:23:31 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [30]:
! pip install -e ./vit_pytorch/.

Obtaining file:///gdrive/My%20Drive/182/procgen/christian/train_procgen_pfrl/vit_pytorch
Collecting einops>=0.3
  Downloading https://files.pythonhosted.org/packages/5d/a0/9935e030634bf60ecd572c775f64ace82ceddf2f504a5fd3902438f07090/einops-0.3.0-py2.py3-none-any.whl
Installing collected packages: einops, vit-pytorch
  Running setup.py develop for vit-pytorch
Successfully installed einops-0.3.0 vit-pytorch


In [45]:
# ! python3 train_procgen.py --log-dir tmp

In [None]:
# Launch the standalone A3C training script as a shell subprocess; it runs in
# its own Python process, separate from the objects built in this notebook.
! python3 train_procgen_a3c.py

Logging to ./log/fruitbot/nlev_500_easy/vanilla/a3c-trail02-eval_interval=50_000
	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:1005.)
  square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad)
	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:1005.)
  square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad)
	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:1005.)
  square_avg.mul_(alpha).addcmul_(1 - alpha, grad