In [1]:
%load_ext autoreload
%autoreload 2

import os, sys
# Resolve the homework checkout under the current user's home directory, make
# it the working directory, and put it first on the import path (presumably so
# the later `cs285...` imports resolve against this checkout).
home = os.path.expanduser('~')
ex_path = home + '/Documents/Python/BerkeleyDRL_fall2021/hw3'
os.chdir(ex_path)
sys.path.insert(0, ex_path)


def _prepend_missing_dirs(var_name, dirs):
    """Prepend the entries of ``dirs`` that a PATH-style variable lacks.

    Args:
        var_name: name of the environment variable (e.g. ``'PATH'``).
        dirs: directories that must be present, in priority order.

    Entries already present keep their position, pre-existing entries are
    never dropped, and an unset variable is treated as empty. Re-running the
    cell therefore leaves the variable unchanged.
    """
    current = os.environ.get(var_name, '')
    existing = current.split(os.pathsep) if current else []
    missing = [d for d in dirs if d not in existing]
    if missing:
        os.environ[var_name] = os.pathsep.join(missing + existing)


if sys.platform == 'darwin':
    _prepend_missing_dirs('PATH', ['/usr/X11/bin'])
else:
    # NOTE(review): both directories are machine-specific absolute paths;
    # adjust them to the local MuJoCo / NVIDIA install locations.
    _prepend_missing_dirs(
        'LD_LIBRARY_PATH',
        ['/home/ikasou/.mujoco/mujoco200/bin', '/usr/local/nvidia/lib64'],
    )

In [2]:
## cythonize at the first import
import mujoco_py

In [3]:
#@title set up virtual display

from pyvirtualdisplay import Display

# Create and start a 1400x900 virtual display with visible=0 (i.e. not shown
# on screen). Presumably this is what lets env.render() work on a headless
# machine -- TODO confirm against the pyvirtualdisplay backend in use.
display = Display(visible=0, size=(1400, 900))
display.start()

# For later
from cs285.infrastructure.colab_utils import (
    wrap_env,
    show_video
)




In [4]:
#@title test virtual display

#@markdown If you see a video of a four-legged ant fumbling about, setup is complete!

import gym
import matplotlib
# Select matplotlib's non-interactive Agg backend before anything is drawn.
matplotlib.use('Agg')

# wrap_env / show_video are imported from cs285.infrastructure.colab_utils in
# the display-setup cell; presumably wrap_env records the rendered frames
# that show_video later plays back -- verify against colab_utils.
env = wrap_env(gym.make("Ant-v2"))

try:
    observation = env.reset()
    # Take up to 10 random actions, rendering a frame before each step.
    for i in range(10):
        env.render(mode='rgb_array')
        # 4-tuple step result: observation, reward, termination flag, info.
        obs, rew, term, _ = env.step(env.action_space.sample())
        if term:
            break
finally:
    # Always release the environment, even when render/step raised.
    env.close()

print('Loading video...')
show_video()

  f"The environment {path} is out of date. You should consider "


Found 5 GPUs for rendering. Using device 0.
Loading video...


## Editing Code

To edit code, click the folder icon in the left menu, navigate to the corresponding file (`cs285_f2021/...`), and double-click it to open an editor. An active Colab instance times out after about 12 hours (sooner if you close your browser window). Your edits are synced to Google Drive, so you won't lose your work if the instance times out, but you will need to re-mount your Google Drive and re-install packages with every new instance.

## Run Actor Critic

In [5]:
#@title imports
import os
import time

from cs285.agents.ac_agent import ACAgent
from cs285.infrastructure.rl_trainer import RL_Trainer

In [6]:
#@title runtime arguments

class ACArgs:
    """Actor-critic run settings, usable as attributes or dict-style items.

    Defaults live on the class. A value stored with ``args[key] = val`` is set
    on the instance, so it shadows the class default for that instance only.
    The ``#@param`` / ``#@markdown`` / ``#@title`` comments are Colab form
    annotations and must stay attached to their assignments.
    """

    env_name = 'CartPole-v0' #@param ['CartPole-v0', 'InvertedPendulum-v2', 'HalfCheetah-v2']
    exp_name = 'q4_ac' #@param

    # The assignment PDF specifies ep_len and
    # discount for each environment.
    ep_len = 200 #@param {type: "integer"}

    #@markdown batches and steps
    batch_size = 1000 #@param {type: "integer"}
    eval_batch_size = 400 #@param {type: "integer"}

    n_iter = 100 #@param {type: "integer"}
    num_agent_train_steps_per_iter = 1 #@param {type: "integer"}
    num_actor_updates_per_agent_update = 1 #@param {type: "integer"}
    num_critic_updates_per_agent_update = 1 #@param {type: "integer"}

    #@markdown Actor-Critic parameters
    discount = 0.9 #@param {type: "number"}
    learning_rate = 5e-3 #@param {type: "number"}
    dont_standardize_advantages = False #@param {type: "boolean"}
    num_target_updates = 10 #@param {type: "integer"}
    num_grad_steps_per_target_update = 10 #@param {type: "integer"}
    n_layers = 2 #@param {type: "integer"}
    size = 64 #@param {type: "integer"}

    #@markdown system
    save_params = False #@param {type: "boolean"}
    no_gpu = False #@param {type: "boolean"}
    which_gpu = 0 #@param {type: "integer"}
    seed = 1 #@param {type: "integer"}

    #@markdown logging
    # Video logging is off by default so that the
    # logs stay small enough to upload to Gradescope.
    video_log_freq = -1 #@param {type: "integer"}
    scalar_log_freq = 10 #@param {type: "integer"}

    # --- dict-style access, implemented on top of plain attributes ---

    def __contains__(self, key):
        """Return True when a setting named ``key`` exists (``key in args``)."""
        return hasattr(self, key)

    def __getitem__(self, key):
        """Return the setting named ``key``; ``args[key]`` is ``getattr(args, key)``."""
        return getattr(self, key)

    def __setitem__(self, key, val):
        """Store ``val`` under ``key`` as an instance attribute (``args[key] = val``)."""
        setattr(self, key, val)

# Single settings object shared by the cells below.
args = ACArgs()

# Warn when video logging is enabled: the resulting event files get large.
if args.video_log_freq > 0:
    import warnings

    warnings.warn(
        "\nLogging videos will make eventfiles too"
        "\nlarge for the autograder. Set video_log_freq = -1"
        "\nfor the runs you intend to submit."
    )

In [7]:
#@title Define AC trainer

class AC_Trainer(object):
    """Adapter that prepares ``params`` for ``RL_Trainer`` and runs it.

    ``params`` only needs to support ``params[key]`` reads and writes. It is
    modified in place: ``agent_class``, ``agent_params``, ``train_batch_size``,
    ``batch_size_initial`` and ``non_atari_colab_env`` are added to it.
    """

    def __init__(self, params):
        """Assemble the agent parameters and construct the ``RL_Trainer``.

        Args:
            params: mapping-like settings object; it is stored as
                ``self.params`` (same object, not a copy) and extended in place.
        """
        # Settings forwarded to the agent under their original names.
        network_keys = (
            'n_layers',
            'size',
            'learning_rate',
            'num_target_updates',
            'num_grad_steps_per_target_update',
        )
        schedule_keys = (
            'num_agent_train_steps_per_iter',
            'num_critic_updates_per_agent_update',
            'num_actor_updates_per_agent_update',
        )

        agent_params = {key: params[key] for key in network_keys}
        # Advantage-estimation settings are renamed / inverted for the agent.
        agent_params['gamma'] = params['discount']
        agent_params['standardize_advantages'] = not params['dont_standardize_advantages']
        agent_params.update((key, params[key]) for key in schedule_keys)

        self.params = params
        params['agent_class'] = ACAgent
        params['agent_params'] = agent_params
        params['train_batch_size'] = params['batch_size']
        params['batch_size_initial'] = params['batch_size']
        params['non_atari_colab_env'] = True

        self.rl_trainer = RL_Trainer(params)

    def run_training_loop(self):
        """Run ``RL_Trainer.run_training_loop`` with ``params['n_iter']``.

        The agent's actor is passed as both the collection and the evaluation
        policy.
        """
        actor = self.rl_trainer.agent.actor
        self.rl_trainer.run_training_loop(
            self.params['n_iter'],
            collect_policy=actor,
            eval_policy=actor,
        )


In [8]:
#@title create directories for logging

# Root directory for all runs, relative to the current working directory.
data_path = 'data/'

# One sub-directory per run: <exp_name>_<env_name>_<dd-mm-YYYY_HH-MM-SS>.
logdir = '_'.join([args.exp_name, args.env_name, time.strftime("%d-%m-%Y_%H-%M-%S")])
logdir = os.path.join(data_path, logdir)
args['logdir'] = logdir

# makedirs also creates the missing parent (data_path); exist_ok=True avoids
# the check-then-create race of a separate os.path.exists() test and makes
# re-running the cell within the same second harmless.
os.makedirs(logdir, exist_ok=True)

print("LOGGING TO: ", logdir)

LOGGING TO:  data/q4_ac_CartPole-v0_27-06-2022_17-08-18


In [None]:
#@title run training
# Build the trainer from the runtime arguments. AC_Trainer.__init__ stores
# `args` itself (not a copy) and adds further keys to it (agent_class,
# agent_params, train_batch_size, ...).
trainer = AC_Trainer(args)
# Hands args['n_iter'] and the agent's actor (as both collect and eval policy)
# to RL_Trainer.run_training_loop.
trainer.run_training_loop()


########################
logging outputs to  data/q4_ac_CartPole-v0_27-06-2022_17-08-18
########################
Using GPU id 0


  f"The environment {path} is out of date. You should consider "


In [None]:
#@markdown You can visualize your runs with tensorboard from within the notebook

## requires tensorflow==2.3.0
# %load_ext tensorboard
%tensorboard --logdir data