# Deep Deterministic Policy Gradient (DDPG)

In [1]:
import os

# Library directories this notebook puts on LD_LIBRARY_PATH.
# NOTE(review): these are machine-specific absolute paths — adjust for your install.
required_lib_dirs = ['/home/user/.mujoco/mjpro150/bin', '/usr/lib/nvidia']

# Prepend the required directories instead of overwriting the variable, so
# entries that were already set in the environment are preserved. Filtering
# out the required directories first keeps the cell idempotent on re-run.
existing_lib_dirs = [
    entry
    for entry in os.environ.get('LD_LIBRARY_PATH', '').split(':')
    if entry and entry not in required_lib_dirs
]
os.environ['LD_LIBRARY_PATH'] = ':'.join(required_lib_dirs + existing_lib_dirs)

In [10]:
import json
import torch

# Checkpoint to inspect. Alternative run, kept for quick switching:
#   'models/ddpg/invertedpendulum-v2/reproducibility-0_34bu45fs_61440_20220211T221202.pth'
checkpoint_path = 'models/ddpg/halfcheetah-v2/reproducibility-0_2s8ic49h_150000_20220211T223456.pth'

# map_location='cpu' lets the checkpoint load on a machine without a GPU even
# if its tensors were saved from CUDA.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
state = torch.load(checkpoint_path, map_location='cpu')

# Show the training configuration stored alongside the weights.
print(json.dumps(state['config'], indent=2, sort_keys=True))

{
  "batch_size": 1000,
  "buffer_size": 100000,
  "env": "HalfCheetah-v2",
  "env_args": {},
  "gamma": 0.995,
  "learning_starts": 2048,
  "log_step": 4000,
  "lr": 0.001,
  "max_episode_steps": 250,
  "pi_activation": "relu",
  "pi_layer_size": 64,
  "pi_num_layers": 2,
  "pi_std_logits": -0.5,
  "q_activation": "relu",
  "q_layer_size": 64,
  "q_num_layers": 2,
  "rho": 0.9,
  "save_final": true,
  "save_max_eps": false,
  "seed": 0,
  "steps": 150000,
  "training_count": 3,
  "training_freq": 50
}


In [11]:
import gym
from spin_class.algos.ddpg import make_models

# Rebuild the environment named in the checkpoint config, forwarding any extra
# constructor arguments stored under 'env_args' (none if the key is absent).
config = state['config']
kwargs = config.get('env_args', {})
env = gym.make(config['env'], **kwargs)

# Recreate both networks on the CPU, restore their saved weights and call
# .eval() on each.
device = torch.device('cpu')
pi_net, q_net = make_models(env, device, config)
for net, weights_key in ((pi_net, 'pi_state_dict'), (q_net, 'q_state_dict')):
    net.load_state_dict(state[weights_key])
    net.eval()

# Last expression of the cell: display both modules.
(pi_net, q_net)

(DDPGActorMLP(
   (head): Sequential(
     (0): Linear(in_features=17, out_features=64, bias=True)
     (1): ReLU()
     (2): Linear(in_features=64, out_features=64, bias=True)
     (3): ReLU()
     (4): Linear(in_features=64, out_features=6, bias=True)
     (5): Tanh()
   )
 ),
 DDPGCriticMLP(
   (head): Sequential(
     (0): Concat1d()
     (1): Linear(in_features=23, out_features=64, bias=True)
     (2): ReLU()
     (3): Linear(in_features=64, out_features=64, bias=True)
     (4): ReLU()
     (5): Linear(in_features=64, out_features=1, bias=True)
   )
 ))

In [14]:
import gym
from gym import wrappers
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
from torch.distributions.normal import Normal

# Start a virtual display once per kernel (presumably needed for off-screen
# rendering of the recorded video — confirm for your setup).
# The guard checks the *type* bound to `display`, not just whether the name
# exists: if `display` happens to refer to something else (for example after
# `from IPython.display import display`), a name-only check would silently
# skip starting the virtual display.
if not isinstance(globals().get('display'), Display):
    display = Display(visible=False, size=(1400, 900))
    display.start()

def play(env, pi, q, steps=1000, video_dir="./video"):
    """Roll out the policy in ``env`` and show the recorded video inline.

    The environment is wrapped in ``wrappers.Monitor`` (``force=True``), which
    records into ``video_dir``. Actions are sampled from
    ``pi.distribution(pi(obs), 0.99)``. The rollout stops after ``steps``
    steps or as soon as the environment reports ``done``; in the latter case
    the index of the final step is printed.

    Args:
        env: Gym environment to record.
        pi: Policy network; called as ``pi(obs)`` and must provide
            ``distribution(mu, 0.99)`` and ``parameters()``.
        q: Unused; kept so existing calls keep working.
        steps: Maximum number of environment steps to run.
        video_dir: Directory the monitor writes the recording to.

    Returns:
        None. The video is rendered through ``ipythondisplay.display``.
    """
    monitored_env = wrappers.Monitor(env, video_dir, force=True)
    obs_dtype = (
        torch.int64
        if isinstance(monitored_env.observation_space, gym.spaces.Discrete)
        else torch.float32
    )

    # Put observations on the policy's own device instead of depending on a
    # notebook-level `device` variable defined in another cell.
    first_param = next(pi.parameters(), None)
    pi_device = first_param.device if first_param is not None else torch.device("cpu")

    try:
        obs = monitored_env.reset()
        for step in range(steps):
            with torch.no_grad():
                # Add a leading batch dimension before calling the policy.
                obs_t = torch.as_tensor(obs, dtype=obs_dtype, device=pi_device).unsqueeze(0)
                mu = pi(obs_t)
                # NOTE(review): the meaning of the 0.99 argument is defined by
                # pi.distribution, which is not visible here — confirm.
                dist = pi.distribution(mu, 0.99)
                # Drop the batch dimension and convert to a plain Python list.
                action = dist.sample()[0].cpu().numpy().tolist()
            obs, _reward, done, _info = monitored_env.step(action)
            if done:
                print(step)
                break
    finally:
        # Always close the monitor, even if the rollout raised.
        # NOTE(review): this presumably closes the wrapped env as well —
        # confirm before reusing `env` for another call.
        monitored_env.close()

    video_path = '%s/openaigym.video.%s.video000000.mp4' % (video_dir, monitored_env.file_infix)
    # Read-only access is enough, and the context manager releases the handle.
    with open(video_path, 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    ipythondisplay.display(HTML(data='''
        <video alt="test" autoplay loop controls style="height: 400px;">
            <source src="data:video/mp4;base64,{0}" type="video/mp4" />
        </video>'''.format(encoded.decode('ascii'))))

    #HTML(data='''
    #    <video width="360" height="auto" alt="test" controls><source src="data:video/mp4;base64,{0}" type="video/mp4" /></video>
    #'''.format(encoded.decode('ascii')))

In [15]:
# Record and display a rollout of at most 500 steps using the networks whose
# weights were loaded from the checkpoint.
play(env, pi=pi_net, q=q_net, steps=500)