# Deep Deterministic Policy Gradients (DDPG)
---
In this notebook, we train a DDPG agent on the `swingup` task of the `cartpole` domain from the DeepMind Control Suite.

### 1. Import the Necessary Packages

In [1]:
import glob
import os
import shutil
import subprocess
from collections import deque

from dm_control import suite
import cv2
from PIL import Image
import numpy as np
from tqdm import trange
from IPython.display import clear_output
import matplotlib.pyplot as plt
import torch

from ddpg_agent import Agent

### 2. Instantiate the Environment and Agent

In [2]:
# Build the cartpole swing-up task from the DeepMind Control Suite.
env = suite.load(domain_name='cartpole', task_name='swingup')

# The agent's state vector is all observation entries laid end to end, so its
# length is the sum of the leading dimension of every entry in the spec.
observation_spec = env.observation_spec()
state_size = sum(observation_spec[name].shape[0] for name in observation_spec.keys())
action_spec = env.action_spec()

# Switch forwarded to the agent as `prioritized` (prioritized replay memory).
PER = False
# Discount factor used when computing the TD error in the training loop.
GAMMA = 0.99

agent = Agent(
    state_size=state_size,
    action_size=action_spec.shape[0],
    random_seed=10,
    prioritized=PER,
)

### 3. Train the Agent with DDPG

In [None]:
# Training budget: number of episodes, step cap per episode, and how often
# (in episodes) to checkpoint the networks and report progress.
n_episodes = 10000
max_t = 1000
print_every = 100

scores_deque = deque(maxlen=print_every)  # rolling window behind the printed average
scores = []                               # full per-episode history for the plot


def flatten_observation(time_step):
    """Concatenate every entry of ``time_step.observation`` into one 1-D array."""
    return np.concatenate(list(time_step.observation.values()))


for i_episode in trange(1, n_episodes + 1):
    time_step = env.reset()
    state = flatten_observation(time_step)
    agent.reset()
    score = 0

    for t in range(max_t):
        action = agent.act(state)
        time_step = env.step(action)
        reward, done = time_step.reward, time_step.last()
        next_state = flatten_observation(time_step)

        if agent.per:
            # The TD error is only consumed as a plain number, so the forward
            # passes below run without autograd bookkeeping.
            with torch.no_grad():
                # Current action, state and next state as (1, n) float tensors
                a = torch.from_numpy(action).float().to(agent.device).view(1, -1)
                s = torch.from_numpy(state).float().to(agent.device).view(1, -1)
                next_s = torch.from_numpy(next_state).float().to(agent.device).view(1, -1)

                # Next-state action and its Q value from the target networks
                next_a = agent.actor_target(next_s).view(1, -1)
                Q_target = agent.critic_target(next_s, next_a).cpu().numpy()
                # Q value of the transition actually taken, from the local critic
                Q_expected = agent.critic_local(s, a).cpu().numpy()

            # Absolute one-step TD error, stored on the agent before agent.step()
            agent.TD_error = np.abs(reward + GAMMA*Q_target - Q_expected).item()

        agent.step(state, action, reward, next_state, done)

        state = next_state
        score += reward

        if done:
            break

    scores_deque.append(score)
    scores.append(score)

    if i_episode % print_every == 0:
        # Overwrite the checkpoints with the latest local networks.
        torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')

        clear_output(True)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))

        # Redraw the learning curve; close the figure afterwards so repeated
        # reports do not accumulate open figures.
        fig, ax = plt.subplots()
        ax.plot(scores)
        ax.set_ylabel('Score')
        ax.set_xlabel('Episode #')

        plt.show()
        plt.close(fig)


### 4. Watch a Smart Agent!

In [None]:
# Restore the checkpointed weights. map_location places the tensors on the
# agent's device, so a checkpoint saved on GPU also loads on a CPU-only machine.
agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth', map_location=agent.device))
agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth', map_location=agent.device))

# Start from an empty frames folder (portable replacement for `rm -rf` / `mkdir -p`).
shutil.rmtree('frames', ignore_errors=True)
os.makedirs('frames', exist_ok=True)

time_step = env.reset()
state = np.concatenate(list(time_step.observation.values()))

agent.actor_local.eval()
agent.critic_local.eval()

with torch.no_grad():
    for t in trange(700):
        # NOTE(review): agent.act() is defined in ddpg_agent; confirm whether it
        # adds exploration noise or toggles train/eval mode during playback.
        action = agent.act(state)
        time_step = env.step(action)

        # Render the current physics state and store it as a numbered PNG so
        # the files sort in playback order.
        image_data = env.physics.render(height=480, width=480, camera_id=0)
        img = Image.fromarray(image_data, 'RGB')
        img.save("frames/frame-%.10d.png" % t)

        state = np.concatenate(list(time_step.observation.values()))
        clear_output(True)
        if time_step.last():
            break



  2%|▏         | 15/700 [00:42<29:15,  2.56s/it][A

In [10]:
# Convert frames to video
frame_paths = sorted(glob.glob('frames/*.png'))
if not frame_paths:
    # Without this guard the frame size below would be undefined.
    raise FileNotFoundError("no PNG frames found in 'frames/'; run the rendering cell first")

img_array = []
for filename in frame_paths:
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals an unreadable file by returning None, not by raising.
        raise IOError("could not read frame: {}".format(filename))
    img_array.append(img)

# VideoWriter expects (width, height); the size is taken from the first frame.
height, width = img_array[0].shape[:2]
size = (width, height)

# 'mp4v' is the fourcc that matches the .mp4 container.
out = cv2.VideoWriter('project.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 15, size)
if not out.isOpened():
    raise IOError("could not open 'project.mp4' for writing")

try:
    for frame in img_array:
        out.write(frame)
finally:
    # Always finalize the file, even if writing a frame fails.
    out.release()