In [None]:
import gym
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from pathlib import Path
import torch

from ddpg_agent import Agent

In [None]:
# Create the racing environment once; later cells reuse `env` and `action_dim`.
env_name = 'CarRacing-v0'
# `verbose=0` is forwarded to the environment constructor
# (presumably to silence its console logging -- TODO confirm).
env = gym.make(env_name, verbose=0)

# The agent is sized from the first dimension of the action-space shape.
action_space_shape = env.action_space.shape
action_dim = action_space_shape[0]

### Watch a trained agent

Run the code cells below to watch a trained agent. Its actor weights are loaded from checkpoint files saved locally under `tmp/<ckpt_dirname>`.

In [None]:
# Name of the checkpoint directory to load; it is resolved relative to `tmp/`
# in the next cell. The value looks like a DD-MM-YYYY_HH:MM:SS timestamp of a
# training run -- change it to select a different run.
ckpt_dirname = '21-01-2021_21:15:45'

In [None]:
# Build a fresh agent, then overwrite its local actor network with saved weights.
agent = Agent(action_dim=action_dim, seed=42)

# Locate the checkpoint directory and the actor weights inside it.
ckpt_dirpath = Path('tmp') / ckpt_dirname
actor_ckpt_fpath = ckpt_dirpath / 'checkpoint_actor.pth'

# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only machine.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
actor_state_dict = torch.load(actor_ckpt_fpath, map_location='cpu')
agent.actor_local.load_state_dict(actor_state_dict)

In [None]:
# Plot the rewards recorded during training, if the run saved a history file
# next to its checkpoints.
history_fpath = ckpt_dirpath / 'history.csv'

if not os.path.isfile(history_fpath):
    print('No train history to display.')
else:
    # The first CSV column becomes the index and is used as the x-axis.
    history = pd.read_csv(history_fpath, index_col=0)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(history.index, history.values)
    ax.set(ylabel='Reward', xlabel='Episode #', title='Train run rewards')
    plt.show()

In [None]:
# Roll out one episode with the loaded policy, rendering every frame and
# accumulating the reward. The episode ends when the environment reports done
# or after `n_timesteps` steps, whichever comes first.
n_timesteps = 1_000
ep_reward = 0

state = env.reset()
try:
    for timestep in range(n_timesteps):
        action = agent.act(state)
        next_state, reward, is_done, _info = env.step(action)

        # Deliberately no `agent.step(...)` here: this cell only *watches* the
        # trained agent. That call presumably feeds the transition back into the
        # learner (verify in ddpg_agent), and since only the actor weights were
        # restored above, updating during playback would alter the loaded policy.

        state = next_state
        ep_reward += reward

        env.render()
        if is_done:
            break
finally:
    # Release the render window even if the rollout fails or is interrupted.
    env.close()

print(f'Total reward this episode: {ep_reward}')