In [None]:
import matplotlib
import matplotlib.pyplot as plt

import torch

import cart_pole
import agent

# Pick the compute device once: CUDA when torch reports it as available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Matplotlib / plotting state

# History of episode lengths; the training loop appends to it and
# plot_durations() reads it.
episode_durations = []

# An 'inline' backend name is taken as the sign that we are running inside a
# notebook. Only then is IPython's display module imported, so `display` is
# undefined when is_ipython is False.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# Interactive mode: figures are updated without blocking.
plt.ion()

def plot_durations():
    """Redraw the training curve from the global ``episode_durations`` list.

    Figure 2 is cleared and reused on every call. The raw per-episode
    durations are always plotted; once at least 100 episodes are recorded, a
    100-episode moving average is drawn on top, left-padded with zeros so that
    it lines up with the episode axis. When an inline backend was detected
    (``is_ipython``), the cell output is replaced with the refreshed figure.
    """
    window = 100
    durations = torch.tensor(episode_durations, dtype=torch.float)

    plt.figure(2, figsize=(12,6))
    plt.clf()
    plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(durations.numpy())

    if durations.numel() >= window:
        # One mean per sliding window of `window` consecutive episodes.
        rolling = durations.unfold(0, window, 1).mean(1).view(-1)
        # The first window - 1 episodes have no full window yet: show zeros.
        padded = torch.cat((torch.zeros(window - 1), rolling))
        plt.plot(padded.numpy())

    if is_ipython:
        # Replace the previous output instead of stacking a new figure.
        display.clear_output(wait=True)
        display.display(plt.gcf())

In [None]:
# Virtual screen for rendering the environment

%matplotlib inline
from pyvirtualdisplay import Display

# Start a non-visible 1400x900 display so the environment can render
# without a physical screen attached.
virtual_display = Display(size=(1400, 900), visible=0)
virtual_display.start()

In [None]:
# Cart Pole environment and a sample of what the agent will see

cpenv = cart_pole.CartPole(device)

plt.figure()
# Move the screen tensor to the CPU, drop the leading dimension and reorder
# the axes for imshow.
# NOTE(review): assumes get_screen() returns (1, C, H, W), which would make
# this an (H, W, C) image — confirm against cart_pole.CartPole.
example_screen = cpenv.get_screen().cpu().squeeze(0).permute(1, 2, 0).numpy()
plt.imshow(example_screen, interpolation='none')
plt.title('Example extracted screen')
plt.show()

In [None]:
# Train an agent

model = agent.RLAgent(cpenv, device)

num_episodes = 5000

# Start from an empty history so that re-running this cell does not mix the
# durations of a previous run into the new agent's training curve. Clearing
# in place keeps the same list object that plot_durations() reads.
episode_durations.clear()

try:
    for i_episode in range(num_episodes):
        # NOTE(review): episode() presumably returns the zero-based index of
        # the last step, hence the + 1 to get a duration — confirm in agent.py.
        t = model.episode(i_episode)
        episode_durations.append(t + 1)
        plot_durations()

    # `display` is only imported when an inline backend was detected, so it
    # must not be touched otherwise (it would raise NameError after training).
    if is_ipython:
        display.clear_output(wait=True)
    print('Complete')
    cpenv.env.render()
finally:
    # Always release the environment and leave interactive mode, including
    # when training is interrupted or an episode raises.
    cpenv.env.close()
    plt.ioff()