In [7]:
import numpy as np
import gym
from DQN_Utils import make_env, DQNAgent
import torch as T
import pandas as pd
from matplotlib import pyplot as plt

In [8]:
# Build the discrete-action CarRacing environment with off-screen ('rgb_array')
# rendering, then pass it through the project's make_env helper.
env = make_env(
    gym.make("CarRacing-v2", continuous=False, render_mode='rgb_array')
)

# Construct the agent from the wrapped environment's observation shape and
# number of discrete actions.
agent = DQNAgent(
    gamma=0.99,
    epsilon=1.0,
    learning_rate=2.5e-4,
    input_shape=env.observation_space.shape,
    n_actions=env.action_space.n,
    memory_size=1,
)

# Restore the saved Q-network weights onto the device the network lives on.
# NOTE: T.load unpickles the file, so only load checkpoints from a trusted source.
q_network = agent.Qeval_Network
checkpoint = T.load('checkpoints/Qeval_Network.pt', map_location=q_network.device)
q_network.load_state_dict(checkpoint)

<All keys matched successfully>

In [9]:
# Roll out a single greedy episode with the loaded Q-network, recording one
# rendered frame per step and accumulating the episode reward.
frames = []
done = False
max_steps = 1000  # hard cap so the rollout always terminates
step = 0
episode_reward = 0
q_network = agent.Qeval_Network

# NOTE(review): assumes the wrapped env's reset() returns only the observation
# (not an (obs, info) tuple) — confirm against make_env.
s = env.reset()
try:
    while not done and step < max_steps:
        frames.append(env.render())

        # Add a leading batch dimension of 1 and move the observation to the
        # network's device; kept in a separate name so `s` is always the raw
        # observation.
        state = T.tensor(np.array([s]), dtype=T.float).to(q_network.device)

        # Pure inference: skip autograd bookkeeping instead of detaching afterwards.
        with T.no_grad():
            Qvalues = q_network(state)
        action = T.argmax(Qvalues).item()  # greedy action, no exploration

        # The episode ends on either termination or truncation (5-tuple step API).
        s, r, terminated, truncated, _ = env.step(action)
        done = terminated or truncated

        episode_reward += r
        step += 1
finally:
    # Release the environment even if the rollout raises part-way through.
    env.close()

print(f'Episode Reward: {episode_reward:.2f}')

Episode Reward: 592.62


In [11]:
import cv2

# Encode the recorded RGB frames into a VP9 .webm file at 10 frames per second.
# The writer needs (width, height); take it from the frames themselves so a
# different render size does not silently produce an empty video. The original
# (600, 400) is kept as the fallback when no frames were recorded.
if frames:
    frame_height, frame_width = frames[0].shape[:2]
else:
    frame_width, frame_height = 600, 400

video_writer = cv2.VideoWriter(
    'CarRacing.webm',
    cv2.VideoWriter_fourcc('V', 'P', '9', '0'),
    10,
    (frame_width, frame_height),
)
if not video_writer.isOpened():
    # Fail loudly instead of silently writing nothing (e.g. codec unavailable).
    raise RuntimeError("Could not open 'CarRacing.webm' for writing with the VP90 codec")

try:
    for frame in frames:
        # Frames are converted from RGB to the BGR channel order before writing.
        video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
finally:
    # Always finalize the file, even if a frame fails to convert or write.
    video_writer.release()

# Drop the frame list to free memory; re-running this cell requires re-running
# the rollout cell that fills `frames`.
del frames

In [None]:
# Load the logged training metrics and preview the first rows.
training_logs_path = 'CarRacing_DQN_training_logs.csv'
training_logs = pd.read_csv(training_logs_path)
training_logs.head()

In [None]:
# Learning-curve figure: reward metrics on the left y-axis and the exploration
# rate (epsilon) on a twin right y-axis, both plotted against training steps.
fig, ax1 = plt.subplots()

ax1.set_xlabel('Steps')
ax1.set_ylabel('Reward')

plot1, = ax1.plot(training_logs['step'], training_logs['50-ep-avg-reward'],
                  color='#bf1000', label='50-Episode Avg Reward')
plot2, = ax1.plot(training_logs['step'], training_logs['best-so-far'],
                  color='#0430bf', label='Best so Far')

# Second y-axis sharing the same x-axis, used for epsilon only.
ax2 = ax1.twinx()
ax2.set_ylabel('Epsilon', color='#087d04')
plot3, = ax2.plot(training_logs['step'], training_logs['epsilon'],
                  color='#087d04', label='Epsilon', alpha=0.4)

# Collect the line handles from both axes so a single legend lists all of them.
plots = [plot1, plot2, plot3]
labels = [plot.get_label() for plot in plots]

ax1.set_yticks([0, 200, 400, 600, 800, 1000])
ax1.set_ylim([-100, 1100])
ax2.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
ax2.set_ylim([-0.1, 1.1])

# Scientific notation on the step axis, rendered with math text. This uses the
# public keyword instead of setting the formatter's private _useMathText flag.
ax1.ticklabel_format(axis='x', style='sci', scilimits=(0, 0), useMathText=True)

ax1.grid()

# The legend sits to the right of the plot; the right margin is widened and
# bbox_inches='tight' is used so the saved image includes it.
ax2.legend(plots, labels, bbox_to_anchor=(1.2, 0.5), loc='center left')
fig.subplots_adjust(right=0.8)
ax2.set_title('Learning Convergence')
fig.savefig('learning-plot.jpg', dpi=200, bbox_inches='tight')
plt.show()