### Racing Game Analysis

What follows is an analysis of performance, training, and various other metrics with regard to the racing game environment.

Note that the models we will be speaking of have already been trained, and the metrics pregenerated.

In [29]:
import sys
import os

# Show the directory the notebook is running from; the relative paths built
# later in this notebook are resolved against os.getcwd().
print(os.getcwd())

d:\Downloads\drl-assign1\DRL-Assignment\notebooks


In [30]:
import matplotlib
import matplotlib_inline
import IPython

# Report the installed version of each plotting-related package, one per line.
for label, module in (
    ("matplotlib", matplotlib),
    ("matplotlib_inline", matplotlib_inline),
    ("IPython", IPython),
):
    print(f"{label}:", module.__version__)

AttributeError: 'RcParams' object has no attribute '_get'

In [20]:
# Importing relevant libraries
# The project modules below are not installed packages: each one is made
# importable by appending its directory (resolved relative to the current
# working directory) to sys.path immediately before the import that needs it.
import sys, os
import matplotlib.pyplot as plt

# Environment directories, located one level above the working directory.
racing_dir = os.path.abspath(os.path.join(os.getcwd(), "..", "envs", "racing_game"))
target_dir = os.path.abspath(os.path.join(os.getcwd(), "..", "envs", "target_game"))
# Order matters: each path is appended right before its module is imported.
sys.path.append(racing_dir)
from racing_env import RacingEnv
sys.path.append(target_dir)
from target_env import TargetEnv

# ../src is added so that test_models (the tester classes) can be imported.
train_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
sys.path.append(train_dir)
from test_models import SACTester, PPOTester, DDPGTester, A2CTester

In [21]:
# Build one racing environment and hand it, together with a pretrained-weights
# filename, to one tester per algorithm. All four testers share `renv`.
renv = RacingEnv()
s, p, d, a = (
    SACTester('pretrained_SAC_racing.pt', renv),
    PPOTester('pretrained_PPO_racing.pt', renv),
    DDPGTester('pretrained_DDPG_racing.pt', renv),
    A2CTester('pretrained_A2C_racing.pt', renv),
)

Unfortunately, a jupyter notebook doesn't seem to be able to actually render pygame. We'll settle for collecting data on how the agents play, and you can observe them playing elsewhere. Note that this script takes an absolutely ungodly amount of time to run because jupyter notebooks suck, so we're sticking to ten episodes and noting that this means data may be subject to more noise.

In [22]:
# Run every tester with the same settings: ten episodes each, visual=False.
eval_settings = {"n_episodes": 10, "visual": False}
ppo_res = p.test(**eval_settings)
sac_res = s.test(**eval_settings)
ddpg_res = d.test(**eval_settings)
a2c_res = a.test(**eval_settings)

Episode 1 finished: total reward = 2856.25
Episode 2 finished: total reward = 3999.17
Episode 3 finished: total reward = 2883.82
Episode 4 finished: total reward = 2865.96
Episode 5 finished: total reward = 2859.63
Episode 6 finished: total reward = 2872.17
Episode 7 finished: total reward = 3634.52
Episode 8 finished: total reward = 4093.67
Episode 9 finished: total reward = 2873.45
Episode 10 finished: total reward = 2858.68
Episode 1 finished: total reward = 3020.23
Episode 2 finished: total reward = 3073.19
Episode 3 finished: total reward = 3022.04
Episode 4 finished: total reward = 3012.55
Episode 5 finished: total reward = 2992.65
Episode 6 finished: total reward = 3029.55
Episode 7 finished: total reward = 3009.03
Episode 8 finished: total reward = 3059.42
Episode 9 finished: total reward = 2981.32
Episode 10 finished: total reward = 3035.19
Episode 1 finished: total reward = 2903.43
Episode 2 finished: total reward = 2903.43
Episode 3 finished: total reward = 2903.43
Episode 4

In [23]:
#print(ppo_res)

In [24]:
# Per-timestep mean (x, y) car position for the PPO run, averaged over every
# episode that is still in progress at that timestep.
episode_infos = ppo_res[1]

# The longest episode decides how many timesteps get an entry.
max_steps = max(map(len, episode_infos))

mean_positions = []
for t in range(max_steps):
    total_x = total_y = 0.0
    n_seen = 0

    # Episodes shorter than t + 1 steps have nothing to contribute here.
    for trajectory in (ep for ep in episode_infos if t < len(ep)):
        record = trajectory[t]
        try:
            xy = record['car_position']
            total_x += xy[0]
            total_y += xy[1]
            n_seen += 1
        except KeyError:
            # Step records without a position are left out of the average.
            pass

    # [None, None] keeps list indices aligned with timesteps when no position
    # was recorded at this step.
    mean_positions.append(
        [total_x / n_seen, total_y / n_seen] if n_seen > 0 else [None, None]
    )

# print(mean_positions)

In [25]:
# Per-timestep mean (x, y) car position for the SAC run, averaged over every
# episode that is still in progress at that timestep.
episode_infos = sac_res[1]

# The longest episode decides how many timesteps get an entry.
max_steps = max(map(len, episode_infos))

mean_positions2 = []
for t in range(max_steps):
    total_x = total_y = 0.0
    n_seen = 0

    # Episodes shorter than t + 1 steps have nothing to contribute here.
    for trajectory in (ep for ep in episode_infos if t < len(ep)):
        record = trajectory[t]
        try:
            xy = record['car_position']
            total_x += xy[0]
            total_y += xy[1]
            n_seen += 1
        except KeyError:
            # Step records without a position are left out of the average.
            pass

    # [None, None] keeps list indices aligned with timesteps when no position
    # was recorded at this step.
    mean_positions2.append(
        [total_x / n_seen, total_y / n_seen] if n_seen > 0 else [None, None]
    )

#print(mean_positions2)


In [26]:
# Per-timestep mean (x, y) car position for the DDPG run, averaged over every
# episode that is still in progress at that timestep.
episode_infos = ddpg_res[1]

# The longest episode decides how many timesteps get an entry.
max_steps = max(map(len, episode_infos))

mean_positions3 = []
for t in range(max_steps):
    total_x = total_y = 0.0
    n_seen = 0

    # Episodes shorter than t + 1 steps have nothing to contribute here.
    for trajectory in (ep for ep in episode_infos if t < len(ep)):
        record = trajectory[t]
        try:
            xy = record['car_position']
            total_x += xy[0]
            total_y += xy[1]
            n_seen += 1
        except KeyError:
            # Step records without a position are left out of the average.
            pass

    # [None, None] keeps list indices aligned with timesteps when no position
    # was recorded at this step.
    mean_positions3.append(
        [total_x / n_seen, total_y / n_seen] if n_seen > 0 else [None, None]
    )

#print(mean_positions3)

In [27]:
# Per-timestep mean (x, y) car position for the A2C run, averaged over every
# episode that is still in progress at that timestep.
episode_infos = a2c_res[1]

# The longest episode decides how many timesteps get an entry.
max_steps = max(map(len, episode_infos))

mean_positions4 = []
for t in range(max_steps):
    total_x = total_y = 0.0
    n_seen = 0

    # Episodes shorter than t + 1 steps have nothing to contribute here.
    for trajectory in (ep for ep in episode_infos if t < len(ep)):
        record = trajectory[t]
        try:
            xy = record['car_position']
            total_x += xy[0]
            total_y += xy[1]
            n_seen += 1
        except KeyError:
            # Step records without a position are left out of the average.
            pass

    # [None, None] keeps list indices aligned with timesteps when no position
    # was recorded at this step.
    mean_positions4.append(
        [total_x / n_seen, total_y / n_seen] if n_seen > 0 else [None, None]
    )

#print(mean_positions4)

In [28]:
# Scatter plot of the PPO car's mean position at each timestep.
# Timesteps with no recorded position are stored as [None, None]; negating
# None raises TypeError, so those placeholders are dropped before plotting.
recorded = [
    point for point in mean_positions
    if point[0] is not None and point[1] is not None
]
x = [point[0] for point in recorded]
y = [-point[1] for point in recorded]  # because of how pygame handles position
plt.scatter(x, y)
plt.show()

AttributeError: 'RcParams' object has no attribute '_get'

In [None]:
# Scatter plot (red) of the SAC car's mean position at each timestep.
# Timesteps with no recorded position are stored as [None, None]; negating
# None raises TypeError, so those placeholders are dropped before plotting.
recorded2 = [
    point for point in mean_positions2
    if point[0] is not None and point[1] is not None
]
x2 = [point[0] for point in recorded2]
y2 = [-point[1] for point in recorded2]  # y is negated, as in the PPO plot
plt.scatter(x2, y2, color='red')
plt.show()

In [None]:
# Scatter plot (orange) of the DDPG car's mean position at each timestep.
# Timesteps with no recorded position are stored as [None, None]; negating
# None raises TypeError, so those placeholders are dropped before plotting.
recorded3 = [
    point for point in mean_positions3
    if point[0] is not None and point[1] is not None
]
x3 = [point[0] for point in recorded3]
y3 = [-point[1] for point in recorded3]  # y is negated, as in the PPO plot
plt.scatter(x3, y3, color='orange')
plt.show()

In [None]:
# Scatter plot (black) of the A2C car's mean position at each timestep.
# Timesteps with no recorded position are stored as [None, None]; negating
# None raises TypeError, so those placeholders are dropped before plotting.
recorded4 = [
    point for point in mean_positions4
    if point[0] is not None and point[1] is not None
]
x4 = [point[0] for point in recorded4]
y4 = [-point[1] for point in recorded4]  # y is negated, as in the PPO plot
plt.scatter(x4, y4, color='black')
plt.show()

And this is about what should be expected if you have watched the PPO and SAC agents play. The PPO-trained model is very jittery and can sometimes be seen spinning in random loops. The SAC-trained model is smooth as butter.

In [None]:
# Mean number of recorded steps per episode for the PPO and SAC runs.
ep_inf_p, ep_inf_s = ppo_res[1], sac_res[1]

# Each episode's length is the number of step records it holds.
avg_len_p = sum(map(len, ep_inf_p)) / len(ep_inf_p)
avg_len_s = sum(map(len, ep_inf_s)) / len(ep_inf_s)

print(f"Average length of PPO episode: {avg_len_p:.2f} | Average length of SAC episode: {avg_len_s:.2f}")

To be perfectly honest, this one surprised me. I was aware that the SAC-trained race car accelerates oddly (an artifact of the way the warmup policy runs, since I didn't want to make it too good and get docked marks for a scripted policy), but I thought the disgusting behaviour of the PPO car would outweigh that. Turns out, though, I'm seeing the PPO car finish about 200 steps faster.

In [None]:
# Mean 'episode_reward' per episode for the PPO and SAC runs, read from the
# last step record of each episode.
ep_inf_p, ep_inf_s = ppo_res[1], sac_res[1]

final_rewards_p = [trajectory[-1]['episode_reward'] for trajectory in ep_inf_p]
final_rewards_s = [trajectory[-1]['episode_reward'] for trajectory in ep_inf_s]

# Plain left-to-right accumulation, then divide by the episode count.
avg_rew_p = 0.0
for reward in final_rewards_p:
    avg_rew_p += reward
avg_rew_p = avg_rew_p / len(ep_inf_p)

avg_rew_s = 0.0
for reward in final_rewards_s:
    avg_rew_s += reward
avg_rew_s = avg_rew_s / len(ep_inf_s)

print(f"Average PPO episode reward: {avg_rew_p:.2f} | Average SAC episode reward: {avg_rew_s:.2f}")

SAC beats it out on the results I have, but I suspect there is enough variance that they are functionally equivalent. 2440.90 compared to 2441.26 is less than a 0.5-unit reward difference.

In [None]:
def _mean_per_step(episodes, key, n_steps):
    """Average ``key`` across episodes at each step index.

    Args:
        episodes: sequence of episodes, each a sequence of per-step records
            that can be indexed by ``key``.
        key: name of the per-step value to average.
        n_steps: number of step indices to cover, starting at 0.

    Returns:
        A list of length ``n_steps``. Entry ``i`` is the mean of ``key`` over
        the episodes that have a step ``i`` containing ``key``, or ``None``
        when no episode does.
    """
    means = []
    for step_idx in range(n_steps):
        total = 0.0
        count = 0
        for ep in episodes:
            if step_idx >= len(ep):
                continue  # this episode ended before step_idx
            try:
                value = ep[step_idx][key]
            except KeyError:
                continue  # step record lacks the key; leave it out
            total += value
            count += 1
        # None keeps list indices aligned with timesteps when nothing was recorded.
        means.append(total / count if count > 0 else None)
    return means


ep_inf_p = ppo_res[1]
ep_inf_s = sac_res[1]

# Find the maximum number of steps among all episodes.
# default=0 gives empty results instead of a ValueError when there are no episodes.
max_steps_p = max((len(ep) for ep in ep_inf_p), default=0)
max_steps_s = max((len(ep) for ep in ep_inf_s), default=0)

# Per-timestep mean 'car_speed': mean_speeds is PPO, mean_speeds2 is SAC.
mean_speeds = _mean_per_step(ep_inf_p, 'car_speed', max_steps_p)
mean_speeds2 = _mean_per_step(ep_inf_s, 'car_speed', max_steps_s)
print(mean_speeds)

In [None]:
# Mean car speed per timestep for both agents on one set of axes.
# mean_speeds comes from the PPO results and mean_speeds2 from the SAC results;
# the labels and legend make that mapping visible on the figure itself.
plt.plot(mean_speeds, color='blue', label='PPO')
plt.plot(mean_speeds2, color='red', label='SAC')
plt.xlabel('Timestep')
plt.ylabel('Mean car speed')
plt.legend()
plt.show()

This graph is a little interesting in just how drastically it varies. The PPO model has wild differences and downspikes in its speed value, whereas the SAC-trained model smoothly accelerates towards the maximum, and in fact never actually reaches it. The PPO model does finish faster on average, and so its curve doesn't reach all the way across the graph. In fact, the average PPO episode ends about 200 timesteps faster, so on some level the downspikes (past step 200) can be attributed to noise.