In [24]:
from stable_baselines3.common.vec_env.vec_frame_stack import VecFrameStack
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecVideoRecorder
import tensorflow as tf

import time
import numpy as np
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
from PIL import Image
import random

In [25]:
# Evaluation configuration: number of parallel environments and where the
# saved model / recorded videos live.
n_env = 1
# NOTE(review): make_atari_env builds an in-process vectorized env by default,
# so "processes" in this message is loose wording - confirm before relying on it.
print("Creating environment with {} processes".format(n_env))
model_type = "vanilla"
model_detail = ""
model_path = f"./{model_type}_DQN/model{model_detail}"
video_path = f"./{model_type}_DQN/video{model_detail}"

# Create the environment with 4 stacked frames.
# n_envs is passed explicitly so the number of environments that actually
# exist matches n_env, which later cells use to size their bookkeeping.
env = make_atari_env("ALE/MsPacman-v5", n_envs=n_env)
env = VecFrameStack(env, n_stack=4)
# Record one clip of up to 1000 steps, triggered at step 0 only.
env = VecVideoRecorder(
    env,
    video_folder=video_path,
    record_video_trigger=lambda step: step == 0,
    video_length=1000,
    name_prefix="dqn-agent",
)

# Size of the discrete action set; predict() uses it to draw random actions.
n_actions = env.action_space.n

Creating environment with 1 processes


In [26]:
# Exploration rate read by predict(): probability of taking a random action.
eps = 0.1

# Restore the saved Keras model from model_path.
model = tf.keras.models.load_model(model_path)





In [31]:
def predict(model, obs, epsilon=None, num_actions=None):
    """Choose one epsilon-greedy action for every environment in ``obs``.

    Args:
        model: Callable network; ``model(batch, training=False)`` must return
            one row of action values per observation in the batch.
        obs: Batch of observations with one entry per environment
            (``len(obs)`` is taken as the number of environments).
        epsilon: Probability of picking a uniformly random action. Defaults to
            the notebook-level ``eps``.
        num_actions: Number of discrete actions. Defaults to the notebook-level
            ``n_actions``.

    Returns:
        A list with one integer action per environment, suitable for passing
        to ``env.step``.
    """
    if epsilon is None:
        epsilon = eps
    if num_actions is None:
        num_actions = n_actions

    greedy_actions = None  # filled lazily: the network runs at most once per call
    chosen = []
    for env_idx in range(len(obs)):
        if epsilon >= random.random():
            # Explore: uniform random action (randint is inclusive on both ends).
            chosen.append(random.randint(0, num_actions - 1))
        else:
            if greedy_actions is None:
                # We can also use numpy but this is more efficient
                tensor_state = tf.convert_to_tensor(obs)
                action_values = model(tensor_state, training=False)
                # Best action per row, i.e. per environment.
                greedy_actions = tf.argmax(action_values, axis=1).numpy()
            # Exploit: action with the highest predicted value for this env.
            chosen.append(int(greedy_actions[env_idx]))

    return chosen


In [32]:
# Create a virtual display
display = Display(visible=0, size=(1200, 1200))
display.start()

# Number of finished episodes to collect from every environment before stopping.
n_eval_episodes = 3
# Pause between rendered frames, in seconds (0.5 s is roughly 2 frames per second).
frame_delay = 1 / 2

# data[i] holds the episodes of environment i; each episode is the list of its
# per-step rewards.
data = {i: [[]] for i in range(n_env)}
# tmp_data[i] is the 1-based number of the episode environment i is currently in.
tmp_data = {i: 1 for i in range(n_env)}

try:
    obs = env.reset()

    while True:
        actions = predict(model, obs)
        obs, rewards, dones, info = env.step(actions)

        # NOTE(review): with make_atari_env's default Atari wrappers a `done`
        # may mark a lost life rather than a full game, and rewards may be
        # clipped - confirm before reading these numbers as game scores.
        for i in range(n_env):
            # Open the next episode's reward list lazily, on its first step,
            # so no empty trailing episode is recorded when the loop stops.
            if tmp_data[i] - 1 >= len(data[i]):
                data[i].append([])
            if rewards[i] is not None:  # Check if the reward is not None
                data[i][tmp_data[i] - 1].append(rewards[i])
            if dones[i]:
                tmp_data[i] += 1

        # Render the environment and display the frame
        screen = env.render()
        # Convert the RGB array to an image
        screen = Image.fromarray(screen)

        ipythondisplay.clear_output(wait=True)
        ipythondisplay.display(screen)

        time.sleep(frame_delay)

        # Stop once every environment has finished n_eval_episodes episodes.
        if all(tmp_data[i] > n_eval_episodes for i in range(n_env)):
            break
finally:
    # Always release resources, even if the loop is interrupted or fails:
    # close the env (and its video recorder wrapper), then stop the virtual
    # display started above.
    ipythondisplay.clear_output(wait=True)
    try:
        env.close()
    finally:
        display.stop()

Moviepy - Building video /home/mister/Desktop/PacmanDQN/vanilla_DQN/video/dqn-agent-step-0-to-step-1000.mp4.
Moviepy - Writing video /home/mister/Desktop/PacmanDQN/vanilla_DQN/video/dqn-agent-step-0-to-step-1000.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/mister/Desktop/PacmanDQN/vanilla_DQN/video/dqn-agent-step-0-to-step-1000.mp4




In [54]:
import copy
from pprint import pprint

# Work on a deep copy so the raw per-step rewards in `data` stay untouched.
copy_data = copy.deepcopy(data)

# Collapse every episode into [total reward, number of steps].
for env_idx in range(n_env):
    copy_data[env_idx] = [
        [np.sum(episode), len(episode)] for episode in copy_data[env_idx]
    ]

pprint(copy_data)

{0: [[72.0, 109], [9.0, 37], [21.0, 103]]}


In [55]:
# Per-environment summary; every episode entry is [total reward, step count].
for env_idx in range(n_env):
    episodes = copy_data[env_idx]
    n_episodes = len(episodes)
    reward_total = sum([ep[0] for ep in episodes])
    step_total = sum([ep[1] for ep in episodes])
    # Each episode's reward weighted by its own length in steps.
    weighted_total = sum([ep[0] * ep[1] for ep in episodes])

    print("Total episodes for CPU", env_idx, ":", n_episodes)
    print("Total reward for CPU", env_idx, ":", reward_total, "with average reward", format(reward_total / n_episodes, '.2f'))
    print("Total steps for CPU", env_idx, ":", step_total, "with average steps", format(step_total / n_episodes, '.2f'))
    print("Weighted average reward for CPU", env_idx, ":", format(weighted_total / step_total, '.2f'))
    print()

# Episode count across all environments.
print("Total episodes:", sum(len(copy_data[env_idx]) for env_idx in range(n_env)))

Total episodes for CPU 0 : 2
Total reward for CPU 0 : 102.0 with average reward 34.00
Total steps for CPU 0 : 249 with average steps 83.00
Weighted average reward for CPU 0 : 41.54

Total episodes: 3
