In [1]:
import sys
sys.path.append("../src/")
import gym
import random
import time
from config import *
from dddqn_agent import *
from dueling_dqn_network import *
from pong_wrapper import *
from process_image import *
from replay_buffer import *
from utilities import *
import imageio

In [2]:
# Build the environment wrapper, then report how many actions the underlying
# env exposes and what each one means.
pong_wrapper = PongWrapper(ENV_NAME, NO_OP_STEPS)
print(
    "The environment has the following {} actions: {}".format(
        pong_wrapper.env.action_space.n,
        pong_wrapper.env.unwrapped.get_action_meanings(),
    )
)

The environment has the following 6 actions: ['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']


In [3]:
# Two networks with one output per environment action. Only the main network
# is given LEARNING_RATE explicitly; the target network is built with
# build_q_network's default for that argument.
MAIN_DQN = build_q_network(
    pong_wrapper.env.action_space.n,
    LEARNING_RATE,
    input_shape=INPUT_SHAPE,
)
TARGET_DQN = build_q_network(
    pong_wrapper.env.action_space.n,
    input_shape=INPUT_SHAPE,
)

# Replay buffer and the agent that ties the networks and the buffer together.
replay_buffer = ReplayBuffer(size=MEMORY_SIZE, input_shape=INPUT_SHAPE)
dddqn_agent = DDDQN_AGENT(
    MAIN_DQN,
    TARGET_DQN,
    replay_buffer,
    pong_wrapper.env.action_space.n,
    input_shape=INPUT_SHAPE,
    batch_size=BATCH_SIZE,
    replay_buffer_start_size=REPLAY_MEMORY_START_SIZE,
    max_frames=MAX_FRAMES,
)

In [4]:
# Either start with empty bookkeeping or restore it from a saved agent folder.
if PATH_LOAD_MODEL is None:
    # No checkpoint configured: begin from frame zero with empty histories.
    frame_number, rewards, loss_list = 0, [], []
else:
    start_time = time.time()
    print('Loading model and info from the folder ', PATH_LOAD_MODEL)
    info = dddqn_agent.load(PATH_LOAD_MODEL, LOAD_REPLAY_BUFFER)

    # Pull the saved progress counters out of the metadata returned by load()
    frame_number = info['frame_number']
    rewards = info['rewards']
    loss_list = info['loss_list']

    print(f'Loaded in {time.time() - start_time:.1f} seconds')

Loading model and info from the folder  ../model/save_agent_202101050927_01353984
Loaded in 0.7 seconds


In [8]:
# Play one evaluation episode with the loaded agent and record it to an MP4.
# A frame is appended after EVERY environment step; previously the only
# append_data call sat inside the game-over branch, so the resulting video
# contained just the final frame of the episode.
with imageio.get_writer("../video/trained_agent.mp4", fps=60) as video:
    terminal = True
    eval_rewards = []
    evaluate_frame_number = 0

    # EVAL_STEPS caps the number of steps in case the episode never terminates.
    for frame in range(EVAL_STEPS):
        if terminal:
            pong_wrapper.reset(evaluation=True)
            episode_reward_sum = 0
            terminal = False

        # First argument is passed as 0 here (presumably the frame number used
        # for exploration scheduling; verify against DDDQN_AGENT.get_action).
        action = dddqn_agent.get_action(0, pong_wrapper.state, evaluation=True)

        # Step action
        _, reward, terminal = pong_wrapper.step(action, render_mode='human')
        evaluate_frame_number += 1
        episode_reward_sum += reward

        # Record the frame that resulted from this step (including the last one).
        video.append_data(pong_wrapper.env.render(mode='rgb_array'))

        # On game-over: store the episode score and stop after a single game.
        if terminal:
            eval_rewards.append(episode_reward_sum)
            break



In [9]:
# Show the recorded episode as this cell's output. embed_mp4 comes from one of
# the star-imported project modules; presumably it returns an inline-displayable
# video object (TODO confirm). Must stay the last expression to be rendered.
embed_mp4("../video/trained_agent.mp4")