In [8]:
# !pip install gymnasium

In [9]:
from rainbow_agent import RainbowAgent
import gymnasium as gym
import numpy as np
import tensorflow as tf

In [10]:
class NormalizeZeroToOne(gym.ObservationWrapper):
    """Observation wrapper that min-max scales observations with the space bounds.

    Every observation is mapped through ``(obs - low) / (high - low)``, where
    ``low`` and ``high`` are read once from the wrapped environment's
    ``observation_space``.

    NOTE(review): ``observation_space`` itself is not rewritten here, so it
    still advertises the original bounds -- confirm whether consumers rely on it.
    """

    def __init__(self, env):
        """Cache the observation bounds of ``env`` and the derived value range.

        Args:
            env: Environment to wrap; its ``observation_space`` must expose
                ``low`` and ``high`` arrays.
        """
        super().__init__(env)
        self.observation_high = self.env.observation_space.high
        self.observation_low = self.env.observation_space.low

        # The range never changes after construction, so compute it once
        # instead of on every step.
        span = self.observation_high - self.observation_low
        # Where high == low the division would be 0/0; dividing by 1 instead
        # maps such a dimension to (obs - low) without NaNs or warnings.
        self._observation_span = np.where(span == 0, np.ones_like(span), span)

    def observation(self, obs):
        """Return ``obs`` rescaled by the cached bounds.

        Args:
            obs: Raw observation produced by the wrapped environment.

        Returns:
            ``(obs - low) / (high - low)`` computed element-wise.
        """
        return (obs - self.observation_low) / self._observation_span

In [11]:
class ClipReward(gym.RewardWrapper):
    """Reward wrapper that bounds every reward to ``[min_reward, max_reward]``."""

    def __init__(self, env, min_reward, max_reward):
        """Wrap ``env`` and remember the clipping bounds.

        Args:
            env: Environment whose rewards should be clipped.
            min_reward: Lower bound applied to each reward.
            max_reward: Upper bound applied to each reward.
        """
        super().__init__(env)
        # Advertise the clipped interval as the wrapper's reward range.
        self.reward_range = (min_reward, max_reward)
        self.min_reward, self.max_reward = min_reward, max_reward

    def reward(self, reward):
        """Return ``reward`` limited to the configured bounds."""
        lower, upper = self.min_reward, self.max_reward
        return np.clip(reward, lower, upper)

In [12]:
# env = gym.make("CartPole-v1", render_mode="rgb_array")

In [15]:
# env = gym.wrappers.AtariPreprocessing(gym.make("ALE/MsPacman-v5", render_mode="rgb_array"), terminal_on_life_loss=True, scale_obs=True) # as seen online, with frame stacking though
# env = gym.wrappers.AtariPreprocessing(gym.make("ALE/MsPacman-v5", render_mode="rgb_array"), terminal_on_life_loss=True, scale_obs=True) # as seen online
# Build the MsPacman pipeline one wrapper at a time:
# raw NoFrameskip env -> Atari preprocessing -> rewards clipped to [-1, 1]
# -> stack of 4 frames.
# This preprocessing setup is the one recommended by the original paper and
# should already include max pooling.
env = gym.make("MsPacmanNoFrameskip-v4", render_mode="rgb_array")
env = gym.wrappers.AtariPreprocessing(env, terminal_on_life_loss=True)
env = ClipReward(env, -1, 1)
env = gym.wrappers.FrameStack(env, 4)

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


In [16]:
# Hyperparameters handed to RainbowAgent via its `config` argument.
config = {
    # --- network initialisation / optimiser ---
    "activation": "relu",
    "kernel_initializer": "he_uniform",
    # No SGD or RMSprop for now, since this is for Rainbow DQN.
    "optimizer_function": tf.keras.optimizers.legacy.Adam,
    "learning_rate": 1e-3,
    "adam_epsilon": 3.125e-5,
    # TODO: normalization?
    # --- target network updates ---
    "soft_update": False,  # seems to always be False; True could be tried
    "ema_beta": 0.95,
    "transfer_frequency": 100,
    # --- replay ---
    "replay_period": 1,
    "replay_batch_size": 128,
    "memory_size": 10_000,
    "min_memory_size": 500,
    "n_step": 3,
    "discount_factor": 0.99,
    "atom_size": 51,
    # --- architecture ---
    "conv_layers": [(32, 8, (4, 4)), (64, 4, (2, 2)), (64, 3, (1, 1))],
    "conv_layers_noisy": False,
    "width": 512,
    "dense_layers": 2,
    "dense_layers_noisy": True,  # believed to always be True for Rainbow
    # TODO: reward clipping
    "noisy_sigma": 0.5,
    "loss_function": tf.keras.losses.KLDivergence(),
    "dueling": True,
    "advantage_hidden_layers": 1,
    "value_hidden_layers": 1,
    "num_training_steps": 25_000,
    # --- prioritised experience replay ---
    "per_epsilon": 1e-3,
    "per_alpha": 0.5,
    "per_beta": 0.5,
    # "per_beta_increase": hp.uniform("per_beta_increase", 0, 0.015),
    # --- value bounds ---
    # NOTE(review): rewards are clipped to [-1, 1] by ClipReward in this
    # notebook, so bounds based on raw game score may be wider than needed
    # -- confirm.
    "v_min": -500.0,  # min game score
    "v_max": 500.0,  # max game score
    # "search_max_depth": 5,
    # "search_max_time": 10,
}

# config = {
#     'activation': 'relu',
#     'kernel_initializer': 'he_uniform',
#     'optimizer_function': tf.keras.optimizers.legacy.Adam, # NO SGD OR RMSPROP FOR NOW SINCE IT IS FOR RAINBOW DQN
#     'learning_rate': 0.001, #
#     'adam_epsilon': 0.00003125,
#     # NORMALIZATION?
#     'soft_update': False, # seems to always be false, we can try it with True
#     'ema_beta': 0.95,
#     'transfer_frequency': 100,
#     'replay_period': 1,
#     'replay_batch_size': 128,
#     'memory_size': 10000, #############
#     'min_memory_size': 500,
#     'n_step': 3,
#     'discount_factor': 0.99,
#     'atom_size': 51, #
#     'conv_layers': [],
#     'conv_layers_noisy': False,
#     'width': 512,
#     'dense_layers': 2,
#     'dense_layers_noisy': True, # i think this is always true for rainbow
#     # REWARD CLIPPING
#     'noisy_sigma': 0.5, #
#     'loss_function': tf.keras.losses.KLDivergence(),
#     'dueling': True,
#     'advantage_hidden_layers': 1, #
#     'value_hidden_layers': 1, #
#     'num_training_steps': 25000,
#     'per_epsilon': 0.001,
#     'per_alpha': 0.5,
#     'per_beta': 0.5,
#     # 'per_beta_increase': hp.uniform('per_beta_increase', 0, 0.015),
#     'v_min': -500.0, # MIN GAME SCORE
#     'v_max': 500.0, # MAX GAME SCORE
#     # 'search_max_depth': 5,
#     # 'search_max_time': 10,
# }

# Create the agent, passing "RainbowDQN-<env id>" as its second argument,
# then run training.
agent = RainbowAgent(env, f"RainbowDQN-{env.unwrapped.spec.id}", config=config)
agent.train()

Memory Size: 0/500
Memory Size: 1/500
Memory Size: 2/500
Memory Size: 3/500
Memory Size: 4/500
Memory Size: 5/500
Memory Size: 6/500
Memory Size: 7/500
Memory Size: 8/500
Memory Size: 9/500
Memory Size: 10/500
Memory Size: 11/500
Memory Size: 12/500
Memory Size: 13/500
Memory Size: 14/500
Memory Size: 15/500
Memory Size: 16/500
Memory Size: 17/500
Memory Size: 18/500
Memory Size: 19/500
Memory Size: 20/500
Memory Size: 21/500
Memory Size: 22/500
Memory Size: 23/500
Memory Size: 24/500
Memory Size: 25/500
Memory Size: 26/500
Memory Size: 27/500
Memory Size: 28/500
Memory Size: 29/500
Memory Size: 30/500
Memory Size: 31/500
Memory Size: 32/500
Memory Size: 33/500
Memory Size: 34/500
Memory Size: 35/500
Memory Size: 36/500
Memory Size: 37/500
Memory Size: 38/500
Memory Size: 39/500
Memory Size: 40/500
Memory Size: 41/500
Memory Size: 42/500
Memory Size: 43/500
Memory Size: 44/500
Memory Size: 45/500
Memory Size: 46/500
Memory Size: 47/500
Memory Size: 48/500
Memory Size: 49/500
Memory Siz

KeyboardInterrupt: 

In [None]:
# agent.test()

In [None]:
import base64
import glob
import io
import os

from IPython.display import HTML, display


def ipython_show_video(path: str) -> None:
    """Show a video at `path` within IPython Notebook.

    The file is read as bytes, base64-encoded and embedded as a data URI in
    an HTML ``<video>`` element, which is then rendered with ``display``.

    Args:
        path: Location of the video file to embed.

    Raises:
        NameError: If `path` is not an existing regular file.
    """
    if not os.path.isfile(path):
        raise NameError("Cannot access: {}".format(path))

    # Read-only binary mode: embedding never writes to the file, so write
    # permission is not required. The context manager closes the handle as
    # soon as the bytes are read.
    with open(path, "rb") as video_file:
        video = video_file.read()
    encoded = base64.b64encode(video)

    display(HTML(
        data="""
        <video width="320" height="240" alt="test" controls>
        <source src="data:video/mp4;base64,{0}" type="video/mp4"/>
        </video>
        """.format(encoded.decode("ascii"))
    ))


def show_latest_video(video_folder: str) -> str:
    """Show the most recently recorded video from video folder.

    Candidates are the ``*.mp4`` files directly inside `video_folder`; the one
    with the largest ``os.path.getctime`` value is displayed.

    Args:
        video_folder: Directory to search for ``.mp4`` files.

    Returns:
        Path of the video that was displayed.

    Raises:
        ValueError: If `video_folder` contains no ``.mp4`` files.
    """
    list_of_files = glob.glob(os.path.join(video_folder, "*.mp4"))
    if not list_of_files:
        # max() on an empty list raises a ValueError that does not say what
        # went wrong; keep the exception type but name the folder.
        raise ValueError("No .mp4 files found in: {}".format(video_folder))
    latest_file = max(list_of_files, key=os.path.getctime)
    ipython_show_video(latest_file)
    return latest_file


# latest_file = show_latest_video(video_folder='./video')
# print("Played:", latest_file)