In [1]:
from tensorflow import keras
from tensorflow.keras import layers

from common.env_wrappers.utils import get_env, gif_model_demo
from common.models import (
    create_actor_critic_model1,
    create_from_doc_model,
    create_my_model1,
    predict_action,
    reinforce_mc_model,
)
from common.train import (
    actor_critic,
    q_learning_main_and_target_train,
    reinforce_mc,
    simple_sarsa,
)

In [4]:
# Shared environment instance: the model-building cells below read
# `env.observation_space.shape` from it, so this cell must succeed first.
# NOTE: the recorded output of this cell shows get_env() failing when the
# "SpaceInvaders" ROM is not installed; the message suggests
# `pip install gym[accept-rom-license]` or `ale-import-roms`.
env = get_env()



Error: We're Unable to find the game "SpaceInvaders". Note: Gym no longer distributes ROMs. If you own a license to use the necessary ROMs for research purposes you can download them via `pip install gym[accept-rom-license]`. Otherwise, you should try importing "SpaceInvaders" via the command `ale-import-roms`. If you believe this is a mistake perhaps your copy of "SpaceInvaders" is unsupported. To check if this is the case try providing the environment variable `PYTHONWARNINGS=default::ImportWarning:ale_py.roms`. For more information see: https://github.com/mgbellemare/Arcade-Learning-Environment#rom-management

## Q-Learning & SARSA

In [5]:
# Build and summarise a small convolutional Q-network: the observation goes
# through three conv layers (with two max-pools), a 512-unit dense layer, and
# a linear output with one unit per action.
#
# `keras` and `layers` are provided by the notebook's import cell; `env` is
# created by the `env = get_env()` cell.

# The helper `get_action_space_len` used here previously is not imported
# anywhere in this notebook, so the cell raised NameError. Read the size
# straight from the environment instead.
# assumes a Discrete action space (which exposes `.n`) — TODO confirm
num_actions = env.action_space.n

inputs = layers.Input(shape=env.observation_space.shape)

# Convolutional feature extractor.
conv1 = layers.Conv2D(filters=10, kernel_size=(3, 3), strides=2, activation="relu")(inputs)
conv2 = layers.Conv2D(filters=8, kernel_size=(3, 3), activation="relu")(conv1)
pool1 = layers.MaxPooling2D(pool_size=(3, 3), padding="valid")(conv2)
conv3 = layers.Conv2D(filters=5, kernel_size=(3, 3), strides=1, activation="relu")(pool1)
pool2 = layers.MaxPooling2D(pool_size=(3, 3), padding="valid")(conv3)

# Dense head: flatten the feature map, then map to per-action outputs.
flat = layers.Flatten()(pool2)
hidden = layers.Dense(512, activation="relu")(flat)
action = layers.Dense(num_actions, activation="linear")(hidden)

model = keras.Model(inputs=inputs, outputs=action)
model.summary()

NameError: name 'env' is not defined

In [None]:
# Run q_learning_main_and_target_train on a fresh environment with the
# create_my_model1 factory. The return value is kept in `saved_path`, which
# the following cell passes to keras.models.load_model.
saved_path = q_learning_main_and_target_train(
    get_env(),
    create_my_model1,
    # Learning hyperparameters.
    gamma=0.99,
    epsilon=1.0,
    lr=0.00025,
    batch_size=32,
    # Replay / scheduling settings (semantics defined in common.train).
    max_memory_length=100000,
    num_first_exploration_steps=5000,
    update_after_actions=4,
    update_target_network=10000,
    checkpoint=5000,
    # 18000; presumably a five-hour budget in seconds — confirm in common.train
    max_time_s=5 * 60 * 60,
)

In [None]:
# Show where the Q-learning run stored its model, reload it with Keras, and
# hand gif_model_demo a callable that asks predict_action for each action.
# (gif_model_demo presumably renders a GIF rollout — confirm in
# common.env_wrappers.utils.)
print(f"saved path: {saved_path}")
model = keras.models.load_model(saved_path)
gif_model_demo(
    lambda observation: predict_action(model, observation),
    steps_num=10000,
)

In [None]:
# Run simple_sarsa on a fresh environment with the create_my_model1 factory.
# The return value is kept in `saved_path` for the demo cell that follows.
saved_path = simple_sarsa(
    get_env(),
    create_my_model1,
    # Learning hyperparameters.
    gamma=0.99,
    epsilon=1.0,
    lr=0.00025,
    # Scheduling settings (semantics defined in common.train).
    num_first_exploration_steps=5000,
    checkpoint=5000,
    # 18000; presumably a five-hour budget in seconds — confirm in common.train
    max_time_s=5 * 60 * 60,
)

In [None]:
# Show where the SARSA run stored its model, reload it with Keras, and hand
# gif_model_demo a callable that asks predict_action for each action.
print(f"saved path: {saved_path}")
model = keras.models.load_model(saved_path)
gif_model_demo(
    lambda observation: predict_action(model, observation),
    steps_num=10000,
)

## Actor Critic

In [None]:
# Build and summarise an actor-critic network: a shared convolutional trunk
# feeding two heads — a softmax over actions (actor) and a single linear
# unit (critic).
#
# Relies on `keras` / `layers` from the notebook's imports and on `env` from
# the `env = get_env()` cell.

# `get_action_space_len` (used here previously) is not imported anywhere in
# this notebook; read the action count from the environment directly.
# assumes a Discrete action space (which exposes `.n`) — TODO confirm
num_actions = env.action_space.n

inputs = layers.Input(shape=env.observation_space.shape)

# Shared convolutional trunk.
conv1 = layers.Conv2D(filters=10, kernel_size=(3, 3), strides=2, activation="relu")(inputs)
conv2 = layers.Conv2D(filters=8, kernel_size=(3, 3), activation="relu")(conv1)
pooled = layers.MaxPooling2D(pool_size=(3, 3), padding="valid")(conv2)

# Flatten once so BOTH heads consume a (batch, features) tensor. Previously
# only the actor branch was flattened; the critic's Dense layers were applied
# to the 4-D feature map, and since Dense only transforms the last axis the
# critic produced a value per spatial position instead of one per state.
common = layers.Flatten()(pooled)

# Actor head: probability distribution over actions.
actor_hidden = layers.Dense(512, activation="relu")(common)
action = layers.Dense(num_actions, activation="softmax")(actor_hidden)

# Critic head: single unbounded output.
critic_hidden1 = layers.Dense(256, activation="relu")(common)
critic_hidden2 = layers.Dense(50, activation="relu")(critic_hidden1)
critic = layers.Dense(1)(critic_hidden2)

model = keras.Model(inputs=inputs, outputs=[action, critic])
model.summary()

In [None]:
# Run actor_critic on a fresh environment with the create_actor_critic_model1
# factory. The return value is kept in `saved_path` for the demo cell below.
saved_path = actor_critic(
    get_env(),
    create_actor_critic_model1,
    gamma=0.99,
    lr=0.00025,
    checkpoint=5000,
    # 18000; presumably a five-hour budget in seconds — confirm in common.train
    max_time_s=5 * 60 * 60,
)

In [None]:
# Show where the actor-critic run stored its model, reload it with Keras, and
# hand gif_model_demo a callable that asks predict_action for each action.
print(f"saved path: {saved_path}")
model = keras.models.load_model(saved_path)
gif_model_demo(
    lambda observation: predict_action(model, observation),
    steps_num=10000,
)

## Reinforce Monte Carlo

In [None]:
# Run reinforce_mc on a fresh environment with the reinforce_mc_model
# factory. The return value is kept in `saved_path` for the demo cell below.
saved_path = reinforce_mc(
    get_env(),
    reinforce_mc_model,
    gamma=0.99,
    lr=0.00025,
    checkpoint=5000,
    # 18000; presumably a five-hour budget in seconds — confirm in common.train
    max_time_s=5 * 60 * 60,
)

In [None]:
# Show where the REINFORCE run stored its model, reload it with Keras, and
# hand gif_model_demo a callable that asks predict_action for each action.
print(f"saved path: {saved_path}")
model = keras.models.load_model(saved_path)
gif_model_demo(
    lambda observation: predict_action(model, observation),
    steps_num=10000,
)