# Continuous action space: DDPG on Pendulum-v0

In [None]:
from glob import glob
import os

from gnwrapper import Animation
import gym
import numpy as np

from tensorflow.keras.layers import Activation, Concatenate, Dense, Flatten, Input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

In [None]:
# Constants.
# Identifier of the gym environment used for training and evaluation.
ENV_NAME = "Pendulum-v0"
# Seed shared by NumPy's global RNG and the environment.
SEED = 123
# Number of training steps; also used as the replay-memory limit.
STEPS = 1_000
# Whether to render the environment while training/testing.
VISUALIZE = True
# Base path for the saved weights.
# NOTE(review): the "dqn_" prefix looks like a leftover from a DQN notebook
# (the agent built below is DDPG); kept unchanged so existing files still match.
WEIGHTS = f"../data/dqn_{ENV_NAME}_weights.h5f"

In [None]:
# Build environment.
# The raw gym environment is wrapped in gnwrapper's Animation wrapper.
env = Animation(gym.make(ENV_NAME))

# Seed NumPy's global RNG first, then the environment, with the same value.
for seed_rng in (np.random.seed, env.seed):
    seed_rng(SEED)

# Size of the action vector: the first entry of the action-space shape.
nb_actions = env.action_space.shape[0]

In [None]:
# Build model.
# Actor network: takes a window of one observation (leading axis of size 1)
# and produces one unbounded value per action dimension.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))

# Three identical hidden stages: a 16-unit dense layer followed by ReLU.
for _ in range(3):
    actor.add(Dense(16))
    actor.add(Activation("relu"))

# Output head: linear activation, one unit per action dimension.
actor.add(Dense(nb_actions))
actor.add(Activation("linear"))

actor.summary()

In [None]:
# Critic network: takes an (action, observation) pair and outputs one scalar.
action_input = Input(shape=(nb_actions,), name="action_input")
observation_input = Input(
    shape=(1,) + env.observation_space.shape,
    name="observation_input",
)
flattened_observation = Flatten()(observation_input)

# The action and the flattened observation are joined into one feature vector.
x = Concatenate()([action_input, flattened_observation])

# Three identical hidden stages: a 32-unit dense layer followed by ReLU.
for _ in range(3):
    x = Dense(32)(x)
    x = Activation("relu")(x)

# Single linear output unit.
x = Dense(1)(x)
x = Activation("linear")(x)

critic = Model(inputs=[action_input, observation_input], outputs=x)
critic.summary()

In [None]:
# Build agent.
# Replay memory holds at most STEPS transitions; window_length=1 matches the
# leading (1,) axis of the actor/critic observation inputs.
memory = SequentialMemory(limit=STEPS, window_length=1)

# Ornstein-Uhlenbeck process with one component per action dimension; it is
# handed to the agent as its random process.
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0.0, sigma=0.3)

agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=100,
    nb_steps_warmup_actor=100,
    random_process=random_process,
    gamma=0.99,
    target_model_update=1e-3,
)

# `learning_rate` is the supported keyword; the former `lr` alias is deprecated
# in tf.keras and rejected by newer Keras optimizers.
agent.compile(Adam(learning_rate=0.001, clipnorm=1.0), metrics=["mae"])

In [None]:
# Load weights if exist.
# DDPGAgent does not write to WEIGHTS itself: it stores the actor and the
# critic separately, inserting "_actor" / "_critic" before the file extension.
# Globbing on WEIGHTS + "*" therefore never matches a saved file, so probe for
# both derived names instead. The trailing "*" also covers formats that append
# their own suffixes to the given file name.
weights_stem, weights_ext = os.path.splitext(WEIGHTS)
if all(
    glob(f"{weights_stem}_{part}{weights_ext}*") for part in ("actor", "critic")
):
    agent.load_weights(WEIGHTS)

In [None]:
# Train.
# A fresh wrapped environment is created for the training run. It replaces the
# environment seeded earlier, so it has to be seeded again; otherwise SEED has
# no effect on the episodes the agent is trained on.
env = Animation(gym.make(ENV_NAME))
env.seed(SEED)
agent.fit(env, nb_steps=STEPS, visualize=VISUALIZE, verbose=2)

In [None]:
# Save weights.
# Make sure the target directory exists first, so a missing folder cannot
# make the save fail after training has already completed.
os.makedirs(os.path.dirname(WEIGHTS) or ".", exist_ok=True)
agent.save_weights(WEIGHTS, overwrite=True)

In [None]:
# Evaluate.
# A fresh wrapped environment is created for evaluation. Seed it so the five
# test episodes are repeatable; the seed is offset from SEED so the episodes
# do not start from the same states as the training episodes.
env = Animation(gym.make(ENV_NAME))
env.seed(SEED + 1)
agent.test(env, nb_episodes=5, visualize=VISUALIZE)