In [1]:
# %load ../../scripts/pendulum/ppo.py
import gym
import keras_gym as km
from tensorflow import keras
from tensorflow.keras import backend as K


###############################################################################
# environment (MDP)
###############################################################################

# Compose the environment inside-out: the raw 'Pendulum-v0' task is wrapped by
# BoxActionsToReals and then by TrainMonitor, which is handed the tensorboard
# output directory.
# NOTE(review): the training loop reads env.T and env.ep -- presumably these
# counters are provided by TrainMonitor; confirm against the keras_gym docs.
env = km.wrappers.TrainMonitor(
    env=km.wrappers.BoxActionsToReals(gym.make('Pendulum-v0')),
    tensorboard_dir='/tmp/tensorboard/pendulum/ppo_static',
)
km.enable_logging()


###############################################################################
# function approximator
###############################################################################

class MLP(km.FunctionApproximator):
    """Function approximator whose body is a small tanh feed-forward stack."""

    def body(self, X):
        """Build the network body on top of the input tensor ``X``.

        The input is first widened by concatenating it with its element-wise
        square along axis 1, then passed through two Dense layers of 6 tanh
        units each. The output of the second Dense layer is returned.
        """
        def append_squares(x):
            # Feature augmentation: [x, x**2] joined along axis 1.
            return K.concatenate([x, K.square(x)], axis=1)

        hidden = keras.layers.Lambda(append_squares)(X)
        for _ in range(2):
            hidden = keras.layers.Dense(units=6, activation='tanh')(hidden)
        return hidden


# A single MLP instance (lr=1e-3) is handed to both the policy and the value
# function below.
mlp = MLP(env, lr=1e-3)
# Gaussian policy configured with the 'ppo' update strategy.
pi = km.GaussianPolicy(mlp, update_strategy='ppo')
# NOTE(review): gamma is presumably the discount factor and bootstrap_n the
# n-step bootstrapping horizon -- confirm against the keras_gym docs.
v = km.V(mlp, gamma=0.9, bootstrap_n=5)
# Pairs the policy with the value function; the training loop calls
# ac.batch_update on batches sampled from the buffer.
ac = km.ActorCritic(pi, v)


# Experience buffer created from the value function `v`, with capacity=512 and
# batch_size=32. The training loop reads buffer.capacity and buffer.batch_size
# to decide when to update and how many batches to draw.
buffer = km.caching.ExperienceReplayBuffer.from_value_function(
    value_function=v, capacity=512, batch_size=32)


###############################################################################
# run
###############################################################################

# Knobs for the training loop below.
TOTAL_STEPS = 1000000    # keep starting new episodes while env.T is below this
EPOCHS_PER_ROUND = 4     # epochs over the buffer per update round
TARGET_SYNC_TAU = 0.1    # tau passed to pi.sync_target_model after each round

while env.T < TOTAL_STEPS:
    s = env.reset()
    for t in range(env.spec.max_episode_steps):
        # Actions are drawn with use_target_model=True; the target model is
        # only refreshed by the sync call at the end of an update round.
        a = pi(s, use_target_model=True)
        s_next, r, done, info = env.step(a)
        buffer.add(s, a, r, done, env.ep)

        buffer_is_full = len(buffer) >= buffer.capacity
        if buffer_is_full:
            # One round: sample enough batches to cover the buffer
            # EPOCHS_PER_ROUND times, then clear it and sync the target model.
            num_batches = int(
                EPOCHS_PER_ROUND * buffer.capacity / buffer.batch_size)
            for _ in range(num_batches):
                batch = buffer.sample()
                ac.batch_update(*batch)
            buffer.clear()
            pi.sync_target_model(tau=TARGET_SYNC_TAU)

        if done:
            break

        s = s_next


ModuleNotFoundError: No module named 'keras_gym'

In [None]:
%pip install keras_gym

Collecting keras_gym
  Downloading keras_gym-0.2.17-py3-none-any.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.2/82.2 KB[0m [31m869.5 kB/s[0m eta [36m0:00:00[0m0:01[0m:00:01[0m
Collecting numpy<1.17,>=1.16
  Downloading numpy-1.16.6.zip (5.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting gym>=0.12.1
  Downloading gym-0.23.0.tar.gz (624 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m624.4/624.4 KB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25h  Downloading gym-0.22.0.tar.gz (631 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m631.1/631.1 KB[0m [31m1.5 MB/s[0m eta 