In [1]:
import equinox as eqx
from jax import numpy as jnp, random as jr, vmap, nn, lax

import numpy as np
from matplotlib import pyplot as plt

from pets.control import plan
from pets.model import Ensemble
from pets.dataset import Normalizer
from pets.envs.atari import AtariEnv, reward_fn

In [2]:
def _tile(x):
    """Insert a new axis at position 1 and repeat it ``ensemble_dim`` times.

    For a 2-D input of shape ``(N, D)`` the result has shape
    ``(N, ensemble_dim, D)``. ``ensemble_dim`` is read from module scope at
    call time, so it must be defined before this helper is first called.
    """
    expanded = x[:, None, ...]
    return jnp.tile(expanded, (1, ensemble_dim, 1))

def forward(model, normalizer, state, action, key):
    """Sample a next state from the learned dynamics model.

    ``state`` and ``action`` are concatenated along the last axis, passed
    through ``normalizer.normalize`` and then through ``model`` (vmapped over
    the leading axis). The model output is treated as the mean and
    log-variance of a Gaussian over the state delta; one sample of that delta
    is added to ``state``.

    Args:
        model: Callable mapping a single normalized input to a
            ``(delta_mean, delta_logvar)`` pair.
        normalizer: Object exposing ``normalize(inputs)``.
        state: Current state array; its leading axis is the vmapped axis.
        action: Action array whose leading axes match ``state``.
        key: JAX PRNG key used to draw the Gaussian noise.

    Returns:
        ``state`` plus the sampled delta, same shape as ``delta_mean``
        broadcast against ``state``.
    """
    inputs = jnp.concatenate([state, action], axis=-1)
    inputs = normalizer.normalize(inputs)
    delta_mean, delta_logvar = vmap(model)(inputs)
    # std = exp(logvar / 2). Halving in log space before exponentiating is
    # equivalent to sqrt(exp(logvar)) but does not overflow to inf for large
    # log-variances.
    delta_std = jnp.exp(0.5 * delta_logvar)
    noise = jr.normal(key, delta_mean.shape)
    delta = delta_mean + delta_std * noise
    return state + delta


@eqx.filter_jit
def rollout_fn(state, actions):
    """Roll the dynamics model forward along an action sequence.

    ``state`` is tiled with ``_tile`` and every entry of ``actions`` (along
    its leading axis) is tiled the same way. ``lax.scan`` then applies
    ``forward`` once per action, splitting the PRNG key at each step.

    ``model``, ``normalizer`` and ``key`` are not parameters: they are read
    from module scope.
    NOTE(review): because this function is jitted, the module-level ``key``
    is presumably captured when the function is first traced, so later
    rebinding of ``key`` would not change the noise used here -- confirm.

    Args:
        state: Starting state passed to ``_tile``.
        actions: Action sequence; scanned over its leading axis.

    Returns:
        A pair ``(states, rewards)`` where ``states`` stacks the predicted
        state after each step and ``rewards`` is ``reward_fn(states)``
        averaged over its last axis, with that axis kept as size 1.
    """
    tiled_state = _tile(state)
    tiled_actions = vmap(_tile)(actions)

    def step(carry, action_t):
        current, rng = carry
        rng, noise_rng = jr.split(rng)
        predicted = forward(model, normalizer, current, action_t, noise_rng)
        return (predicted, rng), predicted

    # Only the stacked per-step predictions are needed; the final carry is dropped.
    _, states = lax.scan(step, (tiled_state, key), tiled_actions)
    mean_rewards = reward_fn(states).mean(-1)
    return states, mean_rewards[..., None]


In [3]:
# Evaluate the saved dynamics model by planning actions in the Pong environment.
key = jr.PRNGKey(1)

env = AtariEnv("PongDeterministic-v4", render_mode="human")
state_dim = env.observation_space.shape[0]
ensemble_dim, hidden_dim, action_dim, num_steps = 5, 200, 3, 80

# Initialise with the fresh subkey so `key` itself is never consumed twice.
# The initial weights are only a template: they are overwritten by the
# leaves deserialised from disk on the next line.
key, subkey = jr.split(key)
model = Ensemble(state_dim + 1, state_dim, hidden_dim, ensemble_dim, key=subkey)
model = eqx.tree_deserialise_leaves("../data/model.eqx", model)
print(model.fc_1.weights)
normalizer = Normalizer.load("../data/normalizer.pkl")

(state, _), total_reward = env.reset(), 0.0
for _ in range(num_steps):
    key, subkey = jr.split(key)

    probs, _, _ = plan(state, rollout_fn, action_dim, subkey)
    action = probs.mean(1).argmax(-1)[0]

    next_state, reward, done, truncated, info = env.step(action)
    total_reward = total_reward + reward

    # Plan from the latest observation on the next iteration; without this
    # every step would be planned from the reset state.
    state = next_state
    # Do not keep stepping an episode that has already ended.
    if done or truncated:
        break
print(f"reward: {total_reward}")

# 5.84451761e-03

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


[[[ 1.94079068e-03  1.85001772e-02  3.47676426e-02 ...  1.07672275e-03
   -1.32432312e-01 -5.94484098e-02]
  [ 1.97882622e-01  8.25284570e-02 -3.75466287e-01 ... -3.76302510e-01
    1.45497143e-01  1.77589670e-01]
  [ 7.02257454e-01  4.87516850e-01 -3.50271016e-02 ... -1.19898483e-01
   -2.07701027e-01 -3.34149629e-01]
  ...
  [ 4.10670727e-01 -2.62854993e-01 -2.27414906e-01 ... -3.04660380e-01
   -1.00729689e-01  5.67810647e-02]
  [-3.45675081e-01  1.00496579e-02  3.00940603e-01 ... -1.49663910e-01
   -4.00797457e-01  3.86841446e-01]
  [ 3.65777940e-01  5.41949295e-04 -4.75432398e-03 ...  5.54655075e-01
    1.12740643e-01  1.39772564e-01]]

 [[-2.42484048e-01  4.84464467e-02  2.87998199e-01 ...  2.89138317e-01
    2.79605724e-02 -8.91767628e-03]
  [-1.28474236e-01 -8.67713615e-02  4.70643938e-02 ... -8.12258292e-03
    2.51646429e-01 -2.18156606e-01]
  [-1.01813398e-01  2.42471755e-01 -7.04200119e-02 ...  4.34684247e-01
    6.30421102e-01  1.47857338e-01]
  ...
  [ 1.06105268e+00 -6.5

: 