This notebook demonstrates how the trained policy performs in the [BipedalWalker](https://www.gymlibrary.ml/environments/box2d/bipedal_walker/) environment. It achieves an average reward of over 300 across 100 repeated rollouts.

In [1]:
import gym
import numpy as np
import matplotlib.pyplot as plt

from nn import NN

In [2]:
# Display matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
def rollout_rewards(policy, repeat_n=1, render=False):
    """Run ``policy`` in BipedalWalker-v3 and return each episode's total reward.

    The policy's output vector is split in two: the first four values are sent
    to the environment as the action, and the remaining values are fed back as
    extra inputs on the next step (concatenated after the observation). The
    fed-back values start at zero at the beginning of every episode.

    Args:
        policy: Object exposing ``layer_sizes`` (its last entry is used as the
            output length) and ``predict(inputs)`` returning a sliceable vector.
        repeat_n: Number of episodes to run.
        render: When true, call ``env.render()`` after every step.

    Returns:
        NumPy array of length ``repeat_n`` holding each episode's summed reward.
    """
    action_dim = 4  # number of leading policy outputs passed to env.step
    env = gym.make("BipedalWalker-v3")
    try:
        results = np.zeros(repeat_n)
        for i in range(repeat_n):
            # Reset the fed-back part of the policy output for the new episode.
            predicted = np.zeros(policy.layer_sizes[-1])
            obs = env.reset()
            done = False
            while not done:
                predicted = policy.predict(
                    np.concatenate((obs, predicted[action_dim:]))
                )
                obs, reward, done, _ = env.step(predicted[:action_dim])
                results[i] += reward
                if render:
                    env.render()
        return results
    finally:
        # Always release the environment (and any render window), even when a
        # rollout raises part-way through.
        env.close()

In [4]:
def make_weighted_nn(layer_sizes, flatten_weights):
    """Create an ``NN`` with the given layer sizes and load weights into it.

    Args:
        layer_sizes: Forwarded to ``NN`` as the ``layer_sizes`` keyword.
        flatten_weights: Passed to the network's ``set_flatten_weights``.

    Returns:
        The constructed ``NN`` instance after its weights have been set.
    """
    network = NN(layer_sizes=layer_sizes)
    network.set_flatten_weights(flatten_weights)
    return network

In [5]:
# NOTE(review): parallel_worker is not referenced by any cell shown in this notebook.
parallel_worker = 4
# Sizes handed to NN(layer_sizes=...); rollout_rewards uses the last entry as
# the length of the policy's output vector.
layer_sizes = [30, 30, 20, 10]
# Saved weights of the trained policy; the loaded array is later passed to
# make_weighted_nn as flatten_weights.
best_policy_path = 'fixtures/normal/policy/final.npy'
best_policy = np.load(best_policy_path)

In [6]:
# Evaluate the loaded policy over 100 episodes without rendering; results holds one total reward per episode.
results = rollout_rewards(make_weighted_nn(layer_sizes, best_policy), 100)

  "We recommend you to use a symmetric and normalized Box action space (range=[-1, 1]) "


In [7]:
# Summarise the per-episode rewards; np.std with its default ddof=0 is the population standard deviation.
print(f'Mean score {np.mean(results):.2f}, standard deviation {np.std(results):.2f}')

Mean score 310.11, standard deviation 1.44


In [8]:
# Render a single rollout on screen (render=True); the trailing semicolon
# suppresses display of the returned reward array.
rollout_rewards(make_weighted_nn(layer_sizes, best_policy), 1, True);