# Use a Fixed Deterministic Policy to Control PongNoFrameskip-v4

## Test

In [1]:
import numpy as np
np.random.seed(0)  # fix NumPy's global random seed
import gym
env = gym.make('PongNoFrameskip-v4')
env.seed(0)  # seed the environment as well
# Show the reward threshold stored on the environment's spec.
print(env.spec.reward_threshold)

None


In [2]:
class Agent:
    """Fixed, closed-form policy that steers the racket toward the ball's row.

    The policy has no state and no learned parameters: every decision is
    derived from the current frame alone.
    """

    def decide(self, observation):
        """Choose an action by comparing the ball's and racket's vertical centres.

        Args:
            observation: image array indexed as ``[row, column, channel]``.
                Only channel 0 of rows ``34:193`` is inspected.

        Returns:
            2 when the ball's centre is above the racket's centre, 3 when it
            is below, and 0 otherwise (equal centres, or either object not
            found in the inspected rows).
        """
        colors = {'racket': 92, 'ball': 236}  # channel-0 value identifying each object
        heights = {'racket': 16, 'ball': 4}  # full height of each object, in rows
        ymin, ymax = 34, 193
        field = observation[ymin:ymax, :, 0]
        # Row indices inside `field` run from 0 to ymax - ymin - 1.
        last_row = ymax - ymin - 1
        locations = {}
        for obj in colors:
            match = field == colors[obj]
            yy = np.where(match.any(axis=1))[0]
            if yy.size and yy.min() == 0:
                # Touching the top edge: the object may be cut off there, so
                # rebuild its full extent upward from its lowest visible row.
                yy = np.arange(yy.max()-heights[obj]+1, yy.max()+1, 1)
            if yy.size and yy.max() == last_row:
                # Touching the bottom edge: rebuild the full extent downward
                # from the highest visible row.
                yy = np.arange(yy.min(), yy.min()+heights[obj], 1)
            # NaN marks "not found"; every comparison with NaN is False, so a
            # missing object falls through to action 0 below.
            locations[obj + 'y'] = yy.mean() if yy.size else np.nan
        if locations['bally'] < locations['rackety']:
            action = 2 # move up
        elif locations['bally'] > locations['rackety']:
            action = 3 # move down
        else:
            action = 0
        return action


# Create the policy object that is passed to play_once.
agent = Agent()

In [3]:
def play_once(env, agent):
    """Play a single episode and return the sum of its rewards.

    Args:
        env: object providing ``reset()`` (returns the first observation) and
            ``step(action)`` (returns ``observation, reward, done, info``).
        agent: object providing ``decide(observation)``, which returns the
            action to take.

    Returns:
        The rewards collected until ``done`` was reported, summed starting
        from ``0.``.
    """
    state = env.reset()
    total = 0.
    finished = False
    # The first step is always taken; the loop ends once the env reports done.
    while not finished:
        state, gain, finished, _ = env.step(agent.decide(state))
        total += gain
    return total

Test 100 episodes

In [4]:
# Play 100 episodes with the same env and agent, keeping each episode's total
# reward, then print the mean over all episodes.
episode_rewards = [play_once(env, agent) for _ in range(100)]
print('average episode rewards = {:.2f}'.format(np.mean(episode_rewards)))

average episode rewards = 21.00


In [5]:
# Close the environment now that the evaluation is finished.
env.close()