- 0 - Left
- 1 - Down
- 2 - Right
- 3 - Up

In [1]:
import numpy
import time
import gym

In [2]:
def execute(env, policy, episodeLength=100, render=False):
    """Run a single episode of `env` under `policy` and return its total reward.

    Args:
        env: Environment exposing `reset()`, `step(action)` and `render()`.
            `step` must return a 4-tuple whose first three items are the
            next state, the reward and a done flag.
        policy: Indexable mapping from a state to the action taken there.
        episodeLength: Maximum number of steps before the episode is cut off.
        render: When true, `env.render()` is called before every step.

    Returns:
        The sum of the rewards collected until `done` was reported or the
        step limit was reached.
    """
    state = env.reset()
    episode_return = 0
    for _ in range(episodeLength):
        if render:
            env.render()
        # The policy is a plain lookup table: current state -> action.
        transition = env.step(policy[state])
        state, reward, done, _ = transition
        episode_return += reward
        if done:
            break
    return episode_return


In [3]:
def evaluatePolicy(env, policy, n_episodes=100):
    """Return the mean episode reward of `policy` over `n_episodes` runs.

    Each run is a fresh call to `execute(env, policy)` with its default
    episode length and rendering disabled.
    """
    episode_returns = (execute(env, policy) for _ in range(n_episodes))
    # Start the fold at 0.0 so the accumulator is a float from the first add.
    return sum(episode_returns, 0.0) / n_episodes


In [4]:
def get_random_policy(n_states=16, n_actions=4):
    """Return a random deterministic policy as a state -> action lookup table.

    Args:
        n_states: Number of states, i.e. the length of the returned array.
            Defaults to 16, the value previously hard-coded here (a 4x4 grid).
        n_actions: Number of available actions; every entry is drawn
            uniformly from `range(n_actions)`. Defaults to 4
            (Left, Down, Right, Up).

    Returns:
        A 1-D numpy integer array of length `n_states` whose i-th entry is
        the action to take in state i.
    """
    return numpy.random.choice(n_actions, size=n_states)

In [5]:
# Random-search baseline: sample a batch of random policies, score each one
# by its average episode reward, and report the best score with the elapsed time.
env = gym.make('FrozenLake-v0')
n_policies = 1000
startTime = time.time()

# Draw every candidate first, then evaluate them in the same order.
policy_set = []
for policy_index in range(n_policies):
    policy_set.append(get_random_policy())

policy_score = []
for candidate in policy_set:
    policy_score.append(evaluatePolicy(env, candidate))

endTime = time.time()
best_score = numpy.max(policy_score)
elapsed_seconds = endTime - startTime
print("Best score = %0.2f. Time taken = %4.4f seconds" % (best_score, elapsed_seconds))


Best score = 0.65. Time taken = 13.4511 seconds


### Creating a deterministic environment with `is_slippery=False`

In [None]:
from gym.envs.registration import register
# Register a deterministic (non-slippery) 4x4 FrozenLake variant under a new id.
# register() only records the environment spec and has no useful return value,
# so its result is deliberately not assigned to `env`: doing so would replace
# any existing environment handle with None. Use gym.make(<id>) to get an env.
register(
    id='FrozenLakeNotSlippery-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4', 'is_slippery': False},
    max_episode_steps=100,
    # NOTE(review): 0.78 (with its quoted optimum of .8196) looks carried over
    # from the slippery FrozenLake spec; confirm it is the intended threshold
    # for the deterministic variant.
    reward_threshold=0.78,
)


In [None]:
import gym

In [None]:
# Smoke-test the newly registered environment: show the board after reset,
# take one step with action 0 (Left), print what step() returned, and show
# the board again.
env = gym.make('FrozenLakeNotSlippery-v0')
env.reset()
env.render()
step_result = env.step(0)
print(step_result)
env.render()