In [2]:
import gym
import numpy as np

# Build the FrozenLake-v1 environment; the code below reads its nS, nA and P attributes.
env = gym.make('FrozenLake-v1')

def policy_eval(policy, env, discount_factor=1.0, theta=0.00001):
    """Iteratively evaluate a policy and return its state-value function.

    Sweeps over every state, replacing its value with the expected one-step
    return under ``policy`` (Sutton & Barto, eq. 4.6). Values are updated in
    place, so states visited later in a sweep already see the values written
    earlier in that same sweep. After each sweep the largest absolute change
    is printed; sweeping stops once that change drops below ``theta``.

    Args:
        policy: Indexable by state; ``policy[s]`` yields the probability of
            each action, in action-index order.
        env: Object exposing ``nS`` (number of states) and ``P``, where
            ``P[s][a]`` is an iterable of
            ``(transition_prob, next_state, reward, done)`` tuples.
        discount_factor: Multiplier applied to the value of the next state.
        theta: Convergence threshold on the largest per-sweep value change.

    Returns:
        A NumPy array of length ``env.nS`` holding the estimated state values.
    """
    # The estimate starts at zero for every state.
    values = np.zeros(env.nS)
    converged = False
    while not converged:
        largest_change = 0
        for state in range(env.nS):
            # Expected one-step return for this state under the policy.
            backup = 0
            for action, pi_sa in enumerate(policy[state]):
                # The `done` flag of each transition is not used here.
                for p_next, s_next, r_next, _ in env.P[state][action]:
                    weight = pi_sa * p_next
                    target = r_next + discount_factor * values[s_next]
                    backup = backup + weight * target
            # Track the biggest change seen in this sweep, then commit the value.
            largest_change = max(largest_change, np.abs(backup - values[state]))
            values[state] = backup
        print('Error delta: {0}'.format(largest_change))
        # One sweep that changes nothing by theta or more ends the evaluation.
        converged = largest_change < theta
    return np.array(values)

# Uniform random policy: every action in every state gets probability 1 / nA.
random_policy = np.full((env.nS, env.nA), 1.0 / env.nA)
v = policy_eval(random_policy, env)

# Header, the value array, and a trailing blank line.
print("Value Function:", v, "", sep="\n")


Error delta: 0.25
Error delta: 0.09374999999999994
Error delta: 0.046874999999999986
Error delta: 0.028320312499999986
Error delta: 0.016250610351562472
Error delta: 0.009230613708496094
Error delta: 0.005255997180938721
Error delta: 0.003013335168361664
Error delta: 0.001840144395828247
Error delta: 0.0015904679894447327
Error delta: 0.0013761396985501042
Error delta: 0.0011462080728961137
Error delta: 0.0009309625911555486
Error delta: 0.0007432938909914793
Error delta: 0.0005864328780234036
Error delta: 0.0004588047738234291
Error delta: 0.000356808107080258
Error delta: 0.00027629247729870146
Error delta: 0.00021327900731391708
Error delta: 0.0001642637287276455
Error delta: 0.00012630373753949424
Error delta: 9.699843740286311e-05
Error delta: 7.442652697140124e-05
Error delta: 5.7069988440556704e-05
Error delta: 4.3740108631683955e-05
Error delta: 3.351189367291167e-05
Error delta: 2.566879565789673e-05
Error delta: 1.9657536659412403e-05
Error delta: 1.5051909316351683e-05
Error