# Policy Iteration

## Import necessary libraries

In [None]:
import gym

## Policy Evaluation

In [None]:
def policy_evaluation(obs_space, model, policy, theta, gamma):
    """Iteratively estimate the value of every observation under ``policy``.

    Each sweep computes all new values from a snapshot of the previous sweep
    (synchronous backup) and stops once the largest change in a sweep drops
    below ``theta``.

    Args:
        obs_space: Iterable of observations; each one is used as an index into
            the returned list and as a key of ``model`` and ``policy``.
        model: ``model[obs][action]`` is an iterable of
            ``(prob, next_obs, reward, done)`` transitions.
        policy: Maps each observation to the action taken there.
        theta: Convergence threshold on the largest per-sweep value change.
        gamma: Discount factor applied to the value of the next observation.

    Returns:
        A list holding the estimated value of each observation.
    """
    # every observation starts with a value of zero
    values = [0 for _ in obs_space]

    converged = False
    while not converged:
        # snapshot of the previous sweep; all backups read from it
        previous = list(values)
        largest_change = 0

        for state in obs_space:
            chosen = policy[state]

            # expected one-step return; a terminal transition contributes
            # only its reward because (not done) zeroes the bootstrap term
            expected = 0
            for p, nxt, r, terminal in model[state][chosen]:
                expected += p * (r + gamma * previous[nxt] * (not terminal))
            values[state] = expected

            largest_change = max(largest_change, abs(expected - previous[state]))

        converged = largest_change < theta

    return values

## Policy Improvement

In [None]:
def policy_improvement(obs_space, action_space, model, value_function, policy, gamma):
    """Return the policy that acts greedily with respect to ``value_function``.

    For every observation the one-step lookahead value of each action is
    computed from ``model`` and the action with the largest value is selected.
    Ties go to the action encountered first when iterating ``action_space``.

    Args:
        obs_space: Iterable of observations to improve; each one is a key of
            ``model`` and of ``policy``.
        action_space: Iterable of candidate actions for every observation.
        model: ``model[obs][action]`` is an iterable of
            ``(prob, next_obs, reward, done)`` transitions.
        value_function: Indexable by ``next_obs``; current value estimates.
        policy: Current policy. It is copied, never mutated.
        gamma: Discount factor applied to the value of the next observation.

    Returns:
        A copy of ``policy`` in which every observation of ``obs_space`` maps
        to its greedy action. If ``action_space`` is empty the copied entries
        are left as they were.
    """
    new_policy = policy.copy()
    for obs in obs_space:
        # No baseline value is assumed: the first action always becomes the
        # initial best, so the greedy action is still found when all action
        # values are zero or negative (e.g. environments with step penalties).
        best_value = None
        best_action = None

        for action in action_space:
            v = 0
            for prob, next_obs, reward, done in model[obs][action]:
                # (not done) drops the bootstrap term on terminal transitions
                v += prob * (reward + gamma * value_function[next_obs] * (not done))
            # strict comparison keeps the earliest action on ties
            if best_value is None or v > best_value:
                best_value = v
                best_action = action

        if best_value is not None:
            new_policy[obs] = best_action
    return new_policy

## Policy Iteration = Evaluation + Improvement

In [None]:
def policy_iteration(obs_space, action_space, model, policy, theta, gamma):
    """Alternate policy evaluation and greedy improvement until the policy is stable.

    Args:
        obs_space: Iterable of observations.
        action_space: Iterable of actions.
        model: ``model[obs][action]`` is an iterable of
            ``(prob, next_obs, reward, done)`` transitions.
        policy: Initial policy mapping each observation to an action.
        theta: Convergence threshold forwarded to ``policy_evaluation``.
        gamma: Discount factor.

    Returns:
        A ``(value_function, policy)`` pair: the first policy that the
        improvement step leaves unchanged, and its evaluated value function.
    """
    current = policy
    while True:
        # evaluate the policy we currently hold ...
        values = policy_evaluation(obs_space, model, current, theta, gamma)
        # ... then act greedily with respect to that evaluation
        improved = policy_improvement(
            obs_space, action_space, model, values, current, gamma
        )

        # an unchanged policy means improvement has reached a fixed point
        if current == improved:
            return values, current

        current = improved

## Environment

### Frozen Lake

```
LEFT = 0
DOWN = 1
RIGHT = 2
UP = 3

class FrozenLakeEnv(gym.envs.toy_text.discrete.DiscreteEnv)
 |  FrozenLakeEnv(desc=None, map_name='4x4', is_slippery=True)
 |  
 |  Winter is here. You and your friends were tossing around a frisbee at the
 |  park when you made a wild throw that left the frisbee out in the middle of
 |  the lake. The water is mostly frozen, but there are a few holes where the
 |  ice has melted. If you step into one of those holes, you'll fall into the
 |  freezing water. At this time, there's an international frisbee shortage, so
 |  it's absolutely imperative that you navigate across the lake and retrieve
 |  the disc. However, the ice is slippery, so you won't always move in the
 |  direction you intend.
 |  The surface is described using a grid like the following
 |  
 |      SFFF
 |      FHFH
 |      FFFH
 |      HFFG
 |  
 |  S : starting point, safe
 |  F : frozen surface, safe
 |  H : hole, fall to your doom
 |  G : goal, where the frisbee is located
 |  
 |  The episode ends when you reach the goal or fall in a hole.
 |  You receive a reward of 1 if you reach the goal, and zero otherwise.
```

In [None]:
# To see the full description of the environment, uncomment the help() call below and run it after the cell that creates `env`.
# help(env.unwrapped)

In [None]:
# Create the Frozen Lake environment registered under the id 'FrozenLake-v1'.
env = gym.make('FrozenLake-v1')

In [None]:
# Transition table of the underlying environment: model[obs][action] is an
# iterable of (prob, next_obs, reward, done) tuples. gym.make() may wrap the
# environment in several wrappers, so reach the base environment through
# `unwrapped` instead of relying on a single `.env` hop and attribute forwarding.
model = env.unwrapped.P

In [None]:
# All observation indices 0 .. n-1 of the discrete observation space.
obs_space = set(range(env.observation_space.n))

In [None]:
# All action indices 0 .. n-1 of the discrete action space.
action_space = set(range(env.action_space.n))

In [None]:
# Initial policy: take action 0 in every observation.
policy = dict.fromkeys(obs_space, 0)

In [None]:
# Run policy iteration from the initial policy until it stops changing.
value_function, policy = policy_iteration(
    obs_space,
    action_space,
    model,
    policy,
    theta=1e-4,
    gamma=0.99,
)

In [None]:
# Show the policy returned by policy_iteration (observation -> action).
print(policy)

In [None]:
# Show the value estimates returned by policy_iteration, one per observation.
print(value_function)