In [1]:
import gymnasium as gym
import numpy as np    
import random
from gymnasium.envs.toy_text.frozen_lake import FrozenLakeEnv


# Helper Functions
### 1. Value Iteration

In [2]:

def train(env,n_states,n_actions,gamma=0.99,threshold=1e-15):
    """Compute the optimal state-value function with value iteration.

    States are swept in index order and updated in place, so a state later
    in a sweep already sees the new values of the states before it.

    Parameters
    ----------
    env : object
        Environment whose ``env.unwrapped.P[state][action]`` is an iterable of
        ``(prob, next_state, reward, done)`` tuples.
    n_states : int
        Number of states; states are indexed ``0 .. n_states-1``.
    n_actions : int
        Number of actions; actions are indexed ``0 .. n_actions-1``.
    gamma : float, optional
        Discount factor applied to the value of the next state.
    threshold : float, optional
        Sweeps stop once the largest value change in a sweep is not greater
        than this.

    Returns
    -------
    numpy.ndarray
        Array of length ``n_states`` holding the converged state values.
    """
    P=env.unwrapped.P #dynamics of the environment
    Values=np.zeros(n_states) # numpy array to store Value function

    # Start above any threshold so that at least one sweep is always performed.
    delta=float("inf")
    while delta>threshold:
        delta=0.0

        for state in range(n_states):
            old_v=Values[state]

            # Bellman optimality backup: the best expected one-step return.
            # The max is taken over the action values only; seeding it with 0
            # would clamp every state value at >= 0 and give wrong results
            # whenever rewards are negative. A state with no actions keeps 0.
            # NOTE: the `done` flag of each transition is not used here, so
            # terminal states are assumed to be modelled as absorbing
            # transitions in P -- verify for custom environments.
            Values[state]=max(
                (
                    sum(
                        prob*(reward+gamma*Values[next_state])
                        for prob,next_state,reward,_ in P[state][action]
                    )
                    for action in range(n_actions)
                ),
                default=0.0,
            )
            delta=max(abs(Values[state]-old_v),delta)

    return Values
        

### 2. Function to Derive Policy Given the Value Function


In [3]:
def get_policy(env,Values,n_states,n_actions,gamma=0.99):
    """Extract the greedy policy implied by a state-value function.

    For every (state, action) pair the expected one-step return
    ``sum(prob * (reward + gamma * Values[next_state]))`` is accumulated from
    ``env.unwrapped.P[state][action]``; each state is then mapped to the
    action with the largest such value (the lowest index wins ties).

    Returns an integer numpy array of length ``n_states``.
    """
    transitions = env.unwrapped.P  # transition model: P[state][action] -> outcomes

    # q_table[s, a] = expected return of taking action a in state s.
    q_table = np.zeros((n_states, n_actions))
    for s, a in np.ndindex(n_states, n_actions):
        for p, s_next, r, _done in transitions[s][a]:
            q_table[s, a] += p * (r + gamma * Values[s_next])

    # Greedy choice per state.
    return np.array([np.argmax(q_row) for q_row in q_table], dtype=int)


### 3. Function to test policy by running it 1000 times on the environment

In [4]:
def test(env,policy):
    n_episodes = 1000
    avg_length=0
    avg_reward=0
    for episode in range(n_episodes):
        state = env.reset()
        
        if isinstance(state, tuple):  # Gym returns (obs, info)
            state = state[0]
        done = False
        length=0
        reward=0
        while not done:
            length+=1
            action = int(policy[state])
            state, reward, done, _, _ = env.step(action)
        avg_length+=length   
        avg_reward+=reward
    
    avg_length/=n_episodes
    avg_reward/=n_episodes
    print(f"Average episode length :{avg_length}")
    print(f"Average reward per episode :{avg_reward}")


### 1️⃣ Original Frozen Lake Environment


In [5]:
env = gym.make("FrozenLake-v1", render_mode=None)

# Read the problem size from the environment's spaces instead of hard-coding
# 16 states / 4 actions, so this cell keeps working if the map is changed.
n_states=int(env.observation_space.n)
n_actions=int(env.action_space.n)

# Solve for the optimal values, derive the greedy policy, then evaluate it.
Values=train(env,n_states,n_actions)
Policy=get_policy(env,Values,n_states,n_actions)

test(env,Policy)

Average episode length :49.277
Average reward per episode :0.825


### 2️⃣ Custom Frozen Lake Environment


In [8]:
# `Custom` is a project-local module (not shown in this notebook).
# NOTE(review): presumably it defines a FrozenLake variant plus its transition
# table `Custom.P` -- confirm against Custom.py.
import Custom

# Build the custom environment directly from its class, passing the module's P.
Custom_env=Custom.CustomFrozenLakeEnv(Custom.P)

# Same pipeline as for the original environment: value iteration, then the
# greedy policy. The sizes are hard-coded as 16 states and 4 actions --
# TODO confirm these match the custom environment.
Values=train(Custom_env,16,4)
Policy=get_policy(Custom_env,Values,16,4)

# Evaluate the policy; prints average episode length and reward.
test(Custom_env,Policy)


Average episode length :48.841
Average reward per episode :0.819
