In [69]:
import gymnasium as gym
import numpy as np
from gymnasium.envs.toy_text.frozen_lake import generate_random_map


In [70]:
desc = ["SFFF", "FHHH", "FFFF", "HFHF", "FFGF"]
#env = gym.make('FrozenLake-v1', desc=desc, map_name="5x5", is_slippery=False, render_mode = "human")
env = gym.make('FrozenLake-v1', desc=desc, map_name="5x5", is_slippery=False)

In [71]:
observation_space = env.observation_space.n
action_space = env.action_space.n
q_table = np.random.rand(observation_space, action_space) * 0.1
learning_rate = 0.8
discount_factor = 0.95
exploration_rate = 1.0
min_exploration_rate = 0.01
exploration_decay_rate = 0.001
max_episodes = 1000
goal_reaches = 0


In [72]:
def choose_action(state):
    if np.random.uniform(0, 1) < exploration_rate:
        action = env.action_space.sample()
    else:
        action = np.argmax(q_table[state, :])
    return action


In [73]:
def update_q_table(state, action, reward, new_state):
    best_future_q = np.max(q_table[new_state, :])
    current_q = q_table[state, action]
    new_q = (1 - learning_rate) * current_q + learning_rate * (reward + discount_factor * best_future_q)
    q_table[state, action] = new_q
    #print (new_q)


In [82]:
for episode in range(max_episodes):
    state = env.reset()[0]
    done = False
    state_visits = np.zeros(env.observation_space.n)

    while not done:
        action = choose_action(state)
        new_state, reward, terminated, truncated, _ = env.step(action)
        
        state_visits[new_state] += 1
        visit_penalty = -0.01 * (2 ** state_visits[new_state])
        
        if new_state == state:
            reward = visit_penalty
        else:
            # Check for falling into the ice
            if terminated and reward == 0:
                reward = -0.75  # Penalty for falling into the ice
                #print('ded')
            if terminated and reward == 1:
                goal_reaches += 1
            elif not terminated:
                reward = -0.1  # Reward for a safe move
            reward += visit_penalty  # Add penalty for repeated visits

        update_q_table(state, action, reward, new_state)
    
        state = new_state
        done = terminated or truncated


        
        exploration_rate = max(min_exploration_rate, exploration_rate * exploration_decay_rate)
    if episode % 100 == 0:
    
        print(f"Episode {episode}/{max_episodes} complete. Goal reached {goal_reaches} times.")


Episode 0/1000 complete. Goal reached 986 times.
Episode 100/1000 complete. Goal reached 1086 times.
Episode 200/1000 complete. Goal reached 1184 times.
Episode 300/1000 complete. Goal reached 1284 times.
Episode 400/1000 complete. Goal reached 1382 times.
Episode 500/1000 complete. Goal reached 1480 times.
Episode 600/1000 complete. Goal reached 1578 times.
Episode 700/1000 complete. Goal reached 1676 times.
Episode 800/1000 complete. Goal reached 1775 times.
Episode 900/1000 complete. Goal reached 1874 times.


In [75]:
#np.savetxt("q_table.csv", q_table)

In [76]:
#q_table = np.loadtxt("q_table.csv")

In [77]:
env.close()