In [None]:
import gymnasium as gym
import matplotlib.pyplot as plt

from Q import TabularQAgent

In [None]:
# 4x4 FrozenLake with slippery (stochastic) transitions.
# NOTE(review): per the Gymnasium docs, render_mode="human" draws the window on
# every step()/reset() by itself, independently of the notebook's RENDER flag —
# confirm this is intended, since it slows training down considerably.
environment = gym.make(
    "FrozenLake-v1",
    map_name="4x4",
    is_slippery=True,
    render_mode="human",
)

In [ ]:
# Training-run settings.
EPISODES = 4000
RENDER = False

# Agent sized from the environment's discrete observation/action spaces.
# lr / gamma are presumably the learning rate and discount factor, and
# `exploration` the exploration-rate schedule — confirm against TabularQAgent.
AGENT = TabularQAgent(
    space=dict(
        states=environment.observation_space.n,
        actions=environment.action_space.n,
    ),
    lr=0.8,
    gamma=0.9,
    exploration=dict(rate=0.99, decay=0.0001, min=0.01),
)

In [None]:
def run(episodes: int, render: bool) -> list:
    """Play ``episodes`` episodes, letting the agent learn after every step.

    Relies on the notebook-level ``AGENT`` and ``environment`` objects; both
    are only read here, so no ``global`` declaration is needed.

    Args:
        episodes: Number of episodes to play.
        render: When true, ``environment.render()`` is called before each step.

    Returns:
        A list with one entry per episode: the sum of the rewards received
        during that episode.
    """
    episode_returns = []
    for _ in range(episodes):
        # reset() yields (observation, info); only the observation is used.
        state, _info = environment.reset()

        total_reward = 0
        terminated = truncated = False
        # An episode ends when the environment reports either flag.
        while not (terminated or truncated):
            if render:
                environment.render()

            action = AGENT.action(state)
            next_state, step_reward, terminated, truncated, _ = environment.step(action)

            # Update the agent from the observed transition.
            AGENT.learn(state, action, step_reward, next_state)

            total_reward += step_reward
            state = next_state

        episode_returns.append(total_reward)
    return episode_returns

In [None]:
# Train the agent and collect the per-episode accumulated reward.
rewards = run(EPISODES, RENDER)

# Plot the reward curve using the explicit figure/axes interface.
fig, ax = plt.subplots()
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Accumulated Reward")
plt.show()

In [ ]:
# Shut the environment down now that training and plotting are done
# (per the Gymnasium docs this releases resources such as the render window).
environment.close()