In [None]:
import gymnasium as gym
import numpy as np
from tqdm import tqdm
import pandas as pd

from Agent2 import *

# Hyperparameters read as module-level globals by train().
learning_rate = 0.7  # forwarded to agent.setTraningMode(True, learning_rate, gamma)
gamma = 0.95  # forwarded to agent.setTraningMode(True, learning_rate, gamma)
max_steps = 200  # upper bound on agent steps per training episode

In [None]:

def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, agent:Agent2, wins:list):
    """Run the training loop and record every won episode.

    Reads the module-level globals ``learning_rate``, ``gamma`` and
    ``max_steps``; they are not parameters of this function.

    Args:
        n_training_episodes: Number of training episodes to run.
        min_epsilon: Value the per-episode epsilon decays towards.
        max_epsilon: Epsilon used for episode 0.
        decay_rate: Rate of the exponential epsilon decay per episode.
        agent: Agent to train. ``setTraningMode(True, learning_rate, gamma)``
            is called on it once before the first episode.
        wins: List mutated in place; ``[episode, step]`` is appended whenever
            an episode ends with a last-step reward equal to 1 (``step`` is
            the 0-based index of that last step).
    """
    print('Training.... ')
    agent.setTraningMode(True, learning_rate, gamma)

    epsilon_span = max_epsilon - min_epsilon

    for episode_idx in tqdm(range(n_training_episodes)):
        # Exponential decay from max_epsilon (episode 0) towards min_epsilon.
        epsilon = min_epsilon + epsilon_span * np.exp(-decay_rate * episode_idx)
        agent.reset()

        for step_idx in range(max_steps):
            reward = agent.step(epsilon)
            if not agent.done():
                continue
            # Episode finished: count it as a win only when the last reward is 1.
            if reward == 1:
                wins.append([episode_idx, step_idx])
            break


In [None]:

def evaluate(max_steps, n_eval_episodes, agent:Agent2, wins:list):
    """Run evaluation episodes and print the mean and std of their rewards.

    Args:
        max_steps: Upper bound on agent steps per evaluation episode.
        n_eval_episodes: Number of evaluation episodes to run.
        agent: Agent to evaluate. ``setTraningMode(False)`` is called on it
            once before the first episode and ``step()`` is called without
            arguments.
        wins: List mutated in place; ``[episode, step]`` is appended whenever
            an episode ends with a last-step reward equal to 1 (``step`` is
            the 0-based index of that last step).

    Returns:
        None. The summary is printed as ``Mean_reward=<mean> +/- <std>``.
    """
    print ('Evaluate.... ')
    agent.setTraningMode(False)

    episode_rewards = []

    for episode in tqdm(range(n_eval_episodes)):
        agent.reset()

        for step in range(max_steps):
            rw=agent.step()

            if agent.done():
                if rw == 1:
                    wins.append([episode,step])
                break

        # Record one reward per episode, including episodes cut off by
        # max_steps. This line used to sit after the episode loop, so only a
        # single value was collected and the reported std was always 0.
        # NOTE(review): assumes totalReward() is the reward accumulated since
        # the last reset() -- confirm in Agent2.
        episode_rewards.append(agent.totalReward())

    if not episode_rewards:
        # No episodes were run; avoid NumPy's mean-of-empty-slice warning.
        print("Mean_reward=nan +/- nan")
        return

    mean_reward = np.mean(episode_rewards)
    std_reward = np.std(episode_rewards)

    print(f"Mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")

In [None]:

# Exploration parameters: train() decays epsilon exponentially from
# max_epsilon towards min_epsilon at rate decay_rate per episode.
max_epsilon = 1.0
min_epsilon = 0.05
decay_rate = 0.0005

# Training parameters
n_training_episodes = 1_000_000

# Environment and the agent that will be trained on it.
env = gym.make("FrozenLake-v1", map_name="4x4")
agent = Agent2(env)

# train() appends one [episode, step] pair here for every won episode.
wins = []
      

In [None]:
# Run the training loop; won episodes are accumulated in `wins`.
train(
    n_training_episodes=n_training_episodes,
    min_epsilon=min_epsilon,
    max_epsilon=max_epsilon,
    decay_rate=decay_rate,
    agent=agent,
    wins=wins,
)

In [None]:
# Fetch the agent's Q-table after training.
qtable = agent.getQTable()

# Persist it in NumPy binary format. np.save appends ".npy" when the name has
# no such extension, and the path is relative to the current working directory.
np.save("qtable_2",qtable)

In [None]:

# Summarise the wins recorded by train(): one row per won episode,
# column 0 = episode index, column 1 = 0-based index of the winning step
# (so the number of steps actually taken is that value + 1).
# reshape(-1, 2) keeps the two-column layout even when `wins` is empty;
# without it an empty list produces a one-column frame and the column-1
# lookup below raises KeyError.
npWins = pd.DataFrame(np.array(wins).reshape(-1, 2))

win_count = len(npWins)
# Mean of the winning-step column; NaN when there were no wins.
mediaPassos = npWins[1].mean()

print(f"{win_count} on {n_training_episodes} trys -> {(float(win_count*100)/n_training_episodes) :.2f}%")
print(f"media de passos pra vitoria {mediaPassos}")

In [None]:
# Display summary statistics of the recorded wins
# (column 0: episode index, column 1: 0-based index of the winning step).
npWins.describe()

In [None]:
 # Evaluation parameters
n_eval_episodes = 100
max_steps = 200  # rebinds the module-level name that train() also reads
eval_seed = []  # not referenced by any cell visible in this notebook section

# Start from an empty list so only evaluation wins are counted below.
wins = []

evaluate(
    max_steps=max_steps,
    n_eval_episodes=n_eval_episodes,
    agent=agent,
    wins=wins,
)

In [None]:
# Summarise the wins recorded by evaluate(): one row per won episode,
# column 0 = episode index, column 1 = 0-based index of the winning step
# (so the number of steps actually taken is that value + 1).
# reshape(-1, 2) keeps the two-column layout even when `wins` is empty;
# without it a run with zero wins produces a one-column frame and the
# column-1 lookup below raises KeyError.
npWins = pd.DataFrame(np.array(wins).reshape(-1, 2))

win_count = len(npWins)
# Mean of the winning-step column; NaN when there were no wins.
mediaPassos = npWins[1].mean()

print(f"{win_count} on {n_eval_episodes} trys -> {(float(win_count*100)/n_eval_episodes) :.2f}%")
print(f"media de passos pra vitoria {mediaPassos}")