In [1]:
!pip install gymnasium

zsh:1: command not found: pip


In [1]:
import numpy as np 
import gymnasium as gym
import random 
import os
import csv 

In [2]:

def _load_checkpoint(qtable_filename, params_filename, state_size, action_size):
    """Restore the Q-table and exploration state from disk, or start fresh.

    Args:
        qtable_filename: Path of the ``.npy`` file holding the Q-table.
        params_filename: Path of the ``.npy`` file holding the pickled dict
            with the keys ``'epsilon'`` and ``'episode'``.
        state_size: Number of rows of a freshly initialised Q-table.
        action_size: Number of columns of a freshly initialised Q-table.

    Returns:
        Tuple ``(qtable, epsilon, start_episode)``.
    """
    if os.path.exists(qtable_filename):
        qtable = np.load(qtable_filename)
        print("Q-table chargée avec succès.")
    else:
        qtable = np.zeros((state_size, action_size))
        print("Nouvelle Q-table initialisée.")

    if os.path.exists(params_filename):
        # NOTE(security): allow_pickle=True unpickles the file, which can run
        # arbitrary code. Only load parameter files written by this notebook.
        params = np.load(params_filename, allow_pickle=True).item()
        epsilon = params.get('epsilon', 1)
        # The stored episode was fully completed, so resume with the next one.
        start_episode = params.get('episode', 0) + 1
        print(f"Paramètres chargés: epsilon={epsilon}, start_episode={start_episode}")
    else:
        epsilon = 1
        start_episode = 0

    return qtable, epsilon, start_episode


def _save_checkpoint(qtable_filename, params_filename, qtable, epsilon, episode):
    """Write the Q-table and the resume parameters to disk and report it."""
    np.save(qtable_filename, qtable)
    np.save(params_filename, {'epsilon': epsilon, 'episode': episode})
    print(f"Épisode {episode}: Q-table et paramètres sauvegardés.")


def _run_episode(env, qtable, epsilon, num_steps, learning_rate, discount_rate):
    """Play one epsilon-greedy episode, updating ``qtable`` in place.

    Args:
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns
            ``(state, reward, terminated, truncated, info)``.
        qtable: 2-D array indexed as ``[state, action]``; modified in place.
        epsilon: Probability of taking a random action instead of the greedy one.
        num_steps: Maximum number of steps played in this episode.
        learning_rate: Step size of the Q-learning update.
        discount_rate: Discount applied to the best next-state value.

    Returns:
        Tuple ``(total_rewards, steps)`` accumulated over the episode.
    """
    state, _ = env.reset()
    total_rewards = 0
    steps = 0

    for _ in range(num_steps):
        if random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = int(np.argmax(qtable[state, :]))

        new_state, reward, terminated, truncated, _info = env.step(action)

        # A terminal transition has no future return, so do not bootstrap from
        # the next state. A truncated (time-limited) episode still bootstraps.
        best_next = 0.0 if terminated else np.max(qtable[new_state, :])
        td_target = reward + discount_rate * best_next
        qtable[state, action] += learning_rate * (td_target - qtable[state, action])

        total_rewards += reward
        steps += 1
        state = new_state

        if terminated or truncated:
            break

    return total_rewards, steps


def main(env=None, render_mode="human"):
    """Train a tabular Q-learning agent and log per-episode metrics.

    The Q-table and the exploration parameters are loaded from ``qtable.npy``
    and ``training_params.npy`` when those files exist, so an interrupted run
    resumes after the last checkpointed episode. One row per episode is
    appended to ``training_metrics.csv``. Checkpoints are written every 10
    episodes and once more after the last episode.

    Args:
        env: Optional ready-made environment exposing ``observation_space.n``,
            ``action_space.n``, ``action_space.sample()``, ``reset()`` and
            ``step()``. When omitted, ``Taxi-v3`` is created here and closed
            when training ends. A caller-supplied environment is left open.
        render_mode: Render mode used when the environment is created here.
            The default ``"human"`` draws every step; pass ``None`` to train
            without rendering.
    """
    owns_env = env is None
    if owns_env:
        env = gym.make('Taxi-v3', render_mode=render_mode)

    try:
        state_size = env.observation_space.n
        action_size = env.action_space.n

        qtable_filename = "qtable.npy"
        params_filename = "training_params.npy"
        qtable, epsilon, start_episode = _load_checkpoint(
            qtable_filename, params_filename, state_size, action_size
        )

        learning_rate = 0.9
        discount_rate = 0.8
        decay_rate = 0.005

        num_episodes = 50
        num_steps = 99

        metrics_filename = 'training_metrics.csv'
        # Write the header only when the metrics file is created; later runs append.
        if not os.path.exists(metrics_filename):
            with open(metrics_filename, mode="w", newline='') as file:
                csv.writer(file).writerow(["Episode", "Total Rewards", "Steps", "Epsilon"])

        last_episode = None
        for episode in range(start_episode, num_episodes):
            total_rewards, steps = _run_episode(
                env, qtable, epsilon, num_steps, learning_rate, discount_rate
            )

            # Exponential decay of the exploration rate; the value logged and
            # checkpointed below is the one the next episode will use.
            epsilon = float(np.exp(-decay_rate * episode))

            # Re-open per episode so every completed row reaches the disk even
            # if the run is interrupted later.
            with open(metrics_filename, mode='a', newline='') as file:
                csv.writer(file).writerow([episode, total_rewards, steps, epsilon])

            if episode % 10 == 0:
                _save_checkpoint(qtable_filename, params_filename, qtable, epsilon, episode)
            last_episode = episode

        # Persist the final state too, otherwise the episodes after the last
        # multiple of 10 would be lost.
        if last_episode is not None and last_episode % 10 != 0:
            _save_checkpoint(qtable_filename, params_filename, qtable, epsilon, last_episode)
    finally:
        if owns_env:
            env.close()

    print("Entraînement terminé.")

# Start training only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Nouvelle Q-table initialisée.
Épisode 0: Q-table et paramètres sauvegardés.
Épisode 10: Q-table et paramètres sauvegardés.


KeyboardInterrupt: 