In [3]:
# Importation des bibliothèques
import gymnasium as gym
import time
import numpy as np
import pygame

# Exercice 1 : Découverte et Exploration d'un Environnement Gym

In [4]:
# Build the CartPole-v1 environment with render_mode="human", then reset it
# to get the first observation and info dict of a new episode.
env = gym.make("CartPole-v1", render_mode="human")
initial_state = env.reset()
observation, info = initial_state

In [5]:
# Show the action space and the observation space of the environment,
# one per line.
print(
    f"Espace d'actions : {env.action_space}",
    f"Espace d'observations : {env.observation_space}",
    sep="\n",
)


Espace d'actions : Discrete(2)
Espace d'observations : Box([-4.8               -inf -0.41887903        -inf], [4.8               inf 0.41887903        inf], (4,), float32)


In [6]:
# Drive the environment with random actions for at most 100 steps, printing
# every transition. The run stops early if the render window is closed.
iteration = 0
try:
    while iteration < 100:
        # Leave the *outer* loop on a window-close request. A `break` placed
        # inside a nested `for event in ...` loop would only exit that inner
        # loop, and the code would go on to step an already-closed environment.
        if any(event.type == pygame.QUIT for event in pygame.event.get()):
            break

        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        print(f"Action : {action}, Observation : {observation}, Reward : {reward}")

        # Begin a new episode as soon as the current one ends, so the step
        # budget can span several episodes.
        if terminated or truncated:
            observation, info = env.reset()

        iteration += 1
        time.sleep(0.01)  # short pause between steps
finally:
    # Close the environment exactly once, even if the loop raises or the
    # kernel is interrupted.
    env.close()

Action : 1, Observation : [-0.00881132  0.18421501 -0.0328466  -0.27480096], Reward : 1.0
Action : 1, Observation : [-0.00512702  0.37978983 -0.03834261 -0.57766014], Reward : 1.0
Action : 0, Observation : [ 0.00246878  0.18522567 -0.04989582 -0.29729834], Reward : 1.0
Action : 0, Observation : [ 0.00617329 -0.00915081 -0.05584178 -0.02075977], Reward : 1.0
Action : 1, Observation : [ 0.00599027  0.18672566 -0.05625698 -0.33052546], Reward : 1.0
Action : 1, Observation : [ 0.00972479  0.3826014  -0.06286748 -0.6404051 ], Reward : 1.0
Action : 1, Observation : [ 0.01737682  0.57854086 -0.07567559 -0.9522045 ], Reward : 1.0
Action : 0, Observation : [ 0.02894763  0.38451436 -0.09471968 -0.6842251 ], Reward : 1.0
Action : 0, Observation : [ 0.03663792  0.19082628 -0.10840418 -0.4228013 ], Reward : 1.0
Action : 0, Observation : [ 0.04045444 -0.00260634 -0.11686021 -0.16616398], Reward : 1.0
Action : 0, Observation : [ 0.04040232 -0.19587845 -0.12018349  0.08748815], Reward : 1.0
Action : 0

# Exercice 2 : Manipulation des Observations et Récompenses

In [7]:
# Create a new rendered CartPole-v1 environment (the previous one was closed
# at the end of Exercise 1) and reset it to start an episode.
env = gym.make("CartPole-v1", render_mode="human")
reset_result = env.reset()
observation, info = reset_result


In [8]:
# Sample one random action and advance the environment by a single step.
action = env.action_space.sample()
step_result = env.step(action)
observation, reward, terminated, truncated, info = step_result

In [9]:
# Report the five values returned by the last env.step() call, one per line.
print(
    f"Observation: {observation}",
    f"Récompense: {reward}",
    f"Épisode terminé: {terminated}",
    f"Épisode tronqué: {truncated}",
    f"Informations supplémentaires: {info}",
    sep="\n",
)

Observation: [ 0.01865603  0.14722326 -0.00694064 -0.29809844]
Récompense: 1.0
Épisode terminé: False
Épisode tronqué: False
Informations supplémentaires: {}


In [10]:
# Take five random steps, printing each transition, and reset the environment
# whenever an episode ends. The run stops early if the window is closed.
try:
    for i in range(5):
        # Exit the step loop itself on a window-close request. A `break` inside
        # a nested event loop would only leave that inner loop and the closed
        # environment would still be stepped afterwards.
        if any(event.type == pygame.QUIT for event in pygame.event.get()):
            break

        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        print(f"Pas {i+1} - Action: {action}, Observation: {observation}, Reward: {reward}")

        if terminated or truncated:
            observation, info = env.reset()
            print("Environnement réinitialisé")

        time.sleep(0.1)  # pause between steps
finally:
    # Close the environment exactly once, even if the loop raises or the
    # kernel is interrupted.
    env.close()

Pas 1 - Action: 0, Observation: [ 0.0216005  -0.04779907 -0.01290261 -0.00761254], Reward: 1.0
Pas 2 - Action: 1, Observation: [ 0.02064452  0.14750552 -0.01305486 -0.3043383 ], Reward: 1.0
Pas 3 - Action: 1, Observation: [ 0.02359463  0.34281108 -0.01914163 -0.6011097 ], Reward: 1.0
Pas 4 - Action: 1, Observation: [ 0.03045085  0.5381955  -0.03116382 -0.8997599 ], Reward: 1.0
Pas 5 - Action: 1, Observation: [ 0.04121476  0.7337256  -0.04915902 -1.2020733 ], Reward: 1.0


# Exercice 3 : Contrôle Manuel de l'Agent

In [11]:
# Rendered CartPole-v1 environment plus the per-episode counters
# (step count and cumulative reward), both starting at zero.
env = gym.make("CartPole-v1", render_mode="human")
observation, info = env.reset()
steps, total_reward = 0, 0


In [None]:
# Manual control: the user types the action (0 or 1) for every step, or 'q'
# to quit. Window events are only polled between prompts, because input()
# blocks until the user answers.
running = True
try:
    while running:
        # Stop the whole loop when the window is closed. A `break` inside a
        # nested event loop would only leave that inner loop, after which the
        # user would still be prompted and the closed environment stepped.
        if any(event.type == pygame.QUIT for event in pygame.event.get()):
            running = False
            continue

        # strip() so that "q " or " q" is still recognised as the quit command.
        user_input = input("Entrez une action (0 ou 1, q pour quitter): ").strip()
        if user_input.lower() == 'q':
            running = False
            continue

        # Validate the input before touching the environment: it must parse
        # as an integer and be one of the two accepted actions.
        try:
            action = int(user_input)
        except ValueError:
            print("Entrée invalide, veuillez entrer 0, 1 ou q")
            continue
        if action not in (0, 1):
            print("Action invalide, doit être 0 ou 1")
            continue

        observation, reward, terminated, truncated, info = env.step(action)
        steps += 1
        total_reward += reward

        print(f"Position: {observation[0]}, Angle: {observation[2]}, Reward: {reward}, Total: {total_reward}")

        # On episode end, report the totals and start over with fresh counters.
        if terminated or truncated:
            print(f"Épisode terminé après {steps} pas avec une récompense totale de {total_reward}")
            observation, info = env.reset()
            total_reward = 0
            steps = 0
            print("Environnement réinitialisé")
finally:
    # Close the environment exactly once, even if the loop raises or the
    # kernel is interrupted while waiting for input.
    env.close()

# Exercice 4 : Évaluation des Performances d'une Politique Aléatoire

In [None]:
# Evaluate a purely random policy: run `num_episodes` episodes, record how many
# steps and how much time each one lasted, then print summary statistics.
env = gym.make("CartPole-v1", render_mode="human")
num_episodes = 10
episode_durations = []  # duration of each completed episode, in seconds
episode_steps = []      # number of steps of each completed episode

print(f"Exécution de {num_episodes} épisodes avec des actions aléatoires")

# Set when the user closes the render window, so that both the step loop and
# the episode loop can stop. A `break` inside a nested event loop would only
# leave that innermost loop and the closed environment would still be stepped.
window_closed = False
try:
    for episode in range(num_episodes):
        observation, info = env.reset()
        steps = 0
        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()

        while True:
            if any(event.type == pygame.QUIT for event in pygame.event.get()):
                window_closed = True
                break

            action = env.action_space.sample()
            observation, reward, terminated, truncated, info = env.step(action)
            steps += 1

            if terminated or truncated:
                duration = time.perf_counter() - start_time
                episode_durations.append(duration)
                episode_steps.append(steps)
                print(f"Épisode {episode+1}: {steps} pas, durée: {duration:.2f} secondes")
                break

            # The pause is part of the measured duration, as is rendering time.
            time.sleep(0.01)

        # An episode cut short by closing the window is not recorded.
        if window_closed:
            break
finally:
    # Close the environment exactly once, even if the run raises or the
    # kernel is interrupted.
    env.close()

# Summary statistics over the episodes that ran to completion.
if episode_durations:
    average_duration = np.mean(episode_durations)
    average_steps = np.mean(episode_steps)
    print(f"\nDurée moyenne des épisodes: {average_duration:.2f} secondes")
    print(f"Nombre de pas moyen: {average_steps:.2f}")
    print(f"Durée minimale: {np.min(episode_durations):.2f} secondes")
    print(f"Durée maximale: {np.max(episode_durations):.2f} secondes")
    print(f"Écart-type des durées: {np.std(episode_durations):.2f} secondes")
else:
    print("Aucune donnée collectée pour l'analyse.")