# Politiques simples sur `highway-v0`
## Objectif : comparer 4 politiques (reward, crash rate, durée).

## Initialisation

In [2]:
import gymnasium as gym
import highway_env
import numpy as np

# Shared highway-v0 environment, created without a render_mode.
# Later cells read its action type and sample from its action space.
env = gym.make("highway-v0")
print("Action space:", env.action_space)

  from pkg_resources import resource_stream, resource_exists


Action space: Discrete(5)


In [3]:
# Reach through the gym wrappers to the action-type object and grab its
# index -> action-name table.
action_type = env.unwrapped.action_type
ACTIONS = action_type.ACTIONS_ALL

print("Actions disponibles :")
# One "index -> name" line per entry of the table.
for k, v in ACTIONS.items():
    print(f"{k} -> {v}")

Actions disponibles :
0 -> LANE_LEFT
1 -> IDLE
2 -> LANE_RIGHT
3 -> FASTER
4 -> SLOWER


## Définition des politiques

Nous considérons **quatre (04) politiques** :

- **`IDLE`** (baseline) : on reste sur `IDLE`.
- **`KEEP_RIGHT`** : on va vers la voie de droite quand on s'en éloigne (au-delà d'un seuil).
- **`FAST_PERIODIC`** : on accélère souvent selon un rythme défini.
- **`RANDOM`** : on sélectionne une action aléatoirement.

In [31]:
def car_ahead(obs, lane_eps=0.15, dist_thresh=1, relative=True):
    """
    Detect whether a vehicle is close ahead of the ego vehicle in its lane.

    Row 0 of ``obs`` describes the ego vehicle; every other row describes a
    candidate vehicle laid out as ``[presence, x, y, ...]``.

    Parameters
    ----------
    obs : 2-D array
        Observation matrix, one row per vehicle.
    lane_eps : float
        Maximum absolute lateral offset for a vehicle to count as being in
        the ego lane.
    dist_thresh : float
        Maximum longitudinal gap (exclusive) for a vehicle to count as close.
        Expressed in the same units as the observation's x column.
    relative : bool
        True (default) when the x/y columns of rows 1.. are already offsets
        from the ego vehicle. This is what highway-env's Kinematics
        observation produces with its default ``absolute=False`` setting,
        where only the ego row stays absolute. Pass False when every row is
        in the same absolute frame, so the ego position is subtracted first.

    Returns
    -------
    bool
        True as soon as one present vehicle is in the ego lane, strictly
        ahead, and closer than ``dist_thresh``; False otherwise.
    """
    for row in obs[1:]:
        # Rows whose presence flag is 0 are empty slots, not vehicles.
        if row[0] == 0:
            continue

        dx, dy = row[1], row[2]
        if not relative:
            # Absolute frame: turn positions into offsets from the ego row.
            dx = dx - obs[0, 1]
            dy = dy - obs[0, 2]

        same_lane = abs(dy) < lane_eps
        ahead_and_close = 0 < dx < dist_thresh

        if same_lane and ahead_and_close:
            return True

    return False

In [32]:
# Discrete action indices, in the order printed from ACTIONS_ALL above.
LANE_LEFT, IDLE, LANE_RIGHT, FASTER, SLOWER = 0, 1, 2, 3, 4

def policy_idle(obs, info, t):
    """Baseline policy: always return IDLE; obs, info and t are ignored."""
    return IDLE

def policy_keep_right(obs, info, t, y_target=0.85):
    """
    Simple KEEP_RIGHT policy.

    Decision order:
    1. a vehicle is detected ahead by ``car_ahead`` -> SLOWER
    2. the ego lateral coordinate ``obs[0, 2]`` is below ``y_target`` -> LANE_RIGHT
    3. otherwise -> IDLE

    ``info`` and ``t`` are accepted for signature compatibility and unused.

    NOTE(review): whether ``obs[0, 2]`` can ever reach the default
    ``y_target`` depends on how the observation is normalised — confirm,
    otherwise step 3 is never taken.
    """
    lateral_pos = obs[0, 2]

    # Braking takes priority over any lane change.
    if car_ahead(obs):
        return SLOWER

    return LANE_RIGHT if lateral_pos < y_target else IDLE
def policy_fast_periodic(obs, info, t, rythm=10):
    """
    Periodic acceleration policy.

    Returns FASTER on every step whose index ``t`` is a multiple of
    ``rythm`` (including t == 0) and IDLE on all other steps.
    ``obs`` and ``info`` are unused.
    """
    on_beat = (t % rythm == 0)
    return FASTER if on_beat else IDLE

def policy_random(obs, info, t):
    """
    Random policy: sample one action from the action space.

    The sample is drawn from the module-level ``env``, not from the
    environment the episode is actually running on; ``obs``, ``info`` and
    ``t`` are unused.
    """
    space = env.action_space
    return space.sample()

# Registry mapping a policy name to its callable (obs, info, t) -> action.
POLICIES = dict(
    IDLE=policy_idle,
    KEEP_RIGHT=policy_keep_right,
    FAST_PERIODIC=policy_fast_periodic,
    RANDOM=policy_random,
)

## Expérimentations

In [27]:
import time

def run_episode(env, policy_fn, max_steps=200, seed=None, step_delay=0.1, verbose=True):
    """
    Run one episode of ``env`` under ``policy_fn`` and summarise it.

    Parameters
    ----------
    env : environment
        Object exposing ``reset()`` -> ``(obs, info)`` and ``step(action)``
        -> ``(obs, reward, terminated, truncated, info)``.
    policy_fn : callable
        Called as ``policy_fn(obs, info, t)`` and must return an action.
    max_steps : int
        Hard cap on the number of steps taken.
    seed : int or None
        When given, forwarded as ``env.reset(seed=seed)`` so the episode is
        reproducible. When None, ``env.reset()`` is called without arguments.
    step_delay : float
        Seconds to sleep between steps. The default 0.1 keeps a rendered
        episode watchable; pass 0 for fast batch evaluation.
    verbose : bool
        Print the end-of-episode summary when True.

    Returns
    -------
    tuple
        ``(total_reward, steps, crashed)`` where ``crashed`` is read from
        the last ``info`` dict (False if the key is missing).
    """
    obs, info = env.reset() if seed is None else env.reset(seed=seed)

    total_reward = 0.0
    t = 0

    while t < max_steps:
        # Ask the policy for an action, then advance the simulation.
        action = policy_fn(obs, info, t)
        obs, reward, terminated, truncated, info = env.step(action)

        t += 1
        total_reward += reward

        if terminated or truncated:
            break

        # Pacing only; skipped entirely when no delay is requested.
        if step_delay and step_delay > 0:
            time.sleep(step_delay)

    crashed = info.get("crashed", False)

    if verbose:
        print("Episode terminé")
        print("steps =", t)
        print("reward cumulée =", total_reward)
        print("crashed =", crashed)

    return (total_reward, t, crashed)

In [37]:
# Separate environment with on-screen rendering, used only for this demo run.
# NOTE(review): env_human is never closed in this cell — consider calling
# env_human.close() once no later cell needs the render window.
env_human = gym.make("highway-v0", render_mode="human")

keep_right = POLICIES["KEEP_RIGHT"]

# Last expression of the cell: the (reward, steps, crashed) tuple is displayed.
run_episode(env_human, keep_right)

Episode terminé
steps = 14
reward cumulée = 11.26162561396758
crashed = True


(np.float64(11.26162561396758), 14, True)