# Setup

Group members:
- Pierre JOURDIN
- Aymeric CONTI
- Anthony QUENTIN

In [8]:
import pickle
import gymnasium as gym
import highway_env  # noqa: F401
import matplotlib.pyplot as plt
import numpy as np
import random

In [9]:
# Master switch for MLflow experiment tracking. mlflow is only imported when
# this flag is True; the other mlflow calls in this notebook are guarded by
# the same flag.
LOGGING = False

if LOGGING:
    import mlflow

In [10]:
# Hyperparameters of the experiment. When LOGGING is enabled they are recorded
# as the parameters of a fresh MLflow run.
hyperparameters = {
    "epsilon": 0.05,
}

if LOGGING:
    # mlflow.start_run() raises if a run is already active, which happens as
    # soon as this cell is executed a second time in the same kernel. Close any
    # leftover run first so the cell can be re-run safely.
    if mlflow.active_run() is not None:
        mlflow.end_run()
    mlflow.start_run()
    mlflow.log_params(hyperparameters)

In [11]:
# Environment configuration applied to the highway-env environment through
# env.unwrapped.configure() further down in this cell.
config_dict = {
    # Observation: 7 features on a grid spanning [-20, 20] in steps of 5 along
    # both axes; the observation space reported after reset is (7, 8, 8).
    "observation": {
        "type": "OccupancyGrid",
        "vehicles_count": 10,
        "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h"],
        "features_range": {
            "x": [-100, 100],
            "y": [-100, 100],
            "vx": [-20, 20],
            "vy": [-20, 20],
        },
        "grid_size": [[-20, 20], [-20, 20]],
        "grid_step": [5, 5],
        "absolute": False,
    },
    # Action: the action space reported after reset is Discrete(5).
    "action": {
        "type": "DiscreteMetaAction",
    },
    "lanes_count": 4,
    "vehicles_count": 15,
    "duration": 60,  # [s]
    "initial_spacing": 0,
    # The reward received when colliding with a vehicle.
    "collision_reward": -1,
    # The reward received when driving on the right-most lanes, linearly
    # mapped to zero for other lanes.
    "right_lane_reward": 0.5,
    # The reward received when driving at full speed, linearly mapped to zero
    # for lower speeds according to config["reward_speed_range"].
    "high_speed_reward": 0.1,
    "lane_change_reward": 0,
    # [m/s] The reward for high speed is mapped linearly from this range to
    # [0, HighwayEnv.HIGH_SPEED_REWARD].
    "reward_speed_range": [
        20,
        30,
    ],
    "simulation_frequency": 5,  # [Hz]
    "policy_frequency": 1,  # [Hz]
    "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle",
    "screen_width": 600,  # [px]
    "screen_height": 150,  # [px]
    "centering_position": [0.3, 0.5],
    "scaling": 5.5,
    "show_trajectories": True,
    "render_agent": True,
    "offscreen_rendering": False,
    "disable_collision_checks": True,
}

env = gym.make("highway-fast-v0", render_mode="rgb_array")
env.unwrapped.configure(config_dict)
# Reset after configure(): per the highway-env documentation a configuration
# change only becomes effective on reset, so the spaces are read afterwards.
obs, _ = env.reset()

# NOTE: these are the gymnasium Space objects (Discrete / Box), not
# enumerations of the individual actions or states.
actions = env.action_space
states = env.observation_space

print("Action Space:", actions)
print("Observation Space:", states)

Action Space: Discrete(5)
Observation Space: Box(-inf, inf, (7, 8, 8), float32)


# DQN

In [12]:
class ReplayBuffer:
    """Fixed-capacity store of transitions with uniform random sampling.

    Transitions live in a plain list used as a ring buffer: once ``capacity``
    entries have been stored, every new transition overwrites the oldest one.
    """

    def __init__(self, capacity=100):
        """Create an empty buffer.

        Args:
            capacity: Maximum number of transitions kept at once; must be >= 1.

        Raises:
            ValueError: If ``capacity`` is smaller than 1. Without this check
                the failure would only surface later, as an ``IndexError``
                inside ``add``.
        """
        if capacity < 1:
            raise ValueError(f"capacity must be a positive integer, got {capacity!r}")
        self.capacity = capacity
        self.position = 0  # index of the slot the next add() writes to
        self.memory = []

    def add(self, s, a, r, sprime, aprime):
        """Store one transition, evicting the oldest one when the buffer is full.

        The five values are stored together as the tuple
        ``(s, a, r, sprime, aprime)``; the buffer does not inspect them.

        Args:
            s: State before the step.
            a: Action taken.
            r: Reward received.
            sprime: State after the step.
            aprime: Fifth element of the transition (presumably the next
                action, judging by the name -- confirm against the caller).
        """
        if len(self.memory) < self.capacity:
            # Still filling up: grow the list so the slot at `position` exists.
            self.memory.append(None)
        self.memory[self.position] = (s, a, r, sprime, aprime)
        # Wrap around so the oldest entry is the next one to be overwritten.
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Return ``batch_size`` transitions drawn uniformly at random.

        Sampling is done WITH replacement (``random.choices``), so the same
        transition can appear several times in one batch and ``batch_size``
        may exceed ``len(self)``.

        Raises:
            IndexError: If the buffer is empty (raised by ``random.choices``).
        """
        return random.choices(self.memory, k=batch_size)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.memory)

# Viz

In [13]:
# Close the active MLflow run; only relevant when LOGGING is enabled.
if LOGGING:
    mlflow.end_run()

In [None]:
# Roll the environment forward for 100 steps with a fixed action and render
# each step.
obs, _ = env.reset()

# Constant action for this rollout (1 is IDLE in highway-env's
# DiscreteMetaAction). Pass an integer, not an array.
action = 1

for _ in range(100):
    obs, reward, done, truncated, info = env.step(action)
    # NOTE: when the env is created with render_mode="rgb_array", render()
    # returns the frame as an array instead of opening a window; the frame is
    # discarded here.
    env.render()
    # Stepping an episode that has already terminated or been truncated is
    # undefined behaviour in Gymnasium, so start a new episode first.
    if done or truncated:
        obs, info = env.reset()