In [1]:
import os
import sys
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

import numpy as np
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import CheckpointCallback
from gymnasium import spaces
import torch

In [2]:
# Add the `src` folder (sibling of this notebook's parent directory) to the
# Python path so the project-local `environment` module can be imported.
# The membership check keeps the cell idempotent: re-running it does not keep
# appending duplicate entries to sys.path.
src_dir = os.path.join(os.path.abspath('..'), 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)
import environment

In [3]:
# Wrap the TrafficEnvironment into a gym-like environment
class TrafficEnvWrapper(gym.Env):
    """Gymnasium adapter around a project traffic environment.

    The wrapper exposes a 3-way discrete action space and a flat, 21-element
    float32 observation vector, and translates the wrapped environment's
    ``reset``/``step`` results into the Gymnasium return conventions
    (``reset -> (obs, info)``, ``step -> (obs, reward, terminated, truncated, info)``).

    Args:
        env: The underlying environment object. It must provide ``reset()``
            returning an observation and ``step(action)`` returning a 3-, 4-
            or 5-element tuple (see ``step``).
    """

    def __init__(self, env):
        super().__init__()
        self.env = env
        # Action space: 3 actions (0: left, 1: stay, 2: right)
        self.action_space = spaces.Discrete(3)

        # Observations are exposed as a flat vector of 21 values
        # (presumably a (3, 7) grid flattened -- confirm against the wrapped env).
        # NOTE(review): low=0 assumes the wrapped env never emits negative values.
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(21,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Reset the wrapped environment and return ``(obs, info)``.

        Args:
            seed: Optional seed. Forwarded to ``gym.Env.reset`` and, when not
                ``None``, also used to seed NumPy's global RNG.
            options: Accepted for Gymnasium API compatibility; not used.

        Returns:
            A tuple of the flattened float32 observation and an empty info dict.
        """
        # Let gymnasium seed its own RNG first
        super().reset(seed=seed)
        if seed is not None:
            np.random.seed(seed)  # Seed numpy's global RNG as well

        # Convert whatever the wrapped env returns into a flat float32 vector
        obs = np.array(self.env.reset(), dtype=np.float32).flatten()
        # gymnasium requires reset to return a tuple (obs, info)
        return obs, {}

    def step(self, action):
        """Apply one action and return the Gymnasium 5-tuple.

        Args:
            action: Discrete action index in ``{0, 1, 2}``; may be a Python int
                or a size-1 NumPy scalar/array.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``obs`` is the
            flattened float32 observation, ``reward`` is a Python float and the
            two flags are Python bools.

        Raises:
            ValueError: If the wrapped ``step`` returns a tuple whose length is
                not 3, 4 or 5 (raised by tuple unpacking).
        """
        # Normalise to a plain int, then map 0 -> -1 (left), 1 -> 0 (stay), 2 -> 1 (right)
        action = int(np.asarray(action).item()) - 1

        result = self.env.step(action)

        # Accept the common result shapes of the wrapped environment
        if len(result) == 3:
            # (obs, reward, done): no info supplied, so add an empty one
            obs, reward, done = result
            truncated, info = False, {}
        elif len(result) == 4:
            # Classic gym style: (obs, reward, done, info)
            obs, reward, done, info = result
            truncated = False
        else:
            # Gymnasium style: (obs, reward, terminated, truncated, info)
            obs, reward, done, truncated, info = result

        # Gymnasium expects info to be a dict
        if info is None:
            info = {}

        # Flatten the observation before returning
        obs = np.array(obs, dtype=np.float32).flatten()

        # Return plain Python scalars so API checkers do not trip on NumPy types
        return obs, float(reward), bool(done), bool(truncated), info

    def render(self, mode="human"):
        """No-op; this wrapper does not implement rendering."""
        pass

In [4]:
# Build the raw traffic simulator, then adapt it to the Gymnasium interface
traffic_env = environment.TrafficEnvironment()
wrapped_env = TrafficEnvWrapper(env=traffic_env)

# Validate the wrapper with stable-baselines3's environment checker (warnings enabled)
check_env(env=wrapped_env, warn=True)

Initial Distance:  4000
Initial Clearance Rates:  [17.7 18.6 18.  17.7 17.1]
Initial Lane:  1
Initial Distance:  4000
Initial Clearance Rates:  [18.2 17.2 19.5 19.8 16.9]
Initial Lane:  1
Initial Distance:  4000
Initial Clearance Rates:  [19.9 19.  17.3 18.9 15.6]
Initial Lane:  1


In [5]:
# Query CUDA once, report it, and pick the matching torch device string
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
if cuda_available:
    device = 'cuda'
else:
    device = 'cpu'

CUDA available: False


In [6]:
# Fixed seed so the training run is reproducible across Restart & Run All.
# stable-baselines3 uses it to seed Python's `random`, NumPy and PyTorch, and
# forwards it to the environment's reset.
# NOTE(review): the wrapped environment is only reproducible if it draws its
# randomness from those seeded generators -- confirm.
SEED = 42

# Define the PPO model using the single wrapped environment
ppo_model = PPO("MlpPolicy", wrapped_env, verbose=1, device=device, seed=SEED)

# Set up a checkpoint callback to save the model every 1000 steps
checkpoint_callback = CheckpointCallback(save_freq=1000, save_path='./ppo_checkpoints/', name_prefix='ppo_traffic_model')

# Train the PPO model
total_timesteps = 50000
ppo_model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback)

# Save the trained model (stable-baselines3 writes it as a .zip archive)
ppo_model.save("ppo_traffic_model")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Initial Distance:  4000
Initial Clearance Rates:  [17.1 15.3 18.5 17.8 16.3]
Initial Lane:  1
Initial Distance:  4000
Initial Clearance Rates:  [16.2 17.5 18.8 16.1 16.1]
Initial Lane:  1
Initial Distance:  4000
Initial Clearance Rates:  [18.  16.4 19.8 16.1 17. ]
Initial Lane:  1
Initial Distance:  4000
Initial Clearance Rates:  [17.4 15.6 15.1 18.8 15.4]
Initial Lane:  1
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 391       |
|    ep_rew_mean     | -1.15e+03 |
| time/              |           |
|    fps             | 1915      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 2048      |
----------------------------------
Initial Distance:  4000
Initial Clearance Rates:  [15.5 16.1 16.3 16.3 18.2]
Initial Lane:  1
Initial Distance:  4000
Initial Clearance Rates:  [17.  18.6 19.1 17.  16.1]
Initial L