# Algorithms - A2C

###### Change of the RL Algorithm from PPO to A2C

##### Imports

In [3]:
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
import gymnasium as gym
import highway_env
from stable_baselines3 import A2C
%matplotlib inline

#### Highway (Fast) Environment

In [4]:
# Create the highway-fast environment with array-based rendering.
env = gym.make("highway-fast-v0", render_mode='rgb_array')
# gym.make returns the environment wrapped; forwarding of custom attributes
# such as `config` through wrappers is deprecated in Gymnasium (removed in 1.0),
# so read the configuration from the base environment explicitly.
pprint.pprint(env.unwrapped.config)

{'action': {'type': 'DiscreteMetaAction'},
 'centering_position': [0.3, 0.5],
 'collision_reward': -1,
 'controlled_vehicles': 1,
 'duration': 30,
 'ego_spacing': 1.5,
 'high_speed_reward': 0.4,
 'initial_lane_id': None,
 'lane_change_reward': 0,
 'lanes_count': 3,
 'manual_control': False,
 'normalize_reward': True,
 'observation': {'type': 'Kinematics'},
 'offroad_terminal': False,
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'reward_speed_range': [20, 30],
 'right_lane_reward': 0.1,
 'scaling': 5.5,
 'screen_height': 150,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 5,
 'vehicles_count': 20,
 'vehicles_density': 1}


  logger.warn(


##### Training the agent

In [5]:
# Train an A2C agent with an MLP policy on the environment created above,
# logging to TensorBoard and saving the final model under highway_a2c/.
model = A2C('MlpPolicy', env,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            gamma=0.8,
            n_steps=5,
            vf_coef=0.25,
            ent_coef=0.01,
            max_grad_norm=0.5,
            gae_lambda=0.95,
            # Fixed seed so that reruns of this stochastic training are comparable
            # (exact reproducibility on GPU is still not guaranteed).
            seed=0,
            verbose=1,
            tensorboard_log="highway_a2c/")
# Total number of environment steps used for training.
timesteps = 50000
model.learn(total_timesteps=timesteps)
model.save("highway_a2c/model")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to highway_a2c/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 9.94     |
|    ep_rew_mean        | 7.62     |
| time/                 |          |
|    fps                | 26       |
|    iterations         | 100      |
|    time_elapsed       | 18       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.828   |
|    explained_variance | 0.162    |
|    learning_rate      | 0.0005   |
|    n_updates          | 99       |
|    policy_loss        | -0.538   |
|    value_loss         | 2.62     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 9.89     |
|    ep_rew_mean        | 7.95     |
| time/                 |          |
|    fps                | 27       |
|    iterations         |

#### Merge Environment

In [6]:
# Create the merge environment with array-based rendering.
env = gym.make("merge-v0", render_mode='rgb_array')
# gym.make returns the environment wrapped; forwarding of custom attributes
# such as `config` through wrappers is deprecated in Gymnasium (removed in 1.0),
# so read the configuration from the base environment explicitly.
pprint.pprint(env.unwrapped.config)

{'action': {'type': 'DiscreteMetaAction'},
 'centering_position': [0.3, 0.5],
 'collision_reward': -1,
 'high_speed_reward': 0.2,
 'lane_change_reward': -0.05,
 'manual_control': False,
 'merging_speed_reward': -0.5,
 'observation': {'type': 'Kinematics'},
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'reward_speed_range': [20, 30],
 'right_lane_reward': 0.1,
 'scaling': 5.5,
 'screen_height': 150,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 15}


  logger.warn(


##### Training the agent

In [7]:
# Train an A2C agent with an MLP policy on the environment created above,
# logging to TensorBoard and saving the final model under merge_a2c/.
model = A2C('MlpPolicy', env,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            gamma=0.8,
            n_steps=5,
            vf_coef=0.25,
            ent_coef=0.01,
            max_grad_norm=0.5,
            gae_lambda=0.95,
            # Fixed seed so that reruns of this stochastic training are comparable
            # (exact reproducibility on GPU is still not guaranteed).
            seed=0,
            verbose=1,
            tensorboard_log="merge_a2c/")
# Total number of environment steps used for training.
timesteps = 50000
model.learn(total_timesteps=timesteps)
model.save("merge_a2c/model")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to merge_a2c/A2C_1
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse

#### Roundabout Environment

In [8]:
# Create the roundabout environment with array-based rendering.
env = gym.make("roundabout-v0", render_mode='rgb_array')
# gym.make returns the environment wrapped; forwarding of custom attributes
# such as `config` through wrappers is deprecated in Gymnasium (removed in 1.0),
# so read the configuration from the base environment explicitly.
pprint.pprint(env.unwrapped.config)

{'action': {'target_speeds': [0, 8, 16], 'type': 'DiscreteMetaAction'},
 'centering_position': [0.5, 0.6],
 'collision_reward': -1,
 'duration': 11,
 'high_speed_reward': 0.2,
 'incoming_vehicle_destination': None,
 'lane_change_reward': -0.05,
 'manual_control': False,
 'normalize_reward': True,
 'observation': {'absolute': True,
                 'features_range': {'vx': [-15, 15],
                                    'vy': [-15, 15],
                                    'x': [-100, 100],
                                    'y': [-100, 100]},
                 'type': 'Kinematics'},
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'right_lane_reward': 0,
 'scaling': 5.5,
 'screen_height': 600,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 15}


  logger.warn(


##### Training the agent

In [9]:
# Train an A2C agent with an MLP policy on the environment created above,
# logging to TensorBoard and saving the final model under roundabout_a2c/.
model = A2C('MlpPolicy', env,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            gamma=0.8,
            n_steps=5,
            vf_coef=0.25,
            ent_coef=0.01,
            max_grad_norm=0.5,
            gae_lambda=0.95,
            # Fixed seed so that reruns of this stochastic training are comparable
            # (exact reproducibility on GPU is still not guaranteed).
            seed=0,
            verbose=1,
            tensorboard_log="roundabout_a2c/")
# Total number of environment steps used for training.
timesteps = 50000
model.learn(total_timesteps=timesteps)
model.save("roundabout_a2c/model")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to roundabout_a2c/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 6.49     |
|    ep_rew_mean        | 5.83     |
| time/                 |          |
|    fps                | 25       |
|    iterations         | 100      |
|    time_elapsed       | 19       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.266   |
|    explained_variance | -863     |
|    learning_rate      | 0.0005   |
|    n_updates          | 99       |
|    policy_loss        | 0.0029   |
|    value_loss         | 0.034    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 6.23     |
|    ep_rew_mean        | 5.74     |
| time/                 |          |
|    fps                | 25       |
|    iterations       

#### Parking Environment

In [10]:
# Create the parking environment with array-based rendering.
env = gym.make("parking-v0", render_mode='rgb_array')
# gym.make returns the environment wrapped; forwarding of custom attributes
# such as `config` through wrappers is deprecated in Gymnasium (removed in 1.0),
# so read the configuration from the base environment explicitly.
pprint.pprint(env.unwrapped.config)

{'action': {'type': 'ContinuousAction'},
 'add_walls': True,
 'centering_position': [0.5, 0.5],
 'collision_reward': -5,
 'controlled_vehicles': 1,
 'duration': 100,
 'manual_control': False,
 'observation': {'features': ['x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'],
                 'normalize': False,
                 'scales': [100, 100, 5, 5, 1, 1],
                 'type': 'KinematicsGoal'},
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 5,
 'real_time_rendering': False,
 'render_agent': True,
 'reward_weights': [1, 0.3, 0, 0, 0.02, 0.02],
 'scaling': 7,
 'screen_height': 300,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 15,
 'steering_range': 0.7853981633974483,
 'success_goal_reward': 0.12,
 'vehicles_count': 0}


  logger.warn(


##### Training the agent

In [11]:
# Train an A2C agent on the parking environment created above, logging to
# TensorBoard and saving the final model under parking_a2c/.
# MultiInputPolicy is used here instead of MlpPolicy; presumably because the
# 'KinematicsGoal' observation is dictionary-valued (confirm against highway-env).
model = A2C('MultiInputPolicy', env,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            gamma=0.8,
            n_steps=5,
            vf_coef=0.25,
            ent_coef=0.01,
            max_grad_norm=0.5,
            gae_lambda=0.95,
            # Fixed seed so that reruns of this stochastic training are comparable
            # (exact reproducibility on GPU is still not guaranteed).
            seed=0,
            verbose=1,
            tensorboard_log="parking_a2c/")
# Total number of environment steps used for training.
timesteps = 50000
model.learn(total_timesteps=timesteps)
model.save("parking_a2c/model")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to parking_a2c/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 61.2     |
|    ep_rew_mean        | -30      |
|    success_rate       | 0        |
| time/                 |          |
|    fps                | 98       |
|    iterations         | 100      |
|    time_elapsed       | 5        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -2.85    |
|    explained_variance | -0.00567 |
|    learning_rate      | 0.0005   |
|    n_updates          | 99       |
|    policy_loss        | -2.97    |
|    std                | 1.01     |
|    value_loss         | 1.22     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 49.1     |
|    ep_rew_mean        | -26.6    |
|    success_rate       |

#### Intersection Environment

In [12]:
# Create the intersection environment with array-based rendering.
env = gym.make("intersection-v0", render_mode='rgb_array')
# gym.make returns the environment wrapped; forwarding of custom attributes
# such as `config` through wrappers is deprecated in Gymnasium (removed in 1.0),
# so read the configuration from the base environment explicitly.
pprint.pprint(env.unwrapped.config)

{'action': {'lateral': False,
            'longitudinal': True,
            'target_speeds': [0, 4.5, 9],
            'type': 'DiscreteMetaAction'},
 'arrived_reward': 1,
 'centering_position': [0.5, 0.6],
 'collision_reward': -5,
 'controlled_vehicles': 1,
 'destination': 'o1',
 'duration': 13,
 'high_speed_reward': 1,
 'initial_vehicle_count': 10,
 'manual_control': False,
 'normalize_reward': False,
 'observation': {'absolute': True,
                 'features': ['presence',
                              'x',
                              'y',
                              'vx',
                              'vy',
                              'cos_h',
                              'sin_h'],
                 'features_range': {'vx': [-20, 20],
                                    'vy': [-20, 20],
                                    'x': [-100, 100],
                                    'y': [-100, 100]},
                 'flatten': False,
                 'observe_intentions': False,


  logger.deprecation(
  logger.warn(


##### Training the agent

In [14]:
# Train an A2C agent with an MLP policy on the environment created above,
# logging to TensorBoard and saving the final model under intersection_a2c/.
model = A2C('MlpPolicy', env,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            gamma=0.8,
            n_steps=5,
            vf_coef=0.25,
            ent_coef=0.01,
            max_grad_norm=0.5,
            gae_lambda=0.95,
            # Fixed seed so that reruns of this stochastic training are comparable
            # (exact reproducibility on GPU is still not guaranteed).
            seed=0,
            verbose=1,
            tensorboard_log="intersection_a2c/")
# Total number of environment steps used for training.
timesteps = 50000
model.learn(total_timesteps=timesteps)
model.save("intersection_a2c/model")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to intersection_a2c/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.53     |
|    ep_rew_mean        | 3.32     |
| time/                 |          |
|    fps                | 16       |
|    iterations         | 100      |
|    time_elapsed       | 30       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.702   |
|    explained_variance | 0.667    |
|    learning_rate      | 0.0005   |
|    n_updates          | 99       |
|    policy_loss        | 0.0363   |
|    value_loss         | 0.404    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 7.59     |
|    ep_rew_mean        | 4.73     |
| time/                 |          |
|    fps                | 17       |
|    iterations     