diff --git a/pgdrive/envs/base_env.py b/pgdrive/envs/base_env.py index e1916a9ad..112b732a6 100644 --- a/pgdrive/envs/base_env.py +++ b/pgdrive/envs/base_env.py @@ -273,6 +273,7 @@ def reset(self, episode_data: dict = None, force_seed: Union[None, int] = None): self.dones = {agent_id: False for agent_id in self.vehicles.keys()} self.episode_steps = 0 self.episode_rewards = defaultdict(float) + self.episode_lengths = defaultdict(int) # generate new traffic according to the map self.scene_manager.reset( diff --git a/pgdrive/envs/marl_envs/marl_inout_roundabout.py b/pgdrive/envs/marl_envs/marl_inout_roundabout.py index eeab56153..bc0cb4e59 100644 --- a/pgdrive/envs/marl_envs/marl_inout_roundabout.py +++ b/pgdrive/envs/marl_envs/marl_inout_roundabout.py @@ -2,15 +2,19 @@ import logging from math import floor +import gym import numpy as np from gym.spaces import Box from pgdrive.envs import PGDriveEnvV2 from pgdrive.envs.multi_agent_pgdrive import MultiAgentPGDrive +from pgdrive.envs.pgdrive_env_v2 import PGDriveEnvV2 +from pgdrive.obs import ObservationType +from pgdrive.obs.state_obs import StateObservation from pgdrive.scene_creator.blocks.first_block import FirstBlock from pgdrive.scene_creator.blocks.roundabout import Roundabout from pgdrive.scene_creator.map import PGMap from pgdrive.scene_creator.road.road import Road -from pgdrive.utils import get_np_random, PGConfig, distance_greater +from pgdrive.utils import get_np_random, distance_greater, norm, PGConfig MARoundaboutConfig = { "num_agents": 2, # Number of maximum agents in the scenarios. @@ -40,6 +44,7 @@ "crash_object_penalty": 1.0, "auto_termination": False, "camera_height": 4, + "bird_camera_height": 120 } @@ -252,6 +257,76 @@ def get_available_born_places(self): return ret +class LidarStateObservationMARound(ObservationType): + def __init__(self, vehicle_config): + self.state_obs = StateObservation(vehicle_config) + super(LidarStateObservationMARound, self).__init__(vehicle_config) + self.state_length = list(self.state_obs.observation_space.shape)[0] + + @property + def observation_space(self): + shape = list(self.state_obs.observation_space.shape) + if self.config["lidar"]["num_lasers"] > 0 and self.config["lidar"]["distance"] > 0: + # Number of lidar rays and distance should be positive! + shape[0] += self.config["lidar"]["num_lasers"] + self.config["lidar"]["num_others"] * self.state_length + return gym.spaces.Box(-0.0, 1.0, shape=tuple(shape), dtype=np.float32) + + def observe(self, vehicle): + """ + State observation + Navi info + 4 * closest vehicle info + Lidar points , + Definition of State Observation and Navi information can be found in **class StateObservation** + Other vehicles' info: [ + Projection of distance between ego and another vehicle on ego vehicle's heading direction, + Projection of distance between ego and another vehicle on ego vehicle's side direction, + Projection of speed between ego and another vehicle on ego vehicle's heading direction, + Projection of speed between ego and another vehicle on ego vehicle's side direction, + ] * 4, dim = 16 + + Lidar points: 240 lidar points surrounding vehicle, starting from the vehicle head in clockwise direction + + :param vehicle: BaseVehicle + :return: observation in 9 + 10 + 16 + 240 dim + """ + num_others = self.config["lidar"]["num_others"] + state = self.state_observe(vehicle) + other_v_info = [] + if vehicle.lidar is not None: + if self.config["lidar"]["num_others"] > 0: + # other_v_info += vehicle.lidar.get_surrounding_vehicles_info(vehicle, self.config["lidar"]["num_others"]) + surrounding_vehicles = list(vehicle.lidar.get_surrounding_vehicles()) + surrounding_vehicles.sort( + key=lambda v: norm(vehicle.position[0] - v.position[0], vehicle.position[1] - v.position[1]) + ) + surrounding_vehicles += [None] * num_others + for tmp_v in surrounding_vehicles[:num_others]: + if tmp_v is not None: + tmp_v = tmp_v.get_vehicle() + other_v_info += self.state_observe(tmp_v).tolist() + else: + other_v_info += [0] * self.state_length + other_v_info += self._add_noise_to_cloud_points( + vehicle.lidar.get_cloud_points(), + gaussian_noise=self.config["lidar"]["gaussian_noise"], + dropout_prob=self.config["lidar"]["dropout_prob"] + ) + return np.concatenate((state, np.asarray(other_v_info))) + + def state_observe(self, vehicle): + return self.state_obs.observe(vehicle) + + def _add_noise_to_cloud_points(self, points, gaussian_noise, dropout_prob): + if gaussian_noise > 0.0: + points = np.asarray(points) + points = np.clip(points + np.random.normal(loc=0.0, scale=gaussian_noise, size=points.shape), 0.0, 1.0) + + if dropout_prob > 0.0: + assert dropout_prob <= 1.0 + points = np.asarray(points) + points[np.random.uniform(0, 1, size=points.shape) < dropout_prob] = 0.0 + + return list(points) + + class MultiAgentRoundaboutEnv(MultiAgentPGDrive): _DEBUG_RANDOM_SEED = None born_roads = [ @@ -286,13 +361,13 @@ def _after_lazy_init(self): # Use top-down view by default if hasattr(self, "main_camera") and self.main_camera is not None: - bird_camera_height = 160 + bird_camera_height = self.config["bird_camera_height"] self.main_camera.camera.setPos(0, 0, bird_camera_height) self.main_camera.bird_camera_height = bird_camera_height self.main_camera.stop_chase(self.pg_world) # self.main_camera.camera.setPos(300, 20, bird_camera_height) - self.main_camera.camera_x += 100 - self.main_camera.camera_y += 20 + self.main_camera.camera_x += 95 + self.main_camera.camera_y += 15 def _process_extra_config(self, config): config = super(MultiAgentRoundaboutEnv, self)._process_extra_config(config) @@ -353,21 +428,6 @@ def reset(self, *args, **kwargs): def step(self, actions): o, r, d, i = super(MultiAgentRoundaboutEnv, self).step(actions) - # Check return alignment - # o_set_1 = set(kkk for kkk, rrr in r.items() if rrr == -self.config["out_of_road_penalty"]) - # o_set_2 = set(kkk for kkk, iii in i.items() if iii.get("out_of_road")) - # condition = o_set_1 == o_set_2 - # condition = set(kkk for kkk, rrr in r.items() if rrr == self.config["success_reward"]) == \ - # set(kkk for kkk, iii in i.items() if iii.get("arrive_dest")) and condition - # condition = ( - # not self.config["crash_done"] or ( - # set(kkk for kkk, rrr in r.items() if rrr == -self.config["crash_vehicle_penalty"]) - # == set(kkk for kkk, iii in i.items() if iii.get("crash_vehicle")) - # ) - # ) and condition - # if not condition: - # raise ValueError("Observation not aligned!") - # Update reborn manager if self.episode_steps >= self.config["horizon"]: self.target_vehicle_manager.set_allow_reborn(False) @@ -388,6 +448,7 @@ def step(self, actions): if d["__all__"]: for k in d.keys(): d[k] = True + return o, r, d, i def _update_destination_for(self, vehicle): @@ -398,40 +459,65 @@ def _update_destination_for(self, vehicle): def _reborn(self): new_obs_dict = {} while True: - allow_reborn, vehicle_info = self.target_vehicle_manager.propose_new_vehicle() - if vehicle_info is None: # No more vehicle to be assigned. - break - if not allow_reborn: - # If not allow to reborn, move agents to some rural places. - v = vehicle_info["vehicle"] - v.set_position((-999, -999)) - v.set_static(True) - self.target_vehicle_manager.confirm_reborn(False, vehicle_info) + new_id, new_obs = self._reborn_single_vehicle() + if new_obs is not None: + new_obs_dict[new_id] = new_obs + else: break - v = vehicle_info["vehicle"] - dead_vehicle_id = vehicle_info["old_name"] - bp_index = self._replace_vehicles(v) - if bp_index is None: # No more born places to be assigned. - self.target_vehicle_manager.confirm_reborn(False, vehicle_info) - break - - self.target_vehicle_manager.confirm_reborn(True, vehicle_info) - - new_id = vehicle_info["new_name"] - self.vehicles[new_id] = v # Put it to new vehicle id. - self.dones[new_id] = False # Put it in the internal dead-tracking dict. - self._born_places_manager.confirm_reborn(born_place_id=bp_index, vehicle_id=new_id) - logging.debug("{} Dead. {} Reborn!".format(dead_vehicle_id, new_id)) - - self.observations[new_id] = vehicle_info["observation"] - self.observations[new_id].reset(self, v) - self.observation_space.spaces[new_id] = vehicle_info["observation_space"] - self.action_space.spaces[new_id] = vehicle_info["action_space"] - new_obs = self.observations[new_id].observe(v) - new_obs_dict[new_id] = new_obs return new_obs_dict + def _force_reborn(self, agent_name): + """ + This function can force a given vehicle to reborn! + """ + self.target_vehicle_manager.finish(agent_name) + new_id, new_obs = self._reborn_single_vehicle() + return new_id, new_obs + + def _reborn_single_vehicle(self): + """ + Arbitrary insert a new vehicle to a new born place if possible. + """ + allow_reborn, vehicle_info = self.target_vehicle_manager.propose_new_vehicle() + if vehicle_info is None: # No more vehicle to be assigned. + return None, None + if not allow_reborn: + # If not allow to reborn, move agents to some rural places. + v = vehicle_info["vehicle"] + v.set_position((-999, -999)) + v.set_static(True) + self.target_vehicle_manager.confirm_reborn(False, vehicle_info) + return None, None + v = vehicle_info["vehicle"] + dead_vehicle_id = vehicle_info["old_name"] + bp_index = self._replace_vehicles(v) + if bp_index is None: # No more born places to be assigned. + self.target_vehicle_manager.confirm_reborn(False, vehicle_info) + return None, None + + self.target_vehicle_manager.confirm_reborn(True, vehicle_info) + + new_id = vehicle_info["new_name"] + v.set_static(False) + self.vehicles[new_id] = v # Put it to new vehicle id. + self.dones[new_id] = False # Put it in the internal dead-tracking dict. + self._born_places_manager.confirm_reborn(born_place_id=bp_index, vehicle_id=new_id) + logging.debug("{} Dead. {} Reborn!".format(dead_vehicle_id, new_id)) + + self.observations[new_id] = vehicle_info["observation"] + self.observations[new_id].reset(self, v) + self.observation_space.spaces[new_id] = vehicle_info["observation_space"] + self.action_space.spaces[new_id] = vehicle_info["action_space"] + new_obs = self.observations[new_id].observe(v) + return new_id, new_obs + def _after_vehicle_done(self, obs=None, reward=None, dones: dict = None, info=None): + for v_id, v_info in info.items(): + if v_info.get("episode_length", 0) >= self.config["horizon"]: + if dones[v_id] is not None: + info[v_id]["max_step"] = True + dones[v_id] = True + self.dones[v_id] = True for dead_vehicle_id, done in dones.items(): if done: self.target_vehicle_manager.finish(dead_vehicle_id) @@ -468,6 +554,9 @@ def _replace_vehicles(self, v): v.update_state(detector_mask=None) return bp_index + def get_single_observation(self, vehicle_config: "PGConfig") -> "ObservationType": + return LidarStateObservationMARound(vehicle_config) + def _draw(): env = MultiAgentRoundaboutEnv() @@ -553,15 +642,22 @@ def _vis(): for r_ in r.values(): total_r += r_ ep_s += 1 - d.update({"total_r": total_r, "episode length": ep_s}) - env.render(text=d) + # d.update({"total_r": total_r, "episode length": ep_s}) + render_text = { + "total_r": total_r, + "episode length": ep_s, + "cam_x": env.main_camera.camera_x, + "cam_y": env.main_camera.camera_y, + "cam_z": env.main_camera.bird_camera_height + } + env.render(text=render_text) if d["__all__"]: print( "Finish! Current step {}. Group Reward: {}. Average reward: {}".format( i, total_r, total_r / env.target_vehicle_manager.next_agent_count ) ) - break + # break if len(env.vehicles) == 0: total_r = 0 print("Reset") @@ -571,7 +667,7 @@ def _vis(): def _profile(): import time - env = MultiAgentRoundaboutEnv({"num_agents": 40}) + env = MultiAgentRoundaboutEnv({"num_agents": 16}) obs = env.reset() start = time.time() for s in range(10000): diff --git a/pgdrive/envs/multi_agent_pgdrive.py b/pgdrive/envs/multi_agent_pgdrive.py index 97210af94..1542a38c5 100644 --- a/pgdrive/envs/multi_agent_pgdrive.py +++ b/pgdrive/envs/multi_agent_pgdrive.py @@ -1,3 +1,5 @@ +import logging + from pgdrive.envs.pgdrive_env_v2 import PGDriveEnvV2 from pgdrive.scene_creator.blocks.first_block import FirstBlock from pgdrive.scene_creator.vehicle.base_vehicle import BaseVehicle @@ -71,12 +73,13 @@ def _process_extra_config(self, config) -> "PGConfig": config, allow_overwrite=False, stop_recursive_update=["target_vehicle_configs"] ) if not ret_config["crash_done"] and ret_config["crash_vehicle_penalty"] > 2: - import logging logging.warning( "Are you sure you wish to set crash_vehicle_penalty={} when crash_done=False?".format( ret_config["crash_vehicle_penalty"] ) ) + if ret_config["use_render"] and ret_config["fast"]: + logging.warning("Turn fast=False can accelerate Multi-agent rendering performance!") return ret_config def done_function(self, vehicle_id): @@ -172,10 +175,13 @@ def _update_spaces_if_needed(self): assert self.is_multi_agent current_obs_keys = set(self.observations.keys()) for k in current_obs_keys: - if k not in set(self.vehicles.keys()): + if k not in self.vehicles: o = self.observations.pop(k) - self.observation_space.spaces.pop(k) self.done_observations[k] = o + current_obs_keys = set(self.observation_space.spaces.keys()) + for k in current_obs_keys: + if k not in self.vehicles: + self.observation_space.spaces.pop(k) # self.action_space.spaces.pop(k) # Action space is updated in _reborn diff --git a/pgdrive/scene_creator/vehicle/base_vehicle.py b/pgdrive/scene_creator/vehicle/base_vehicle.py index c63d62536..fbc988d27 100644 --- a/pgdrive/scene_creator/vehicle/base_vehicle.py +++ b/pgdrive/scene_creator/vehicle/base_vehicle.py @@ -683,13 +683,13 @@ def destroy(self, _=None): self.vehicle_panel.destroy(self.pg_world) self.pg_world = None - def set_position(self, position): + def set_position(self, position, height=0.4): """ Should only be called when restore traffic from episode data :param position: 2d array or list :return: None """ - self.chassis_np.setPos(panda_position(position, 0.4)) + self.chassis_np.setPos(panda_position(position, height)) def set_heading(self, heading_theta) -> None: """ diff --git a/pgdrive/scene_creator/vehicle/base_vehicle_node.py b/pgdrive/scene_creator/vehicle/base_vehicle_node.py index 8055e3ae3..22041222e 100644 --- a/pgdrive/scene_creator/vehicle/base_vehicle_node.py +++ b/pgdrive/scene_creator/vehicle/base_vehicle_node.py @@ -39,3 +39,6 @@ def velocity(self): def destroy(self): # release pointer self._base_vehicle = None + + def get_vehicle(self): + return self._base_vehicle \ No newline at end of file diff --git a/pgdrive/scene_creator/vehicle_module/distance_detector.py b/pgdrive/scene_creator/vehicle_module/distance_detector.py index 9b13e887e..5154e2cde 100644 --- a/pgdrive/scene_creator/vehicle_module/distance_detector.py +++ b/pgdrive/scene_creator/vehicle_module/distance_detector.py @@ -176,6 +176,7 @@ def perceive( extra_filter_node: set = None, detector_mask: np.ndarray = None ): + assert not isinstance(detector_mask, str), "Please specify detector_mask either with None or a numpy array." self.cloud_points, self.detected_objects, colors = cutils.cutils_perceive( cloud_points=self.cloud_points, detector_mask=detector_mask.astype(dtype=np.uint8) if detector_mask is not None else None, diff --git a/pgdrive/tests/test_component/test_utils.py b/pgdrive/tests/test_component/test_utils.py index 3ee307635..1d28b63d5 100644 --- a/pgdrive/tests/test_component/test_utils.py +++ b/pgdrive/tests/test_component/test_utils.py @@ -45,6 +45,6 @@ def test_utils(): if __name__ == '__main__': - # test_cutils() - # test_fake_cutils() + test_cutils() + test_fake_cutils() test_utils() diff --git a/pgdrive/tests/test_env/test_ma_roundabout_env.py b/pgdrive/tests/test_env/test_ma_roundabout_env.py index 069fcd1e7..e2413de09 100644 --- a/pgdrive/tests/test_env/test_ma_roundabout_env.py +++ b/pgdrive/tests/test_env/test_ma_roundabout_env.py @@ -1,15 +1,65 @@ -import gym +import time + import numpy as np +from gym.spaces import Box, Dict from pgdrive.envs.marl_envs.marl_inout_roundabout import MultiAgentRoundaboutEnv from pgdrive.utils import distance_greater, norm +def _check_spaces_before_reset(env): + a = set(env.config["target_vehicle_configs"].keys()) + b = set(env.observation_space.spaces.keys()) + c = set(env.action_space.spaces.keys()) + assert a == b == c + _check_space(env) + + +def _check_spaces_after_reset(env, obs=None): + a = set(env.config["target_vehicle_configs"].keys()) + b = set(env.observation_space.spaces.keys()) + assert a == b + _check_shape(env) + + if obs: + assert isinstance(obs, dict) + assert set(obs.keys()) == a + + +def _check_shape(env): + b = set(env.observation_space.spaces.keys()) + c = set(env.action_space.spaces.keys()) + d = set(env.vehicles.keys()) + e = set(env.scene_manager.target_vehicles.keys()) + f = set([k for k in env.observation_space.spaces.keys() if not env.dones[k]]) + assert d == e == f, (b, c, d, e, f) + assert c.issuperset(d) + _check_space(env) + + +def _check_space(env): + assert isinstance(env.action_space, Dict) + assert isinstance(env.observation_space, Dict) + o_shape = None + for k, s in env.observation_space.spaces.items(): + assert isinstance(s, Box) + if o_shape is None: + o_shape = s.shape + assert s.shape == o_shape + a_shape = None + for k, s in env.action_space.spaces.items(): + assert isinstance(s, Box) + if a_shape is None: + a_shape = s.shape + assert s.shape == a_shape + + def _act(env, action): assert env.action_space.contains(action) obs, reward, done, info = env.step(action) - if not (set(obs.keys()) == set(reward.keys()) == set(env.observation_space.spaces.keys())): - print('sss') + _check_shape(env) + if not done["__all__"]: + assert len(env.vehicles) > 0 assert set(obs.keys()) == set(reward.keys()) == set(env.observation_space.spaces.keys()) assert env.observation_space.contains(obs) assert isinstance(reward, dict) @@ -20,35 +70,40 @@ def _act(env, action): def test_ma_roundabout_env(): env = MultiAgentRoundaboutEnv({"num_agents": 1, "vehicle_config": {"lidar": {"num_others": 8}}}) - assert isinstance(env.observation_space, gym.spaces.Dict) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} assert len(act) == 1 o, r, d, i = _act(env, act) - if step == 0: + if step == 0 or step == 1: assert not any(d.values()) finally: env.close() env = MultiAgentRoundaboutEnv({"num_agents": 1, "vehicle_config": {"lidar": {"num_others": 0}}}) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} assert len(act) == 1 o, r, d, i = _act(env, act) - if step == 0: + if step == 0 or step == 1: assert not any(d.values()) finally: env.close() env = MultiAgentRoundaboutEnv({"num_agents": 4, "vehicle_config": {"lidar": {"num_others": 8}}}) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} @@ -60,7 +115,9 @@ def test_ma_roundabout_env(): env = MultiAgentRoundaboutEnv({"num_agents": 4, "vehicle_config": {"lidar": {"num_others": 0}}}) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} @@ -72,7 +129,9 @@ def test_ma_roundabout_env(): env = MultiAgentRoundaboutEnv({"num_agents": 8, "vehicle_config": {"lidar": {"num_others": 0}}}) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(100): act = {k: [1, 1] for k in env.vehicles.keys()} @@ -101,7 +160,9 @@ def test_ma_roundabout_horizon(): } ) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) last_keys = set(env.vehicles.keys()) for step in range(1, 1000): @@ -141,7 +202,9 @@ def test_ma_roundabout_horizon(): def test_ma_roundabout_reset(): env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 4}) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for step in range(1000): act = {k: [1, 1] for k in env.vehicles.keys()} @@ -151,18 +214,24 @@ def test_ma_roundabout_reset(): if d["__all__"]: obs = env.reset() assert env.observation_space.contains(obs) + + _check_spaces_after_reset(env, obs) assert set(env.observation_space.spaces.keys()) == set(env.action_space.spaces.keys()) == \ set(env.observations.keys()) == set(obs.keys()) == \ set(env.config["target_vehicle_configs"].keys()) + + break finally: env.close() # Put vehicles to destination and then reset. This might cause error if agent is assigned destination BEFORE reset. env = MultiAgentRoundaboutEnv({"horizon": 100, "num_agents": 32, "success_reward": 777}) try: + _check_spaces_before_reset(env) success_count = 0 agent_count = 0 obs = env.reset() + _check_spaces_after_reset(env, obs) assert env.observation_space.contains(obs) for num_reset in range(5): @@ -172,6 +241,7 @@ def test_ma_roundabout_reset(): act = {k: [1, 1] for k in env.vehicles.keys()} o, r, d, i = _act(env, act) + # Force vehicle to success! for v_id, v in env.vehicles.items(): loc = v.routing_localization.final_lane.end v.set_position(loc) @@ -214,7 +284,9 @@ def test_ma_roundabout_reset(): if d["__all__"]: print("Finish {} agents. Success {} agents.".format(agent_count, success_count)) - env.reset() + o = env.reset() + assert env.observation_space.contains(o) + _check_spaces_after_reset(env, o) break finally: env.close() @@ -233,8 +305,10 @@ def _no_close_born(vehicles): env = MultiAgentRoundaboutEnv({"horizon": 50, "num_agents": 16, "map_config": {"exit_length": 20}}) env.seed(100) try: + _check_spaces_before_reset(env) for num_r in range(10): obs = env.reset() + _check_spaces_after_reset(env) for _ in range(10): o, r, d, i = env.step({k: [0, 0] for k in env.vehicles.keys()}) assert not any(d.values()) @@ -247,29 +321,31 @@ def _no_close_born(vehicles): def test_ma_roundabout_reward_done_alignment(): # out of road - env = MultiAgentRoundaboutEnv({"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False}) - try: - obs = env.reset() - assert env.observation_space.contains(obs) - for action in [-1, 1]: - for step in range(5000): - act = {k: [action, 1] for k in env.vehicles.keys()} - o, r, d, i = _act(env, act) - for kkk, ddd in d.items(): - if ddd and kkk != "__all__": - assert r[kkk] == -777 - assert i[kkk]["out_of_road"] - # print('{} done passed!'.format(kkk)) - for kkk, rrr in r.items(): - if rrr == -777: - assert d[kkk] - assert i[kkk]["out_of_road"] - # print('{} reward passed!'.format(kkk)) - if d["__all__"]: - env.reset() - break - finally: - env.close() + # env = MultiAgentRoundaboutEnv({"horizon": 200, "num_agents": 4, "out_of_road_penalty": 777, "crash_done": False}) + # try: + # _check_spaces_before_reset(env) + # obs = env.reset() + # _check_spaces_after_reset(env, obs) + # assert env.observation_space.contains(obs) + # for action in [-1, 1]: + # for step in range(5000): + # act = {k: [action, 1] for k in env.vehicles.keys()} + # o, r, d, i = _act(env, act) + # for kkk, ddd in d.items(): + # if ddd and kkk != "__all__": + # assert r[kkk] == -777 + # assert i[kkk]["out_of_road"] + # # print('{} done passed!'.format(kkk)) + # for kkk, rrr in r.items(): + # if rrr == -777: + # assert d[kkk] + # assert i[kkk]["out_of_road"] + # # print('{} reward passed!'.format(kkk)) + # if d["__all__"]: + # env.reset() + # break + # finally: + # env.close() # crash env = MultiAgentRoundaboutEnv( @@ -280,32 +356,107 @@ def test_ma_roundabout_reward_done_alignment(): "crash_done": True, # "use_render": True, - # "fast": True + # "fast": True, + "bird_camera_height": 160 } ) + # Force the seed here so that the agent1 and agent2 are in same heading! Otherwise they might be in vertical + # heading and cause one of the vehicle raise "out of road" error! + env._DEBUG_RANDOM_SEED = 1 try: + _check_spaces_before_reset(env) obs = env.reset() - for step in range(100): + _check_spaces_after_reset(env, obs) + for step in range(5): act = {k: [0, 0] for k in env.vehicles.keys()} o, r, d, i = _act(env, act) - env.vehicles["agent0"].set_position(env.vehicles["agent1"].position) + env.vehicles["agent0"].set_position(env.vehicles["agent1"].position, height=1.2) for step in range(5000): act = {k: [0, 0] for k in env.vehicles.keys()} o, r, d, i = _act(env, act) - if d["__all__"]: - break - for kkk, ddd in d.items(): + + if not any(d.values()): + continue + + assert sum(d.values()) == 2 + + for kkk in ['agent0', 'agent1']: + iii = i[kkk] + assert iii["crash_vehicle"] + assert iii["crash"] + assert r[kkk] == -1.7777 + # for kkk, ddd in d.items(): + ddd = d[kkk] if ddd and kkk != "__all__": assert r[kkk] == -1.7777 assert i[kkk]["crash_vehicle"] assert i[kkk]["crash"] # print('{} done passed!'.format(kkk)) - for kkk, rrr in r.items(): + # for kkk, rrr in r.items(): + rrr = r[kkk] if rrr == -1.7777: assert d[kkk] assert i[kkk]["crash_vehicle"] assert i[kkk]["crash"] # print('{} reward passed!'.format(kkk)) + # assert d["__all__"] + # if d["__all__"]: + break + finally: + env._DEBUG_RANDOM_SEED = None + env.close() + + # crash with real fixed vehicle + + # crash 2 + env = MultiAgentRoundaboutEnv( + { + "map_config": { + "exit_length": 100, + "lane_num": 1 + }, + # "use_render": True, + # "fast": True, + "horizon": 200, + "num_agents": 40, + "crash_vehicle_penalty": 1.7777, + } + ) + try: + _check_spaces_before_reset(env) + obs = env.reset() + _check_spaces_after_reset(env, obs) + for step in range(1): + act = {k: [0, 0] for k in env.vehicles.keys()} + o, r, d, i = _act(env, act) + + for v_id, v in env.vehicles.items(): + if v_id != "agent0": + v.set_static(True) + + for step in range(5000): + act = {k: [0, 1] for k in env.vehicles.keys()} + o, r, d, i = _act(env, act) + for kkk, iii in i.items(): + if iii["crash"]: + assert iii["crash_vehicle"] + if iii["crash_vehicle"]: + assert iii["crash"] + assert r[kkk] == -1.7777 + for kkk, ddd in d.items(): + if ddd and kkk != "__all__": + assert i[kkk]["out_of_road"] + # print('{} done passed!'.format(kkk)) + for kkk, rrr in r.items(): + if rrr == -1.7777: + # assert d[kkk] + assert i[kkk]["crash_vehicle"] + assert i[kkk]["crash"] + # print('{} reward passed!'.format(kkk)) + if d["agent0"]: + break + if d["__all__"]: + break finally: env.close() @@ -320,7 +471,9 @@ def test_ma_roundabout_reward_done_alignment(): } ) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env) env.vehicles["agent0"].set_position(env.vehicles["agent0"].routing_localization.final_lane.end) assert env.observation_space.contains(obs) for step in range(5000): @@ -371,7 +524,9 @@ def _replace_vehicles(self, v): env = TestEnv({"num_agents": 1}) try: + _check_spaces_before_reset(env) obs = env.reset() + _check_spaces_after_reset(env) ep_reward = 0.0 for step in range(1000): act = {k: [0, 1] for k in env.vehicles.keys()} @@ -390,10 +545,138 @@ def _replace_vehicles(self, v): env.close() +def test_ma_roundabout_init_space(): + try: + for start_seed in [5000, 6000, 7000]: + for num_agents in [16, 32]: + for num_others in [0, 2, 4, 8]: + for crash_vehicle_penalty in [0, 5]: + env_config = dict( + start_seed=start_seed, + num_agents=num_agents, + vehicle_config=dict(lidar=dict(num_others=num_others)), + crash_vehicle_penalty=crash_vehicle_penalty + ) + env = MultiAgentRoundaboutEnv(env_config) + + single_space = env.observation_space["agent0"] + assert single_space.shape is not None, single_space + assert np.prod(single_space.shape) is not None, single_space + + single_space = env.action_space["agent0"] + assert single_space.shape is not None, single_space + assert np.prod(single_space.shape) is not None, single_space + + _check_spaces_before_reset(env) + env.reset() + _check_spaces_after_reset(env) + env.close() + print('Finish: ', env_config) + finally: + if "env" in locals(): + env.close() + + +def test_ma_roundabout_no_short_episode(): + env = MultiAgentRoundaboutEnv({ + "horizon": 300, + "num_agents": 40, + }) + try: + _check_spaces_before_reset(env) + o = env.reset() + _check_spaces_after_reset(env, o) + actions = [[0, 1], [1, 1], [-1, 1]] + start = time.time() + d_count = 0 + d = {"__all__": False} + for step in range(2000): + # act = {k: actions[np.random.choice(len(actions))] for k in o.keys()} + act = {k: actions[np.random.choice(len(actions))] for k in env.vehicles.keys()} + o_keys = set(o.keys()).union({"__all__"}) + a_keys = set(env.action_space.spaces.keys()).union(set(d.keys())) + assert o_keys == a_keys + o, r, d, i = _act(env, act) + for kkk, iii in i.items(): + if d[kkk]: + assert iii["episode_length"] > 1 + d_count += 1 + if d["__all__"]: + o = env.reset() + d = {"__all__": False} + if (step + 1) % 100 == 0: + print( + "Finish {}/2000 simulation steps. Time elapse: {:.4f}. Average FPS: {:.4f}".format( + step + 1, + time.time() - start, (step + 1) / (time.time() - start) + ) + ) + if d_count > 200: + break + finally: + env.close() + + +def test_ma_roundabout_horizon_termination(): + # test horizon + env = MultiAgentRoundaboutEnv({ + "horizon": 100, + "num_agents": 8, + }) + try: + for _ in range(3): # This function is really easy to break, repeat multiple times! + _check_spaces_before_reset(env) + obs = env.reset() + _check_spaces_after_reset(env, obs) + assert env.observation_space.contains(obs) + should_reborn = set() + special_agents = set(["agent0", "agent7"]) + for step in range(1, 10000): + act = {k: [0, 0] for k in env.vehicles.keys()} + for v_id in act.keys(): + if v_id in special_agents: + act[v_id] = [1, 1] # Add some randomness + else: + if v_id in env.vehicles: + env.vehicles[v_id].set_static(True) + obs, r, d, i = _act(env, act) + if step == 0 or step == 1: + assert not any(d.values()) + + if should_reborn: + for kkk in should_reborn: + assert kkk not in obs, "It seems the max_step agents is not reborn!" + assert kkk not in r + assert kkk not in d + assert kkk not in i + should_reborn.clear() + + for kkk, ddd in d.items(): + if ddd and kkk == "__all__": + print("Current: ", step) + continue + if ddd and kkk not in special_agents: + assert i[kkk]["max_step"] + assert not i[kkk]["out_of_road"] + assert not i[kkk]["crash"] + assert not i[kkk]["crash_vehicle"] + should_reborn.add(kkk) + + if d["__all__"]: + obs = env.reset() + should_reborn.clear() + break + finally: + env.close() + + if __name__ == '__main__': test_ma_roundabout_env() - # test_ma_roundabout_horizon() - # test_ma_roundabout_reset() - # test_ma_roundabout_reward_done_alignment() - # test_ma_roundabout_close_born() + test_ma_roundabout_horizon() + test_ma_roundabout_reset() + test_ma_roundabout_reward_done_alignment() + test_ma_roundabout_close_born() test_ma_roundabout_reward_sign() + test_ma_roundabout_init_space() + test_ma_roundabout_no_short_episode() + test_ma_roundabout_horizon_termination()