In [1]:
# Generate a video of the env driven by an expert policy (Inverted AI DRIVE API) in place of a trained model

In [2]:
# !pip install "torchdrivesim @ git+https://github.com/inverted-ai/torchdrivesim.git@first-release-env"

In [3]:
import sys
sys.path.insert(0, "../")
sys.path.insert(0, "../torchdrivesim/")

In [4]:
import numpy as np
import pickle
import random
from typing import List, Dict, Tuple

import invertedai
from invertedai.common import AgentState, AgentAttributes, Point, TrafficLightState

import gymnasium as gym
import torch
from stable_baselines3 import PPO, SAC, TD3
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv, SubprocVecEnv
from torch import Tensor

import torchdriveenv
from torchdriveenv.env_utils import load_waypoint_suite_data, load_rl_training_config
from torchdriveenv.gym_env import WaypointSuiteEnv, SingleAgentWrapper

from torchdrivesim.kinematic import KinematicBicycle

  from .autonotebook import tqdm as notebook_tqdm
Commercial access denied and fallback to check for academic access.


In [5]:
def iai_drive(location: str, 
              agent_states: Tensor, 
              agent_attributes: Tensor, 
              recurrent_states: List, 
              traffic_lights_states: Dict = None,
              waypoint_for_ego: Tuple = None,
              random_seed: int = None):
    """Query the Inverted AI DRIVE endpoint for the next state of every agent.

    Args:
        location: Location identifier forwarded unchanged to ``invertedai.api.drive``.
        agent_states: Iterable of per-agent rows read as
            ``(x, y, orientation, speed)`` by index.
        agent_attributes: Iterable of per-agent rows read as
            ``(length, width, rear_axis_offset)`` by index.
        recurrent_states: Recurrent states forwarded unchanged to the API.
        traffic_lights_states: Optional traffic light states forwarded unchanged.
        waypoint_for_ego: Optional ``(x, y)`` pair; when given it is attached as
            the waypoint of the first agent (index 0).
        random_seed: Seed passed to the API. When ``None`` (the default) a fresh
            seed is drawn from ``random.randint(1, 10000)`` on every call, which
            matches the previous behaviour; pass a fixed value for repeatable runs.

    Returns:
        A tuple ``(agent_states, recurrent_states)``: the predicted states of all
        agents stacked along ``dim=-2`` into one tensor (presumably of shape
        ``(num_agents, 4)`` -- TODO confirm), and the recurrent states returned
        by the API.

    Raises:
        Any exception raised by the Inverted AI client propagates unchanged.
    """
    # Convert raw rows into the SDK's typed request objects.
    attributes = [
        AgentAttributes(length=at[0], width=at[1], rear_axis_offset=at[2])
        for at in agent_attributes
    ]
    if waypoint_for_ego is not None:
        # Only the first agent (the ego) is steered toward a waypoint.
        attributes[0].waypoint = Point(x=waypoint_for_ego[0], y=waypoint_for_ego[1])
    states = [
        AgentState(center=Point(x=st[0], y=st[1]), orientation=st[2], speed=st[3])
        for st in agent_states
    ]

    if random_seed is None:
        random_seed = random.randint(1, 10000)

    # The former ``try/except Exception as e: raise e`` wrapper was a no-op and
    # has been dropped; errors still propagate to the caller.
    response = invertedai.api.drive(
        location=location, agent_states=states, agent_attributes=attributes,
        recurrent_states=recurrent_states,
        traffic_lights_states=traffic_lights_states,
        random_seed=random_seed
    )
    next_states = torch.stack(
        [torch.tensor(st.tolist()) for st in response.agent_states], dim=-2
    )
    return next_states, response.recurrent_states

In [6]:
# Waypoint-suite cases that the environment is constructed from.
data = load_waypoint_suite_data("data/validation_cases.yml")
# Only the `env` section of the RL training config is needed in this notebook.
env_config = load_rl_training_config("env_configs/rl_training.yml").env
# Override the rendering settings for this run (presumably so the rollout is
# recorded to a video file -- confirm against torchdriveenv's EnvConfig).
env_config.render_mode = "video"
env_config.video_fov = 450.0

In [7]:
class ExpertEnv(WaypointSuiteEnv):
    """Waypoint-suite environment that hands its observation over via a file.

    Rather than returning the observation itself, ``get_obs`` pickles a dict
    with everything needed for a DRIVE API call to ``obs.pkl`` and returns a
    short placeholder string.
    """

    def __init__(self, cfg, data):
        super().__init__(cfg=cfg, data=data)
        # Set after the parent constructor so that this value is the one in
        # effect; it describes the placeholder string returned by get_obs.
        self.observation_space = gym.spaces.Text(max_length=10)

    def get_obs(self):
        """Write the current simulation snapshot to ``obs.pkl``.

        Returns:
            The literal string ``"pickle"``; the actual observation is the
            dict stored in the file.
        """
        # Underscores in the stored location name become colons after a
        # "carla:" prefix.
        suite_location = self.locations[self.current_waypoint_suite_idx]
        location = "carla:" + suite_location.replace("_", ":")

        vehicle_state = self.simulator.get_innermost_simulator().get_state()["vehicle"]
        agent_states = vehicle_state.squeeze().cpu().numpy()

        # These read private attributes of the wrapped simulator.
        agent_attributes = self.simulator.inner_simulator._agent_attributes
        recurrent_states = self.simulator.inner_simulator._recurrent_states
        light_controller = self.simulator.inner_simulator._traffic_light_controller
        traffic_lights_states = light_controller.current_state_with_name

        snapshot = {
            "location": location,
            "agent_states": agent_states,
            # Only the first entry of attributes / recurrent states is kept.
            "agent_attributes": agent_attributes[0],
            "recurrent_states": recurrent_states[0],
            "traffic_lights_states": traffic_lights_states,
            "waypoint_for_ego": self.current_target,
        }

        with open("obs.pkl", "wb") as f:
            pickle.dump(snapshot, f)

        return "pickle"

In [8]:
class Expert:
    """Policy that derives the ego action from an Inverted AI DRIVE prediction."""

    def __init__(self):
        self.kinematic_model = KinematicBicycle(left_handed=True)

    def predict(self, obs):
        """Compute the action that moves the ego toward its predicted next state.

        The ``obs`` argument is not used: the observation is read from
        ``obs.pkl`` instead.

        Returns:
            A tuple ``(action, states)`` where ``action`` is fitted by the
            kinematic model for the first agent and ``states`` holds the
            predicted states of all agents.
        """
        # NOTE(review): pickle.load executes arbitrary code for untrusted files;
        # acceptable only because this file is written locally by the env.
        with open("obs.pkl", "rb") as f:
            snapshot = pickle.load(f)

        drive_keys = (
            "location",
            "agent_states",
            "agent_attributes",
            "recurrent_states",
            "traffic_lights_states",
            "waypoint_for_ego",
        )
        states, recurrent_states = iai_drive(**{key: snapshot[key] for key in drive_keys})

        ego_future = states[0]
        ego_current = snapshot["agent_states"][0]

        print("future states in predict")
        print(ego_future)
        print("current states in predict")
        print(ego_current)
        action = self.kinematic_model.fit_action(
            future_state=ego_future,
            current_state=Tensor(ego_current),
        )
        print("action")
        print(action)
        return action, states

In [9]:
def make_expert_env():
    """Build the monitored single-agent expert environment.

    Uses the notebook-level ``env_config`` and ``data`` globals. The
    environment id is registered only once, so calling this function again
    (for example when re-running the cell) no longer re-registers the same id
    and triggers Gymnasium's override warning.

    Returns:
        The ``expert-env-v0`` environment wrapped in a ``Monitor``.
    """
    env_id = 'expert-env-v0'
    if env_id not in gym.registry:
        # ExpertEnv is looked up when the entry point runs, so a class
        # redefined later in the notebook is still picked up.
        gym.register(
            env_id,
            entry_point=lambda args: SingleAgentWrapper(ExpertEnv(cfg=args['cfg'], data=args['data'])),
        )
    env = gym.make(env_id, args={'cfg': env_config, 'data': data})
    env = Monitor(env)
    return env

In [10]:
def main(cfg, model):
    """Roll out one episode of the expert environment using ``model``.

    Args:
        cfg: Currently unused; the environment is built by ``make_expert_env``.
            Kept so existing calls continue to work.
        model: Object with a ``predict(obs)`` method returning a tuple whose
            first element is the action passed to ``env.step``.

    The loop stops as soon as the episode is terminated or truncated. The
    environment is closed even when the rollout raises (for example on a
    failed API call), so its resources are always released.
    """
    env = make_expert_env()
    try:
        obs = np.expand_dims(env.reset()[0], 0)
        while True:
            actions, _ = model.predict(obs)
            obs, reward, terminated, truncated, info = env.step(actions)
            if terminated or truncated:
                break
    finally:
        env.close()

In [11]:
# Display the effective environment configuration, including the overrides above.
env_config

EnvConfig(ego_only=False, max_environment_steps=200, use_background_traffic=True, terminated_at_infraction=True, seed=None, simulator=TorchDriveConfig(renderer=RendererConfig(backend='default', render_agent_direction=True, left_handed_coordinates=True, highlight_ego_vehicle=True), single_agent_rendering=False, collision_metric=<CollisionMetric.nograd: 'nograd'>, offroad_threshold=0.5, left_handed_coordinates=True), render_mode='video', video_filename='rendered_video.mp4', video_res=1024, video_fov=450.0)

In [12]:
# The "model" here is the API-backed expert defined above, not a trained policy.
model = Expert()

In [None]:
# Roll out one episode; each step prints the expert's predicted state and action.
main(env_config, model)

INFO:torchdriveenv.gym_env:seed: 1572716061
INFO:torchdriveenv.gym_env:    def get_reward(self):
        x = self.simulator.get_state()[..., 0]
        y = self.simulator.get_state()[..., 1]
        psi = self.simulator.get_state()[..., 2]

        d = math.dist((x, y), (self.last_x, self.last_y)) if (self.last_x is not None) and (self.last_y is not None) else 0
        distance_reward = 1 if d > 0.5 else 0
        psi_reward = (1 - math.cos(psi - self.last_psi)) * (-20.0) if (self.last_psi is not None) else 0
        if self.check_reach_target():
            reach_target_reward = 10
            self.reached_waypoint_num += 1
        else:
            reach_target_reward = 0
        r = torch.zeros_like(x)
        r += reach_target_reward + distance_reward + psi_reward
        return r



future states in predict
tensor([174.2600,  54.8200,   3.0100,   3.4600])
current states in predict
[174.60428    54.774315    3.007789    3.4495482]
action
tensor([0.0469, 0.0012])


  logger.warn(
  logger.warn(


future states in predict
tensor([173.9100,  54.8700,   3.0100,   3.4900])
current states in predict
[174.26016    54.821293    3.0074093   3.4730191]
action
tensor([ 0.1246, -0.0026])
future states in predict
tensor([173.5600,  54.9100,   3.0100,   3.5500])
current states in predict
[173.90962    54.867176    3.0082371   3.535299 ]
action
tensor([-0.0258,  0.0073])
future states in predict
tensor([173.2100,  54.9500,   3.0100,   3.5400])
current states in predict
[173.56107    54.918015    3.0058868   3.522375 ]
action
tensor([0.0055, 0.0285])
future states in predict
tensor([172.8600,  55.0100,   3.0100,   3.5400])
current states in predict
[173.21428    54.981316    2.9966993   3.5251315]
action
tensor([0.0585, 0.0408])
future states in predict
tensor([172.5100,  55.0800,   3.0000,   3.5700])
current states in predict
[172.86658    55.05506     2.9834604   3.5543776]
action
tensor([0.0403, 0.0562])
future states in predict
tensor([172.1600,  55.1600,   2.9900,   3.5900])
current stat

future states in predict
tensor([1.6793e+02, 5.8310e+01, 2.5900e+00, 4.0000e-02])
current states in predict
[1.6792986e+02 5.8309776e+01 2.5854914e+00 5.2584097e-02]
action
tensor([-0.0999, -0.9948])
future states in predict
tensor([1.6793e+02, 5.8310e+01, 2.5900e+00, 1.0000e-02])
current states in predict
[1.6792972e+02 5.8309555e+01 2.5856447e+00 2.6365593e-03]
action
tensor([ 0.0052, -0.9973])
future states in predict
tensor([167.9300,  58.3100,   2.5900,   0.0000])
current states in predict
[1.6792944e+02 5.8309113e+01 2.5859494e+00 5.2405950e-03]
action
tensor([ 0.0104, -0.9987])
future states in predict
tensor([1.6793e+02, 5.8310e+01, 2.5900e+00, 2.0000e-02])
current states in predict
[1.6792889e+02 5.8308228e+01 2.5865569e+00 1.0448719e-02]
action
tensor([ 0.0208, -0.9997])
future states in predict
tensor([1.6793e+02, 5.8310e+01, 2.5900e+00, 1.0000e-02])
current states in predict
[1.6792780e+02 5.8306454e+01 2.5877700e+00 2.0864999e-02]
action
tensor([-0.1252,  0.9996])
future s

future states in predict
tensor([166.6900,  59.0900,   2.5800,   1.0900])
current states in predict
[166.78477    59.031715    2.5821867   1.0508083]
action
tensor([0.1236, 0.0051])
future states in predict
tensor([166.5900,  59.1600,   2.5800,   1.1600])
current states in predict
[166.69095    59.091515    2.5816672   1.1126055]
action
tensor([ 0.2146, -0.0230])
future states in predict
tensor([166.4800,  59.2200,   2.5800,   1.2700])
current states in predict
[166.58531    59.152527    2.5842307   1.2199003]
action
tensor([ 0.0617, -0.0079])
future states in predict
tensor([166.3700,  59.2900,   2.5800,   1.2800])
current states in predict
[166.47835    59.21736     2.585135    1.2507712]
action
tensor([ 0.1074, -0.0217])
future states in predict
tensor([166.2500,  59.3600,   2.5900,   1.3500])
current states in predict
[166.3653     59.282444    2.5877225   1.3044842]
action
tensor([ 0.1701, -0.0244])
future states in predict
tensor([166.1300,  59.4300,   2.5900,   1.4200])
current 

future states in predict
tensor([1.6345e+02, 5.9930e+01, 2.9100e+00, 8.0000e-02])
current states in predict
[1.6346176e+02 5.9927650e+01 2.9137039e+00 8.4569626e-02]
action
tensor([0.0708, 0.0196])
future states in predict
tensor([1.6344e+02, 5.9930e+01, 2.9100e+00, 1.2000e-01])
current states in predict
[1.63450165e+02 5.99307175e+01 2.91348958e+00 1.19969115e-01]
action
tensor([-0.0362,  0.1901])
future states in predict
tensor([1.6343e+02, 5.9940e+01, 2.9100e+00, 8.0000e-02])
current states in predict
[1.63441360e+02 5.99358368e+01 2.91174746e+00 1.01876274e-01]
action
tensor([ 0.0384, -0.0771])
future states in predict
tensor([1.6342e+02, 5.9940e+01, 2.9100e+00, 1.1000e-01])
current states in predict
[1.63429321e+02 5.99371490e+01 2.91259766e+00 1.21056885e-01]
action
tensor([-0.0471, -0.0431])
future states in predict
tensor([1.6341e+02, 5.9940e+01, 2.9100e+00, 1.0000e-01])
current states in predict
[1.6341969e+02 5.9938717e+01 2.9129808e+00 9.7488806e-02]
action
tensor([0.0005, 0