In [2]:
import os
# from pycharm
from matplotlib import pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from tempfile import gettempdir

from l5kit.configs import load_config_data
from l5kit.data import ChunkedDataset, LocalDataManager
from l5kit.dataset import EgoDatasetVectorized
from l5kit.planning.vectorized.closed_loop_model import VectorizedUnrollModel
from l5kit.planning.vectorized.open_loop_model import VectorizedModel
from l5kit.vectorization.vectorizer_builder import build_vectorizer

from torch.utils.tensorboard import SummaryWriter

import sys
from pathlib import Path
# project_path = str(Path(__file__).parents[1])
project_path = "/mnt/share_disk/user/daixingyuan/l5kit"
print("project path: ", project_path)
# Guard the append so that re-running this cell does not keep adding the
# same entry to sys.path.
if project_path not in sys.path:
    sys.path.append(project_path)
print(sys.path)

# Prepare the data path and load the config.
# NOTE(review): presumably LocalDataManager(None) resolves dataset keys
# relative to this environment variable - confirm against the l5kit docs.
os.environ["L5KIT_DATA_FOLDER"] = "/mnt/share_disk/user/public/l5kit/prediction"

# One data manager is created and shared by the dataset and the vectorizer.
dm = LocalDataManager(None)

# Load the urban_driver example configuration from the project checkout.
cfg = load_config_data(str(Path(project_path, "examples/urban_driver/config.yaml")))

# ===== INIT DATASET
dataset_path = dm.require(cfg["train_data_loader"]["key"])

train_zarr = ChunkedDataset(dataset_path).open()
vectorizer = build_vectorizer(cfg, dm)
train_dataset = EgoDatasetVectorized(cfg, train_zarr, vectorizer)

print(train_zarr)


project path:  /mnt/share_disk/user/daixingyuan/l5kit
['C:\\Users\\XY\\AppData\\Local\\JetBrains\\Toolbox\\apps\\PyCharm-P\\ch-0\\221.5080.212\\plugins\\python\\helpers-pro\\jupyter_debug', 'C:\\Users\\XY\\AppData\\Local\\JetBrains\\Toolbox\\apps\\PyCharm-P\\ch-0\\221.5080.212\\plugins\\python\\helpers\\pydev', '/mnt/share_disk/user/daixingyuan/l5kit/examples/offline_rl', '/usr/local/envs/l5kit/lib/python38.zip', '/usr/local/envs/l5kit/lib/python3.8', '/usr/local/envs/l5kit/lib/python3.8/lib-dynload', '', '/mnt/share_disk/user/.local/lib/python3.8/site-packages', '/mnt/share_disk/user/xijinhao/l5kit/l5kit', '/mnt/share_disk/user/changzhuorui/DenseTNT/DenseTNT/src/argoverse_api', '/usr/local/envs/l5kit/lib/python3.8/site-packages', '/mnt/share_disk/user/daixingyuan/l5kit']
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lig

In [73]:
def load_dataset(cfg, traffic_signal_scene_id=None):
    """Build the vectorized ego training dataset described by ``cfg``.

    Args:
        cfg: Configuration mapping. ``cfg["train_data_loader"]["key"]`` is the
            dataset key passed to ``LocalDataManager.require``; the whole
            mapping is also forwarded to ``build_vectorizer`` and
            ``EgoDatasetVectorized``.
        traffic_signal_scene_id: Optional scene id. When it is not ``None``
            (``0`` included), the result is narrowed to that scene through
            ``get_scene_dataset``. ``None`` keeps the full dataset.

    Returns:
        The ``EgoDatasetVectorized`` instance, or the single-scene dataset
        returned by ``get_scene_dataset`` when a scene id is given.
    """
    dm = LocalDataManager(None)
    # ===== INIT DATASET
    # cfg["train_data_loader"]["key"] = "train.zarr"
    train_zarr = ChunkedDataset(dm.require(cfg["train_data_loader"]["key"])).open()

    vectorizer = build_vectorizer(cfg, dm)
    train_dataset = EgoDatasetVectorized(cfg, train_zarr, vectorizer)

    # TODO: demo for a single scene.
    # Compare against None explicitly: a plain truthiness test would treat
    # scene id 0 as "no scene requested" and return the full dataset.
    if traffic_signal_scene_id is not None:
        train_dataset = train_dataset.get_scene_dataset(traffic_signal_scene_id)
    print(train_dataset)
    return train_dataset


# Scene used for the single-scene demo below.
traffic_signal_scene_id = 13
cfg = load_config_data(str(Path(project_path) / "examples/urban_driver/config.yaml"))

train_dataset = load_dataset(cfg, traffic_signal_scene_id=traffic_signal_scene_id)



+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|     1      |    249     |   22099    |      6228     |       0.01      |        249.00        |        88.75         |        24.90         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+


  dataset = ChunkedDataset("")


+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|     1      |    249     |   22099    |      6228     |       0.01      |        249.00        |        88.75         |        24.90         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+


## Compute truncated value

In [71]:
# Both batch_size and pred_len can be changed.
batch_size = 16
pred_len = 10  # prediction horizon

# With batch_size = 16, the first 16 samples are fed to the network, which
# outputs the corresponding next_state, action, reward and value.
# The value target is computed with a rolling window, so 16 + 10 samples are
# needed in total. For example, V(s1) of the first sample is the sum of the
# rewards of samples 2..11, i.e. V(s1) = r2 + r3 + ... + r11.
sample_trajectory_len = batch_size + pred_len

train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=sample_trajectory_len,
    # Do not shuffle: consecutive samples are required to build the
    # truncated value.
    shuffle=False,
    num_workers=1,
)
tr_it = iter(train_dataloader)
data_batch = next(tr_it)

data_batch['extent'].shape, data_batch["target_positions"].shape


(torch.Size([26, 3]), torch.Size([26, 12, 2]))

In [83]:
from scripts import reward

def get_reward_per_batch(frame):
    """Combine the two reward terms computed by the ``reward`` module.

    Args:
        frame: Input forwarded unchanged to both ``reward`` helpers
            (the caller in this notebook passes a whole data batch).

    Returns:
        ``-get_distance_to_centroid_per_frame(frame)
        + get_distance_to_other_agents_per_frame(frame)``.
    """
    # TODO @jinhao: rename the *_per_frame helpers below; they should be
    # *_per_batch now.
    centroid_term = reward.get_distance_to_centroid_per_frame(frame)
    other_agents_term = reward.get_distance_to_other_agents_per_frame(frame)
    return -centroid_term + other_agents_term

def get_truncated_value(_data_batch, _top_n_data, _pred_len):
    """Compute truncated value targets for the first ``_top_n_data`` samples.

    For sample ``i`` the target is the sum of the rewards of the following
    ``_pred_len`` samples: ``r[i + 1] + ... + r[i + _pred_len]``.

    Args:
        _data_batch: Batch passed to ``get_reward_per_batch``.
            Assumes the returned reward is a torch tensor whose first
            dimension indexes consecutive samples - TODO confirm.
        _top_n_data: Number of leading samples to compute a target for.
        _pred_len: Number of future rewards summed into each target.

    Returns:
        Tensor whose first dimension has size ``_top_n_data``; any trailing
        reward dimensions are preserved.

    Raises:
        ValueError: If either count is negative, or if the rewards are too
            short to provide ``_pred_len`` future samples for every target.
    """
    if _top_n_data < 0 or _pred_len < 0:
        raise ValueError(
            f"_top_n_data and _pred_len must be non-negative, "
            f"got {_top_n_data} and {_pred_len}"
        )

    all_frame_reward = get_reward_per_batch(_data_batch)
    num_samples = all_frame_reward.shape[0]
    if _top_n_data + _pred_len > num_samples:
        raise ValueError(
            f"need {_top_n_data + _pred_len} samples "
            f"(_top_n_data + _pred_len) but the batch only has {num_samples}"
        )

    # Nothing to sum: return zeros (or an empty tensor) of the right shape
    # instead of failing on an empty stack / zero-length window.
    if _top_n_data == 0 or _pred_len == 0:
        return all_frame_reward.new_zeros((_top_n_data, *all_frame_reward.shape[1:]))

    # Rewards r[1] .. r[_top_n_data + _pred_len - 1] are the only ones used.
    # Sliding a window of length _pred_len with step 1 over them yields
    # exactly _top_n_data windows; window i holds r[i + 1 : i + _pred_len + 1].
    future_rewards = all_frame_reward[1:_top_n_data + _pred_len]
    return future_rewards.unfold(0, _pred_len, 1).sum(dim=-1)

truncated_value_target = get_truncated_value(
    _data_batch=data_batch,
    _top_n_data=batch_size,
    _pred_len=pred_len,
)
print(truncated_value_target, truncated_value_target.shape)


# TODO @jinhao

# Let _pred_len = 10.
# For example, V(s1) of the first sample is the sum of the rewards of
# samples 2..11, i.e. V(s1) = r2 + r3 + ... + r11.
# Note: the truncated value does not need V(s12) added on top; see
# equations (8)(9) in the UMBRELLA paper and this MBOP implementation:
# https://github.com/zhanzxy5/MOPP/blob/3f9f567f7fffe0e20a9402ffc53f452e78c4137e/rl_planning/mbop_agent.py#L143

# data_batch_as_state = {k: v[:batch_size] for k, v in data_batch.items()}
# action, prediction, reward, value = model(data_batch_as_state)
# loss_value = F.mse_loss(value, truncated_value_target)



tensor([13.2083, 13.1780, 13.2734, 13.3055, 13.3941, 13.5879, 13.9029, 14.4856,
        15.2478, 16.0507, 16.8346, 17.5826, 18.3533, 19.1106, 19.8922, 20.5748]) torch.Size([16])
