## Prepare Data path and load cfg

By setting the `L5KIT_DATA_FOLDER` variable, we can point the script to the folder where the data lies.

Then, we load our config file with relative paths and other configurations (rasteriser, training params...).

In [None]:
%%writefile setup_notebook_colab.sh
#!/bin/bash
# Colab bootstrap script: downloads the L5 sample dataset and semantic map into
# a temporary folder, fetches the example config, installs the Python
# dependencies and records the dataset folder in dataset_dir.txt.
#
# NOTE: the notebook launches this file with `sh`, so it must stay POSIX-sh
# compatible (no bash-only options such as `pipefail`).

# Stop at the first failing command or unset variable, so a broken download or
# install never reaches the "ready" message or writes dataset_dir.txt.
set -eu

# Make temporary download and dataset folders
TEMP_DOWNLOAD_DIR=$(mktemp -d)
TEMP_DATASET_DIR=$(mktemp -d)

# Download sample zarr
echo "Downloading sample zarr dataset..."
wget https://lyft-l5-datasets-public.s3-us-west-2.amazonaws.com/prediction/v1.1/sample.tar \
    -q --show-progress -P "$TEMP_DOWNLOAD_DIR"

mkdir -p "$TEMP_DATASET_DIR/scenes"
tar xf "$TEMP_DOWNLOAD_DIR/sample.tar" -C "$TEMP_DATASET_DIR/scenes"

# Download semantic map
echo "Downloading semantic map..."
wget https://lyft-l5-datasets-public.s3-us-west-2.amazonaws.com/prediction/v1.1/semantic_map.tar \
    -q --show-progress -P "$TEMP_DOWNLOAD_DIR"
mkdir -p "$TEMP_DATASET_DIR/semantic_map"
tar xf "$TEMP_DOWNLOAD_DIR/semantic_map.tar" -C "$TEMP_DATASET_DIR/semantic_map"
# A copy of meta.json is also placed at the dataset root.
cp "$TEMP_DATASET_DIR/semantic_map/meta.json" "$TEMP_DATASET_DIR/meta.json"

# Fetch the example training config into the current working directory
wget https://raw.githubusercontent.com/woven-planet/l5kit/master/examples/agent_motion_prediction/agent_motion_config.yaml -q

# Install L5Kit
echo "Installing L5kit..."
pip install --progress-bar off --quiet -U l5kit pyyaml
pip install ray==2.0.0rc1 --quiet
pip install "ray[air]" --quiet
pip install -U wandb --quiet

echo "Dataset and L5kit are ready !"
# The notebook reads this file to set L5KIT_DATA_FOLDER.
echo "$TEMP_DATASET_DIR" > "dataset_dir.txt"

Writing setup_notebook_colab.sh


In [None]:
#@title Download L5 Sample Dataset and install L5Kit
#TODO: Place in own step and log to wandb as data logging step
import os
RunningInCOLAB = 'google.colab' in str(get_ipython())
if RunningInCOLAB:
    !sh ./setup_notebook_colab.sh
    os.environ["L5KIT_DATA_FOLDER"] = open("./dataset_dir.txt", "r").read().strip()
else:
    print("Not running in Google Colab.")
    os.environ["L5KIT_DATA_FOLDER"] = "/tmp/l5kit_data"

Downloading sample zarr dataset...
Downloading semantic map...
Installing L5kit...
[?25l
[?25h[?25l
[?25h[?25l
[?25h[?25l
[?25h[?25l
[?25h[?25l
[?25h[?25l
[?25h[?25l
[?25h[?25l
[?25h  Building wheel for ptable (setup.py) ... [?25l[?25hdone
  Building wheel for transforms3d (setup.py) ... [?25l[?25hdone
  Building wheel for asciitree (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
xarray-einstats 0.2.2 requires numpy>=1.21, but you have numpy 1.19.5 which is incompatible.
tensorflow 2.8.2+zzzcolab20220719082949 requires numpy>=1.20, but you have numpy 1.19.5 which is incompatible.
cmdstanpy 1.0.4 requires numpy>=1.21, but you have numpy 1.19.5 which is incompatible.[0m
[K     |████████████████████████████████| 59.4 MB 1.4 MB/s 
[K     |████████████████████████████████| 4.1 MB 39.6 MB/s 
[K     |█████

In [None]:
import wandb
# Authenticate with Weights & Biases up front: the W&B logger callback used
# for training assumes a login has already happened.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet50
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import os

In [None]:
# Point L5Kit at the data folder if it has not been set yet, e.g.:
# os.environ["L5KIT_DATA_FOLDER"] = "PATH_TO_DATA"

# Read the experiment configuration (relative paths, rasteriser, training params...).
cfg = load_config_data("./agent_motion_config.yaml")

# When `sample` is on, train on the small sample zarr instead of the key
# configured in the yaml file.
sample = True
if sample:
    cfg["train_data_loader"].update(key="scenes/sample.zarr")

## Model

Our baseline is a simple `resnet50` pretrained on `imagenet`. We must replace the input and the final layer to address our requirements.

In [None]:
def build_model(cfg: Dict) -> torch.nn.Module:
    """Build the ResNet-50 baseline with its input and output layers replaced.

    Args:
        cfg: configuration dict. Reads ``cfg["model_params"]["history_num_frames"]``
            to size the input layer and ``cfg["model_params"]["future_num_frames"]``
            to size the output layer.

    Returns:
        A ``resnet50`` whose ``conv1`` accepts the rasterizer's channel count and
        whose ``fc`` emits ``2 * future_num_frames`` values. Both replaced layers
        are newly constructed, so they do not carry pre-trained weights.
    """
    # load pre-trained Conv2D model
    # NOTE(review): the recorded run output shows torchvision warning that
    # `pretrained` is deprecated since 0.13; kept as-is so older torchvision
    # releases keep working.
    model = resnet50(pretrained=True)

    # change input channels number to match the rasterizer's output
    # two channels per frame (current frame + history)
    num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
    # presumably the extra 3 channels are the RGB map raster; confirm against the rasterizer
    num_in_channels = 3 + num_history_channels
    model.conv1 = nn.Conv2d(
        num_in_channels,
        model.conv1.out_channels,
        kernel_size=model.conv1.kernel_size,
        stride=model.conv1.stride,
        padding=model.conv1.padding,
        bias=False,
    )
    # change output size to (X, Y) * number of future states
    num_targets = 2 * cfg["model_params"]["future_num_frames"]
    # Take the feature width from the layer being replaced instead of
    # hard-coding 2048, mirroring how conv1 is rebuilt above.
    model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_targets)

    return model

In [None]:
def forward(data, model, criterion):
    """Run the model on one batch and compute the availability-masked loss.

    Args:
        data: batch mapping providing the "image", "target_availabilities"
            and "target_positions" entries.
        model: callable applied to ``data["image"]``; its output is reshaped
            to the shape of the target positions.
        criterion: loss callable whose result is multiplied by the
            availabilities before averaging (so it should not reduce itself).

    Returns:
        Tuple ``(loss, outputs)``: the mean of the masked loss and the
        reshaped model predictions.
    """
    images = data["image"]
    # A trailing singleton axis lets the mask broadcast over the last axis of the loss.
    valid_mask = data["target_availabilities"].unsqueeze(-1)
    ground_truth = data["target_positions"]

    predictions = model(images).reshape(ground_truth.shape)

    # Not every future step is valid: zero out the unavailable ones, then average.
    masked_loss = criterion(predictions, ground_truth) * valid_mask
    return masked_loss.mean(), predictions

In [None]:
def train_prediction_model_epoch(data, model, criterion, optimizer):
    """Perform one optimisation step on a single batch.

    Runs ``forward`` on the batch, clears the optimizer's gradients,
    back-propagates the loss and applies the optimizer update.

    Returns:
        Tuple ``(loss, outputs)`` exactly as produced by ``forward``.
    """
    batch_loss, predictions = forward(data, model, criterion)

    # Gradients are reset only after the forward pass, right before backprop.
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    return batch_loss, predictions

## Load the Train Data

Our data pipeline maps a raw `.zarr` folder into a multi-processing instance ready for training by:
- loading the `zarr` into a `ChunkedDataset` object. This object has a reference to the different arrays in the zarr (e.g. agents and traffic lights);
- wrapping the `ChunkedDataset` into an `AgentDataset`, which inherits from torch `Dataset` class;
- passing the `AgentDataset` into a torch `DataLoader`

In [None]:
import ray.train as train
from ray.air import session

In [None]:
def train_prediction_model(cfg : Dict):
    """Per-worker training loop handed to Ray Train's ``TorchTrainer``.

    Builds the rasterizer, dataset and data loader from ``cfg``, creates the
    model/optimizer/criterion, then runs ``cfg["train_params"]["max_num_steps"]``
    optimisation steps, restarting the data loader whenever it is exhausted.
    After every step the current and running-average loss are reported to the
    Ray session so they show up in ``Result.metrics`` and in logger callbacks.

    Args:
        cfg: configuration dict; reads the "train_data_loader" section
            (``key``, ``shuffle``, ``batch_size``, ``num_workers``) and
            ``cfg["train_params"]["max_num_steps"]``, and is forwarded to
            ``build_rasterizer``, ``AgentDataset`` and ``build_model``.
    """
    # ==== INIT DATASET
    dm = LocalDataManager(None)
    train_cfg = cfg["train_data_loader"]
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)

    shuffle = train_cfg["shuffle"]
    batch_size = train_cfg["batch_size"]
    num_workers = train_cfg["num_workers"]
    # Split the global batch across workers. Floor division would give 0 when
    # there are more workers than samples per batch, which DataLoader rejects,
    # so never go below one sample per worker.
    batch_size_per_worker = max(1, batch_size // session.get_world_size())
    train_dataloader = DataLoader(
        train_dataset,
        shuffle=shuffle,
        batch_size=batch_size_per_worker,
        num_workers=num_workers,
    )
    train_dataloader = train.torch.prepare_data_loader(train_dataloader)

    # ==== INIT MODEL
    # No explicit device handling here: the model and data loader are handed to
    # Ray Train's prepare_* helpers instead.
    model = build_model(cfg)
    model = train.torch.prepare_model(model)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss(reduction="none")

    # ==== TRAIN LOOP
    # Nothing inside the loop leaves training mode, so set it once up front.
    model.train()
    torch.set_grad_enabled(True)

    tr_it = iter(train_dataloader)
    progress_bar = tqdm(range(cfg["train_params"]["max_num_steps"]))
    losses_train = []
    for step in progress_bar:
        try:
            data = next(tr_it)
        except StopIteration:
            # Data loader exhausted before max_num_steps: start another pass.
            tr_it = iter(train_dataloader)
            data = next(tr_it)
        loss, _ = train_prediction_model_epoch(data, model, criterion, optimizer)

        # Convert the loss tensor to a Python float once and reuse it.
        loss_value = loss.item()
        losses_train.append(loss_value)
        loss_avg = np.mean(losses_train)
        progress_bar.set_description(f"loss: {loss_value} loss(avg): {loss_avg}")

        # Without this report the trainer result only carries trial metadata
        # and the W&B logger callback has no training metrics to log.
        session.report({"loss": loss_value, "loss_avg": float(loss_avg), "step": step + 1})

# Training

Note: if you're on macOS and using the `py_satellite` rasterizer, you may need to disable OpenCV multithreading by adding
`cv2.setNumThreads(0)` before the following cell. This seems to only affect running in a Python notebook, and it's caused by the `cv2.warpAffine` function.

In [None]:
from ray.train.torch import TorchTrainer
from ray.air.config import RunConfig, ScalingConfig
from ray.air.callbacks.wandb import WandbLoggerCallback

In [None]:
# Weights & Biases project name passed to the W&B logger callback.
wandb_project = "level-5-pred"

In [None]:
# Configure the Ray Train job: a single worker runs `train_prediction_model`
# with `cfg` as its argument, and results are forwarded to Weights & Biases.
trainer = TorchTrainer(
    train_loop_per_worker=train_prediction_model,
    train_loop_config=cfg,
    # Request a GPU only when one is actually visible, so the notebook can
    # also run on CPU-only machines (e.g. the non-Colab path).
    scaling_config=ScalingConfig(num_workers=1, use_gpu=torch.cuda.is_available()),
    run_config=RunConfig(
        callbacks=[
            # This is the part needed to enable logging to Weights & Biases.
            # It assumes you've logged in before, e.g. with `wandb login`.
            WandbLoggerCallback(
                project=wandb_project,
                save_checkpoints=False,
            )
        ]
    ),
)

2022-08-17 03:31:28,411	INFO worker.py:1487 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m.


In [None]:
# Run the training job and print the metrics attached to its final result.
result = trainer.fit()
print(f"Last result: {result.metrics}")

2022-08-17 03:31:30,560	INFO wandb.py:119 -- Already logged into W&B.


Trial name,status,loc
TorchTrainer_1570f_00000,TERMINATED,172.28.0.2:509


[2m[36m(RayTrainWorker pid=556)[0m 2022-08-17 03:31:43,776	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=556)[0m your cfg has loaded filter_agents_threshold=0.5;
[2m[36m(RayTrainWorker pid=556)[0m but that value doesn't have a match among the agents_mask in the zarr
[2m[36m(RayTrainWorker pid=556)[0m Mask will now be generated for that parameter.
[2m[36m(RayTrainWorker pid=556)[0m   agents_mask = self.load_agents_mask()
0it [00:00, ?it/s]


[2m[36m(RayTrainWorker pid=556)[0m starting pool...


1/100: : 1it [00:00,  2.19it/s]
2/100: : 2it [00:00,  3.08it/s]
3/100: : 3it [00:00,  3.28it/s]
4/100: : 3it [00:01,  3.28it/s]
5/100: : 5it [00:01,  4.67it/s]
6/100: : 6it [00:01,  5.39it/s]
7/100: : 7it [00:01,  3.65it/s]
8/100: : 8it [00:01,  4.48it/s]
9/100: : 9it [00:02,  3.42it/s]
10/100: : 10it [00:02,  3.49it/s]
11/100: : 11it [00:03,  2.59it/s]
13/100: : 12it [00:04,  1.76it/s]
14/100: : 14it [00:05,  1.70it/s]
15/100: : 15it [00:05,  1.94it/s]
17/100: : 16it [00:07,  1.42it/s]
18/100: : 18it [00:08,  1.48it/s]
19/100: : 18it [00:08,  1.48it/s]
20/100: : 20it [00:09,  1.63it/s]
21/100: : 21it [00:09,  1.82it/s]
22/100: : 22it [00:10,  1.67it/s]
23/100: : 23it [00:10,  2.04it/s]
24/100: : 24it [00:11,  2.08it/s]
25/100: : 25it [00:11,  2.41it/s]
26/100: : 26it [00:11,  2.21it/s]
27/100: : 27it [00:11,  2.83it/s]
28/100: : 28it [00:12,  2.41it/s]
29/100: : 29it [00:12,  2.92it/s]
30/100: : 30it [00:13,  2.25it/s]
31/100: : 31it [00:13,  2.37it/s]
33/100: : 32it [00:14,  1.77it/s

[2m[36m(RayTrainWorker pid=556)[0m collecting results..
[2m[36m(RayTrainWorker pid=556)[0m start report for /tmp/tmp.hjgZqjOqcl/scenes/sample.zarr
[2m[36m(RayTrainWorker pid=556)[0m {   'reject_th_AV_distance': 204220,
[2m[36m(RayTrainWorker pid=556)[0m     'reject_th_agent_filter_probability_threshold': 1324481,
[2m[36m(RayTrainWorker pid=556)[0m     'reject_th_extent': 84257,
[2m[36m(RayTrainWorker pid=556)[0m     'reject_th_yaw': 5178,
[2m[36m(RayTrainWorker pid=556)[0m     'th_agent_filter_probability_threshold': 0.5,
[2m[36m(RayTrainWorker pid=556)[0m     'th_distance_av': 50,
[2m[36m(RayTrainWorker pid=556)[0m     'th_extent_ratio': 1.1,
[2m[36m(RayTrainWorker pid=556)[0m     'th_yaw_degree': 30,
[2m[36m(RayTrainWorker pid=556)[0m     'total_agent_frames': 1893736,
[2m[36m(RayTrainWorker pid=556)[0m     'total_reject': 1618136}


[2m[36m(RayTrainWorker pid=556)[0m computing past/future table:   0%|          | 0/4 [00:00<?, ?it/s]
computing past/future table:  50%|█████     | 2/4 [00:00<00:00, 18.20it/s]
computing past/future table: 100%|██████████| 4/4 [00:00<00:00, 19.67it/s]


[2m[36m(RayTrainWorker pid=556)[0m +-------------+---------+--------+-------+-------+
[2m[36m(RayTrainWorker pid=556)[0m | past/future |    0    |   10   |   30  |   50  |
[2m[36m(RayTrainWorker pid=556)[0m +-------------+---------+--------+-------+-------+
[2m[36m(RayTrainWorker pid=556)[0m |      0      | 1893736 | 116054 | 64972 | 42984 |
[2m[36m(RayTrainWorker pid=556)[0m |      10     |  116054 | 83760  | 52277 | 36140 |
[2m[36m(RayTrainWorker pid=556)[0m |      30     |  64972  | 52277  | 36140 | 26364 |
[2m[36m(RayTrainWorker pid=556)[0m |      50     |  42984  | 36140  | 26364 | 19560 |
[2m[36m(RayTrainWorker pid=556)[0m +-------------+---------+--------+-------+-------+
[2m[36m(RayTrainWorker pid=556)[0m end report for /tmp/tmp.hjgZqjOqcl/scenes/sample.zarr


[2m[36m(RayTrainWorker pid=556)[0m   cpuset_checked))
[2m[36m(RayTrainWorker pid=556)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=556)[0m Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
  0%|          | 0.00/97.8M [00:00<?, ?B/s]
  1%|          | 768k/97.8M [00:00<00:13, 7.79MB/s]
  2%|▏         | 1.75M/97.8M [00:00<00:11, 8.99MB/s]
  3%|▎         | 2.98M/97.8M [00:00<00:09, 10.6MB/s]
  4%|▍         | 3.99M/97.8M [00:00<00:11, 8.42MB/s]
  5%|▌         | 5.05M/97.8M [00:00<00:13, 6.97MB/s]
  6%|▌         | 5.78M/97.8M [00:00<00:14, 6.88MB/s]
  7%|▋         | 6.48M/97.8M [00:00<00:14, 6.59MB/s]
  7%|▋         | 7.27M/97.8M [00:01<00:17, 5.40MB/s]
  9%|▊         | 8.34M/97.8M [00:01<00:14, 6.44MB/s]
 10%|█         | 9.83M/97.8M [00:01<00:10, 8.57MB/s]
 11%|█         | 10.8M/97.8M [00:01<00:10, 8.48MB/s]
 12%|█▏      

Trial TorchTrainer_1570f_00000 completed. Last result: 


2022-08-17 03:34:09,082	INFO tune.py:759 -- Total run time: 158.82 seconds (158.38 seconds for the tuning loop).


Last result: {'trial_id': '1570f_00000', 'experiment_id': 'ebf43b6f8aaa41f2a45293fe49ba5e16', 'date': '2022-08-17_03-31-39', 'timestamp': 1660707099, 'pid': 509, 'hostname': '7032d25ca7ac', 'node_ip': '172.28.0.2', 'config': {}, 'done': True}


In [None]:
# Display the full Result object (metrics, error and log directory).
result

Result(metrics={'trial_id': '1570f_00000', 'done': True}, error=None, log_dir=PosixPath('/root/ray_results/TorchTrainer_2022-08-17_03-31-30/TorchTrainer_1570f_00000_0_2022-08-17_03-31-30'))

### Plot Loss Curve
We can plot the train loss against the iterations (batch-wise)

In [None]:
# plt.plot(np.arange(len(losses_train)), losses_train, label="train loss")
# plt.legend()
# plt.show()

# Evaluation

Evaluation follows a slightly different protocol than training. When working with time series, we must be absolutely sure to avoid leaking the future in the data.

If we followed the same protocol of training, one could just read ahead in the `.zarr` and forge a perfect solution at run-time, even for a private test set.

As such, **the private test set for the competition has been "chopped" using the `chop_dataset` function**.

In [None]:
# # ===== GENERATE AND LOAD CHOPPED DATASET
# num_frames_to_chop = 100
# eval_cfg = cfg["val_data_loader"]
# eval_base_path = create_chopped_dataset(dm.require(eval_cfg["key"]), cfg["raster_params"]["filter_agents_threshold"], 
#                               num_frames_to_chop, cfg["model_params"]["future_num_frames"], MIN_FUTURE_STEPS)

The result is that **each scene has been reduced to only 100 frames**, and **only valid agents in the 100th frame will be used to compute the metrics**. Because following frames in the scene have been chopped off, we can't just look ahead to get the future of those agents.

In this example, we simulate this pipeline by running `chop_dataset` on the validation set. The function stores:
- a new chopped `.zarr` dataset, in which each scene has only the first 100 frames;
- a numpy mask array where only valid agents in the 100th frame are True;
- a ground-truth file with the future coordinates of those agents;

Please note how the total number of frames is now equal to the number of scenes multiplied by `num_frames_to_chop`. 

The remaining frames in the scene have been successfully chopped off from the data.

In [None]:
# eval_zarr_path = str(Path(eval_base_path) / Path(dm.require(eval_cfg["key"])).name)
# eval_mask_path = str(Path(eval_base_path) / "mask.npz")
# eval_gt_path = str(Path(eval_base_path) / "gt.csv")

# eval_zarr = ChunkedDataset(eval_zarr_path).open()
# eval_mask = np.load(eval_mask_path)["arr_0"]
# # ===== INIT DATASET AND LOAD MASK
# eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
# eval_dataloader = DataLoader(eval_dataset, shuffle=eval_cfg["shuffle"], batch_size=eval_cfg["batch_size"], 
#                              num_workers=eval_cfg["num_workers"])
# print(eval_dataset)

### Storing Predictions
There is a small catch to be aware of when saving the model predictions. The outputs of the model are coordinates in `agent` space and we need to convert them into displacements in `world` space.

To do so, we first convert them back into the `world` space and we then subtract the centroid coordinates.

In [None]:
# # ==== EVAL LOOP
# model.eval()
# torch.set_grad_enabled(False)

# # store information for evaluation
# future_coords_offsets_pd = []
# timestamps = []
# agent_ids = []

# progress_bar = tqdm(eval_dataloader)
# for data in progress_bar:
#     _, outputs = forward(data, model, criterion)
    
#     # convert agent coordinates into world offsets
#     agents_coords = outputs.cpu().numpy()
#     world_from_agents = data["world_from_agent"].numpy()
#     centroids = data["centroid"].numpy()
#     coords_offset = transform_points(agents_coords, world_from_agents) - centroids[:, None, :2]
    
#     future_coords_offsets_pd.append(np.stack(coords_offset))
#     timestamps.append(data["timestamp"].numpy().copy())
#     agent_ids.append(data["track_id"].numpy().copy())
    

### Save results
After the model has predicted trajectories for our evaluation set, we can save them in a `csv` file.

During the competition, only the `.zarr` and the mask will be provided for the private test set evaluation.
Your solution is expected to generate a csv file which will be compared to the ground truth one on a separate server

In [None]:
# pred_path = f"{gettempdir()}/pred.csv"

# write_pred_csv(pred_path,
#                timestamps=np.concatenate(timestamps),
#                track_ids=np.concatenate(agent_ids),
#                coords=np.concatenate(future_coords_offsets_pd),
#               )

### Perform Evaluation
Please note that our metric supports multi-modal predictions (i.e. multiple predictions for a single GT trajectory). In that case, you will need to provide a confidence for each prediction (confidences must all be between 0 and 1 and sum to 1).

In this simple example we don't generate multiple trajectories, so we won't pass any confidences vector. Internally, the metric computation will assume a single trajectory with confidence equal to 1

In [None]:
# metrics = compute_metrics_csv(eval_gt_path, pred_path, [neg_multi_log_likelihood, time_displace])
# for metric_name, metric_mean in metrics.items():
#     print(metric_name, metric_mean)


### Visualise Results
We can also visualise some results from the ego (AV) point of view for those frames of interest (the 100th of each scene).

However, as we chopped off the future from the dataset **we must use the GT csv if we want to plot the future trajectories of the agents**


In [None]:
# model.eval()
# torch.set_grad_enabled(False)

# # build a dict to retrieve future trajectories from GT
# gt_rows = {}
# for row in read_gt_csv(eval_gt_path):
#     gt_rows[row["track_id"] + row["timestamp"]] = row["coord"]

# eval_ego_dataset = EgoDataset(cfg, eval_dataset.dataset, rasterizer)

# for frame_number in range(99, len(eval_zarr.frames), 100):  # start from last frame of scene_0 and increase by 100
#     agent_indices = eval_dataset.get_frame_indices(frame_number) 
#     if not len(agent_indices):
#         continue

#     # get AV point-of-view frame
#     data_ego = eval_ego_dataset[frame_number]
#     im_ego = rasterizer.to_rgb(data_ego["image"].transpose(1, 2, 0))
#     center = np.asarray(cfg["raster_params"]["ego_center"]) * cfg["raster_params"]["raster_size"]
    
#     predicted_positions = []
#     target_positions = []

#     for v_index in agent_indices:
#         data_agent = eval_dataset[v_index]

#         out_net = model(torch.from_numpy(data_agent["image"]).unsqueeze(0).to(device))
#         out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()
#         # store absolute world coordinates
#         predicted_positions.append(transform_points(out_pos, data_agent["world_from_agent"]))
#         # retrieve target positions from the GT and store as absolute coordinates
#         track_id, timestamp = data_agent["track_id"], data_agent["timestamp"]
#         target_positions.append(gt_rows[str(track_id) + str(timestamp)] + data_agent["centroid"][:2])


#     # convert coordinates to AV point-of-view so we can draw them
#     predicted_positions = transform_points(np.concatenate(predicted_positions), data_ego["raster_from_world"])
#     target_positions = transform_points(np.concatenate(target_positions), data_ego["raster_from_world"])

#     draw_trajectory(im_ego, predicted_positions, PREDICTED_POINTS_COLOR)
#     draw_trajectory(im_ego, target_positions, TARGET_POINTS_COLOR)

#     plt.imshow(im_ego)
#     plt.show()