In [None]:

!pip install --upgrade pip
!pip install pymap3d==2.1.0
!pip install -U l5kit


In [None]:

import numpy as np
import os
import torch

from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet18, resnet50, resnet34
from torchvision.models import vgg19
from tqdm import tqdm
from typing import Dict

from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer

from torch.utils.data.dataset import Subset
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS


In [None]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.resnet import resnet50, resnet18, resnet34, resnet101
from tqdm import tqdm

import l5kit
from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import matplotlib.pyplot as plt

import os
import random
import time

import warnings
warnings.filterwarnings("ignore")

from IPython.display import display
from tqdm import tqdm_notebook
import gc, psutil

print(l5kit.__version__)

## Memory information

In [None]:
def memory(verbose=True):
    """Return the physical memory currently in use, in GB (1024**3 bytes).

    Args:
        verbose: when true, also print used / available / total physical memory.

    Returns:
        ``(total - available) / 1024**3`` as reported by ``psutil.virtual_memory()``.
    """
    bytes_per_gb = 1024 ** 3
    stats = psutil.virtual_memory()
    used_gb = (stats.total - stats.available) / bytes_per_gb
    if verbose:
        available_gb = stats.available / bytes_per_gb
        total_gb = stats.total / bytes_per_gb
        print('Physical memory:',
              '%.2f GB (used),' % used_gb,
              '%.2f GB (available)' % available_gb, '/',
              '%.2f GB' % total_gb)
    return used_gb

def gc_memory(verbose=True):
    """Force a garbage-collection pass and optionally report what it did.

    Args:
        verbose: when true, print the value returned by ``gc.collect()`` followed
            by the physical-memory summary from ``memory()``.

    Returns:
        None.
    """
    collected = gc.collect()
    if not verbose:
        return
    # end=' | ' keeps the memory summary on the same output line.
    print('GC:', collected, end=' | ')
    memory()

# Print the memory summary once before any dataset is opened; the trailing ';'
# keeps the notebook from echoing the returned value.
memory();

### Input path and run flags

In [None]:
# Root folder of the competition data; exported further down as L5KIT_DATA_FOLDER
# and used to locate scenes/mask.npz for the test set.
DIR_INPUT = "/kaggle/input/lyft-motion-prediction-autonomous-vehicles"
# When True, cfg['train_params']['max_num_steps'] is set to 100 instead of 12000.
DEBUG = False
# NOTE(review): not referenced by any cell visible in this notebook — presumably
# meant to toggle a validation pass; confirm before relying on it.
VALIDATION = True

## Config information

In [None]:

# Single configuration dictionary shared by the rasterizer, the datasets, the
# model and the training loop.
cfg = {
    'format_version': 4,
    'model_params': {
        # Name of the backbone constructor that Resnet_network resolves.
        'model_architecture': 'resnet34',
        # Resnet_network sizes its first conv as 3 + 2 * (history_num_frames + 1) input channels.
        'history_num_frames': 10,
        'history_step_size': 1,
        'history_delta_time': 0.1,
        # Resnet_network emits 2 * future_num_frames values per sample.
        'future_num_frames': 50,
        'future_step_size': 1,
        'future_delta_time': 0.1,
        # Prefix of the checkpoint and metrics file names written by the training loop.
        'model_name' : 'resnet34_gru'
    },
    
    # Handed (as part of cfg) to build_rasterizer.
    'raster_params': {
        'raster_size': [224, 224],
        'pixel_size': [0.5, 0.5],
        'ego_center': [0.25, 0.5],
        'map_type': 'py_semantic',
        'satellite_map_key': 'aerial_map/aerial_map.png',
        'semantic_map_key': 'semantic_map/semantic_map.pb',
        'dataset_meta_key': 'meta.json',
        'filter_agents_threshold': 0.5,
        'disable_traffic_light_faces': False
    },
    
    # 'key' is resolved through the data manager; the remaining entries are
    # DataLoader options. batch_size is also passed to the GRU decoder.
    'train_data_loader': {
        'key': 'scenes/train.zarr',
        'batch_size': 16,
        'shuffle': True,
        'num_workers': 4
    },
    
    # NOTE(review): shuffle is enabled for validation as well — confirm this is intended.
    'valid_data_loader': {
        'key': 'scenes/validate.zarr',
        'batch_size': 16,
        'shuffle': True,
        'num_workers': 4
    },

    
    'test_data_loader': {
        'key': 'scenes/test.zarr',
        'batch_size': 16,
        'shuffle': False,
        'num_workers': 4
    },
    
    # The training loop in this notebook reads 'steps', 'update_steps' and
    # 'checkpoint_steps'. The other keys (including the DEBUG-dependent
    # 'max_num_steps') are not read by any cell visible here.
    'train_params': {
        'max_num_steps': 100 if DEBUG else 12000,
        'checkpoint_every_n_steps': 1000,
        'eval_every_n_steps': 1000,
        'steps': 12000,
        'update_steps': 100,
        'checkpoint_steps': 3000,
        # 'eval_every_n_steps': -1
    }
}


### environment variable

In [None]:
# set env variable for data

# LocalDataManager is given no explicit folder (None); presumably it falls back
# to the L5KIT_DATA_FOLDER variable set on the previous line — confirm against
# the l5kit documentation.
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT
dm = LocalDataManager(None)

### rasterizer

In [None]:
%%time
# ===== INIT DATASET

# Rasterizer
# Built once from cfg and the data manager, then shared by the train,
# validation and test AgentDataset objects created in later cells.
rasterizer = build_rasterizer(cfg, dm)

## Train data loader

In [None]:
%%time
# Train dataset
train_cfg = cfg["train_data_loader"]
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open(cached=False)  # to prevent run out of memory
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
train_dataloader = DataLoader(train_dataset, shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"], num_workers=train_cfg["num_workers"],
                              pin_memory=True
                             )
print(train_dataset)

## Validation data loader

In [None]:
%%time
# Train dataset
valid_cfg = cfg["valid_data_loader"]
valid_zarr = ChunkedDataset(dm.require(valid_cfg["key"])).open(cached=False)  # to prevent run out of memory
valid_dataset = AgentDataset(cfg, valid_zarr, rasterizer)
valid_dataloader = DataLoader(valid_dataset, shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"], num_workers=valid_cfg["num_workers"],
                              pin_memory=True
                             )
print(train_dataset)

## Resnet-GRU model, single trajectory

In [None]:

class GRU_layer(nn.Module):
    """Bidirectional GRU decoder applied to a single-step feature sequence.

    Each feature vector is treated as a length-1 sequence. It is passed through
    ``GRU_layer_1`` and then twice through ``GRU_layer_2``; after every GRU the
    shared projection ``seq_1`` maps the ``2 * hidden_size`` bidirectional
    output back to ``out_dim``.

    Args:
        device: device the sub-modules are moved to at construction time.
        batch: nominal batch size. Stored for backward compatibility only;
            ``forward`` infers the real batch size from its input.
        in_dim: size of each input feature vector.
        out_dim: size of each output vector (also the input size of ``GRU_layer_2``).
        hidden_size: hidden size of each GRU direction.
    """

    def __init__(self, device, batch=32, in_dim=512, out_dim=1024, hidden_size=2048):
        super().__init__()
        self.batch = batch
        self.in_dim = in_dim
        self.out_dim = out_dim

        self.GRU_layer_1 = nn.GRU(in_dim, hidden_size, batch_first=True, bidirectional=True)
        self.GRU_layer_2 = nn.GRU(out_dim, hidden_size, batch_first=True, bidirectional=True)

        self.seq_1 = nn.Sequential(
            nn.Linear(in_features=2 * hidden_size, out_features=2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, out_dim),
        )

        # Move every sub-module at once instead of only some of them.
        self.to(device)

    def forward(self, x):
        """Decode features of shape ``(batch, in_dim)`` into ``(batch, 1, out_dim)``."""
        # -1 lets the batch dimension follow the input, so a final partial batch
        # (or any batch size other than ``self.batch``) is handled.
        features = x.view(-1, 1, self.in_dim)

        # No initial state is passed: nn.GRU then starts from zeros, which is
        # what the former fixed-size ``hidden_temp`` tensor provided.
        out_1, hidden_state_1 = self.GRU_layer_1(features)
        out_1 = self.seq_1(out_1)

        out_2, hidden_state_2 = self.GRU_layer_2(out_1, hidden_state_1)
        out_2 = self.seq_1(out_2)

        # The final hidden state of this last pass is not needed.
        out_3, _ = self.GRU_layer_2(out_2, hidden_state_2)
        out_3 = self.seq_1(out_3)

        return out_3


class Resnet_network(nn.Module):
    """ResNet image encoder followed by a GRU decoder that regresses one trajectory.

    The backbone's first convolution is replaced so it accepts
    ``3 + 2 * (history_num_frames + 1)`` input channels. The pooled backbone
    features go through ``GRU_layer`` and a final linear layer that outputs
    ``2 * future_num_frames`` values per sample.

    Args:
        cfg: configuration dictionary; reads ``model_params.model_architecture``,
            ``model_params.history_num_frames``, ``model_params.future_num_frames``
            and ``train_data_loader.batch_size``.
        device: device handed to the GRU decoder.
    """

    def __init__(self, cfg: Dict, device):

        super().__init__()
        architecture = cfg["model_params"]["model_architecture"]
        # NOTE(review): eval() resolves the constructor from a config string;
        # keep the config trusted, or replace this with an explicit lookup table.
        backbone = eval(architecture)(pretrained=True, progress=True)
        self.backbone = backbone

        num_history_channels = (
            cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # Same geometry as the original first conv, but with the raster's channel count.
        self.backbone.conv1 = nn.Conv2d(
            num_in_channels,
            self.backbone.conv1.out_channels,
            kernel_size=self.backbone.conv1.kernel_size,
            stride=self.backbone.conv1.stride,
            padding=self.backbone.conv1.padding,
            bias=False,
        )

        # Width of the pooled feature vector, read from the backbone's own
        # classifier (512 for resnet18/34) rather than hard-coded.
        backbone_out_features = self.backbone.fc.in_features

        self.future_len = cfg["model_params"]["future_num_frames"]
        self.batch_size = cfg['train_data_loader']['batch_size']
        num_targets = 2 * self.future_len

        self.num_preds = num_targets

        self.gru_decoder = GRU_layer(
            device,
            batch=self.batch_size,
            in_dim=backbone_out_features
            )

        # Input width follows the decoder's output size.
        self.logit = nn.Linear(self.gru_decoder.out_dim, out_features=self.num_preds)

    def forward(self, x, criterion=None):
        """Predict a flattened trajectory for each raster in ``x``.

        Args:
            x: image batch accepted by the backbone's first convolution.
            criterion: unused; kept so existing callers that pass it still work.
                Its default is ``None`` because the previous default named a
                function that is not defined in this notebook.

        Returns:
            Tensor of shape ``(batch, 2 * future_num_frames)``.
        """
        out = self.backbone.conv1(x)
        out = self.backbone.bn1(out)
        out = self.backbone.relu(out)
        out = self.backbone.maxpool(out)

        out = self.backbone.layer1(out)
        out = self.backbone.layer2(out)
        out = self.backbone.layer3(out)
        out = self.backbone.layer4(out)

        out = self.backbone.avgpool(out)
        out = torch.flatten(out, 1)

        out = self.gru_decoder(out)

        # Drop the length-1 sequence axis of the decoder output. This must
        # operate on ``out``, not on the input image ``x``.
        out = out.squeeze(1)

        preds = self.logit(out)

        return preds

## Model and optimizer initiation

In [None]:
# ==== INIT MODEL
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Resnet_network(cfg, device)

# Optional warm start: set weight_path to a saved state_dict to resume from it,
# e.g. '/kaggle/input/rmse-model/resnet34_gru_final.pth'. With None (the
# default) no checkpoint is loaded, matching the previous behaviour where the
# loading code was disabled inside a string literal.
weight_path = None
if weight_path is not None:
    model.load_state_dict(torch.load(weight_path))

model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# reduction="none" keeps the per-element losses so they can be masked by the
# target availabilities before averaging.
criterion = nn.MSELoss(reduction="none")

### Forward pass with availability-masked MSE loss

In [None]:
def forward(data, model, device, criterion):
    """Run the model on one batch and compute the availability-masked loss.

    Args:
        data: batch dictionary providing ``"image"``, ``"target_positions"``
            and ``"target_availabilities"`` tensors.
        model: callable mapping the image batch to predictions holding as many
            elements as ``target_positions`` (flat or already shaped).
        device: device the batch tensors are moved to.
        criterion: element-wise loss (no reduction), called as
            ``criterion(preds, targets)``.

    Returns:
        ``(loss, preds)``: the scalar mean of the masked element-wise loss, and
        the predictions reshaped to the shape of ``target_positions``.
    """
    inputs = data["image"].to(device)
    targets = data["target_positions"].to(device)
    target_availabilities = data["target_availabilities"].to(device)

    # One flag per time step must mask every coordinate of that step: add a
    # trailing axis so (batch, steps) broadcasts against (batch, steps, coords).
    if target_availabilities.dim() < targets.dim():
        target_availabilities = target_availabilities.unsqueeze(-1)

    # The model emits a flat vector per sample; give it the target layout so the
    # element-wise criterion compares matching coordinates.
    preds = model(inputs).reshape(targets.shape)

    loss = criterion(preds, targets)
    loss = loss * target_availabilities
    loss = loss.mean()
    return loss, preds

## training loop

In [None]:
%%time

tr_it = iter(train_dataloader)
n_steps = cfg["train_params"]["steps"]
progress_bar = tqdm_notebook(range(1, 1 + n_steps), mininterval=5.)
losses = []
iterations = []
metrics = []
memorys = []
times = []
model_name = cfg["model_params"]["model_name"]
update_steps = cfg['train_params']['update_steps']
checkpoint_steps = cfg['train_params']['checkpoint_steps']
t_start = time.time()
torch.set_grad_enabled(True)

for i in progress_bar:
    try:
        data = next(tr_it)
    except StopIteration:
        tr_it = iter(train_dataloader)
        data = next(tr_it)
    model.train()
    loss, _ = forward(data, model, device, criterion)

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    loss_v = loss.item()
    losses.append(loss_v)

    if i % update_steps == 0:
        mean_losses = np.mean(losses)
        timespent = (time.time() - t_start) / 60
        print('i: %5d'%i,
              'loss: %10.5f'%loss_v, 'loss(avg): %10.5f'%mean_losses, 
              '%.2fmins'%timespent, end=' | ')
        mem = memory()
        if i % checkpoint_steps == 0:
            torch.save(model.state_dict(), f'{model_name}_{i}.pth')
            torch.save(optimizer.state_dict(), f'{model_name}_optimizer_{i}.pth')
        iterations.append(i)
        metrics.append(mean_losses)
        memorys.append(mem)
        times.append(timespent)

torch.save(model.state_dict(), f'{model_name}_final.pth')
torch.save(optimizer.state_dict(), f'{model_name}_optimizer_final.pth')
results = pd.DataFrame({
    'iterations': iterations, 
    'metrics (avg)': metrics,
    'elapsed_time (mins)': times,
    'memory (GB)': memorys,
})
results.to_csv(f'train_metrics_{model_name}_{n_steps}.csv', index=False)
print(f'Total training time is {(time.time() - t_start) / 60} mins')
memory()
display(results)

## Visualization of training metrics

In [None]:
# One figure per recorded series, each plotted against the training step; the
# column name doubles as the y-axis label.
for column in ('metrics (avg)', 'memory (GB)', 'elapsed_time (mins)'):
    plt.figure(figsize=(12, 4))
    plt.plot(results['iterations'], results[column])
    plt.xlabel('steps')
    plt.ylabel(column)
    plt.grid()
    plt.show()

## Test data loader

In [None]:
%%time
# Test dataset
test_cfg = cfg["test_data_loader"]
test_zarr = ChunkedDataset(dm.require(test_cfg["key"])).open(cached=False)  # to prevent run out of memory
test_mask = np.load(f"{DIR_INPUT}/scenes/mask.npz")["arr_0"]
test_dataset = AgentDataset(cfg, test_zarr, rasterizer, agents_mask=test_mask)
test_dataloader = DataLoader(test_dataset, shuffle=test_cfg["shuffle"],
                             batch_size=test_cfg["batch_size"], num_workers=test_cfg["num_workers"],
                             pin_memory=False
                            )
print(test_dataset)

## Prediction loop

In [None]:
# Run the trained model over the test loader and collect, for every agent, the
# predicted future offsets rotated with the sample's world_from_agent matrix,
# together with its timestamp and track id.
model.eval()
torch.set_grad_enabled(False)

# store information for evaluation
future_coords_offsets_pd = []
timestamps = []
agent_ids = []
memorys_pred = []
t0 = time.time()
times_pred = []
iterations_pred = []

for i, data in enumerate(tqdm_notebook(test_dataloader, mininterval=5.)):
    # No try/except here: a failing batch must stop the run. Silently skipping
    # it would leave agents out of the submission.
    # forward() also returns a loss, which is not needed for prediction.
    _, preds = forward(data, model, device, criterion)

    # Single-trajectory output: accept either a flat (batch, 2 * steps) tensor
    # or an already shaped (batch, steps, 2) one.
    preds = preds.reshape(preds.shape[0], -1, 2)

    # Apply the 2x2 rotation part of world_from_agent to every point:
    # out[b, t, i] = sum_j rotation[b, i, j] * preds[b, t, j].
    rotation = data["world_from_agent"][:, :2, :2].float().to(device)
    preds = torch.sum(preds[:, :, None, :] * rotation[:, None, :, :], dim=-1).cpu().numpy()

    future_coords_offsets_pd.append(preds.copy())
    timestamps.append(data["timestamp"].numpy().copy())
    agent_ids.append(data["track_id"].numpy().copy())

    # Progress / memory report every 100 batches.
    if i%100 == 0:
        t = ((time.time() - t0) / 60)
        print('%4d'%i, '%6.2fmins'%t, end=' | ')
        mem = memory()
        iterations_pred.append(i)
        memorys_pred.append(mem)
        times_pred.append(t)
print('Total timespent: %6.2fmins'%((time.time() - t0) / 60))
memory()

## Submission file creation

In [None]:
%%time
# create submission to submit to Kaggle
pred_path = 'submission.csv'
write_pred_csv(
    pred_path,
    timestamps=np.concatenate(timestamps),
    track_ids=np.concatenate(agent_ids),
    coords=np.concatenate(future_coords_offsets_pd)
)