In [1]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchvision.models.resnet import resnet18
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import os

import pandas as pd


In [2]:
class Encoder(nn.Module):
    """Batch-first LSTM encoder over per-frame feature vectors.

    Every time step must be a vector of width ``1000 + 2`` (the input size
    the LSTM is constructed with). Presumably that is a 1000-wide CNN
    embedding concatenated with a 2-D position, as ``Seq2Seq.forward``
    builds it -- verify against the caller.
    """

    def __init__(self, hidden_size, num_layers):
        super().__init__()
        feature_width = 1000 + 2
        self.lstm = nn.LSTM(
            input_size=feature_width,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

    def forward(self, data):
        """Feed ``data`` through the LSTM.

        Returns the ``(outputs, final_state)`` pair exactly as ``nn.LSTM``
        produces it.
        """
        return self.lstm(data)
        
def build_cnn(in_channels):
    """Return a pretrained ResNet-18 whose first convolution takes ``in_channels`` channels.

    With ``in_channels == 3`` the network is returned unchanged. For any
    other value the stem convolution is replaced by a new bias-free
    ``nn.Conv2d`` that copies the original's output channels, kernel size,
    stride and padding; being newly constructed, that layer does not carry
    the pretrained weights.
    """
    backbone = resnet18(pretrained=True)

    # Nothing to adapt when the input already has three channels.
    if in_channels == 3:
        return backbone

    stem = backbone.conv1
    backbone.conv1 = nn.Conv2d(
        in_channels,
        stem.out_channels,
        kernel_size=stem.kernel_size,
        stride=stem.stride,
        padding=stem.padding,
        bias=False,
    )
    return backbone
    
class Seq2Seq(nn.Module):
    """Sequence-to-sequence trajectory predictor.

    Pipeline implemented by ``forward``:

    1. Each history frame is turned into an embedding by ``agent_cnn`` and
       concatenated with that frame's 2-D history position.
    2. ``encoder_lstm`` consumes the resulting sequence; only its final
       state is kept.
    3. ``decoder`` (an LSTM) is unrolled for ``decoder_seq_len`` steps on a
       constant all-ones input, starting from the encoder's final state.
    4. Every decoder output is concatenated with the ``road_cnn`` embedding
       and mapped to an (x, y) pair by a three-layer MLP.

    Args:
        hidden_size1: hidden width of both the encoder and the decoder LSTM.
        hidden_size2: width of the first MLP layer.
        hidden_size3: width of the second MLP layer.
        future_num_frames: number of predicted steps in the output.
        seq_len: number of history frames read from the image tensor.
        decoder_seq_len: number of decoder steps; must equal
            ``future_num_frames`` because the output is reshaped to
            ``(batch, future_num_frames, 2)``.
        num_layers: number of stacked layers in both LSTMs.
        input_size: width of the constant decoder input.

    Raises:
        ValueError: if ``decoder_seq_len != future_num_frames``.
    """

    def __init__(self, hidden_size1, hidden_size2, hidden_size3, future_num_frames, seq_len, decoder_seq_len, num_layers=1, input_size=256):
        # Fail fast: forward() produces decoder_seq_len steps but reshapes to
        # future_num_frames steps, so a mismatch could only ever blow up later
        # inside .view() with a far less helpful message.
        if decoder_seq_len != future_num_frames:
            raise ValueError(
                f"decoder_seq_len ({decoder_seq_len}) must equal "
                f"future_num_frames ({future_num_frames})"
            )

        super().__init__()
        self.future_num_frames = future_num_frames
        self.input_size = input_size
        self.seq_len = seq_len
        self.decoder_seq_len = decoder_seq_len

        # NOTE: attribute names below are part of the state_dict keys; renaming
        # any of them would break loading of existing checkpoints.
        self.encoder_lstm = Encoder(hidden_size1, num_layers)
        self.agent_cnn = build_cnn(2)   # sees 2-channel inputs (one channel pair per frame)
        self.road_cnn = build_cnn(3)    # sees the last 3 image channels

        # Same hidden width / depth as the encoder so its final state can seed the decoder.
        self.decoder = nn.LSTM(input_size, hidden_size1, num_layers, batch_first=True)

        # 1000 is the width of the road_cnn embedding appended to each decoder output.
        self.fc1 = nn.Linear((hidden_size1)+1000, hidden_size2)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size2, hidden_size3)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size3, 2)

    def forward(self, data, device):
        """Predict future positions for one batch.

        Args:
            data: mapping with at least ``'history_positions'`` and
                ``'image'`` tensors (batch on dim 0). Presumably an l5kit
                batch -- confirm the exact shapes against the dataset.
            device: device the inputs are moved to and the decoder input is
                created on.

        Returns:
            Tensor of shape ``(batch, future_num_frames, 2)``.
        """
        # Positions are flipped along the time axis; images_to_embeddings
        # reverses its frame order as well, so both sequences stay aligned.
        history_positions = torch.flip(data['history_positions'], [1]).to(device)
        encoded_images, road_image = self.images_to_embeddings(data['image'].to(device))
        encoder_in = torch.cat([history_positions, encoded_images], dim=2)

        _, hidden = self.encoder_lstm(encoder_in)

        batch_size = history_positions.shape[0]

        # Constant decoder input, allocated directly on the target device
        # instead of being built on the CPU and copied on every call.
        decoder_in = torch.ones(batch_size, self.decoder_seq_len, self.input_size, device=device)
        out, _ = self.decoder(decoder_in, hidden)

        # Give every decoder step access to the same road embedding.
        road_image = road_image.reshape(batch_size, 1, -1).repeat(1, self.decoder_seq_len, 1)
        out = torch.cat([out, road_image], dim=2)

        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)

        return out.view(batch_size, self.future_num_frames, 2)

    def images_to_embeddings(self, images):
        """Encode the per-frame channel pairs and the trailing 3-channel image.

        For each ``i`` in ``range(seq_len)``, channel ``i`` and channel
        ``seq_len + i`` are stacked into a 2-channel image and passed through
        ``agent_cnn``. (Presumably these are the agent and ego rasters of the
        same frame -- confirm against the rasterizer's channel layout.)

        Returns:
            A pair ``(frame_embeddings, road_embedding)``:
            ``frame_embeddings`` has shape ``(batch, seq_len, -1)`` with the
            frames in reverse channel order (channel ``seq_len - 1`` first);
            ``road_embedding`` is ``road_cnn`` applied to the last three
            channels of ``images``.
        """
        seq_len = self.seq_len
        batch_size = images.shape[0]
        encoded_images = []
        for i in range(seq_len):
            ego_idx = seq_len + i
            im = torch.cat([images[:, i:i+1, :, :], images[:, ego_idx:ego_idx+1, :, :]], dim=1)

            out = self.agent_cnn(im).reshape(batch_size, 1, -1)
            encoded_images.append(out)
        # Match the flipped order of the history positions in forward().
        encoded_images.reverse()
        return torch.cat(encoded_images, dim=1), self.road_cnn(images[:, -3:, :, :])

In [3]:
# Data location for l5kit.
# An L5KIT_DATA_FOLDER that is already set in the environment is respected;
# the author's local folder is only used as a fallback. (Previously the
# hard-coded path silently overwrote whatever the user had exported.)
os.environ.setdefault("L5KIT_DATA_FOLDER", "A:/CSCI 566 Project/")
dm = LocalDataManager(None)

# Experiment configuration, kept inline instead of being read from YAML
# (the equivalent file-based call would be
#  load_config_data("./configs/Seq2Seq_Frame_LSTM.yaml")).
cfg = {
    'format_version': 4,
    # Temporal extent of the samples. Later cells read 'history_num_frames'
    # (SEQ_LEN = value + 1) and 'future_num_frames' (prediction length).
    'model_params': {
        'history_num_frames': 10,
        'history_step_size': 1,
        'history_delta_time': 0.1,
        'future_num_frames': 50,
        'future_step_size': 1,
        'future_delta_time': 0.1,
    },
    # Passed to build_rasterizer via cfg; semantics are defined by l5kit.
    'raster_params': {
        'raster_size': [224, 224],
        'pixel_size': [0.5, 0.5],
        'ego_center': [0.5, 0.5],
        'map_type': 'py_semantic',
        'satellite_map_key': 'aerial_map/aerial_map.png',
        'semantic_map_key': 'semantic_map/semantic_map.pb',
        'dataset_meta_key': 'meta.json',
        'filter_agents_threshold': 0.5,
        'disable_traffic_light_faces': False,
    },
    # NOTE: 'num_workers' is declared for every loader but the DataLoader
    # calls in this notebook do not pass it on.
    'train_data_loader': {
        'key': 'scenes/train.zarr',
        'batch_size': 4,
        'shuffle': True,
        'num_workers': 1,
    },
    'val_data_loader': {
        'key': 'scenes/validate.zarr',
        'batch_size': 4,
        'shuffle': False,
        'num_workers': 1,
    },
    # Not referenced by any cell visible in this notebook.
    'test_data_loader': {
        'key': 'scenes/test.zarr',
        'batch_size': 32,
        'shuffle': False,
        'num_workers': 1,
    },
    # Only 'max_num_steps' actually drives the training loop; the training
    # cell reads 'checkpoint_every_n_steps' but its use is commented out.
    'train_params': {
        'checkpoint_every_n_steps': 500,
        'max_num_steps': 100000,
        'eval_every_n_steps': 250,
    },
}

In [8]:
# ===== INIT DATASET
train_cfg = cfg["train_data_loader"]

# The rasterizer depends only on cfg and dm, so it is built once and shared
# by the training and validation datasets (it used to be rebuilt, with the
# same arguments, before the validation dataset was created).
rasterizer = build_rasterizer(cfg, dm)

# Training dataset/dataloader
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
train_dataloader = DataLoader(train_dataset,
                              shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"])

# Number of predicted steps, and number of history frames plus one.
FUTURE_NUM_FRAMES = cfg['model_params']['future_num_frames']
SEQ_LEN = cfg['model_params']['history_num_frames'] + 1


# ===== INIT VAL DATASET
val_cfg = cfg["val_data_loader"]

# Validation dataset/dataloader
# (num_workers from the config is intentionally not passed; the loaders run
# in the main process.)
val_zarr = ChunkedDataset(dm.require(val_cfg["key"])).open()
val_dataset = AgentDataset(cfg, val_zarr, rasterizer)
val_dataloader = DataLoader(val_dataset,
                            shuffle=val_cfg["shuffle"],
                            batch_size=val_cfg["batch_size"])

# print(train_dataset, val_dataset)

In [9]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# The decoder is unrolled for exactly as many steps as there are future
# frames to predict.
model = Seq2Seq(
    hidden_size1=1024,
    hidden_size2=1024,
    hidden_size3=256,
    future_num_frames=FUTURE_NUM_FRAMES,
    seq_len=SEQ_LEN,
    decoder_seq_len=FUTURE_NUM_FRAMES,
).to(device)

optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Element-wise loss (no reduction) so it can be masked before averaging.
criterion = nn.MSELoss(reduction="none")

In [9]:
# ==== TRAIN LOOP
# Every iteration runs one optimiser step on a training batch followed by a
# gradient-free evaluation of one validation batch. Every second step the
# buffered losses are appended to CSV files, and whenever (step + 1) is a
# multiple of 1000 the model weights are saved.


def _masked_mse(batch):
    """Return the availability-masked MSE of ``model`` on ``batch`` as a scalar tensor.

    The element-wise loss is multiplied by the availability mask and then
    averaged over *all* elements, so masked-out entries contribute zeros to
    the mean rather than being excluded from it.
    """
    pred = model(batch, device)
    targets = batch['target_positions'].to(device)
    availabilities = batch["target_availabilities"].unsqueeze(-1).to(device)
    return (criterion(pred, targets) * availabilities).mean()


def _append_step_losses(path, last_step, values):
    """Append one ``step,loss`` row per value to the CSV file at ``path``.

    ``values`` holds the losses of consecutive steps ending at ``last_step``,
    so each row is labelled with the step it was actually measured at
    (previously every row carried the step of the flush).
    """
    first_step = last_step - len(values) + 1
    with open(path, 'a') as fd:
        for offset, value in enumerate(values):
            fd.write(f"{first_step + offset},{value}\n")


tr_it = iter(train_dataloader)
vl_it = iter(val_dataloader)

# Index of the last completed step; -1 means "start from scratch".
# To resume, set it to the step of a saved checkpoint and load the weights:
# i = 65499
# model.load_state_dict(torch.load(f'saved_models/Seq2Seq_Frame_Partial_CNN_LSTM{i}.pth'))
i = -1

progress_bar = tqdm(range(i+1, cfg["train_params"]["max_num_steps"]), position=0)
# progress_bar = tqdm(range(1), position=0)

# Read from the config but currently unused: flushing is hard-wired to every
# second step below.
checkpoint_n = cfg["train_params"]["checkpoint_every_n_steps"]

losses_train = []   # training losses since the last flush
curr_losses = []    # every training loss of this run (never cleared)
losses_val = []     # validation losses since the last flush

# Not used by this cell; kept so the notebook namespace is unchanged.
losses_avg = []
val_avg = []
hidden = None
num_frames = 20

for i in progress_bar:
    try:
        data = next(tr_it)
    except StopIteration:
        # Training data exhausted: start another pass.
        tr_it = iter(train_dataloader)
        data = next(tr_it)

    # ---- Training step
    model.train()
    torch.set_grad_enabled(True)
    loss = _masked_mse(data)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    train_loss = loss.item()
    losses_train.append(train_loss)
    curr_losses.append(train_loss)

    # ---- Validation step (no gradients, eval-mode layers)
    model.eval()
    with torch.no_grad():
        try:
            val_data = next(vl_it)
        except StopIteration:
            vl_it = iter(val_dataloader)
            val_data = next(vl_it)

        v_loss = _masked_mse(val_data)
    losses_val.append(v_loss.item())

    # ---- Periodic logging / checkpointing
#     if (i % checkpoint_n) == checkpoint_n - 1:
    if i % 2 == 1:
        _append_step_losses('Seq2Seq_Partial_LSTM_val.csv', i, losses_val)
        _append_step_losses('Seq2Seq_Partial_LSTM_train.csv', i, losses_train)

        train_avg_loss = np.mean(losses_train)
        val_avg_loss = np.mean(losses_val)

        # Start a fresh averaging window.
        losses_train = []
        losses_val = []

        with open('Seq2Seq_Partial_LSTM_train_avg.csv', 'a') as fd:
            fd.write(f"{i},{train_avg_loss}\n")

        with open('Seq2Seq_Partial_LSTM_val_avg.csv', 'a') as fd:
            fd.write(f"{i},{val_avg_loss}\n")

        progress_bar.set_description(f"loss: {train_loss} loss(avg): {train_avg_loss} loss_val(avg): {val_avg_loss}")
        if (i+1) % 1000 == 0:
            torch.save(model.state_dict(), f'Seq2Seq_Partial_LSTM_{i}.pth')

loss: 192.76612854003906 loss(avg): 141.22601318359375 loss_val(avg): 120.65033140778542:   0%| | 7/100000 [00:03<12:37


KeyboardInterrupt: 

In [4]:
# ===== INIT DATASET
train_cfg = cfg["train_data_loader"]

# The rasterizer depends only on cfg and dm, so it is built once and shared
# by the training and validation datasets (it used to be rebuilt, with the
# same arguments, before the validation dataset was created).
rasterizer = build_rasterizer(cfg, dm)

# Training dataset/dataloader
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
train_dataloader = DataLoader(train_dataset,
                              shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"])

# Number of predicted steps, and number of history frames plus one.
FUTURE_NUM_FRAMES = cfg['model_params']['future_num_frames']
SEQ_LEN = cfg['model_params']['history_num_frames'] + 1


# ===== INIT VAL DATASET
val_cfg = cfg["val_data_loader"]

# Validation dataset/dataloader
# (num_workers from the config is intentionally not passed; the loaders run
# in the main process.)
val_zarr = ChunkedDataset(dm.require(val_cfg["key"])).open()
val_dataset = AgentDataset(cfg, val_zarr, rasterizer)
val_dataloader = DataLoader(val_dataset,
                            shuffle=val_cfg["shuffle"],
                            batch_size=val_cfg["batch_size"])

# print(train_dataset, val_dataset)

In [5]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Re-derived here so this cell does not rely on the dataset cell for them.
FUTURE_NUM_FRAMES = cfg['model_params']['future_num_frames']
SEQ_LEN = cfg['model_params']['history_num_frames'] + 1

# Must be constructed with the same sizes the checkpoint was trained with.
model = Seq2Seq(1024, 1024, 256, FUTURE_NUM_FRAMES, SEQ_LEN, FUTURE_NUM_FRAMES).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss(reduction="none")

# map_location remaps the stored tensors onto the selected device, so a
# checkpoint written on a GPU machine also loads when only the CPU is
# available (without it torch.load raises in that situation).
# NOTE(review): the training cell saves checkpoints to the working directory,
# while this reads from 'saved_models/' -- presumably the file was moved by
# hand; confirm.
model.load_state_dict(torch.load('saved_models/Seq2Seq_Partial_LSTM_45999.pth', map_location=device))

<All keys matched successfully>

In [6]:
# ==== EVALUATION LOOP
# Measures the masked MSE of the loaded model on 1000 batches without
# updating any weights, then appends the per-batch losses to a CSV file.
#
# NOTE(review): the batches are drawn from train_dataloader, yet the output
# file and the summary printed in the next cell call the result a "test"
# loss. If a held-out estimate is intended, iterate val_dataloader instead.
tr_it = iter(train_dataloader)
progress_bar = tqdm(range(1000), position=0)

losses_train = []   # one loss per evaluated batch, in evaluation order

# Not used by this cell; kept so the notebook namespace is unchanged.
checkpoint_n = cfg["train_params"]["checkpoint_every_n_steps"]
curr_losses = []
losses_avg = []
losses_val = []
val_avg = []
hidden = None
num_frames = 20

# Evaluation mode never changes inside the loop, so it is set once up front.
model.eval()
for i in progress_bar:
    try:
        data = next(tr_it)
    except StopIteration:
        # Data exhausted: start another pass.
        tr_it = iter(train_dataloader)
        data = next(tr_it)

    # Forward pass only; no gradients are recorded.
    with torch.no_grad():
        pred = model(data, device)

        targets = data['target_positions'].to(device)
        target_availabilities = data["target_availabilities"].unsqueeze(-1).to(device)

        # Element-wise loss, masked by availability, averaged over all elements.
        loss = criterion(pred, targets)
        loss = loss * target_availabilities
        loss = loss.mean()

        losses_train.append(loss.item())

# Each row is labelled with the index of the batch it belongs to (previously
# every row was written with the final loop index).
with open('Seq2Seq_LSTM_Partial_CNN_test.csv', 'a') as fd:
    for step, step_loss in enumerate(losses_train):
        fd.write(f"{step},{step_loss}\n")

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [04:36<00:00,  3.62it/s]


In [7]:
# Average of the per-batch losses collected by the evaluation loop.
mean_test_loss = np.mean(losses_train)
print(f"Mean Test Loss: {mean_test_loss}")

Mean Test Loss: 3.618651880376972
