In [2]:
from nuscenes.eval.prediction.splits import get_prediction_challenge_split
from nuscenes.eval.prediction.config import load_prediction_config
from nuscenes.eval.prediction.splits import get_prediction_challenge_split
from nuscenes.prediction import PredictHelper
from nuscenes.prediction.models.physics import ConstantVelocityHeading, PhysicsOracle
from nuscenes.prediction.input_representation.static_layers import StaticLayerRasterizer
from nuscenes.prediction.input_representation.agents import AgentBoxesWithFadedHistory
from nuscenes.prediction.input_representation.interface import InputRepresentation
from nuscenes.prediction.input_representation.combinators import Rasterizer
from nuscenes.prediction.models.backbone import ResNetBackbone
from nuscenes.prediction.models.mtp import MTP, MTPLoss
from nuscenes.prediction.models.covernet import CoverNet, ConstantLatticeLoss

import torch
import numpy as np
from nuscenes.prediction.input_representation.static_layers import StaticLayerRasterizer
from nuscenes.eval.prediction.data_classes import Prediction
import json
import os

import matplotlib.pyplot as plt
from typing import List, Dict, Any
from collections import defaultdict
import pickle

In [3]:
os.path.join(os.getcwd(), 'maps')

'/home/armahade/ECE285/AV/final_proj/MTP_experiments/maps'

In [4]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader


from nuscenes import NuScenes

from nuscenes.eval.prediction.splits import get_prediction_challenge_split
from nuscenes.eval.prediction.config import load_prediction_config
from nuscenes.eval.prediction.splits import get_prediction_challenge_split
from nuscenes.prediction import PredictHelper

from nuscenes.prediction.models.physics import ConstantVelocityHeading, PhysicsOracle
from nuscenes.prediction.input_representation.static_layers import StaticLayerRasterizer
from nuscenes.prediction.input_representation.agents import AgentBoxesWithFadedHistory
from nuscenes.prediction.input_representation.interface import InputRepresentation
from nuscenes.prediction.input_representation.combinators import Rasterizer
from PIL import Image

class NuscenesDataset(Dataset):
    
    def __init__(self, NuscenesBaseDir, 
                 data_set_version = 'v1.0-trainval', 
                 maps_dir = 'maps',
                 save_maps_dataset = False, 
                 split = 'train_val',
                 config_name = 'predict_2020_icra.json',
                 history=1, 
                 in_agent_frame=True):
        
        # path to main dataset
        self.baseDir = NuscenesBaseDir
        self.nusc = NuScenes(version=data_set_version, dataroot=self.baseDir, verbose=True)
        self.helper = PredictHelper(self.nusc)
        
        #initialize maps directory where everything will be saved 
        self.maps_dir = os.path.join(os.getcwd(), 'maps')

        #initialize the data set 
        self.data_set = get_prediction_challenge_split(split,dataroot=self.baseDir)

        #initialize rasterizers for the 
        self.static_layer_rasterizer = StaticLayerRasterizer(self.helper)
        self.agent_rasterizer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=history)
        self.mtp_input_representation = InputRepresentation(self.static_layer_rasterizer, self.agent_rasterizer, Rasterizer())

        self.in_agent_frame = in_agent_frame

        self.config = load_prediction_config(self.helper, config_name)

        self.valid_data_points = []
        
        self.save_maps_dataset = save_maps_dataset
        
        if self.save_maps_dataset: 
            self.save_maps()
            
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            
    def save_maps(self):
        '''
        Input: None 
        Output: None
        
        This method finds all the valid data points in the data set. We define a valid data point 
        where the velocity, acceleartion, and heading specified by token not NaN. 
        '''
        print("starting filtering and creation of data set")
        for i, token in enumerate(self.data_set): 
            instance_token_img, sample_token_img = self.data_set[i].split('_')
            
            file_path = os.path.join(self.maps_dir, "maps_{0}.jpg".format(i))
            
            instance_token_img, sample_token_img = self.data_set[i].split('_')
            img = self.mtp_input_representation.make_input_representation(instance_token_img, sample_token_img)
            im = Image.fromarray(img)
            im.save(file_path)
        
            print("{0}/{1} image saved".format(i, len(self.data_set)))
            
        print("done filtering data set ")
        
    def __len__(self):
        return len(self.data_set)
    
    #return the image tensor, agent state vector, and the ground truth
    def __getitem__(self, index):
        
        instance_token_img, sample_token_img = self.data_set[index].split('_')
        
        velocity = self.helper.get_velocity_for_agent(instance_token_img, sample_token_img)
        acceleration = self.helper.get_acceleration_for_agent(instance_token_img, sample_token_img)
        heading = self.helper.get_heading_change_rate_for_agent(instance_token_img, sample_token_img)        

        #using a padding token of -1
        if np.isnan(velocity) or np.isnan(acceleration) or np.isnan(heading):
            velocity =  acceleration = heading = -1 

        #construct agent state vector 
        agent_state_vec = torch.Tensor([velocity, acceleration, heading])
        
        #change image from (3, N, N), will have data loader take care 
        #get image and construct tensor 
        file_path = os.path.join(self.maps_dir, "maps_{0}.jpg".format(index))
        
        im = Image.open(file_path)
        img = np.array(im)
        image_tensor = torch.Tensor(img).permute(2, 0, 1)
        
        #get ground truth 
        ground_truth = self.helper.get_future_for_agent(instance_token_img, 
                                                        sample_token_img,
                                                        self.config.seconds, 
                                                        in_agent_frame=self.in_agent_frame)
        
        ground_truth = torch.Tensor(ground_truth).unsqueeze(0)
        return image_tensor, agent_state_vec, ground_truth 

In [5]:
dataset = NuscenesDataset( '../full_data/sets/nuscenes')

Loading NuScenes tables for version v1.0-trainval...
23 category,
8 attribute,
4 visibility,
64386 instance,
12 sensor,
10200 calibrated_sensor,
2631083 ego_pose,
68 log,
850 scene,
34149 sample,
2631083 sample_data,
1166187 sample_annotation,
4 map,
Done loading in 40.4 seconds.
Reverse indexing ...
Done reverse indexing in 13.2 seconds.
static_layers.py - Loading Map: singapore-queenstown
static_layers.py - Loading Map: singapore-hollandvillage
static_layers.py - Loading Map: boston-seaport
static_layers.py - Loading Map: singapore-onenorth
static_layers.py - Loading Map: singapore-queenstown
static_layers.py - Loading Map: singapore-hollandvillage
static_layers.py - Loading Map: boston-seaport
static_layers.py - Loading Map: singapore-onenorth


In [None]:
mtp = MTP(backbone, num_modes=M).to(device)

In [None]:
PATH_TO_EPSILON_8_SET = "./nuscenes-prediction-challenge-trajectory-sets/epsilon_8.pkl"
trajectories = pickle.load(open(PATH_TO_EPSILON_8_SET, 'rb'))

#Saved them as a list of lists
trajectories = torch.Tensor(trajectories)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
import torch.optim as optim
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataloader = DataLoader(dataset, batch_size=8, shuffle = True, pin_memory = True, num_workers = 4)

#init network 
count = 0 
M = 20
backbone = ResNetBackbone('resnet50')
mtp = MTP(backbone, num_modes=M).to(device)

# covernet = CoverNet(backbone, num_modes=64).to(device)
#init loss function and optimizers
# criterion = ConstantLatticeLoss(trajectories)

criterion = MTPLoss(num_modes=M)
optimizer = optim.Adam(mtp.parameters(), lr=3e-4)

output_dir = '.'
t = time.localtime()
current_time = time.strftime("%H:%M:%S", t)
print(current_time)
losses = []
print('starting 100 epochs')
print('total batches:' + str(len(dataloader)))
for i in range(1):
    epoch_loss = 0 
    count = 0 
    for image_tensor, agent_vec, ground_truth in dataloader:
        output = mtp(image_tensor.to(device), agent_vec.to(device))
        loss = criterion(output, ground_truth.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        count+=1
        epoch_loss+=loss
        print("{0}/{1} loss is: {2}".format(count + 1, len(dataloader), loss))
        
t = time.localtime()
current_time = time.strftime("%H:%M:%S", t)
print(current_time)

14:53:53
starting 100 epochs
total batches:1070
2/1070 loss is: 9.904657363891602
3/1070 loss is: 21.824880599975586
4/1070 loss is: 13.754898071289062
5/1070 loss is: 16.572734832763672
6/1070 loss is: 9.820317268371582
7/1070 loss is: 14.750350952148438
8/1070 loss is: 15.147744178771973
9/1070 loss is: 14.552214622497559
10/1070 loss is: 13.717880249023438
11/1070 loss is: 7.0016374588012695
12/1070 loss is: 11.460454940795898
13/1070 loss is: 18.642410278320312
14/1070 loss is: 13.952999114990234
15/1070 loss is: 15.950462341308594
16/1070 loss is: 13.097911834716797
17/1070 loss is: 15.3785400390625
18/1070 loss is: 11.18424129486084
19/1070 loss is: 12.549555778503418
20/1070 loss is: 10.487478256225586
21/1070 loss is: 11.82686996459961
22/1070 loss is: 10.422770500183105
23/1070 loss is: 15.56697940826416
24/1070 loss is: 10.63955020904541
25/1070 loss is: 10.564567565917969
26/1070 loss is: 9.383420944213867
27/1070 loss is: 11.852909088134766
28/1070 loss is: 11.0747489929199

231/1070 loss is: 3.539118766784668
232/1070 loss is: 4.042892932891846
233/1070 loss is: 6.357214450836182
234/1070 loss is: 6.524319171905518
235/1070 loss is: 4.8041887283325195
236/1070 loss is: 8.090332984924316
237/1070 loss is: 4.553734302520752
238/1070 loss is: 4.92221736907959
239/1070 loss is: 5.076413631439209
240/1070 loss is: 4.489249229431152
241/1070 loss is: 4.9366254806518555
242/1070 loss is: 4.624505519866943
243/1070 loss is: 5.161996841430664
244/1070 loss is: 4.948222637176514
245/1070 loss is: 4.571588516235352
246/1070 loss is: 3.6597845554351807
247/1070 loss is: 6.010866165161133
248/1070 loss is: 4.504298210144043
249/1070 loss is: 4.741362571716309
250/1070 loss is: 5.587397575378418
251/1070 loss is: 10.113014221191406
252/1070 loss is: 6.2922043800354
253/1070 loss is: 6.703742980957031
254/1070 loss is: 5.449493408203125
255/1070 loss is: 5.380086898803711
256/1070 loss is: 4.604427337646484
257/1070 loss is: 6.478140354156494
258/1070 loss is: 7.1734519

458/1070 loss is: 4.9861955642700195
459/1070 loss is: 4.270880699157715
460/1070 loss is: 8.099242210388184
461/1070 loss is: 7.025805950164795
462/1070 loss is: 5.210071563720703
463/1070 loss is: 4.880614280700684
464/1070 loss is: 2.942962169647217
465/1070 loss is: 4.247701644897461
466/1070 loss is: 4.1097307205200195
467/1070 loss is: 4.594721794128418
468/1070 loss is: 2.8661653995513916
469/1070 loss is: 5.009261131286621
470/1070 loss is: 4.949841022491455
471/1070 loss is: 5.95875358581543
472/1070 loss is: 4.1394782066345215
473/1070 loss is: 3.9243831634521484
474/1070 loss is: 4.2740373611450195
475/1070 loss is: 5.378903865814209
476/1070 loss is: 4.617550373077393
477/1070 loss is: 3.850227117538452
478/1070 loss is: 4.662897109985352
479/1070 loss is: 4.844101905822754
480/1070 loss is: 6.15587043762207
481/1070 loss is: 3.481034755706787
482/1070 loss is: 6.749127388000488
483/1070 loss is: 4.43846321105957
484/1070 loss is: 2.9697046279907227
485/1070 loss is: 5.1106

685/1070 loss is: 3.5975165367126465
686/1070 loss is: 3.520113468170166
687/1070 loss is: 2.7522225379943848
688/1070 loss is: 3.616110324859619
689/1070 loss is: 4.054043769836426
690/1070 loss is: 5.4775190353393555
691/1070 loss is: 3.3317275047302246
692/1070 loss is: 4.706493377685547
693/1070 loss is: 3.648533344268799
694/1070 loss is: 5.150812149047852
695/1070 loss is: 3.3735191822052
696/1070 loss is: 4.390236854553223
697/1070 loss is: 4.034657955169678
698/1070 loss is: 3.1693837642669678
699/1070 loss is: 4.818690776824951
700/1070 loss is: 3.847196340560913
701/1070 loss is: 3.783782482147217
702/1070 loss is: 3.20729923248291
703/1070 loss is: 3.558377981185913
704/1070 loss is: 7.9215192794799805
705/1070 loss is: 3.604094982147217
706/1070 loss is: 3.552206039428711
707/1070 loss is: 3.917632579803467
708/1070 loss is: 4.0839762687683105
709/1070 loss is: 3.583249807357788
710/1070 loss is: 4.894253253936768
711/1070 loss is: 4.005527019500732
712/1070 loss is: 6.5568

912/1070 loss is: 3.025554656982422
913/1070 loss is: 3.215397834777832
914/1070 loss is: 2.640397548675537
915/1070 loss is: 4.361527919769287
916/1070 loss is: 4.516890525817871
917/1070 loss is: 4.2911858558654785
918/1070 loss is: 4.0379228591918945
919/1070 loss is: 3.3834924697875977
920/1070 loss is: 3.1550421714782715
921/1070 loss is: 6.562574863433838
922/1070 loss is: 3.9911677837371826
923/1070 loss is: 5.345587730407715
924/1070 loss is: 5.632210731506348
925/1070 loss is: 5.180821418762207
926/1070 loss is: 6.975529670715332
927/1070 loss is: 5.398812294006348
928/1070 loss is: 6.816868782043457
929/1070 loss is: 4.56820821762085
930/1070 loss is: 4.118063449859619
931/1070 loss is: 4.397650241851807
932/1070 loss is: 4.415583610534668
933/1070 loss is: 4.423004627227783
934/1070 loss is: 5.039300918579102
935/1070 loss is: 3.5971360206604004
936/1070 loss is: 3.457103729248047
937/1070 loss is: 3.228330135345459
938/1070 loss is: 5.843931198120117
939/1070 loss is: 5.757

In [None]:
!python train.py

In [None]:
#import map rasterizer for agents 

In [None]:
#import config from
from nuscenes.eval.prediction.config import PredictionConfig, load_prediction_config
config_name = 'predict_2020_icra.json'
config = load_prediction_config(helper, config_name)

In [None]:
static_layer_rasterizer = StaticLayerRasterizer(helper)
agent_rasterizer = AgentBoxesWithFadedHistory(helper, seconds_of_history=1)
mtp_input_representation = InputRepresentation(static_layer_rasterizer, agent_rasterizer, Rasterizer())
instance_token_img, sample_token_img = train_set[0].split('_')


img = mtp_input_representation.make_input_representation(instance_token_img, sample_token_img)

plt.imshow(img)
agent_state_vector = torch.Tensor([[helper.get_velocity_for_agent(instance_token_img, sample_token_img),
                                    helper.get_acceleration_for_agent(instance_token_img, sample_token_img),
                                    helper.get_heading_change_rate_for_agent(instance_token_img, sample_token_img)]])
print(agent_state_vector)

In [None]:


#initialize the rasterizer 
static_layer_rasterizer = StaticLayerRasterizer(helper)
agent_rasterizer = AgentBoxesWithFadedHistory(helper, seconds_of_history=1)
mtp_input_representation = InputRepresentation(static_layer_rasterizer, agent_rasterizer, Rasterizer())

#instantiate mtp model 
M = 5
backbone = ResNetBackbone('resnet50')
mtp = MTP(backbone, num_modes=M)
loss = MTPLoss()
mtp_output = []

# for i, token in enumerate(train_set):
    
#     instance_token_img, sample_token_img = token.split('_')
#     img = mtp_input_representation.make_input_representation(instance_token_img, sample_token_img)
        
#     velocity = helper.get_velocity_for_agent(instance_token_img, sample_token_img)
#     acceleration = helper.get_acceleration_for_agent(instance_token_img, sample_token_img)
#     heading = helper.get_heading_change_rate_for_agent(instance_token_img, sample_token_img)
    
#     #if any of the state agent vector has nan values then we simply continue to next data point
#     if np.isnan(velocity) or np.isnan(acceleration) or np.isnan(heading):
#         continue
    
#     #construct agent state vector 
#     agent_state_vec = torch.Tensor([[velocity, acceleration, heading]])
    
#     #change image from (N,N,3) -> (1, 3, N, N)
#     image_tensor = torch.Tensor(img).permute(2, 0, 1).unsqueeze(0)
    
#     #get output from network
#     #Tensor of dimension [batch_size, number_of_modes * number_of_predictions_per_mode + number_of_modes]
#     #B, M*24 + M
#     output = mtp(image_tensor, agent_state_vec)

#     prediction = output[:,:-M].detach().numpy()
#     probabilites = output[:,-M:].squeeze(0).detach().numpy()
#     prediction = prediction.reshape(M, config.seconds * 2, 2)

#     serialized_pred = Prediction(instance_token_img, sample_token_img, prediction, probabilites).serialize()
#     mtp_output.append(serialized_pred)
    
    #compute the metrics 
    
    #go over https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/eval/prediction/compute_metrics.py#L53
    

In [None]:
#dump result of mtp into json file 
json.dump(mtp_output, open('mtp_preds.json', "w"))

In [None]:
print(len(mtp_output))

In [None]:


def compute_metrics(predictions: List[Dict[str, Any]],
                    helper: PredictHelper, config: PredictionConfig) -> Dict[str, Any]:
    """
    Computes metrics from a set of predictions.
    :param predictions: List of prediction JSON objects.
    :param helper: Instance of PredictHelper that wraps the nuScenes val set.
    :param config: Config file.
    :return: Metrics. Nested dictionary where keys are metric names and value is a dictionary
        mapping the Aggregator name to the results.
    """
    n_preds = len(predictions)
    containers = {metric.name: np.zeros((n_preds, metric.shape)) for metric in config.metrics}
    for i, prediction_str in enumerate(predictions):
        prediction = Prediction.deserialize(prediction_str)
        ground_truth = helper.get_future_for_agent(prediction.instance, prediction.sample,
                                                   config.seconds, in_agent_frame=True)
        for metric in config.metrics:
            containers[metric.name][i] = metric(ground_truth, prediction)
    aggregations: Dict[str, Dict[str, List[float]]] = defaultdict(dict)
    for metric in config.metrics:
        for agg in metric.aggregators:
            aggregations[metric.name][agg.name] = agg(containers[metric.name])
    return aggregations


In [None]:
predictions = json.load(open('mtp_preds.json', "r"))
results = compute_metrics(predictions, helper, config)

In [None]:
results

In [None]:
image_tensor = torch.Tensor(img).permute(2, 0, 1).unsqueeze(0)

print(agent_state_vector)
print(img.shape)
print(image_tensor.shape)

In [None]:
# Output has 50 entries.
# The first 24 are x,y coordinates (in the agent frame) over the next 6 seconds at 2 Hz for the first mode.
# The second 24 are the x,y coordinates for the second mode.
# The last 2 are the logits of the mode probabilities
output = mtp(image_tensor, agent_state_vector)

In [None]:
print(output.shape)

In [None]:
logits = output[:,-M:]
print(logits)
probs = torch.softmax(logits, dim = 1)
best_mode = torch.argmax(probs,dim=1).item()
print(best_mode)

In [None]:
future_xy_local = helper.get_future_for_agent(instance_token_img, sample_token_img, seconds=6, in_agent_frame=False)
future_xy_local

In [None]:
output

In [None]:
future_xy_local.shape

In [None]:
future_xy_vec = future_xy_local.reshape(1, 24)
future_xy_vec

In [None]:
output.shape
output[:,:-1].shapefu

In [None]:
future_xy_vec - output[:,:-1].detach().numpy()

In [None]:
output[:,:-1]

In [None]:
future_xy_vec