## Install the package dependencies before running this notebook

In [58]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob

## Use GPU if Available

In [59]:
# Pick the compute device once; every later cell moves the model and tensors to it.
# Fall back to an explicit CPU device (instead of None) so `device` is always a
# real torch.device, whether or not CUDA is present.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Create a `torch.utils.data.Dataset` class for the training dataset

In [60]:
from glob import glob
import pickle
import numpy as np

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

# Directory that holds the per-split folders of pickled trajectories.
ROOT_PATH = "./"

# Every city that has a pickled input file per split.
cities = [
    "austin",
    "miami",
    "pittsburgh",
    "dearborn",
    "washington-dc",
    "palo-alto",
]

# Splits that exist on disk as folders.
splits = [
    "train",
    "test",
]

def _load_pickled_array(path):
    """Unpickle the file at ``path`` and return its content as a NumPy array."""
    # NOTE(security): pickle.load can execute arbitrary code; only use it on
    # trusted dataset files.
    with open(path, "rb") as handle:  # context manager closes the handle
        return np.asarray(pickle.load(handle))


def get_city_trajectories(city="palo-alto", split="train", normalized=False):
    """Load the trajectories of one city for the requested split.

    Args:
        city: City name used as file prefix (``<city>_inputs`` / ``<city>_outputs``).
        split: ``"train"`` returns the first 80% of the files in the ``train``
            folder, ``"val"`` returns the remaining 20% of those same files.
            Any other value (e.g. ``"test"``) is used as the folder name and
            only the inputs are loaded.
        normalized: Accepted for interface compatibility; currently ignored.

    Returns:
        ``(inputs, outputs)`` as NumPy arrays. ``outputs`` is ``None`` for
        splits other than ``"train"`` and ``"val"``.
    """
    uses_train_files = split in ("train", "val")
    folder = "train" if uses_train_files else split

    inputs = _load_pickled_array(os.path.join(ROOT_PATH, folder, city + "_inputs"))
    if not uses_train_files:
        return inputs, None

    outputs = _load_pickled_array(os.path.join(ROOT_PATH, folder, city + "_outputs"))

    # The train/val boundary is derived from the number of inputs and applied
    # to both arrays so that input/output pairs stay aligned.
    cut = int(len(inputs) * 0.8)
    if split == "train":
        return inputs[:cut], outputs[:cut]
    return inputs[cut:], outputs[cut:]

class ArgoverseDataset(Dataset):
    """Dataset of (input, output) trajectory pairs for one city or all cities.

    Args:
        city: A city name understood by ``get_city_trajectories``, or ``'all'``
            to pool every entry of the module-level ``cities`` list.
        split: Split name forwarded to ``get_city_trajectories``.
        transform: Optional callable applied to each sample before it is returned.

    For splits that have no outputs (``get_city_trajectories`` returns ``None``
    as outputs), ``self.outputs`` is ``None`` and ``__getitem__`` yields the
    input trajectory alone instead of an ``(input, output)`` tuple.
    """
    def __init__(self, city: str, split: str, transform=None):
        super().__init__()
        self.transform = transform

        if city == 'all':
            # Pool every city, in the order given by `cities`.
            self.inputs = []
            self.outputs = []
            has_outputs = True
            for city_name in cities:
                city_inputs, city_outputs = get_city_trajectories(city=city_name, split=split, normalized=False)
                self.inputs.extend(city_inputs)
                if city_outputs is None:
                    # e.g. the test split ships without ground truth
                    has_outputs = False
                else:
                    self.outputs.extend(city_outputs)
            if not has_outputs:
                self.outputs = None
        else:
            # Only include the city specified
            self.inputs, self.outputs = get_city_trajectories(city, split=split, normalized=False)

    def __len__(self):
        """Number of trajectories in the dataset."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return sample ``idx``: ``(input, output)``, or just ``input`` when no outputs exist."""
        if self.outputs is None:
            data = self.inputs[idx]
        else:
            data = (self.inputs[idx], self.outputs[idx])

        if self.transform:
            data = self.transform(data)

        return data

# Initialize the training dataset; city='all' pools every entry of `cities`.
city, split = 'all', 'train'
train_dataset = ArgoverseDataset(city=city, split=split)

## Create a DataLoader class for training

In [61]:
batch_sz = 4  # batch size
# Shuffle every epoch: with city='all' the dataset is stored city by city, so
# without shuffling each stretch of consecutive batches comes from a single city.
train_loader = DataLoader(train_dataset, batch_size=batch_sz, shuffle=True)

## Train the Model

In [62]:
from torch import nn, optim

class Pred(nn.Module):
    """Fully connected encoder/decoder mapping a flattened input to a (60, 2) output.

    The input is flattened to 100 features per sample, compressed to a
    32-dimensional code by the encoder, and expanded by the decoder to 120
    values that are reshaped to ``(batch, 60, 2)``.
    """

    def __init__(self):
        super().__init__()
        # Layer widths, first to last; a ReLU sits between consecutive Linear layers.
        self.encoder = self._mlp(100, 64, 32, 32)
        self.decoder = self._mlp(32, 64, 120, 120)

    @staticmethod
    def _mlp(*widths):
        """Build Linear layers for consecutive ``widths`` with a ReLU between them."""
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            if layers:
                layers.append(nn.ReLU())
            layers.append(nn.Linear(fan_in, fan_out))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, 100)`` float32, encode, decode, return ``(-1, 60, 2)``."""
        flat = x.reshape(-1, 100).float()
        latent = self.encoder(flat)
        return self.decoder(latent).reshape(-1, 60, 2)

In [63]:
# Build the network, place it on the selected device, and attach an Adam optimizer.
pred = Pred()
pred = pred.to(device)
opt = optim.Adam(pred.parameters(), lr=1e-3)

In [64]:
# Train for 10 passes over `train_loader`, minimising the summed squared error
# between predicted and ground-truth trajectories.
for epoch in range(10):

    total_loss = 0
    for inp, out in train_loader:
        inp, out = inp.to(device), out.to(device)

        # Summed (not averaged) squared error over the whole batch
        residual = pred(inp) - out
        loss = residual.pow(2).sum()

        opt.zero_grad()
        loss.backward()
        opt.step()

        total_loss += loss.item()

    # Report the loss averaged over the number of training samples
    print('epoch {} loss: {}'.format(epoch, total_loss / len(train_dataset)))

epoch 0 loss: 1545223.1679539182
epoch 1 loss: 681705.2476858151
epoch 2 loss: 448073.49636376847
epoch 3 loss: 383638.74103367404


## Determine Validation Loss

In [None]:
# Held-out part of the training files (see the 'val' branch of get_city_trajectories)
val_dataset = ArgoverseDataset(city = city, split = 'val')
val_loader = DataLoader(val_dataset,batch_size=batch_sz)

# Evaluate without autograd: no gradients are needed here, so skipping graph
# construction saves memory and time. The previous train/eval mode is restored
# afterwards so that re-running the training cell behaves as before.
was_training = pred.training
pred.eval()
val_loss = 0
with torch.no_grad():
    for inp, out in val_loader:
        inp = inp.to(device)
        out = out.to(device)
        preds = pred(inp)
        # Same summed squared error as used during training
        loss = ((preds - out) ** 2).sum()

        val_loss += loss.item()
pred.train(was_training)

# Loss averaged over the number of validation samples
print('loss: {}'.format(val_loss / len(val_dataset)))

## Sample a batch of data and visualize 

In [None]:
import matplotlib.pyplot as plt
import random


def show_sample_batch(sample_batch):
    """Visualize the trajectories of a batch of samples, one subplot per sample.

    Args:
        sample_batch: ``(inp, out)`` pair of tensors whose first dimension is
            the batch and whose last dimension starts with the (x, y) position.
    """
    inp, out = sample_batch
    # Matplotlib needs host memory; this is a no-op for CPU tensors without grad.
    inp = inp.detach().cpu()
    out = out.detach().cpu()
    batch_sz = inp.size(0)
    if batch_sz == 0:
        # Nothing to draw; plt.subplots cannot create a grid with zero columns.
        return

    # squeeze=False keeps `axs` a 2-D array even when batch_sz == 1; otherwise
    # a bare Axes object is returned and `.ravel()` fails.
    fig, axs = plt.subplots(1, batch_sz, figsize=(15, 3), facecolor='w', edgecolor='k', squeeze=False)
    fig.subplots_adjust(hspace = .5, wspace=.001)
    axs = axs.ravel()
    for i in range(batch_sz):
        axs[i].xaxis.set_ticks([])
        axs[i].yaxis.set_ticks([])

        # first two feature dimensions are (x,y) positions
        axs[i].scatter(inp[i,:,0], inp[i,:,1])
        axs[i].scatter(out[i,:,0], out[i,:,1])

        
# Only the first batch is inspected: print its tensor shapes and plot it.
for sample_batch in train_loader:
    inp, out = sample_batch
    print(inp.shape, out.shape)

    # TODO (from the starter template):
    #   implement your Deep learning model
    #   implement training routine
    show_sample_batch(sample_batch)
    break

In [None]:
# Predict the test trajectories of every city.
# - `city_name` is used as loop variable so the global `city` (read by the
#   validation cell) is not overwritten.
# - Inputs are moved to `device` so this also works when the model is on the GPU.
# - torch.no_grad() avoids keeping an autograd graph alive for every prediction.
all_preds = []
with torch.no_grad():
    for city_name in cities:
        test_input, _ = get_city_trajectories(city_name, 'test')
        if len(test_input) == 0:
            continue
        # One forward pass per city instead of one per trajectory
        city_batch = torch.as_tensor(np.asarray(test_input), device=device)
        city_preds = pred(city_batch).cpu()
        # Keep one (1, 60, 2) tensor per trajectory, as before
        all_preds.extend(city_preds.split(1))

print(len(all_preds))
# Show only a preview; printing every prediction floods the notebook output.
print(all_preds[:2])

## Calculate Test Predictions and Export to CSV

In [None]:
import pandas as pd

def predictions_to_csv(output_path='predictions_submission.csv'):
    """Predict the test trajectories of every city and write them to a CSV file.

    The file has one row per test trajectory: an ``ID`` column of the form
    ``"<index>_<city>"`` followed by the flattened prediction ``v0 .. v119``
    (60 predicted steps x 2 coordinates).

    Args:
        output_path: Destination of the CSV file.

    Relies on the notebook globals ``cities``, ``get_city_trajectories``,
    ``pred`` and ``device``.
    """
    num_pred_steps = 60
    num_values = num_pred_steps * 2
    value_cols = ["v" + str(i) for i in range(num_values)]
    city_frames = []

    # Inference only: run in eval mode without autograd, then restore the mode.
    was_training = pred.training
    pred.eval()
    try:
        # Test predictions for ALL cities
        for city_name in cities:

            print("Processing city", city_name)

            # Get the test input values for this city
            test_traj_in, _ = get_city_trajectories(city=city_name, split="test")
            if len(test_traj_in) == 0:
                # Nothing to predict; the model cannot reshape an empty batch.
                continue

            # One batched forward pass for the whole city
            with torch.no_grad():
                city_batch = torch.as_tensor(np.asarray(test_traj_in), device=device)
                city_preds = pred(city_batch).cpu().numpy()

            # Reshape the predictions to the submission format size (120).
            # float64 keeps the written digits identical to the previous export.
            city_preds = city_preds.reshape(len(test_traj_in), num_values).astype(np.float64)
            print(city_preds.shape)

            # Keep IDs and numeric values in separate, properly typed columns
            city_frame = pd.DataFrame(city_preds, columns=value_cols)
            city_frame.insert(0, "ID", [str(i) + "_" + city_name for i in range(len(city_frame))])
            city_frames.append(city_frame)
    finally:
        pred.train(was_training)

    # Convert predictions to csv file
    if city_frames:
        sub_df = pd.concat(city_frames, ignore_index=True)
    else:
        sub_df = pd.DataFrame(columns=["ID"] + value_cols)
    sub_df.to_csv(output_path, index=False)


In [None]:
# Run test inference for every city and write 'predictions_submission.csv'
predictions_to_csv()