## Install the package dependencies before running this notebook

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob
import matplotlib.pyplot as plt

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create training, validation, and test datasets

In [2]:
#This code is inspired by the code from the Week 7 Discussion
from glob import glob
import pickle
import numpy as np

ROOT_PATH = "./"

cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

def get_city_trajectories(city="palo-alto", split="train", normalized=False):
    f_in = ROOT_PATH + "train" + "/" + city + "_inputs"
    f_out = ROOT_PATH + "train" + "/" + city + "_outputs"
    
    inputs = None
    outputs = None
    
    if city=="all":
        allInputs = np.zeros((0,50,2))
        allOutputs = np.zeros((0,60,2))
        for city in cities:
            if split=="train":
                f_in = ROOT_PATH + split + "/" + city + "_inputs"
                inputs = pickle.load(open(f_in, "rb"))
                n = len(inputs)
                allInputs = np.concatenate((allInputs, np.asarray(inputs)[:int(n * 0.8)]))

                f_out = ROOT_PATH + split + "/" + city + "_outputs"
                outputs = pickle.load(open(f_out, "rb"))
                allOutputs = np.concatenate((allOutputs, np.asarray(outputs)[:int(n * 0.8)]))

            elif split == 'val':
                f_in = ROOT_PATH + 'train' + "/" + city + "_inputs"
                inputs = pickle.load(open(f_in, "rb"))
                n = len(inputs)
                allInputs = np.concatenate((allInputs, np.asarray(inputs)[int(n * 0.8):]))

                f_out = ROOT_PATH + 'train' + "/" + city + "_outputs"
                outputs = pickle.load(open(f_out, "rb"))
                allOutputs = np.concatenate((allOutputs, np.asarray(outputs)[int(n * 0.8):]))

            else:
                f_in = ROOT_PATH + split + "/" + city + "_inputs"
                inputs = pickle.load(open(f_in, "rb"))
                n = len(inputs)
                allInputs = np.concatenate((allInputs, np.asarray(inputs)))
                

        return allInputs, allOutputs
    
    if split=="train":
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)[:int(n * 0.8)]
        
        outputs = pickle.load(open(f_out, "rb"))
        outputs = np.asarray(outputs)[:int(n * 0.8)]
    
    elif split=="val":
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)[int(n * 0.8):]
        
        outputs = pickle.load(open(f_out, "rb"))
        outputs = np.asarray(outputs)[int(n * 0.8):]
    
    else:
        f_in = ROOT_PATH + spiit + "/" + city + "_inputs"
        f_out = ROOT_PATH + split + "/" + city + "_outputs"
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)
        return inputs

    return inputs, outputs

class ArgoverseDataset(Dataset):
    """Dataset class for Argoverse"""
    def __init__(self, city: str, split:str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.transform = transform

        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=False)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):

        data = (self.inputs[idx], self.outputs[idx])
            
        if self.transform:
            data = self.transform(data)

        return data

# intialize a dataset
city = 'all' 
train_dataset  = ArgoverseDataset(city = city, split = "train")
val_dataset = ArgoverseDataset(city = city, split = "val")
test_dataset = get_city_trajectories(city = city, split = "test")[0]

## Create model

In [3]:
batch_sz = 20  # batch size 
train_loader = DataLoader(train_dataset,batch_size=batch_sz)

In [4]:
# This code is inspired by the code from the Week 7 Discussion
from torch import nn, optim

class Model(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.model = nn.Sequential(
            nn.Linear(100, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 120),
            nn.ReLU(),
            nn.Linear(120, 120)
        )
    
    def forward(self, x):
        x = x.reshape(-1, 100).float()
        x = self.model(x)
        x = x.reshape(-1, 60, 2)
        return x

In [5]:
learning_rate = 0.001
weight_decay = 0.00001
num_epochs = 50

In [6]:
model = Model()
opt = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

## Train and Validate Model

In [7]:
#This code is inspired by the code from the Week 7 Discussion
import time
import math
for epoch in range(num_epochs):
    if (epoch == 0):
        startTime = time.time()
    
    total_loss = 0
    for i_batch, sample_batch in enumerate(train_loader):
        inp, out = sample_batch
        preds = model(inp)
        loss = ((preds - out) ** 2).sum()
        
        opt.zero_grad()
        loss.backward()
        opt.step()
        
        total_loss += loss.item()
    if (epoch == 0):
        endTime = time.time()
        print(endTime - startTime)
    
    print('epoch {}  RMSE: {} MSE: {}'.format(epoch, math.sqrt(total_loss / len(train_dataset)), total_loss / len(train_dataset)))

11.326593160629272
epoch 0  RMSE: 1218.974146683382 MSE: 1485897.9702824794
epoch 1  RMSE: 742.7175980160783 MSE: 551629.430402773
epoch 2  RMSE: 687.5370499835594 MSE: 472707.1951000955
epoch 3  RMSE: 651.1385277589699 MSE: 423981.38233211875
epoch 4  RMSE: 632.7900863104652 MSE: 400423.29333280603
epoch 5  RMSE: 562.4873208565477 MSE: 316391.98612437694
epoch 6  RMSE: 559.8499143623342 MSE: 313431.9266115129
epoch 7  RMSE: 565.7494217395271 MSE: 320072.40819860925
epoch 8  RMSE: 495.8951480349337 MSE: 245911.99784458883
epoch 9  RMSE: 534.9669064374582 MSE: 286189.5909832642
epoch 10  RMSE: 518.2426430860526 MSE: 268575.43711281766
epoch 11  RMSE: 523.7283365771924 MSE: 274291.37053391297
epoch 12  RMSE: 434.3020149986178 MSE: 188618.24023185964
epoch 13  RMSE: 425.6830729540998 MSE: 181206.07859964547
epoch 14  RMSE: 421.9940716431123 MSE: 178078.9965019322
epoch 15  RMSE: 414.40526758823046 MSE: 171731.7258048729
epoch 16  RMSE: 376.1689037156572 MSE: 141503.0441226394
epoch 17  RM

In [8]:
#This code is inspired by the code from the Week 7 Discussion
val_loader = DataLoader(val_dataset,batch_size=batch_sz)

val_loss = 0
for i_batch, sample_batch in enumerate(val_loader):
    inp, out = sample_batch
    preds = model(inp)
    loss = ((preds - out) ** 2).sum()

    val_loss += loss.item()
print('loss: {}'.format(math.sqrt(val_loss / len(val_dataset))))

loss: 188.59134410253728


## Test Algorithm and Convert to CSV

In [9]:
outputs = np.zeros((test_dataset.shape[0], 120))
for inputIndex in range(0, test_dataset.shape[0]):
        output = model(torch.tensor(test_dataset[inputIndex]))
        output = output.reshape(-1,120)
        for outputIndex in range (0, 120):
            outputs[inputIndex][outputIndex] = output[0][outputIndex]

In [10]:
import pandas as pd
columns = []
for index in range(0, 120):
    columns.append("v" + str(index))
citynames = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
testDataAmounts = [6325, 7971, 6361, 3671, 3829, 1686]
rows = []
for arrayIndex in range(0, 6):
    for itemIndex in range(0, testDataAmounts[arrayIndex]):
        rows.append(str(itemIndex) + "_" + citynames[arrayIndex])

df = pd.DataFrame(outputs, index=rows, columns=columns)
df.to_csv('submission.csv', index=True, header=True)