## Install the package dependencies before running this notebook

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob
import matplotlib.pyplot as plt

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create training, validation, and test datasets

In [13]:
#This code is inspired by the code from the Week 7 Discussion
from glob import glob
import pickle
import numpy as np

ROOT_PATH = "./"

cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

def get_city_trajectories(city="palo-alto", split="train", normalized=False):
    f_in = ROOT_PATH + "train" + "/" + city + "_inputs"
    f_out = ROOT_PATH + "train" + "/" + city + "_outputs"
    
    inputs = None
    outputs = None
    
    if city=="all":
        allInputs = np.zeros((0,50,2))
        allOutputs = np.zeros((0,60,2))
        for city in cities:
            if split=="train":
                f_in = ROOT_PATH + split + "/" + city + "_inputs"
                inputs = pickle.load(open(f_in, "rb"))
                n = len(inputs)
                allInputs = np.concatenate((allInputs, np.asarray(inputs)[:int(n * 0.8)]))

                f_out = ROOT_PATH + split + "/" + city + "_outputs"
                outputs = pickle.load(open(f_out, "rb"))
                allOutputs = np.concatenate((allOutputs, np.asarray(outputs)[:int(n * 0.8)]))

            elif split == 'val':
                f_in = ROOT_PATH + 'train' + "/" + city + "_inputs"
                inputs = pickle.load(open(f_in, "rb"))
                n = len(inputs)
                allInputs = np.concatenate((allInputs, np.asarray(inputs)[int(n * 0.8):]))

                f_out = ROOT_PATH + 'train' + "/" + city + "_outputs"
                outputs = pickle.load(open(f_out, "rb"))
                allOutputs = np.concatenate((allOutputs, np.asarray(outputs)[int(n * 0.8):]))

            else:
                f_in = ROOT_PATH + split + "/" + city + "_inputs"
                inputs = pickle.load(open(f_in, "rb"))
                n = len(inputs)
                allInputs = np.concatenate((allInputs, np.asarray(inputs)))
                
        if (normalized):
            allInputs = (allInputs - np.min(allInputs))/(np.max(allInputs) - np.min(allInputs))
            allOutputs = (allOutputs - np.min(allOutputs))/(np.max(allOutputs) - np.min(allOutputs))
        return allInputs, allOutputs
    
    if split=="train":
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)[:int(n * 0.8)]
        
        outputs = pickle.load(open(f_out, "rb"))
        outputs = np.asarray(outputs)[:int(n * 0.8)]
    
    elif split=="val":
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)[int(n * 0.8):]
        
        outputs = pickle.load(open(f_out, "rb"))
        outputs = np.asarray(outputs)[int(n * 0.8):]
    
    else:
        f_in = ROOT_PATH + spiit + "/" + city + "_inputs"
        f_out = ROOT_PATH + split + "/" + city + "_outputs"
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)
        return inputs

    return inputs, outputs

class ArgoverseDataset(Dataset):
    """Dataset class for Argoverse"""
    def __init__(self, city: str, split:str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.transform = transform

        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=False)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):

        data = (self.inputs[idx], self.outputs[idx])
            
        if self.transform:
            data = torch.nn.functional.normalize(data)

        return data

# intialize a dataset
city = 'all' 
train_dataset  = ArgoverseDataset(city = city, split = "train")
val_dataset = ArgoverseDataset(city = city, split = "val")
test_dataset= get_city_trajectories(city = city, split = "test")[0]

## Create model

In [4]:
batch_size = 1  # batch size 
train_loader = DataLoader(train_dataset,batch_size=batch_size)

In [5]:
# This code is inspired by the code from the Week 4 Discussion
from torch import nn, optim

class Model(nn.Module):
    
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.hidden_size = hidden_size
        self.inputToHidden = nn.Linear(input_size + hidden_size, hidden_size)
        self.inputToOutput = nn.Linear(input_size + hidden_size, output_size)
        
    
    def forward(self, x, hidden_state):
        combined = torch.cat((x, hidden_state), 1)
        hidden = torch.sigmoid(self.inputToHidden(combined))
        output = self.inputToOutput(combined)
        return output, hidden
        
    def init_state(self):
        return torch.zeros(1, self.hidden_size)

In [6]:
learning_rate = 0.001
weight_decay = 0.00001
num_epochs = 1
hidden_size = 1

In [7]:
criterion = nn.MSELoss()
model = Model(2, hidden_size, 2)
opt = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

## Train and Validate Model

In [8]:
#This code is inspired by the code from the Week 4 Discussion
import time
import math
import random

for epoch in range(num_epochs):
    if (epoch == 0):
        startTime = time.time()
    
    total_loss = 0
    for i_batch, sample_batch in enumerate(train_loader):
        hidden_state = model.init_state()
        inp, out = sample_batch
        inp = inp.reshape(50,2)
        out = out.reshape(60,2)
        inp = torch.cat((inp, out))
        preds = torch.empty(0,2)
        for index in range(0, inp.shape[0] - 1):
            pred, hidden_state = model(inp[index].reshape(1,2).float(), hidden_state)
            preds = torch.cat((preds, pred))
        loss = criterion(preds, inp[:109].reshape(109,2).float())        
        opt.zero_grad()
        loss.backward()
        opt.step()
        total_loss += loss.item()
        if (i_batch % 1000 == 0):
            print('step {}  RMSE: {} MSE: {}'.format(i_batch, math.sqrt(total_loss / len(train_dataset)), total_loss / len(train_dataset)))
    if (epoch == 0):
        endTime = time.time()
        print(endTime - startTime)
    print('epoch {}  RMSE: {} MSE: {}'.format(epoch, math.sqrt(total_loss / len(train_dataset)), total_loss / len(train_dataset)))

step 0  RMSE: 2.4499981194057643 MSE: 6.002490785091781
step 1000  RMSE: 113.07017086644231 MSE: 12784.863539766458
step 2000  RMSE: 121.2331578872693 MSE: 14697.478571319565
step 3000  RMSE: 123.52305762847568 MSE: 15257.945765887724
step 4000  RMSE: 124.21106198610012 MSE: 15428.387919714807
step 5000  RMSE: 124.40049093812509 MSE: 15475.482145646543
step 6000  RMSE: 124.41709253848543 MSE: 15479.61291573005
step 7000  RMSE: 124.41782948808824 MSE: 15479.796294527
step 8000  RMSE: 124.41784535095987 MSE: 15479.800241775367
step 9000  RMSE: 124.41785460623339 MSE: 15479.80254481783
step 10000  RMSE: 124.41786317832056 MSE: 15479.804677859294
step 11000  RMSE: 124.41787107215336 MSE: 15479.806642126976
step 12000  RMSE: 124.41787906326468 MSE: 15479.808630601156
step 13000  RMSE: 124.41788556621087 MSE: 15479.810248766742
step 14000  RMSE: 124.41789183983933 MSE: 15479.811809869956
step 15000  RMSE: 124.4179012886069 MSE: 15479.81416106153
step 16000  RMSE: 124.41793119450413 MSE: 1547

step 135000  RMSE: 124.42978055707813 MSE: 15482.770289482618
step 136000  RMSE: 124.42985015211221 MSE: 15482.787608877097
step 137000  RMSE: 124.42985087112926 MSE: 15482.787787811467
step 138000  RMSE: 124.42990984688961 MSE: 15482.802464505075
step 139000  RMSE: 124.42993263767114 MSE: 15482.808136215379
step 140000  RMSE: 124.42999677839272 MSE: 15482.824098270823
step 141000  RMSE: 124.43001862506371 MSE: 15482.829535033703
step 142000  RMSE: 124.4300595130471 MSE: 15482.839710420443
step 143000  RMSE: 124.43013996074275 MSE: 15482.859730650029
step 144000  RMSE: 124.43017813875151 MSE: 15482.869231641434
step 145000  RMSE: 124.43020388750911 MSE: 15482.875639487085
step 146000  RMSE: 124.43021379026624 MSE: 15482.87810389136
step 147000  RMSE: 124.43028452264623 MSE: 15482.895706386693
step 148000  RMSE: 124.43030550956854 MSE: 15482.900929204565
step 149000  RMSE: 124.4303912470711 MSE: 15482.92226589919
step 150000  RMSE: 124.43041816673602 MSE: 15482.928965148789
step 151000 

In [9]:
#This code is inspired by the code from the Week 4 Discussion
val_loader = DataLoader(val_dataset,batch_size=batch_size)

val_loss = 0
for i_batch, sample_batch in enumerate(val_loader):
    inp, out = sample_batch
    hidden_state = model.init_state()
    inp = inp.reshape(50,2)
    out = out.reshape(60,2)
    for index in range(0,49):
        output, hidden_state = model(inp[index].reshape(1,2).float(), hidden_state)
    preds = torch.empty(0,2)
    for outputIndex in range(0,60):
        output, hidden_state = model(output, hidden_state)
        preds = torch.cat((preds, output), 0)
    loss = ((preds - out) ** 2).sum()
    val_loss += loss.item()
    if (i_batch % 100 == 0):
        print('Step: {} Current Loss: {} Average Loss: {}'.format(i_batch, math.sqrt(loss.item() / len(val_dataset)), math.sqrt(val_loss / len(val_dataset))))
print('loss: {}'.format(math.sqrt(val_loss / len(val_dataset))))

Step: 0 Current Loss: 1.0220745257157804 Average Loss: 1.0220745257157804
Step: 100 Current Loss: 1.5588431560116875 Average Loss: 11.051068794722688
Step: 200 Current Loss: 0.1912475211046974 Average Loss: 15.920719203452219
Step: 300 Current Loss: 1.6476144581318188 Average Loss: 19.81061528906282
Step: 400 Current Loss: 1.7263631019303247 Average Loss: 22.78998019784148
Step: 500 Current Loss: 0.54830514326979 Average Loss: 25.629256597761817
Step: 600 Current Loss: 1.1172601406316298 Average Loss: 28.17327526424604
Step: 700 Current Loss: 1.8560798804926055 Average Loss: 30.185112445673163
Step: 800 Current Loss: 2.0078420244560142 Average Loss: 31.923962620927934
Step: 900 Current Loss: 1.3649112359245672 Average Loss: 33.71973154565406
Step: 1000 Current Loss: 1.0338913718803853 Average Loss: 35.992182353755155
Step: 1100 Current Loss: 2.121155776123972 Average Loss: 37.86860004985395
Step: 1200 Current Loss: 0.8831115987533428 Average Loss: 39.11857179364235
Step: 1300 Current L

Step: 10800 Current Loss: 0.9880803914079919 Average Loss: 112.97902886430875
Step: 10900 Current Loss: 1.111233048616819 Average Loss: 113.27462616471027
Step: 11000 Current Loss: 0.1482761749886629 Average Loss: 113.76448935233628
Step: 11100 Current Loss: 0.750734807146507 Average Loss: 114.20806252277455
Step: 11200 Current Loss: 0.279166906049315 Average Loss: 114.60811404483137
Step: 11300 Current Loss: 0.8423035840377141 Average Loss: 115.04630698652045
Step: 11400 Current Loss: 0.9113743926795165 Average Loss: 115.51910294025545
Step: 11500 Current Loss: 0.6492028542674129 Average Loss: 115.94548648202938
Step: 11600 Current Loss: 1.0369517741981926 Average Loss: 116.31854751446812
Step: 11700 Current Loss: 0.5740462574041378 Average Loss: 116.66774808944018
Step: 11800 Current Loss: 0.1203534575418534 Average Loss: 116.98224812619607
Step: 11900 Current Loss: 1.0765625637766938 Average Loss: 117.34018347340977
Step: 12000 Current Loss: 0.3652678257223558 Average Loss: 117.7160

Step: 21400 Current Loss: 0.28449742059687894 Average Loss: 149.827920127252
Step: 21500 Current Loss: 2.2243872450719278 Average Loss: 150.17046697769018
Step: 21600 Current Loss: 1.4620166182644443 Average Loss: 150.51951779777693
Step: 21700 Current Loss: 0.8836848626083547 Average Loss: 150.91052195667837
Step: 21800 Current Loss: 1.6236717974286488 Average Loss: 151.21003416274982
Step: 21900 Current Loss: 1.0080660297830586 Average Loss: 151.55292422735505
Step: 22000 Current Loss: 0.04427086372684202 Average Loss: 151.8633463297293
Step: 22100 Current Loss: 0.17773831628604414 Average Loss: 152.1595618393546
Step: 22200 Current Loss: 1.850757483143643 Average Loss: 152.5399578622242
Step: 22300 Current Loss: 0.246051833987007 Average Loss: 152.7963064902626
Step: 22400 Current Loss: 0.9880648584719707 Average Loss: 153.13361870446644
Step: 22500 Current Loss: 1.5176834486117488 Average Loss: 153.4405937904418
Step: 22600 Current Loss: 0.10637248149323003 Average Loss: 153.797648

Step: 32000 Current Loss: 0.8932662081127601 Average Loss: 190.21525337969138
Step: 32100 Current Loss: 2.388120396359847 Average Loss: 190.7977796320859
Step: 32200 Current Loss: 0.9781009077608318 Average Loss: 191.2628354012435
Step: 32300 Current Loss: 1.1222689616416834 Average Loss: 191.79065919142354
Step: 32400 Current Loss: 0.24421184140426197 Average Loss: 192.31669209390247
Step: 32500 Current Loss: 0.17138082688389908 Average Loss: 192.72341303587385
Step: 32600 Current Loss: 1.022877293966481 Average Loss: 193.23488768811052
Step: 32700 Current Loss: 2.207233032315044 Average Loss: 193.74259624489747
Step: 32800 Current Loss: 0.04284785551811867 Average Loss: 194.2704633371137
Step: 32900 Current Loss: 0.19646803586310213 Average Loss: 194.76514896159262
Step: 33000 Current Loss: 1.966588648442623 Average Loss: 195.27379662854304
Step: 33100 Current Loss: 2.0550447687572926 Average Loss: 195.7476972879032
Step: 33200 Current Loss: 0.14252725998970794 Average Loss: 196.3332

## Test Algorithm and Convert to CSV

In [14]:
outputs = np.zeros((test_dataset.shape[0], 120))
for inputIndex in range(0, test_dataset[0].shape[0]):
    hidden_state = model.init_state()
    for index in range(0,49):
        output, hidden_state = model(torch.tensor(test_dataset[inputIndex][index].reshape(1,2)).float(), hidden_state)
    preds = torch.empty(0,2)
    for outputIndex in range(0,60):
        output, hidden_state = model(output, hidden_state)
        preds = torch.cat((preds, output), 0)
    output = preds.reshape(-1,120)
    for outputIndex in range (0, 120):
        outputs[inputIndex][outputIndex] = output[0][outputIndex]

In [15]:
import pandas as pd
columns = []
for index in range(0, 120):
    columns.append("v" + str(index))
citynames = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
testDataAmounts = [6325, 7971, 6361, 3671, 3829, 1686]
rows = []
for arrayIndex in range(0, 6):
    for itemIndex in range(0, testDataAmounts[arrayIndex]):
        rows.append(str(itemIndex) + "_" + citynames[arrayIndex])

df = pd.DataFrame(outputs, index=rows, columns=columns)
df.to_csv('submission.csv', index=True, header=True)