## Install the package dependencies before running this notebook

In [139]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob
import matplotlib.pyplot as plt

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create training, validation, and test datasets

In [146]:
#This code is inspired by the code from the Week 8 Discussion
from glob import glob
import pickle
import numpy as np

# Prefix for every dataset path; files are looked up as
# ROOT_PATH + <split> + "/" + <city> + "_inputs" / "_outputs".
ROOT_PATH = "./"

# City names used to build the pickle file names (and iterated when city == "all").
cities = [
    "austin",
    "miami",
    "pittsburgh",
    "dearborn",
    "washington-dc",
    "palo-alto",
]

# Split directories that exist on disk.
splits = [
    "train",
    "test",
]

def _load_pickle(path):
    """Unpickle the file at ``path`` and return the object, closing the file."""
    # NOTE: unpickling can execute arbitrary code; only load trusted dataset files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _load_city_split(city, split):
    """Return the ``(inputs, outputs)`` arrays of one city for one split.

    "train" is the first 80% of the city's training file and "val" the
    remaining 20%. Any other split is read from its own directory and has no
    ground truth, so ``outputs`` is an empty ``(0, 60, 2)`` array.
    """
    has_targets = split in ("train", "val")
    # The validation set is carved out of the training files, so both
    # "train" and "val" read from the "train" directory.
    folder = "train" if has_targets else split

    inputs = np.asarray(_load_pickle(ROOT_PATH + folder + "/" + city + "_inputs"))
    if not has_targets:
        return inputs, np.zeros((0, 60, 2))

    outputs = np.asarray(_load_pickle(ROOT_PATH + folder + "/" + city + "_outputs"))
    cut = int(len(inputs) * 0.8)
    if split == "train":
        return inputs[:cut], outputs[:cut]
    return inputs[cut:], outputs[cut:]


def get_city_trajectories(city="palo-alto", split="train", normalized=False):
    """Load trajectory arrays for one city, or for every city when ``city == "all"``.

    Args:
        city: A city name used in the pickle file names, or "all" to
            concatenate every entry of the module-level ``cities`` list
            (in list order).
        split: "train" (first 80% of the training file), "val" (last 20% of
            the training file), or any other directory name such as "test",
            which is loaded in full and has no outputs.
        normalized: Accepted for interface compatibility; currently ignored.

    Returns:
        A tuple ``(inputs, outputs)`` of NumPy arrays. For splits without
        ground truth, ``outputs`` is an empty array of shape ``(0, 60, 2)``.
        The single-city case returns the same pair shape as the "all" case,
        so callers can always unpack two values or index ``[0]``.

    Raises:
        FileNotFoundError: If the expected pickle file does not exist.
    """
    if city != "all":
        return _load_city_split(city, split)

    # Seed with empty float64 arrays so the result keeps the (N, 50, 2) /
    # (N, 60, 2) layout and float64 dtype even when there is nothing to add,
    # then concatenate once instead of re-copying inside the loop.
    input_parts = [np.zeros((0, 50, 2))]
    output_parts = [np.zeros((0, 60, 2))]
    for city_name in cities:
        city_inputs, city_outputs = _load_city_split(city_name, split)
        input_parts.append(city_inputs)
        output_parts.append(city_outputs)

    return np.concatenate(input_parts), np.concatenate(output_parts)

class ArgoverseDataset(Dataset):
    """Map-style dataset of (input trajectory, output trajectory) pairs.

    The trajectories for the requested city and split are loaded once, at
    construction time, through ``get_city_trajectories``.
    """

    def __init__(self, city: str, split:str, transform=None):
        super().__init__()
        trajectories = get_city_trajectories(city=city, split=split, normalized=False)
        self.inputs, self.outputs = trajectories
        # Optional callable applied to each (input, output) pair on access.
        self.transform = transform

    def __len__(self):
        """Number of samples, taken from the inputs array."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the ``idx``-th (input, output) pair, transformed if requested."""
        sample = (self.inputs[idx], self.outputs[idx])
        return self.transform(sample) if self.transform else sample

# Initialize the datasets. Train and validation are wrapped in ArgoverseDataset;
# the test set is kept as the raw input array (element 0 of the returned pair).
city = 'all'
train_dataset = ArgoverseDataset(city=city, split="train")
val_dataset = ArgoverseDataset(city=city, split="val")
test_trajectories = get_city_trajectories(city=city, split="test")
test_dataset = test_trajectories[0]

## Create model

In [147]:
batch_sz = 20  # batch size
# The training array is a city-by-city concatenation, so without shuffling each
# epoch would visit the cities in the same fixed order. Reshuffle every epoch so
# that each mini-batch mixes trajectories from all cities.
train_loader = DataLoader(train_dataset, batch_size=batch_sz, shuffle=True)

In [210]:
# This code is inspired by the code from the Week 8 Discussion
from torch import nn, optim

class Model(nn.Module):
    """Fully connected network mapping a flattened input track to an output track.

    ``forward`` flattens its argument to rows of 100 values, runs them through
    five linear layers (ReLU between them) and reshapes each 120-value result
    to ``(60, 2)``.
    """

    def __init__(self):
        super().__init__()

        # Widths of the ReLU-activated layers: 100 -> 128 -> 64 -> 128 -> 120.
        widths = [100, 128, 64, 128, 120]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Final projection has no activation so outputs are unbounded.
        stack.append(nn.Linear(120, 120))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return predictions of shape ``(-1, 60, 2)`` for input ``x``."""
        flat = x.reshape(-1, 100).float()
        predicted = self.model(flat)
        return predicted.reshape(-1, 60, 2)

In [211]:
# Training hyperparameters: Adam step size, L2 weight decay, and the number of
# passes over the training set.
learning_rate = 1e-3
weight_decay = 1e-5
num_epochs = 50

In [212]:
# Fresh network and the Adam optimizer that updates all of its parameters.
model = Model()
opt = optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

## Train and Validate Model

In [213]:
#This code is inspired by the code from the Week 8 Discussion
import time
import math

# Per-epoch history: epoch index and the reported RMSE value.
indexArray = []
lossArray = []
num_train = len(train_dataset)

for epoch in range(num_epochs):
    # Only the first epoch is timed, to give a rough per-epoch cost.
    is_first_epoch = epoch == 0
    if is_first_epoch:
        startTime = time.time()

    total_loss = 0
    for inp, out in train_loader:
        opt.zero_grad()
        preds = model(inp)
        # Summed (not averaged) squared error over every element of the batch.
        loss = ((preds - out) ** 2).sum()
        loss.backward()
        opt.step()

        total_loss += loss.item()

    if is_first_epoch:
        endTime = time.time()
        print(endTime - startTime)

    # The summed squared error is divided by the number of trajectories, so the
    # "MSE" printed here is the squared error per trajectory, not per coordinate.
    epoch_mse = total_loss / num_train
    epoch_rmse = math.sqrt(epoch_mse)

    indexArray.append(epoch)
    lossArray.append(epoch_rmse)

    print('epoch {}  RMSE: {} MSE: {}'.format(epoch, epoch_rmse, epoch_mse))

10.267403841018677
epoch 0  RMSE: 1149.4500727406355 MSE: 1321235.469723452
epoch 1  RMSE: 670.9788606835506 MSE: 450212.6314841956
epoch 2  RMSE: 540.687425391712 MSE: 292342.8919767182
epoch 3  RMSE: 537.5190483064304 MSE: 288926.72729225067
epoch 4  RMSE: 524.4653857042928 MSE: 275063.94080195256
epoch 5  RMSE: 488.0815245564514 MSE: 238223.57461334992
epoch 6  RMSE: 467.0367504930891 MSE: 218123.32631114393
epoch 7  RMSE: 470.70547101855874 MSE: 221563.64044680324
epoch 8  RMSE: 492.1183880911387 MSE: 242180.5078974206
epoch 9  RMSE: 410.55998944570314 MSE: 168559.50493365587
epoch 10  RMSE: 435.7964867931452 MSE: 189918.577901248
epoch 11  RMSE: 367.72640601534124 MSE: 135222.70968095958
epoch 12  RMSE: 390.52495119176325 MSE: 152509.73750332906
epoch 13  RMSE: 384.51220235216886 MSE: 147849.63375771526
epoch 14  RMSE: 352.53952662561676 MSE: 124284.11783341395
epoch 15  RMSE: 381.3267102833245 MSE: 145410.0599755025
epoch 16  RMSE: 347.8666545979237 MSE: 121011.20938115114
epoch 

In [214]:
#This code is inspired by the code from the Week 8 Discussion
val_loader = DataLoader(val_dataset,batch_size=batch_sz)

# Evaluation only: disable autograd so no computation graph is built or kept
# alive for each batch. The reported number is unchanged.
val_loss = 0
with torch.no_grad():
    for i_batch, sample_batch in enumerate(val_loader):
        inp, out = sample_batch
        preds = model(inp)
        # Summed squared error over every element of the batch.
        loss = ((preds - out) ** 2).sum()

        val_loss += loss.item()

# Square root of the summed squared error per validation trajectory.
print('loss: {}'.format(math.sqrt(val_loss / len(val_dataset))))

loss: 202.00940829359465


## Test Algorithm and Convert to CSV

In [158]:
# Predict every test trajectory and store each prediction flattened to 120
# values per row. Inference runs in chunks under torch.no_grad(): this replaces
# one forward pass plus 120 Python-level element copies per sample, and avoids
# building autograd graphs for predictions that are never back-propagated.
outputs = np.zeros((test_dataset.shape[0], 120))
test_chunk = 1024  # rows per forward pass; only bounds memory use
with torch.no_grad():
    for start in range(0, test_dataset.shape[0], test_chunk):
        stop = start + test_chunk
        chunk_preds = model(torch.as_tensor(test_dataset[start:stop]))
        outputs[start:stop] = chunk_preds.reshape(-1, 120).numpy()

In [132]:
import pandas as pd
# Column headers v0 .. v119, one per flattened prediction value.
columns = ["v" + str(index) for index in range(0, 120)]

# Row ids are "<index within city>_<city>", emitted city by city in this order.
# The counts are the hard-coded number of test rows expected for each city.
citynames = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
testDataAmounts = [6325, 7971, 6361, 3671, 3829, 1686]
rows = [
    str(itemIndex) + "_" + cityname
    for cityname, amount in zip(citynames, testDataAmounts)
    for itemIndex in range(0, amount)
]

df = pd.DataFrame(outputs, index=rows, columns=columns)
df.to_csv('submission.csv', index=True, header=True)