## Install the package dependencies before running this notebook

In [11]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy as np
import pickle5 as pickle
from glob import glob

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create a Torch.Dataset class for the training dataset

In [38]:

ROOT_PATH = "./"

cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

def get_cities_trajectories(split="train"):
    inputs_cities = None
    outputs_cities = None
    for city in cities:
        if split=="train":
            f_in = ROOT_PATH + split + "/" + city + "_inputs"
            inputs = pickle.load(open(f_in, "rb"))
            n = len(inputs)
            inputs = np.asarray(inputs)[:int(n * 0.8)]
            f_out = ROOT_PATH + split + "/" + city + "_outputs"
            outputs = pickle.load(open(f_out, "rb"))
            outputs = np.asarray(outputs)[:int(n * 0.8)]
        elif split == 'val':
            f_in = ROOT_PATH + 'train' + "/" + city + "_inputs"
            inputs = pickle.load(open(f_in, "rb"))
            n = len(inputs)
            inputs = np.asarray(inputs)[int(n * 0.8):]

            f_out = ROOT_PATH + 'train' + "/" + city + "_outputs"
            outputs = pickle.load(open(f_out, "rb"))
            outputs = np.asarray(outputs)[int(n * 0.8):] 
        else:
            f_in = ROOT_PATH + split + "/" + city + "_inputs"
            inputs = pickle.load(open(f_in, "rb"))
            n = len(inputs)
            inputs = np.asarray(inputs)
            outputs= None
        if inputs_cities is None:
            inputs_cities =inputs
        else:
            i = (np.concatenate((inputs_cities, inputs), axis=0))
            inputs_cities = i
        if outputs_cities is None:
            outputs_cities = outputs
        else:
            o = np.concatenate((outputs_cities, outputs), axis=0)
            outputs_cities= o
    return inputs_cities, outputs_cities  


class ArgoverseDataset(Dataset):
    """Dataset class for Argoverse"""
    def __init__(self, split:str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.transform = transform

        self.inputs, self.outputs = get_cities_trajectories(split=split)
        global_mean = np.mean(self.inputs, axis = (0,1), keepdims = True)
        global_std = np.std(np.sqrt(self.inputs[:, :, 0]**2 + self.inputs[:, :, 0]**2))
        print(global_mean)
        print(global_std)
        #if split == "train":
            #self.inputs = (self.inputs - global_mean)/global_std
            
            #self.outputs = (self.outputs - global_mean)/global_std
        #else:
            #self.inputs = (self.inputs - global_mean)/global_std
            #self.outputs = (self.outputs - global_mean)/global_std
        
    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):

        data = (self.inputs[idx], self.outputs[idx])
            
        if self.transform:
            data = self.transform(data)

        return data

# intialize a dataset
city = 'miami' 
split = 'train'
train_dataset  = ArgoverseDataset(split = split)

[[[2791.70427041 1064.76260345]]]
4100.326502228328


In [39]:
val_dataset = ArgoverseDataset(split = 'val')


[[[2779.01081344 1063.5660105 ]]]
4124.162302888706


In [40]:
print(len(val_dataset))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

40765
cuda:0


## Create a DataLoader class for training

batch_sz = 16  # batch size 
train_loader = DataLoader(train_dataset,batch_size=batch_sz)

In [41]:
batch_sz = 64# batch size 
train_loader = DataLoader(train_dataset,batch_size=batch_sz)

In [42]:
from torch import nn, optim

# Linear Regression
class LR(torch.nn.Module):
    def __init__(self, input_dim, out_dim):
        super(LR, self).__init__()
        self.model = nn.Linear(input_dim, out_dim).cuda() # input_dim = input_length*2
        
    def forward(self, x, output_steps): 
        x = x.reshape(x.shape[0], -1)
        outputs = []
        for i in range(output_steps):
            out = self.model(x)    
            outputs.append(out)
            x = torch.cat([x[:,2:],  out], dim = 1)
            
        outputs = torch.cat(outputs, dim = 1)
        return outputs.reshape(outputs.shape[0], output_steps, 2)
    
model = LR(input_dim = 50 * 2, out_dim = 2)

In [51]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pred = model
pred.to(device)
#opt = optim.SGD(pred.parameters(),  lr=0.005)
opt = optim.Adam(pred.parameters(), lr=.0008)

In [52]:
inputs, outputs = get_city_trajectories(city=city, split=split, normalized=False)
global_mean = np.mean(inputs, axis = (0,1), keepdims = True)
global_std = np.std(np.sqrt(inputs[:, :, 0]**2 + inputs[:, :, 0]**2))
gm = torch.from_numpy(global_mean).to(device)
gs = (global_std)
for epoch in range(20):
    
    total_loss = 0
    for i_batch, sample_batch in enumerate(train_loader):
        inp, out = sample_batch
        inp = inp.to(device)
        out = out.to(device)
        preds = pred(inp.float(),60)
        
        loss = ((preds - out) ** 2).sum()
        
        opt.zero_grad()
        loss.backward()
        opt.step()
        
        total_loss += loss.item()
        
    print('epoch {} loss: {}'.format(epoch, total_loss / len(train_dataset)))

epoch 0 loss: 142482.76895137384
epoch 1 loss: 139067.3133311512
epoch 2 loss: 138263.92298756246
epoch 3 loss: 137519.7380649174
epoch 4 loss: 137106.3895408354
epoch 5 loss: 136554.22388873625
epoch 6 loss: 135953.06998490944
epoch 7 loss: 135621.22173206368
epoch 8 loss: 135005.71122904358
epoch 9 loss: 134523.1713359351


KeyboardInterrupt: 

In [53]:
val_loader = DataLoader(val_dataset,batch_size=batch_sz)
inputs, outputs = get_cities_trajectories(split="val")
val_loss = 0
global_mean = np.mean(inputs, axis = (0,1), keepdims = True)
global_std = np.std(np.sqrt(inputs[:, :, 0]**2 + inputs[:, :, 0]**2))
gm = torch.from_numpy(global_mean).to(device)
gs = (global_std)
for i_batch, sample_batch in enumerate(val_loader):
    inp, out = sample_batch
    inp, out = inp.to(device), out.to(device)
   # print(out.size())
    preds = pred(inp.float(),60)
    #preds = (preds * gs ) + gm
    loss = ((preds - out) ** 2).sum()
    val_loss += loss.item()
print('loss: {}'.format(val_loss / len(val_dataset)))

loss: 131139.51032478898


## Sample a batch of data and visualize 

In [54]:
import matplotlib.pyplot as plt
import random


def show_sample_batch(sample_batch):
    """visualize the trajectory for a batch of samples"""
    inp, out = sample_batch
    batch_sz = inp.size(0)
    agent_sz = inp.size(1)
    
    fig, axs = plt.subplots(1,batch_sz, figsize=(15, 3), facecolor='w', edgecolor='k')
    fig.subplots_adjust(hspace = .5, wspace=.001)
    axs = axs.ravel()   
    for i in range(batch_sz):
        axs[i].xaxis.set_ticks([])
        axs[i].yaxis.set_ticks([])
        
        # first two feature dimensions are (x,y) positions
        axs[i].scatter(inp[i,:,0], inp[i,:,1])
        axs[i].scatter(out[i,:,0], out[i,:,1])

        
for i_batch, sample_batch in enumerate(train_loader):
    inp, out = sample_batch
    print(inp.shape, out.shape)
    break
    """
    TODO:
      implement your Deep learning model
      implement training routine
    """
    show_sample_batch(sample_batch)
    break
def get_traj_pred(test_traj_in):
    test_data = torch.tensor(test_traj_in).to(device)
    test_preds = pred(test_data.float(),60)
    #est_preds.detach().numpy()
    return test_preds.cpu().detach().numpy()

torch.Size([64, 50, 2]) torch.Size([64, 60, 2])


In [61]:
print(model)

LR(
  (model): Linear(in_features=100, out_features=2, bias=True)
)


In [56]:
num_pred_steps=60
all_preds = np.zeros(shape=(0, num_pred_steps * 2))
city_col = np.array([])

for city_name in cities:

    print("Processing city", city_name)
    
    test_traj_in, test_traj_out = get_city_trajectories(city=city_name, split="test")
    print(test_traj_in.shape)
    
    test_pred_arr = get_traj_pred(test_traj_in)
    print(test_pred_arr.shape)
    
    test_pred_arr_reshaped = np.reshape(test_pred_arr, newshape=(test_traj_in.shape[0], num_pred_steps * 2))
    print(test_pred_arr_reshaped.shape)

    all_preds = np.r_[all_preds, test_pred_arr_reshaped]
    city_col = np.r_[city_col, [str(i) + "_" + city_name for i in range(test_pred_arr.shape[0])]]

Processing city austin
(6325, 50, 2)
(6325, 60, 2)
(6325, 120)
Processing city miami
(7971, 50, 2)
(7971, 60, 2)
(7971, 120)
Processing city pittsburgh
(6361, 50, 2)
(6361, 60, 2)
(6361, 120)
Processing city dearborn
(3671, 50, 2)
(3671, 60, 2)
(3671, 120)
Processing city washington-dc
(3829, 50, 2)
(3829, 60, 2)
(3829, 120)
Processing city palo-alto
(1686, 50, 2)
(1686, 60, 2)
(1686, 120)


In [59]:
import pandas as pd
sub_df = pd.DataFrame(np.c_[city_col, all_preds], columns=[np.r_[["ID"], ["v" + str(i) for i in range(120)]]])
sub_df.to_csv('linearRegression1.csv', index=None)

In [60]:
sample_sub = pd.read_csv('linearRegression1.csv')
sample_sub.tail()

Unnamed: 0,ID,v0,v1,v2,v3,v4,v5,v6,v7,v8,...,v110,v111,v112,v113,v114,v115,v116,v117,v118,v119
29838,1681_palo-alto,-1296.938354,-317.593842,-1360.113647,-432.915009,-1360.436157,-443.81189,-1366.592163,-438.40741,-1374.495483,...,-1385.634644,-455.768433,-1386.546143,-461.132172,-1388.46167,-459.14624,-1380.809937,-454.081055,-1384.614868,-459.819
29839,1682_palo-alto,133.752014,-168.185791,137.953461,-53.0741,129.624451,-48.918953,129.056076,-52.447479,131.411514,...,130.320023,-34.802097,129.449585,-33.509361,130.614258,-34.80735,128.227402,-37.048088,128.762329,-35.630714
29840,1683_palo-alto,-1437.814331,2159.3479,-1438.039795,2161.934326,-1438.383911,2162.068359,-1438.486694,2162.055908,-1438.495361,...,-1433.952271,2158.454346,-1433.804321,2158.375,-1433.615967,2158.23877,-1433.540527,2158.074951,-1433.372437,2158.020996
29841,1684_palo-alto,986.874146,1293.890869,1042.423462,1367.714233,1045.028809,1376.545044,1050.776489,1372.824829,1057.262573,...,1069.884033,1380.907349,1070.914185,1385.210815,1072.303589,1383.636475,1066.116821,1379.60437,1069.384888,1384.231689
29842,1685_palo-alto,-232.771332,593.715393,-127.262581,442.04834,-100.314964,449.865479,-87.291542,451.581055,-80.366112,...,-58.30793,429.566803,-53.646492,435.490417,-53.624962,435.860443,-60.354939,433.429779,-54.685375,439.759613
