## Install the package dependencies before running this notebook

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob
import matplotlib.pyplot as plt

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create a `torch.utils.data.Dataset` class for the training dataset

In [197]:
from glob import glob
import pickle
import numpy as np

# Directory prefix that get_city_trajectories prepends to "<split>/<city>_inputs"
# and "<split>/<city>_outputs" when it opens the pickled trajectory files.
ROOT_PATH = "./"

# City names; the same six cities are listed with their trajectory counts in the first cell.
cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
# Split names listed here. get_city_trajectories additionally accepts "val",
# which it takes from the "train" files rather than from a directory of its own.
splits = ["train", "test"]

def get_city_trajectories(city="palo-alto", split="train", normalized=False):
    """Load the pickled trajectories of one city for the requested split.

    Files are read from ``ROOT_PATH + <directory> + "/" + city + "_inputs"``
    (and ``"_outputs"``). Both ``"train"`` and ``"val"`` read the ``train``
    directory: the first 80% of the trajectories form the training split and
    the remaining ones the validation split. ``"test"`` reads only the inputs
    file from the ``test`` directory.

    Args:
        city: City name used as the file-name prefix.
        split: One of ``"train"``, ``"val"`` or ``"test"``.
        normalized: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A tuple ``(inputs, outputs)`` of NumPy arrays. ``outputs`` is ``None``
        for the ``"test"`` split. For any other unrecognised ``split`` a
        message is printed and ``(None, None)`` is returned.
    """
    # Fraction of the "train" files assigned to the training split; the rest is validation.
    train_fraction = 0.8

    def _load(directory, suffix):
        """Read one pickled trajectory file and return it as an array."""
        path = ROOT_PATH + directory + "/" + city + suffix
        # NOTE: pickle can execute arbitrary code on load; only point ROOT_PATH
        # at trusted dataset files.
        # The context manager closes the handle even if unpickling fails.
        with open(path, "rb") as handle:
            return np.asarray(pickle.load(handle))

    if split in ("train", "val"):
        inputs = _load("train", "_inputs")
        outputs = _load("train", "_outputs")
        # The cut index is derived from the inputs and applied to both arrays
        # so that inputs and outputs stay aligned.
        cut = int(len(inputs) * train_fraction)
        rows = slice(None, cut) if split == "train" else slice(cut, None)
        return inputs[rows], outputs[rows]

    if split == "test":
        return _load("test", "_inputs"), None

    print('\"split\" should be train, val, or test.')
    return None, None

class ArgoverseDataset(Dataset):
    """Argoverse trajectories of one city, centred on each track's first input position.

    Besides positions, every item carries per-step displacements
    ("velocities"): the difference between consecutive positions with a zero
    row prepended, so a velocity sequence has the same length as its position
    sequence.

    Attributes:
        inputs: Centred input positions, indexed (trajectory, step, coordinate).
        outputs: Centred output positions, or ``None`` when the split provides
            no outputs (the ``"test"`` split).
        input_velocities: Zero-padded step differences of ``inputs``.
        output_velocities: Zero-padded step differences of ``outputs``, or
            ``None`` when ``outputs`` is ``None``.
        transform: Optional callable applied to each item tuple.
    """
    def __init__(self, city: str, split:str, transform=None):
        """Load one city/split and precompute centred positions and velocities.

        Args:
            city: City name forwarded to ``get_city_trajectories``.
            split: Split name forwarded to ``get_city_trajectories``.
            transform: Optional callable applied to every item in ``__getitem__``.

        Raises:
            ValueError: If no inputs could be loaded for ``split``, or if the
                numbers of input and output trajectories differ.
        """
        super().__init__()
        self.transform = transform
        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=False)
        if self.inputs is None:
            raise ValueError(f"no trajectories could be loaded for split {split!r}")

        # Centering: subtract each trajectory's first input position from all
        # of its positions. The origin is copied so the in-place subtraction
        # never reads from the memory it is overwriting, and its middle axis of
        # length 1 broadcasts over any sequence length.
        origin = self.inputs[:, :1, :].copy()
        self.inputs -= origin
        self.input_velocities = self._velocities(self.inputs)

        if self.outputs is None:
            # No ground truth for this split (e.g. "test"): nothing to centre.
            self.output_velocities = None
        else:
            if len(self.outputs) != len(self.inputs):
                # Without this check a single input trajectory would silently
                # broadcast against any number of outputs.
                raise ValueError(
                    f"got {len(self.inputs)} input trajectories but {len(self.outputs)} output trajectories"
                )
            # Outputs are expressed relative to the same origin as their inputs.
            self.outputs -= origin
            self.output_velocities = self._velocities(self.outputs)

    @staticmethod
    def _velocities(positions):
        """Return step differences along axis 1 with a leading row of zeros."""
        count, _, dims = positions.shape
        # np.zeros defaults to float64, so the result dtype is promoted exactly
        # as it would be by stacking a float64 zero row onto the differences.
        return np.concatenate((np.zeros((count, 1, dims)), np.diff(positions, axis=1)), axis=1)

    def __len__(self):
        """Number of trajectories in the split."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the item at ``idx``, passed through ``transform`` if one was given.

        With outputs available the item is
        ``(inputs, outputs, input_velocities, output_velocities)``; without
        outputs it is ``(inputs, input_velocities)``, so that no ``None``
        entries end up inside the item tuple.
        """
        if self.outputs is None:
            data = (self.inputs[idx], self.input_velocities[idx])
        else:
            data = (self.inputs[idx], self.outputs[idx], self.input_velocities[idx], self.output_velocities[idx])

        if self.transform is not None:
            data = self.transform(data)

        return data

## Creating velocities

In [198]:
# Build the dataset for the "train" split of a single city.
city = "austin"
train_dataset = ArgoverseDataset(city=city, split="train")

In [199]:
# First item of the training dataset, fetched with normal subscription
# instead of calling the __getitem__ dunder directly.
austin0 = train_dataset[0]

In [200]:
# Unpack the item into its positions and velocities.
(
    austin0_input,
    austin0_output,
    austin0_input_velocities,
    austin0_output_velocities,
) = austin0

Note: a limitation of the current version is that, even if the output velocities are predicted correctly, the first output position must still be predicted accurately, because the output positions are recovered as that first position plus the cumulative sum of the velocities. Predicting this single position should nevertheless be an easier task than predicting the entire output sequence.

In [183]:
# Five random 2-D points in [0, 1), used as a toy trajectory in the checks below.
x = np.random.uniform(low=0.0, high=1.0, size=(5, 2))

In [184]:
x

array([[0.15971921, 0.3918624 ],
       [0.44660739, 0.57008611],
       [0.3212512 , 0.51697017],
       [0.80102251, 0.52036852],
       [0.11094208, 0.55893831]])

In [194]:
# Stacking the first position on top of the step differences and accumulating
# them reproduces x (up to floating-point rounding).
np.cumsum(np.vstack((x[:1], np.diff(x, axis=0))), axis=0)

array([[0.15971921, 0.3918624 ],
       [0.44660739, 0.57008611],
       [0.3212512 , 0.51697017],
       [0.80102251, 0.52036852],
       [0.11094208, 0.55893831]])

In [191]:
# Step differences with a zero first row, so the result has as many rows as x.
np.concatenate((np.zeros((1, 2)), np.diff(x, axis=0)), axis=0)

array([[ 0.        ,  0.        ],
       [ 0.28688818,  0.17822371],
       [-0.12535619, -0.05311594],
       [ 0.47977131,  0.00339835],
       [-0.69008043,  0.0385698 ]])