# Preprocessing for Argoverse Data

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy as np
import pickle
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt

Sampled at a 10 Hz rate
- Train: 205,942 sequences
- Val: 3,200 sequences
- Test: 36,272 sequences

In [4]:
class ArgoverseDataset(Dataset):
    """Dataset that serves one unpickled file per index.

    Every entry directly inside ``data_path`` is treated as a pickle file.
    Paths are kept in sorted order so that an index always refers to the
    same file.
    """

    def __init__(self, data_path: str, transform=None):
        """Remember the data directory and index its contents.

        Args:
            data_path: Directory whose entries are the pickled samples.
            transform: Optional callable applied to every loaded sample.
        """
        super().__init__()
        self.data_path = data_path
        self.transform = transform
        self.pkl_list = sorted(glob(os.path.join(self.data_path, '*')))

    def __len__(self):
        """Return the number of indexed files."""
        return len(self.pkl_list)

    def __getitem__(self, idx):
        """Unpickle the file at position ``idx`` and return its content.

        The content is passed through ``transform`` first when one was given.
        """
        # NOTE: pickle executes code embedded in the file; only load trusted data.
        with open(self.pkl_list[idx], "rb") as handle:
            sample = pickle.load(handle)

        return self.transform(sample) if self.transform else sample

In [5]:
# Build the training and validation datasets from their pickle directories.
train_data, val_data = (
    ArgoverseDataset(folder) for folder in ("data/new_train", "data/new_val_in")
)

In [6]:
# Report how many sequences (indexed pickle files) each split contains.
print("TRAIN DATA SEQUENCES:", len(train_data))
print("VAL DATA SEQUENCES:", len(val_data))

TRAIN DATA SEQUENCES: 205942
VAL DATA SEQUENCES: 3200


In [None]:
# List every field of the first training sample together with its Python type.
for key, value in train_data[0].items():
    print(key, type(value))

In [None]:
# Display the shape of the four trajectory fields of the first training sample.

for key, value in train_data[0].items():
    if key in ("p_in", "v_in", "p_out", "v_out"):
        print(key, value.shape)

In [None]:
# Print every scalar stored in the first sample's "p_in" array, one per line.
for key, value in train_data[0].items():
    if key == "p_in":
        # np.nditer walks the array element by element regardless of its rank.
        for i in np.nditer(value):
            print(i)

In [None]:
from tqdm import tqdm

# Create Distribution

In [None]:
# Gather component 0 (x) of "p_in" over the first 3200 training samples.
# Rows are buffered in a list and joined once at the end: calling np.append
# per scalar re-copies the whole array every time.
p_in_x_rows = []
for d in tqdm(range(3200)):
    p_in = train_data[d]["p_in"]
    # Only the first 60 rows, and the first 19 steps of each row, are read.
    for row in p_in[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            p_in_x_rows.append(row[:19, 0])
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
p_in_x = np.concatenate([np.empty([0]), *p_in_x_rows])

fig = plt.hist(p_in_x, bins=10)

In [None]:
# Gather component 1 (y) of "p_in" over the first 3200 training samples.
# Rows are buffered in a list and joined once at the end: calling np.append
# per scalar re-copies the whole array every time.
p_in_y_rows = []
for d in tqdm(range(3200)):
    p_in = train_data[d]["p_in"]
    # Only the first 60 rows, and the first 19 steps of each row, are read.
    for row in p_in[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            p_in_y_rows.append(row[:19, 1])
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
p_in_y = np.concatenate([np.empty([0]), *p_in_y_rows])

fig = plt.hist(p_in_y, bins=10)

In [None]:
# Gather component 0 (x) of "v_in" over the first 3200 training samples.
# Rows are buffered in a list and joined once at the end: calling np.append
# per scalar re-copies the whole array every time.
v_in_x_rows = []
for d in tqdm(range(3200)):
    v_in = train_data[d]["v_in"]
    # Only the first 60 rows, and the first 19 steps of each row, are read.
    for row in v_in[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            v_in_x_rows.append(row[:19, 0])
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
v_in_x = np.concatenate([np.empty([0]), *v_in_x_rows])

In [None]:
# Draw the distribution of v_in_x as a 50-bin histogram.
# NOTE(review): plt.hist presumably returns (counts, bin_edges, patches), not a
# Figure, so the name `fig` is likely a misnomer -- confirm before relying on it.
fig = plt.hist(v_in_x, bins=50)

In [None]:
# Gather component 1 (y) of "v_in" over the first 3200 training samples.
# Rows are buffered in a list and joined once at the end: calling np.append
# per scalar re-copies the whole array every time.
v_in_y_rows = []
for d in tqdm(range(3200)):
    v_in = train_data[d]["v_in"]
    # Only the first 60 rows, and the first 19 steps of each row, are read.
    for row in v_in[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            v_in_y_rows.append(row[:19, 1])
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
v_in_y = np.concatenate([np.empty([0]), *v_in_y_rows])

In [None]:
# Draw the distribution of v_in_y as a 50-bin histogram.
# NOTE(review): plt.hist presumably returns (counts, bin_edges, patches), not a
# Figure, so the name `fig` is likely a misnomer -- confirm before relying on it.
fig = plt.hist(v_in_y, bins=50)

In [None]:
# Gather component 0 (x) of "p_out" over the first 3200 training samples.
# Rows are buffered in a list and joined once at the end: calling np.append
# per scalar re-copies the whole array every time.
p_out_x_rows = []
for d in tqdm(range(3200)):
    p_out = train_data[d]["p_out"]
    # Only the first 60 rows, and the first 30 steps of each row, are read.
    for row in p_out[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            p_out_x_rows.append(row[:30, 0])
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
p_out_x = np.concatenate([np.empty([0]), *p_out_x_rows])

fig = plt.hist(p_out_x, bins=10)

In [None]:
# Gather component 1 (y) of "p_out" over the first 3200 training samples.
# Rows are buffered in a list and joined once at the end: calling np.append
# per scalar re-copies the whole array every time.
p_out_y_rows = []
for d in tqdm(range(3200)):
    p_out = train_data[d]["p_out"]
    # Only the first 60 rows, and the first 30 steps of each row, are read.
    for row in p_out[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            p_out_y_rows.append(row[:30, 1])
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
p_out_y = np.concatenate([np.empty([0]), *p_out_y_rows])

fig = plt.hist(p_out_y, bins=10)

In [None]:
# Gather component 0 (x) of "v_out" over the first 3200 training samples.
# Rows are buffered in a list and joined once at the end: calling np.append
# per scalar re-copies the whole array every time.
v_out_x_rows = []
for d in tqdm(range(3200)):
    v_out = train_data[d]["v_out"]
    # Only the first 60 rows, and the first 30 steps of each row, are read.
    for row in v_out[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            v_out_x_rows.append(row[:30, 0])
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
v_out_x = np.concatenate([np.empty([0]), *v_out_x_rows])

In [None]:
# Draw the distribution of v_out_x as a 50-bin histogram.
# NOTE(review): plt.hist presumably returns (counts, bin_edges, patches), not a
# Figure, so the name `fig` is likely a misnomer -- confirm before relying on it.
fig = plt.hist(v_out_x, bins=50)

In [None]:
# Gather component 1 (y) of "v_out" over the first 3200 training samples.
# Rows are buffered in a list and joined once at the end: calling np.append
# per scalar re-copies the whole array every time.
v_out_y_rows = []
for d in tqdm(range(3200)):
    v_out = train_data[d]["v_out"]
    # Only the first 60 rows, and the first 30 steps of each row, are read.
    for row in v_out[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            v_out_y_rows.append(row[:30, 1])
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
v_out_y = np.concatenate([np.empty([0]), *v_out_y_rows])

fig = plt.hist(v_out_y, bins=50)

In [None]:
# Draw the distribution of v_out_y as a 50-bin histogram (same call as the
# plot at the end of the cell that builds v_out_y).
# NOTE(review): plt.hist presumably returns (counts, bin_edges, patches), not a
# Figure, so the name `fig` is likely a misnomer -- confirm before relying on it.
fig = plt.hist(v_out_y, bins=50)

In [None]:
# Distribution of the Euclidean norm of every (component 0, component 1)
# "v_out" pair over the first 3200 training samples.
# NOTE(review): this cell previously left `euclidean` empty and instead
# appended component 1 of "v_out" to v_out_y a second time before re-plotting
# v_out_y. The norm is assumed to be the intended quantity -- confirm.
euclidean_rows = []
for d in tqdm(range(3200)):
    v_out = train_data[d]["v_out"]
    # Only the first 60 rows, and the first 30 steps of each row, are read.
    for row in v_out[:60]:
        # A row is kept only when every one of its entries is non-zero.
        # NOTE(review): presumably meant to skip zero-padded rows; .all() also
        # drops rows containing a single 0 -- confirm .any() is not intended.
        if row.all():
            # hypot(a, b) == sqrt(a**2 + b**2), evaluated per step.
            euclidean_rows.append(np.hypot(row[:30, 0], row[:30, 1]))
# The leading empty float64 array keeps the result float64 and makes the
# "no row kept" case valid for np.concatenate.
euclidean = np.concatenate([np.empty([0]), *euclidean_rows])

fig = plt.hist(euclidean, bins=50)