### This file is used to extract the input/output positions and velocities, lane, and lane norm from the training and test data

In [1]:
import os, os.path 
import pickle
from glob import glob

import torch
import numpy as np
import pandas as pd

import tqdm
import tqdm.notebook

In [2]:
# ---------------------------------------------------------------------------
# Locations on disk
# ---------------------------------------------------------------------------
TEST_PATH = './new_val_in'  # directory with the test-set input pickles
TRAIN_PATH = './new_train'  # directory with the training pickles (input + output)

DUMMY_TRAIN_PATH = './dummy_train'
DUMMY_TEST_PATH = './dummy_val'

# Extraction targets for the training set
IN_POS_FILE = './input_positions.csv'
OUT_POS_FILE = './output_positions.csv'

IN_VEL_FILE = './input_velocities.csv'
OUT_VEL_FILE = './output_velocities.csv'

LANE_FILE = './lane.txt'
LANE_NORM_FILE = './lane_norm.txt'

# Extraction targets for the test set
IN_POS_FILE_TEST = './input_positions_test.csv'
IN_VEL_FILE_TEST = './input_velocities_test.csv'

LANE_FILE_TEST = './lane_test.txt'
LANE_NORM_FILE_TEST = './lane_norm_test.txt'

# ---------------------------------------------------------------------------
# Keys of the dict stored in each pickle file
# ---------------------------------------------------------------------------
CITY = 'city'
LANE = 'lane'
LANE_NORM = 'lane_norm'
SCENE_IDX = 'scene_idx'
AGENT_ID = 'agent_id'
P_IN = 'p_in'
V_IN = 'v_in'
P_OUT = 'p_out'
V_OUT = 'v_out'
CAR_MASK = 'car_mask'
TRACK_ID = 'track_id'

# ---------------------------------------------------------------------------
# Extra column names used only in the DataFrames / CSV files
# ---------------------------------------------------------------------------
WAS_TARGET = 'was_target'


def _numbered(prefix):
    """Return the 19 column names prefix1 .. prefix19."""
    return [prefix + str(step) for step in range(1, 20)]


def _header_cells(feature_columns):
    """Return the CSV header as a list of cells, each ending in ',' except the last.

    The identifying columns (scene index, city, track id, target flag) always
    come first, followed by the given feature columns.
    """
    *leading, final = [SCENE_IDX, CITY, TRACK_ID, WAS_TARGET] + feature_columns
    return [name + ',' for name in leading] + [final]


P_IN_X = _numbered('p_in_x')
P_IN_Y = _numbered('p_in_y')
V_IN_X = _numbered('v_in_x')
V_IN_Y = _numbered('v_in_y')

# NOTE(review): the output columns use the same 1..19 numbering as the input
# columns -- confirm this matches the number of output timesteps in the pickles.
P_OUT_X = _numbered('p_out_x')
P_OUT_Y = _numbered('p_out_y')
V_OUT_X = _numbered('v_out_x')
V_OUT_Y = _numbered('v_out_y')

# Header rows, stored as lists of already comma-terminated cells so they can
# be passed straight to file.writelines().
P_IN_CSV_HEADER = _header_cells(P_IN_X + P_IN_Y)
V_IN_CSV_HEADER = _header_cells(V_IN_X + V_IN_Y)
P_OUT_CSV_HEADER = _header_cells(P_OUT_X + P_OUT_Y)
V_OUT_CSV_HEADER = _header_cells(V_OUT_X + V_OUT_Y)

In [3]:
# Run configuration for this notebook session.

# DataLoader settings
BATCH_SIZE = 4
NUM_WORKERS = 0

# Directories the pickles are read from.  To use the dummy directories
# instead, assign DUMMY_TRAIN_PATH / DUMMY_TEST_PATH here.
train_path = TRAIN_PATH
test_path = TEST_PATH

# Extraction target: the CSV file to (over)write and the header row written
# at its top.  These two must describe the same feature.
CSV_PATH = IN_VEL_FILE_TEST
CSV_HEADER = V_IN_CSV_HEADER

In [4]:
class ArgoverseDatasetEDA(torch.utils.data.Dataset):
    """Dataset that yields one unpickled object per file in a directory.

    Every entry matched by ``<data_path>/*`` is treated as a pickle file.
    Entries are sorted by path so that indexing is deterministic.

    Args:
        data_path: Directory containing the pickle files.
        transform: Optional callable applied to each loaded object before it
            is returned. ``None`` (the default) returns the object unchanged.
    """

    def __init__(self, data_path: str, transform=None):
        super().__init__()
        self.data_path = data_path
        # Keep the transform so __getitem__ can apply it; it used to be
        # accepted and then silently dropped.
        self.transform = transform
        self.pkl_list = sorted(glob(os.path.join(self.data_path, '*')))

    def __len__(self):
        """Return the number of files found under ``data_path``."""
        return len(self.pkl_list)

    def __getitem__(self, idx):
        """Load, optionally transform, and return the object stored in file ``idx``."""
        with open(self.pkl_list[idx], 'rb') as f:
            # NOTE: pickle.load can execute arbitrary code -- only point this
            # dataset at files from a trusted source.
            data = pickle.load(f)
        if self.transform is not None:
            data = self.transform(data)
        return data

In [5]:
def collate_train_eda(batch):
    """
    Custom collate_fn for the train dataset.

    For each scene in ``batch`` a six-element list is built, restricted to the
    rows where the scene's car mask is non-zero:
    ``[scene_idxs, city, track_ids, was_target, v_in_x, v_in_y]``.

    Returns ``[batch_data, out]`` where ``batch_data`` holds one such list per
    scene and ``out`` is always an empty list.
    """
    def _scene_record(scene):
        # Row indexes of the agents flagged in the car mask
        tracked = np.nonzero(scene[CAR_MASK])[0]
        n_tracked = len(tracked)
        # One id per tracked agent, read from the first step / first component
        ids = scene[TRACK_ID][tracked, 0, 0]

        # @ CHANGE the feature below to extract something else, e.g. use
        # scene[P_IN] in place of scene[V_IN] for the input positions.
        feature = scene[V_IN]
        return [
            [scene[SCENE_IDX]] * n_tracked,        # scene index, repeated per agent
            [scene[CITY]] * n_tracked,             # city, repeated per agent
            ids,
            (ids == scene[AGENT_ID]).astype(int),  # 1 where the agent is the target
            feature[tracked, :, 0],                # x-components
            feature[tracked, :, 1],                # y-components
        ]

    return [[_scene_record(scene) for scene in batch], []]

In [6]:
def collate_test_eda(batch):
    """
    Custom collate_fn for the validation dataset.

    Produces, for every scene in ``batch``, the list
    ``[scene_idxs, city, track_ids, was_target, v_in_x, v_in_y]`` limited to
    the rows where the scene's car mask is non-zero.

    Returns ``[batch_data, out]``; ``out`` is always an empty list.
    """
    records = []
    labels = []
    for sample in batch:
        # Row indexes of the agents flagged in the car mask
        rows = np.nonzero(sample[CAR_MASK])[0]
        count = len(rows)

        agent_ids = sample[TRACK_ID][rows, 0, 0]
        is_target = (agent_ids == sample[AGENT_ID]).astype(int)

        # @ CHANGE the key below to extract something else, e.g. sample[P_IN]
        # for the input positions instead of the input velocities.
        x_part = sample[V_IN][rows, :, 0]
        y_part = sample[V_IN][rows, :, 1]

        records.append([
            [sample[SCENE_IDX]] * count,
            [sample[CITY]] * count,
            agent_ids,
            is_target,
            x_part,
            y_part,
        ])

    return [records, labels]

In [7]:
# Build the Dataset / DataLoader pair that is fed to data_to_csv().
# To process the training set instead, pass data_path=train_path to the
# dataset and collate_fn=collate_train_eda to the loader.
dataset = ArgoverseDatasetEDA(data_path=test_path)
loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_test_eda,
    num_workers=NUM_WORKERS,
)

In [8]:
# # Look at one data sample      
# for i_batch, (data, label) in enumerate(loader):
#     print(type(data))
#     print(type(label))
#     break

In [9]:
def data_to_csv(loader):
    """
    Dump the feature selected by the collate_fn from the pickles into CSV_PATH.

    The file at CSV_PATH is overwritten with the CSV_HEADER row, then one
    block of rows per scene is appended (no index column, no per-block header).

    BEFORE calling this function:
    1. Set CSV_PATH and CSV_HEADER, and change the column lists assigned from
       record[4] and record[5] below, to match the feature being extracted.

    2. Make sure the collate_fn of ``loader`` selects that same feature.

    loader: iterable of (data, label) pairs, where ``data`` is a list of
        per-scene records laid out as
        [scene_idxs, city, track_ids, was_target, x_values, y_values].
    """
    # Start from a fresh file that contains only the header row
    with open(CSV_PATH, "w") as header_file:
        header_file.truncate()
        header_file.writelines(CSV_HEADER + ['\n'])

    progress = tqdm.notebook.tqdm(loader, total=int(len(loader)))

    for data, label in progress:
        for record in data:
            # Identifying columns first ...
            id_columns = {
                SCENE_IDX: record[0],
                CITY: record[1],
                TRACK_ID: record[2],
                WAS_TARGET: record[3],
            }
            frame = pd.DataFrame(id_columns)
            # ... then the feature columns.
            # @ CHANGE these lines accordingly (e.g. P_IN_X / P_IN_Y)
            frame[V_IN_X] = record[4]
            frame[V_IN_Y] = record[5]
            # Append this scene's rows below what is already in the file
            frame.to_csv(CSV_PATH, index=False, header=False, mode='a')

In [11]:
# Run the extraction: writes the header plus every scene's rows to CSV_PATH.
data_to_csv(loader)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=800.0), HTML(value='')))




# Lane and lane norm extraction

In [9]:
def lane_to_csv(src_path, out_path, key):
    """
    Write one comma-separated line per pickle file with that scene's lane data.

    src_path: directory containing the pickle files to get data from
    out_path: file to write the results to (overwritten)
    key: One of LANE or LANE_NORM

    Each output line has the format
        scene index, number of rows in scene[key], all column-0 values,
        all column-1 values
    Files are processed in sorted path order so the output is deterministic;
    entries of src_path that are not regular files are skipped.
    """
    # List the files up front: os.scandir yields entries in arbitrary order,
    # and knowing the real count gives the progress bar a correct total for
    # any directory.
    with os.scandir(src_path) as entries:
        pkl_paths = sorted(entry.path for entry in entries if entry.is_file())

    with open(out_path, "w") as output_file:
        for pkl_path in tqdm.notebook.tqdm(pkl_paths, total=len(pkl_paths)):
            # NOTE: pickle.load can execute arbitrary code -- only run this on
            # files from a trusted source.
            with open(pkl_path, "rb") as file:
                scene = pickle.load(file)

            lanes = scene[key]
            # All column-0 values followed by all column-1 values
            values = np.concatenate((lanes[:, 0], lanes[:, 1]))

            fields = [str(scene[SCENE_IDX]), str(lanes.shape[0])]
            fields.extend(str(num) for num in values)
            output_file.write(','.join(fields) + "\n")

In [12]:
# The key must match the output file: pair LANE_FILE_TEST with LANE, or
# LANE_NORM_FILE_TEST with LANE_NORM.  Passing LANE here would write lane
# data into the lane-norm file.
lane_to_csv(TEST_PATH, LANE_NORM_FILE_TEST, LANE_NORM)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=3200.0), HTML(value='')))


