# 导入相关库

In [36]:
import os
import pathlib
import pandas as pd
import numpy as np
import pickle as pkl

from time import time

# 数据获取和处理

In [38]:
# The data is a collection of CSV files. Per the original author's notes, each
# CSV file stores one 5-second sequence containing the trajectories of
# multiple vehicles.
# NOTE: DATASET_PATH ends with "/" so the split sub-paths are appended by plain
# string concatenation. TEST keeps a trailing "/" while TRAIN and VAL do not.
DATASET_PATH = "/ssd/datasets/argoverse/argoverse-forecasting-dataset/"
TRAIN = DATASET_PATH + "train/data"
VAL = DATASET_PATH + "val/data"
TEST = DATASET_PATH + "test_obs/data/"

def data_process(root_dir):
    """Build AGENT trajectory features for every CSV sequence in a directory.

    Each ``*.csv`` file directly inside ``root_dir`` is read with pandas. The
    rows whose ``OBJECT_TYPE`` column equals ``"AGENT"`` are selected and their
    ``X`` / ``Y`` columns become the position track. Velocity is the
    first-order difference between consecutive positions; the first row has no
    predecessor, so its velocity is NaN.

    Files are processed in sorted path order so the returned list is
    reproducible. Entries that are not regular ``.csv`` files (for example
    sub-directories or stray text files) are skipped.

    Args:
        root_dir: Directory (``str`` or path-like) holding the sequence CSVs.

    Returns:
        A list with one ``numpy`` array per CSV file, each of shape ``(n, 4)``
        with columns ``x, y, vel_x, vel_y``, where ``n`` is the number of
        AGENT rows in that file. A file without AGENT rows yields an empty
        ``(0, 4)`` array.

    Raises:
        KeyError: If a CSV lacks the ``OBJECT_TYPE``, ``X`` or ``Y`` column.
    """
    t = time()

    root_dir = pathlib.Path(root_dir)
    # Directory listing order is arbitrary; sorting makes the output order
    # deterministic. Filtering to regular *.csv files keeps unrelated entries
    # from being handed to pd.read_csv.
    paths = sorted(
        entry.absolute()
        for entry in root_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".csv"
    )
    features = []
    for path in paths:
        sequence = pd.read_csv(path)
        # Evaluate the AGENT mask once and pull both coordinates together.
        agent_rows = sequence.loc[sequence["OBJECT_TYPE"] == "AGENT", ["X", "Y"]]
        xy = agent_rows.to_numpy()
        # Original author's note (not verified here): train/val sequences give
        # xy of shape (50, 2), i.e. 5 seconds at 10 frames per second, while
        # test sequences give (20, 2).
        # Velocity by finite difference; the initial velocity is unknown, so
        # row 0 stays NaN. Pre-allocating keeps vel the same length as xy even
        # when there are no AGENT rows.
        vel = np.full(xy.shape, np.nan)
        vel[1:] = np.diff(xy, axis=0)
        # Columns of feature: x, y, vel_x, vel_y.
        feature = np.column_stack((xy, vel))
        features.append(feature)
    print("use {}s to process".format(time() - t))
    print("total num of sequences: {}".format(len(features)))
    return features

def save_features_to_pkl(features, filepath):
    """Serialize ``features`` to ``filepath`` using pickle.

    Args:
        features: Object to pickle (the driver code passes the list returned
            by ``data_process``).
        filepath: Destination path; the file is opened in binary write mode
            and overwritten if it already exists.
    """
    # The parameter was previously misspelled ("fliepath"), so the body's
    # reference to `filepath` did not resolve to the argument.
    with open(filepath, 'wb') as f:
        pkl.dump(features, f)
        
def load_pkl_features(filepath):
    """Load and return the object pickled at ``filepath``.

    Args:
        filepath: Path of a pickle file, opened in binary read mode.

    Returns:
        The unpickled object.
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # were produced by a trusted source.
    with open(filepath, 'rb') as f:
        features = pkl.load(f)
    # The loaded object must be handed back; without this the caller gets None.
    return features

In [42]:
# Map each split label to its data directory, then process the splits in
# insertion order and pickle each result to "<label>.pkl" in the current
# working directory.
d = dict(train=TRAIN, val=VAL, test=TEST)
for name in d:
    path = d[name]
    print(name)
    features = data_process(path)
    save_features_to_pkl(features, name + '.pkl')

train /ssd/datasets/argoverse/argoverse-forecasting-dataset/train/data
val /ssd/datasets/argoverse/argoverse-forecasting-dataset/val/data
test /ssd/datasets/argoverse/argoverse-forecasting-dataset/test_obs/data/
