# Notebook Overview
This Notebook takes raw CARLA data in `../data/00_data_raw` and, after a series of intermediary steps, produces `../data/03_preprocessed/01_data.csv`. This CSV file can then be used directly by the next Notebook, `01_Process-Data.ipynb`, to prepare data for Trajectron++.

In [None]:
import pandas as pd
import os
import numpy as np

# Show up to 500 rows and 500 columns when a DataFrame is displayed
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, 500)

# Constants

In [None]:
# Stage 0: directory scanned for the raw CARLA recordings
RAW_PATH = '../data/00_data_raw'

# Stage 1: raw recordings rewritten as CSV, one file per node id
RAW_CSV_PATH = '../data/01_data_raw_csv'
RAW_CSV_TEMPLATE = RAW_CSV_PATH + '/%04d.csv'

# Stage 2: converted per-node CSV files, one file per node id
CSV_PATH = '../data/02_data_csv'
CSV_TEMPLATE = CSV_PATH + '/%04d.csv'

# Stage 3: all per-node CSVs merged into one file, then the final output
CSV_MERGED = '../data/03_preprocessed/00_merged.csv'
OUTFILE = '../data/03_preprocessed/01_data.csv'

# Helper Functions

In [None]:
def get_files(path):
    """Return the sorted paths of the .txt/.csv files directly inside `path`.

    Only the top level of `path` is inspected; sub-directories are not
    descended into. An empty list is returned when `path` does not exist
    or cannot be walked, so callers can always iterate over the result.
    """
    wanted_extensions = ('.txt', '.csv')
    for root, _dirs, names in os.walk(path):
        # os.walk yields the top directory first; returning here keeps
        # the listing non-recursive.
        return sorted(os.path.join(root, name)
                      for name in names
                      if os.path.splitext(name)[1] in wanted_extensions)
    # os.walk yielded nothing (e.g. the directory is missing)
    return []

def get_node_id(filename):
    """Return the integer encoded in the file's base name (extension removed)."""
    base_name = os.path.basename(filename)
    stem, _extension = os.path.splitext(base_name)
    return int(stem)

# CSV Functions
Each function below performs one step of the data pre-processing chain. Together they take the raw output data from CARLA and pre-process it for easy digestion into the Trajectron++ system.

In [None]:
# Takes CARLA output file in 00_data_raw and turns it into CSVs
# in 01_data_raw_csv
def create_raw_csv(filename):
    """Rewrite one raw recording file as a comma-separated file.

    The ' | ' separators become commas and each multi-valued header name
    is expanded into one column name per component. The node id is the
    number that follows 'myrecording' in the file's base name; the result
    is written to RAW_CSV_TEMPLATE % node_id.
    """
    with open(filename) as raw_file:
        data = raw_file.read()

    # Applied in order. 'Angular Velocity' must be expanded before the
    # plain 'Velocity' rule, otherwise the latter would match inside it.
    replacements = (
        (' | ', ','),
        ('Position', 'Pos_X,Pos_Y,Pos_Z'),
        ('Angular Velocity', 'AVel_1,AVel_2,AVel_3'),
        ('Velocity', 'Vel_X,Vel_Y,Vel_Z'),
        ('Acceleration', 'Accel_X,Accel_Y,Accel_Z'),
        ('Stopped at Red Light + Light ID ', 'LightStop,LightID'),
    )
    data_csv = data
    for old, new in replacements:
        data_csv = data_csv.replace(old, new)

    # Parse the id from the base name only, so that directory components
    # containing 'myrecording' or a dot cannot break the extraction.
    base_name = os.path.basename(filename)
    node_id = int(base_name.split('myrecording')[1].split('.')[0])
    outfile = RAW_CSV_TEMPLATE % node_id

    with open(outfile, 'w') as csv_file:
        csv_file.write(data_csv)

In [None]:
# Takes CSV file in 01_data_raw_csv, drops unused data and converts
# data into correct coordinate system for Trajectron++. Writes converted
# CSV file to 02_data_csv
def create_csv(filename):
    """Convert one raw CSV file into the per-node CSV format.

    Drops the unused columns, tags every row with the node id taken from
    the file name, converts the simulation frame into a sample frame and
    the heading into radians, then writes the reordered columns to
    CSV_TEMPLATE % node_id.

    Raises KeyError if any expected column is missing from the input.
    """
    df = pd.read_csv(filename)

    # drop unneeded columns (keyword form: the positional `axis` argument
    # of DataFrame.drop is not accepted by recent pandas releases)
    df = df.drop(columns=['Timestamp', 'AVel_1', 'AVel_2', 'AVel_3',
                          'LightStop', 'LightID'])

    # add NodeID column with number from filename
    node_id = get_node_id(filename)
    df['NodeID'] = node_id

    # Convert Simulation Frame into Sample Frame (one sample per 10
    # simulation frames)
    df['Frame'] = df['Simulation Frame'] // 10
    df = df.drop(columns='Simulation Frame')

    # Convert Heading to Radian with 0 radian along x+ axis, pi/2 along y+ axis
    df['Heading'] = -1 * (df['Heading'] - 90) * np.pi / 180

    # Ordering for Columns
    column_ordering = ['Frame',
                       'NodeID',
                       'Pos_X',
                       'Pos_Y',
                       'Pos_Z',
                       'Vel_X',
                       'Vel_Y',
                       'Vel_Z',
                       'Accel_X',
                       'Accel_Y',
                       'Accel_Z',
                       'Heading']

    df = df[column_ordering]

    # Let pandas write the file directly instead of building the whole
    # CSV as an in-memory string first.
    df.to_csv(CSV_TEMPLATE % node_id, index=False)

In [None]:
# Takes CSV files in 02_data_csv and merges them into one
# large CSV (CSV_MERGED)
def create_merged_csv():
    """Concatenate every per-node CSV found in CSV_PATH into CSV_MERGED.

    Files are keyed by the node id in their name, so a later file with
    the same id replaces an earlier one. When no files are found an empty
    frame is written.
    """
    frames_by_node = {}
    # `or []` covers get_files returning None when CSV_PATH cannot be walked
    for f in get_files(CSV_PATH) or []:
        frames_by_node[get_node_id(f)] = pd.read_csv(f)

    if frames_by_node:
        # A single concat replaces the repeated DataFrame.append calls:
        # append is gone from recent pandas releases and copied the
        # accumulated frame on every iteration.
        df_all = pd.concat(list(frames_by_node.values()))
    else:
        # pd.concat raises on an empty list
        df_all = pd.DataFrame()

    df_all.to_csv(CSV_MERGED, index=False)

In [None]:
# Takes the merged CSV (CSV_MERGED) and enforces that a unique node ID
# is tracked for one concurrent period of time. Writes result to the
# final CSV (OUTFILE)
# Trajectron++ requires all data for a tracked object to be
# consecutive. Thus, if an object enters and leaves tracking,
# it must be assigned a new Node ID so all observed timesteps
# remain consecutive.
def create_consecutive_csv():
    """Split nodes with gaps in their frames into separate node ids.

    Node ids are visited from 0 upwards. Whenever a node's sorted frames
    do not all increase by exactly 1, every row after the first break is
    moved to a fresh id (current maximum + 1). Fresh ids lie above the
    current id, so they are visited later by the same loop and split
    again if they still contain breaks.
    """
    df = pd.read_csv(CSV_MERGED)
    df.sort_values(['NodeID', 'Frame'], inplace=True)

    # Track the largest id in use instead of rescanning the column on
    # every pass; -1 makes the loop a no-op for an empty frame.
    max_id = -1 if df.empty else df['NodeID'].max()

    node_id = 0
    # keep cycling until all nodes (including newly created ones) are
    # consecutive
    while node_id <= max_id:
        is_node = df['NodeID'] == node_id
        frames = df.loc[is_node, 'Frame']

        # positions where the step to the next frame is not exactly 1
        # NOTE(review): repeated frames (step 0) also count as a break but
        # stay with the current id — confirm the input never contains them.
        breaks = np.where(np.diff(frames) != 1)[0]
        if breaks.size:
            # last frame of the leading consecutive run
            split_frame = frames.iloc[breaks[0]]

            # everything after that run moves to a fresh id
            max_id += 1
            df.loc[is_node & (df['Frame'] > split_frame), 'NodeID'] = max_id

        node_id += 1

    df.to_csv(OUTFILE, index=False)

# Execute
Runs the data processing steps in order.

In [None]:
# Step 1: turn every file found in RAW_PATH into a raw CSV
for f in get_files(RAW_PATH):
    create_raw_csv(f)

In [None]:
# Step 2: convert every raw CSV found in RAW_CSV_PATH into a per-node CSV
for f in get_files(RAW_CSV_PATH):
    create_csv(f)

In [None]:
# Step 3: merge the per-node CSVs into CSV_MERGED
create_merged_csv()

In [None]:
# Step 4: re-number nodes so each id covers consecutive frames; writes OUTFILE
create_consecutive_csv()