# Import Basics

In [13]:
import math
import os
import random
import warnings

import numpy as np
import pandas as pd
from matplotlib import pyplot
from statsmodels.tsa.ar_model import AutoReg

warnings.filterwarnings('ignore')

# Introduction

Our task is to predict the position of the ego vehicle 3 seconds into the future, $y_i \in \mathbb{R}^{60}$, given $x_i$: one second of vehicle position data for the ego vehicle and (up to) the ten nearest agents to the ego at the point in time where the prediction starts.

# Data Cleaning

In this part we perform the following operations on each dataset $X_i$:
<ol>
  <li>delete unwanted columns (time step, id, present)</li>
  <li>convert non-numerical values to numerical</li>
  <li>fill the empty columns so that each dataset has 10 elements (car, pedestrian/bicycle)</li>
</ol>
Then we perform the following operations on each dataset $y_i$:
<ol>
  <li>not all of the datasets contain 30 $(x, y)$ tuples; we fill in these incomplete datasets</li>
  <li>delete unwanted columns (time step)</li>
  <li>fill the empty columns so that each dataset has 10 elements (car, pedestrian/bicycle)</li>
</ol>

In [14]:
def clean_data_X(data, n_agents=10):
    """Clean one raw ``X`` frame and pad it to a fixed number of agent slots.

    The steps are applied in this order:

    1. drop the ``'time step'`` column;
    2. encode the type strings (``" car"`` -> 1, ``" pedestrian/bicycle"`` -> 2)
       and the role strings (``" agent"`` -> 1, ``" others"`` -> 2);
    3. drop every column that holds only zeros (unused agent slots); as a side
       effect any NaN left in the frame is turned into 0;
    4. drop the `` id<k>`` and `` present<k>`` columns of the remaining agents;
    5. when fewer than ``n_agents`` agents remain, fill the missing slots with
       copies of randomly chosen existing agents (``np.random.choice``, so
       seed NumPy's global generator for reproducible output).

    Parameters
    ----------
    data : pandas.DataFrame
        Raw frame with a ``'time step'`` column and per-agent columns whose
        names end with the agent index (`` id<k>``, `` role<k>``, `` type<k>``,
        `` x<k>``, `` y<k>``, `` present<k>``).
    n_agents : int, default 10
        Number of agent slots the returned frame must contain.

    Returns
    -------
    pandas.DataFrame
        Frame with columns `` role<k>``, `` type<k>``, `` x<k>``, `` y<k>`` for
        ``k`` in ``0 .. n_agents - 1``.

    Raises
    ------
    ValueError
        If no column is left after removing the empty ones, if the last
        remaining column name carries no numeric suffix, or if more than
        ``n_agents`` agents are present.
    """
    data = data.drop(columns=['time step'])

    data = data.replace({" car": 1, " pedestrian/bicycle": 2})
    data = data.replace({" agent": 1, " others": 2})

    # Delete empty columns: zeros become NaN so that dropna can remove the
    # all-zero columns, then the remaining NaNs are turned back into zeros.
    data = data.replace({0: math.nan})
    data = data.dropna(how='all', axis=1)
    data = data.replace({math.nan: 0})

    if len(data.columns) == 0:
        raise ValueError("no non-empty columns left after cleaning")

    # The index of the last non-empty column tells how many agents are present.
    # Read the whole numeric suffix, not just the final character, so that
    # indices with more than one digit are parsed correctly.
    last_column = str(data.columns[-1])
    suffix = last_column[len(last_column.rstrip('0123456789')):]
    if not suffix:
        raise ValueError(
            "cannot read an agent index from column %r" % last_column)
    m = int(suffix) + 1
    if m > n_agents:
        raise ValueError(
            "found %d agents but only %d slots are allowed" % (m, n_agents))

    # An id/present column may already be gone (e.g. an agent whose id is 0 is
    # an all-zero column), so missing names are ignored instead of raising.
    bookkeeping = [prefix + str(i)
                   for i in range(m)
                   for prefix in (' id', ' present')]
    data = data.drop(columns=bookkeeping, errors='ignore')

    if m < n_agents:
        # Pad the unused slots with copies of randomly picked real agents.
        s = np.random.choice(m, n_agents - m)
        for slot, source in zip(range(m, n_agents), s):
            for field in (' role', ' type', ' x', ' y'):
                data[field + str(slot)] = data[field + str(source)]

    return data

def clean_data_y(data, size=30):
    """Return the `` x`` / `` y`` target columns with exactly ``size`` rows.

    The ``'time step'`` column is dropped. When the frame has fewer than
    ``size`` rows, the missing rows are extrapolated independently for `` x``
    and `` y`` with a 5-lag autoregressive model (``AutoReg``) fitted on the
    available rows. When it has more, only the first ``size`` rows are kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with the columns ``'time step'``, `` x`` and `` y``.
    size : int, default 30
        Number of rows the returned frame must have.

    Returns
    -------
    pandas.DataFrame
        ``data`` without ``'time step'``, padded or truncated to ``size`` rows.
        Padded rows are appended with their own 0-based index labels, as the
        former ``DataFrame.append`` call did.

    Notes
    -----
    ``AutoReg`` needs enough observations to fit 5 lags; very short frames make
    the fit raise.
    """
    # The time step is not part of the target, so it is dropped up front
    # rather than being extrapolated and discarded afterwards.
    data = data.drop(columns=['time step'])

    missing = size - data.shape[0]
    if missing < 0:
        # More rows than requested: keep the leading ``size`` rows.
        data = data.iloc[:size]
    elif missing > 0:
        padding = {}
        for column in (' x', ' y'):
            observed = data[column].values
            fitted = AutoReg(observed, 5).fit()
            forecast = fitted.predict(start=len(observed),
                                      end=len(observed) + missing - 1,
                                      dynamic=False)
            padding[column] = np.asarray(forecast).ravel()
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        data = pd.concat([data, pd.DataFrame(padding)])

    return data

# Adding New Features

In this part we add new features for every agent: speed, direction of motion (stored as its sine and cosine), acceleration, and turning.

In [3]:
def continuous_angle(x):
    """Unwrap a sequence of angles so consecutive values never jump by more than pi.

    Each angle is shifted by whole multiples of 2*pi until it lies within
    pi of the previously emitted angle; the first angle is compared with 0.
    The result is returned as a NumPy array.
    """
    unwrapped = []
    previous = 0

    for value in x:
        # Move up by full turns while the value is too far below the previous one.
        while value < previous - np.pi:
            value += 2 * np.pi
        # Move down by full turns while it is too far above.
        while value > previous + np.pi:
            value -= 2 * np.pi
        unwrapped.append(value)
        previous = value

    return np.array(unwrapped)

#%%

def speed_direction(data, n_agents=10):
    """Add speed and heading features for every agent, in place.

    For each agent ``i`` the displacement per time step is estimated from the
    `` x<i>`` / `` y<i>`` columns with finite differences: a one-sided
    difference on the first and last row and a central difference (half the
    two-step displacement) on the rows in between. Three columns are added:

    * `` speed<i>``    - length of the displacement estimate;
    * `` sin(dir)<i>`` - sine of the displacement direction;
    * `` cos(dir)<i>`` - cosine of the displacement direction.

    The computation is positional, so it works for any number of rows (at
    least two) and does not depend on the frame's index labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding `` x<i>`` and `` y<i>`` for ``i`` in ``0 .. n_agents - 1``.
        It is modified in place.
    n_agents : int, default 10
        Number of agents to process.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the new columns added.

    Raises
    ------
    ValueError
        Raised by ``np.gradient`` when the frame has fewer than two rows.
    """
    for i in range(n_agents):
        x = data[' x%d' % i].to_numpy(dtype=float)
        y = data[' y%d' % i].to_numpy(dtype=float)

        # np.gradient with unit spacing is exactly the stencil described above:
        # (f[t+1] - f[t-1]) / 2 inside, f[1] - f[0] and f[-1] - f[-2] at the ends.
        vx = np.gradient(x)
        vy = np.gradient(y)
        direction = np.arctan2(vy, vx)

        data[' speed%d' % i] = np.hypot(vx, vy)
        data[' sin(dir)%d' % i] = np.sin(direction)
        data[' cos(dir)%d' % i] = np.cos(direction)

    return data



def acceleration(data, n_agents=10):
    """Add an acceleration column for every agent, in place.

    `` acceleration<i>`` is the finite-difference derivative of `` speed<i>``
    per time step: a one-sided difference on the first and last row and a
    central difference, ``(speed[t+1] - speed[t-1]) / 2``, on the rows in
    between. The computation is positional and works for any number of rows
    (at least two).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding `` speed<i>`` for ``i`` in ``0 .. n_agents - 1``.
        It is modified in place.
    n_agents : int, default 10
        Number of agents to process.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the new columns added.

    Raises
    ------
    ValueError
        Raised by ``np.gradient`` when the frame has fewer than two rows.
    """
    for i in range(n_agents):
        speed = data[' speed%d' % i].to_numpy(dtype=float)
        # Unit-spacing np.gradient reproduces the stencil described above.
        data[' acceleration%d' % i] = np.gradient(speed)

    return data




def turning(data, n_agents=10):
    """Add a turning column (change of heading per time step), in place.

    For each agent the heading is rebuilt from `` sin(dir)<i>`` and
    `` cos(dir)<i>`` with ``arctan2`` and made continuous with
    ``continuous_angle``, so that crossing +/-pi does not show up as a jump.
    `` turning<i>`` is the finite-difference derivative of that heading: a
    one-sided difference on the first and last row, a central difference
    ``(dir[t+1] - dir[t-1]) / 2`` in between. The computation is positional
    and works for any number of rows (at least two).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding `` sin(dir)<i>`` and `` cos(dir)<i>`` for ``i`` in
        ``0 .. n_agents - 1``. It is modified in place.
    n_agents : int, default 10
        Number of agents to process.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the new columns added.

    Raises
    ------
    ValueError
        Raised by ``np.gradient`` when the frame has fewer than two rows.
    """
    for i in range(n_agents):
        sin_dir = data[' sin(dir)%d' % i].to_numpy(dtype=float)
        cos_dir = data[' cos(dir)%d' % i].to_numpy(dtype=float)

        # Unwrap first: differencing raw arctan2 output would report a turn of
        # about 2*pi every time the heading crosses the +/-pi boundary.
        direction = continuous_angle(np.arctan2(sin_dir, cos_dir))

        data[' turning%d' % i] = np.gradient(direction)

    return data

def add_features(data):
    """Append all derived motion features to a cleaned ``X`` frame.

    Runs ``speed_direction``, ``acceleration`` and ``turning`` in that order,
    feeding each step the frame returned by the previous one, and returns the
    final frame.
    """
    for step in (speed_direction, acceleration, turning):
        data = step(data)

    return data

# Reading and Processing 

In this part and the next, we iterate over all CSV files and perform the following operations on each file:
<ol>
  <li>read the dataset</li>
  <li>clean the dataset</li>
  <li>add new features</li>
  <li>reshape the dataset to a (1,x) shaped numpy array</li>
  <li>merge all produced numpy arrays into a single matrix</li>
</ol>

In [15]:
def read_data(path):
    """Load the CSV file at ``path`` and return it as a pandas DataFrame."""
    return pd.read_csv(path)

def _sample_path(split, kind, i):
    """Build the relative CSV path of sample ``i`` of ``kind`` inside ``split``."""
    return '../Data/' + split + '/' + kind + '/' + kind + '_' + str(i) + '.csv'

def get_path_X_train(i):
    """Return the path of the i-th training input file."""
    return _sample_path('train', 'X', i)

def get_path_y_train(i):
    """Return the path of the i-th training target file."""
    return _sample_path('train', 'y', i)

def get_path_X_val(i):
    """Return the path of the i-th validation input file."""
    return _sample_path('val', 'X', i)

def get_path_y_val(i):
    """Return the path of the i-th validation target file."""
    return _sample_path('val', 'y', i)

def get_path_X_test(i):
    """Return the path of the i-th test input file."""
    return _sample_path('test', 'X', i)

def convert(data):
    """Flatten a DataFrame row by row into a one-dimensional NumPy array."""
    flat = data.to_numpy().ravel()
    return flat

def get_data_X_train(i):
    """Read, clean, feature-augment and flatten the i-th training input file."""
    raw = read_data(get_path_X_train(i))
    featured = add_features(clean_data_X(raw))
    return convert(featured)

def get_data_y_train(i):
    """Read, clean and flatten the i-th training target file."""
    raw = read_data(get_path_y_train(i))
    return convert(clean_data_y(raw))

def get_data_X_val(i):
    """Read, clean, feature-augment and flatten the i-th validation input file."""
    raw = read_data(get_path_X_val(i))
    featured = add_features(clean_data_X(raw))
    return convert(featured)

def get_data_y_val(i):
    """Read, clean and flatten the i-th validation target file."""
    raw = read_data(get_path_y_val(i))
    return convert(clean_data_y(raw))

def get_data_X_test(i):
    """Read, clean, feature-augment and flatten the i-th test input file."""
    raw = read_data(get_path_X_test(i))
    featured = add_features(clean_data_X(raw))
    return convert(featured)


# Final Dataset

In [16]:
def get_data_train(n=2308):
    """Load the first ``n`` training samples as an ``(X, y)`` pair of matrices.

    Sample ``i`` is obtained from ``get_data_X_train(i)`` and
    ``get_data_y_train(i)``; each flattened sample becomes one row.

    Parameters
    ----------
    n : int, default 2308
        Number of samples to load (indices ``0 .. n - 1``).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, y_train)``, each with ``n`` rows.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    # Collect the rows and stack once: concatenating inside the loop copies
    # the whole matrix on every iteration, which is quadratic in n.
    X_rows, y_rows = [], []
    for i in range(n):
        X_rows.append(get_data_X_train(i))
        y_rows.append(get_data_y_train(i))

    return np.vstack(X_rows), np.vstack(y_rows)

def get_data_val(n=524):
    """Load the first ``n`` validation samples as an ``(X, y)`` pair of matrices.

    Sample ``i`` is obtained from ``get_data_X_val(i)`` and
    ``get_data_y_val(i)``; each flattened sample becomes one row.

    Parameters
    ----------
    n : int, default 524
        Number of samples to load (indices ``0 .. n - 1``).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_val, y_val)``, each with ``n`` rows.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    # Collect the rows and stack once: concatenating inside the loop copies
    # the whole matrix on every iteration, which is quadratic in n.
    X_rows, y_rows = [], []
    for i in range(n):
        X_rows.append(get_data_X_val(i))
        y_rows.append(get_data_y_val(i))

    return np.vstack(X_rows), np.vstack(y_rows)

def get_data_test(n=20):
    """Load the first ``n`` test inputs as a matrix with one row per sample.

    Sample ``i`` is obtained from ``get_data_X_test(i)``.

    Parameters
    ----------
    n : int, default 20
        Number of samples to load (indices ``0 .. n - 1``).

    Returns
    -------
    numpy.ndarray
        ``X_test`` with ``n`` rows.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    # Stack once instead of concatenating (and copying) inside the loop.
    return np.vstack([get_data_X_test(i) for i in range(n)])

In [17]:
# Build the feature/target matrices for each split (one row per CSV sample).
X_train, y_train = get_data_train()
X_val, y_val = get_data_val()
# The test split only provides inputs; get_data_test returns X alone.
X_test = get_data_test()

In [19]:
X_train.shape

(2308, 990)

# Saving The Processed Dataset

In [20]:
# Persist every processed matrix as CSV under ../processed_data.
# The directory is created first so that a fresh checkout does not fail here,
# after the whole preprocessing run has already been paid for.
PROCESSED_DIR = "../processed_data"
os.makedirs(PROCESSED_DIR, exist_ok=True)

for split_name, split_array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_val", X_val),
    ("y_val", y_val),
    ("X_test", X_test),
):
    # to_csv keeps its default index column, exactly as before.
    pd.DataFrame(split_array).to_csv(PROCESSED_DIR + "/" + split_name + ".csv")