This notebook determines the optimal parameters that minimise the Kalman filtering error for highD data.

In [None]:
import glob
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from scipy.optimize import minimize, differential_evolution
from joblib import Parallel, delayed
import get_heading_ekf
import matplotlib.pyplot as plt
from IPython.display import clear_output, display
import time as systime

# Relative path to the directory holding the raw datasets.
path_raw = '../../Data/RawData/'

# Seed NumPy's global random generator so the random draws below are repeatable.
manualSeed = 131
np.random.seed(seed=manualSeed)

In [None]:
# Extract meta information: read every recording-metadata CSV (in sorted
# filename order) and stack them into one table.
metadatafiles = sorted(glob.glob(path_raw + 'highD/RecordingMetadata/*.csv'))
metadata = pd.concat([pd.read_csv(metadatafile) for metadatafile in metadatafiles])

# lane_num: length of the lowerLaneMarkings string, integer-divided by 5
#           (presumably a lane count derived from the marking list -- confirm).
# numFrames: frame rate times duration, truncated to an integer.
metadata = metadata.assign(
    lane_num=metadata['lowerLaneMarkings'].str.len() // 5,
    numFrames=(metadata['frameRate'] * metadata['duration']).astype(int),
)

# Total number of cars and trucks recorded at each location.
vehicle_totals = metadata.groupby('locationId').agg({'numCars': 'sum', 'numTrucks': 'sum'})
print(vehicle_totals)

# Powers of ten strictly larger than the biggest per-recording car count and
# frame count, respectively.
max_num_cars = int(metadata['numCars'].max())
max_num_frames = int(metadata['numFrames'].max())
trackid_base = 10 ** len(str(max_num_cars))
frameid_base = 10 ** len(str(max_num_frames))

In [None]:
# Build the train/test samples: for each location, pool the tracks of all its
# recordings, draw 100 distinct tracks at random and split them 50/50.

# highD column names -> names used in this notebook. Note that 'width' and
# 'height' are swapped into 'length' and 'width'.
column_map = {'frame': 'frame_id',
              'id': 'track_id',
              'xVelocity': 'vx',
              'yVelocity': 'vy',
              'width': 'length',
              'height': 'width'}
kept_columns = ['track_id', 'frame_id', 'x', 'y', 'vx', 'vy', 'width', 'length',
                'laneId', 'precedingId', 'followingId']

data_train, data_test = [], []
for locid in tqdm(metadata.locationId.unique()):
    loc = 'highD_' + str(locid).zfill(2)  # location label; not used further in this cell
    data = []
    for fileid in metadata.loc[metadata.locationId == locid, 'id'].values:
        df = (
            pd.read_csv(f"{path_raw}highD/{str(fileid).zfill(2)}_tracks.csv")
            .rename(columns=column_map)[kept_columns]
            # Shift x/y by half the vehicle extent (presumably moving the
            # reference point from the bounding-box corner to its centre -- confirm).
            .assign(x=lambda t: t['x'] + t['length'] / 2,
                    y=lambda t: t['y'] + t['width'] / 2,
                    loc_id=locid,
                    file_id=fileid)
        )
        # Prefix with location and recording ids so track ids stay unique
        # after pooling several recordings.
        df['track_id'] = (df['loc_id'].astype(str) + '-'
                          + df['file_id'].astype(str) + '-'
                          + df['track_id'].astype(str))
        data.append(df)
    data = pd.concat(data)

    # One random draw per location, without replacement: first 50 -> train, rest -> test.
    indices = np.random.choice(data['track_id'].unique(), size=100, replace=False)
    train_ids, test_ids = indices[:50], indices[50:]
    df_train = data[data['track_id'].isin(train_ids)].copy()
    df_test = data[data['track_id'].isin(test_ids)].copy()
    data_train.append(df_train)
    data_test.append(df_test)

data_train = pd.concat(data_train)
data_test = pd.concat(data_test)

In [None]:
def optimise_ekf_params(data, track_id):
    """Fit the six EKF parameters for a single track with bounded L-BFGS-B.

    ``get_heading_ekf.ekf`` is used as the objective; it is called with the
    candidate parameter vector followed by ``data`` indexed by ``track_id``,
    the requested ``track_id`` and ``True``
    (NOTE(review): the meaning of the trailing flag is defined in
    ``get_heading_ekf`` -- confirm).

    Parameters
    ----------
    data : pandas.DataFrame
        Trajectory table containing a ``track_id`` column.
    track_id
        Identifier of the track to optimise for.

    Returns
    -------
    tuple
        ``(parameters, loss)`` as reported by the optimiser on success;
        a length-6 array of NaN and a NaN loss when it does not converge.
    """
    start_point = [100., 5., 5., 4.5, 2., 50.]
    search_bounds = ((1., 200.), (0.01, 50.), (0.01, 50.),
                     (3., 15.), (0.01, 2.5), (25., 65.))
    indexed = data.set_index('track_id')

    res = minimize(get_heading_ekf.ekf,
                   x0=start_point,
                   args=(indexed, track_id, True),
                   bounds=search_bounds,
                   method='L-BFGS-B')

    # Failed fits are marked with NaN so they can be told apart downstream.
    if not res.success:
        return np.full(6, np.nan), np.nan
    return res.x, res.fun

# Optimise the EKF parameters independently for every training track,
# running four jobs in parallel.
train_track_ids = data_train.track_id.unique()
train_parameters = Parallel(n_jobs=4)(
    delayed(optimise_ekf_params)(data_train, track_id)
    for track_id in tqdm(train_track_ids)
)

# Each result is a (parameter vector, loss) pair; split them into two arrays.
parameters = np.array([fitted for fitted, _ in train_parameters])
loss_values = np.array([loss for _, loss in train_parameters])

# One row per track: the six fitted parameters plus the achieved loss.
parameter_names = ['uncertainty_init', 'uncertainty_pos', 'uncertainty_speed',
                   'noise_acc', 'noise_rad', 'noise_speed']
train_results = pd.DataFrame(parameters, columns=parameter_names)
train_results['loss'] = loss_values

In [None]:
# Histogram of every fitted parameter (and the loss) across the training tracks.
train_results.hist(figsize=(9, 5.5), bins=20)
# Column-wise medians, rounded to four decimals.
print(train_results.median().round(4))

In [None]:
# Compare the per-track test loss of two parameter sets, one histogram each.
# The "default" vector equals the optimiser's starting point used in
# optimise_ekf_params; the "trained" vector is presumably the rounded medians
# printed by the previous cell -- confirm.
candidates = [
    ('Trained parameters', np.array([100.2683, 0.01, 0.01, 11.1333, 2.5, 52.4380])),
    ('Default parameters', np.array([100., 5., 5., 4.5, 2., 50.])),
]

# Index the test set and collect the track ids once, up front: doing
# set_index inside the generator would rebuild the whole frame for every
# track and every parameter set.
test_indexed = data_test.set_index('track_id')
test_track_ids = data_test['track_id'].unique()

fig, axes = plt.subplots(1, len(candidates), figsize=(8,2), sharex=True)
for i, (title, params) in enumerate(candidates):
    test_loss_values = Parallel(n_jobs=4)(
        delayed(get_heading_ekf.ekf)(params, test_indexed, track_id, True)
        for track_id in tqdm(test_track_ids)
    )
    test_loss_values = np.array(test_loss_values)
    axes[i].hist(test_loss_values, bins=20)
    axes[i].set_xlabel('Loss')
    axes[i].set_title(title)
    # Annotate the mean loss in axes-relative coordinates (upper right area).
    axes[i].text(0.75, 0.9, 'Mean loss: {:.4f}'.format(test_loss_values.mean()),
                 horizontalalignment='center', verticalalignment='center',
                 transform=axes[i].transAxes)