# CF regime identification for simulated trajectories

In [16]:
import os
import numpy as np
import pandas as pd
import utils_regime as rgm
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from tqdm import tqdm
from joblib import Parallel, delayed

# Root folder from which every data location below is derived.
# Edit this value to point at your own checkout; as shipped it resolves to
# two directory levels above the current working directory.
parent_dir = '../../'

data_path = f'{parent_dir}Data/InputData/'
simuldata_path = f'{parent_dir}Data/OutputData/Variability/crossfollow/'
output_path = f'{parent_dir}Data/OutputData/CF regime/'

In [53]:
# Debugging aid: absolute form of the relative HDF5 path built from simuldata_path.
string1 = os.path.abspath(simuldata_path+'idm/crossfollow_Lyft_fHH_lHHhigherVar_0.h5')
# Machine-specific absolute Windows directory.
# NOTE(review): presumably kept to compare against string1 while diagnosing a
# missing-file error; neither name is read again in the visible code.
string2 = r'C:\SURFdrive\PhD progress\PhDResearch\3_AVCF\experiments\Explaining-headway-reduction-of-HVs-following-AVs\Data\OutputData\Variability\crossfollow\idm'

In [56]:
# Load the table stored under key 'data' from one simulated-trajectory file in
# the 'idm' subfolder. The recorded run of this cell ended in FileNotFoundError.
data = pd.read_hdf(simuldata_path+'idm/crossfollow_Lyft_fHH_lHHhigherVar_0.h5', key='data')

FileNotFoundError: File ../../Data/OutputData/Variability/crossfollow/idm/crossfollow_Lyft_fHH_lHHhigherVar_0.h5 does not exist

In [4]:
# Display 'parameters_Lyft_HHlowerVar.csv' from the 'idm' subfolder; the result
# is not assigned. The recorded run of this cell ended in FileNotFoundError.
pd.read_csv(simuldata_path+'idm/parameters_Lyft_HHlowerVar.csv')

FileNotFoundError: [Errno 2] No such file or directory: '../../Data/OutputData/Variability/crossfollow/idm/parameters_Lyft_HHlowerVar.csv'

## Newell model calibration

In [None]:
def Newell_loss(x, t, x_leader, x_follower, dt=0.1):
    """Mean squared position error of Newell's car-following model.

    The follower trajectory is simulated with the recursion
    ``x_f[k + n] = min(x_f[k] + desired_v * tau, x_leader[k] - d)``, where
    ``n`` is ``tau`` expressed in samples, and compared with the observed
    follower positions.

    Parameters
    ----------
    x : sequence of three floats
        Candidate parameters ``(tau, d, desired_v)``.
    t : array-like
        Time stamps of the trajectory; only its length is used.
    x_leader, x_follower : array-like
        Observed leader and follower positions, indexed like ``t``.
    dt : float, optional
        Sampling interval used to convert ``tau`` into a number of samples.
        Defaults to 0.1, the value that used to be hard-coded.

    Returns
    -------
    float
        Mean squared difference between modelled and observed follower
        positions over all samples after the first ``n``. ``numpy.inf`` is
        returned when the delay is at least as long as the trajectory, so
        that a minimiser treats such a candidate as worst-case instead of
        receiving the NaN of an empty mean.
    """
    tau, d, desired_v = x

    # The small offset stops ratios such as 0.3 / 0.1 (2.9999999999999996 in
    # floating point) from being truncated one sample short.
    id_tau = int(tau / dt + 1e-9)

    n_samples = len(t)
    if id_tau >= n_samples:
        # Nothing left to compare once the initial window is removed.
        return np.inf

    # Float storage so modelled positions are never truncated to integers.
    x_follower_modelled = np.zeros_like(x_follower, dtype=float)
    # The first id_tau samples cannot be modelled; seed them with observations.
    x_follower_modelled[0:id_tau] = x_follower[0:id_tau]

    free_flow_step = desired_v * tau  # loop-invariant free-flow displacement
    for idx in range(n_samples - id_tau):
        x_follower_modelled[idx + id_tau] = min(
            x_follower_modelled[idx] + free_flow_step,  # unconstrained driving
            x_leader[idx] - d,                          # constrained by the leader
        )

    squared_error = (x_follower_modelled[id_tau:] - x_follower[id_tau:])**2
    return squared_error.mean()

In [None]:
def calibrate_newell(cfpair):
    """Fit Newell parameters for every trajectory of one simulated data set.

    Reads ``crossfollow_Lyft_<cfpair>.h5`` (key ``'data'``) from
    ``simuldata_path`` and, for each ``case_id``, minimises ``Newell_loss``
    with bounded Nelder-Mead on four parallel workers.

    Returns a DataFrame indexed by case id with columns ``tau``, ``d`` and
    ``desired_v``; rows containing NaN (including every case whose
    optimisation did not report success) are dropped.
    """
    trajectories = pd.read_hdf(simuldata_path+'crossfollow_Lyft_'+cfpair+'.h5', key='data')

    case_ids = trajectories.case_id.unique()
    trajectories = trajectories.set_index('case_id')

    def fit_single_case(case_id):
        # After transposing, each row is one signal: time, leader and follower position.
        signals = trajectories.loc[case_id][['time','x_leader','x_follower']].values.T
        t, x_leader, x_follower = signals
        outcome = minimize(
            Newell_loss,
            x0=[1.5,5,15],
            args=(t, x_leader, x_follower),
            bounds=((0.1,5.),(2.5,50.),(0.,30.)),
            method='Nelder-Mead',
        )
        # An all-NaN row marks a failed fit so that dropna() below removes it.
        return outcome.x if outcome.success else np.full(3, np.nan)

    jobs = (delayed(fit_single_case)(case_id) for case_id in tqdm(case_ids))
    fitted = Parallel(n_jobs=4)(jobs)

    fitted = pd.DataFrame(fitted, columns=['tau','d','desired_v'], index=case_ids)
    return fitted.dropna()

# Leader variability: calibrate repetitions 0-4 of both scenarios and store
# one CSV per scenario and repetition.
for count in tqdm(range(5)):
    for cfpair in ('fHH_lHHhigherVar', 'fHH_lHHlowerVar'):
        run_id = cfpair+'_'+str(count)
        parameters = calibrate_newell(run_id)
        parameters.to_csv(output_path+'Simulated Lyft/newell/newell_'+run_id+'.csv')

# Follower variability: a single data set per scenario, no repetition suffix.
for cfpair in ('fHHhigherVar_lHH', 'fHHlowerVar_lHH'):
    parameters = calibrate_newell(cfpair)
    parameters.to_csv(output_path+'Simulated Lyft/newell/newell_'+cfpair+'.csv')

## Parameter distribution

In [None]:
def read_tau(cfpair, dataset='Lyft'):
    """Load the calibrated Newell ``tau`` values for one car-following pair.

    Parameters
    ----------
    cfpair : str
        Suffix of the result file, i.e. ``newell_<cfpair>.csv``.
    dataset : str, optional
        One of ``'Lyft'``, ``'Waymo'`` or ``'Simulated Lyft'``; it is also
        the name of the sub-folder of ``output_path`` that is read.

    Returns
    -------
    pandas.Series
        The ``tau`` column, indexed by the first column of the CSV file.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names. Previously this
        surfaced as an ``UnboundLocalError`` further down.
    """
    known_datasets = ('Lyft', 'Waymo', 'Simulated Lyft')
    if dataset not in known_datasets:
        raise ValueError(
            'Unknown dataset ' + repr(dataset) + '; expected one of ' + repr(known_datasets)
        )

    # Each data set keeps its calibration results in a folder named after it.
    newell = pd.read_csv(output_path+dataset+'/newell/newell_'+cfpair+'.csv', index_col=0)
    return newell['tau']

In [None]:
# Side-by-side histograms of the calibrated tau for the two empirical data
# sets, one panel per data set and one histogram per pair type.
fig, axes = plt.subplots(1,2,figsize=(8,2.4),constrained_layout=True,sharey=True)

tau_bins = np.arange(0,4.1,0.1)
pair_labels = ['HA','HH','AH']

for ax, dataset in zip(axes, ['Lyft','Waymo']):
    # Read everything first, then draw in the fixed order HA, HH, AH.
    tau_by_pair = {label: read_tau(label, dataset) for label in pair_labels}
    for label, tau_hat in tau_by_pair.items():
        _ = ax.hist(tau_hat, bins=tau_bins, alpha=0.3, density=True, label=label)

    # One "<pair>: mean=..., sigma=..." line per pair type.
    summary = '\n'.join(
        label+': mean='+str(round(tau_hat.mean(),2)) + ', sigma='+str(round(tau_hat.std(),2))
        for label, tau_hat in tau_by_pair.items()
    )
    ax.text(1.65, 2.5, summary)

    ax.set_xlim([-0.1,4.1])
    ax.set_xlabel('Calibrated time gap (s)')
    ax.set_title(dataset)

axes[0].set_ylabel('Density')
axes[0].legend(loc='upper left')

In [None]:
# Same histogram layout for the simulated data: the left panel varies the
# leader, the right panel varies the follower.
fig, axes = plt.subplots(1,2,figsize=(8,2.4),constrained_layout=True,sharey=True)

tau_bins = np.arange(0,4.1,0.1)
count = 4  # repetition of the leader-variability simulation that is plotted

# Panel title -> (lower-variability file suffix, higher-variability file suffix).
scenario_files = {
    'Leader variability': ('fHH_lHHlowerVar'+'_'+str(count), 'fHH_lHHhigherVar'+'_'+str(count)),
    'Follower variability': ('fHHlowerVar_lHH', 'fHHhigherVar_lHH'),
}

for ax, (dataset, (lower_var, higher_var)) in zip(axes, scenario_files.items()):
    # The lower-variability run is drawn under the label 'HA', the higher one under 'HH'.
    tau_hat_HA = read_tau(lower_var, dataset='Simulated Lyft')
    tau_hat_HH = read_tau(higher_var, dataset='Simulated Lyft')

    _ = ax.hist(tau_hat_HA, bins=tau_bins, alpha=0.3, density=True, label='HA')
    _ = ax.hist(tau_hat_HH, bins=tau_bins, alpha=0.3, density=True, label='HH')

    summary_lines = [
        'HA: mean='+str(round(tau_hat_HA.mean(),2)) + ', sigma='+str(round(tau_hat_HA.std(),2)),
        'HH: mean='+str(round(tau_hat_HH.mean(),2)) + ', sigma='+str(round(tau_hat_HH.std(),2)),
    ]
    ax.text(1.65, 1.5, '\n'.join(summary_lines))

    ax.set_xlim([-0.1,4.1])
    ax.set_xlabel('Calibrated time gap (s)')
    ax.set_title(dataset)

axes[0].set_ylabel('Density')
axes[0].legend(loc='upper left')

## Regime categorisation

In [None]:
def determine_regime(cfpair):
    """Label every time step of one simulated data set with a regime and save the result.

    Steps:

    1. Read ``crossfollow_Lyft_<cfpair>.h5`` (key ``'data'``) from
       ``simuldata_path``.
    2. Derive the two thresholds passed to ``rgm.time_regime`` from the
       calibrated ``tau`` values of the same data set:
       ``mean + 2 * std`` and ``std``.
    3. Classify each case on four parallel workers and write the per-sample
       labels to ``regimes_all_<cfpair>.h5`` (key ``'regimes'``).
    4. Write one row per case to ``regimes_list_<cfpair>.csv`` holding the
       sample count of each regime divided by 10, plus ``regime_comb``, the
       concatenated names of the regimes that occur in that case.

    Parameters
    ----------
    cfpair : str
        Data set identifier used in the input and output file names.

    Returns
    -------
    None
        Results are written to disk under ``output_path``.
    """
    data = pd.read_hdf(simuldata_path+'crossfollow_Lyft_'+cfpair+'.h5', key='data')
    case_ids = data.case_id.unique()
    data = data.set_index('case_id')

    tau_hat = read_tau(cfpair, dataset='Simulated Lyft')
    miu_limit, sigma_limit = tau_hat.mean() + 2*tau_hat.std(), tau_hat.std() # for NGSIM, miu_limit = 5s, sigma_limit = 1.5s
    print(cfpair + ': miu limit = '+ str(round(miu_limit,2)) + ', sigma limit = ' + str(round(sigma_limit,2)))

    def identify_regime(case_id):
        # Select the case once and reuse it for both the signals and the index.
        case = data.loc[case_id]
        t, x_leader, x_follower, v_leader, v_follower = case[['time','x_leader','x_follower','v_leader','v_follower']].values.T
        regimes = rgm.time_regime(t, x_leader, x_follower, v_leader, v_follower, miu_limit, sigma_limit)
        return pd.DataFrame({'regime':regimes, 'time':t}, index=case.index)

    regimes_all = Parallel(n_jobs=4)(delayed(identify_regime)(case_id) for case_id in tqdm(case_ids))
    regimes_all = pd.concat(regimes_all).reset_index()
    regimes_all.to_hdf(output_path+'Simulated Lyft/regimes/regimes_all_'+cfpair+'.h5', key='regimes')

    # NOTE(review): the division by 10 presumably converts sample counts to
    # seconds for 10 Hz data - confirm against the simulation time step.
    regimes_list = (regimes_all.groupby(['case_id','regime']).time.count()/10).unstack().fillna(0)

    # Fix the column order and add, filled with 0.0, any regime that never
    # occurs in this data set. The old code only covered a missing 'Fd' (via
    # a bare except around a debug print) and raised KeyError for the others.
    regime_columns = ['Fa', 'C', 'A', 'D', 'F', 'S', 'Fd']
    regimes_list = regimes_list.reindex(columns=regime_columns, fill_value=0.0)

    regimes_list['regime_comb'] = regimes_list.apply(lambda x: ''.join([str(i) for i in x[x>0].index]), axis=1)
    regimes_list.to_csv(output_path+'Simulated Lyft/regimes/regimes_list_'+cfpair+'.csv')

# Leader variability: repetitions 2-4 of both scenarios.
# NOTE(review): the calibration loop covers repetitions 0-4; confirm that
# skipping 0 and 1 here is intentional.
for count in range(2,5):
    for cfpair in ('fHH_lHHhigherVar', 'fHH_lHHlowerVar'):
        run_id = cfpair+'_'+str(count)
        determine_regime(run_id)

# Follower variability: one data set per scenario, no repetition suffix.
for cfpair in ('fHHhigherVar_lHH', 'fHHlowerVar_lHH'):
    determine_regime(cfpair)