## This script preprocesses the INTERACTION dataset.

The code is adapted from `Code/Sampling_exp3.ipynb` in DriverSpaceInference: https://github.com/Yiru-Jiao/DriverSpaceInference

In [None]:
import os
import sys
import glob
from tqdm import tqdm
import pandas as pd
import numpy as np
from itertools import combinations
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rc('font',family='Arial')
from shapely.geometry import Point, Polygon
sys.path.append('INT_MapVis/python')
import main_visualize_data as vs
import warnings
warnings.filterwarnings('ignore')

# Recording location to preprocess, and the folders for raw input and processed output.
location = 'DR_USA_Intersection_GL'
path_raw = '../RawData/INTERACTION/' + location + '/'
path_processed = '../ProcessedData/INTERACTION/'

# Make sure the output folder exists; an already existing folder is not an error.
os.makedirs(path_processed, exist_ok=True)

### Road segmentation

In [3]:
# Collect the raw vehicle track files of the selected location in a stable (sorted) order.
data_files = sorted(glob.glob(f'{path_raw}vehicle*.csv'))
if not data_files:
    # Fail with an actionable message rather than an opaque IndexError on data_files[0].
    raise FileNotFoundError(f'No vehicle*.csv files found in {path_raw}')

# Only the first recording is read here; its x/y extent sets the limits of the map figure.
data = pd.read_csv(data_files[0])
x_min, x_max = data['x'].min(), data['x'].max()
y_min, y_max = data['y'].min(), data['y'].max()

In [4]:
def createfig(t):
    """Create a figure with the intersection map drawn on it.

    Parameters
    ----------
    t
        Not used by the function body; kept so calls such as
        ``createfig(0)`` keep working.

    Returns
    -------
    tuple
        ``(fig, ax, lines)``: the Matplotlib figure, its axes, and the
        value returned by ``vs.draw_map`` for the drawn map.
    """
    # Pad the data extent by 5 units on every side so the map is not clipped.
    x_range = [x_min - 5, x_max + 5]
    y_range = [y_min - 5, y_max + 5]

    fig, ax = plt.subplots(figsize=(16, 6))
    lines = vs.draw_map(ax, 'DR_USA_Intersection_GL', xlim=x_range, ylim=y_range)

    ax.set(aspect='equal')
    for set_label, symbol in ((ax.set_xlabel, r'$x$'), (ax.set_ylabel, r'$y$')):
        set_label('Global ' + symbol + ' coordinate (m)')

    return fig, ax, lines

def plotpolygon(xlist, ylist):
    """Plot each polyline and tag its first vertex with its list position.

    Debugging helper for checking how the segments that make up a polygon
    are ordered and oriented.

    Parameters
    ----------
    xlist, ylist
        Parallel sequences; ``xlist[i]`` and ``ylist[i]`` hold the x and y
        coordinates of the i-th polyline.
    """
    for seg_no, seg_x in enumerate(xlist):
        seg_y = ylist[seg_no]
        plt.plot(seg_x, seg_y)
        # Put the label at the first vertex so the drawing direction is visible.
        plt.text(seg_x[0], seg_y[0], str(seg_no))
        plt.gca().set_aspect('equal')

# Draw the map once only to obtain what draw_map returns; the figure itself is discarded.
_, _, lines = createfig(0)
# Of the three returned items only the first two are kept. They are indexed below
# as x_lists[k] / y_lists[k] to assemble the road-segment polygons.
x_lists, y_lists, _ = lines
plt.close()

In [None]:
# Outline of the `outmidright` region, stitched from three map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[15][4:][::-1],   # polyline 15 from index 4 on, reversed
    lambda pts: pts[21],
    lambda pts: pts[16],
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
outmidright = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
outmidright

In [None]:
# Outline of the `inmidright` region, stitched from three map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[12][:-5],        # polyline 12 without its last five vertices
    lambda pts: pts[7][::-1],
    lambda pts: pts[6][:-3][::-1],   # polyline 6 without its last three vertices, reversed
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
inmidright = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
inmidright

In [None]:
# Outline of the `inmidleftdown` region, stitched from map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[8],
    lambda pts: pts[15][:4],
    # From the last vertex of polyline 2 to the point lying 60% last / 40% first.
    lambda pts: [pts[2][-1], pts[2][-1]*0.6 + pts[2][0]*0.4],
    lambda pts: pts[11][::-1],
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
inmidleftdown = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
inmidleftdown

In [None]:
# Outline of the `outmidleftdown` region, stitched from map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[13][::-1],
    lambda pts: pts[26][2:][::-1],   # polyline 26 from index 2 on, reversed
    # From the first vertex of polyline 2 to the point lying 60% last / 40% first.
    lambda pts: [pts[2][0], pts[2][-1]*0.6 + pts[2][0]*0.4],
    lambda pts: pts[11][::-1],
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
outmidleftdown = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
outmidleftdown

In [None]:
# Outline of the `outmidleftup` region, stitched from map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[25],
    # From the midpoint of polyline 31's end vertices to its first vertex.
    lambda pts: [(pts[31][-1] + pts[31][0])/2, pts[31][0]],
    lambda pts: pts[24],
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
outmidleftup = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
outmidleftup

In [None]:
# Outline of the `inmidleftup` region, stitched from map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[28],
    # From the last vertex of polyline 31 to the midpoint of its end vertices.
    lambda pts: [pts[31][-1], (pts[31][-1] + pts[31][0])/2],
    lambda pts: pts[25][::-1],
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
inmidleftup = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
inmidleftup

In [None]:
# Outline of the `outleft` region, stitched from map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[10],
    lambda pts: pts[0],
    # From the midpoint of polyline 4's end vertices to its first vertex.
    lambda pts: [(pts[4][0] + pts[4][-1])/2, pts[4][0]],
    lambda pts: pts[9][1:],          # polyline 9 without its first vertex
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
outleft = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
outleft

In [None]:
# Outline of the `inleft` region, stitched from map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[10],
    lambda pts: pts[0],
    # From the midpoint of polyline 4's end vertices to its last vertex.
    lambda pts: [(pts[4][0] + pts[4][-1])/2, pts[4][-1]],
    lambda pts: pts[30][::-1],
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
inleft = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
inleft

In [None]:
# Outline of the `inside` region, stitched from nine map polylines.
# Each selector is applied to both x_lists and y_lists so the two coordinate
# lists always use exactly the same pieces.
pieces = [
    lambda pts: pts[5][1:][::-1],    # polyline 5 without its first vertex, reversed
    lambda pts: pts[21][::-1],
    lambda pts: pts[15][3:5][::-1],  # vertices 3 and 4 of polyline 15, reversed
    lambda pts: pts[2][::-1],
    lambda pts: pts[4][::-1],
    lambda pts: pts[27][2:][::-1],   # polyline 27 from index 2 on, reversed
    lambda pts: pts[31][::-1],
    lambda pts: pts[7][::-1],
    lambda pts: pts[6][8:],          # polyline 6 from index 8 on
]
x = [pick(x_lists) for pick in pieces]
y = [pick(y_lists) for pick in pieces]

xlist, ylist = x, y
# plotpolygon(xlist, ylist)

# Chain the pieces into one (n, 2) array of vertices.
inside = Polygon(np.column_stack((np.concatenate(xlist), np.concatenate(ylist))))
inside

### Load data

In [None]:
# Read every recording and make the ids unique across files by prefixing the file number:
#   frame_id  = file_id * 1e4 + frame number within the file
#   track_id  = file_id * 1e3 + track number within the file
#   unique_id = track_id * 1e4 + frame number within the file (one id per vehicle per frame)
frames = []
for csv_path in tqdm(data_files):
    # The three characters right before the 4-character extension are parsed as the file number.
    file_id = int(csv_path[-7:-4])
    recording = pd.read_csv(csv_path)

    # Compute the new ids first, then assign them in the original column order.
    global_frame = (file_id * 1e4 + recording['frame_id']).astype(int)
    global_track = (file_id * 1e3 + recording['track_id']).astype(int)
    vehicle_frame = (global_track * 1e4 + global_frame % 1e4).astype(int)

    recording['file_id'] = file_id
    recording['frame_id'] = global_frame
    recording['track_id'] = global_track
    recording['unique_id'] = vehicle_frame
    frames.append(recording)

data = pd.concat(frames, ignore_index=True)
print('Rows with NaN:', data.isna().any(axis=1).sum())

In [None]:
# Label every trajectory point with the road segment it lies in.
# `pos` is 1 for the 'in_*' segments, -1 for the 'out_*' segments and 0 for 'inside'.
segments = [
    (outmidright, 'out_midright', -1),
    (inmidright, 'in_midright', 1),
    (inmidleftdown, 'in_midleftdown', 1),
    (outmidleftdown, 'out_midleftdown', -1),
    (outmidleftup, 'out_midleftup', -1),
    (inmidleftup, 'in_midleftup', 1),
    (outleft, 'out_left', -1),
    (inleft, 'in_left', 1),
    (inside, 'inside', 0),
]

positions = np.full(len(data), '', dtype=object)
# Start from NaN (not 0) so points that match no polygon can actually be counted below;
# with an integer array initialised to zero the isna() check could never be true.
poses = np.full(len(data), np.nan)

# Plain arrays avoid a label-based DataFrame lookup for every single row.
xs = data['x'].to_numpy()
ys = data['y'].to_numpy()
for idx in tqdm(range(len(data))):
    point = Point(float(xs[idx]), float(ys[idx]))
    # No early exit on purpose: all polygons are tested and, should they overlap,
    # the last match in `segments` wins (same result as the former no-op `continue`).
    for poly, position, pos in segments:
        if point.within(poly):
            positions[idx] = position
            poses[idx] = pos
data['position'] = positions
data['pos'] = poses

unmatched = data['pos'].isna()
print('Number of vehicles not in any polygon:', int(unmatched.sum()))
# Points outside every polygon are treated like points inside the intersection.
data['pos'] = data['pos'].fillna(0).astype(int)
data.loc[data['pos'] == 0, 'position'] = 'inside'

# visualise to check
fig, ax, _ = createfig(0)
ax.scatter(data['x'], data['y'], s=1, c=data['pos'], cmap='viridis')

In [None]:
print('Rows with NaN:', data.isna().any(axis=1).sum())

# correct heading directions
# Heading components are the cosine and sine of psi_rad.
heading = data['psi_rad']
data['hx'] = np.cos(heading)
data['hy'] = np.sin(heading)

# Negate a heading component wherever its sign is strictly opposite to the sign of the
# matching velocity component. Zero or NaN values on either side are left untouched.
# NOTE(review): x and y are corrected independently, so a row may have only one of its
# two components flipped - confirm this is intended.
for vel_col, head_col in (('vx', 'hx'), ('vy', 'hy')):
    opposed = np.sign(data[vel_col]) * np.sign(data[head_col]) == -1
    data.loc[opposed, head_col] = -data.loc[opposed, head_col]

In [None]:
# derive acceleration
# Speed is the magnitude of (vx, vy); acceleration is its time derivative, computed per
# track with np.gradient on the track's own timestamps.
data['time'] = data['timestamp_ms']/1000
data = data.sort_values(['track_id','time']).set_index('track_id')

# Speed needs no per-track loop: compute it for all rows at once.
data['v'] = np.sqrt(data['vx']**2 + data['vy']**2)

speed = data['v'].to_numpy()
times = data['time'].to_numpy()
acceleration = np.full(len(data), np.nan)

# After sorting, each track occupies one contiguous run of rows, so the first row and the
# length of every run are enough to slice the arrays (no label-based lookups needed).
_, starts, counts = np.unique(data.index.to_numpy(), return_index=True, return_counts=True)
for start, count in tqdm(zip(starts, counts), total=len(starts)):
    if count < 2:
        # np.gradient needs at least two samples; a single-row track keeps acc = NaN
        # instead of aborting the whole run.
        continue
    stop = start + count
    acceleration[start:stop] = np.gradient(speed[start:stop], times[start:stop])
data['acc'] = acceleration

data = data.reset_index()
data.to_hdf(f'{path_processed}GL.h5', key='data', mode='w')

In [None]:
# Summary statistics of the processed recordings.
n_vehicles = data['track_id'].nunique()
n_frames = data['frame_id'].nunique()
x_extent = data['x'].max() - data['x'].min()

print('There are {} vehicles'.format(n_vehicles))
# frames -> seconds (divided by 10, as for the pair time stamps) -> minutes -> hours
print('{:.2f} hours of data is recorded'.format(n_frames/10/60/60))
print('The distance range is {:.2f} m'.format(x_extent))

### Pairing

In [47]:
# Reload the processed trajectories saved to GL.h5 so pairing can start from the stored file.
data = pd.read_hdf(f'{path_processed}GL.h5', key='data')

In [None]:
print('Creating pairs...')
# For every frame, list all unordered pairs of the unique_ids present in that frame.
pair_idx = data.groupby('frame_id').apply(
    lambda frame: pd.DataFrame.from_records(combinations(frame.unique_id, 2))
)

print('Organising pairs...')
ego_uid = pair_idx[0].values
sur_uid = pair_idx[1].values
# unique_id = track_id * 1e4 + frame number, so floor division by 1e4 recovers the track_id.
pairs = pd.DataFrame({'frame_id':pair_idx.index.get_level_values(0).astype(int),
                      'i':(ego_uid//1e4).astype(int),
                      'j':(sur_uid//1e4).astype(int)})
# frame_id = file_id * 1e4 + frame number; the frame number divided by 10 is stored as time.
pairs['file_id'] = (pairs['frame_id']//1e4).astype(int)
pairs['time'] = pairs['frame_id']%1e4/10

features = ['x','y','acc','v','vx','vy','hx','hy','length','width','pos']
# Build the unique_id-indexed feature table once and reuse it for both vehicles of a pair.
by_uid = data.set_index('unique_id')[features]
for suffix, uids in (('ego', ego_uid), ('sur', sur_uid)):
    pairs[[f'{feature}_{suffix}' for feature in features]] = by_uid.reindex(index=uids).values
del by_uid
data = [] # release memory

In [None]:
# vehicles in the entering lane cannot interact with vehicles in the exit lane
# (pos is 1 / -1 / 0, so only an in/out combination gives a product below -0.5)
compatible = (pairs['pos_ego']*pairs['pos_sur']) > -0.5
pairs = pairs[compatible]

# Number of frames each (i, j) pair is observed together.
pair_size = pairs.groupby(['i','j']).size()
# 2.5-8.5 seconds, otherwise either too short or too long (bounds are inclusive)
pair_size = pair_size[pair_size.between(25, 85)]

keep = pairs.set_index(['i','j']).index.isin(pair_size.index)
pairs = pairs[keep].reset_index(drop=True)

print('Saving pairs...')
pairs.to_hdf(f'{path_processed}paired_GL.h5', key='pairs', mode='w')