In [None]:
import pickle
import polars as pl
import polars.datatypes as pld

In [None]:
# Load the pickled object that the cells below use as a polars DataFrame.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only open
# pickle files that come from a trusted source.
with open('./I-24MOTION_2022-11-17_08-10-00.json.pkl', mode='rb') as pickle_file:
    df = pickle.load(pickle_file)

# Show the loaded object as the cell output.
df

In [None]:
# Shorthand alias for pl.col; it is not referenced by any of the cells shown here.
c = pl.col

In [None]:
import json
from collections import OrderedDict
import os
import numpy as np

# Trajectories within 300m behind an actively controlled AV 

In [None]:
# Smallest value in each trajectory's list of upstream-AV distances.
closest_upstream_dist = pl.col('distance_to_upstream_av_meters').list.min()

# Keep all trajectories that are 'once' within 300m behind an actively
# controlled AV: if the smallest recorded distance is below 300, at least one
# point of the trajectory is below 300.
df_once_within = df.filter(closest_upstream_dist < 300)
df_once_within

In [None]:
# For every trajectory, the positions of the points whose upstream-AV distance
# is below 300.
upstream_idx_within_300m = (
    pl.col('distance_to_upstream_av_meters')
    .list.eval(pl.arg_where(pl.element() < 300))
)

(
    df_once_within
    # "within_indcs": list of the point indices where the vehicle is within
    # 300m behind an actively controlled AV.
    .with_columns(within_indcs=upstream_idx_within_300m)
    # "within_timestamps": list of the timestamps found at those indices.
    .with_columns(
        within_timestamps=pl.col('timestamp').list.gather(pl.col('within_indcs'))
    )
)

# Trajectories within 300m ahead of an actively controlled AV

In [None]:
# Return all trajectories that are 'once' within 300m ahead of an actively
# controlled AV, i.e. that have at least one point with `distance > -300` (the
# same per-point test the following cell uses to pick indices).
# "At least one element > -300" is equivalent to `max > -300`; testing
# `min > -300` would instead require *every* point of the trajectory to pass.
# NOTE: this rebinds `df_once_within`, replacing the upstream-based frame that
# an earlier cell stored under the same name.
df_once_within = df.filter(pl.col('distance_to_downstream_av_meters').list.max() > -300)
df_once_within

In [None]:
# For every trajectory, the positions of the points whose downstream-AV
# distance is above -300.
downstream_idx_within_300m = (
    pl.col('distance_to_downstream_av_meters')
    .list.eval(pl.arg_where(pl.element() > -300))
)

(
    df_once_within
    # "within_indcs": list of the point indices where the vehicle is within
    # 300m ahead of an actively controlled AV.
    .with_columns(within_indcs=downstream_idx_within_300m)
    # "within_timestamps": list of the timestamps found at those indices.
    .with_columns(
        within_timestamps=pl.col('timestamp').list.gather(pl.col('within_indcs'))
    )
)

# Trajectories more than 2000m away from an AV 

In [None]:
# Return all trajectories that are 'once' more than 2000m away from an actively
# controlled AV, using the same per-point tests as the following cell:
#   downstream distance > 2000   -> at least one such point  <=>  max > 2000
#   upstream distance   < -2000  -> at least one such point  <=>  min < -2000
# (`max < -2000` would only keep trajectories whose every point is below -2000.)
# NOTE(review): the 300m cells compare upstream distances against +300 and
# downstream distances against -300, whereas the signs here are the other way
# round -- confirm the sign convention of the two distance columns.
df_once_away = df.filter(
    (pl.col('distance_to_downstream_av_meters').list.max() > 2000) |
    (pl.col('distance_to_upstream_av_meters').list.min() < -2000)
)
df_once_away

In [None]:
# Per-trajectory positions of the points whose downstream-AV distance is above 2000.
far_downstream_idx = pl.col('distance_to_downstream_av_meters').list.eval(
    pl.arg_where(pl.element() > 2000)
)
# Per-trajectory positions of the points whose upstream-AV distance is below -2000.
far_upstream_idx = pl.col('distance_to_upstream_av_meters').list.eval(
    pl.arg_where(pl.element() < -2000)
)

(
    df_once_away
    # "daway_indcs" / "uaway_indcs": lists of the point indices where the
    # vehicle is more than 2000m ahead of / behind an actively controlled AV.
    .with_columns(
        daway_indcs=far_downstream_idx,
        uaway_indcs=far_upstream_idx,
    )
    # "daway_timestamps" / "uaway_timestamps": lists of the timestamps found at
    # those indices.
    .with_columns(
        daway_timestamps=pl.col('timestamp').list.gather(pl.col('daway_indcs')),
        uaway_timestamps=pl.col('timestamp').list.gather(pl.col('uaway_indcs')),
    )
)

# Trajectories directly behind/ahead of an active AV

In [None]:
# Which side of the AV to analyse; it selects the `{POS}_av_id` and
# `distance_to_{POS}_av_meters` columns read below.
POS = 'upstream'  # alternative: 'downstream'

# Since the timestamps of the trajectories do not align, snap every timestamp
# to the nearest 1/25th of a second (each trajectory point is recorded every
# 40 milliseconds): timestamp * 25 is rounded by adding 0.5 and casting to
# Int64, then multiplied by 4, so the bin is an integer multiple of 4.
snap_to_bin = (pl.element() * 100. / 4. + .5).cast(pld.Int64) * 4

df_timestamp_bin = (
    df
    # Pick the needed columns and give them short names in one step.
    .select(
        pl.col('trajectory_id').alias('tid'),
        'timestamp',
        pl.col(f'{POS}_av_id').alias('avid'),
        pl.col(f'distance_to_{POS}_av_meters').alias('dist'),
    )
    # Remove all the trajectories without any AV id (only nulls in 'avid').
    .filter(pl.col('avid').list.drop_nulls().list.len() != 0)
    .with_columns(
        timestamp_bin=pl.col('timestamp').list.eval(snap_to_bin),
        # 0..len-1 for each trajectory; can be used to reference each point
        # of the trajectory.
        trajectory_point_idx=pl.int_ranges(pl.col('timestamp').list.len()),
    )
)
df_timestamp_bin

In [None]:
# List columns that hold one entry per trajectory point.
per_point_columns = [
    'timestamp',
    'dist',
    'timestamp_bin',
    'trajectory_point_idx',
    'avid',
]

df_flatten = (
    df_timestamp_bin
    # Flatten all trajectories: one row per trajectory point.
    .explode(per_point_columns)
    # Keep only the trajectory points that are matched with an AV.
    .filter(pl.col('avid').is_not_null())
)
df_flatten

In [None]:
# Position, inside each group's 'dist' list, of the entry with the smallest
# absolute distance, i.e. the nearest vehicle.
nearest_position = pl.col('dist').list.eval(pl.element().abs()).list.arg_min()

df_grouped = (
    df_flatten
    # One group per AV id and per timestamp bin (the snapped timestamps
    # stored in 'timestamp_bin').
    .group_by(['avid', 'timestamp_bin'])
    # Roll up the remaining columns into lists.
    .all()
    .with_columns(idx_nearest=nearest_position)
)
df_grouped

In [None]:
# Output column -> list column it is picked from. The order matters only for
# where the new 'trajectory_id' column ends up.
pick_from = {
    'timestamp': 'timestamp',
    'trajectory_id': 'tid',
    'dist': 'dist',
    'trajectory_point_idx': 'trajectory_point_idx',
}

# From every rolled-up list, take the entry that belongs to the nearest vehicle.
nearest_picks = {
    out_name: pl.col(list_name).list.get(pl.col('idx_nearest'))
    for out_name, list_name in pick_from.items()
}

df_nearest = (
    df_grouped
    .with_columns(**nearest_picks)
    # Helper columns are no longer needed once the nearest entries are picked.
    .drop('idx_nearest', 'tid', 'timestamp_bin')
    .sort('timestamp')
)
df_nearest

In [None]:
# One row per AV: group by AV id, roll the remaining columns up into lists,
# and order the rows by AV id.
df_nearest.group_by('avid').all().sort('avid')