# Positional Validation


In [1]:
# ruff: noqa: E402

%load_ext autoreload
%autoreload 2

# find the root of the project
import os
from pathlib import Path
import sys
import polars as pl

# Locate the project root: the closest ancestor of the working directory
# (starting at its parent) that contains a ``.git`` entry.
# ``Path.parent`` of the filesystem root is the root itself, so an open-ended
# ``while`` loop would never terminate when the notebook is run outside a git
# checkout.  Searching the finite list of ancestors fails fast instead.
_search_start = Path(os.getcwd()).parent
ROOT = next(
    (
        candidate
        for candidate in (_search_start, *_search_start.parents)
        if candidate.joinpath(".git").exists()
    ),
    None,
)
if ROOT is None:
    raise FileNotFoundError(
        f"No '.git' directory found in {_search_start} or any of its parents; "
        "run this notebook from inside the project checkout."
    )

# add the root to the python path (guarded so that re-running the cell does
# not keep appending duplicate entries)
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

## Read the Data


In [2]:
# GPS log of the instrumented vehicle drive on 2023-10-31.
drive_csv_path = ROOT / "data" / "vehicle_drives" / "2023-10-31.csv"

# Parse the textual ``gps_time`` column into microsecond-resolution datetimes.
parsed_gps_time = pl.col("gps_time").str.strptime(dtype=pl.Datetime(time_unit="us"))

veh_df = (
    pl.read_csv(drive_csv_path)
    .drop("")  # discard the column whose header is the empty string
    .with_columns(parsed_gps_time)
    .sort("gps_time")
    # running row number in time order
    .with_row_count(name="seq")
)

### Map to Lanes


In [3]:
import geopandas as gpd
from src.geometry import RoadNetwork


# Read the mainline lane geometries and hand them to the project's RoadNetwork.
mainline_lane_gdf = gpd.read_file(ROOT.joinpath("data/mainline_lanes.geojson"))
network = RoadNetwork(lane_gdf=mainline_lane_gdf)

# Tabular representation exposed by the network object.
lane_df = network.df

## Snap the Vehicle Trajectories to Lanes


In [4]:
import utm

# Convert the latitude/longitude samples to UTM coordinates.  Only the first
# two return values are kept; the remaining two (zone number and zone letter,
# per the ``utm`` package) are not needed here.
x, y = utm.from_latlon(
    veh_df["lat"].to_numpy(),
    veh_df["lon"].to_numpy(),
)[:2]

# Store the projected coordinates alongside the raw GPS columns.
veh_df = veh_df.with_columns(x=x, y=y)

veh_df.head()

seq,gps_time,lat,lon,altitude,x,y
u32,"datetime[μs, UTC]",f64,f64,f64,f64,f64
0,2023-10-31 16:43:11.400 UTC,33.235349,-87.610211,24.8966,443147.978315,3677500.0
1,2023-10-31 16:43:11.500 UTC,33.235349,-87.610211,24.8969,443147.978692,3677500.0
2,2023-10-31 16:43:11.600 UTC,33.235349,-87.610211,24.8968,443147.979248,3677500.0
3,2023-10-31 16:43:11.700 UTC,33.235349,-87.610211,24.8972,443147.978501,3677500.0
4,2023-10-31 16:43:11.800 UTC,33.235349,-87.610211,24.8965,443147.97897,3677500.0


### Build the KDTree


In [5]:
# Match every vehicle sample to a lane of the road network using its UTM
# coordinates.  ``dist_upper_bound`` caps the matching distance (presumably in
# metres, like the UTM columns; confirm against RoadNetwork.map_to_lane).
veh_df = veh_df.pipe(
    network.map_to_lane,
    dist_upper_bound=3,
    utm_x_col="x",
    utm_y_col="y",
)

In [6]:
# Split the drive into numbered lane sequences.
#
# * ``name`` holds the matched lane label; missing matches become "".
# * ``sequence`` is True on a row whose predecessor carried a different,
#   non-empty label, i.e. the row right after a lane was left or changed.
# * ``sequence_id`` is the running count of those flags, zeroed on rows that
#   have no lane.  Rows with id 0 (unmatched samples, and everything before
#   the first flagged transition) are dropped.
has_lane = pl.col("name") != ""
previous_lane = pl.col("name").shift(1)
left_previous_lane = (previous_lane != pl.col("name")) & (previous_lane != "")

veh_df = (
    veh_df.with_columns(pl.col("name").fill_null(""))
    .with_columns(left_previous_lane.alias("sequence"))
    .with_columns((pl.col("sequence").cum_sum() * has_lane).alias("sequence_id"))
    .filter(pl.col("sequence_id") != 0)
)

In [7]:
# Count how many rows fall into each sequence_id.
veh_df["sequence_id"].value_counts()

sequence_id,counts
u32,u32
10,379
21,21
6,157
22,999
7,94
11,549
15,7
14,7
20,1242
5,431


In [8]:
# Isolate lane sequence 12 and estimate speed from consecutive samples as
# delta(s) / delta(t).  The first row has no predecessor, so its speed is null.
# NOTE(review): the variable is called ``ebl1_df`` but the previewed rows of
# this sequence carry the lane name "WBL2"; confirm the intended sequence.
elapsed_seconds = pl.col("gps_time").diff().dt.total_milliseconds() / 1e3
travelled_s = pl.col("s").diff()

ebl1_df = veh_df.filter(pl.col("sequence_id") == 12).with_columns(
    (travelled_s / elapsed_seconds).alias("speed")
)

In [9]:
# Preview the first rows of the selected sequence, including the derived speed column.
ebl1_df.head()

seq,gps_time,lat,lon,altitude,x,y,lane_index,d,x_lane,y_lane,s,angle,name,sequence,sequence_id,speed
u32,"datetime[μs, UTC]",f64,f64,f64,f64,f64,u32,f64,f64,f64,f64,f64,str,bool,u32,f64
8562,2023-10-31 16:58:46.600 UTC,33.234887,-87.614469,24.0903,442751.008159,3677500.0,42288,-2.595858,442750.349067,3677500.0,842.090361,-2.879562,"""WBL2""",False,12,
8563,2023-10-31 16:58:46.700 UTC,33.234893,-87.614466,24.0889,442751.285803,3677500.0,42284,-2.118468,442750.735409,3677500.0,841.690365,-2.879562,"""WBL2""",False,12,-3.999954
8564,2023-10-31 16:58:46.800 UTC,33.234898,-87.614463,24.0934,442751.525031,3677500.0,42280,-1.620647,442751.121751,3677500.0,841.29037,-2.879562,"""WBL2""",False,12,-3.999954
8565,2023-10-31 16:58:46.900 UTC,33.234903,-87.614461,24.1052,442751.729192,3677500.0,42277,-1.104216,442751.411507,3677500.0,840.990373,-2.879562,"""WBL2""",False,12,-2.999966
8566,2023-10-31 16:58:47 UTC,33.234908,-87.614459,24.1184,442751.928222,3677500.0,42273,-0.587141,442751.797849,3677500.0,840.590378,-2.879562,"""WBL2""",False,12,-3.999954


### Read in the Trajectories


In [10]:
from datetime import timedelta
import polars as pl
from src.radar import CalibratedRadar
from src.pipelines.open_file import prep_df


# Consolidated road network restricted to the EBL1 and WBL1 lanes.
mainline_net = RoadNetwork(
    lane_gdf=gpd.read_file(ROOT / "data/mainline_lanes.geojson"),
    keep_lanes=["EBL1", "WBL1"],
)

# Radar calibration loaded from the October configuration file.
radar_obj = CalibratedRadar(
    radar_location_path=ROOT / "configuration" / "october_calibrated.yaml",
)

# Keep only radar rows recorded within 30 s of the selected vehicle sequence.
# ``gps_time`` is not a radar column: it is resolved from ``ebl1_df`` through
# ``with_context``.
window_padding = timedelta(seconds=30)
within_drive_window = pl.col("epoch_time").is_between(
    pl.col("gps_time").min() - window_padding,
    pl.col("gps_time").max() + window_padding,
)

raw_radar_lf = (
    pl.scan_parquet(ROOT / "data" / "raw" / "*.parquet")
    .with_columns(pl.col("epoch_time").dt.replace_time_zone("UTC"))
    .with_context(ebl1_df.lazy())
    .filter(within_drive_window)
)

radar_df = (
    # materialise the time-filtered rows once, then carry on lazily
    raw_radar_lf.collect()
    .lazy()
    .pipe(prep_df, f=radar_obj)
    # To inspect a single radar, filter on the "ip" column here
    # (e.g. pl.col("ip").str.contains("146")).
    .pipe(
        mainline_net.map_to_lane,
        dist_upper_bound=6,
        utm_x_col="utm_x",
        utm_y_col="utm_y",
    )
    # drop detections that could not be matched to a kept lane
    .filter(pl.col("name").is_not_null())
    .rename({"name": "lane"})
)

function: create_object_id took: 0.004339933395385742 seconds
function: filter_short_trajectories took: 0.0022399425506591797 seconds
function: clip_trajectory_end took: 0.000392913818359375 seconds
function: resample took: 0.0014619827270507812 seconds
function: fix_duplicate_positions took: 0.0001201629638671875 seconds
function: set_timezone took: 0.00017309188842773438 seconds
function: add_cst_timezone took: 2.3126602172851562e-05 seconds
function: add_heading took: 3.409385681152344e-05 seconds
function: rotate_radars took: 0.00021696090698242188 seconds
function: update_origin took: 0.00011801719665527344 seconds


In [11]:
from src.pipelines.kalman_filter import (
    prepare_frenet_measurement,
    build_extension,
    add_timedelta,
    build_kalman_id,
    filter_short_trajectories,
)

# Prepare the lane-matched radar data for Kalman filtering.
# NOTE: ``radar_df`` enters this cell as a LazyFrame and leaves it as a
# collected DataFrame, so the cell cannot be re-run on its own without first
# re-running the cell that builds the lazy ``radar_df``.
kalman_prep_lf = radar_df.rename({"angle": "heading_lane"}).pipe(
    filter_short_trajectories,
    minimum_distance_m=10,
    minimum_duration_s=2,
)

# The remaining stages take no extra arguments; apply them in order.
for prep_step in (
    prepare_frenet_measurement,
    build_extension,
    add_timedelta,
    build_kalman_id,
):
    kalman_prep_lf = prep_step(kalman_prep_lf)

radar_df = kalman_prep_lf.collect()

function: filter_short_trajectories took: 0.0030760765075683594 seconds
function: prepare_frenet_measurement took: 0.00039696693420410156 seconds
function: build_extension took: 0.012917041778564453 seconds
function: add_timedelta took: 5.698204040527344e-05 seconds
function: build_kalman_id took: 0.00011682510375976562 seconds


In [12]:
# Summary statistics of the s/d position and velocity columns (sanity check of their ranges).
radar_df[["s", "s_velocity", "d", "d_velocity"]].describe()

describe,s,s_velocity,d,d_velocity
str,f64,f64,f64,f64
"""count""",37382.0,37382.0,37382.0,37382.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",844.161843,18.724573,0.298787,-0.161905
"""std""",335.666871,4.626214,2.954782,0.586732
"""min""",43.499691,0.122669,-5.995774,-4.79173
"""25%""",580.895872,16.92116,-1.387667,-0.425346
"""50%""",820.594169,19.436233,0.086923,-0.09821
"""75%""",1126.391996,21.732312,3.014929,0.162009
"""max""",1646.101826,33.915581,5.996178,3.170978


In [13]:
from src.pipelines.kalman_filter import build_kalman_df

# Build the Kalman-filter input frame from the prepared radar data and
# materialise the result.
filter_df = build_kalman_df(radar_df).collect()

function: build_kalman_df took: 0.0002853870391845703 seconds


In [14]:
# radar_df.filter(
#     pl.col('kalman_id') == veh_df['kalman_id'][0]
# )

### Loop Through All the Trajectories and Find the Problematic Ones


In [15]:
# from src.filters.vectorized_kalman import IMMFilter, CALCFilter, CVLKFilter
# import numpy as np


# for vehicle_id in filter_df["vehicle_ind"].unique():
#     veh_df = filter_df.filter(pl.col("vehicle_ind") == vehicle_id).with_columns(
#         pl.lit(0).alias("vehicle_ind")
#     )

#     IMMFilter(
#         df=veh_df.rename({"measurement": "z"}),
#         filters=("CALC", "CALK", "CVLK"),
#         M=np.array([[0.8, 0.1, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]]),
#         mu=np.array([0.05, 0.3, 0.65]),
#         gpu=False,
#     ).apply_filter()

In [16]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from src.filters.vectorized_kalman import IMMFilter, CALCFilter, CVLKFilter
import numpy as np


# from src.filters.kalman2 import kf_filter_lanechange

# veh = joined_df['object_id'].sample(1).to_numpy()[0]
# Select the Kalman track that contains radar vehicle index 238 and extract
# its rows in time order.
# NOTE: this rebinds ``veh_df`` (the GPS drive frame in the earlier cells) to
# the rows of the selected radar track.
veh_id = filter_df.filter(pl.col("vehicle_ind") == 238)["kalman_id"][0]
veh_df = filter_df.filter(pl.col("kalman_id") == veh_id).sort("epoch_time")

# Initial weights of the two models (CALC, CALK).  The raw weights sum to
# 0.35, so they are normalised to sum to one, as mode probabilities must;
# their ratio is unchanged.
# Three-model alternative (CALC, CALK, CVLK):
#   M = [[0.8, 0.1, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]], mu = [0.05, 0.3, 0.65]
initial_mode_weights = np.array([0.05, 0.3])

filt = IMMFilter(
    # Single-track input: measurements renamed to "z" and the vehicle index
    # reset to 0 (presumably the position of the track within the batch).
    df=filter_df.rename({"measurement": "z"})
    .filter(pl.col("kalman_id") == veh_id)
    .with_columns(pl.lit(0).alias("vehicle_ind")),
    filters=(
        "CALC",
        "CALK",
    ),
    M=np.array([[0.8, 0.2], [0.2, 0.8]]),
    mu=initial_mode_weights / initial_mode_weights.sum(),
    # chunk_size=3_500,
    gpu=False,
)

res = filt.apply_filter()


# Three stacked panels sharing the time axis: S (row 1), D (row 2) and the
# prediction / missing-data columns (row 3).  Every panel gets a title.
fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.02,
    subplot_titles=(
        f"Vehicle {veh_id} S",
        f"Vehicle {veh_id} D",
        f"Vehicle {veh_id} Prediction / Missing Data",
    ),
    # add a secondary y axis to the velocity plots (rows 1 and 2)
    specs=[
        [{"secondary_y": True}],
        [{"secondary_y": True}],
        [{"secondary_y": False}],
    ],
)


# Trace colours keyed by name suffix: "" for raw measurements, "_filt" for
# filter output.
colors = {
    "": "blue",
    "_filt": "red",
}


# Plot raw measurements against the filter output for each (frame, suffix) pair.
for df, ext in [
    (veh_df, ""),
]:
    raw_color = colors[ext]
    # Filter output uses the dedicated "_filt" colour so that it can be told
    # apart from the raw measurements.
    filt_color = colors[f"{ext}_filt"]

    timestamps = df["epoch_time"]
    # The measurement column is indexed 0..3 as S, S velocity, D, D velocity.
    measurement = df["measurement"].arr.to_list()

    # One entry per trace, in legend order:
    # (name, y values, colour, subplot row, on secondary y axis, extra Scatter kwargs)
    # ``res[0][:, 0, k]`` is component k of the filter output for the single
    # track (presumably indexed time x vehicle x state; confirm against IMMFilter).
    trace_specs = [
        (f"S{ext}", measurement.list.get(0), raw_color, 1, False, {"line_dash": "dash"}),
        (f"S{ext}_filt", res[0][:, 0, 0], filt_color, 1, False, {}),
        (f"S Velocity{ext}_filt", res[0][:, 0, 1], filt_color, 1, True, {}),
        (f"S Velocity{ext}", measurement.list.get(1), raw_color, 1, True, {}),
        # the D dimension
        (f"D{ext}", measurement.list.get(2), raw_color, 2, False, {}),
        (f"D{ext}_filt", res[0][:, 0, 3], filt_color, 2, False, {}),
        (f"D Velocity{ext}", measurement.list.get(3), raw_color, 2, True, {}),
    ]

    for trace_name, values, color, row, on_secondary, extra in trace_specs:
        fig.add_trace(
            go.Scatter(
                x=timestamps,
                y=values,
                mode="markers+lines",
                name=trace_name,
                marker_color=color,
                **extra,
            ),
            row=row,
            col=1,
            # velocities go on the secondary axis; None keeps the primary one
            secondary_y=True if on_secondary else None,
        )


# Third panel: the ``prediction`` and ``missing_data`` columns of the track.
# Each trace has its own legend label and colour so the two can be told apart
# (and are not confused with the S velocity traces of the first panel).
for column, label, trace_color in (
    ("prediction", "PREDICTION", "green"),
    ("missing_data", "MISSING DATA", "orange"),
):
    fig.add_trace(
        go.Scatter(
            x=veh_df["epoch_time"],
            y=veh_df[column],
            mode="markers+lines",
            name=label,
            marker_color=trace_color,
        ),
        row=3,
        col=1,
    )


# for p in ["mu_CALC", "mu_CALK", "mu_CVLK"]:
#     # plot the probabilities
#     fig.add_trace(
#         go.Scatter(
#             x=veh_df["epoch_time"],
#             y=veh_df[p],
#             mode="markers+lines",
#             name=p,
#             # marker_color="green",
#         ),
#         row=3,
#         col=1,
#     )


# bound the y axis
# fig.update_yaxes(range=[-10, 100], row=1, col=1)
# fig.update_yaxes(range=[-10, 10], row=2, col=1)

# Fix the canvas size in pixels; kept as the cell's final expression so that
# its return value is what the cell displays.
fig.update_layout(width=1200, height=800)

  self._z[self._inds[:, 0], self._inds[:, 1]] = torch.Tensor(
  0%|          | 0/168 [00:00<?, ?it/s]

100%|██████████| 168/168 [00:00<00:00, 601.61it/s]


In [17]:
from src.filters.vectorized_kalman import batch_imm_df
import numpy as np

# Run the batched IMM filter over every track in ``filter_df``.
imm_filters = ("CALC", "CALK", "CVLK")
# Presumably the model-to-model transition probabilities (each row sums to one)
# and the initial model probabilities; confirm against batch_imm_df.
imm_transition = np.array(
    [
        [0.8, 0.1, 0.1],
        [0.1, 0.8, 0.1],
        [0.1, 0.1, 0.8],
    ]
)
imm_initial_mu = np.array([0.05, 0.3, 0.65])

filt_df = batch_imm_df(
    filter_df.rename({"measurement": "z"}),
    filters=imm_filters,
    M=imm_transition,
    mu=imm_initial_mu,
    chunk_size=2000,  # 3_500 was the previous setting
    gpu=False,
)

  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 318/318 [00:01<00:00, 222.62it/s]
100%|██████████| 1/1 [00:01<00:00,  1.56s/it]
