# IMM Filtering all Trajectories with a Vectorized Approach

The input file for this comes from [../radar/lane_classification.ipynb](../radar/lane_classification.ipynb)


In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to them take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# NOTE: the autoreload extension is already loaded (mode 2) by the first cell of
# this notebook, so the duplicate `%load_ext autoreload` / `%autoreload 2` lines
# that used to live here are not repeated.

# find the root of the project
import os
import sys
from pathlib import Path

# The root is the nearest ancestor that contains a `.git` entry. As before, the
# search starts at the parent of the current working directory.
_search_start = Path(os.getcwd()).parent
ROOT = next(
    (
        candidate
        for candidate in (_search_start, *_search_start.parents)
        if candidate.joinpath(".git").exists()
    ),
    None,
)
if ROOT is None:
    # A plain `while` walk never terminates in this situation, because the
    # parent of the filesystem root is the filesystem root itself.
    raise FileNotFoundError(
        f"No '.git' entry found in {_search_start} or any of its ancestors; "
        "run this notebook from inside the project repository."
    )

# add the root to the python path (guarded so re-running the cell does not
# append the same entry again)
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

In [None]:
import dotenv

# Read the `.env` file that sits at the project root into the environment.
dotenv.load_dotenv(ROOT / ".env")

## Load the _Already Processed_ Radar Data


In [None]:
from src.radar import Filtering

# Both geo inputs of the radar filtering helper live in <ROOT>/geo_data.
geo_data_dir = ROOT.joinpath("geo_data")

f = Filtering(
    network_boundary_path=geo_data_dir.joinpath("network_outline.geojson"),
    radar_location_path=geo_data_dir.joinpath("calibrated_origins.json"),
)

In [None]:
import polars as pl

# Lazily scan the already-processed radar data; the query is only executed
# when `.collect()` is called further down.
processed_radar_path = ROOT.joinpath(
    "notebooks",
    "clean_workflow",
    "data",
    "all_working_processed_1Lane.parquet",
)
radar_df = pl.scan_parquet(processed_radar_path)

# radar_df.shape

In [None]:
from datetime import timedelta
import numpy as np

# Reusable column expressions for the pipeline below.
prediction_count = pl.col("ui16_predictionCount")
# Raw difference between the reported direction and the lane (S) angle.
heading_offset = pl.col("direction") - pl.col("s_angle")
# Wrapped version of the difference above; the column is produced by `f.atan2`.
lane_angle = pl.col("s_angle_diff")

radar_df = (
    radar_df
    # .fetch(10_000_000)
    .lazy()
    # only rows that carry both an S coordinate and a lane label
    .filter(
        pl.col("s").is_not_null() & pl.col("lane").is_not_null()
        # & (pl.col("epoch_time") < (pl.col("epoch_time").min() + timedelta(minutes=120)))
    )
    .sort("epoch_time", descending=False)
    .set_sorted(["epoch_time"])
    ## ---------------------------------------------------
    ## Add Lane Group
    ## ---------------------------------------------------
    # .pipe(f.add_lane_group)
    # ---------------------------------------------------
    ## Remove all of the Radar's Internal Predictions (only the end)
    ## ---------------------------------------------------
    # .filter(pl.col("ui16_predictionCount") < 1)
    #
    # Trim the tail of each trajectory where the radar only produced predictions.
    #   cumcount         : rows left in the object's trajectory (count minus
    #                      running count)
    #   reverse_cumcount : number of rows with predictionCount > 0 from the
    #                      current row to the end of the trajectory
    # Rows where the two agree are dropped. Assuming `cumcount()` is zero-based,
    # those are exactly the rows of the trailing prediction-only run.
    .with_columns(
        (prediction_count.count() - prediction_count.cumcount())
        .over("object_id")
        .alias("cumcount"),
        (prediction_count > 0)
        .reverse()
        .cumsum()
        .reverse()
        .over("object_id")
        .alias("reverse_cumcount"),
    )
    .filter(pl.col("cumcount") != pl.col("reverse_cumcount"))
    .drop(["cumcount", "reverse_cumcount"])
    # .with_columns(
    #     [
    #         (pl.col("epoch_time").diff() / 1000)
    #         .backward_fill()
    #         .over(["object_id", "lane"])
    #         .alias("time_diff"),
    #     ]
    # )
    ## ---------------------------------------------------
    ## Calculating the S and D components of the velocity
    ## ---------------------------------------------------
    .with_columns(
        heading_offset.sin().alias("sin"),
        heading_offset.cos().alias("cos"),
    )
    .pipe(f.atan2, x_col="cos", y_col="sin", out_col="s_angle_diff", normalize=False)
    # null out angle differences larger than 30 degrees ...
    .with_columns(
        pl.when(lane_angle.abs() > np.deg2rad(30))
        .then(None)
        .otherwise(lane_angle)
        .alias("s_angle_diff")
    )
    # ... and interpolate across the gaps within each (object, lane) group
    .with_columns(
        lane_angle.interpolate().over(["object_id", "lane"]).alias("s_angle_diff")
    )
    # whatever is still null after interpolation is dropped
    .filter(lane_angle.is_not_null())
    .with_columns(
        # along-lane velocity, sign flipped
        (pl.col("f32_velocityInDir_mps") * lane_angle.cos() * -1).alias("s_velocity"),
        # across-lane velocity, mirrored for lanes whose name contains "W"
        (
            pl.col("f32_velocityInDir_mps")
            * lane_angle.sin()
            * pl.when(pl.col("lane").str.contains("W")).then(-1).otherwise(1)
        ).alias("d_velocity"),
    )
    ## ---------------------------------------------------
    ## Map the distance to the front and back of the
    ##          vehicle to the S dimension
    ## ---------------------------------------------------
    .with_columns(
        [
            (pl.col(source) * lane_angle.cos()).alias(target)
            for source, target in (
                ("f32_distanceToFront_m", "distanceToFront_s"),
                ("f32_distanceToBack_m", "distanceToBack_s"),
                # do the vehicle length
                ("f32_length_m", "length_s"),
            )
        ]
    )
    ## ---------------------------------------------------
    ## Making a Z (measurement) vector for the Kalman Filter (+ time)
    ## ---------------------------------------------------
    .with_columns(
        pl.concat_list(
            [pl.col(name) for name in ("s", "s_velocity", "min_d", "d_velocity")]
        ).alias("z")
    )
    .collect()
)

In [None]:
# Display the earliest and latest `epoch_time` remaining after preprocessing.
radar_df["epoch_time"].min(), radar_df["epoch_time"].max()

### Filter Short Trajectories


In [None]:
# Per-object summary columns used to discard short trajectories:
#   lane_time_diff : (max epoch_time - min epoch_time) / 1000
#   lane_distance  : absolute range of `s`
# NOTE(review): despite the "lane_" prefix both are windowed over `object_id`
# only, not per lane.
radar_df = (
    radar_df.with_columns(
        ((pl.col("epoch_time").max() - pl.col("epoch_time").min()) / 1000)
        .over("object_id")
        .alias("lane_time_diff"),
        (pl.col("s").max() - pl.col("s").min())
        .abs()
        .over("object_id")
        .alias("lane_distance"),
    )
    # keep objects with lane_time_diff > 5 AND lane_distance > 5
    # (presumably seconds and metres, assuming epoch_time is in milliseconds).
    # NOTE(review): this comment used to say "travel for 10 meters", but the
    # threshold in the code is 5 - confirm which value is intended.
    .filter((pl.col("lane_time_diff") > 5) & (pl.col("lane_distance") > 5))
)

### Label Lane Trajectories As Needing Extension or Not

A trajectory needs an extension if the vehicle ends near a lane center. Otherwise, we assume that it has left the network.


In [None]:
# aka do the ends need a match or not
#
# Marks the first (`is_start`) and last (`is_end`) sample of every object by
# comparing each row's time with the object's min/max time, and stores the
# object's final `min_d` value as `last_d`.
object_window = ["object_id"]
sample_time = pl.col("epoch_time")

radar_df = (
    radar_df.lazy()
    .with_columns(
        sample_time.max().over(object_window).alias("max_time_vehicle"),
        sample_time.min().over(object_window).alias("min_time_vehicle"),
    )
    .with_columns(
        (sample_time == pl.col("max_time_vehicle"))
        .over(object_window)
        .alias("is_end"),
        (sample_time == pl.col("min_time_vehicle"))
        .over(object_window)
        .alias("is_start"),
        pl.col("min_d").last().over(object_window).alias("last_d"),
    )
    # the helper columns are only needed for the comparison above
    .drop(["max_time_vehicle", "min_time_vehicle"])
    .collect()
)

In [None]:
# An object is flagged for extension when it has an end sample and its final
# `min_d` (`last_d`) lies between -2 and 5. The flag is constant per object.
needs_extension = pl.col("is_end").any() & pl.col("last_d").is_between(-2, 5)

radar_df = (
    radar_df.lazy()
    .sort(by=["epoch_time"])
    .with_columns(needs_extension.over(["object_id"]).alias("extend"))
    .collect()
)

## Upsampling and Building Prediction onto Trajectories


In [None]:
# One row per object that has to be extended (kept lazy; it is joined below).
extend_df = (
    radar_df.lazy()
    .group_by(["object_id"])
    .agg(pl.col("extend").any().alias("extend_me"))
    .filter(pl.col("extend_me"))
    # .with_columns(pl.lit(0.1).alias("timestep"))
    # .collect()
)

In [None]:
# this is miles faster than upsample
#
# Builds a regular time grid per object and left-joins the measurements onto it:
#   1. work out how many grid points each object needs (`time_count`),
#   2. generate the offsets 0, 0.1, 0.2, ... per object and explode them to rows,
#   3. turn the offsets into timestamps and hash (object_id, epoch_time) into a
#      join key,
#   4. left-join the measured rows; grid rows without a measurement are marked
#      `missing_data`, grid rows after the last measurement of an extended
#      object are marked `prediction`,
#   5. forward-fill every column except `z` into the gap rows.
#
# Fixes compared with the previous version of this cell:
#   * Fence-post error: `duration / step` grid points starting at offset 0 end
#     one step *before* the last measurement, so the final measured sample (the
#     `is_end` row) was never on the grid and was lost in the left join; for
#     extended objects the extension covered 3.9 s instead of 4 s. One extra
#     grid point is generated now.
#   * The float -> UInt32 cast truncates, so a quotient that float division
#     leaves marginally below a whole number lost another step. A tiny slack
#     absorbs that noise.
#   * "time_count" was listed in `drop` although the aggregation no longer
#     carries that column; polars versions with strict `drop` raise on that.
#
# Assumes `lane_time_diff` is the measured duration in seconds - TODO confirm.
TIMESTEP_S = 0.1  # grid spacing
EXTENSION_S = 4  # how far extended objects are continued past their last sample
_FLOAT_SLACK = 1e-6  # absorbs float noise before the truncating integer cast

# grid points needed to cover [first sample, last sample] inclusive
measured_points = pl.col("lane_time_diff") / TIMESTEP_S + 1 + _FLOAT_SLACK

radar_df = (
    radar_df.lazy()
    .join(
        extend_df,
        on=["object_id"],
        how="left",
    )
    .with_columns(
        pl.lit(TIMESTEP_S).alias("timestep"),
    )
    .select(
        # `extend_me` is null for objects that are not extended -> `otherwise`
        pl.when(pl.col("extend_me"))
        .then(
            (measured_points + EXTENSION_S / TIMESTEP_S).cast(pl.UInt32),
        )
        .otherwise(
            measured_points.cast(pl.UInt32),
        )
        .alias("time_count"),
        pl.col("epoch_time"),
        pl.col("object_id"),
        pl.col("timestep"),
        pl.col("extend_me"),
    )
    .group_by("object_id")
    .agg(
        # offsets 0, step, 2*step, ... (running sum of the step minus one step)
        (
            pl.col("timestep").first().repeat_by(pl.col("time_count").first()).cumsum()
            - pl.col("timestep").first()
        ).alias("timestep"),
        pl.col("epoch_time").first(),
        pl.col("epoch_time").last().alias("last_epoch_time"),
        pl.col("extend_me").any().alias("extend_me"),
    )
    .explode("timestep")
    .with_columns(
        # offset (s) -> ms, added to the object's first timestamp
        (pl.col("epoch_time") + pl.col("timestep") * 1e3)
        .cast(pl.Datetime(time_unit="ms", time_zone="UTC"))
        .alias("epoch_time")
    )
    .with_columns(
        pl.struct(["object_id", "epoch_time"]).hash().alias("join_key"),
        (
            (pl.col("epoch_time") > pl.col("last_epoch_time")) & pl.col("extend_me")
        ).alias("prediction"),
    )
    # `object_id` comes back from the measurement side of the join below
    .drop(
        [
            "object_id",
            "timestep",
            "extend_me",
        ]
    )
    .join(
        radar_df.lazy()
        .with_columns(pl.struct(["object_id", "epoch_time"]).hash().alias("join_key"))
        .drop(["epoch_time"])
        .with_columns(pl.lit(False).alias("missing_data")),
        on="join_key",
        how="left",
    )
    .with_columns(
        pl.col("prediction").fill_null(False).alias("prediction"),
        pl.col("missing_data").fill_null(True).alias("missing_data"),
        pl.col("z").fill_null(pl.lit(None)).alias("z"),
    )
    # Not windowed on purpose: after `explode` the rows of one object are
    # contiguous and the first grid row of each object coincides with its first
    # measurement, so a plain forward fill does not leak across objects.
    .with_columns(pl.col(set(radar_df.columns) ^ {"prediction", "z"}).forward_fill())
    .collect()
)

In [None]:
# r_df.filter(
#     pl.col("missing_data")
# )

In [None]:
# # radar_df = radar_df.with_columns(pl.lit(False).alias("prediction"))
# tmp_df = (
#     radar_df.lazy()
#     .group_by(
#         [
#             "object_id",
#             "lane",
#         ]
#     )
#     .agg([pl.col("extend").any().alias("extend_me"), pl.all().last()])
#     .filter(pl.col("extend_me"))
#     .with_columns(pl.lit(0.1).alias("timestep"))
#     .group_by("object_id")
#     .agg(
#         pl.col("timestep").repeat_by(int(5 / 0.1) + 1).cumsum()
#         - pl.col("timestep").first(),
#         pl.col("epoch_time").first(),
#     )
#     .explode("timestep")
#     .with_columns(
#         (pl.col("epoch_time") + (pl.col("timestep") * 1000).cast(int))
#         .cast(pl.Datetime("ms", time_zone="UTC"))
#         .alias("epoch_time"),
#         pl.lit(True).alias("prediction"),
#     )
#     .select(["object_id", "epoch_time", "prediction"])
#     .join(radar_df.lazy(), on=["object_id", "epoch_time"], how="left")
#     # if we sort we don't have to window the next function
#     .sort("object_id")
#     .with_columns(
#         [
#             pl.all().forward_fill(),
#         ]
#     )
#     .with_columns(
#         pl.lit(None, dtype=pl.List(pl.Float64())).alias("z"),
#     )
#     .collect()
# )


# radar_df = (
#     tmp_df.extend(
#         radar_df.with_columns(pl.lit(False).alias("prediction")).select(tmp_df.columns)
#     )
#     .unique(["object_id", "epoch_time"])
#     .sort("epoch_time")
#     .set_sorted(["epoch_time"])
# )

In [None]:
# Time step to the previous sample of the same object (`epoch_time` difference
# divided by 1000). The first sample of each object has no predecessor, so it
# borrows the value of the following row; anything still null becomes 0.
per_object_step = (
    (pl.col("epoch_time").diff() / 1000).backward_fill(1).over("object_id")
)

radar_df = radar_df.sort(by=["epoch_time"]).with_columns(
    per_object_step.fill_null(0).alias("time_diff")
)

In [None]:
# TODO:

# 1. split trajectories that have dt > 4? seconds into different trajectories (done by changing the object id)
#
# `split` marks rows that follow a gap, the running sum of `split` numbers the
# pieces, and `kalman_id` hashes (object_id, trajectory_id) into a single key.
# NOTE(review): the running sum is windowed over (object_id, lane) while the
# hash only includes object_id and trajectory_id - confirm that is intended.
gap_exceeded = pl.col("time_diff") > 4

radar_df = (
    radar_df.with_columns(gap_exceeded.alias("split"))
    .with_columns(
        pl.col("split").cumsum().over(["object_id", "lane"]).alias("trajectory_id")
    )
    .with_columns(
        pl.struct([pl.col("object_id"), pl.col("trajectory_id")])
        .hash()
        .alias("kalman_id")
    )
)

In [None]:
# Columns handed to the filter, plus a running sample index per trajectory.
indexed_lf = (
    radar_df.lazy()
    .select(
        [
            "kalman_id",
            "epoch_time",
            "z",
            "time_diff",
            "prediction",
            "missing_data",
            "lane",
        ]
    )
    .with_columns(
        pl.col("epoch_time").cumcount().over("kalman_id").alias("time_ind")
    )
)

# filter out short trajectories: keep only those whose largest time index
# exceeds 10
long_trajectory_ids = (
    indexed_lf.group_by("kalman_id")
    .agg(pl.col("time_ind").max())
    .filter(pl.col("time_ind") > 10)
    .select("kalman_id")
    .collect()["kalman_id"]
)
kept_lf = indexed_lf.filter(pl.col("kalman_id").is_in(long_trajectory_ids))

# one row per remaining trajectory: its last time index and a running number
trajectory_index_lf = (
    kept_lf.group_by("kalman_id")
    .agg(pl.col("time_ind").max().alias("time_ind_max"))
    # .sort("time_ind")  # turn this on to get an ordered list of trajectories from small to large.
    .with_row_count("vehicle_ind")
)

test_df = (
    kept_lf.join(trajectory_index_lf, on="kalman_id", how="inner")
    .sort(["vehicle_ind", "time_ind"])
    .with_columns(pl.col("vehicle_ind").cast(pl.Int32).alias("vehicle_ind"))
    .collect()
)

### Filter Out Short Trajectories Again


In [None]:
import torch

# list gpu devices
# Displays the number of CUDA devices PyTorch can see; 0 means no usable GPU.
torch.cuda.device_count()

In [None]:
# Prints the largest trajectory index and the largest time index.
# NOTE(review): these are maximum indices, not sizes; if both indices are
# zero-based the actual dimensions are one larger each - confirm.
print("Dimensions", test_df["vehicle_ind"].max(), "x", test_df["time_ind"].max())

In [None]:
from src.filters.vectorized_kalman import batch_imm_df
import gc

# One entry per model in `imm_filters`. Every row of `imm_M` and the whole of
# `imm_mu` sum to 1; presumably the IMM mode-transition matrix and the initial
# mode probabilities - confirm against `batch_imm_df`.
imm_filters = ("CALC", "CALK", "CVLK")
imm_M = np.array(
    [
        [0.8, 0.1, 0.1],
        [0.1, 0.8, 0.1],
        [0.1, 0.1, 0.8],
    ]
)
imm_mu = np.array([0.05, 0.3, 0.65])

# NOTE(review): `gpu=True` is hard-coded; on a host without CUDA this presumably
# fails - consider deriving it from `torch.cuda.is_available()`.
filt_df = batch_imm_df(
    test_df,
    filters=imm_filters,
    M=imm_M,
    mu=imm_mu,
    # chunk_size=3_500,
    chunk_size=4000,
    gpu=True,
)

In [None]:
# Shape of the filter output.
filt_df.shape

In [None]:
# join the filtered states to the original data

# unpack the measurement list `z` into one column per component
z_components = ("s", "s_velocity", "d", "d_velocity")
measurements = test_df.with_columns(
    [
        pl.col("z").list.get(position).alias(component)
        for position, component in enumerate(z_components)
    ]
)

# filter output, with `time_ind` cast to UInt32 for the join
filtered_states = filt_df.with_columns(pl.col("time_ind").cast(pl.UInt32))

# need these later...
trajectory_info = radar_df.select(
    ["object_id", "epoch_time", "s_angle_diff", "is_end", "kalman_id"]
)

joined_df = (
    measurements.join(
        filtered_states,
        on=["time_ind", "vehicle_ind"],
        how="inner",
        suffix="_filt",  # clashing filter columns get this suffix
    )
    .join(
        trajectory_info,
        on=["kalman_id", "epoch_time"],
        how="inner",
    )
    .drop(["kalman_id"])
)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# from src.filters.kalman2 import kf_filter_lanechange

# Pick one random object that has samples in lane "EBL1" and plot its measured
# (blue) vs. filtered (red) states, plus the per-model probabilities.
# veh = joined_df['object_id'].sample(1).to_numpy()[0]
veh_id = joined_df.filter(pl.col("lane") == "EBL1")["object_id"].sample(1)[0]
# veh_df = joined_df.filter(pl.col("vehicle_ind") == veh).sort("epoch_time")
veh_df = joined_df.filter(pl.col("object_id") == veh_id).sort("epoch_time")

fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.02,
    # one title per row (the third row used to be left untitled)
    subplot_titles=(
        f"Vehicle {veh_id} S",
        f"Vehicle {veh_id} D",
        f"Vehicle {veh_id} Model Probabilities",
    ),
    # add a secondary y axis to the velocity plots
    specs=[
        [{"secondary_y": True}],
        [{"secondary_y": True}],
        [{"secondary_y": False}],
    ],
)


# column suffix -> trace colour ("" = measurement, "_filt" = filter output)
colors = {
    "": "blue",
    "_filt": "red",
}

# (column stem, legend label, subplot row, secondary axis?, flip sign?)
state_traces = (
    ("s", "S", 1, False, False),
    ("s_velocity", "S Velocity", 1, True, True),
    # add the D dimension
    ("d", "D", 2, False, False),
    ("d_velocity", "D Velocity", 2, True, False),
)

for ext, color in colors.items():
    for stem, label, row, on_secondary, flip_sign in state_traces:
        values = veh_df[f"{stem}{ext}"]
        if flip_sign:
            # the S velocity is plotted with its sign flipped
            values = values * -1
        fig.add_trace(
            go.Scatter(
                x=veh_df["epoch_time"],
                y=values,
                mode="markers+lines",
                name=f"{label}{ext}",
                marker_color=color,
            ),
            row=row,
            col=1,
            secondary_y=on_secondary,
        )


for p in ["mu_CALC", "mu_CALK", "mu_CVLK"]:
    # plot the probabilities
    fig.add_trace(
        go.Scatter(
            x=veh_df["epoch_time"],
            y=veh_df[p],
            mode="markers+lines",
            name=p,
            # marker_color="green",
        ),
        row=3,
        col=1,
    )


# bound the y axis
# fig.update_yaxes(range=[-10, 100], row=1, col=1)
# fig.update_yaxes(range=[-10, 10], row=2, col=1)

fig.update_layout(
    height=800,
    width=1200,
)

## Saving the Filtered Data


In [None]:
# Shape of the frame that is written to disk below.
joined_df.shape

In [None]:
# Shape of the upsampled input frame, for comparison with `joined_df`.
radar_df.shape

In [None]:
# Persist the measurements joined with the filtered states.
filtered_output_path = ROOT.joinpath(
    "notebooks", "clean_workflow", "data", "imm_filtered.parquet"
)

joined_df.write_parquet(
    filtered_output_path,
    # compression="gzip",
    use_pyarrow=True,
    compression="lz4",
)