# Lane Classification Step


This step relies on the output of [./vectorized_filter.ipynb](./vectorized_filter.ipynb).

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell is executed, so edits to project code are picked up.
%load_ext autoreload
%autoreload 2

In [None]:
# NOTE: autoreload is already enabled by the previous cell, so it is not
# loaded a second time here.

# find the root of the project
import os
from pathlib import Path

# Walk upwards, starting at the parent of the working directory, until a
# directory containing ".git" is found.
ROOT = Path(os.getcwd()).parent
while not ROOT.joinpath(".git").exists():
    if ROOT.parent == ROOT:
        # The filesystem root is its own parent: without this guard the loop
        # would spin forever when the notebook runs outside a git checkout.
        raise FileNotFoundError(
            f"Could not find a '.git' directory above {os.getcwd()}"
        )
    ROOT = ROOT.parent

# add the root to the python path
import sys

# guard against piling up duplicate entries when the cell is re-run
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

In [None]:
import dotenv
import polars as pl
from pomegranate.distributions import Normal
from pomegranate.gmm import GeneralMixtureModel


# Populate the process environment from the project-level .env file.
env_file = ROOT / ".env"
dotenv.load_dotenv(env_file)

## Read in the Dataset

In [None]:
# Load the filtered radar detections from the project's data folder using
# polars' own parquet reader (pyarrow disabled).
filtered_path = ROOT / "notebooks/clean_workflow/data/imm_filtered.parquet"
radar_df = pl.read_parquet(filtered_path, use_pyarrow=False)

In [None]:
# Show a single row as a quick check of the columns that were loaded.
radar_df.head(1)

## Mixture Model Lanes

In [None]:
# radar_df.filter(
#     pl.col('d_filt') <= radar_df["d_filt"].min()
# )

In [None]:
# plot [d] over top of [d_filt] (make the bins the same size)
# for every lane whose name contains "E"
import matplotlib.pyplot as plt

lanes_e_df = radar_df.filter(pl.col('lane').str.contains('E'))[["d_filt", "d"]]

# Sampling without replacement fails when more rows are requested than exist,
# so cap the sample size at the number of rows that survived the filter.
n_rows = min(100_000, lanes_e_df.height)

fig, ax = plt.subplots(figsize=(10, 5))

lanes_e_df.sample(n_rows).to_pandas().plot.hist(bins=100, ax=ax, alpha=0.5)

ax.set_title("d vs. d_filt (lanes containing 'E')")
ax.set_xlabel("$d$ [m]")

# set the range of the x-axis to be the same
ax.set_xlim(-6, 10)

In [None]:
# Same comparison of [d] and [d_filt], for every lane whose name contains "W".
import matplotlib.pyplot as plt

lanes_w_df = radar_df.filter(pl.col('lane').str.contains('W'))[["d_filt", "d"]]

# Sampling without replacement fails when more rows are requested than exist,
# so cap the sample size at the number of rows that survived the filter.
n_rows = min(100_000, lanes_w_df.height)

fig, ax = plt.subplots(figsize=(10, 5))

lanes_w_df.sample(n_rows).to_pandas().plot.hist(bins=100, ax=ax, alpha=0.5)

ax.set_title("d vs. d_filt (lanes containing 'W')")
ax.set_xlabel("$d$ [m]")

ax.set_xlim(-6, 10)

In [None]:
# Comparison of [d] and [d_filt] for lanes containing "W", restricted to rows
# with mu_CALC below 0.2.
import matplotlib.pyplot as plt

lanes_w_low_mu_df = radar_df.filter(
    pl.col('lane').str.contains('W') & (pl.col('mu_CALC') < 0.2)
)[["d_filt", "d"]]

# The mu_CALC restriction can leave fewer than 100k rows; sampling without
# replacement would then raise, so cap the sample size at the rows available.
n_rows = min(100_000, lanes_w_low_mu_df.height)

fig, ax = plt.subplots(figsize=(10, 5))

lanes_w_low_mu_df.sample(n_rows).to_pandas().plot.hist(bins=100, ax=ax, alpha=0.5)

ax.set_title("d vs. d_filt (lanes containing 'W', mu_CALC < 0.2)")
ax.set_xlabel("$d$ [m]")

ax.set_xlim(-6, 10)

In [None]:
# Overlay d_filt for rows with mu_CALC above 0.5 onto the axes created by the
# previous cell (`ax` / `fig` must already exist), then re-display that figure.
high_mu_df = radar_df.filter(pl.col("mu_CALC") > 0.5)[["d_filt"]]

# Sampling without replacement fails when more rows are requested than exist,
# so cap the sample size at the number of rows that survived the filter.
n_rows = min(100_000, high_mu_df.height)

high_mu_df.sample(n_rows).to_pandas().plot.hist(bins=100, ax=ax, alpha=0.5)

fig

In [None]:
# radar_df[["d_filt",]].to_pandas().plot.hist(bins=100, alpha=0.5)

In [None]:
# Keep only the rows whose filtered lateral value is not NaN.
d_filt_is_number = pl.col("d_filt").is_not_nan()
radar_df = radar_df.filter(d_filt_is_number)

### Fit a GMM to the data

In [None]:
# (rows, columns) of the frame that the mixture models are fitted on.
radar_df.shape

In [None]:
import torch

def process_lane(radar_df, lane, cuda, seed=None):
    """Fit a four-component 1-D Gaussian mixture to the `d_filt` values of a lane.

    Rows are kept when `d_filt` lies in [-20, 20] and the `lane` column
    contains `lane`; a random 50% of those values is used for fitting.

    Parameters
    ----------
    radar_df : polars.DataFrame
        Frame with at least the `d_filt` and `lane` columns.
    lane : str
        Pattern matched against the `lane` column with `str.contains`.
    cuda : torch.device or str
        Device on which the data, the component distributions and the mixture
        model are placed.
    seed : int, optional
        Seed for the 50% subsample. The default (None) keeps the subsample
        random, as before.

    Returns
    -------
    GeneralMixtureModel
        The fitted mixture. Components are ordered by their initial means
        (-3.75, 0.0, 3.75, 7.25).
    """
    lateral_offsets = (
        radar_df.filter(
            pl.col("d_filt").is_between(-20, 20)
            & pl.col("lane").str.contains(lane)
        )["d_filt"]
        .sample(fraction=0.5, seed=seed)
        .to_numpy()
        .copy()
        .reshape(-1, 1)
    )
    X = torch.from_numpy(lateral_offsets).float().to(cuda)

    # One unit-variance Normal per expected lateral position. Everything is
    # moved to the requested device (previously a hard-coded `.cuda()`), so the
    # `cuda` argument is honoured for the model as well as for the data.
    initial_means = (-3.75, 0.0, 3.75, 7.25)
    components = [
        Normal(means=[mean], covs=[1.0], covariance_type="diag").to(cuda)
        for mean in initial_means
    ]

    mm = GeneralMixtureModel(
        components,
        verbose=True,
        tol=1e-6,
        # initial mixing weights, one per component; they sum to 1
        priors=[0.05, 0.85 * 6/10, 0.85 * 4/10, 0.10],
    ).to(cuda)

    mm.fit(X)

    return mm


# Fit one mixture model per lane name, all on the first CUDA device.
gmm_device = torch.device("cuda:0")
models = {
    lane_name: process_lane(radar_df, lane_name, cuda=gmm_device)
    for lane_name in ("EBL1", "WBL1")
}

### Plot the Fitted GMM

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scienceplots

def plot_model(model, ax):
    """Draw a fitted mixture model onto `ax`.

    Plots, over 1000 points between -6 and 10:
      * the mixture density (`model.probability`) in red,
      * one curve per component with its posterior (`model.predict_proba`),
      * a dashed vertical line at every component mean.

    Parameters
    ----------
    model :
        Fitted mixture exposing `probability`, `predict_proba` and
        `distributions` (each with a `means` tensor).
    ax : matplotlib.axes.Axes
        Axes to draw on; its limits, legend and labels are set here.
    """
    x = np.linspace(-6, 10, 1000).reshape(-1, 1)

    # Evaluate on whichever device the model lives on instead of assuming
    # the default CUDA device.
    device = model.distributions[0].means.device
    x_tensor = torch.from_numpy(x).float().to(device)

    density = model.probability(x_tensor).cpu().numpy()
    ax.plot(x, density, color="red")

    posteriors = model.predict_proba(x_tensor).cpu().numpy()
    # one curve per mixture component, in component order
    for k in range(posteriors.shape[1]):
        ax.plot(x, posteriors[:, k])

    for d in model.distributions:
        ax.axvline(d.means[0].item(), color="black", linestyle="--")

    ax.set_xlim(-4, 8)
    # NOTE(review): labels are assigned to artists in the order matplotlib
    # collects them - confirm "Observed $d$" lands on the intended artist.
    ax.legend(["Observed $d$", "Right Turn", "Mainline Right", "Mainline Left", "Left Turn"],  bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    ax.set_xlabel("$d$ [m]")
    ax.set_ylabel("Probability Density")

# Use the scienceplots "science" + "ieee" styles for the figures below.
plt.style.use(["science", "ieee"])

# One figure per fitted lane: a normalised histogram of d_filt restricted to
# [-4, 8] for that lane, with the fitted mixture drawn on top.
for lane, model in models.items():
    in_lane_and_range = (
        pl.col("d_filt").is_between(-4, 8) & pl.col("lane").str.contains(lane)
    )
    lane_d_filt = radar_df.filter(in_lane_and_range)[["d_filt"]].to_pandas()

    fig, ax = plt.subplots()
    lane_d_filt.plot.hist(bins=100, alpha=0.5, ax=ax, density=True)

    plot_model(model, ax)
    print(lane)
    plt.show()


# for ax, (lane, model) in zip(axs, models.items()):
#     radar_df.filter(pl.col("d_filt").is_between(-6, 10) & pl.col("lane").str.contains(lane))[
#         [
#             "d_filt",
#         ]
#     ].to_pandas().plot.hist(bins=100, alpha=0.5, ax=ax, density=True)
#     plot_model(model, ax)

### Split the Vehicles Into Lanes 1 & 2

In [None]:
# this is distribution 1 (right lane) and distribution 2 (left lane)
# Score every row with the mixture model fitted for its lane and attach the
# posteriors of components 1 and 2 plus the index of the most likely component.
dfs = []
for lane_key, lane_df in radar_df.partition_by('lane', as_dict=True).items():
    # Depending on the polars version, the partition keys are either the bare
    # lane value or a 1-tuple wrapping it; normalise so the lookup works on both.
    lane = lane_key[0] if isinstance(lane_key, tuple) else lane_key
    model = models[lane]

    all_data = (
        torch.from_numpy(lane_df["d_filt"].to_numpy().copy().reshape(-1, 1))
        # same float32 cast that is applied when fitting and plotting the models
        .float()
        # evaluate on the device the model's parameters live on
        .to(device=model.distributions[0].means.device)
    )

    probs = model.predict_proba(all_data).cpu().numpy()

    lane_df = lane_df.with_columns(
        [
            pl.Series("prob_lane_1", probs[:, 1]),
            pl.Series("prob_lane_2", probs[:, 2]),
            pl.Series("lane_index", probs.argmax(axis=1)),
        ]
    )

    dfs.append(lane_df)

radar_df = pl.concat(dfs)


## Plot a Vehicle with Lane Change

In [None]:
# Keep every row of the objects that have at least one row classified as
# component 1 and at least one row classified as component 2.
seen_in_lane_1 = (pl.col('lane_index') == 1).any()
seen_in_lane_2 = (pl.col('lane_index') == 2).any()
change_vehicles = radar_df.filter((seen_in_lane_1 & seen_in_lane_2).over('object_id'))

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Pick one random object from lane "EBL1" among the lane-changing vehicles and
# order its rows by time.
veh_id = change_vehicles.filter(pl.col("lane") == "EBL1")["object_id"].sample(1)[0]
veh_df = change_vehicles.filter(pl.col("object_id") == veh_id).sort("epoch_time")

fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.02,
    # one title per row (the third row previously had none)
    subplot_titles=(
        f"Vehicle {veh_id} S",
        f"Vehicle {veh_id} D",
        f"Vehicle {veh_id} Lane Probabilities",
    ),
    # add a secondary y axis to the velocity plots
    specs=[
        [{"secondary_y": True}],
        [{"secondary_y": True}],
        [{"secondary_y": False}],
    ],
)


# column-name suffix -> trace colour ("" is the unsuffixed column)
colors = {
    "": "blue",
    "_filt": "red",
}


for ext, color in colors.items():
    # S position (row 1, primary axis)
    fig.add_trace(
        go.Scatter(
            x=veh_df["epoch_time"],
            y=veh_df[f"s{ext}"],
            mode="markers+lines",
            name=f"S{ext}",
            marker_color=color,
        ),
        row=1,
        col=1,
    )

    # S velocity with its sign flipped (row 1, secondary axis)
    fig.add_trace(
        go.Scatter(
            x=veh_df["epoch_time"],
            y=veh_df[f"s_velocity{ext}"] * -1,
            mode="markers+lines",
            name=f"S Velocity{ext}",
            marker_color=color,
        ),
        row=1,
        col=1,
        secondary_y=True,
    )

    # add the D dimension
    fig.add_trace(
        go.Scatter(
            x=veh_df["epoch_time"],
            y=veh_df[f"d{ext}"],
            mode="markers+lines",
            name=f"D{ext}",
            marker_color=color,
        ),
        row=2,
        col=1,
    )
    # The secondary axis of row 2 is reserved for a D-velocity trace
    # (f"d_velocity{ext}"), which is currently not plotted.


for p in ["mu_CALC", "prob_lane_1", "prob_lane_2"]:
    # plot the probabilities
    fig.add_trace(
        go.Scatter(
            x=veh_df["epoch_time"],
            y=veh_df[p],
            mode="markers+lines",
            name=p,
        ),
        row=3,
        col=1,
    )


for p in ["prob_lane_1", "prob_lane_2"]:
    # plot the lane probabilities scaled by (1 - mu_CALC)
    fig.add_trace(
        go.Scatter(
            x=veh_df["epoch_time"],
            y=veh_df[p] * (1 - veh_df["mu_CALC"]),
            mode="markers+lines",
            # the label now states the factor that is actually applied
            name=p + " * (1 - mu_CALC)",
        ),
        row=3,
        col=1,
    )


fig.update_layout(
    height=800,
    width=1200,
)

## Filter the DataFrame for Vehicles that are in Lane 1 or 2

In [None]:
# Keep only the rows whose most likely mixture component (lane_index) is 1 or 2,
# i.e. the components whose posteriors are stored as prob_lane_1 / prob_lane_2.
radar_df = radar_df.filter(pl.col("lane_index").is_in([1, 2]))

## Save the DataFrame

In [None]:
# Earliest and latest epoch_time among the rows that are about to be saved.
radar_df["epoch_time"].min(), radar_df["epoch_time"].max()

In [None]:
# Persist the lane-annotated frame next to the input file, written via pyarrow.
lanes_path = ROOT / "notebooks/clean_workflow/data/imm_filtered_lanes.parquet"
radar_df.write_parquet(lanes_path, use_pyarrow=True)