# Trajectory Association in Historical Data


In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Find the root of the project: the closest ancestor directory that
# contains a ".git" entry.
import os
from pathlib import Path

# The search starts at the parent of the working directory (the working
# directory itself is not inspected) and walks up one level at a time.
ROOT = Path(os.getcwd()).parent
while not ROOT.joinpath(".git").exists():
    if ROOT.parent == ROOT:
        # The parent of the filesystem root is the root itself, so without
        # this guard the loop would spin forever outside a git checkout.
        raise FileNotFoundError(
            f"no '.git' directory found in any parent of {os.getcwd()}"
        )
    ROOT = ROOT.parent

# add the root to the python path (only once, so re-running the cell does
# not keep appending duplicates)
import sys

if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

In [None]:
import dotenv
from src.frenet import SplineLane
from src.radar import BasicRadar
from src.polars_utils import build_if_when

# pull the variables defined in the project's .env file into the environment
env_file = ROOT / ".env"
dotenv.load_dotenv(env_file)

## Start & End Time


In [None]:
from datetime import datetime
from pytz import timezone

# Analysis window: 06:00 to 12:00 US/Central on 2023-03-13.
#
# pytz zones must be attached with `localize()`. Passing a pytz zone as
# `tzinfo=` to the `datetime` constructor silently uses the zone's historical
# local-mean-time offset (roughly UTC-5:51 for US/Central), which shifts the
# UTC conversion below by 51 minutes.
central = timezone("US/Central")
start_time = central.localize(datetime(2023, 3, 13, 6, 0, 0))
end_time = central.localize(datetime(2023, 3, 13, 12, 0, 0))

# convert to utc
start_time_utc = start_time.astimezone(timezone("UTC"))
end_time_utc = end_time.astimezone(timezone("UTC"))

## File Paths


In [None]:
# directory holding the radar data used by this notebook
RADAR_DIR = Path("/DOECV2X/Radar", "all_working")
print(RADAR_DIR)

## Read in Radar Data


In [None]:
import polars as pl

In [None]:
# The radar data is read from a parquet file kept under <ROOT>/tmp (for
# speed); the file is named after the radar directory.
tmp_dir = ROOT / "tmp"
tmp_dir.mkdir(parents=True, exist_ok=True)
tmp_file = tmp_dir / f"{RADAR_DIR.stem}.parquet"

# lazy scan: nothing is loaded until the query is collected
radar_df = pl.scan_parquet(tmp_file)

## Filter and Transform the Radar Data


In [None]:
from src.radar import Filtering

# geometry inputs consumed by the radar filtering helper
geo_data_dir = ROOT / "geo_data"

f = Filtering(
    radar_location_path=geo_data_dir / "calibrated_origins.json",
    network_boundary_path=geo_data_dir / "network_outline.geojson",
)

In [None]:
# Build the radar pre-processing pipeline. Each active step is a method of the
# `Filtering` helper `f`, applied in order through `.pipe`. Steps that are
# commented out are currently disabled.

# make sure we are working with a LazyFrame
if isinstance(radar_df, pl.DataFrame):
    radar_df = radar_df.lazy()

radar_df = (
    radar_df
    # (disabled) keep only rows between start_time_utc and end_time_utc;
    # epoch_time is divided by 1000 before the comparison, so it is
    # presumably in milliseconds here -- TODO confirm
    # .filter(
    #     (
    #         (pl.col("epoch_time").cast(pl.Float64()) / 1000)
    #         > start_time_utc.timestamp()
    #     )
    #     & (
    #         (pl.col("epoch_time").cast(pl.Float64()) / 1000)
    #         < end_time_utc.timestamp()
    #     )
    # )
    # .collect(streaming=True)
    # .lazy()
    # create the object_id column
    .pipe(f.create_object_id)
    # .pipe(f.correct_center)
    # sort by object_id and epoch_time
    .sort(by=["object_id", "epoch_time"])
    # flag both columns as sorted.
    # NOTE(review): after the sort above, epoch_time is only ordered within
    # each object_id, not globally -- confirm nothing downstream relies on a
    # globally sorted epoch_time.
    .set_sorted(["object_id", "epoch_time"])
    # (disabled) filter out vehicles that don't travel some minimum distance
    # (takes care of radar noise)
    # .pipe(f.filter_short_trajectories, minimum_distance_m=10, minimum_duration_s=2)
    # resample to 10 Hz (the argument 100 is presumably the period in
    # milliseconds -- TODO confirm)
    .pipe(f.resample, 100)
    # (disabled) smooth the values during stop events.
    # NOTE(review): the original rationale was cut off mid-sentence
    # ("This is allowed because there is no ...").
    # .pipe(f.fix_stop_param_walk)
    # fix cases where the radar outputs the same data for multiple frames
    .pipe(f.fix_duplicate_positions)
    # (disabled) clip the end of trajectories where the velocity is constant
    # .pipe(f.clip_trajectory_end)
    .pipe(f.set_timezone, timezone_="UTC")
    # .pipe(f.add_cst_timezone)
    # (disabled) crop step.
    # NOTE(review): the comment that used to sit here read "filter just the
    # first 12 hours of data", which does not match `crop_radius` -- confirm
    # the intent before re-enabling.
    # .pipe(f.crop_radius, 400)
    .pipe(f.rotate_radars)
    .pipe(f.update_origin)
    .pipe(f.rotate_heading)
    # .collect(streaming=True)
    # .pipe(f.radar_to_latlon)
)

In [None]:
from datetime import timedelta

# Export a five-minute slice (minutes 45 to 50 after the earliest timestamp)
# with lat/lon coordinates to `test.csv` in the working directory.
sample_start = pl.col("epoch_time").min() + timedelta(minutes=45)
sample_end = pl.col("epoch_time").min() + timedelta(minutes=50)

sample_df = (
    radar_df.filter(pl.col("epoch_time").is_between(sample_start, sample_end))
    .pipe(f.radar_to_latlon)
    .select(["ip", "object_id", "lat", "lon", "epoch_time"])
)

# The pipeline above is built lazily, and `write_csv` is an eager-DataFrame
# method, so materialize the result first if it is still a LazyFrame.
if isinstance(sample_df, pl.LazyFrame):
    sample_df = sample_df.collect()

sample_df.write_csv("test.csv")

## Get the Lane Centerlines


### Build the Frenet Centerlines


In [None]:
from src.frenet import SplineLane
from src.geometry import load_centerlines
import numpy as np

# lanes for which a spline centerline is built
LANES_TO_FIT = ("EBL1", "WBL1")


def _fit_spline_lane(lane_name, lane_gdf):
    """Build, fit and interpolate the `SplineLane` of one centerline group."""
    # (n, 2) array of x/y coordinates taken from the group's point geometries
    xy = np.column_stack((lane_gdf.geometry.x, lane_gdf.geometry.y))
    spline_lane = SplineLane(
        name=lane_name,
        centerline=xy,
        width=3.7,
        crs=lane_gdf.crs,
    )
    # s / k are presumably the spline smoothing factor and degree, and ds the
    # interpolation spacing -- confirm against SplineLane
    return spline_lane.fit(s=2, k=2).interpolate(ds=0.1)


centerlines = load_centerlines(ROOT / "geo_data" / "centerlines.geojson")

spline_lanes = [
    _fit_spline_lane(lane_name, lane_gdf)
    for lane_name, lane_gdf in centerlines.groupby("lane")
    if lane_name in LANES_TO_FIT
]

### Map the Radar Points to the Centerlines


In [None]:
from src.pipelines.snap_lanes import pipe_lanes, pipe_lanebounce_fix

# 10 meter matching threshold (widest lane is 3.7 meters)
MATCH_DISTANCE_THRESHOLD = 10

# match the radar points against the fitted lane centerlines
radar_df = pipe_lanes(
    radar_df,
    radar_obj=BasicRadar,
    spline_lanes=spline_lanes,
    distance_threshold=MATCH_DISTANCE_THRESHOLD,
)

In [None]:
# Histogram of `min_d` for the points assigned to lane EBL1.
ebl1_min_d = radar_df.filter(pl.col("lane") == "EBL1")["min_d"]

# Sample at most 100k values: sampling more rows than exist (without
# replacement) raises, so cap the sample size at the series length.
ebl1_min_d.sample(min(100_000, len(ebl1_min_d))).to_pandas().hist(bins=100)

In [None]:
# Histogram of `min_d` for the points assigned to lane WBL1.
wbl1_min_d = radar_df.filter(pl.col("lane") == "WBL1")["min_d"]

# Sample at most 100k values: sampling more rows than exist (without
# replacement) raises, so cap the sample size at the series length.
wbl1_min_d.sample(min(100_000, len(wbl1_min_d))).to_pandas().hist(bins=100)

In [None]:
# time span covered by the lane-matched data: (earliest, latest)
epoch_time = radar_df["epoch_time"]
epoch_time.min(), epoch_time.max()

In [None]:
processed_file = ROOT / "notebooks" / "clean_workflow" / "data" / f"{RADAR_DIR.stem}_processed.parquet"

# Create the output directory up front (as is done for the tmp directory);
# otherwise the write below fails on a fresh checkout.
processed_file.parent.mkdir(parents=True, exist_ok=True)

# Keep only the points that were assigned a lane and whose `min_d` lies in
# the closed interval [-6, 10], then store them next to `processed_file`
# with a "_1Lane" suffix.
radar_df.filter(
    pl.col("lane").is_not_null() & pl.col("min_d").is_between(-6, 10)
).write_parquet(processed_file.parent / f"{processed_file.stem}_1Lane.parquet")