In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import polars as pl
import pandas as pd
import plotly.graph_objects as go
import utm

from filtering import Filtering


## Read in the Data


In [3]:
# Lazily scan the raw radar export; the next cell materialises it with .collect().
# NOTE(review): this is an absolute, user-specific path, so the cell only runs
# on a machine with the same home directory layout. Consider a configurable
# data directory.
df = pl.scan_parquet(
    "/Users/max/Library/CloudStorage/Box-Box/Radar-Data/1677797903256.parquet"
)


In [4]:
df = df.collect()


In [5]:
# Build a track key of the form "<ui32_objectID>_<ip>" (the radar's own id
# combined with the radar's ip) and order the rows chronologically.
df = df.with_columns(
    (pl.col("ui32_objectID").cast(str) + "_" + pl.col("ip")).alias("object_id")
).sort("epoch_time")


### Resample the Radar Data to Every 0.1 Seconds (100 ms)


In [6]:
GROUPBY_EVERY = "100ms"

# Columns that are averaged inside each time window.
mean_columns = ["f32_positionX_m", "f32_positionY_m", "f32_velocityInDir_mps"]
# Grouping / index columns, which are not aggregated.
key_columns = ["object_id", "epoch_time"]

mean_exprs = [pl.col(name).mean() for name in mean_columns]
# take the first value of the rest of the columns
first_exprs = [
    pl.col(name).first()
    for name in df.columns
    if name not in mean_columns and name not in key_columns
]

# One output row per (object_id, window of GROUPBY_EVERY).
df = (
    df.sort("epoch_time")
    .groupby_dynamic(
        index_column="epoch_time",
        every=GROUPBY_EVERY,
        by=["object_id"],
    )
    .agg(mean_exprs + first_exprs)
)


In [7]:
### Remove Objects that don't move at least 10 meters or spend < 10 seconds on the radar

### Geolocate the Radar Data


In [8]:
# create the file paths
network_outline_file = "./geo_data/network_outline.geojson"
radar_locations_file = "./geo_data/radar_origins.json"


In [9]:
f = Filtering(
    radar_location_path=radar_locations_file,
    network_boundary_path=network_outline_file,
)


In [10]:
# Run the geolocation steps of the project-local Filtering helper in sequence.
# NOTE(review): what each step does is inferred from its name only
# (radar-local -> UTM -> lat/lon -> H3 cell -> clip to the network outline);
# confirm against filtering.py. Later cells read "x", "y" and "h3" columns that
# are presumably added here.
df = (
    df.pipe(f.radar_to_utm)
    .pipe(f.radar_to_latlon)
    .pipe(f.radar_to_h3)
    .pipe(f.filter_network_boundaries)
)


function: radar_to_utm took: 0.2780170440673828 seconds
function: radar_to_latlon took: 0.1320507526397705 seconds
function: radar_to_h3 took: 0.9018926620483398 seconds
function: filter_network_boundaries took: 0.05010628700256348 seconds


### Automatically Find the Regions of Overlapping Data

In [11]:
# Count rows per (H3 cell, radar ip) and spread the radars into columns so
# that each row describes one H3 cell; cells a radar never saw get 0.
overlaps = (
    df.groupby(["h3", "ip"])
    .agg([pl.col("object_id").count().alias("count")])
    .pivot(values="count", index="h3", columns="ip", aggregate_function="sum")
    .fill_null(0)
    .to_pandas()
    .set_index("h3")
)

# Turn the counts into shares: every row is divided by its own total, so the
# value for a radar is the fraction of that cell's rows it contributed.
overlaps = overlaps.div(overlaps.sum(axis=1), axis="index")


# overlaps.to_csv('overlaps.csv')

### Testing Stuff

#### Creating Overlap Zones

In [12]:
# Radar ip addresses, grouped in pairs under a site name.
AIRPORT = ("10.160.7.136", "10.160.7.137")
LOWES = ("10.160.7.141", "10.160.7.142")
HARPER = ("10.160.7.146", "10.160.7.147")

# Pairs of radars (ip_a, ip_b) to be checked for overlapping coverage.
overlapping_pairs = ((AIRPORT[1], LOWES[0]),)

In [13]:
# Minimum share of a cell's rows a radar must contribute for the cell to count
# as "seen" by that radar.
THRESHOLD = 0.05


def _cells_above_threshold(ip):
    """Return the set of H3 cells where `ip` contributes more than THRESHOLD."""
    return set(overlaps.index[overlaps[ip] > THRESHOLD])


# For every radar pair: the H3 cells that both radars see above the threshold.
overlapping_h3 = {
    (ip_a, ip_b): list(_cells_above_threshold(ip_a) & _cells_above_threshold(ip_b))
    for ip_a, ip_b in overlapping_pairs
}

#### Test Pair



In [18]:
test_pair = overlapping_pairs[0]

In [19]:
# Rows that fall in an H3 cell shared by both radars of the test pair.
# NOTE(review): in the cells visible here, `match_df` is reassigned by the
# matching loop before this value is ever read.
match_df = df.filter(pl.col('h3').is_in(overlapping_h3[test_pair]))


In [20]:
ip_1_df = df.filter(pl.col('ip') == test_pair[0])
ip_2_df = df.filter(pl.col('ip') == test_pair[1])

In [83]:
# Maps an object_id seen by the first radar to the object_id of the second
# radar that shares the most (epoch_time, h3) samples with it.
matches_ip1 = {}

for veh in ip_1_df['object_id'].unique().to_list():
    # Samples of this vehicle, reduced to the join keys. The join is computed
    # once and reused (it used to be evaluated twice per vehicle).
    veh_samples = ip_1_df.filter(pl.col('object_id') == veh).select(['epoch_time', 'h3'])

    # Second-radar rows observed at the same timestamp in the same H3 cell.
    # `object_id` in the result comes from ip_2_df, because the left side only
    # carries the join keys.
    candidates = veh_samples.join(ip_2_df, on=['epoch_time', 'h3'])

    # Require more than one coincident sample before accepting a match.
    if candidates.shape[0] <= 1:
        continue

    match_df = candidates

    # Rank the second-radar objects by number of coincident samples.
    # NOTE(review): ties are not broken explicitly; the winner among equal
    # counts is whatever order the sort happens to return.
    matches = match_df.groupby('object_id').agg([
        pl.col('epoch_time').count().alias('count'),
    ]).sort('count', descending=True).to_numpy()

    matches_ip1[veh] = matches[0][0]

In [84]:
len(ip_1_df['object_id'].unique().to_list()), len(matches_ip1)


(658, 192)

### Plot 

In [67]:
# Ad-hoc check for a single vehicle: repeats the per-vehicle join used in the
# matching loop and evaluates to the second-radar object_id with the most
# coincident (epoch_time, h3) rows.
# NOTE: `veh` is not defined in this cell; it is whatever value the last
# `for veh in ...` loop left behind, so the result depends on hidden state.
match_df = ip_1_df.filter(pl.col('object_id') == veh).select(['epoch_time', 'h3']).join(
    ip_2_df,
    on=['epoch_time', 'h3'],
)

match_df.groupby('object_id').agg([
    pl.col('epoch_time').count().alias('count'),
]).sort('count', descending=True).to_numpy()[0][0]

'61809_10.160.7.141'

### Calculate Distance to TL1 Stop Bar


In [None]:
f.utm_zone[1]

In [None]:
# TL1 stop bar position as (latitude, longitude), the argument order expected
# by utm.from_latlon.
TL1_POS = (33.233039472986604, -87.62266063800959)
# Project into the UTM zone exposed by the Filtering helper (f.utm_zone is
# indexed as [0] = zone number, [1] = zone letter) and keep only the first two
# returned values (easting, northing).
TL1_POS_UTM = utm.from_latlon(
    *TL1_POS, force_zone_number=f.utm_zone[0], force_zone_letter=f.utm_zone[1]
)[:2]


In [None]:
# Euclidean distance between every sample's (x, y) and the TL1 stop bar, in
# the same units as the "x"/"y" columns.
df = df.with_columns(
    (
        (
            (pl.col("x") - TL1_POS_UTM[0]) ** 2
            + (pl.col("y") - TL1_POS_UTM[1]) ** 2
        )
        ** 0.5
    ).alias("distance_from_tl1")
)


### Grab Two Similar Trajectories


In [None]:
veh1 = "62249_10.160.7.141"
veh2 = "46656_10.160.7.137"
veh3 = "62252_10.160.7.141"
## get the data for the two vehicles
df1 = df.filter(pl.col("object_id") == veh1).to_pandas()
df2 = df.filter(pl.col("object_id") == veh2).to_pandas()
# only consider objects that move closer to TL1 during their time in the network


In [None]:
set(df1[['epoch_time', 'h3']].apply(tuple, axis=1)) & set(df2[['epoch_time', 'h3']].apply(tuple, axis=1))

In [None]:
# 

In [None]:
import plotly.express as px

# Distance to the TL1 stop bar over time for the hand-picked vehicles.
fig = go.Figure()

# One colour per radar. The ips are sorted so the colour assignment is the same
# on every run (the order returned by `unique()` is not guaranteed).
radar_ips = df["ip"].unique().sort().to_list()
radar_colors = px.colors.qualitative.D3
radar_color_map = {
    ip: radar_colors[i % len(radar_colors)] for i, ip in enumerate(radar_ips)
}

for veh in [veh1, veh2, veh3]:
    _df = df.filter(pl.col("object_id") == veh).to_pandas()
    if _df.empty:
        # Unknown object id: skip it instead of failing on `.iloc[0]`.
        continue

    fig.add_trace(
        go.Scatter(
            x=_df.epoch_time,
            y=_df.distance_from_tl1,
            mode="lines",
            opacity=1,
            # colour encodes the radar, the legend entry identifies the vehicle
            line_color=radar_color_map[_df.ip.iloc[0]],
            name=veh,
            showlegend=True,
        )
    )

fig.update_layout(
    title="Distance from TL1 stop bar over time",
    xaxis_title="epoch_time",
    yaxis_title="distance_from_tl1",
    legend_title_text="object_id",
)
fig.show()


In [None]:
import plotly.express as px

# Plan view (x vs. y) of the hand-picked vehicles' trajectories.
fig = go.Figure()

# One colour per radar. The ips are sorted so the colour assignment is the same
# on every run (the order returned by `unique()` is not guaranteed).
radar_ips = df["ip"].unique().sort().to_list()
radar_colors = px.colors.qualitative.D3
radar_color_map = {
    ip: radar_colors[i % len(radar_colors)] for i, ip in enumerate(radar_ips)
}

for veh in [veh1, veh2, veh3]:
    _df = df.filter(pl.col("object_id") == veh).to_pandas()
    if _df.empty:
        # Unknown object id: skip it instead of failing on `.iloc[0]`.
        continue

    fig.add_trace(
        go.Scatter(
            x=_df.x,
            y=_df.y,
            mode="lines",
            opacity=1,
            # colour encodes the radar, the legend entry identifies the vehicle
            line_color=radar_color_map[_df.ip.iloc[0]],
            name=veh,
            showlegend=True,
        )
    )

fig.update_layout(
    title="Vehicle trajectories",
    xaxis_title="x",
    yaxis_title="y",
    legend_title_text="object_id",
)
fig.show()


## Try the Trajectory Association Methods

Following https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4635697

1. Subtract the first moment of the trajectory from itself
2. Fit a 2nd order polynomial to the trajectory
3. Take the absolute difference between the start and end position of the trajectories IN THE OVERLAPPING REGION
4. They drop the time vector....

Following https://www.eecs.qmul.ac.uk/~andrea/papers/2009_AVSS_TrajAssociation_Anjum_Cavallaro.pdf

1. They add the average velocity to above
2. They add the average position to above
3. They do a histogram function of direction angles to find the three dominant angles of the trajectory

Both create a vector of these features for each trajectory and then calculate the correlation between all of the trajectories, taking the max as the match

**Problems: They do not consider time at all? Nevermind, it is baked into the polynomial regression**


### Get the Overlapping Region


In [None]:
# get only trajectories that are inside of the overlapping box
import geopandas as gpd


In [None]:
# Geometry describing the overlap zone (the file name suggests radars .141 and .137).
box_df = gpd.read_file("overlap_zones/141_137.geojson")

# Reproject to the UTM CRS that geopandas estimates from the data.
box_df = box_df.to_crs(box_df.estimate_utm_crs())

# Replace each geometry by its convex hull (multilinestring -> polygon), so the
# zone can be used for point-in-area tests.
box_df["geometry"] = box_df["geometry"].convex_hull


In [None]:
# Only the two radars of the overlap zone are considered from here on.
keep_ips = ["10.160.7.141", "10.160.7.137"]


sliced = df.filter(pl.col("ip").is_in(keep_ips)).to_pandas()

# Point geometry per radar sample, built from the "x"/"y" columns.
# NOTE(review): the points are labelled with box_df's CRS without any
# reprojection; this assumes "x"/"y" are already in that same UTM zone. Confirm.
radar_df = gpd.GeoDataFrame(
    sliced, geometry=gpd.points_from_xy(sliced.x, sliced.y), crs=box_df.crs
)


In [None]:
# Keep only the radar samples whose point intersects the first (index 0)
# overlap-zone geometry.
overlap_zone = box_df.geometry.iloc[0]
inside_overlap_zone = radar_df.geometry.intersects(overlap_zone)
intersection_df = radar_df.loc[inside_overlap_zone]


In [None]:
import plotly.express as px

# Distance to the TL1 stop bar over time, restricted to samples inside the
# overlap zone.
fig = go.Figure()

# Not read in this cell; kept because it rebinds the notebook-level `keep_ips`.
keep_ips = [
    "10.160.7.136",
    "10.160.7.137",
    "10.160.7.141",
    "10.160.7.142",
    "10.160.7.146",
    "10.160.7.147",
]

# One colour per radar. The ips are sorted so the colour assignment is the same
# on every run (the order returned by `unique()` is not guaranteed).
radar_ips = df["ip"].unique().sort().to_list()
radar_colors = px.colors.qualitative.D3
radar_color_map = {
    ip: radar_colors[i % len(radar_colors)] for i, ip in enumerate(radar_ips)
}

for veh in [veh1, veh2, veh3]:
    _df = intersection_df.loc[intersection_df["object_id"] == veh]
    if _df.empty:
        # The vehicle has no samples inside the overlap zone; `.iloc[0]` below
        # would raise IndexError, so skip it.
        continue

    fig.add_trace(
        go.Scatter(
            x=_df.epoch_time,
            y=_df.distance_from_tl1,
            mode="lines",
            opacity=1,
            # colour encodes the radar, the legend entry identifies the vehicle
            line_color=radar_color_map[_df.ip.iloc[0]],
            name=veh,
            showlegend=True,
        )
    )

fig.update_layout(
    title="Distance from TL1 stop bar inside the overlap zone",
    xaxis_title="epoch_time",
    yaxis_title="distance_from_tl1",
    legend_title_text="object_id",
)
fig.show()


### Subtract the First Moment


In [None]:
def subtract_mean(df):
    """Centre a trajectory on its mean position.

    Parameters
    ----------
    df : pandas.DataFrame
        Samples of one trajectory with numeric "x" and "y" columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with two extra columns, "x_prime" and "y_prime", holding
        "x" and "y" minus their respective column means.

    The input frame is left untouched: pandas does not support functions that
    mutate the group object handed to ``groupby(...).apply``.
    """
    return df.assign(
        x_prime=df["x"] - df["x"].mean(),
        y_prime=df["y"] - df["y"].mean(),
    )


# Centre every trajectory on its own mean: subtract_mean is applied per
# object_id, and group_keys=False keeps object_id out of the result's index.
intersection_df = intersection_df.groupby("object_id", group_keys=False).apply(
    subtract_mean
)


### Fit a 2nd Order Polynomial


In [None]:
import numpy as np


# Seconds elapsed since the earliest epoch_time in the whole frame. The
# reference is the global minimum, not each trajectory's own first sample.
intersection_df["norm_time"] = (
    intersection_df["epoch_time"] - intersection_df["epoch_time"].min()
).dt.total_seconds()


def fit_polynomial(df, degree=2):
    """Least-squares polynomial fit of a centred trajectory against time.

    Parameters
    ----------
    df : pandas.DataFrame
        Samples of one trajectory with "norm_time", "x_prime" and "y_prime"
        columns.
    degree : int, default 2
        Degree of the fitted polynomial.

    Returns
    -------
    numpy.ndarray or None
        Coefficients as returned by ``np.polyfit`` with shape
        ``(degree + 1, 2)``: highest power first, column 0 for "x_prime" and
        column 1 for "y_prime". ``None`` when the trajectory has fewer than
        ``degree + 1`` samples.
    """
    n_coefficients = degree + 1
    if len(df) >= n_coefficients:
        times = df["norm_time"].to_numpy()
        positions = df[["x_prime", "y_prime"]].to_numpy()
        return np.polyfit(times, positions, degree)

    # too few samples to determine the polynomial
    return None


# object_id -> result of fit_polynomial for that trajectory. Per
# fit_polynomial, the value is None for trajectories with too few samples, so
# consumers must check for None before indexing.
polys = (
    intersection_df.groupby("object_id", group_keys=True)
    .apply(fit_polynomial)
    .to_dict()
)


In [None]:
# Visual check of the polynomial fit for one randomly chosen trajectory.
fig = go.Figure()

# get a random vehicle, restricted to those that actually have a fit
# (fit_polynomial returns None for short trajectories, which cannot be indexed)
fitted_ids = [obj for obj, coefs in polys.items() if coefs is not None]
obj_id = (
    intersection_df.object_id.loc[intersection_df.object_id.isin(fitted_ids)]
    .sample(1)
    .values[0]
)

# select the vehicle's samples once instead of re-filtering for every trace
track = intersection_df.loc[intersection_df["object_id"] == obj_id]
track_time = track["norm_time"]

# column 0 of the coefficients belongs to x_prime, column 1 to y_prime
pred_x = np.polyval(polys[obj_id][:, 0], track_time.values)
pred_y = np.polyval(polys[obj_id][:, 1], track_time.values)

# (values, colour, legend name): observed and predicted, first x then y
trace_specs = [
    (track["x_prime"], "red", "x_prime"),
    (pred_x, "blue", "pred_x"),
    (track["y_prime"], "green", "y_prime"),
    (pred_y, "orange", "pred_y"),
]

for values, color, label in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=track_time,
            y=values,
            mode="lines",
            opacity=1,
            line_color=color,
            name=label,
            showlegend=True,
        )
    )

fig.update_layout(
    title=f"Polynomial fit for {obj_id}",
    xaxis_title="norm_time",
    yaxis_title="centred position",
)
fig.show()


### Calculate the Difference Between the Start and End Positions


In [None]:
import numpy as np


def get_total_distance(df):
    """Return the straight-line distance between the first and last sample.

    Despite the name this is the end-to-end displacement, not the path length:
    only the first and the last row of ``df`` are used, in the order in which
    they appear.

    Parameters
    ----------
    df : pandas.DataFrame
        Samples of one trajectory with "x_prime" and "y_prime" columns.

    Returns
    -------
    float
        Euclidean distance between the first and last (x_prime, y_prime).
    """
    centred = df[["x_prime", "y_prime"]]
    first, last = centred.iloc[0], centred.iloc[-1]

    delta_x = last["x_prime"] - first["x_prime"]
    delta_y = last["y_prime"] - first["y_prime"]

    return np.sqrt(delta_x**2 + delta_y**2)


# Series indexed by object_id holding get_total_distance for each trajectory
# (first-to-last displacement inside the overlap zone).
distances = intersection_df.groupby(
    "object_id",
).apply(get_total_distance)


### Create the Vector of Features

$\left [ \beta_0, \beta_1, \beta_2, \alpha \right]$

Except my beta is 2x3


In [None]:
# Per radar ip: the feature vectors and, in the same order, the object ids
# they belong to.
vectors = {}
cars = {}
for object_id, coefs in polys.items():
    # trajectories without a fit contribute no feature vector
    if coefs is None:
        continue

    # object ids look like "<id>_<ip>"
    radar_ip = object_id.split("_")[1]

    # x coefficients, then y coefficients, then the first-to-last distance
    feature = np.array([*coefs[:, 0], *coefs[:, 1], distances[object_id]])

    vectors.setdefault(radar_ip, []).append(feature)
    cars.setdefault(radar_ip, []).append(object_id)


In [None]:
from scipy.spatial import distance
import numpy.ma as ma


In [None]:
# Convert the per-radar lists into numpy arrays (one row per vehicle), so they
# can be stacked and fancy-indexed later on.
vectors = {radar_ip: np.array(rows) for radar_ip, rows in vectors.items()}
cars = {radar_ip: np.array(cars[radar_ip]) for radar_ip in vectors}


In [None]:
# stack the vectors of all radars, remembering how many rows each contributed
radar_order = list(vectors)
group_sizes = [len(vectors[ip]) for ip in radar_order]
vector_stack = np.vstack([vectors[ip] for ip in radar_order])

# normalize the vectors: z-score every feature column over all radars
feature_mean = vector_stack.mean(axis=0)
feature_std = vector_stack.std(axis=0)
# a constant feature has zero spread; divide by 1 so it becomes 0 instead of NaN
feature_std[feature_std == 0] = 1.0
vector_stack = (vector_stack - feature_mean) / feature_std

# split the vectors back into the radar groups.
# The boundaries are the cumulative group sizes; slicing with
# i * len(group) is only correct when every radar has the same number of rows.
split_points = np.cumsum(group_sizes)[:-1]
vectors = dict(zip(radar_order, np.split(vector_stack, split_points)))


In [None]:
corr_distance = distance.cdist(
    vectors["10.160.7.137"], vectors["10.160.7.141"], "correlation"
)
# mx = ma.masked_array(corr_distance, np.eye(corr_distance.shape[0], dtype=bool))


In [None]:
# For every .137 vehicle (a row of corr_distance) pick the .141 vehicle (a
# column) with the smallest correlation distance. The assignment is not
# one-to-one: several .137 vehicles may map to the same .141 vehicle.
matches = dict(
    zip(
        cars["10.160.7.137"],
        cars["10.160.7.141"][corr_distance.argmin(axis=1)],
    )
)


In [None]:
# Compare one randomly chosen .137 vehicle with the .141 vehicle matched to it.
fig = go.Figure()


# get a random vehicle among those that received a match. The keys of
# `matches` are the .137 vehicles with a polynomial fit; vehicles without a fit
# are absent and would raise KeyError in the lookup below.
matched_ids = [str(obj) for obj in matches]
obj_id = (
    intersection_df.object_id.loc[intersection_df.object_id.isin(matched_ids)]
    .sample(1)
    .values[0]
)


for veh in [obj_id, matches[obj_id]]:
    _df = df.filter(pl.col("object_id") == veh).to_pandas()

    fig.add_trace(
        go.Scatter(
            x=_df.epoch_time,
            y=_df.distance_from_tl1,
            mode="lines",
            opacity=1,
            # radar_color_map is built in the earlier plotting cells
            line_color=radar_color_map[_df.ip.iloc[0]],
            name=veh,
            showlegend=True,
        )
    )

fig.update_layout(
    title="Matched trajectories: distance from TL1 stop bar over time",
    xaxis_title="epoch_time",
    yaxis_title="distance_from_tl1",
    legend_title_text="object_id",
)
fig.show()


## Trying Other Methods


https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9513575

1. Uses the OSPA distance and the Hungarian algorithm to find the best match
   1. This seems overly complicated, and still doesn't consider time


### My Own Method


In [None]:
# Geometries used for the hand-off test (the file name suggests radar .137).
# A later cell reads rows 0, 1 and 2 of this frame.
line_df = gpd.read_file("overlap_zones/137_handoff.geojson")

# Reproject to the UTM CRS that geopandas estimates from the data.
line_df = line_df.to_crs(line_df.estimate_utm_crs())

# Replace each geometry by its convex hull.
# NOTE(review): the "multilinestring to polygon" wording below matches the
# overlap-box cell; confirm that turning the hand-off geometries into hulls is
# intended here, since it changes what an intersection with them returns.
# convert from multilinestring to polygon
line_df["geometry"] = line_df["geometry"].convex_hull


In [None]:
from shapely.geometry import LineString

# create a grouped geodataframe for vehicles with their trajectories as a linestring

# A LineString needs at least two vertices, so trajectories with a single
# sample inside the overlap zone are left out instead of crashing the cell.
samples_per_track = intersection_df.groupby("object_id")["object_id"].transform("size")
tracks = intersection_df.loc[samples_per_track >= 2].groupby("object_id")

# one row per object_id: the trajectory as a LineString of its (x, y) samples
res = tracks.apply(lambda x: LineString(x[["x", "y"]].values)).to_frame("geometry")

# timestamps of the samples, in the same order as the LineString vertices
res["epoch_time"] = tracks.apply(lambda x: x["epoch_time"].values)

# radar ip of the trajectory. `.first()` yields one value per object_id and is
# indexed by object_id, so it aligns with `res`; `.transform("first")` keeps
# the per-sample row index and does not line up with `res`.
res["ip"] = tracks["ip"].first()


res = gpd.GeoDataFrame(res, geometry="geometry", crs=line_df.crs)


In [None]:
# Intersect every trajectory with the first three geometries of line_df and
# store the results in the columns "xy1", "xy2" and "xy3".
for line_number in (1, 2, 3):
    handoff_geometry = line_df.geometry.iloc[line_number - 1]
    res[f"xy{line_number}"] = res.geometry.intersection(handoff_geometry)


In [None]:
res
