In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported local
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

import pandas as pd
import plotly.graph_objects as go
import polars as pl
import utm

from filtering import Filtering

# Use plotly as the pandas plotting backend so DataFrame/Series `.plot` calls
# produce plotly figures.
pd.options.plotting.backend = "plotly"


## Read in the Data


In [3]:
# Location of the radar capture to analyse. The default is the path this
# notebook was originally run against; set the RADAR_PARQUET_PATH environment
# variable to point at a different file without editing the notebook.
RADAR_PARQUET_PATH = os.environ.get(
    "RADAR_PARQUET_PATH",
    "/Users/max/Library/CloudStorage/Box-Box/Radar-Data/new_format/1678654910050.parquet",
)

# Build a lazy scan of the file; it is collected into memory in a later cell.
df = pl.scan_parquet(RADAR_PARQUET_PATH)


In [4]:
# Execute the lazy scan and rebind `df` to the collected result.
# NOTE(review): because `df` is rebound, re-running this cell on its own calls
# `.collect()` on the already-collected frame — confirm that is acceptable.
df = df.collect()


In [5]:
# Build a per-radar object key of the form "<ui32_objectID>_<ip>", attach it as
# "object_id", then order the rows chronologically.
object_key = pl.col("ui32_objectID").cast(str) + "_" + pl.col("ip")

df = df.with_columns([object_key.alias("object_id")])
df = df.sort("epoch_time")


### Resample the Radar Data to Every 0.1 Seconds (100 ms)


In [6]:
# Width of each resampling window.
GROUPBY_EVERY = "100ms"

# Columns that are averaged inside each window.
averaged_columns = [
    "f32_positionX_m",
    "f32_positionY_m",
    "f32_velocityInDir_mps",
]
# Grouping/index columns; these are produced by the group-by itself.
key_columns = ["object_id", "epoch_time"]
# Everything else keeps the first value observed in the window.
carried_columns = [
    name
    for name in df.columns
    if name not in averaged_columns and name not in key_columns
]

# Means first, then the carried columns in their original order.
aggregations = [pl.col(name).mean() for name in averaged_columns]
aggregations += [pl.col(name).first() for name in carried_columns]

df = (
    df.sort("epoch_time")
    .groupby_dynamic(
        index_column="epoch_time",
        every=GROUPBY_EVERY,
        by=["object_id"],
    )
    .agg(aggregations)
)


In [7]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
import plotly.express as px

# NOTE(review): an access token is committed in this notebook. Prefer supplying
# it through the MAPBOX_ACCESS_TOKEN environment variable; the literal is kept
# only as a fallback so the notebook keeps running unchanged.
mapbox_key = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1IjoibWF4LXNjaHJhZGVyIiwiYSI6ImNrOHQxZ2s3bDAwdXQzbG81NjZpZm96bDEifQ.etUi4OK4ozzaP_P8foZn_A",
)

# Fixed seed so the plotted subsample is the same on every run.
SAMPLE_SEED = 42

fig = go.Figure()

fig.update_layout(template="ggplot2", height=600, width=1200, font_size=18)


# create a color map for all the radar ips; sorted so each radar gets the same
# color on every run instead of depending on the order `unique()` returns
radar_ips = df["ip"].unique().sort().to_list()
radar_colors = px.colors.qualitative.D3
radar_color_map = {
    ip: radar_colors[i % len(radar_colors)] for i, ip in enumerate(radar_ips)
}

pandas_df = df.to_pandas()

for ip, r in pandas_df.groupby("ip"):

    # plot a random half of each radar's points to keep the figure light
    _s = r.sample(len(r) // 2, random_state=SAMPLE_SEED)

    fig.add_trace(
        go.Scattermapbox(
            lon=_s["lon"],
            lat=_s["lat"],
            mode="markers",
            opacity=1,
            line_width=4,
            name=ip,
            marker=dict(
                size=10,
                color=radar_color_map[ip],
                opacity=1,
            ),
            visible=True,
        )
    )

fig.update_layout(
    margin=go.layout.Margin(
        l=50,  # left margin
        r=50,  # right margin
        b=50,  # bottom margin
        t=50,  # top margin
    ),
    mapbox=dict(
        accesstoken=mapbox_key,
        bearing=0,
        # style as mapbox satellite
        style="satellite-streets",
        # center on all radar points rather than on whichever radar's sample
        # happened to be drawn last in the loop above
        center=go.layout.mapbox.Center(
            lat=pandas_df["lat"].mean(), lon=pandas_df["lon"].mean()
        ),
        pitch=0,
        zoom=14.1,
    ),
)


fig.show()


### Remove Objects That Don't Move at Least X Meters or Spend at Least X Seconds on the Radar


The current settings here are arbitrary. I'm not sure what the best settings are. I'm also not sure if this is the best way to do this. I'm open to suggestions.


In [8]:
# Thresholds for the trajectory filter in the following cell; an object is kept
# only if it satisfies both.
MIN_DISTANCE = 100  # meters; compared against the first-to-last "straight_distance"
MIN_TIME = 5  # seconds; compared against the first-to-last "duration"


In [9]:
# Shorthand for the columns the per-object summary is built from.
x_pos = pl.col("f32_positionX_m")
y_pos = pl.col("f32_positionY_m")
timestamp = pl.col("epoch_time")

# One row per object: displacement, path length and time on the radar.
trajectory_stats = df.groupby("object_id").agg(
    [
        # straight-line distance between the first and last position
        (
            (x_pos.first() - x_pos.last()).pow(2)
            + (y_pos.first() - y_pos.last()).pow(2)
        )
        .sqrt()
        .alias("straight_distance"),
        # summed step-to-step distance; computed but not used by the filter below
        ((x_pos.diff()) ** 2 + (y_pos.diff()) ** 2)
        .sqrt()
        .sum()
        .alias("distance"),
        # seconds elapsed between the first and last sample
        (timestamp.last() - timestamp.first()).dt.seconds().alias("duration"),
    ]
)

# Keep only objects that both travelled far enough and were tracked long enough.
meets_thresholds = (pl.col("straight_distance") >= MIN_DISTANCE) & (
    pl.col("duration") >= MIN_TIME
)
kept_object_ids = trajectory_stats.filter(meets_thresholds)["object_id"].to_list()

df = df.filter(pl.col("object_id").is_in(kept_object_ids))


#### Filter Out the Artifact at the End of a Trajectory Where the Velocity Stops Changing


In [10]:
# Trim the tail of every trajectory where the reported velocity stops changing.
# The helper columns "stopped", "stopped_count" and "trim" are left on the
# resulting frame.
df = (
    df.sort("epoch_time")
    .with_columns(
        [
            # "stopped": evaluated per object; True when the velocity is positive
            # and differs from the previous row by less than 0.01. The first row
            # of each object has no previous value, so its difference check is
            # filled with True.
            (
                (pl.col("f32_velocityInDir_mps").diff().abs() < 0.01).fill_null(True)
                & (pl.col("f32_velocityInDir_mps") > 0)
            )
            .over("object_id")
            .alias("stopped"),
        ]
    )
    .with_columns(
        [
            # "stopped_count": per-object running count of rows that are NOT
            # flagged as stopped.
            (~pl.col("stopped"))
            .cast(pl.Int8())
            .cumsum()
            .over("object_id")
            .alias("stopped_count")
        ]
    )
    .with_columns(
        # "trim": True once the running count has reached its per-object maximum,
        # i.e. from the last unflagged row through the end of the trajectory.
        # NOTE(review): this also drops the last unflagged row itself, and drops
        # every row of an object whose rows are all flagged — confirm intended.
        (pl.col("stopped_count") >= pl.col("stopped_count").max())
        .over("object_id")
        .alias("trim")
    )
    .filter(~pl.col("trim"))
    .sort(["object_id", "epoch_time"])
)


### Geolocate the Radar Data


In [11]:
# Paths (relative to the notebook's working directory) to the geo inputs that
# are handed to `Filtering`: the network outline and the radar origins.
network_outline_file = "./geo_data/network_outline.geojson"
radar_locations_file = "./geo_data/radar_origins.json"


In [12]:
# Construct the project's `Filtering` helper from the radar-location and
# network-boundary files; its methods are piped over the radar frame below.
f = Filtering(
    radar_location_path=radar_locations_file,
    network_boundary_path=network_outline_file,
)


In [15]:
# Geolocation steps from `Filtering`, applied in this order to a copy of `df`
# so the original frame stays available for comparison.
geolocation_steps = (
    f.correct_center,
    f.rotate_radars,
    f.radar_to_utm,
    f.radar_to_latlon,
    f.radar_to_h3,
    f.filter_network_boundaries,
)

transformed_df = df.clone()
for step in geolocation_steps:
    transformed_df = transformed_df.pipe(step)


function: correct_center took: 0.005028963088989258 seconds
function: rotate_radars took: 0.013345718383789062 seconds
function: radar_to_utm took: 0.015944242477416992 seconds
function: radar_to_latlon took: 0.020218849182128906 seconds
function: radar_to_h3 took: 0.0903012752532959 seconds
function: filter_network_boundaries took: 0.003957033157348633 seconds


## Plot transformed vs Rust transform

In [16]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
import plotly.express as px

# NOTE(review): an access token is committed in this notebook. Prefer supplying
# it through the MAPBOX_ACCESS_TOKEN environment variable; the literal is kept
# only as a fallback so the notebook keeps running unchanged.
mapbox_key = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1IjoibWF4LXNjaHJhZGVyIiwiYSI6ImNrOHQxZ2s3bDAwdXQzbG81NjZpZm96bDEifQ.etUi4OK4ozzaP_P8foZn_A",
)
fig = go.Figure()

fig.update_layout(template="ggplot2", height=600, width=1200, font_size=18)


# radars to include in the comparison
plot_ips = ['10.160.7.141']

# (legend prefix, frame, longitude column, latitude column) for each source:
# "rust" plots the lon/lat already present on `df`, "python" plots the
# lon_new/lat_new columns of `transformed_df`.
comparison_sources = [
    ("rust", df, "lon", "lat"),
    ("python", transformed_df, "lon_new", "lat_new"),
]

for name, _df, lon_col, lat_col in comparison_sources:

    plot_df = _df.filter(pl.col("ip").is_in(plot_ips)).to_pandas()

    for ip, r in plot_df.groupby("ip"):

        fig.add_trace(
            go.Scattermapbox(
                lon=r[lon_col],
                lat=r[lat_col],
                mode="markers",
                opacity=1,
                line_width=4,
                name=name + ip,
                marker=dict(
                    size=10,
                    opacity=1,
                ),
                visible=True,
            )
        )

# Center the map on the radars being compared. This is computed here instead of
# reusing a loop variable left behind by an earlier plotting cell, so the cell
# does not depend on that cell having been run and the view matches the data.
center_df = df.filter(pl.col("ip").is_in(plot_ips))

fig.update_layout(
    margin=go.layout.Margin(
        l=50,  # left margin
        r=50,  # right margin
        b=50,  # bottom margin
        t=50,  # top margin
    ),
    mapbox=dict(
        accesstoken=mapbox_key,
        bearing=0,
        # style as mapbox satellite
        style="satellite-streets",
        center=go.layout.mapbox.Center(
            lat=center_df["lat"].mean(), lon=center_df["lon"].mean()
        ),
        pitch=0,
        zoom=14.1,
    ),
)


fig.show()
