## Resample vp

Resample vehicle positions at 10 second or 5 second granularity with `scipy` and plot the vehicle positions against shape.

The `scipy` docs mark `scipy.interpolate.interp1d` as legacy: it will no longer receive updates, and new code is encouraged to use other interpolators.

Comparing this with our `numpy.interp` approach, linear interpolation gives the same results, so we'll keep our existing interpolation method.

Take a look at loopy and inlining trips specifically, to check the order of points. Plot these on a chart and see whether the weird jumping-around pattern is observed (the weirdness is a V-shape, but that might be ok if you start at the middle of the shape and travel around).

References:

* https://machinelearningmastery.com/resample-interpolate-time-series-data-python/
* https://stackoverflow.com/questions/71431644/resampling-time-series-data-using-python-numpy


In [1]:
import altair as alt
import folium
import numpy as np
import pandas as pd
import geopandas as gpd
import scipy
import shapely

from segment_speed_utils import helpers
from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT
from shared_utils import rt_dates

# Single service date used for every read below (scheduled trips, scheduled
# shapes, and the vp parquet path); looked up under the "oct2024" key.
analysis_date = rt_dates.DATES["oct2024"]

In [2]:
# Random first 5 trips for Feb 2025
# NOTE(review): analysis_date above is set to the "oct2024" key, and this list
# is not referenced by any of the visible cells -- confirm it is still needed.
subset_trips = [
    "00004f47348591029b238f22b41c729d",
    "0000ae0f52e0d403dabbe01c316d5675",
    "00012ab5fe19df8e8f79ef353d5499ce",
    "00012f8f126d74e1d59c3053a833fbf4",
    "0001b4806fc36d83838024680579fd15",
]

In [3]:
# Pick LADOT that has loopy and regular trips, test these
ladot_key = "cc53a0dbf5df90e3009b9cb5d89d80ba"

# Regular trip_ids. Every entry needs its own comma: adjacent string literals
# with no comma between them are silently concatenated into one (bogus) ID,
# which would drop both trips from the "in" filter.
trips_to_use = [
    "183-07rmbur",
    "30--wPRGN1uEEG",
    "30-_6WXotLOIF7fc",
    "47-3pbila5j7",
]

# Loopy trip_ids; these get loopy == 1 when the trips table is flagged.
loopy_trips_to_use = [
    "183-04u26szx9",
    "30-15t6vxs",
    "30-0SpZx5QGbqM",
]


# Every trip_id of interest (regular + loopy), built once and shared by the filters.
all_trip_ids = trips_to_use + loopy_trips_to_use

# NOTE(review): trip_filter is not used by any visible cell, and it names the
# operator column "schedule_gtfs_dataset_key" while the filter actually applied
# below uses "gtfs_dataset_key" -- confirm which one is stale.
trip_filter = [[
    ("schedule_gtfs_dataset_key", "==", ladot_key),
    ("trip_id", "in", all_trip_ids)
]]

# Scheduled trips for the chosen operator, restricted to the selected trip_ids.
trips = helpers.import_scheduled_trips(
    analysis_date,
    columns = ["trip_id", "trip_instance_key", "shape_array_key"],
    get_pandas = True,
    filters = [[("gtfs_dataset_key", "==", ladot_key),
               ("trip_id", "in", all_trip_ids)]]
)

# Flag loopy trips with 1 and all others with 0. A vectorized membership test
# avoids a row-wise apply and still yields a well-formed (empty) integer column
# when the filter returns no rows.
trips = trips.assign(
    loopy = trips.trip_id.isin(loopy_trips_to_use).astype("int64")
)

# Shape geometries (projected to EPSG:3310) for only the shapes those trips use,
# with the trip columns (including the loopy flag) attached via shape_array_key.
shapes = helpers.import_scheduled_shapes(
    analysis_date,
    columns = ["shape_array_key", "geometry"],
    get_pandas = True,
    crs = "EPSG:3310",
    filters = [[("shape_array_key", "in", trips.shape_array_key.tolist())]]
).merge(
    trips,
    on = "shape_array_key",
    how = "inner"
)

In [4]:
# Read the condensed vp table for the analysis date, keeping only the rows for
# the selected trips, then project to EPSG:3310 to match the shapes.
VP_CONDENSED = GTFS_DATA_DICT.speeds_tables.vp_condensed_line
vp_path = f"{SEGMENT_GCS}{VP_CONDENSED}_{analysis_date}.parquet"
selected_trip_keys = trips.trip_instance_key.tolist()

vp = gpd.read_parquet(
    vp_path,
    columns = ["trip_instance_key", "location_timestamp_local", "geometry"],
    filters = [[("trip_instance_key", "in", selected_trip_keys)]],
)
vp = vp.to_crs("EPSG:3310")

In [5]:
# Interactive map of the selected shapes, colored by shape_array_key.
shapes.explore("shape_array_key", tiles = "CartoDB Positron")

In [6]:
# Put each trip's vp geometry and its shape geometry on the same row. Both
# geometry columns are renamed first so they do not collide in the merge;
# the vp geometry becomes the active geometry of the result.
vp_renamed = vp.rename(columns = {"geometry": "vp_geometry"})
shapes_renamed = shapes.rename(columns = {"geometry": "shape_geometry"})

gdf = pd.merge(
    vp_renamed,
    shapes_renamed,
    on = "trip_instance_key",
    how = "inner",
)
gdf = gdf.set_geometry("vp_geometry")

In [7]:
# For each trip: convert the array of vp timestamps to seconds, resample it at a
# higher, regular frequency (every 5 seconds), and linearly interpolate the
# distance against shape (our vp_meters) at those new timestamps.
# For vp paths that follow simple shapes, this should work ok.
# Results are collected in gdf row order, one entry per row.
new_timestamps_series = []
vp_meters_series = []
vp_geom_series = []
vp_point_series = []

for row in gdf.itertuples():
    vp_line = row.vp_geometry
    shape_line = row.shape_geometry

    # Distance along the shape of every observed vp coordinate
    vp_points = np.asarray(vp_line.coords)
    vp_meters = np.asarray(
        [shape_line.project(shapely.Point(xy)) for xy in vp_points]
    )

    # Observed timestamps as float seconds
    timestamps = np.asarray(
        row.location_timestamp_local.astype("datetime64[s]").astype("float64")
    )

    # Regular 5-second grid from first to last observation
    # (np.arange leaves out the stop value itself)
    timestamps_new = np.arange(min(timestamps), max(timestamps), step=5)

    # np.interp gives the same linear result as
    # scipy.interpolate.interp1d(kind="linear"), so scipy is not needed here
    vp_meters_new = np.interp(timestamps_new, timestamps, vp_meters)

    # NOTE(review): vp_meters are measured along shape_geometry, but the
    # interpolated distances are placed along vp_geometry -- confirm that
    # mixing the two lines is intended.
    resampled_positions = [vp_line.interpolate(d) for d in vp_meters_new]
    new_vp_positions = shapely.LineString(resampled_positions)
    new_vp_points = [shapely.Point(xy) for xy in new_vp_positions.coords]

    new_timestamps_series.append(timestamps_new)
    vp_meters_series.append(vp_meters_new)
    vp_geom_series.append(new_vp_positions)
    vp_point_series.append(new_vp_points)

  return lib.line_locate_point(line, other)


Take a look at how the resampled timestamps look once the vehicle positions are plotted.

We should definitely expect to get back a very similar path that follows the shape, since we're only filling in the gaps.

In [8]:
# Attach the per-row resampling results back onto gdf.
# The GeoSeries is built without an explicit index, so it lines up with gdf by
# index label -- presumably gdf still has the default 0..n-1 index from the
# merge; verify if gdf is ever re-indexed or filtered before this cell.
resampled_vp_lines = gpd.GeoSeries(vp_geom_series, crs = "EPSG:3310")

gdf = gdf.assign(
    resampled_times = new_timestamps_series,
    interpolated_distances = vp_meters_series,
    vp_geometry2 = resampled_vp_lines,
    vp_points = vp_point_series,
)

In [9]:
# Resampled vp as a line: one row per trip, with the resampled linestring
# as the active geometry
line_cols = ["trip_instance_key", "loopy", "vp_geometry2"]
gdf2 = gdf[line_cols].set_geometry("vp_geometry2")

In [10]:
# Resampled vp as points: explode the per-trip lists so that each row is one
# resampled timestamp with its point geometry
point_cols = ["trip_instance_key", "loopy", "resampled_times", "vp_points"]
exploded_points = gdf[point_cols].explode(["resampled_times", "vp_points"])

gdf3 = gpd.GeoDataFrame(
    exploded_points,
    geometry = "vp_points",
    crs = "EPSG:3310",
)

# Seconds elapsed since each trip's first resampled timestamp
trip_start_time = (
    gdf3.groupby("trip_instance_key")
    .resampled_times
    .transform("min")
)

gdf3 = gdf3.assign(
    sec_from_start = gdf3.resampled_times - trip_start_time
)

In [11]:
def plot_shapes_vp(shapes, vp_line, vp_points):
    """
    Layer shapes, the resampled vp line, and the resampled vp points
    on a single interactive map.

    Each argument is mapped with its own `.explore()` call, stacked onto the
    same map: shapes first (on CartoDB Positron tiles), then vp_line in
    orange, then vp_points colored categorically by `sec_from_start`
    with the legend turned off. A folium layer control is added at the end
    so the three named layers can be toggled.

    Returns the map carrying all three layers.
    """
    shapes_map = shapes.explore(tiles = "CartoDB Positron", name = "shapes")

    with_line = vp_line.explore(
        m = shapes_map,
        color = "orange",
        name = "vp as line",
    )

    with_points = vp_points.explore(
        "sec_from_start",
        categorical = True,
        legend = False,
        m = with_line,
        name = "vp as points",
    )

    folium.LayerControl().add_to(with_points)

    return with_points

In [12]:
# Regular (loopy == 0) trips: shapes, resampled vp line, resampled vp points
plot_shapes_vp(
    shapes[shapes.loopy == 0],
    gdf2[gdf2.loopy == 0],
    gdf3[gdf3.loopy == 0],
)

  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


In [13]:
# Loopy (loopy == 1) trips: shapes, resampled vp line, resampled vp points
plot_shapes_vp(
    shapes[shapes.loopy == 1],
    gdf2[gdf2.loopy == 1],
    gdf3[gdf3.loopy == 1],
)

  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


In [14]:
# One map per loopy trip, so each trip's point ordering can be inspected alone
loopy_trip_keys = gdf2[gdf2.loopy == 1].trip_instance_key.unique()

for one_trip in loopy_trip_keys:
    print(one_trip)

    trip_shapes = shapes[(shapes.loopy == 1) & (shapes.trip_instance_key == one_trip)]
    trip_line = gdf2[(gdf2.loopy == 1) & (gdf2.trip_instance_key == one_trip)]
    trip_points = gdf3[(gdf3.loopy == 1) & (gdf3.trip_instance_key == one_trip)]

    display(plot_shapes_vp(trip_shapes, trip_line, trip_points))

02163359921353bec00ac82aec81452f


  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


0a4ce7e1b99840db5b3e8051272184a0


73652091499f345ed1f5d2d2e117030c


  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


In [15]:
# One map per loopy trip.
# NOTE(review): this cell repeats the preceding per-trip loop with the same
# loopy == 1 selection -- confirm whether it was meant to cover a different
# set of trips (e.g. loopy == 0) or can be dropped.
loopy_trip_keys = gdf2[gdf2.loopy == 1].trip_instance_key.unique()

for one_trip in loopy_trip_keys:
    print(one_trip)

    trip_shapes = shapes[(shapes.loopy == 1) & (shapes.trip_instance_key == one_trip)]
    trip_line = gdf2[(gdf2.loopy == 1) & (gdf2.trip_instance_key == one_trip)]
    trip_points = gdf3[(gdf3.loopy == 1) & (gdf3.trip_instance_key == one_trip)]

    display(plot_shapes_vp(trip_shapes, trip_line, trip_points))

02163359921353bec00ac82aec81452f


  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


0a4ce7e1b99840db5b3e8051272184a0


  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


73652091499f345ed1f5d2d2e117030c


  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


In [16]:
# Long table for charting: one row per resampled timestamp with its
# interpolated distance
distance_cols = [
    "trip_instance_key", "loopy",
    "resampled_times", "interpolated_distances",
]
gdf4 = gdf[distance_cols].explode(["interpolated_distances", "resampled_times"])


In [17]:
# Let's see if these distances stop jumping around:
# one line chart per loopy trip of interpolated distance over resampled time
loopy_trip_keys = gdf2[gdf2.loopy == 1].trip_instance_key.unique()

for one_trip in loopy_trip_keys:
    print(one_trip)

    trip_distances = gdf4[gdf4.trip_instance_key == one_trip]
    chart = (
        alt.Chart(trip_distances)
        .mark_line()
        .encode(
            x = "resampled_times",
            y = "interpolated_distances",
        )
    )
    display(chart)
    


02163359921353bec00ac82aec81452f


0a4ce7e1b99840db5b3e8051272184a0


73652091499f345ed1f5d2d2e117030c
