## Plot `nearest_vp_to_stop` and `interpolate_stop_arrival` results

In [None]:
import dask.dataframe as dd
import folium
import geopandas as gpd
import numpy as np
import pandas as pd

from segment_speed_utils import helpers, segment_calcs
from segment_speed_utils.project_vars import SEGMENT_GCS, PROJECT_CRS
from shared_utils import rt_dates

# Date used to build every file path below; looked up under the "sep2023" key
# of the shared rt_dates.DATES mapping.
analysis_date = rt_dates.DATES["sep2023"]

In [None]:
# trip_instance_key values of the trips inspected in this notebook.
# Used to filter the vp table when reading and to pick which trips get plotted.
test_trips = [
    'e23a83e19843f71f6599cb302e23ae6e',
    '0adf85083a66f635dd3edcbdf0a5d8da',
    '73c4533f88c0759a4817902ae45df1c0',
    '70ec3122f3971fd94a50402f76b6336c',
    '1a7599df4fcd547d9b9c423345c08a0f',
    '2f567724fe306d15bd213c913f47027e',
    'e029d4c256171e2e476a4cad574f6685',
    '9a4c7a548deb282384e63bf98ac991d7',
    'db3ce71b08df1598db06615d7ed0b77f',
    '01365dc998719fc064b259ba4c1476de'
]

In [None]:
# Load the stop arrivals table for the analysis date (all trips, unfiltered).
stop_arrivals_path = f"{SEGMENT_GCS}stop_arrivals_{analysis_date}.parquet"
stop_arrivals_interp = pd.read_parquet(stop_arrivals_path)

In [None]:
# Run arrival_time through the seconds converter, then drop the raw timestamp
# columns. Later cells read `arrival_time_sec`, so the helper presumably adds a
# `<column>_sec` column (confirm in segment_calcs).
# NOTE: re-running this cell fails, because `arrival_time` is gone after the
# first run.
raw_timestamp_cols = [
    "nearest_location_timestamp_local",
    "subseq_location_timestamp_local",
    "arrival_time",
]
stop_arrivals_interp = stop_arrivals_interp.pipe(
    segment_calcs.convert_timestamp_to_seconds,
    ["arrival_time"],
).drop(columns=raw_timestamp_cols)

### Merge interpolated stop arrivals with vp and stop geometry

In [None]:
# Read only the vp rows that belong to the test trips. The filter column
# (trip_instance_key) is used for row selection but is not loaded.
vp_raw = pd.read_parquet(
    f"{SEGMENT_GCS}vp_usable_{analysis_date}/",
    filters=[[("trip_instance_key", "in", test_trips)]],
    columns=["vp_idx", "x", "y", "location_timestamp_local"],
)

# Run the timestamp through the seconds converter and drop the raw column.
vp = segment_calcs.convert_timestamp_to_seconds(
    vp_raw,
    ["location_timestamp_local"],
).drop(columns="location_timestamp_local")

# Build point geometry from x/y (declared as EPSG:4326), reproject to the
# project CRS, and drop the now-redundant coordinate columns.
vp_points = gpd.points_from_xy(vp.x, vp.y)
vp_gdf = (
    gpd.GeoDataFrame(vp, geometry=vp_points, crs="EPSG:4326")
    .to_crs(PROJECT_CRS)
    .drop(columns=["x", "y"])
)

In [None]:
# Load only the stop identifiers, the stop geometry, and the
# loop_or_inlining column from the stops_projected file.
stops_projected_cols = [
    "shape_array_key",
    "stop_sequence",
    "stop_id",
    "stop_geometry",
    "loop_or_inlining",
]
stops_projected = gpd.read_parquet(
    f"{SEGMENT_GCS}stops_projected_{analysis_date}.parquet",
    columns=stops_projected_cols,
)

In [None]:
# Attach the interpolated stop arrivals to stops_projected (stop geometry).
# The vp geometry is merged in by the following cells, not here.
# Inner join: only stops that have an arrival row are kept.
stop_merge_keys = ["shape_array_key", "stop_sequence", "stop_id"]
gdf = pd.merge(
    stops_projected,
    stop_arrivals_interp,
    how="inner",
    on=stop_merge_keys,
)

In [None]:
# Bring in the timestamp (seconds) and point geometry of each stop's nearest
# vp. vp_gdf only holds vp from the test trips, so the inner join also
# narrows the result to rows whose nearest vp belongs to those trips.
nearest_vp_renames = {
    "vp_idx": "nearest_vp_idx",
    "location_timestamp_local_sec": "nearest_sec",
    "geometry": "nearest_vp_geometry",
}
nearest_vp_gdf = vp_gdf.rename(columns=nearest_vp_renames)
gdf2 = pd.merge(gdf, nearest_vp_gdf, how="inner", on="nearest_vp_idx")

In [None]:
# Same as the nearest-vp merge, but for the subsequent vp: adds its timestamp
# (seconds) and point geometry, keyed on subseq_vp_idx.
subseq_vp_renames = {
    "vp_idx": "subseq_vp_idx",
    "location_timestamp_local_sec": "subseq_sec",
    "geometry": "subseq_vp_geometry",
}
subseq_vp_gdf = vp_gdf.rename(columns=subseq_vp_renames)
gdf3 = pd.merge(gdf2, subseq_vp_gdf, how="inner", on="subseq_vp_idx")

In [None]:
def plot_one_trip(gdf: gpd.GeoDataFrame, one_trip: str):
    """
    Map the stops, nearest vp and subsequent vp for a single trip.

    Args:
        gdf: frame with a `trip_instance_key` column and the geometry
            columns `stop_geometry`, `nearest_vp_geometry` and
            `subseq_vp_geometry`.
        one_trip: trip_instance_key of the trip to plot.

    Returns:
        The interactive map produced by `explore`, with three layers
        (stops in black, nearest vp in orange, subsequent vp in yellow)
        and a layer control added.
    """
    trip_rows = gdf[gdf.trip_instance_key == one_trip]

    # Base layer: stops. This call creates the map that later layers join.
    stops_layer = trip_rows.set_geometry("stop_geometry")
    trip_map = stops_layer.explore(
        categorical=True,
        legend=False,
        color="black",
        tiles="CartoDB Positron",
        name="Stops",
    )

    # Overlay layers, added in order: (geometry column, color, layer name).
    vp_layers = [
        ("nearest_vp_geometry", "orange", "nearest vp"),
        ("subseq_vp_geometry", "yellow", "subseq vp"),
    ]
    for geometry_col, layer_color, layer_name in vp_layers:
        trip_map = trip_rows.set_geometry(geometry_col).explore(
            m=trip_map,
            color=layer_color,
            name=layer_name,
        )

    # Optional: a control for toggling the named layers on and off.
    folium.LayerControl().add_to(trip_map)

    return trip_map

In [None]:
# Print each test trip's key, then show its map underneath.
for trip_key in test_trips:
    print(f"trip_instance_key: {trip_key}")
    display(plot_one_trip(gdf3, trip_key))

In [None]:
# Look at a single stop: keep only stop_sequence 68, then map the first
# test trip.
one_stop_gdf = gdf3[gdf3.stop_sequence == 68]
test_map = plot_one_trip(one_stop_gdf, test_trips[0])

In [None]:
# Bare expression so the notebook renders the single-stop map as cell output.
test_map

In [None]:
def look_at_arrival_time(df, one_trip):
    """
    Display one trip's stop arrival times next to the timestamps of the
    nearest and subsequent vp, ordered by stop_sequence.

    Args:
        df: frame with the columns `trip_instance_key`, `stop_sequence`,
            `nearest_sec`, `arrival_time_sec` and `subseq_sec`
            (the `*_sec` columns hold seconds).
        one_trip: trip_instance_key of the trip to show.

    Returns:
        None. The table is shown with `display`.
    """
    def seconds_to_time(seconds):
        # Read the values as seconds and keep only the time-of-day part.
        return pd.to_datetime(seconds, unit="s").dt.time

    trip_rows = (
        df[df.trip_instance_key == one_trip]
        .sort_values("stop_sequence")
        .reset_index()
    )

    trip_rows = trip_rows.assign(
        nearest_vp=seconds_to_time(trip_rows.nearest_sec),
        arrival_time=seconds_to_time(trip_rows.arrival_time_sec),
        subseq_vp=seconds_to_time(trip_rows.subseq_sec),
    )

    # Column order puts the arrival time between the two vp timestamps.
    display(
        trip_rows[["stop_sequence", "nearest_vp", "arrival_time", "subseq_vp"]]
    )
    

In [None]:
# Show the arrival-time table for each test trip.
for trip_key in test_trips:
    look_at_arrival_time(gdf3, trip_key)