# Explore `projected_shape_meters`

* Now that `map_partitions` gets us `shape_meters` in 5 min, let's use it as much as we can
* Spot check that if `loop_or_inlining==0`, we can just use this and go on
* `loop_or_inlining==1` can pose challenges, so figure out a way to bring in direction (either through a normalized x, y direction vector or a readable direction)
* Loop or inlining segments have proven hard to cut successfully overall, and the last bit may never be fixed. If we can use direction, it may be a bit more robust.
* Speed can be calculated either within a segment or using endpoints, and we should opt for a simpler, streamlined approach that's performant. 
* For normal shapes, we might be able to fill in the RT `stop_times` table while we're at it.

In [None]:
import altair as alt
import dask.dataframe as dd
import geopandas as gpd
import numpy as np
import pandas as pd

from segment_speed_utils import helpers
from segment_speed_utils.project_vars import SEGMENT_GCS, PROJECT_CRS
from shared_utils import rt_dates

# Date used in every file path below; looked up under the "sep2023" key
analysis_date = rt_dates.DATES["sep2023"]

In [None]:
# Unique trip_instance_keys that show up in the usable vehicle positions
rt_trips = pd.read_parquet(
    f"{SEGMENT_GCS}vp_usable_{analysis_date}", columns=["trip_instance_key"]
).drop_duplicates()

# Attach shape_array_key from scheduled trips; the inner merge keeps
# only the trips that also appear in rt_trips
trip_to_shape = helpers.import_scheduled_trips(
    analysis_date,
    get_pandas=True,
    columns=["trip_instance_key", "shape_array_key"],
).merge(rt_trips, how="inner", on="trip_instance_key")

# One row per (shape_array_key, loop_or_inlining) from the stop segments
# file, joined back to trips so each trip carries its shape's flag
shapes_loop_inlining = pd.merge(
    pd.read_parquet(
        f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet",
        columns=["shape_array_key", "loop_or_inlining"],
    ).drop_duplicates(),
    trip_to_shape,
    how="inner",
    on="shape_array_key",
)

In [None]:
# Random draw of 25 trips whose shape has loop_or_inlining == 0
ok_trips = (
    shapes_loop_inlining
    .loc[shapes_loop_inlining.loop_or_inlining == 0]
    .sample(25)["trip_instance_key"]
    .tolist()
)

ok_trips

In [None]:
# Random draw of 25 trips whose shape has loop_or_inlining == 1
loopy_trips = (
    shapes_loop_inlining
    .loc[shapes_loop_inlining.loop_or_inlining == 1]
    .sample(25)["trip_instance_key"]
    .tolist()
)

loopy_trips

In [None]:
# All sampled trips, non-loopy first
subset_trips = ok_trips + loopy_trips

# Projection output, keyed by vp_idx
# (presumably this is where shape_meters comes from -- confirm against the file)
projected_shape_meters = pd.read_parquet(
    f"{SEGMENT_GCS}projection/vp_projected_{analysis_date}.parquet"
)

# Read only the sampled trips' vehicle positions, attach the projection
# on vp_idx, and drop the timestamp column
vp = pd.merge(
    pd.read_parquet(
        f"{SEGMENT_GCS}vp_usable_{analysis_date}",
        filters=[[("trip_instance_key", "in", subset_trips)]],
    ),
    projected_shape_meters,
    how="inner",
    on="vp_idx",
).drop(columns=["location_timestamp"])

In [None]:
def plot_shape_meters(df: pd.DataFrame, one_trip: str) -> None:
    """
    Plot how the projected shape meters looks for one trip.

    vp_idx is ordered by timestamp, use as x.

    Args:
        df: vehicle positions; must have the columns trip_instance_key,
            gtfs_dataset_name, trip_id, vp_idx and shape_meters.
        one_trip: the trip_instance_key to plot.

    Prints gtfs_dataset_name, trip_instance_key and trip_id for the trip,
    then displays a line chart of shape_meters against vp_idx.
    If df holds no rows for one_trip, a note is printed and nothing
    is charted.
    """
    subset_df = df[df.trip_instance_key == one_trip]

    # A trip can have zero rows here (e.g. dropped by an upstream inner
    # merge). Without this guard, .iloc[0] raises IndexError and stops
    # any loop that is plotting several trips.
    if subset_df.empty:
        print(f"trip_instance_key: {one_trip} has no rows to plot")
        return

    print(f"{subset_df.gtfs_dataset_name.iloc[0]}")
    print(f"trip_instance_key: {one_trip}, trip_id: {subset_df.trip_id.iloc[0]}")

    chart = (alt.Chart(subset_df)
             .mark_line()
             .encode(
                 x="vp_idx",
                 y="shape_meters:Q"
             )
            )

    # display is the notebook (IPython) builtin
    display(chart)


In [None]:
# One chart per sampled trip with loop_or_inlining == 0
for t in ok_trips:
    plot_shape_meters(df=vp, one_trip=t)

In [None]:
# One chart per sampled trip with loop_or_inlining == 1
for t in loopy_trips:
    plot_shape_meters(df=vp, one_trip=t)

In [None]:
# Speeds comparison table for the analysis date
speed = pd.read_parquet(f"{SEGMENT_GCS}speeds_comparison_{analysis_date}.parquet")

In [None]:
# Single trip_id to inspect; show its rows in the speeds table
trip = "10096002510743-JUNE23"
speed.loc[speed.trip_id == trip]

In [None]:
# Look up the scheduled trip row(s) for this trip_id
metro_trip = helpers.import_scheduled_trips(
    analysis_date,
    get_pandas=True,
    filters=[[("trip_id", "==", trip)]],
    columns=["trip_instance_key", "trip_id"],
)

# Take the first match's trip_instance_key for filtering vp below
trip_key = metro_trip["trip_instance_key"].iloc[0]

In [None]:
# Pared-down vp for the one trip selected above
vp_pared = pd.read_parquet(
    f"{SEGMENT_GCS}vp_pared_stops_{analysis_date}",
    filters=[[("trip_instance_key", "==", trip_key)]],
)

In [None]:
# 27:13, 27:56  *  29:14, 29:52, * 30:13
# NOTE(review): the values above look like mm:ss timestamps of vp around
# this stop, with * marking a boundary -- confirm with the author
vp_pared.loc[vp_pared.stop_sequence == 36]

In [None]:
# Show the scheduled trip row(s) matched for `trip`
metro_trip