# Explore `projected_shape_meters`

* Now that `map_partitions` gets us `shape_meters` in 5 min, let's use it as much as we can
* Spot check that if `loop_or_inlining==0`, we can just use this and go on
* `loop_or_inlining==1` can pose challenges, so figure out a way to bring in direction, either through a normalized (x, y) direction vector or a readable direction
* Loop or inlining segments have proven hard to cut reliably overall, and the remaining problem cases may never be fully fixed. If we can use direction, the approach may be a bit more robust.
* Speed can be calculated either within a segment or using endpoints, and we should opt for a simpler, streamlined approach that's performant. 
* For normal shapes, we might be able to fill in the RT `stop_times` table while we're at it.

In [None]:
import altair as alt
import geopandas as gpd
import numpy as np
import pandas as pd

from segment_speed_utils import helpers
from segment_speed_utils.project_vars import SEGMENT_GCS, PROJECT_CRS
from shared_utils import rt_dates
from calitp_data_analysis import calitp_color_palette as cp

# Date looked up from shared_utils.rt_dates; it is interpolated into every
# parquet path read below.
analysis_date = rt_dates.DATES["sep2023"]

# Turn off Altair's max-rows check so larger dataframes can be charted.
alt.data_transformers.disable_max_rows()

In [None]:
# RT trips: the distinct trip_instance_keys found in the usable vp table
rt_trips = pd.read_parquet(
    f"{SEGMENT_GCS}vp_usable_{analysis_date}",
    columns=["trip_instance_key"],
).drop_duplicates()

# Attach shape_array_key and route identifiers from the scheduled trips;
# the inner merge keeps only scheduled trips that also appear in RT
trip_to_shape = pd.merge(
    helpers.import_scheduled_trips(
        analysis_date,
        columns=[
            "trip_instance_key", "shape_array_key",
            "route_id", "route_short_name",
        ],
        get_pandas=True,
    ),
    rt_trips,
    on="trip_instance_key",
    how="inner",
)

# Shapes flagged as loop_or_inlining == 1 (filtered while reading),
# joined to the RT trips that run on them
shapes_loop_inlining = pd.merge(
    pd.read_parquet(
        f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet",
        columns=["shape_array_key", "loop_or_inlining"],
        filters=[[("loop_or_inlining", "==", 1)]],
    ).drop_duplicates(),
    trip_to_shape,
    on="shape_array_key",
    how="inner",
)

In [None]:
# trip_instance_keys of the RT trips whose shape is flagged loop_or_inlining
loopy_trips = shapes_loop_inlining["trip_instance_key"].tolist()

In [None]:
def merge_vp_with_projected_shape_meters(
    analysis_date: str,
    vp_filters: list = None
) -> pd.DataFrame:
    """
    Attach the projection results to the usable vehicle positions (vp).

    Reads the vp projection parquet for `analysis_date` and inner-merges it
    onto the usable vp table on `vp_idx`, so only vp present in both
    tables are returned.

    Args:
        analysis_date: date string interpolated into the parquet paths.
        vp_filters: filters passed to `pd.read_parquet` when reading the
            usable vp, e.g. `[[("trip_instance_key", "in", trip_keys)]]`.
            None (default) reads every vp.

    Returns:
        The vp identifier columns (gtfs_dataset_key, gtfs_dataset_name,
        vp_idx, trip_instance_key, trip_id) plus every column stored in
        the projection file.
    """
    # The projection file is read in full; the inner merge below narrows it
    # to whichever vp survive `vp_filters`.
    projected_shape_meters = pd.read_parquet(
        f"{SEGMENT_GCS}projection/vp_projected_{analysis_date}.parquet"
    )

    vp = pd.read_parquet(
        f"{SEGMENT_GCS}vp_usable_{analysis_date}",
        filters = vp_filters,
        columns = ["gtfs_dataset_key", "gtfs_dataset_name", 
                   "vp_idx", "trip_instance_key", "trip_id"]
    ).merge(
        projected_shape_meters,
        on = "vp_idx",
        how = "inner"
    )
    
    return vp

In [None]:
# Only read vp that belong to trips on loop or inlining shapes
loopy_trip_filter = [[("trip_instance_key", "in", loopy_trips)]]

vp = merge_vp_with_projected_shape_meters(
    analysis_date, vp_filters=loopy_trip_filter
)

In [None]:
#https://stackoverflow.com/questions/10226551/whats-the-most-pythonic-way-to-calculate-percentage-changes-on-a-list-of-numbers
def pct_change(arr):
    """
    Percent change of each element relative to the one before it.

    Args:
        arr: 1-D sequence or array of numbers.

    Returns:
        Float array the same length as `arr`. The first element is 0
        (nothing precedes it); element i is
        100 * (arr[i] - arr[i-1]) / abs(arr[i-1]).
        A previous value of 0 yields inf / nan, as numpy division does.
    """
    # Coerce to float up front: with integer input, zeros_like would create
    # an integer array and the fractional changes would be truncated
    # before being scaled to percent.
    values = np.asarray(arr, dtype=float)

    pct = np.zeros_like(values)
    pct[1:] = np.diff(values) / np.abs(values[:-1])

    return pct * 100

def number_of_elements_over_threshold(
    arr, 
    pct_change_threshold
):
    """
    Count the elements of `arr` whose percent change from the previous
    element is strictly greater than `pct_change_threshold`.

    The comparison is signed, so decreases are never counted for a
    non-negative threshold.
    """
    exceeds_threshold = pct_change(arr) > pct_change_threshold
    return int(np.count_nonzero(exceeds_threshold))

In [None]:
# Find the jumpiest trips: gather each trip's shape_meters into a list,
# then count how many vp-to-vp increases exceed 20%
vp2 = (
    vp.groupby("trip_instance_key")
    .agg({"shape_meters": lambda x: list(x)})
    .reset_index()
    .assign(
        big_jumps=lambda df: df.shape_meters.map(
            lambda meters: number_of_elements_over_threshold(meters, 20)
        )
    )
)

In [None]:
# Every Cal-ITP palette concatenated into one sequence of colors.
all_colors = (cp.CALITP_CATEGORY_BRIGHT_COLORS + cp.CALITP_CATEGORY_BOLD_COLORS + 
              cp.CALITP_DIVERGING_COLORS + cp.CALITP_SEQUENTIAL_COLORS
             )


# Hand-picked trips to plot: trip_instance_key -> color used for that trip's
# chart line and map. The trailing comments appear to name the operator and
# the intended color.
# NOTE(review): "kings green" and "santa maria yellow" both use
# CALITP_CATEGORY_BOLD_COLORS[2], and "sd green" / "big blue bus green" both
# use CALITP_CATEGORY_BRIGHT_COLORS[3] -- confirm the indexes match the
# colors named in the comments.
final_trip_keys = {
    "afaf17f1c2816652f0e4522a5c7f206b": cp.CALITP_CATEGORY_BRIGHT_COLORS[1], # anaheim orange
    "a880d82a382929aa1de15be733f10a51": cp.CALITP_CATEGORY_BOLD_COLORS[2], # kings green
    "d628de22f56dbb4c0e3f8242a2fe78d3": cp.CALITP_CATEGORY_BRIGHT_COLORS[0], # vine blue
    "f1a0a79baa78db2c26b3248a30662a7b": cp.CALITP_CATEGORY_BRIGHT_COLORS[5], # presidgo purple
    "26146503b5bf0235c0c8fe98dcd8d90b": cp.CALITP_SEQUENTIAL_COLORS[4], # dumbarton navy
    "4f76b1c357ee534ac931a5c1bd1cbb87": cp.CALITP_CATEGORY_BOLD_COLORS[2], # santa maria yellow
    "e5bcf460be0899f437b1e53b7f3feced": cp.CALITP_CATEGORY_BOLD_COLORS[4], # tcrta gray blue
    "18cc0764a463566e8690f0d44c32c199": cp.CALITP_CATEGORY_BRIGHT_COLORS[3], # sd green
    "45d8634a92be1fa10ae4f4aa5aa6d5b9": cp.CALITP_CATEGORY_BRIGHT_COLORS[4], # foothill light blue
    "aa851696959462180fe04f189dc75584": cp.CALITP_CATEGORY_BRIGHT_COLORS[3], # big blue bus green
}

In [None]:
# Loop / inlining rows that belong to the hand-picked trips
picked_trip_rows = shapes_loop_inlining.loc[
    shapes_loop_inlining.trip_instance_key.isin(list(final_trip_keys))
]

# Shape geometries (requested in EPSG:4326) for those trips
shapes = helpers.import_scheduled_shapes(
    analysis_date,
    columns=["shape_array_key", "shape_id", "geometry"],
    get_pandas=True,
    crs="EPSG:4326",
).merge(picked_trip_rows, on="shape_array_key", how="inner")

In [None]:
# Pair each selected trip's shape with its vp, then add the plotting color
# and a more readable feed name
gdf = pd.merge(
    shapes,
    vp,
    on="trip_instance_key",
    how="inner",
).assign(
    color=lambda df: df.trip_instance_key.map(final_trip_keys),
    gtfs_dataset_name=lambda df: df.gtfs_dataset_name.str.replace(
        "VehiclePositions", "Vehicle Positions"
    ),
)

In [None]:
def make_chart_map(df: pd.DataFrame, one_trip: str):
    """
    Plot how the projected shape meters looks for one trip,
    then map that trip's shape.
    
    vp_idx is ordered by timestamp, use as x.

    Args:
        df: vp joined to shape geometry. Columns read here:
            trip_instance_key, vp_idx, shape_meters, gtfs_dataset_name,
            trip_id, color, shape_id, geometry. `.explore()` is called on
            a subset of it, so it presumably has to be a GeoDataFrame.
        one_trip: trip_instance_key of the trip to plot.

    Displays an Altair line chart followed by an interactive map;
    nothing is returned.
    """
    # Filter to the trip once and reuse it for both the chart and the map
    trip_rows = df[df.trip_instance_key==one_trip]
    # The chart only gets the tabular columns
    subset_df = trip_rows.drop(columns = "geometry")
    # One row per shape for the map
    subset_gdf = trip_rows[["shape_id", "geometry"]].drop_duplicates()
    
    GTFS_DATA = subset_df.gtfs_dataset_name.iloc[0]
    # The subtitle is labelled "trip_id", so read trip_id rather than route_id
    TRIP_ID = subset_df.trip_id.iloc[0]
    COLOR = subset_df.color.iloc[0]
    
    font="Lato"
    grid_bool = False
    grid_opacity = 0
    WIDTH = 400
    HEIGHT = 400
    
    # Both axes are hidden the same way: no labels, invisible grid and ticks
    hidden_axis = dict(
        labels=False, 
        gridOpacity=grid_opacity, 
        tickOpacity=grid_opacity
    )
    
    chart = (alt.Chart(subset_df)
             .mark_line(color=COLOR, strokeWidth=1.5)
             .encode(
                 x=alt.X("vp_idx", 
                         title = "", 
                         axis=alt.Axis(**hidden_axis)
                        ),
                 y=alt.Y("shape_meters:Q", 
                         title = "",
                         axis=alt.Axis(**hidden_axis)
                        ),
             ).properties(
                 title = {"text": [GTFS_DATA], 
                         "subtitle": f"trip_id: {TRIP_ID}"}
             )
            )
       
    chart = (chart
             .configure(font=font)
             .configure_axis(grid=grid_bool, labelFontSize=0, 
                             gridOpacity=grid_opacity, 
                             domainOpacity=grid_opacity)
             .configure_axisBand(grid=grid_bool)
             .configure_view(strokeOpacity=grid_opacity)
             .properties(width = WIDTH*1.2, height = HEIGHT)
            )
    
    display(chart)
    
    # Static-looking map: dragging, scroll zoom and zoom buttons are all off
    m = subset_gdf.explore(
        "shape_id", 
        tiles = "CartoDB Positron",
        legend = False,
        style_kwds = {
            "color": COLOR, 
        },
        map_kwds = {
            "dragging": False,
            "scrollWheelZoom": False,
        },
        zoom_control = False,
        width = WIDTH, height = HEIGHT
    )
    
    display(m)


In [None]:
# Which feeds and routes do the plotted trips cover?
gdf.loc[:, ["gtfs_dataset_name", "route_id", "route_short_name"]].drop_duplicates()

In [None]:
# One chart + map per hand-picked trip
for t in final_trip_keys:
    make_chart_map(gdf, one_trip=t)