# Select a couple of simpler trips to compare

* Compare methodologies, which differ when handling more complex shapes
* But do the results differ even for simpler shapes?
* Start with Big Blue Bus and LA Metro

In [None]:
import geopandas as gpd
import pandas as pd

from shared_utils import rt_dates, rt_utils
from segment_speed_utils import helpers
from segment_speed_utils.project_vars import SEGMENT_GCS, GCS_FILE_PATH
from calitp_data_analysis import utils

# GCS folder holding the v2 segment speed view parquets read below
RT_DELAY_GCS = f"{GCS_FILE_PATH}rt_delay/v2_segment_speed_views/"
# Date string used in the file names read and written in this notebook
analysis_date = rt_dates.DATES["sep2023"]

## Prep Eric's segments

In [None]:
# Operators to compare, identified by ITP ID
# (presumably Big Blue Bus and LA Metro per the intro -- confirm)
itp_ids = [
    182,
    300,
]

# Don't narrow down time-of-day yet, we might select a trip from any
# of these
time_of_day = [
    "AM_Peak", "Midday", "PM_Peak"
]

# Read one parquet per (operator, time-of-day) combination.
# zip(itp_ids, time_of_day) would stop at the shorter list and only load
# (182, AM_Peak) and (300, Midday), so iterate over the full cross product.
# NOTE(review): assumes a parquet exists for every combination -- confirm.
eric_dfs = [
    gpd.read_parquet(
        f"{RT_DELAY_GCS}{itp_id}_{analysis_date}_{time}.parquet")
    for itp_id in itp_ids
    for time in time_of_day
]

# Stack all operators / time periods into a single frame with a clean index
df_eric = pd.concat(eric_dfs, axis=0).reset_index(drop=True)

## Prep Tiff's segments

* Use what's concatenated from Eric to filter down preemptively
* Merge in identifiers so we can compare on `shape_id`, `trip_id`, `stop_id`, and `stop_sequence` instead of `shape_array_key` and `trip_instance_key`

In [None]:
# Unique (shape_id, trip_id) pairs present in Eric's segments
shape_trips = (
    df_eric[["shape_id", "trip_id"]]
    .drop_duplicates()
)

In [None]:
# Scheduled trips for the analysis date, carrying both the GTFS identifiers
# (trip_id, shape_id) and the keys (trip_instance_key, shape_array_key)
scheduled_trips = helpers.import_scheduled_trips(
    analysis_date,
    columns=[
        "gtfs_dataset_key",
        "name",
        "trip_id",
        "trip_instance_key",
        "shape_id",
        "shape_array_key",
        "route_id",
        "direction_id",
    ],
    get_pandas=True,
)

# Rename so the key column matches the name used in the merge further down
scheduled_trips = scheduled_trips.rename(
    columns={"gtfs_dataset_key": "schedule_gtfs_dataset_key"}
)

In [None]:
# Grab the trip_instance_keys we need and use it
# to filter the speeds parquet down
subset_trips = scheduled_trips.merge(
    shape_trips,
    on = ["shape_id", "trip_id"],
    how = "inner"
)

trip_instances = subset_trips.trip_instance_key.unique().tolist()
subset_shapes = subset_trips.shape_array_key.unique().tolist()

In [None]:
# Stop segments, restricted to the shapes kept above
segments = gpd.read_parquet(
    f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet",
    filters=[[("shape_array_key", "in", subset_shapes)]],
)
segments = segments.drop(columns=["geometry_arrowized", "district_name"])

# Speeds for the kept trips, with the schedule identifiers attached
filtered_trip_speeds = pd.read_parquet(
    f"{SEGMENT_GCS}speeds_stop_segments_{analysis_date}.parquet",
    filters=[[("trip_instance_key", "in", trip_instances)]],
)
filtered_trip_speeds = pd.merge(
    filtered_trip_speeds,
    subset_trips,
    on=["trip_instance_key", "shape_array_key"],
    how="inner",
)

# Attach the speeds to the segments they were calculated on
df_tiff = pd.merge(
    segments,
    filtered_trip_speeds,
    on=["schedule_gtfs_dataset_key", "shape_array_key", "stop_sequence"],
    how="inner",
)

## Merge dfs

* `stop_sequence` doesn't merge exactly, but that's fine: Eric cuts shorter segments, so his `stop_sequence` can take values like 1.25, 1.50, etc.
* Leave it in the merge keys for now and use a left merge, so unmatched rows show up as `left_only`

In [None]:
# Identifier columns present in both datasets; these are the merge keys
cols_to_compare = [
    "trip_id",
    "shape_id",
    "stop_id",
    "stop_sequence",
    "route_id",
    "direction_id",
]

# Left merge keeps every one of Eric's rows; `_merge` shows which rows
# found a match in Tiff's data
speed_df = pd.merge(
    left=df_eric[[*cols_to_compare, "speed_mph"]].rename(
        columns={"speed_mph": "eric_speed_mph"}
    ),
    right=df_tiff[[*cols_to_compare, "speed_mph"]].rename(
        columns={"speed_mph": "tiff_speed_mph"}
    ),
    on=cols_to_compare,
    how="left",
    indicator=True,
)

In [None]:
# Export each methodology's segments to SEGMENT_GCS
utils.geoparquet_gcs_export(
    df_eric,
    SEGMENT_GCS,
    f"speeds_eric_{analysis_date}"
)

utils.geoparquet_gcs_export(
    df_tiff,
    SEGMENT_GCS,
    f"speeds_tiff_{analysis_date}"
)

# Save the merged speed comparison table alongside them
speed_df.to_parquet(f"{SEGMENT_GCS}speeds_comparison_{analysis_date}.parquet")

## Side-by-Side Maps

In [None]:
# Compare the (rows, columns) of the two datasets
df_eric.shape, df_tiff.shape

In [None]:
#df_tiff.route_id.unique()

In [None]:
#one_route = "96-13168"
#df_tiff[df_tiff.route_id==one_route].explore(
#    "route_id", tiles = "CartoDB Positron")

In [None]:
#df_tiff[df_tiff.route_id==one_route].trip_id.unique()

In [None]:
# Trips to map side by side: short label -> trip_id
trips_to_try = {
    "metro_720": "10720012750651-JUNE23",  # route_id: 720-13168
    "metro_901": "10901000590843-JUNE23",  # route_id: 901-13168
    "metro_550": "10550001350610-JUNE23",  # route_id: 550-13168
    "metro_230": "10230000830600-JUNE23",  # route_id: 230-13168
    "metro_96": "10096002510743-JUNE23",  # route_id: 96-13168
    "bbb1": "908521",  # route_id: 3639
    # "bbb2": "",  # route_id
}

In [None]:
# Why can't we return 2 map objects? Second one doesn't plot
# Just break it apart then
def _speed_map(gdf: gpd.GeoDataFrame, one_trip: str):
    """
    Filter `gdf` to the rows whose `trip_id` equals `one_trip` and
    return the interactive map from `explore()`, colored by `speed_mph`.
    """
    one_trip_gdf = gdf[gdf.trip_id == one_trip]

    return one_trip_gdf.explore(
        "speed_mph",
        tiles="CartoDB Positron",
        cmap=rt_utils.ZERO_THIRTY_COLORSCALE,
    )


def eric_map(gdf: gpd.GeoDataFrame, one_trip: str):
    """
    Map one trip from Eric's segments, colored by `speed_mph`.

    Kept as its own function so each map can be returned (and displayed)
    from its own notebook cell.
    """
    return _speed_map(gdf, one_trip)


def tiff_map(gdf: gpd.GeoDataFrame, one_trip: str):
    """
    Map one trip from Tiff's segments, colored by `speed_mph`.

    Uses the same tiles and color scale as `eric_map` so the two maps
    can be compared side by side.
    """
    return _speed_map(gdf, one_trip)

In [None]:
# Eric's segments for the `metro_720` trip
eric_map(df_eric, trips_to_try["metro_720"])

In [None]:
# Tiff's segments for the `metro_720` trip
tiff_map(df_tiff, trips_to_try["metro_720"])

In [None]:
# Eric's segments for the `metro_901` trip
eric_map(df_eric, trips_to_try["metro_901"])

In [None]:
# Tiff's segments for the `metro_901` trip
tiff_map(df_tiff, trips_to_try["metro_901"])

In [None]:
# Eric's segments for the `metro_550` trip
eric_map(df_eric, trips_to_try["metro_550"])

In [None]:
# Tiff's segments for the `metro_550` trip
tiff_map(df_tiff, trips_to_try["metro_550"])

In [None]:
# Eric's segments for the `metro_230` trip
eric_map(df_eric, trips_to_try["metro_230"])

In [None]:
# Tiff's segments for the `metro_230` trip
tiff_map(df_tiff, trips_to_try["metro_230"])

In [None]:
# Eric's segments for the `metro_96` trip
eric_map(df_eric, trips_to_try["metro_96"])

In [None]:
# Tiff's segments for the `metro_96` trip
tiff_map(df_tiff, trips_to_try["metro_96"])

In [None]:
# Eric's segments for the `bbb1` trip
eric_map(df_eric, trips_to_try["bbb1"])

In [None]:
# Tiff's segments for the `bbb1` trip
tiff_map(df_tiff, trips_to_try["bbb1"])