# Set up different segments for nearest neighbors

In [1]:
import geopandas as gpd
import pandas as pd
import shapely

from calitp_data_analysis import geography_utils
from segment_speed_utils import helpers, neighbor
from shared_utils import rt_dates

from segment_speed_utils.project_vars import GTFS_DATA_DICT, SEGMENT_GCS

import sys 
# Add the local scripts/ directory to the module search path.
sys.path.append('scripts/')

# Analysis date shared by every read and helper call below
# (the "mar2024" entry of rt_dates.DATES).
analysis_date = rt_dates.DATES["mar2024"]

In [None]:
# Not executed: the query below is wrapped in a string literal. It appears to
# record how the hard-coded operator_name / operator_key / operator_trip_keys
# values used in this notebook were originally looked up.
# NOTE(review): the query requests "gtfs_dataset_key" but then reads
# `schedule_gtfs_dataset_key` -- presumably the helper renames the column;
# confirm before re-enabling this cell.
'''
operator_name = "Big Blue Bus Schedule"

r10_df = helpers.import_scheduled_trips(
    analysis_date,
    columns = ["gtfs_dataset_key", "name",
               "trip_instance_key",],
    filters = [[("name", "==", operator_name),
                ("route_short_name", "==", "R10")]],
    get_pandas = True
)

operator_key = r10_df.schedule_gtfs_dataset_key.iloc[0]
operator_trip_keys = r10_df.trip_instance_key.unique().tolist()

operator_name, operator_key, operator_trip_keys
'''

In [2]:
# Hard-coded test subset: one operator and six of its trips.
# These values are only meaningful for the `analysis_date` chosen above.
operator_name = "Big Blue Bus Schedule"
operator_key = "efbbd5293be71f7a5de0cf82b59febe1"

# trip_instance_key values used to filter every parquet read below.
operator_trip_keys = [
    "d98e5cc1fb62e6e5ed0030934ef8a396",
    "c3577c96a714df975f5525b246987e41",
    "f1866ab43b90dd43adb557fe527177d2",
    "8a452661390b69cd00fdcde92066cc4a",
    "5d3dc572ce27ef902024f6f135cf59c2",
    "3505bf6a20e8d29e83e545784a421bc7",
]

In [None]:
# Stop segments for the analysis date, restricted to the selected trips
# through the parquet `filters` argument.
stop_segments = gpd.read_parquet(
    f"{SEGMENT_GCS}segment_options/"
    f"stop_segments_{analysis_date}.parquet",
    filters=[[("trip_instance_key", "in", operator_trip_keys)]],
)

In [None]:
# Inspect which columns (and dtypes) the stop segments file provides.
stop_segments.dtypes

In [None]:
# Preview the first two stop segment rows.
stop_segments.head(2)

The stop-level input needs a `stop_primary_direction` column. Whenever we set up our own "stops", use `ffill` to populate that column.

## Regular stop times

In [None]:
# Scheduled stop times for the selected trips, requested with the stop
# direction column and point geometry in EPSG:4326.
stop_times = helpers.import_scheduled_stop_times(
    analysis_date,
    columns=[
        "trip_instance_key",
        "shape_array_key",
        "stop_sequence",
        "stop_id",
        "stop_pair",
        "stop_primary_direction",
        "geometry",
    ],
    filters=[[("trip_instance_key", "in", operator_trip_keys)]],
    with_direction=True,
    get_pandas=True,
    crs="EPSG:4326",
)

# Confirm the columns / dtypes that came back.
stop_times.dtypes

In [None]:
# Preview the scheduled stop times.
stop_times.head()

In [None]:
# Merge the regular stop times with vehicle positions (vp) ahead of the
# nearest neighbor step. Judging by the recorded dtypes of the speedmap run
# further down, this adds `vp_geometry` / `vp_idx` and renames `geometry` to
# `stop_geometry` -- TODO confirm the same holds for this input.
gdf = neighbor.merge_stop_vp_for_nearest_neighbor(
    stop_times, analysis_date)

gdf.dtypes

In [None]:
# Preview the first two merged stop / vp rows.
gdf.head(2)

In [None]:
# Run the nearest neighbor step on the merged stop / vp frame for regular stops.
results = neighbor.add_nearest_neighbor_result(gdf, analysis_date)

# Check which columns the nearest neighbor step returns.
results.dtypes

In [None]:
# Preview the nearest neighbor results for regular stops.
results.head()

## Interpolated segments

In [4]:
# Speedmap stop times for the selected trips. Per the recorded dtypes, this
# file has the same columns as the regular stop times plus a float
# `stop_sequence1` alongside the integer `stop_sequence`.
speedmap_stops = gpd.read_parquet(
    f"{SEGMENT_GCS}stop_time_expansion/"
    f"speedmap_stop_times_{analysis_date}.parquet",
    filters = [[("trip_instance_key", "in", operator_trip_keys)]]
)
speedmap_stops.dtypes

trip_instance_key           object
shape_array_key             object
stop_sequence                int64
stop_id                     object
stop_pair                   object
stop_primary_direction      object
geometry                  geometry
stop_sequence1             float64
dtype: object

In [3]:
# Same stop / vp merge as for the regular stop times, applied to the speedmap
# stops. The recorded output shows `stop_geometry`, `vp_geometry` and `vp_idx`
# columns on the result.
# NOTE(review): the saved execution count of this cell (3) is lower than that
# of the cell loading `speedmap_stops` (4), so the recorded outputs come from
# an out-of-order run -- Restart & Run All before relying on them.
gdf_speedmap = neighbor.merge_stop_vp_for_nearest_neighbor(
    speedmap_stops, analysis_date)

gdf_speedmap.dtypes

trip_instance_key           object
shape_array_key             object
stop_sequence                int64
stop_id                     object
stop_pair                   object
stop_primary_direction      object
stop_geometry             geometry
stop_sequence1             float64
vp_geometry               geometry
vp_idx                      object
dtype: object

In [6]:
# Keep only the rows where stop_sequence1 differs from stop_sequence,
# i.e. the rows carrying decimal stop_sequence1 values.
gdf_speedmap.loc[
    gdf_speedmap["stop_sequence"].ne(gdf_speedmap["stop_sequence1"])
]

Unnamed: 0,trip_instance_key,shape_array_key,stop_sequence,stop_id,stop_pair,stop_primary_direction,stop_geometry,stop_sequence1,vp_geometry,vp_idx
27,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.44098 34.03014),18.06,"LINESTRING (-118.49104 34.01601, -118.49265 34...","[6163904, 6163908, 6163909, 6163910, 6163911, ..."
28,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.43030 34.03158),18.11,"LINESTRING (-118.49104 34.01601, -118.49265 34...","[6163904, 6163908, 6163909, 6163910, 6163911, ..."
29,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.41947 34.03168),18.17,"LINESTRING (-118.49104 34.01601, -118.49265 34...","[6163904, 6163908, 6163909, 6163910, 6163911, ..."
30,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.40896 34.03007),18.23,"LINESTRING (-118.49104 34.01601, -118.49265 34...","[6163904, 6163908, 6163909, 6163910, 6163911, ..."
31,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.39820 34.02911),18.28,"LINESTRING (-118.49104 34.01601, -118.49265 34...","[6163904, 6163908, 6163909, 6163910, 6163911, ..."
...,...,...,...,...,...,...,...,...,...,...
218,f1866ab43b90dd43adb557fe527177d2,d78657863e0bc2635c13d67785cb8b03,15,111,111__229,Southbound,POINT (-118.40166 34.02959),15.74,"LINESTRING (-118.48973 34.01560, -118.48820 34...","[6164031, 6164032, 6164033, 6164034, 6164036, ..."
219,f1866ab43b90dd43adb557fe527177d2,d78657863e0bc2635c13d67785cb8b03,15,111,111__229,Southbound,POINT (-118.41225 34.03119),15.79,"LINESTRING (-118.48973 34.01560, -118.48820 34...","[6164031, 6164032, 6164033, 6164034, 6164036, ..."
220,f1866ab43b90dd43adb557fe527177d2,d78657863e0bc2635c13d67785cb8b03,15,111,111__229,Southbound,POINT (-118.42295 34.03187),15.85,"LINESTRING (-118.48973 34.01560, -118.48820 34...","[6164031, 6164032, 6164033, 6164034, 6164036, ..."
221,f1866ab43b90dd43adb557fe527177d2,d78657863e0bc2635c13d67785cb8b03,15,111,111__229,Southbound,POINT (-118.43373 34.03133),15.91,"LINESTRING (-118.48973 34.01560, -118.48820 34...","[6164031, 6164032, 6164033, 6164034, 6164036, ..."


In [7]:
# Nearest neighbor step on the speedmap stops. The recorded output shows
# `nearest_vp_idx` plus the `*_trio` columns on the result, while
# `vp_geometry` / `vp_idx` are no longer present.
results_speedmap = neighbor.add_nearest_neighbor_result(gdf_speedmap, analysis_date)

results_speedmap.dtypes

trip_instance_key                  object
shape_array_key                    object
stop_sequence                       int64
stop_id                            object
stop_pair                          object
stop_primary_direction             object
stop_geometry                    geometry
stop_sequence1                    float64
nearest_vp_idx                      int64
vp_idx_trio                        object
location_timestamp_local_trio      object
vp_coords_trio                   geometry
dtype: object

In [8]:
# Nearest neighbor results for only the rows whose stop_sequence1 differs
# from stop_sequence (the decimal stop_sequence1 values).
results_speedmap.loc[
    results_speedmap["stop_sequence"].ne(results_speedmap["stop_sequence1"])
]

Unnamed: 0,trip_instance_key,shape_array_key,stop_sequence,stop_id,stop_pair,stop_primary_direction,stop_geometry,stop_sequence1,nearest_vp_idx,vp_idx_trio,location_timestamp_local_trio,vp_coords_trio
27,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.44098 34.03014),18.06,6163925,"[6163924, 6163925, 6163926]","[2024-03-13T07:22:06.000000, 2024-03-13T07:22:...","LINESTRING (-118.44891 34.02792, -118.44260 34..."
28,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.43030 34.03158),18.11,6163926,"[6163925, 6163926, 6163927]","[2024-03-13T07:22:51.000000, 2024-03-13T07:24:...","LINESTRING (-118.44260 34.02987, -118.42242 34..."
29,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.41947 34.03168),18.17,6163926,"[6163925, 6163926, 6163927]","[2024-03-13T07:22:51.000000, 2024-03-13T07:24:...","LINESTRING (-118.44260 34.02987, -118.42242 34..."
30,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.40896 34.03007),18.23,6163927,"[6163926, 6163927, 6163928]","[2024-03-13T07:24:20.000000, 2024-03-13T07:25:...","LINESTRING (-118.42242 34.03169, -118.41077 34..."
31,3505bf6a20e8d29e83e545784a421bc7,37468c4ffbbbab83f270b8fcecb6de61,18,228,228__106,Eastbound,POINT (-118.39820 34.02911),18.28,6163928,"[6163927, 6163928, 6163929]","[2024-03-13T07:25:05.000000, 2024-03-13T07:25:...","LINESTRING (-118.41077 34.03050, -118.39928 34..."
...,...,...,...,...,...,...,...,...,...,...,...,...
218,f1866ab43b90dd43adb557fe527177d2,d78657863e0bc2635c13d67785cb8b03,15,111,111__229,Southbound,POINT (-118.40166 34.02959),15.74,6164044,"[6164043, 6164044, 6164045]","[2024-03-13T14:43:40.000000, 2024-03-13T14:45:...","LINESTRING (-118.40574 34.02985, -118.40142 34..."
219,f1866ab43b90dd43adb557fe527177d2,d78657863e0bc2635c13d67785cb8b03,15,111,111__229,Southbound,POINT (-118.41225 34.03119),15.79,6164041,"[6164040, 6164041, 6164042]","[2024-03-13T14:40:42.000000, 2024-03-13T14:42:...","LINESTRING (-118.41893 34.03175, -118.41304 34..."
220,f1866ab43b90dd43adb557fe527177d2,d78657863e0bc2635c13d67785cb8b03,15,111,111__229,Southbound,POINT (-118.42295 34.03187),15.85,6164109,"[6164108, 6164109, 6164110]","[2024-03-13T16:19:02.000000, 2024-03-13T16:19:...","LINESTRING (-118.41968 34.03191, -118.42393 34..."
221,f1866ab43b90dd43adb557fe527177d2,d78657863e0bc2635c13d67785cb8b03,15,111,111__229,Southbound,POINT (-118.43373 34.03133),15.91,6164110,"[6164109, 6164110, 6164111]","[2024-03-13T16:19:46.000000, 2024-03-13T16:21:...","LINESTRING (-118.42393 34.03187, -118.43489 34..."


**TODO:** If this input makes it through the first nearest neighbor script, the other scripts will probably be fine, although some of the `group_cols` will change.
This still needs to be tested.