In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd

from shared_utils import rt_dates
from segment_speed_utils import helpers, wrangle_shapes
from segment_speed_utils.project_vars import SEGMENT_GCS, SHARED_GCS

# Analysis date, looked up by key in rt_dates.DATES.
analysis_date = rt_dates.DATES["oct2023"]

# Columns identifying a road; adding segment_sequence identifies one segment of it.
road_id_cols = ["linearid", "mtfcc", "primary_direction"]
segment_identifier_cols = [*road_id_cols, "segment_sequence"]

# linearid values of the two roads this notebook is restricted to.
two_roads = ["1104259334520", "110413812414"]

In [2]:
# Shape-to-road crosswalk for the analysis date; the filter keeps only
# rows whose linearid is one of the two selected roads.
crosswalk_path = (
    f"{SEGMENT_GCS}roads_staging/"
    f"shape_road_crosswalk_{analysis_date}.parquet"
)

shape_road_crosswalk = pd.read_parquet(
    crosswalk_path,
    filters = [[("linearid", "in", two_roads)]],
)

In [3]:
# Pick a single shape, import the scheduled trips that use it,
# then pick a single trip from those.
one_shape = "2419613ed3a3e49420a24f8d7efd3a4e"

one_shape_filter = [[("shape_array_key", "==", one_shape)]]

shape_to_trip = helpers.import_scheduled_trips(
    analysis_date,
    columns = ["trip_instance_key", "shape_array_key"],
    filters = one_shape_filter,
)

# Label-based lookup: the row whose index label is 5.
one_trip = shape_to_trip["trip_instance_key"][5]
one_trip

'39f9ef24457401947b5d270956f7812e'

In [16]:
# Attach the trips for the selected shape to the crosswalk rows.
# Inner join: only shape_array_key values present in both tables remain.
# NOTE: this overwrites its own input, so re-running the cell without
# re-running the crosswalk read above merges the trip columns a second time.
shape_road_crosswalk = pd.merge(
    shape_road_crosswalk,
    shape_to_trip,
    how = "inner",
    on = "shape_array_key",
)

In [4]:
# Condensed nearest-neighbor vp file for the analysis date, reading only the selected trip.
vp_nn_path = f"{SEGMENT_GCS}condensed/vp_nearest_neighbor_{analysis_date}.parquet"

vp_nn = gpd.read_parquet(
    vp_nn_path,
    filters = [[("trip_instance_key", "==", one_trip)]],
)

In [5]:
# Look up which shape the selected trip uses.
one_trip_filter = [[("trip_instance_key", "==", one_trip)]]

subset_shape = helpers.import_scheduled_trips(
    analysis_date,
    columns = ["trip_instance_key", "shape_array_key"],
    filters = one_trip_filter,
    get_pandas = True,
)

# Import that shape's geometry in EPSG:3310 and merge the trip key back on
# (merge uses the columns the two tables have in common).
shapes_projected = helpers.import_scheduled_shapes(
    analysis_date,
    columns = ["shape_array_key", "geometry"],
    filters = [[("shape_array_key", "in", subset_shape.shape_array_key)]],
    crs = "EPSG:3310",
).merge(subset_shape)

# Buffer while still in EPSG:3310 (the distance of 25 is in that CRS's units),
# then convert the buffered polygons to EPSG:4326.
buffered_geometry = shapes_projected.geometry.buffer(25)

shapes = shapes_projected.assign(
    geometry = buffered_geometry
).to_crs("EPSG:4326")

In [6]:
# Display the buffered shape polygon(s) (EPSG:4326) with the trip key attached.
shapes

Unnamed: 0,shape_array_key,geometry,trip_instance_key
0,2419613ed3a3e49420a24f8d7efd3a4e,"POLYGON ((-122.47242 37.70511, -122.47236 37.7...",39f9ef24457401947b5d270956f7812e


In [7]:
# Road segments for the two selected roads: identifier columns plus geometry.
road_segment_cols = [*segment_identifier_cols, "geometry"]

road_segments = gpd.read_parquet(
    f"{SHARED_GCS}road_segments/",
    columns = road_segment_cols,
    filters = [[("linearid", "in", two_roads)]],
)

In [8]:
# This spatial join can be skipped once the shape-to-road crosswalk has already been generated.
def sjoin_shape_to_road(shapes, roads):
    """
    Spatially join shapes to road segments.

    Roads are reprojected to the CRS of `shapes`, then every
    shape / road pair whose geometries intersect is kept (inner join).

    Parameters
    ----------
    shapes : GeoDataFrame
        Must contain shape_array_key and trip_instance_key.
    roads : GeoDataFrame
        Must contain linearid, mtfcc and segment_sequence.

    Returns
    -------
    DataFrame
        De-duplicated combinations of shape_array_key, trip_instance_key,
        linearid, mtfcc and segment_sequence. Geometry is not returned.
    """
    output_cols = [
        "shape_array_key", "trip_instance_key",
        "linearid", "mtfcc", "segment_sequence",
    ]

    # Both layers must share a CRS before the spatial predicate is evaluated.
    roads_in_shape_crs = roads.to_crs(shapes.crs)

    intersecting_pairs = gpd.sjoin(
        shapes,
        roads_in_shape_crs,
        how = "inner",
        predicate = "intersects"
    )

    return intersecting_pairs[output_cols].drop_duplicates()

#road_segments_sjoin = sjoin_shape_to_road(shapes, road_segments)

In [18]:
# Keep only road segments that appear in the crosswalk, picking up
# the shape and trip keys from it (inner join on the road segment keys).
crosswalk_merge_cols = ["linearid", "mtfcc", "segment_sequence"]

road_segments2 = pd.merge(
    road_segments,
    shape_road_crosswalk,
    how = "inner",
    on = crosswalk_merge_cols,
)

In [19]:
# Display the road segments joined to the crosswalk (one row per segment per trip).
road_segments2

Unnamed: 0,linearid,mtfcc,primary_direction,segment_sequence,geometry,shape_array_key,trip_instance_key
0,110413812414,S1400,Southbound,0,"LINESTRING (-217403.567 -31756.999, -217405.44...",2419613ed3a3e49420a24f8d7efd3a4e,5673f53f969bdc20741e13812dad41d1
1,110413812414,S1400,Southbound,0,"LINESTRING (-217403.567 -31756.999, -217405.44...",2419613ed3a3e49420a24f8d7efd3a4e,6d87761a1fec379c0c1ec79030fe1805
2,110413812414,S1400,Southbound,0,"LINESTRING (-217403.567 -31756.999, -217405.44...",2419613ed3a3e49420a24f8d7efd3a4e,e5a237bbc6b0710664a0d1653b1b92fe
3,110413812414,S1400,Southbound,0,"LINESTRING (-217403.567 -31756.999, -217405.44...",2419613ed3a3e49420a24f8d7efd3a4e,1a224331091730e297c409b8391dacdd
4,110413812414,S1400,Southbound,0,"LINESTRING (-217403.567 -31756.999, -217405.44...",2419613ed3a3e49420a24f8d7efd3a4e,478f105a52171bc012c92d56941037e6
...,...,...,...,...,...,...,...
115,1104259334520,S1400,Southbound,0,"LINESTRING (-217507.140 -31767.089, -217508.71...",2419613ed3a3e49420a24f8d7efd3a4e,3fd8625d501a5e06f9e9e483dd1c236e
116,1104259334520,S1400,Southbound,0,"LINESTRING (-217507.140 -31767.089, -217508.71...",2419613ed3a3e49420a24f8d7efd3a4e,b2a91ee4ab373221c2587c7d4546f29e
117,1104259334520,S1400,Southbound,0,"LINESTRING (-217507.140 -31767.089, -217508.71...",2419613ed3a3e49420a24f8d7efd3a4e,42684d4cc65a91db9caeff6f8b704568
118,1104259334520,S1400,Southbound,0,"LINESTRING (-217507.140 -31767.089, -217508.71...",2419613ed3a3e49420a24f8d7efd3a4e,3f76c0f82dac574b8d5acbe42ce50390


In [20]:
import shapely

# First coordinate of each segment linestring, as a point (stop_type 0).
segment_first_points = road_segments2.apply(
    lambda row: shapely.Point(row.geometry.coords[0]),
    axis = 1
)

road_segments0 = road_segments2.assign(
    geometry = segment_first_points,
    stop_type = 0,
)

# Last coordinate of each segment linestring, as a point (stop_type 1).
segment_last_points = road_segments2.apply(
    lambda row: shapely.Point(row.geometry.coords[-1]),
    axis = 1
)

road_segments1 = road_segments2.assign(
    geometry = segment_last_points,
    stop_type = 1,
)

In [21]:
# Stack the first-point and last-point tables into one long table,
# ordered so each segment's stop_type 0 row sorts before its stop_type 1 row.
stacked_endpoints = pd.concat([road_segments0, road_segments1], axis = 0)

road_segments_long = (
    stacked_endpoints
    .sort_values(["linearid", "segment_sequence", "stop_type"])
    .rename(columns = {"primary_direction": "stop_primary_direction"})
    .reset_index(drop = True)
)

In [22]:
# Display the long table of segment endpoints (stop_type 0 = first point, 1 = last point).
road_segments_long

Unnamed: 0,linearid,mtfcc,stop_primary_direction,segment_sequence,geometry,shape_array_key,trip_instance_key,stop_type
0,110413812414,S1400,Southbound,0,POINT (-217403.567 -31756.999),2419613ed3a3e49420a24f8d7efd3a4e,5673f53f969bdc20741e13812dad41d1,0
1,110413812414,S1400,Southbound,0,POINT (-217403.567 -31756.999),2419613ed3a3e49420a24f8d7efd3a4e,6d87761a1fec379c0c1ec79030fe1805,0
2,110413812414,S1400,Southbound,0,POINT (-217403.567 -31756.999),2419613ed3a3e49420a24f8d7efd3a4e,e5a237bbc6b0710664a0d1653b1b92fe,0
3,110413812414,S1400,Southbound,0,POINT (-217403.567 -31756.999),2419613ed3a3e49420a24f8d7efd3a4e,1a224331091730e297c409b8391dacdd,0
4,110413812414,S1400,Southbound,0,POINT (-217403.567 -31756.999),2419613ed3a3e49420a24f8d7efd3a4e,478f105a52171bc012c92d56941037e6,0
...,...,...,...,...,...,...,...,...
235,1104259334520,S1400,Southbound,0,POINT (-217491.737 -32761.464),2419613ed3a3e49420a24f8d7efd3a4e,3fd8625d501a5e06f9e9e483dd1c236e,1
236,1104259334520,S1400,Southbound,0,POINT (-217491.737 -32761.464),2419613ed3a3e49420a24f8d7efd3a4e,b2a91ee4ab373221c2587c7d4546f29e,1
237,1104259334520,S1400,Southbound,0,POINT (-217491.737 -32761.464),2419613ed3a3e49420a24f8d7efd3a4e,42684d4cc65a91db9caeff6f8b704568,1
238,1104259334520,S1400,Southbound,0,POINT (-217491.737 -32761.464),2419613ed3a3e49420a24f8d7efd3a4e,3f76c0f82dac574b8d5acbe42ce50390,1


In [23]:
from segment_speed_utils import neighbor

# Hand the segment endpoints (treated as "stops") to the neighbor module.
# NOTE(review): presumably this attaches vp data for the analysis date to
# each endpoint row -- confirm against segment_speed_utils.neighbor.
gdf = neighbor.merge_stop_vp_for_nearest_neighbor(road_segments_long, analysis_date)

In [25]:
# Nearest-neighbor output for each endpoint row; later cells read the
# nearest_vp_idx, vp_idx_trio, location_timestamp_local_trio and vp_coords_trio columns.
results = neighbor.add_nearest_neighbor_result(
    gdf,
    analysis_date,
)

In [41]:
# Work on a copy so `results` stays untouched by the reprojection that follows.
results2 = results.copy(deep = True)

In [42]:
PROJECT_CRS = "EPSG:3310"

# Reproject both geometry columns to the project CRS.
stop_geometry_projected = results2.stop_geometry.to_crs(PROJECT_CRS)
vp_coords_trio_projected = results2.vp_coords_trio.to_crs(PROJECT_CRS)

results2 = results2.assign(
    stop_geometry = stop_geometry_projected,
    vp_coords_trio = vp_coords_trio_projected,
)

In [43]:
# Only shapes present in results2 can survive the inner merge below, so push
# that restriction into the read instead of importing every scheduled shape.
# This matches the filtered shape import used earlier in the notebook.
shape_keys_needed = results2.shape_array_key.unique().tolist()

shapes = helpers.import_scheduled_shapes(
    analysis_date,
    columns = ["shape_array_key", "geometry"],
    filters = [[("shape_array_key", "in", shape_keys_needed)]],
    crs = PROJECT_CRS
).dropna(subset="geometry")

# Attach each row's shape line as shape_geometry; rows whose shape has
# no geometry are dropped by the inner join.
gdf = pd.merge(
    results2,
    shapes.rename(columns = {"geometry": "shape_geometry"}),
    on = "shape_array_key",
    how = "inner"
)

In [44]:
# Check column dtypes; stop_geometry and vp_coords_trio should both be geometry.
results2.dtypes

linearid                           object
mtfcc                              object
stop_primary_direction             object
segment_sequence                    int16
stop_geometry                    geometry
shape_array_key                    object
trip_instance_key                  object
stop_type                           int64
nearest_vp_idx                      int64
vp_idx_trio                        object
location_timestamp_local_trio      object
vp_coords_trio                   geometry
dtype: object

In [30]:
import interpolate_stop_arrival

In [None]:
# Segment identifier columns, using the renamed stop_primary_direction column.
segment_identifier_cols2 = [
    "linearid",
    "mtfcc",
    "stop_primary_direction",
    "segment_sequence",
]

In [53]:
# Call project_points_onto_shape once per row; each call returns a
# (stop_meters, interpolated_arrival) pair.
projected_pairs = [
    interpolate_stop_arrival.project_points_onto_shape(
        row.stop_geometry,
        row.vp_coords_trio,
        row.shape_geometry,
        row.location_timestamp_local_trio,
    )
    for row in gdf.itertuples()
]

stop_meters_series = [meters for meters, _arrival in projected_pairs]
stop_arrival_series = [arrival for _meters, arrival in projected_pairs]

# Columns to keep, and the ordering: start point (stop_type 0) before
# end point (stop_type 1) within each segment-trip.
output_cols = segment_identifier_cols2 + [
    "trip_instance_key", "shape_array_key",
    "stop_type",
    "stop_meters", "arrival_time",
]
sort_cols = segment_identifier_cols2 + ["trip_instance_key", "stop_type"]

results2 = (
    gdf.assign(
        stop_meters = stop_meters_series,
        arrival_time = stop_arrival_series,
    )[output_cols]
    .sort_values(sort_cols)
    .reset_index(drop = True)
)

  return lib.line_locate_point(line, other)


In [58]:
# Find segment-trips whose two endpoints have different arrival times:
# compare the earliest and latest arrival_time within each segment-trip.
segment_trip_cols = segment_identifier_cols2 + ["trip_instance_key"]

grouped_df = results2.groupby(segment_trip_cols)

min_arrival = grouped_df.arrival_time.min().reset_index()
max_arrival = grouped_df.arrival_time.max().reset_index()

# After the merge, arrival_time_x is the minimum and arrival_time_y the maximum.
min_max_arrival = pd.merge(
    min_arrival,
    max_arrival,
    on = segment_trip_cols,
).query('arrival_time_x != arrival_time_y')

In [61]:
# Keep only rows belonging to segment-trips whose min and max arrival times differ.
segment_trip_keys = segment_identifier_cols2 + ["trip_instance_key"]

results3 = pd.merge(
    results2,
    min_max_arrival[segment_trip_keys],
    how = "inner",
    on = segment_trip_keys,
)

In [64]:
from segment_speed_utils import segment_calcs

# Convert arrival_time via segment_calcs.convert_timestamp_to_seconds, then order the rows.
# stop_type is part of the sort keys on purpose: the following cell pairs each
# row with the next row of its segment-trip via groupby(...).shift(-1), so the
# first-point row (stop_type 0) must reliably precede the last-point row
# (stop_type 1) instead of relying on whatever order the merge left behind.
results3 = segment_calcs.convert_timestamp_to_seconds(
    results3, ["arrival_time"]
).sort_values(
    segment_identifier_cols2 + ["trip_instance_key", "stop_type"]
).reset_index(drop=True)

In [65]:
trip_cols = segment_identifier_cols2 + ["trip_instance_key"]

# Within each segment-trip, pull the next row's values up onto the current row.
# The last row of every group gets NaN.
by_segment_trip = results3.groupby(
    trip_cols, observed=True, group_keys=False
)

results3 = results3.assign(
    subseq_arrival_time_sec = by_segment_trip.arrival_time_sec.shift(-1),
    subseq_stop_meters = by_segment_trip.stop_meters.shift(-1),
)

In [68]:
# Distance and time between each row and the subsequent row of its segment-trip.
results3_elapsed = results3.assign(
    meters_elapsed = results3.subseq_stop_meters - results3.stop_meters,
    sec_elapsed = results3.subseq_arrival_time_sec - results3.arrival_time_sec,
)

# derive_speed receives the (start, end) column pairs for distance and for time.
speed = segment_calcs.derive_speed(
    results3_elapsed,
    ("stop_meters", "subseq_stop_meters"),
    ("arrival_time_sec", "subseq_arrival_time_sec"),
)

In [74]:
# Summarize speed_mph for rows with no missing values where more than
# 250 meters_elapsed and more than 60 sec_elapsed separate the two endpoints.
speed_complete = speed.dropna()

speed_filtered = speed_complete.query(
    'meters_elapsed > 250 & sec_elapsed > 60'
)

speed_filtered["speed_mph"].describe()

count    38.000000
mean      6.528378
std       3.033379
min       0.814473
25%       4.293709
50%       6.238865
75%       9.318838
max      11.763124
Name: speed_mph, dtype: float64