# Stop-to-stop segments are losing their curves
* When we cut the `shape` at particular stop locations and string only those cut points together as a `shapely.geometry.LineString`, each segment is forced to be a straight line between stops.
* This is not ideal -- we need to keep all the points that are present in the `shape`, so that the segments follow any curves or turns along the streets.
* Find the endpoints of each segment and keep all of the `shape`'s points in between them; otherwise we lose the curves.

In [1]:
import dask_geopandas as dg
import dask.dataframe as dd
import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely

from update_vars import SEGMENT_GCS, COMPILED_CACHED_VIEWS, analysis_date



In [2]:
# Single shape used throughout this notebook to inspect segment cutting
test_shape = "b315bbbd7eff442b38f52615fde4a2d2"

# Previously cut stop segments. Filter to the test shape while the frame
# is still lazy, so .compute() only materializes the rows we need
# instead of pulling every shape into memory first.
all_segments = dg.read_parquet(
    f"{SEGMENT_GCS}test_stop_segments_{analysis_date}"
)
test = all_segments[all_segments.shape_array_key == test_shape].compute()

# Wide table: read row-wise below for shape_array_key, shape_geometry
# and the shape_meters array
df = gpd.read_parquet(
    "./scripts/data/stops_projected_wide.parquet",
    filters = [[("shape_array_key", "==", test_shape)]]
)

# Stops for the same shape, used as a map layer
stops = gpd.read_parquet(
    "./scripts/data/stops_projected.parquet",
    filters = [[("shape_array_key", "==", test_shape)]]
)

In [3]:
# Styling applied to a segment when it is highlighted on the map
highlight_style = {"fillColor": "#DD1C77", "fillOpacity": 0.6}

# Base layer: the existing segments, one categorical color per
# segment_sequence
m = test.explore(
    "segment_sequence",
    categorical = True,
    cmap = "tab20",
    legend = False,
    highlight = True,
    highlight_kwds = highlight_style,
    tiles = "CartoDB Positron",
    name = "segments",
)

# Second layer: the stops, drawn onto the same map object
stops.explore(
    "stop_sequence",
    m = m,
    categorical = False,
    name = "stops",
)

# Layer control lets each named layer be toggled on/off
folium.LayerControl().add_to(m)
m

In [4]:
def get_shape_inputs(row: gpd.GeoDataFrame) -> tuple:
    """
    Build the two distance arrays needed to cut one shape into segments.

    `row` is a single row produced by itertuples, so its values are read
    with getattr. It must carry `shape_meters` (array of stop distances)
    and `shape_geometry` (the shape's line geometry).

    Returns a tuple of 2 arrays:
    * stop break distances: `shape_meters` with 0 added at the front
      and the line's length added at the end
    * shape path distances: for every coord in the shape's line geometry,
      its distance along the line
    """
    stop_dists = getattr(row, "shape_meters")
    line = getattr(row, "shape_geometry")

    # Bracket the stop distances with the start (0) and end (full length)
    # of the shape, so the first and last pieces get cut too
    stop_break_dist_with_endpoints = np.array(
        [0, *stop_dists.tolist(), line.length]
    )

    # Project each vertex of the line back onto the line to find
    # how far along the shape it sits.
    # NOTE(review): project() measures to the nearest point on the line,
    # so for a shape that passes over the same location twice a vertex
    # may get the earlier distance -- confirm this is acceptable.
    vertex_dists = []
    for coord in line.coords:
        vertex = shapely.geometry.Point(coord)
        vertex_dists.append(line.project(vertex))

    shape_path_dist = np.array(vertex_dists)

    return stop_break_dist_with_endpoints, shape_path_dist


def get_shape_coords_up_to_stop(
    shape_path_dist: list, 
    stop_break_dist: list,
    start_end_tuple: tuple
) -> np.ndarray:
    """
    For one pair of start/end shape_meters, grab all the 
    shape path's distances in between.
    
    Ex: if (start, end) = (50, 150), then grab the subset of the 
    shape distance array that spans [50, 150], which could be 
    [50, 55, 70, 100, 120, 135, 150]
    
    Parameters:
    * shape_path_dist: distances along the shape for each of the
      shape's coords. A list or a numpy array is accepted.
    * stop_break_dist: not used here; kept so existing callers 
      don't need to change.
    * start_end_tuple: pair of (start_dist, end_dist). Anything that 
      unpacks into 2 values works (tuple, list, 2-element array).
    
    Returns a sorted numpy array of unique distances, which always
    includes start_dist and end_dist themselves.
    """
    start_dist, end_dist = start_end_tuple
    
    # Coerce so the element-wise comparisons below also work
    # when a plain list is passed in
    shape_path_dist = np.asarray(shape_path_dist)
    
    # Boolean mask for the shape path points that fall 
    # within start_dist to end_dist (inclusive)
    in_range = (
        (shape_path_dist >= start_dist) & 
        (shape_path_dist <= end_dist)
    )
    shape_path_subset = shape_path_dist[in_range]
    
    # Add start_dist and end_dist to the subset, since the stops 
    # rarely sit exactly on a shape coord. 
    # np.unique sorts and drops the duplicate when they do.
    shape_subset_with_endpoints = np.unique(np.array(
        [start_dist, *shape_path_subset.tolist(), end_dist]))
    
    return shape_subset_with_endpoints


def cut_stop_segments_for_shape(row: gpd.GeoDataFrame) -> pd.DataFrame:
    """
    For one row (one shape), find the distances along the shape 
    that make up each stop-to-stop segment.
    
    Every break distance (the stops' shape_meters plus the shape's 
    start and end) is paired with the break before it, and the shape 
    path distances spanning that pair are collected.
    
    Returns a dataframe with one row per break distance:
    * segment_cutoff_dist: distances making up the segment that 
      ends at this break (empty list for the first break)
    * shape_meters: the break distance itself
    * shape_array_key: the row's shape_array_key
    """
    stop_break_dist, shape_path_dist = get_shape_inputs(row)
    
    shape_key = getattr(row, "shape_array_key")
    
    # The first break is the start of the shape and has no prior break 
    # to pair with, so it gets an empty list.
    # Every other break is paired with the one before it: 
    # the slice [position-1: position+1] holds exactly those 2 values.
    shape_segments = [
        get_shape_coords_up_to_stop(
            shape_path_dist, 
            stop_break_dist,
            stop_break_dist[position-1: position+1]
        ) if position > 0 else []
        for position in range(len(stop_break_dist))
    ]
    
    shape_segment_cutoffs = pd.DataFrame().assign(
        segment_cutoff_dist = shape_segments,
        shape_meters = pd.Series(stop_break_dist),
        shape_array_key = shape_key,
    )

    return shape_segment_cutoffs

In [5]:
# Display the previously cut segments for the test shape
test

Unnamed: 0,geometry,shape_array_key,segment_sequence,stop_sequence
0,"LINESTRING (-131169.642 73212.380, -131111.530...",b315bbbd7eff442b38f52615fde4a2d2,0,1.0
1,"LINESTRING (-131111.530 73198.851, -131444.770...",b315bbbd7eff442b38f52615fde4a2d2,1,3.0
2,"LINESTRING (-131444.770 72367.757, -131445.751...",b315bbbd7eff442b38f52615fde4a2d2,2,4.0
3,"LINESTRING (-131445.751 72101.821, -131394.726...",b315bbbd7eff442b38f52615fde4a2d2,3,5.0
4,"LINESTRING (-131394.726 71653.259, -131197.471...",b315bbbd7eff442b38f52615fde4a2d2,4,6.0
5,"LINESTRING (-131197.471 71064.759, -131091.915...",b315bbbd7eff442b38f52615fde4a2d2,5,7.0
6,"LINESTRING (-131091.915 70351.389, -130871.334...",b315bbbd7eff442b38f52615fde4a2d2,6,8.0
7,"LINESTRING (-130871.334 70149.720, -130255.438...",b315bbbd7eff442b38f52615fde4a2d2,7,9.0
8,"LINESTRING (-130255.438 68903.138, -130263.216...",b315bbbd7eff442b38f52615fde4a2d2,8,10.0
9,"LINESTRING (-130263.216 68418.264, -130266.099...",b315bbbd7eff442b38f52615fde4a2d2,9,11.0


In [6]:
# Cut segments for every shape in df, keeping each shape's result.
# (Concatenating after the loop with only the loop variable would 
# keep just the last shape whenever df holds more than one row.)
per_shape_cutoffs = [
    cut_stop_segments_for_shape(row) for row in df.itertuples()
]

# Combine once at the end; pd.concat raises on an empty list, 
# so fall back to an empty dataframe when df has no rows
segment_cutoffs = (
    pd.concat(per_shape_cutoffs, axis=0, ignore_index=True)
    if per_shape_cutoffs
    else pd.DataFrame()
)


In [7]:
# Preview the cutoff distances: the first row of a shape has an empty list
segment_cutoffs.head()

Unnamed: 0,segment_cutoff_dist,shape_meters,shape_array_key
0,[],0.0,b315bbbd7eff442b38f52615fde4a2d2
1,"[0.0, 30.070079153609207, 59.68769037507417]",59.68769,b315bbbd7eff442b38f52615fde4a2d2
2,"[59.68769037507417, 72.12851381750923, 98.2020...",1031.72522,b315bbbd7eff442b38f52615fde4a2d2
3,"[1031.7252200790044, 1037.6165100344676, 1056....",1297.697547,b315bbbd7eff442b38f52615fde4a2d2
4,"[1297.697546644578, 1414.501786476603, 1444.51...",1755.207639,b315bbbd7eff442b38f52615fde4a2d2


In [8]:
# One row per shape: just the key and the full line geometry
shape_geometries = df[["shape_array_key", "shape_geometry"]]

# Attach the shape's geometry to each of its cutoff rows.
# validate checks that the key is unique on the shape side (1 shape : many rows).
segment_cutoffs_with_shape = pd.merge(
    left = shape_geometries,
    right = segment_cutoffs,
    how = "inner",
    on = "shape_array_key",
    validate = "1:m"
)

In [9]:
segment_line_geometry = []

for row in segment_cutoffs_with_shape.itertuples():
    shape_geom = getattr(row, "shape_geometry")
    segment_dist = getattr(row, "segment_cutoff_dist")
    
    # Turn each distance along the shape back into a point on the shape
    segment_points = [shape_geom.interpolate(i) for i in segment_dist]
    
    # A line needs at least 2 points. When the start and end distances 
    # are identical, only 1 unique distance is left, and building 
    # a LineString from a single point raises an error.
    # Use an empty line in that case, same as the first row of each shape
    # (which has no distances at all); empty lines are dropped before plotting.
    if len(segment_points) < 2:
        segment_points = []
    
    segment_line = shapely.geometry.LineString(segment_points)
    segment_line_geometry.append(segment_line)

In [10]:
# Add the rebuilt segment lines as a new column; the list is in the 
# same row order as the dataframe it was built from
segment_cutoffs_with_shape = segment_cutoffs_with_shape.assign(
    stop_segment_geometry = segment_line_geometry
)

  arr = construct_1d_object_array_from_listlike(values)


In [11]:
# Check that the new stop_segment_geometry column is present
segment_cutoffs_with_shape.columns

Index(['shape_array_key', 'shape_geometry', 'segment_cutoff_dist',
       'shape_meters', 'stop_segment_geometry'],
      dtype='object')

In [12]:
# segment_cutoff_dist holds arrays, which can't be kept 
# in a gdf passed to explore, so drop it first
without_cutoff_arrays = segment_cutoffs_with_shape.drop(
    columns = ["segment_cutoff_dist"]
)

# Make the rebuilt segment lines the active geometry
plot_me = without_cutoff_arrays.set_geometry(
    "stop_segment_geometry", crs="EPSG:3310"
)

plot_me.head()

Unnamed: 0,shape_array_key,shape_geometry,shape_meters,stop_segment_geometry
0,b315bbbd7eff442b38f52615fde4a2d2,"LINESTRING (-131169.642 73212.380, -131140.182...",0.0,GEOMETRYCOLLECTION EMPTY
1,b315bbbd7eff442b38f52615fde4a2d2,"LINESTRING (-131169.642 73212.380, -131140.182...",59.68769,"LINESTRING (-131169.642 73212.380, -131140.182..."
2,b315bbbd7eff442b38f52615fde4a2d2,"LINESTRING (-131169.642 73212.380, -131140.182...",1031.72522,"LINESTRING (-131111.530 73198.851, -131099.495..."
3,b315bbbd7eff442b38f52615fde4a2d2,"LINESTRING (-131169.642 73212.380, -131140.182...",1297.697547,"LINESTRING (-131444.770 72367.757, -131444.864..."
4,b315bbbd7eff442b38f52615fde4a2d2,"LINESTRING (-131169.642 73212.380, -131140.182...",1755.207639,"LINESTRING (-131445.751 72101.821, -131449.135..."


In [13]:
# Keep only the rows with a non-empty segment line
# (the first row of each shape has an empty geometry)
has_segment_line = ~plot_me.stop_segment_geometry.is_empty
plot_me2 = plot_me[has_segment_line]

# Map the rebuilt segments, colored by shape_meters
plot_me2.explore("shape_meters", tiles="Carto DB Positron")