# Grab all the loopy/inlining/odd shapes
Select a couple of examples to see what the pattern is for `stop_sequence` and `shape_meters`.

In [1]:
import os
os.environ['USE_PYGEOS'] = '0'

import dask.dataframe as dd
import folium
import geopandas as gpd
import pandas as pd
import numpy as np
import shapely
import sys

from segment_speed_utils import (helpers, gtfs_schedule_wrangling, 
                                 wrangle_shapes)
from segment_speed_utils.project_vars import SEGMENT_GCS, analysis_date

# Temp append so we can import functions from scripts/
sys.path.append("./scripts/")
import prep_stop_segments
import cut_stop_segments
import loop_utils
import loopy

# Hard-coded example keys used to spot-check individual shapes.
# `another_shape` is matched against `shape_array_key` further down.
# NOTE(review): `shape_with_utc` is presumably also a shape_array_key, but it
# is not referenced in the visible cells -- confirm it is still needed.
shape_with_utc = "f765b9d12fcca0173b4e3ddbc0374d18"
another_shape = "007cf76062f5957d4e38ea54e624c0ad"



In [2]:
# Identify the loopy/inlining shapes for the analysis date.
loop_shapes = loop_utils.grab_loop_shapes(analysis_date)

# Stop-level rows with geometry attached, restricted to the loop shapes above.
stop_times_with_geom = prep_stop_segments.stop_times_aggregated_to_shape_array_key(
        analysis_date, loop_shapes)

# Materialize the result in memory (presumably a lazy dask collection, given
# the `.compute()` call -- confirm against prep_stop_segments).
st_loops = stop_times_with_geom.compute()

In [3]:
# Add the per-stop visit information, then order rows by shape and stop
# sequence so each shape's stops can be read top to bottom.
# (The preview further down shows `num_visits` and `prior_stop_seq` columns,
# presumably added by assign_visits_to_stop -- confirm in loop_utils.)
gdf = (loop_utils.assign_visits_to_stop(st_loops)
       .sort_values(["shape_array_key", "stop_sequence"])
       .reset_index(drop=True)
      )

In [4]:
# Keep only the rows belonging to the single example shape.
gdf2 = gdf[gdf.shape_array_key.isin([another_shape])]

In [5]:
# Preview the first rows of the example shape's stops.
gdf2.head()

Unnamed: 0,shape_array_key,geometry,feed_key,name,stop_id,stop_sequence,stop_name,stop_geometry,num_visits,prior_stop_seq
0,007cf76062f5957d4e38ea54e624c0ad,"LINESTRING (267265.540 -584480.665, 267265.976...",a7ba6f075198e9bf9152fab6c7faf0f6,San Diego Schedule,99437,1,13 St & Broadway,POINT (267258.726 -584480.878),2,
1,007cf76062f5957d4e38ea54e624c0ad,"LINESTRING (267265.540 -584480.665, 267265.976...",a7ba6f075198e9bf9152fab6c7faf0f6,San Diego Schedule,99342,2,Broadway & Park Bl,POINT (267153.629 -584488.294),1,1.0
2,007cf76062f5957d4e38ea54e624c0ad,"LINESTRING (267265.540 -584480.665, 267265.976...",a7ba6f075198e9bf9152fab6c7faf0f6,San Diego Schedule,13311,3,Broadway & 8th Av,POINT (266847.851 -584503.046),1,2.0
3,007cf76062f5957d4e38ea54e624c0ad,"LINESTRING (267265.540 -584480.665, 267265.976...",a7ba6f075198e9bf9152fab6c7faf0f6,San Diego Schedule,13312,4,Broadway & 5th Av,POINT (266591.896 -584512.306),1,3.0
4,007cf76062f5957d4e38ea54e624c0ad,"LINESTRING (267265.540 -584480.665, 267265.976...",a7ba6f075198e9bf9152fab6c7faf0f6,San Diego Schedule,13313,5,Broadway & 2nd Av,POINT (266334.471 -584519.095),1,4.0


In [6]:
# Grab relevant subset based on stop sequence values to get stop geometry subset


In [7]:
def get_shape_components(
    shape_geometry: shapely.geometry.LineString,
) -> tuple:
    """
    Break a shape into its vertices and the running distance along them.

    Args:
        shape_geometry: the line to decompose. Vertices are read from
            `shape_geometry.simplify(0)`.

    Returns:
        A tuple `(shape_coords_list, cumulative_distances)`:
        - shape_coords_list: list of shapely Points, one per vertex.
        - cumulative_distances: float array whose i-th entry is the distance
          travelled along the vertices to reach vertex i (first entry is 0),
          in the units of the geometry's coordinates. For a non-empty shape
          it has exactly one entry per vertex.
    """
    shape_coords_list = [
        shapely.Point(coord)
        for coord in shape_geometry.simplify(0).coords
    ]

    # Distance from each vertex to the one before it. Pairing the points
    # directly yields exactly len(shape_coords_list) - 1 values; measuring
    # two GeoSeries of different lengths against each other gets aligned on
    # index and leaves an extra trailing NaN, which made the cumulative array
    # one element longer than the vertex list.
    distance_from_prior = [
        current.distance(prior)
        for prior, current in zip(shape_coords_list, shape_coords_list[1:])
    ]

    # Prepend 0 for the first vertex so the array lines up one-to-one with
    # shape_coords_list.
    cumulative_distances = np.concatenate(
        ([0.0], np.cumsum(distance_from_prior))
    )

    return shape_coords_list, cumulative_distances

In [8]:
def super_project(
    current_stop_seq: int,
    shape_geometry: shapely.geometry.LineString,
    stop_geometry_array: np.ndarray,
    stop_sequence_array: np.ndarray,
) -> tuple:
    """
    Cut the piece of a shape that runs from the prior stop to the current stop.

    Instead of trusting the projected position of the current stop (which can
    be ambiguous on loopy / inlining shapes), the destination is located by
    moving the straight-line distance between the two stop points away from
    the projected prior stop, forward or backward along the shape.

    Args:
        current_stop_seq: stop sequence value to build the segment for.
        shape_geometry: full shape line.
        stop_geometry_array: stop point geometries for this shape.
        stop_sequence_array: stop sequence values for this shape.

    Returns:
        A tuple `(subset_shape_geom_with_od, origin_destination_geom)`:
        - subset_shape_geom_with_od: array of points made of the origin,
          the shape vertices selected between the stops, and the destination.
        - origin_destination_geom: the two interpolated end points, as
          returned by `loopy.interpolate_projected_points`.
    """
    shape_coords_list, cumulative_distances = get_shape_components(
        shape_geometry)

    # (1) Given a stop sequence value, find the stop_sequence values
    # just flanking it. stop_sequence does not have to increase in
    # increments of 1, only monotonically.
    subset_seq = loopy.include_prior(
        stop_sequence_array, current_stop_seq)

    # (2) Grab the matching stop geometries.
    # NOTE(review): subset_seq is used as a positional index here; this
    # assumes include_prior returns positions rather than raw stop_sequence
    # values -- confirm in loopy.
    # https://stackoverflow.com/questions/5508352/indexing-numpy-array-with-another-numpy-array
    subset_stop_geom = stop_geometry_array[subset_seq]

    # (3a) Project the prior/current stops onto the shape. Only the relative
    # order of these two values is used, to get a sense of direction.
    subset_stop_proj = loopy.project_list_of_coords(
        shape_geometry, subset_stop_geom)
    start_stop = subset_stop_proj[0]
    end_stop = subset_stop_proj[-1]

    # (3b) Straight-line distance between the two stop points.
    distance_between_stops = subset_stop_geom[0].distance(
        subset_stop_geom[-1])

    # (4) Back out the destination from the known distance between stops.
    if start_stop > end_stop:
        # Inlining: the bus doubles back toward the start of the shape.
        destin_stop = start_stop - distance_between_stops

        # Flip the pair so the end points are ordered along the shape
        # (the destination has the lower distance value).
        origin_destination_geom = loopy.interpolate_projected_points(
            shape_geometry, [destin_stop, start_stop])
    else:
        # Normal case. Ties (both stops project to the same distance) are
        # handled here too; previously neither branch ran and the function
        # failed with an UnboundLocalError.
        destin_stop = start_stop + distance_between_stops

        origin_destination_geom = loopy.interpolate_projected_points(
            shape_geometry, [start_stop, destin_stop])

    # (5) Find the shape vertices that fall between origin and destination.
    # NOTE(review): in the doubling-back case this tuple is (larger, smaller);
    # presumably cut_shape_by_origin_destination copes with either order --
    # confirm in loopy.
    shape_dist_subset_indices = loopy.cut_shape_by_origin_destination(
        cumulative_distances,
        (start_stop, destin_stop)
    )

    # Stops can sit between two consecutive vertices, in which case nothing
    # is selected; the segment is then just origin -> destination.
    if len(shape_dist_subset_indices) > 0:
        first_idx = shape_dist_subset_indices[0]
        last_idx = shape_dist_subset_indices[-1]
        subset_shape_geom = shape_coords_list[first_idx: last_idx + 1]
    else:
        subset_shape_geom = []

    # Attach the origin and destination, otherwise the segment will not
    # reach the actual stops and only covers the trunk portion.
    subset_shape_geom_with_od = np.array(
        [origin_destination_geom[0]] +
        subset_shape_geom +
        [origin_destination_geom[-1]]
    )

    return subset_shape_geom_with_od, origin_destination_geom

In [9]:
def stop_segment_components_to_geoseries(
    subset_shape_geom_array: np.ndarray,
    subset_stop_geom_array: np.ndarray,
    crs: str = "EPSG:3310"
) -> tuple:
    """
    Convert a segment's points and its stops into geoseries for plotting.

    Args:
        subset_shape_geom_array: points making up the segment; converted
            with geom_type="line".
        subset_stop_geom_array: stop points; converted with geom_type="point".
        crs: coordinate reference system passed to both conversions.

    Returns:
        (segment geoseries, stops geoseries), in that order.
    """
    def _as_geoseries(geoms, kind):
        # Both conversions share the same CRS.
        return loopy.array_to_geoseries(geoms, geom_type=kind, crs=crs)

    line_series = _as_geoseries(subset_shape_geom_array, "line")
    point_series = _as_geoseries(subset_stop_geom_array, "point")

    return line_series, point_series

In [10]:
def plot_segments_and_stops(
    segment: gpd.GeoSeries,
    stops: gpd.GeoSeries
):
    """
    Draw a segment and its stops as two layers on one interactive map.

    The segment is drawn first on "CartoDB Positron" tiles, the stops are
    added on top of that same map, and a layer control is attached so each
    layer can be toggled.

    Returns:
        The map object returned by `stops.explore`.
    """
    base_map = segment.explore(tiles="CartoDB Positron", name="segment")
    layered_map = stops.explore(m=base_map, name="stops")

    layer_toggle = folium.LayerControl()
    layer_toggle.add_to(layered_map)

    return layered_map

In [11]:
# Every row of gdf2 belongs to the same shape, so the first row's geometry
# is the shape line for all of them.
shape_geometry = gdf2.geometry.iloc[0]
stop_geometry_array = np.array(gdf2.stop_geometry)
stop_sequence_array = np.array(gdf2.stop_sequence)

# Stop sequence to inspect; change this to look at a different segment.
current_stop_seq = 13

# Cut the segment leading into the chosen stop.
test_segment, test_stops = super_project(
    current_stop_seq,
    shape_geometry,
    stop_geometry_array,
    stop_sequence_array,
)


# Convert the raw point arrays into geoseries so they can be mapped.
segment_to_plot, stops_to_plot = stop_segment_components_to_geoseries(
    test_segment, test_stops)



In [12]:
# Render the cut segment and its two end stops on an interactive map.
plot_segments_and_stops(segment_to_plot, stops_to_plot)