# Grab all the loopy/inlining/odd shapes
Select a couple of example shapes and inspect how `stop_sequence` and `shape_meters` relate, e.g. whether `shape_meters` increases monotonically as `stop_sequence` increases.

In [1]:
import os
os.environ['USE_PYGEOS'] = '0'

import dask.dataframe as dd
import folium
import geopandas as gpd
import pandas as pd
import numpy as np
import shapely
import sys

from segment_speed_utils.project_vars import SEGMENT_GCS, analysis_date

# Temp append so we can import functions from scripts/
sys.path.append("./scripts/")
import prep_stop_segments
import cut_stop_segments
import loop_utils
import loopy

shape_with_utc = "f765b9d12fcca0173b4e3ddbc0374d18"
another_shape = "007cf76062f5957d4e38ea54e624c0ad"

In [None]:
# Shapes returned by loop_utils.grab_loop_shapes for analysis_date.
loop_shapes = loop_utils.grab_loop_shapes(analysis_date)

# Build the stop_times-by-shape table, passing in only the loop shapes.
# The result is materialized with .compute() below, so it is presumably a
# lazy dask collection at this point (TODO confirm).
stop_times_with_geom = prep_stop_segments.stop_times_aggregated_to_shape_array_key(
        analysis_date, loop_shapes)

st_loops = stop_times_with_geom.compute()

In [None]:
# Run loop_utils.assign_visits_to_stop on the loop shapes' stop_times, then
# order rows by shape and stop_sequence and renumber the index from 0.
gdf = loop_utils.assign_visits_to_stop(st_loops)
gdf = gdf.sort_values(["shape_array_key", "stop_sequence"])
gdf = gdf.reset_index(drop=True)

In [2]:
from segment_speed_utils import helpers, gtfs_schedule_wrangling, wrangle_shapes

In [3]:
# Trips (with geometry) for analysis_date, from gtfs_schedule_wrangling.
trips_with_geom = gtfs_schedule_wrangling.get_trips_with_geom(
    analysis_date)
# NOTE: this rebinds `stop_times_with_geom`, which an earlier cell built from
# `loop_shapes` only; from here on it is built from `trips_with_geom`.
stop_times_with_geom = prep_stop_segments.stop_times_aggregated_to_shape_array_key(
    analysis_date, trips_with_geom)

In [4]:
# Materialize the shape geometry and the stop geometry columns as GeoSeries.
shape_geoseries = gpd.GeoSeries(stop_times_with_geom.geometry.compute())
stop_geoseries = gpd.GeoSeries(stop_times_with_geom.stop_geometry.compute())

# Get projected shape_meters as dask array
# NOTE(review): despite the `_geoseries` suffix, get_dask_array=True is
# requested, so this is presumably a dask array of distances rather than a
# GeoSeries -- confirm against wrangle_shapes.
shape_meters_geoseries = wrangle_shapes.project_point_geom_onto_linestring(
    shape_geoseries,
    stop_geoseries,
    get_dask_array=True
)
    



In [5]:
stop_times_with_geom["shape_meters"] = shape_meters_geoseries

In [6]:
# NOTE(review): later cells read `stop_geometry` from frames derived from `gdf`
# (e.g. np.array(gdf2.stop_geometry)); a `gdf` computed after this drop will
# not have that column.
stop_times_with_geom = stop_times_with_geom.drop(columns = "stop_geometry")

In [7]:
gdf = stop_times_with_geom.compute()

In [8]:
# Grouping key used by the wide-format aggregation in the following cells.
shape_cols = ["shape_array_key"]    
# One row per distinct (shape_array_key, geometry) pair.
# NOTE(review): this is taken from `stop_times_with_geom`, not the computed
# `gdf`, and `unique_shapes` is not read again in the cells visible here.
unique_shapes = (stop_times_with_geom
                     [shape_cols + ["geometry"]]
                     .drop_duplicates()
                    )

In [15]:
# Collapse to one row per shape: after ordering by stop_sequence, gather each
# shape's shape_meters and stop_sequence values into parallel lists.
gdf_wide = (
    gdf
    .sort_values([*shape_cols, "stop_sequence"])
    .groupby(shape_cols)
    .agg({
        "shape_meters": lambda meters: list(meters),
        "stop_sequence": lambda seqs: list(seqs),
    })
    .reset_index()
)

In [30]:
# For every shape, flag whether shape_meters is strictly increasing along the
# stop order: every consecutive difference must be > 0 (ties count as False).
monotonic_results = [
    np.all(np.diff(np.array(shape_row.shape_meters)) > 0)
    for shape_row in gdf_wide.itertuples()
]

In [31]:
gdf_wide = gdf_wide.assign(
    is_monotonic = monotonic_results
)

In [34]:
gdf_wide.is_monotonic.value_counts()

True     5071
False    1177
Name: is_monotonic, dtype: int64

In [33]:
gdf_wide[gdf_wide.shape_array_key==shape_with_utc]

Unnamed: 0,shape_array_key,shape_meters,stop_sequence,is_monotonic
6034,f765b9d12fcca0173b4e3ddbc0374d18,"[0.04571602426616189, 1056.7484483794988, 1610...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]",False


In [None]:
shapes = helpers.import_scheduled_shapes(analysis_date).compute()

In [None]:
# Check if shape coords are projected in a fairly monotonic way?

In [None]:
# For each shape, project every vertex of the line back onto the line itself
# and record the distance along it, in vertex order.
results = []
for row in shapes.itertuples():
    shape_geom = row.geometry
    # simplify(0) does not depend on the point being projected, so build it
    # once per shape instead of once per coordinate.
    simplified_geom = shape_geom.simplify(0)
    shape_path_dist = np.array(
        [simplified_geom.project(shapely.geometry.Point(p))
         for p in shape_geom.coords]
    )
    results.append(shape_path_dist)


In [None]:
results

In [None]:
# Evaluate shapely.is_simple on every shape geometry, store the flag as a
# column, then tally simple vs. non-simple shapes.
simple_series = list(map(shapely.is_simple, shapes.geometry))

shapes = shapes.assign(is_simple=simple_series)

shapes.is_simple.value_counts()

In [None]:
#https://gis.stackexchange.com/questions/423351/identifying-self-intersections-in-linestring-using-shapely
test_geom = shapes.geometry.iloc[2]

# polygonize_full returns a 4-tuple:
# (polygons, dangles, cut edges, invalid ring lines).
# Keep the whole tuple and also bind `polygons`: later cells in this notebook
# read `polygons`, but nothing assigned it.
polygonize_parts = shapely.ops.polygonize_full(test_geom)
polygons = polygonize_parts[0]

# Same map as before: all four collections plotted together.
gpd.GeoSeries(polygonize_parts, crs='EPSG:3310').explore()

In [None]:
test_geom

In [None]:
gpd.GeoSeries(polygons, crs="EPSG:3310"
             ).explore(tiles="CartoDB Positron")

In [None]:
polygons

In [None]:
# check how many intersections. maybe we want at least 2 intersections
#https://gis.stackexchange.com/questions/423351/identifying-self-intersections-in-linestring-using-shapely

In [None]:
shapes[shapes.is_simple==False].head(10).shape_array_key.value_counts()
#.explore(
#    "shape_array_key", tiles='CartoDB Positron'
#)

In [None]:
test_shape = "68d21e7b7cb2aaad6f07de925cab61f2"

In [None]:
# Read the full stop_segments parquet for analysis_date. The per-shape
# `filters` argument is commented out, so subsetting to `test_shape` is done
# in memory by the next cell.
stop_segments = gpd.read_parquet(
    f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet", 
    #filters = [[("shape_array_key", "==", test_shape)]]
)

In [None]:
stop_segments[stop_segments.shape_array_key==test_shape]

In [None]:

# `shape_df` is not defined anywhere in this notebook; `shapes` is the frame
# that carries the boolean `is_simple` column. Map the first simple shape.
shapes[shapes.is_simple].head(1).explore(tiles='CartoDB Positron')

In [None]:
gdf2 = gdf[gdf.shape_array_key.isin([another_shape])]

In [None]:
gdf2.head()

In [None]:
shape_geometry = gdf2.geometry.iloc[0]
stop_geometry_array = np.array(gdf2.stop_geometry)
stop_sequence_array = np.array(gdf2.stop_sequence)

In [None]:
# stop_sequence value to inspect on this shape.
current_stop_seq = 21

# Run loopy.super_project for that one stop. Two values come back; going by
# the names they are unpacked into, presumably the segment pieces and the
# stops involved -- confirm against loopy.super_project.
test_segment, test_stops = loopy.super_project(
    current_stop_seq,
    shape_geometry,
    stop_geometry_array,
    stop_sequence_array,
)

# Convert both outputs for plotting, then draw segment and stops together.
segment_to_plot, stops_to_plot = loopy.stop_segment_components_to_geoseries(
    test_segment, test_stops)

loopy.plot_segments_and_stops(segment_to_plot, stops_to_plot)

In [None]:
# Same single-stop check as the stop_sequence 21 cell, now for 33.
current_stop_seq = 33

# Rebinds test_segment / test_stops for the new stop_sequence.
test_segment, test_stops = loopy.super_project(
    current_stop_seq,
    shape_geometry,
    stop_geometry_array,
    stop_sequence_array,
)

# Convert both outputs for plotting, then draw segment and stops together.
segment_to_plot, stops_to_plot = loopy.stop_segment_components_to_geoseries(
    test_segment, test_stops)

loopy.plot_segments_and_stops(segment_to_plot, stops_to_plot)

## Expand to multiple stops within same shape

In [None]:
# gdf2 was already filtered to `another_shape` when it was created, so this
# filter mainly serves to reset the index to 0..n-1.
subset_df = gdf2[gdf2.shape_array_key==another_shape
                ].reset_index(drop=True)

# Only the first row's geometry is read as the shape line; this assumes all
# rows of one shape carry the same geometry -- TODO confirm.
shape_geometry = subset_df.geometry.iloc[0]
stop_geometry_array = np.array(subset_df.stop_geometry)
stop_sequence_array = np.array(subset_df.stop_sequence)

segment_results = []

# One loopy.super_project call per stop_sequence value on this shape.
for stop_seq in stop_sequence_array:
        
    # Second return value (unpacked as test_stops in the single-stop cells)
    # is not needed here.
    segment_i, _ = loopy.super_project(
        stop_seq,
        shape_geometry,
        stop_geometry_array,
        stop_sequence_array,
    )
    # NOTE(review): the single-stop cells unpack two values from this helper,
    # while here its return is kept as one object and later passed to
    # pd.concat -- confirm the helper's return shape when stops is [].
    segment_i_series = loopy.stop_segment_components_to_geoseries(
        segment_i, [])
    
    segment_results.append(segment_i_series)


In [None]:
stop_segments = pd.concat(segment_results, 
                          axis=0).reset_index(drop=True)

In [None]:
subset_df = subset_df.assign(
    stop_segments_geometry = stop_segments
)

In [None]:
subset_df[subset_df.stop_segments_geometry.isna()]

## Expand to multiple shapes

In [None]:
gdf3 = gdf[gdf.shape_array_key.isin([shape_with_utc, another_shape])]

In [None]:
def super_cut_stop_segments(
    gdf: gpd.GeoDataFrame, 
    one_shape: str
) -> gpd.GeoDataFrame:
    """
    Cut one stop segment per stop for a single shape.

    Filters gdf to the rows whose shape_array_key equals one_shape, calls
    loopy.super_project once per stop_sequence value, converts each result
    with loopy.stop_segment_components_to_geoseries, and attaches the
    concatenated output as a new stop_segments_geometry column.

    gdf: must have shape_array_key, geometry, stop_geometry and
        stop_sequence columns.
    one_shape: the shape_array_key value to subset on.

    Returns the filtered rows (index reset to 0..n-1) with the extra
    stop_segments_geometry column.

    Raises IndexError (from .iloc[0]) if no row matches one_shape.
    """
    is_this_shape = gdf.shape_array_key == one_shape
    one_shape_df = gdf[is_this_shape].reset_index(drop=True)

    # Only the first row's geometry is used as the shape line; assumes every
    # row of a shape carries the same geometry -- TODO confirm.
    shape_line = one_shape_df.geometry.iloc[0]
    stop_points = np.array(one_shape_df.stop_geometry)
    stop_seqs = np.array(one_shape_df.stop_sequence)

    def cut_one(seq):
        # super_project hands back a pair; only the first item is kept.
        segment_parts, _ = loopy.super_project(
            seq, shape_line, stop_points, stop_seqs
        )
        # NOTE(review): elsewhere in this notebook two values are unpacked
        # from this helper; here its return is used as a single object that
        # pd.concat accepts -- confirm the return shape when stops is [].
        return loopy.stop_segment_components_to_geoseries(segment_parts, [])

    per_stop = [cut_one(seq) for seq in stop_seqs]

    # Stack and renumber from 0 so assign() aligns on one_shape_df's reset
    # index (presumably one output row per stop -- verify).
    stacked = pd.concat(per_stop, axis=0).reset_index(drop=True)

    return one_shape_df.assign(stop_segments_geometry=stacked)


In [None]:
results_df = gpd.GeoDataFrame()

# Cut stop segments shape by shape; one frame per distinct shape_array_key.
results = [
    super_cut_stop_segments(gdf3, shape_key)
    for shape_key in gdf3.shape_array_key.unique()
]

In [None]:
results_df = pd.concat(results, axis=0)

In [None]:
results_df.shape_array_key.value_counts()

In [None]:
results_df.shape, gdf3.shape

In [None]:
results_df[results_df.stop_segments_geometry.isna()].shape

In [None]:
from segment_speed_utils import wrangle_shapes
from shared_utils import rt_utils

In [None]:
# Swap the cut stop segments in as the "geometry" column: drop the shape
# geometry and stop geometry columns, then rename stop_segments_geometry.
results_df2 = (
    results_df
    .drop(columns=["geometry", "stop_geometry"])
    .rename(columns={"stop_segments_geometry": "geometry"})
)

In [None]:
segment_geom = gpd.GeoSeries(results_df2.geometry)
# EPSG code of the frame's CRS, reused for the offset lines built below.
CRS = results_df2.crs.to_epsg()
    
# Shift every segment 30 units to its right-hand side. Units are those of the
# CRS (presumably meters -- TODO confirm).
geom_parallel = gpd.GeoSeries(
    [i.parallel_offset(30, "right") for i in segment_geom], 
    crs=CRS
)


In [None]:
# Pass the offset lines through rt_utils.arrowize_segment with
# buffer_distance=20 (presumably yields arrow-shaped polygons for mapping --
# confirm against rt_utils).
geom_arrowized = rt_utils.arrowize_segment(
    geom_parallel, 
    buffer_distance = 20
)

# Store the result as a second geometry column next to "geometry".
results_df2 = results_df2.assign(
    geometry_arrowized = geom_arrowized
)

In [None]:
results_df2.plot()

In [None]:
#results_df2 = wrangle_shapes.add_arrowized_geometry(results_df2)

results_df2.set_geometry("geometry_arrowized").explore(
    "stop_id", tiles="CartoDB Positron")