# Plot speeds by segments

* The `segments` geometry going in is `LineString`.
* Changing `buffer_distance` in `arrowize_segment` adjusts how thick the lines are.
* **TODO**: how to offset / add more space between the 2 directions?

In [1]:
import dask.dataframe as dd
import dask_geopandas as dg
import geopandas as gpd
import pandas as pd

from shared_utils import rt_utils

# Base GCS location; the notebook's input paths are built from this prefix.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/"
# Sub-folder holding the speeds and segment parquet files read below.
DASK_TEST = f"{GCS_FILE_PATH}dask_test/"

# Date string interpolated into the speeds dataset path (speeds_{analysis_date}/).
analysis_date = "2022-10-12"



In [2]:
def avg_speeds_with_segment_geom(
    analysis_date: str, 
    max_speed_cutoff: int = 70,
    itp_id: int = 300,
    buffer_distance: int = 15,
) -> gpd.GeoDataFrame: 
    """
    Import the segment-trip table. 
    Average the speed_mph across all trips present in the segment,
    then attach the segment geometry and an arrowized version of it.
    
    Parameters
    ----------
    analysis_date: date string used to locate the speeds dataset
        (`{DASK_TEST}speeds_{analysis_date}/`).
    max_speed_cutoff: rows with speed_mph above this value are dropped 
        before averaging.
    itp_id: calitp_itp_id of the operator to keep. Defaults to 300, 
        which the original notes refer to as BBB (used for testing the
        arrowizing, especially where a route runs in both directions).
    buffer_distance: passed through to `rt_utils.arrowize_segment`; 
        controls how thick the arrowized lines are drawn.
        Presumably in the units of the segment CRS -- TODO confirm.
    
    Returns
    -------
    GeoDataFrame with one row per 
    (calitp_itp_id, route_dir_identifier, segment_sequence) that has both
    a segment geometry and at least one speed observation (inner merge).
    Columns: the 3 keys, `geometry` (segment line), `geometry_arrowized`,
    `speed_mph` (mean, rounded to 2 decimals), `n_trips` (unique trip_ids).
    """
    # Keys that identify a segment; shared by the groupby, 
    # the column selection, and the merge so they cannot drift apart.
    segment_cols = ["calitp_itp_id", "route_dir_identifier", 
                    "segment_sequence"]
    
    df = dd.read_parquet(
        f"{DASK_TEST}speeds_{analysis_date}/")
    
    # Take the average after dropping unusually high speeds.
    # The filtered subset is computed into pandas before the groupby.
    avg_speeds = (df[(df.speed_mph <= max_speed_cutoff) & 
                     (df.calitp_itp_id == itp_id)
                    ].compute()
        .groupby(segment_cols)
        .agg({
            "speed_mph": "mean",
            "trip_id": "nunique"
        }).reset_index()
    )
    
    # Clean up for map
    avg_speeds = avg_speeds.assign(
        speed_mph = avg_speeds.speed_mph.round(2),
    ).rename(columns = {"trip_id": "n_trips"})
    
    # Merge in segment geometry
    segments = dg.read_parquet(
        f"{DASK_TEST}longest_shape_segments.parquet",
        filters = [[("calitp_itp_id", "==", itp_id)]],
        columns = segment_cols + ["geometry"]
    ).drop_duplicates().reset_index(drop=True)

    # Row-wise apply on the dask frame; meta tells dask the new column's
    # name and that it holds geometries.
    segments = segments.assign(
        geometry_arrowized = segments.apply(
            lambda x: rt_utils.arrowize_segment(
                x.geometry, buffer_distance = buffer_distance),
            axis = 1,
            meta = ('geometry_arrowized', 'geometry')
        )
    ).compute()
    
    # Inner merge: keep only segments that have speeds, and vice versa
    gdf = pd.merge(
        segments, 
        avg_speeds,
        on = segment_cols,
        how = "inner"
    )
    
    return gdf

In [3]:
# Average speed per segment, with geometry attached, for the analysis date.
gdf = avg_speeds_with_segment_geom(
    analysis_date = analysis_date, max_speed_cutoff = 70)

In [5]:
# Preview the first 5 rows of the merged table.
gdf.head(5)

Unnamed: 0,calitp_itp_id,route_dir_identifier,segment_sequence,geometry,geometry_arrowized,speed_mph,n_trips
0,300,125684836,5,"LINESTRING (141577.007 -443175.505, 141655.349...","POLYGON ((142063.620 -443295.129, 142064.826 -...",8.22,26
1,300,125684836,2,"LINESTRING (141605.588 -441198.033, 141603.162...","POLYGON ((141580.103 -441572.166, 141672.672 -...",14.68,22
2,300,125684836,1,"LINESTRING (141230.909 -440493.838, 141303.887...","POLYGON ((141784.723 -441019.552, 141785.608 -...",9.51,25
3,300,125684836,3,"LINESTRING (141497.087 -441831.324, 141340.055...","POLYGON ((141010.919 -442063.836, 140970.765 -...",9.48,25
4,300,125684836,0,"LINESTRING (141656.516 -440040.208, 141749.055...","POLYGON ((141761.148 -440157.419, 141761.952 -...",3.85,25


In [4]:
# Make the arrowized polygons the active geometry and carry over
# the CRS from the original frame.
gdf2 = gdf.set_geometry(col = "geometry_arrowized")
gdf2.crs = gdf.crs

# Interactive map, colored by average speed.
gdf2.explore(
    column = "speed_mph",
    cmap = rt_utils.ZERO_THIRTY_COLORSCALE,
    tiles = "CartoDB Positron",
)