# Plot speeds by segments

In [None]:
import dask.dataframe as dd
import geopandas as gpd
import pandas as pd
import warnings

from shared_utils import rt_utils

# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")

# Service date passed to avg_speeds_with_segment_geom(), where it is
# interpolated into the speeds parquet path.
analysis_date = "2022-10-12"

# Bucket prefix for analysis outputs, and the sub-folder holding the
# dask test tables read by this notebook.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/"
DASK_TEST = f"{GCS_FILE_PATH}dask_test/"

In [None]:
def avg_speeds_with_segment_geom(
    analysis_date: str, 
    max_speed_cutoff: int = 70
) -> gpd.GeoDataFrame: 
    """
    Build a per-segment average speed table with arrowized geometry attached.

    Steps:
    1. Read the segment-trip speeds table stored under
       `{DASK_TEST}speeds_{analysis_date}/`.
    2. Keep rows whose `speed_mph` is at or below `max_speed_cutoff`,
       then, per segment, take the mean `speed_mph` and the number of
       distinct `trip_id` values (returned as `n_trips`).
    3. Read the segment geometries, make `geometry_arrowized` the active
       geometry, and inner-merge the averages onto the segments whose
       arrowized geometry is not empty.

    Args:
        analysis_date: date string used to locate the speeds table.
        max_speed_cutoff: rows with `speed_mph` above this value are
            dropped before averaging.

    Returns:
        One row per segment present in both tables, with `speed_mph`
        rounded to 2 decimals, `n_trips`, and the segment geometry columns.
    """
    # Columns that together identify one segment
    segment_cols = [
        "calitp_itp_id", 
        "route_dir_identifier", 
        "segment_sequence",
    ]
    
    # TODO: only tested on BBB so far. Double check the arrowizing: where
    # a route runs in both directions, both lines should be visible.
    trip_speeds = dd.read_parquet(f"{DASK_TEST}speeds_{analysis_date}/")
    
    # Drop unusually high speeds, then bring the rest into memory
    within_cutoff = trip_speeds[
        trip_speeds.speed_mph <= max_speed_cutoff
    ].compute()
    
    # Aggregate to one row per segment
    segment_speeds = (
        within_cutoff
        .groupby(segment_cols)
        .agg({"speed_mph": "mean", "trip_id": "nunique"})
        .reset_index()
    )
    
    # Clean up for map
    rounded_speed = segment_speeds.speed_mph.round(2)
    segment_speeds = (
        segment_speeds
        .assign(speed_mph = rounded_speed)
        .rename(columns = {"trip_id": "n_trips"})
    )
    
    # Segment geometry, de-duplicated
    keep_cols = segment_cols + ["geometry", "geometry_arrowized"]
    segment_shapes = gpd.read_parquet(
        f"{DASK_TEST}longest_shape_segments.parquet",
        columns = keep_cols
    )
    segment_shapes = segment_shapes.drop_duplicates().reset_index(drop=True)
    
    # Switch the active geometry to the arrowized lines and give them
    # the same CRS as the original geometry
    arrowized = segment_shapes.set_geometry("geometry_arrowized")
    arrowized.crs = segment_shapes.crs
    
    # Segments with an empty arrowized geometry are left out of the merge
    has_arrow_geom = ~arrowized.geometry_arrowized.is_empty
    
    return pd.merge(
        arrowized[has_arrow_geom], 
        segment_speeds,
        on = segment_cols,
        how = "inner"
    )

In [None]:
# Average speed per segment for the analysis date, ignoring trip speeds
# above 70 mph.
gdf = avg_speeds_with_segment_geom(analysis_date, max_speed_cutoff = 70)

In [None]:
# Count the segments whose average speed exceeds each threshold,
# stepping from 30 to 70 mph in 5 mph increments.
for threshold in range(30, 75, 5):
    above_threshold = gdf[gdf.speed_mph > threshold]
    print(f"# rows with over {threshold} mph: {len(above_threshold)}")

In [None]:
import branca

# Red-yellow-green step colormap rescaled to span 0 to 50.
_RD_YL_GN_STEPS = branca.colormap.step.RdYlGn_10
ZERO_FIFTY_COLORSCALE = _RD_YL_GN_STEPS.scale(vmin=0, vmax=50)


In [None]:
# Interactive map of the segments, colored by average speed.
gdf.explore(column = "speed_mph", cmap = ZERO_FIFTY_COLORSCALE, tiles = "CartoDB Positron")