# Transit On the SHN 
* [GH Issue](https://github.com/cal-itp/data-analyses/issues/1477)


In [1]:
import geopandas as gpd
import google.auth
import numpy as np
import pandas as pd

credentials, project = google.auth.default()

import gcsfs

fs = gcsfs.GCSFileSystem()

In [2]:
from calitp_data_analysis import geography_utils, utils
from segment_speed_utils import gtfs_schedule_wrangling, helpers
from shared_utils import (
    catalog_utils,
    dask_utils,
    gtfs_utils_v2,
    portfolio_utils,
    publish_utils,
    rt_dates,
    rt_utils,
)
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS

In [3]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

## District 2 Weirdness

In [4]:
import _ct_district_data_prep

In [18]:
district = '02 - Redding'

In [6]:
district_int = [int(s) for s in district.split() if s.isdigit()][0]

In [14]:
OPERATOR_PROFILE_REPORT = GTFS_DATA_DICT.digest_tables.operator_profiles_report
    
operator_df = pd.read_parquet(
    f"{RT_SCHED_GCS}{OPERATOR_PROFILE_REPORT}.parquet",
    )
    
operator_df2 = operator_df.loc[operator_df.caltrans_district == district]

In [22]:
OPERATOR_PROFILE = GTFS_DATA_DICT.digest_tables.operator_profiles
    
operator_og_df = pd.read_parquet(
    f"{RT_SCHED_GCS}{OPERATOR_PROFILE}.parquet",
    )
    
operator_og_df_df2 = operator_og_df.loc[operator_og_df.caltrans_district == district]

In [24]:
operator_og_df_df2.portfolio_organization_name.unique()

array(['Modoc Transportation Agency', 'Siskiyou County',
       'Redding Area Bus Authority', 'Tehama County', 'Trinity County',
       'Plumas Transit Systems', 'Lassen Transit Service Agency'],
      dtype=object)

In [17]:
operator_df.caltrans_district.unique()

array(['04 - Bay Area / Oakland', '12 - Santa Ana',
       '07 - Los Angeles / Ventura', '03 - Marysville / Sacramento',
       '08 - San Bernardino / Riverside', '06 - Fresno / Bakersfield',
       '10 - Stockton', '05 - San Luis Obispo / Santa Barbara',
       '01 - Eureka', '11 - San Diego', '02 - Redding'], dtype=object)

In [19]:
operator_df = _ct_district_data_prep.data_wrangling_operator_profile(district)

operator_gdf = _ct_district_data_prep.data_wrangling_operator_map(
    list(operator_df.portfolio_organization_name.unique())
)

ct_district_gdf = _ct_district_data_prep.load_ct_district(district_int)

transit_route_shs_gdf, transit_route_shs_table = (
    _ct_district_data_prep.final_transit_route_shs_outputs(20, str(district_int))
)
district_gdf = _ct_district_data_prep.load_ct_district(district_int)

In [21]:
operator_df

Unnamed: 0,portfolio_organization_name,service_date,caltrans_district,operator_n_routes,operator_n_trips,operator_n_shapes,operator_n_stops,operator_n_arrivals,operator_route_length_miles,n_downtown_local_routes,n_local_routes,n_coverage_routes,n_rapid_routes,n_express_routes,n_rail_routes,n_ferry_routes,vp_per_min_agency,spatial_accuracy_agency,n_feeds,operator_feeds,counties_served,service_area_pop,service_area_sq_miles,hq_city,reporter_type,primary_uza_name
80,Redding Area Bus Authority,2025-05-14,02 - Redding,12,198,27,343,5123,278.49,0,0,12,12,0,0,0,2.87,97.12,1,Redding Schedule,"Butte County, Shasta County, Tehama County",126551,100,Redding,Full Reporter,"Redding, CA"


In [8]:
shn_gdf = _ct_district_data_prep.load_buffered_shn_map(50, district_int)

In [9]:
shn_gdf.columns

Index(['geometry', 'District', 'State Highway Network Route'], dtype='object')

In [10]:
len(shn_gdf)

1

In [None]:
shn_gdf.explore('State Highway Network Route')

In [11]:
transit_route_shs_table

Unnamed: 0,Portfolio Organization Name,Route,State Highway Network Routes in District 2,Percentage of Transit Route on SHN Across All Districts
0,Lassen Transit Service Agency,South County/ Commuter,"139, 36, 395",61.7
1,Lassen Transit Service Agency,Susanville City Express Route,"139, 36",23.9
2,Lassen Transit Service Agency,Susanville City Route,"139, 36",30.8
3,Lassen Transit Service Agency,West County,"147, 89, 139, 44, 36",62.4
4,Plumas Transit Systems,East County,"70, 284, 49, 89, 395",88.2
5,Plumas Transit Systems,North County,"147, 70, 89, 36",85.6
6,Plumas Transit Systems,Quincy Local,70,57.0
7,Redding Area Bus Authority,Churn Creek/Knightson/Airport,"273, 5, 44",44.4
8,Redding Area Bus Authority,Route 1,"151, 273, 5, 299",41.2
9,Redding Area Bus Authority,Route 11,"273, 5, 299, 44",21.5


In [None]:
# Try another district
transit_route_shs_gdf_d3, transit_route_shs_table_d3 = (
    _ct_district_data_prep.final_transit_route_shs_outputs(20, "3")
)

In [None]:
transit_route_shs_gdf_d3.explore()

In [None]:
def process_transit_routes() -> gpd.GeoDataFrame:
    """
    Load the operator route shapes, keep the most recent rows, and attach
    each route's length so it can later be compared against the portion
    that overlaps the state highway network.

    Returns a GeoDataFrame with service_date, geometry,
    portfolio_organization_name, recent_combined_name and route_length_feet,
    de-duplicated on organization / route name / service date.
    """
    keep_cols = [
        "service_date",
        "geometry",
        "portfolio_organization_name",
        "recent_combined_name",
        # "route_id",
    ]

    # Route shapes published for the operator map in the digest tables.
    routes_path = (
        f"{RT_SCHED_GCS}{GTFS_DATA_DICT.digest_tables.operator_routes_map}.parquet"
    )
    all_routes = gpd.read_parquet(
        routes_path,
        storage_options={"token": credentials.token},
    )[keep_cols]

    # Keep only the most recent rows, grouped by organization.
    # NOTE(review): the grouping is by portfolio_organization_name only, not by
    # organization + recent_combined_name -- confirm that is the intent.
    latest_routes = publish_utils.filter_to_recent_date(
        df=all_routes,
        group_cols=["portfolio_organization_name"],
    )

    # Measure length after projecting to the CA Albers (feet) CRS.
    length_in_feet = latest_routes.geometry.to_crs(
        geography_utils.CA_NAD83Albers_ft
    ).length
    latest_routes = latest_routes.assign(route_length_feet=length_in_feet)

    # Drop duplicate route rows. This will probably be taken out once the
    # 1:m recent_combined_name to route_id issue is resolved.
    dedupe_keys = [
        "portfolio_organization_name",
        "recent_combined_name",
        "service_date",
    ]
    return latest_routes.drop_duplicates(subset=dedupe_keys)

In [None]:
transit_routes = process_transit_routes()

In [None]:
# transit_routes.drop(columns=["geometry"]).sample(3)

In [None]:
# len(transit_routes)

In [None]:
# transit_routes.recent_combined_name.value_counts().head()

In [None]:
# transit_routes.portfolio_organization_name.value_counts().head()

## Load in SHS 


In [None]:
def dissolve_shn(columns_to_dissolve: list, file_name: str) -> gpd.GeoDataFrame:
    """
    Dissolve the State Highway Network so there is one row per unique
    combination of `columns_to_dissolve`, then save the result to GCS.

    columns_to_dissolve: columns to dissolve by. "Route" must be included,
    because it is renamed to shn_route and cast to a string below.
    file_name: suffix used in the output parquet name
    (shn_dissolved_by_{file_name}.parquet).
    """
    # Locate the SHN dataset through the shared data catalog.
    shn_path = catalog_utils.get_catalog(
        "shared_data_catalog"
    ).state_highway_network.urlpath

    # Read it in and project to a CRS measured in feet.
    shn_in_feet = gpd.read_parquet(
        shn_path,
        storage_options={"token": credentials.token},
    ).to_crs(geography_utils.CA_NAD83Albers_ft)

    # Dissolve, keep only the dissolve keys plus geometry, and rename Route so
    # the SHN route is never confused with a transit route.
    dissolved = shn_in_feet.dissolve(by=columns_to_dissolve).reset_index()
    dissolved = dissolved[columns_to_dissolve + ["geometry"]].rename(
        columns={"Route": "shn_route"}
    )

    # Add the length of each dissolved highway and store the route as a string.
    dissolved = dissolved.assign(
        highway_feet=dissolved.geometry.length,
        shn_route=dissolved.shn_route.astype(int).astype(str),
    )

    # Save this out so the dissolve does not have to be repeated each time.
    output_path = f"gs://calitp-analytics-data/data-analyses/state_highway_network/shn_dissolved_by_{file_name}.parquet"
    dissolved.to_parquet(output_path, filesystem=fs)
    return dissolved

In [None]:
# dissolved_route = dissolve_shn(["Route", "District"], "ct_district_route")

In [None]:
SHN_FILE = catalog_utils.get_catalog(
        "shared_data_catalog"
    ).state_highway_network.urlpath

shn = gpd.read_parquet(
        SHN_FILE,
        storage_options={"token": credentials.token},
    ).to_crs(geography_utils.CA_NAD83Albers_ft)

In [None]:
# shn.explore("Route")

In [None]:
# Use a dedicated loop variable: the notebook-level `district` string
# (e.g. '02 - Redding') is read by other cells and must not be overwritten
# by the SHN district numbers iterated here.
for route_1_district in shn.loc[shn.Route == 1].District.unique():
    print(f"District {route_1_district}")
    display(
        shn.loc[(shn.Route == 1) & (shn.District == route_1_district)].explore()
    )

In [None]:
shn.loc[shn.Route == 1].explore("District")

In [None]:
dissolved_url = "gs://calitp-analytics-data/data-analyses/state_highway_network/shn_dissolved_by_ct_district_route.parquet"

In [None]:
dissolved_df = gpd.read_parquet(
        dissolved_url,
        storage_options={"token": credentials.token},
    )

In [None]:
dissolved_df.columns

In [None]:
dissolved_df.drop(columns = ["geometry"]).sample()

In [None]:
dissolved_df.groupby(["shn_route"]).agg({"District":"nunique"}).sort_values(by = ["District"], ascending = False).head()

In [None]:
dissolved_df.groupby(["District"]).agg({"shn_route":"nunique"})

In [None]:
dissolved_df.loc[dissolved_df.District == 2].explore("shn_route")

In [None]:
dissolved_df.loc[dissolved_df.shn_route == "1"].drop(columns = ["geometry"])

In [None]:
dissolved_df.loc[dissolved_df.shn_route == "1"].explore("District")

In [None]:
# shn_dissolved.loc[shn_dissolved.shn_route == 210].drop(columns=["geometry"])

In [None]:
# shn_dissolved.loc[shn_dissolved.shn_route == 110].drop(columns=["geometry"])

In [None]:
# shn_dissolved.loc[shn_dissolved.Route == 210].explore()

In [None]:
# shn_dissolved.loc[shn_dissolved.Route == 110].explore()

In [None]:
# len(dissolved)

In [None]:
def buffer_shn(buffer_amount: int, file_name: str) -> gpd.GeoDataFrame:
    """
    Buffer the dissolved SHN geometries before they are overlaid with
    transit routes, and save the buffered version to GCS.

    buffer_amount: distance passed to GeoSeries.buffer (the output file
    name labels it as feet).
    file_name: suffix of the dissolved SHN file to read; it is reused in
    the buffered output file name.
    """
    shn_folder = "gs://calitp-analytics-data/data-analyses/state_highway_network/"
    dissolved_path = f"{shn_folder}shn_dissolved_by_{file_name}.parquet"
    buffered_path = f"{shn_folder}shn_buffered_{buffer_amount}_ft_{file_name}.parquet"

    # Read in the dissolved SHN file.
    dissolved_shn = gpd.read_parquet(
        dissolved_path,
        storage_options={"token": credentials.token},
    )

    # Replace each highway geometry with its buffered version.
    buffered_geometry = dissolved_shn.geometry.buffer(buffer_amount)
    buffered_shn = dissolved_shn.assign(geometry=buffered_geometry)

    # Save it out so the buffer can be read back instead of recomputed.
    buffered_shn.to_parquet(buffered_path, filesystem=fs)

    return buffered_shn

In [None]:
SHN_HWY_BUFFER_FEET = 50
PARALLEL_HWY_BUFFER_FEET = geography_utils.FEET_PER_MI * 0.5

In [None]:
# intersecting_buffer.shape

In [None]:
# buffered_df = buffer_shn(SHN_HWY_BUFFER_FEET, "ct_district_route")

In [None]:
# buffered_df.loc[buffered_df.shn_route == 5].explore()

In [None]:
shn_district_df = gpd.read_parquet(
    f"gs://calitp-analytics-data/data-analyses/state_highway_network/shn_buffered_50_ft_ct_district_route.parquet",
    storage_options={"token": credentials.token},
)

In [None]:
shn_district_df.shape

In [None]:
shn_district_df.drop(columns=["geometry"]).sample(3)

In [None]:
shn_district_df.info()

In [None]:
# buffered_df.columns

## Overlay

In [None]:
def routes_shn_intersection(buffer_amount: int, file_name: str) -> gpd.GeoDataFrame:
    """
    Overlay the most recent transit routes with a buffered version
    of the SHN and compute how much of each route falls inside the buffer.

    buffer_amount: buffer distance identifying the buffered SHN file
    (the file name labels it as feet); also used to rebuild the file if missing.
    file_name: suffix identifying which dissolved/buffered SHN file to use.

    Returns the transit routes with their original geometries, left-joined
    to one row per intersecting SHN feature. pct_route_on_hwy holds the
    percentage of the route length inside that feature's buffer. Routes
    with no intersection keep a single row with District set to 0.
    """
    GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/state_highway_network/"

    # Read in buffered shn here or re buffer if we don't have it available.
    HWY_FILE = f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_ft_{file_name}.parquet"

    if fs.exists(HWY_FILE):
        shn_routes_gdf = gpd.read_parquet(
            HWY_FILE, storage_options={"token": credentials.token}
        )
    else:
        # buffer_shn requires file_name as well; omitting it raised a TypeError
        # whenever the buffered file had not been created yet.
        shn_routes_gdf = buffer_shn(buffer_amount, file_name)

    # Process the most recent transit route geographies and ensure the
    # CRS matches the SHN routes' GDF so the overlay doesn't go wonky.
    transit_routes_gdf = process_transit_routes().to_crs(shn_routes_gdf.crs)

    # Overlay transit routes with the SHN geographies.
    gdf = gpd.overlay(
        transit_routes_gdf, shn_routes_gdf, how="intersection", keep_geom_type=True
    )

    # Calculate the share of the transit route that runs on a highway, round it
    # to 3 decimals and multiply it by 100 to get a percentage. Drop the geometry
    # because we want the original transit route shapes, not the clipped pieces.
    gdf = gdf.assign(
        pct_route_on_hwy=(gdf.geometry.length / gdf.route_length_feet).round(3) * 100,
    ).drop(
        columns=[
            "geometry",
        ]
    )

    # Join back the dataframe above with the original transit route dataframes
    # so we can have the original transit route geographies. The left join keeps
    # routes that never touch the SHN.
    gdf2 = pd.merge(
        transit_routes_gdf,
        gdf,
        on=[
            "service_date",
            "portfolio_organization_name",
            "recent_combined_name",
            "route_length_feet",
        ],
        how="left",
    )

    # Clean up: routes without an intersection have no District, so mark them 0.
    gdf2["District"] = gdf2["District"].fillna(0).astype(int)
    return gdf2

In [None]:
intersecting = routes_shn_intersection(50, "ct_district_route")

In [None]:
intersecting.info()

In [None]:
intersecting.loc[intersecting.recent_combined_name == "15 15 ARTIC Sports Complex Line"].drop(columns=["geometry", "route_length_feet", "highway_feet"])

In [None]:
intersecting.drop(columns=["geometry", "route_length_feet", "highway_feet"]).sample(10)

In [None]:
len(intersecting)

In [None]:
len(
    intersecting.loc[
        (intersecting.pct_route_on_hwy <= 1) & (intersecting.shn_route.notna())
    ]
)

In [None]:
intersecting.loc[
    (intersecting.pct_route_on_hwy <= 1) & (intersecting.shn_route.notna())
].drop(columns=["geometry", "route_length_feet", "highway_feet"]).sample(10)

In [None]:
intersecting.pct_route_on_hwy.describe()

In [None]:
intersecting.recent_combined_name.value_counts().describe()

In [None]:
# Find routes that cross multiple districts
multi_district_routes = (
    intersecting.groupby(["portfolio_organization_name", "recent_combined_name"])
    .agg({"District": "nunique"})
    .reset_index()
)

In [None]:
multi_district_routes.loc[
    ~multi_district_routes.portfolio_organization_name.isin(["FlixBus and Greyhound", "Amtrak"])
].sort_values(by=["District"], ascending=False).head(20)

In [None]:
bakersfield_route = intersecting.loc[
    intersecting.recent_combined_name == "100 Bakersfield - Lancaster"
]

In [None]:
bakersfield_route.District.unique()

In [None]:
# bakersfield_route.drop(columns = ["service_date"]).explore()

In [None]:
intersecting.loc[
    intersecting.recent_combined_name == "100 Bakersfield - Lancaster"
].pct_route_on_hwy.sum()

In [None]:
intersecting.loc[intersecting.recent_combined_name == "Route 785"].drop(
    columns=["geometry"]
)

## Create final dataframe to add to Transit Routes dataset on the Open Data Portal [here](https://data.ca.gov/dataset/california-transit-routes/resource/3ea7eb11-4fc6-45ed-88f5-b599e38c6b19)

In [None]:
def group_route_district(df: pd.DataFrame, pct_route_on_hwy_agg: str) -> pd.DataFrame:
    """
    Collapse the route/SHN intersection rows so each transit route has one row.

    df: must contain portfolio_organization_name, recent_combined_name,
    shn_route, District and pct_route_on_hwy_across_districts.
    pct_route_on_hwy_agg: aggregation applied to
    pct_route_on_hwy_across_districts (e.g. "sum" or "max").

    Returns one row per organization/route. shn_route and District become
    comma-separated strings of their unique values, and the percentage is
    aggregated and rounded to 2 decimals.
    """

    def _join_unique(values: pd.Series) -> str:
        # Joining a raw set gives an order that can change from run to run,
        # so sort first: numeric values ascending, then everything else
        # (e.g. "nan") alphabetically.
        unique_values = set(values.astype(str))
        ordered = sorted(
            unique_values,
            key=lambda v: (not v.isdecimal(), int(v) if v.isdecimal() else 0, v),
        )
        return ", ".join(ordered)

    # Aggregate by adding all the districts and SHN to a single row, rather than
    # multiple and aggregate the % of SHN a transit route intersects with
    agg1 = (
        df.groupby(
            [
                "portfolio_organization_name",
                "recent_combined_name",
            ],
            as_index=False,
        )[["shn_route", "District", "pct_route_on_hwy_across_districts"]]
        .agg(
            {
                "shn_route": _join_unique,
                "District": _join_unique,
                "pct_route_on_hwy_across_districts": pct_route_on_hwy_agg,
            }
        )
        .reset_index(drop=True)
    )

    # Clean up
    agg1.pct_route_on_hwy_across_districts = (
        agg1.pct_route_on_hwy_across_districts.astype(float).round(2)
    )
    return agg1

In [None]:
# group_route_district()

In [None]:
def create_on_shs_column(df):
    """
    Add an on_shs flag: "Y" when pct_route_on_hwy_across_districts is
    non-zero, "N" when it is zero or missing.

    The column is added to `df` in place and the same frame is returned.
    """
    # A missing percentage means no overlap was computed; without fillna a NaN
    # compares unequal to 0 and would be labelled "Y".
    pct_on_hwy = df["pct_route_on_hwy_across_districts"].fillna(0)
    df["on_shs"] = np.where(pct_on_hwy == 0, "N", "Y")
    return df

In [None]:
def prep_open_data_portal(gdf: gpd.GeoDataFrame) -> pd.DataFrame:
    """
    Build the one-row-per-route table that gets joined to the existing
    transit_routes dataframe published on the Open Data Portal.
    """
    # The per-intersection percentage is about to be summed over every
    # SHN route/district a transit route touches, so rename it to match.
    renamed_gdf = gdf.rename(
        columns={"pct_route_on_hwy": "pct_route_on_hwy_across_districts"}
    )

    # Collapse the intersection rows so each route has a single row.
    routes_agg = group_route_district(renamed_gdf, "sum")

    # Flag with Y/N whether the route intersects any SHN route.
    return create_on_shs_column(routes_agg)

In [None]:
intersecting.columns

In [None]:
open_data_portal_df = prep_open_data_portal(intersecting)

In [None]:
open_data_portal_df.District.unique()

In [None]:
open_data_portal_df.on_shs.value_counts()

In [None]:
open_data_portal_df.loc[
    open_data_portal_df.on_shs == "Y"
].pct_route_on_hwy_across_districts.describe()

In [None]:
open_data_portal_df.loc[
    open_data_portal_df.on_shs == "N"
].pct_route_on_hwy_across_districts.describe()

### Check a couple of routes 

In [None]:
intersecting.portfolio_organization_name.unique()

In [None]:
intersecting.loc[intersecting.portfolio_organization_name == 'Golden Gate Bridge, Highway and Transportation District',].drop(
    columns=["geometry"]
).sort_values(by = ["recent_combined_name"])

In [None]:
intersecting.loc[intersecting.recent_combined_name == '154 Novato - San Francisco',].pct_route_on_hwy.sum()

In [None]:
open_data_portal_df.loc[open_data_portal_df.recent_combined_name == '154 Novato - San Francisco']

In [None]:
intersecting.loc[intersecting.recent_combined_name == '101 Santa Rosa - San Francisco',].pct_route_on_hwy.sum()

In [None]:
open_data_portal_df.loc[open_data_portal_df.recent_combined_name == '101 Santa Rosa - San Francisco']

In [None]:
intersecting.loc[intersecting.recent_combined_name == '132 San Anselmo - San Francisco',].pct_route_on_hwy.sum()

In [None]:
intersecting.loc[intersecting.recent_combined_name == '132 San Anselmo - San Francisco',].shn_route.nunique()

In [None]:
open_data_portal_df.loc[open_data_portal_df.recent_combined_name == '132 San Anselmo - San Francisco']

## Create final dataframes for portfolio

In [None]:
def categorize_percentiles(df, pct_col: str = "pct_route_on_hwy"):
    """
    Bucket a percent-of-route-on-highway column into labelled ranges for
    use as a categorical color scale.

    df: dataframe holding the percentage column; the new percentile_route
    column is added to it in place and the same frame is returned.
    pct_col: name of the percentage column to bucket. Defaults to
    "pct_route_on_hwy"; pass "pct_route_on_hwy_across_districts" for frames
    that only carry the aggregated column.

    Bins are left-closed: [20, 41) -> "20-40", [41, 61) -> "41-60",
    [61, 81) -> "61-80", [81, inf) -> "81-100". Values below 20 fall outside
    every bin and end up as the string "nan".
    """
    bins = [20, 41, 61, 81, np.inf]
    labels = ["20-40", "41-60", "61-80", "81-100"]
    df["percentile_route"] = pd.cut(
        df[pct_col], bins=bins, labels=labels, right=False
    ).astype(str)
    return df

In [None]:
def final_transit_route_shs_outputs(
    intersecting_gdf: gpd.GeoDataFrame,
    open_data_df: pd.DataFrame,
    pct_route_intersection: int,
    district: str,
):
    """
    Take the dataframes from prep_open_data_portal and routes_shn_intersection.
    Prepare them for display on the GTFS Caltrans District Digest.

    intersecting_gdf: geodataframe created by routes_shn_intersection
    (one row per transit route / SHN route / district intersection).
    open_data_df: dataframe created by prep_open_data_portal
    (one row per transit route, District as a comma-separated string).
    pct_route_intersection: cutoff of the % of the transit route intersecting with the SHN
    district: the Caltrans district we are interested in, as the number in
    string form (e.g. "7").

    Returns (map_gdf, text_table): the route geometries joined to the
    open-data attributes for mapping, and a one-row-per-route table listing
    the SHN routes within the requested district.
    """
    route_keys = ["portfolio_organization_name", "recent_combined_name"]
    district = str(district).strip()

    # Match the district exactly. A substring/regex match such as
    # str.contains("1") would also pick up districts 10, 11 and 12, and
    # "2" would pick up district 12.
    route_touches_district = (
        open_data_df.District.astype(str)
        .str.split(",")
        .map(lambda tokens: district in [token.strip() for token in tokens])
        .astype(bool)
    )

    # Filter out for any pct_route_on_hwy that we deem too low & for the relevant district.
    open_data_df = open_data_df.loc[
        (open_data_df.pct_route_on_hwy_across_districts > pct_route_intersection)
        & route_touches_district
    ]
    intersecting_gdf = intersecting_gdf.loc[
        intersecting_gdf.District.astype(str).str.strip() == district
    ]

    # Join back to get the original transit route geometries and the names of the
    # state highways these routes intersect with. This gdf will be used to
    # display a map.
    map_gdf = pd.merge(
        intersecting_gdf[route_keys + ["geometry"]].drop_duplicates(),
        open_data_df,
        on=route_keys,
    )

    # Add column for color scale when mapping
    # map_gdf = categorize_percentiles(map_gdf)

    # We want a text table to display.
    # Have to rejoin to find only the SHN routes that are in the district
    # we are interested in.
    text_table_df = pd.merge(
        intersecting_gdf[route_keys + ["shn_route", "District"]],
        open_data_df[route_keys + ["pct_route_on_hwy_across_districts"]],
        on=route_keys,
    )

    # Now we have to aggregate again so each route will only have one row with the
    # district and SHN route info delineated by commas if there are multiple values.
    # The percentage is identical on every row of a route after the merge, so
    # "max" simply carries it through.
    text_table = group_route_district(text_table_df, "max")

    # Rename for clarity
    text_table = text_table.rename(
        columns={
            "shn_route": f"shn_routes_in_d_{district}",
        }
    )

    return map_gdf, text_table

### Example using 100 Bakersfield - Lancaster which crosses D6, D7, D9

In [None]:
map_gdf_d7, text_df_d7 = final_transit_route_shs_outputs(
    intersecting, open_data_portal_df, 20, "7"
)

In [None]:
map_gdf_d9, text_df_d9 = final_transit_route_shs_outputs(
    intersecting, open_data_portal_df, 20, "9"
)

In [None]:
map_gdf_d6, text_df_d6 = final_transit_route_shs_outputs(
    intersecting, open_data_portal_df, 20, "6"
)

In [None]:
text_df_d7.loc[text_df_d7.recent_combined_name == "100 Bakersfield - Lancaster"]

In [None]:
text_df_d7.loc[text_df_d7.recent_combined_name == "Route 785"]

In [None]:
text_df_d6.loc[text_df_d6.recent_combined_name == "100 Bakersfield - Lancaster"]

In [None]:
text_df_d9.loc[text_df_d9.recent_combined_name == "100 Bakersfield - Lancaster"]

## Sample Map
* This sample shows all the routes across the state that touch a state highway; in the digest, the map will instead be displayed separately for each Caltrans District.

* Read in the buffered SHN file and dissolve it again so it's only one row, since we don't care about each individual state route.
* Amanda, note to self: should save this out to GCS because there's no point in dissolving over & over again
* There is a bug in geopandas, [documented here](https://github.com/geopandas/geopandas/issues/3194): you can't specify both a column and a color in the same `explore()` call.

In [None]:
def dissolve_buffered_for_map(buffer_amount: int) -> gpd.GeoDataFrame:
    """
    Dissolve the buffered SHN by Caltrans district so there is one row per
    district, save it to GCS for the GTFS digest map, and return it.

    buffer_amount: the buffer size embedded in the buffered SHN file name
    (shn_buffered_{buffer_amount}_ft_ct_district_route.parquet).
    """
    GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/state_highway_network/"
    # Read in buffered shn here
    HWY_FILE = (
        f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_ft_ct_district_route.parquet"
    )
    gdf = gpd.read_parquet(HWY_FILE, storage_options={"token": credentials.token})

    # Dissolve by district. shn_route is kept, but after the dissolve it only
    # holds one value per district rather than every route in that district.
    gdf2 = gdf.dissolve("District").reset_index()[["geometry", "District", "shn_route"]]

    # Save
    gdf2.to_parquet(
        f"{GCS_FILE_PATH}shn_buffered_{buffer_amount}_gtfs_digest.parquet",
        filesystem=fs,
    )

    # Return the dissolved frame; previously the function returned None, so
    # callers assigning its result got nothing usable.
    return gdf2

In [None]:
gtfs_digest_shn = dissolve_buffered_for_map(SHN_HWY_BUFFER_FEET)

In [None]:
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/state_highway_network/"

In [None]:
shn_gdf = gpd.read_parquet(
    f"{GCS_FILE_PATH}shn_buffered_{SHN_HWY_BUFFER_FEET}_gtfs_digest.parquet",
    storage_options={"token": credentials.token},
)

In [None]:
shn_gdf.columns

In [None]:
shn_gdf.District.unique()

In [None]:
""" m = shn_gdf.loc[shn_gdf.District == 7].explore(
    height=500,
    width=1000,
    style_kwds={"color": "#9DA4A6", "weight": 6, "opacity": 0.5},
    tiles="CartoDB positron",
    name="shs",
)"""

In [None]:
m

In [None]:
cmap_colors = [
    "#93c3db",
    "#144c87",
    "#8c1024",
    "#e4846c",
]

In [None]:
cmap_colors_continuous = [
    "#93c3db",
    "#82b4d1",
    "#71a5c7",
    "#6096bd",
    "#4f87b3",
    "#3e78a9",
    "#2d699f",
    "#1c5a95",
    "#0b4b8b",
    "#003c81",
]

In [None]:
map_gdf_d7.columns

In [None]:
# map_gdf_d7.explore(
#    "percentile_route", m=m, categorical=True, cmap=cmap_colors, legend=True
# )

In [None]:
# map_gdf_d7.explore("pct_route_on_hwy_across_districts", m=m, cmap= 'Blues', legend = True)

### Sample Text Table

In [None]:
text_df_d7.sort_values(by=["pct_route_on_hwy_across_districts"], ascending=False)