# Transit On the SHN 
* [GH Issue](https://github.com/cal-itp/data-analyses/issues/1477)


In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
import google.auth
credentials, project = google.auth.default()

In [9]:
from shared_utils import (
    catalog_utils,
    dask_utils,
    gtfs_utils_v2,
    portfolio_utils,
    publish_utils,
    rt_dates,
    rt_utils,
)
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS

In [2]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

## Load in Operator Routes

In [5]:
OPERATOR_ROUTE = GTFS_DATA_DICT.digest_tables.operator_routes_map

In [6]:
op_geography_df = gpd.read_parquet(
    f"{RT_SCHED_GCS}{OPERATOR_ROUTE}.parquet",
    storage_options={"token": credentials.token},
)

In [8]:
op_geography_df.columns

Index(['shape_array_key', 'geometry', 'feed_key', 'schedule_gtfs_dataset_key',
       'route_id', 'direction_id', 'route_key', 'route_length',
       'route_length_miles', 'is_downtown_local', 'is_local', 'is_coverage',
       'is_rapid', 'is_express', 'is_rail', 'is_ferry',
       'organization_source_record_id', 'organization_name', 'service_date',
       'portfolio_organization_name', 'name', 'combined_name',
       'recent_combined_name', 'recent_route_id',
       'route_length_miles_percentile', 'percentile_group',
       'shortest_longest'],
      dtype='object')

In [11]:
most_recent_dates = publish_utils.filter_to_recent_date(
    df=op_geography_df, group_cols=["portfolio_organization_name","route_id"]
)

In [12]:
most_recent_routes  = pd.merge(
    op_geography_df, most_recent_dates, on=["portfolio_organization_name", "route_id", "service_date"], how="inner"
)

In [14]:
most_recent_routes.shape

(5931, 27)

## Load in SHS 
* https://gis.data.ca.gov/datasets/77f2d7ba94e040a78bfbe36feb6279da_0/explore

In [16]:
shs_url = "https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHhighway/SHN_Lines/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"

In [18]:
shs_gdf = gpd.read_file(shs_url)

In [30]:
shs_gdf.shape

(2000, 20)

In [20]:
shs_gdf.columns

Index(['OBJECTID', 'Route', 'RteSuffix', 'RouteS', 'PMRouteID', 'County',
       'District', 'PMPrefix', 'bPM', 'ePM', 'PMSuffix', 'bPMc', 'ePMc',
       'bOdometer', 'eOdometer', 'AlignCode', 'RouteType', 'Direction',
       'Shape__Length', 'geometry'],
      dtype='object')

In [29]:
shs_gdf.drop(columns = ["geometry"]).sample(3)

Unnamed: 0,OBJECTID,Route,RteSuffix,RouteS,PMRouteID,County,District,PMPrefix,bPM,ePM,PMSuffix,bPMc,ePMc,bOdometer,eOdometer,AlignCode,RouteType,Direction,Shape__Length
745,746,32,,32,BUT032...R,BUT,3,,37.08,37.75,,37.083,37.749,48.85,49.52,Right,State,EB,1100.6
1364,1365,49,,49,ED.049...L,ED,3,,13.55,14.6,,13.546,14.597,146.54,147.6,Left,State,SB,2150.29
793,794,59,,59,MAD059...R,MAD,6,,0.0,0.06,,0.0,0.057,0.19,0.25,Right,State,NB,104.66


In [21]:
shs_gdf.shape

(2000, 20)

In [23]:
shs_gdf.Route.nunique()

70

In [31]:
# shs_gdf.explore()

In [26]:
# Dissolve?
shs_gdf_dissolved = shs_gdf.dissolve(by = ["Route"])

In [32]:
# shs_gdf_dissolved.explore()