# Routes on SHN Routes 
* Transit routes that cross SHN is now on our open data portal.
* Replace old code with this new dataset.

## Notes
* Many operators missing from the open data portal.

In [1]:
import _ct_district_grain_data_prep as _ct_district_data_prep
import geopandas as gpd
import pandas as pd
from calitp_data_analysis import geography_utils
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS

In [2]:
import google.auth

credentials, project = google.auth.default()
import gcsfs

fs = gcsfs.GCSFileSystem()

In [3]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [4]:
district = "07 - Los Angeles / Ventura"

In [5]:
# Extract district from district string when doing an sjoin
# between CT districts & routes
district_int = [int(s) for s in district.split() if s.isdigit()][0]

In [6]:
# Load Datasets
operator_df = _ct_district_data_prep.data_wrangling_operator_profile(district)

In [7]:
operator_list = list(operator_df.analysis_name.unique())

In [8]:
# operator_list

In [9]:
open_data_url = "https://caltrans-gis.dot.ca.gov/arcgis/rest/services/CHrailroad/CA_Transit_Routes/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson"

In [10]:
open_data_gdf = gpd.read_file(open_data_url)[
    [
        "agency",
        "route_name",
        "shn_route",
        "on_shs",
        "shn_districts",
        "pct_route_on_hwy_all_districts",
        "geometry",
    ]
]

In [11]:
open_data_gdf.columns

Index(['agency', 'route_name', 'shn_route', 'on_shs', 'shn_districts',
       'pct_route_on_hwy_all_districts', 'geometry'],
      dtype='object')

In [12]:
open_data_gdf.shape

(2000, 7)

In [13]:
open_data_gdf.on_shs.unique()

array([1, 0])

In [14]:
open_data_df = open_data_gdf.loc[
    (open_data_gdf.agency.isin(operator_list))
]

In [17]:
open_data_agencies = list(open_data_df.agency.unique())

## Many agencies are missing from the Open Data Portal?

In [18]:
set(operator_list)-set(open_data_agencies)

{'Antelope Valley Transit Authority',
 'City of Arcadia',
 'City of Artesia',
 'City of Baldwin Park',
 'City of Bell',
 'City of Bell Gardens',
 'City of Bellflower',
 'City of Carson',
 'City of Cerritos',
 'City of Commerce',
 'City of Cudahy',
 'City of Culver City',
 'City of Downey',
 'City of El Monte',
 'City of El Segundo',
 'City of Gardena',
 'City of Glendale',
 'City of Glendora',
 'City of Huntington Park',
 'City of Inglewood',
 'City of La Puente',
 'City of Lawndale',
 'City of Los Angeles',
 'City of Maywood',
 'City of Monterey Park',
 'City of Pasadena',
 'City of Redondo Beach',
 'City of Rosemead',
 'City of San Fernando',
 'City of Santa Clarita',
 'City of Sierra Madre',
 'City of South Gate',
 'City of West Covina',
 'Foothill Transit',
 'Long Beach Transit',
 'Los Angeles County',
 'Los Angeles County Metropolitan Transportation Authority',
 'Los Angeles World Airports',
 'Southern California Regional Rail Authority',
 'University of California, Los Angeles',


## Check `open_data/create_routes_data` again

In [20]:
TRAFFIC_OPS_GCS = f"{GTFS_DATA_DICT.gcs_paths.GCS}traffic_ops/"

In [21]:
TRAFFIC_OPS_GCS

'gs://calitp-analytics-data/data-analyses/traffic_ops/'

In [22]:
june_url = "gs://calitp-analytics-data/data-analyses/traffic_ops/ca_transit_routes_2025-06-11.parquet"

In [23]:
june_gdf = gpd.read_parquet(june_url,
    storage_options={"token": credentials.token},
)

In [24]:
june_gdf.columns

Index(['n_trips', 'geometry', 'schedule_gtfs_dataset_key', 'route_id',
       'route_type', 'shape_id', 'route_name_used', 'name', 'base64_url',
       'organization_source_record_id', 'organization_name',
       'caltrans_district'],
      dtype='object')

In [25]:
june_gdf.caltrans_district.unique()

array(['06 - Fresno / Bakersfield', '04 - Bay Area / Oakland',
       '07 - Los Angeles / Ventura', '03 - Marysville / Sacramento',
       '01 - Eureka', '08 - San Bernardino / Riverside', '02 - Redding',
       '11 - San Diego', '10 - Stockton',
       '05 - San Luis Obispo / Santa Barbara', '12 - Orange County',
       '09 - Bishop'], dtype=object)

In [26]:
june_gdf_d7 = june_gdf.loc[june_gdf.caltrans_district == '07 - Los Angeles / Ventura']

In [28]:
june_ops = list(june_gdf_d7.organization_name.unique())

In [29]:
set(operator_list)-set(june_ops)

{'City of Bellflower',
 'City of El Segundo',
 'City of La Puente',
 'City of Sierra Madre',
 'FlixBus and Greyhound',
 'Long Beach Transit',
 'Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)'}