In [20]:
import pandas as pd
import geopandas as gpd

from functools import cache

from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from calitp_data_analysis.geography_utils import WGS84, CA_NAD83Albers_m
from calitp_data_analysis.sql import query_sql

@cache
def gcs_geopandas():
    return GCSGeoPandas()

In [6]:
eocs = pd.read_excel('./County_EOCs.xlsx')
eocs.columns = eocs.columns.str.lower().str.replace(' ', '_')

In [8]:
eocs.head(3)

Unnamed: 0,oa,physical_eoc_address,region,lat,lon
0,Alpine,"99 Water St Markleville, CA 96120",Inland,38.69453,-119.77899
1,Amador,"700 Court St., Jackson, CA 95642",Inland,38.351547,-120.76937
2,Butte,"205 Mira Loma Suite 30, Oroville, CA 95965",Inland,39.520803,-121.552203


In [10]:
path = 'calitp-analytics-data/data-analyses/gtfs_schedule/county_eocs.parquet'

In [54]:
gcs_geopandas().geo_data_frame_to_parquet(eocs, path)

In [13]:
eocs = gpd.GeoDataFrame(eocs, geometry=gpd.points_from_xy(eocs.lon, eocs.lat), crs=WGS84)

In [55]:
# eocs.explore()

In [18]:
# https://gis.data.ca.gov/datasets/CDEGIS::california-counties-3/explore

counties = gpd.read_file("https://services3.arcgis.com/fdvHcZVgB2QSRNkL/arcgis/rest/services/California_Counties/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson")

In [19]:
counties

Unnamed: 0,OBJECTID,Year,CountyName,Shape__Area,Shape__Length,COUNTY_FIPS,DistrictCount,geometry
0,117,2023,Alameda,3079162000.0,435624.8,1,22,"MULTIPOLYGON (((-121.96737 37.46764, -121.9683..."
1,118,2023,Alpine,3156006000.0,275565.4,3,2,"POLYGON ((-119.58545 38.71322, -119.58494 38.7..."
2,119,2023,Amador,2562528000.0,359598.6,5,2,"POLYGON ((-120.07240 38.70277, -120.07240 38.7..."
3,120,2023,Colusa,4990771000.0,386043.0,11,5,"POLYGON ((-121.89003 39.38387, -121.88996 39.3..."
4,121,2023,El Dorado,7624183000.0,467800.1,17,16,"POLYGON ((-119.90433 38.93333, -119.90442 38.9..."
5,122,2023,Humboldt,16173240000.0,803452.9,13,32,"MULTIPOLYGON (((-124.22308 40.68537, -124.2248..."
6,123,2023,Inyo,41088430000.0,1058774.0,27,7,"POLYGON ((-117.83274 37.46493, -117.83150 37.4..."
7,124,2023,Kern,31854110000.0,861842.3,29,47,"POLYGON ((-117.63430 35.79727, -117.63435 35.7..."
8,125,2023,Lake,5719581000.0,468541.0,85,7,"POLYGON ((-122.73908 39.38327, -122.75112 39.3..."
9,126,2023,Lassen,21275570000.0,776771.7,35,11,"POLYGON ((-119.99988 41.18398, -119.99989 41.1..."


In [49]:
bus_stops = query_sql(
'''
SELECT name, month_first_day, day_type, route_type_3 + route_type_11 AS bus_arrivals,
daily_stop_arrivals, stop_id, n_days, pt_geom
FROM `cal-itp-data-infra.mart_gtfs_rollup.fct_monthly_scheduled_stops`
WHERE year = 2025 AND month = 12
AND route_type_3 > 0 OR route_type_11 > 0
LIMIT 1000
'''
)

In [50]:
bus_stops = gpd.GeoDataFrame(bus_stops, geometry=gpd.GeoSeries.from_wkt(bus_stops.pt_geom), crs=WGS84).drop(columns=['pt_geom'])

In [51]:
bus_stops.head(10)

Unnamed: 0,name,month_first_day,day_type,bus_arrivals,daily_stop_arrivals,stop_id,n_days,geometry
0,Eastern Sierra Schedule,2023-11-01,Saturday,310.0,16.0,2324950,3,POINT (-118.97099 37.63867)
1,Eastern Sierra Schedule,2023-11-01,Weekday,317.4,28.0,20302,21,POINT (-118.99126 37.64419)
2,Eastern Sierra Schedule,2023-12-01,Sunday,192.5,312.4,20355,5,POINT (-118.97296 37.64022)
3,Eastern Sierra Schedule,2024-01-01,Weekday,477.2,204.0,20300,23,POINT (-118.99542 37.64427)
4,Eastern Sierra Schedule,2024-03-01,Weekday,1216.0,200.0,20366,21,POINT (-118.98462 37.63668)
5,Eastern Sierra Schedule,2024-03-01,Saturday,2432.0,1400.0,20317,5,POINT (-118.96664 37.64075)
6,Eastern Sierra Schedule,2024-04-01,Saturday,391.6,182.0,20302,4,POINT (-118.99126 37.64419)
7,Eastern Sierra Schedule,2024-04-01,Sunday,530.1,189.0,20324,4,POINT (-118.97093 37.63721)
8,Eastern Sierra Schedule,2024-05-01,Weekday,13.4,92.3,20304,23,POINT (-118.98840 37.64582)
9,Eastern Sierra Schedule,2024-05-01,Sunday,31.0,21.0,20333,3,POINT (-118.95979 37.64756)


In [52]:
type(bus_stops)

geopandas.geodataframe.GeoDataFrame

In [56]:
# bus_stops.drop(columns='month_first_day').explore()