In [1]:
import os
# Set CALITP_BQ_MAX_BYTES to 1_000_000_000_000 (10**12 bytes = 1 TB, decimal units).
# NOTE(review): presumably read by the calitp tooling as a BigQuery byte limit — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000)

In [2]:
import geopandas as gpd
import pandas as pd
# from siuba import *
import numpy as np

In [3]:
from update_vars import (analysis_date, AM_PEAK, PM_PEAK, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS,
SEGMENT_BUFFER_METERS, AM_PEAK, PM_PEAK, HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD, HALF_MILE_BUFFER_METERS)

In [4]:
from calitp_data_analysis.tables import tbls

In [5]:
from calitp_data_analysis import get_fs

# Test out MPO-provided planned major stops

Here is the schema to use:

| Field name |	Data type |Naming convention for field values |
|-----|-----|-----|
| mpo	| Text |	Use your MPO/RTPA acronym, not full name |
| plan_name	| Text |	Include plan name and year e.g. “2025 mtp” |
| hqta_type	| Text |	Use types from Caltrans major stop map: major_stop_bus, major_stop_rail, major_stop_ferry, major_stop_brt |
| stop_id (optional)	| Text |	None |
| avg_trips_per_peak_hr (optional)	| Float |	None |


In [6]:
# Filesystem handle from calitp_data_analysis; used below to open the MPO GeoJSON files.
fs = get_fs()

## SANDAG

In [7]:
# SANDAG's planned major stops file (GeoJSON) in the mpo_input/ folder under GCS_FILE_PATH.
path = f'{GCS_FILE_PATH}mpo_input/sandag.geojson'

In [8]:
# gpd.read_file(path)

In [9]:
# Open the SANDAG GeoJSON through the filesystem handle and parse it into a GeoDataFrame.
with fs.open(path) as geojson_file:
    sandag = gpd.read_file(geojson_file)

In [10]:
sandag.head(3)

Unnamed: 0,OBJECTID,mpo,plan_name,hqta_type,stop_id,avg_trips_per_peak_hr,geometry
0,1,SANDAG,2021RP_2035,major_stop_bus,23740,,POINT (-117.29703 33.25427)
1,2,SANDAG,2021RP_2035,major_stop_brt,5592,,POINT (-117.30019 33.25080)
2,3,SANDAG,2021RP_2035,major_stop_bus,5601,,POINT (-117.29531 33.24924)


## SCAG

In [11]:
# SCAG's planned major stops file (GeoJSON) in the mpo_input/ folder under GCS_FILE_PATH.
path = f'{GCS_FILE_PATH}mpo_input/scag.geojson'

In [12]:
# Open the SCAG GeoJSON through the filesystem handle and parse it into a GeoDataFrame.
with fs.open(path) as geojson_file:
    scag = gpd.read_file(geojson_file)

In [13]:
scag.head(3)

Unnamed: 0,FID,mpo,plan_name,hqta_type,geometry
0,0,SCAG,2024 RTP,major_stop_bus,POINT (-119.17982 34.19901)
1,1,SCAG,2024 RTP,major_stop_bus,POINT (-119.17770 34.16101)
2,2,SCAG,2024 RTP,major_stop_bus,POINT (-118.60602 34.21970)


## SACOG

In [14]:
# SACOG's planned major stops file (GeoJSON) in the mpo_input/ folder under GCS_FILE_PATH.
path = f'{GCS_FILE_PATH}mpo_input/sacog.geojson'

In [15]:
# Open the SACOG GeoJSON through the filesystem handle and parse it into a GeoDataFrame.
with fs.open(path) as geojson_file:
    sacog = gpd.read_file(geojson_file)

In [16]:
sacog.head(3)

Unnamed: 0,OBJECTID,stop_id,mpo,plan_name,hqta_type,avg_trips_per_peak_hr,geometry
0,1,2008,SACOG,2020 MTP-SCS,major_stop_bus,,POINT (-121.49482 38.58068)
1,2,2020,SACOG,2020 MTP-SCS,major_stop_brt,,POINT (-121.28658 38.74549)
2,3,2025,SACOG,2020 MTP-SCS,major_stop_bus,,POINT (-121.54711 38.58500)


## MTC

In [17]:
# MTC's planned major stops file (GeoJSON) in the mpo_input/ folder under GCS_FILE_PATH.
path = f'{GCS_FILE_PATH}mpo_input/mtc.geojson'

In [18]:
# Open the MTC GeoJSON through the filesystem handle and parse it into a GeoDataFrame.
with fs.open(path) as geojson_file:
    mtc = gpd.read_file(geojson_file)

In [19]:
mtc.head(3)

Unnamed: 0,FID,primary_ag,hqta_type,mpo,plan_name,geometry
0,0,AC Transit,major_stop_brt,MTC,Plan Bay Area 2050 (2021),POINT (-122.29408 37.87535)
1,1,SamTrans,major_stop_brt,MTC,Plan Bay Area 2050 (2021),POINT (-122.35600 37.57840)
2,2,SamTrans,major_stop_brt,MTC,Plan Bay Area 2050 (2021),POINT (-122.34807 37.57595)


In [20]:
# Columns every MPO submission must provide (the non-optional fields in the schema table above).
# The next few cells try different ways of checking for them against the MTC frame.
required_cols = ['mpo', 'hqta_type', 'plan_name']

In [21]:
set(required_cols).issubset(mtc.columns)

True

In [22]:
mtc.columns.isin(required_cols)

array([False, False,  True,  True,  True, False])

In [23]:
[col for col in required_cols if col in mtc.columns]

['mpo', 'hqta_type', 'plan_name']

In [24]:
def check_standardize(mpo_gdf):
    """Validate an MPO submission and trim it to the shared schema columns.

    Parameters
    ----------
    mpo_gdf : DataFrame-like (a GeoDataFrame in this notebook)
        One MPO's planned major stops as read from its GeoJSON file.

    Returns
    -------
    The input restricted to, in this order: the required columns
    ('mpo', 'hqta_type', 'plan_name'), whichever optional columns are present
    ('stop_id', 'avg_trips_per_peak_hr'), and 'geometry' if present.
    All other columns (e.g. OBJECTID, FID) are dropped. The input is not modified.

    Raises
    ------
    AssertionError
        If any required column is absent; the message lists the missing ones.
    """
    required_cols = ['mpo', 'hqta_type', 'plan_name']
    optional_cols = ['stop_id', 'avg_trips_per_peak_hr']

    missing_cols = [col for col in required_cols if col not in mpo_gdf.columns]
    if missing_cols:
        # Raised explicitly instead of using a bare `assert` so the check still runs under
        # `python -O` and the error says which columns are absent.
        raise AssertionError(f'MPO input is missing required columns: {missing_cols}')

    # Keep schema order; optional columns and geometry are kept only when supplied.
    all_cols = required_cols + optional_cols + ['geometry']
    filter_cols = [col for col in all_cols if col in mpo_gdf.columns]
    return mpo_gdf[filter_cols]

In [25]:
# Trim each MPO submission to the shared schema, then stack them into a single frame.
standardized_frames = []
for mpo_frame in (sacog, sandag, scag, mtc):
    standardized_frames.append(check_standardize(mpo_frame))
mpo_planned_mts = pd.concat(standardized_frames)

In [26]:
mpo_planned_mts

Unnamed: 0,mpo,hqta_type,plan_name,stop_id,avg_trips_per_peak_hr,geometry
0,SACOG,major_stop_bus,2020 MTP-SCS,2008,,POINT (-121.49482 38.58068)
1,SACOG,major_stop_brt,2020 MTP-SCS,2020,,POINT (-121.28658 38.74549)
2,SACOG,major_stop_bus,2020 MTP-SCS,2025,,POINT (-121.54711 38.58500)
3,SACOG,major_stop_bus,2020 MTP-SCS,2025,,POINT (-121.54711 38.58500)
4,SACOG,major_stop_bus,2020 MTP-SCS,2068,,POINT (-121.48656 38.57847)
...,...,...,...,...,...,...
300,MTC,major_stop_brt,Plan Bay Area 2050 (2021),,,POINT (-122.37223 37.58778)
301,MTC,major_stop_rail,Plan Bay Area 2050 (2021),,,POINT (-121.73110 37.70550)
302,MTC,major_stop_rail,Plan Bay Area 2050 (2021),,,POINT (-122.65597 38.26787)
303,MTC,major_stop_brt,Plan Bay Area 2050 (2021),,,POINT (-122.34518 37.96239)


In [27]:
# mpo_planned_mts.explore()

In [28]:
import intake

In [29]:
# Open the shared intake data catalog. The path is relative, so it resolves against the
# notebook's working directory.
catalog = intake.open_catalog("../_shared_utils/shared_utils/shared_data_catalog.yml")

In [30]:
catalog.hqta_areas

hqta_areas:
  args:
    urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_areas.parquet
  description: CA high quality transit areas with polygon geometry (open data)
  driver: intake_geopandas.geopandas.GeoParquetSource
  metadata:
    catalog_dir: /home/jovyan/data-analyses/high_quality_transit_areas/../_shared_utils/shared_utils/


In [31]:
catalog.hqta_stops

hqta_stops:
  args:
    urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_points.parquet
  description: CA high quality transit areas with point geometry (open data)
  driver: intake_geopandas.geopandas.GeoParquetSource
  metadata:
    catalog_dir: /home/jovyan/data-analyses/high_quality_transit_areas/../_shared_utils/shared_utils/


In [32]:
import google.auth

In [33]:
# Resolve Google Application Default Credentials; `credentials` is passed to the GCS reads
# and writes further down.
credentials, project = google.auth.default()

## add buffered planned stops

In [34]:
# Load the current HQTA polygons from the shared catalog, passing the default-credentials
# token through to the GCS read.
areas_read_kwargs = {"storage_options": {"token": credentials.token}}
existing_areas = catalog.hqta_areas(geopandas_kwargs=areas_read_kwargs).read()

In [35]:
# Reproject the planned stops to PROJECT_CRS ahead of buffering.
# NOTE(review): presumably a projected CRS in meters, matching HALF_MILE_BUFFER_METERS — confirm in update_vars.
mpo_planned_mts = mpo_planned_mts.to_crs(PROJECT_CRS)

In [36]:
# Label every MPO-provided row with a fixed hqta_details value before it is combined
# with the existing HQTA data.
mpo_planned_mts['hqta_details'] = 'mpo_rtp_planned_major_stop'

In [37]:
# Turn every planned stop point into a polygon by buffering it by HALF_MILE_BUFFER_METERS.
# The buffers go onto a copy, so mpo_planned_mts keeps its point geometry.
planned_stop_buffers = mpo_planned_mts.geometry.buffer(HALF_MILE_BUFFER_METERS)
mpo_buffered = mpo_planned_mts.copy()
mpo_buffered.geometry = planned_stop_buffers

In [38]:
# mpo_buffered

In [39]:
# existing_areas

In [40]:
from calitp_data_analysis.geography_utils import WGS84

In [41]:
# Convert the buffers to WGS84 before combining them with the existing HQTA areas.
# NOTE(review): assumes existing_areas is stored in WGS84 — confirm.
mpo_buffered = mpo_buffered.to_crs(WGS84)

In [42]:
# Append the buffered MPO planned stops to the existing HQTA polygons.
# The combined frame is later written back to the same parquet that existing_areas was
# read from, so rows added by a previous run of this notebook (tagged
# 'mpo_rtp_planned_major_stop') are dropped first. Without this, every re-run would
# append another copy of the planned stops.
if 'hqta_details' in existing_areas.columns:
    previously_added = existing_areas['hqta_details'] == 'mpo_rtp_planned_major_stop'
    areas_without_planned = existing_areas[~previously_added]
else:
    areas_without_planned = existing_areas
areas_with_planned = pd.concat([areas_without_planned, mpo_buffered])

In [43]:
np.float64

numpy.float64

In [44]:
# Normalize dtypes before writing. Only non-null stop_ids are converted to text:
# a blanket astype(str) turns missing ids (MPOs that supplied no stop_id) into the
# literal string 'nan' instead of leaving them null.
area_stop_ids = areas_with_planned['stop_id']
areas_with_planned = areas_with_planned.astype({'avg_trips_per_peak_hr': np.float64})
areas_with_planned['stop_id'] = area_stop_ids.astype(str).where(area_stop_ids.notna())

In [45]:
import gcsfs
# GCS filesystem authenticated with the default credentials; used for the parquet writes below.
# Note: this rebinds `fs`, which until now held the handle returned by get_fs().
fs = gcsfs.GCSFileSystem(token=credentials)

In [46]:
# Write the combined areas to the HQTA areas parquet in GCS.
# Note: this is the same object that catalog.hqta_areas reads, so it is overwritten in place.
hqta_areas_parquet = 'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_areas.parquet'
areas_with_planned.to_parquet(hqta_areas_parquet, filesystem=fs)

## add planned stop points

In [47]:
# Load the current HQTA stop points from the shared catalog, passing the default-credentials
# token through to the GCS read.
stops_read_kwargs = {"storage_options": {"token": credentials.token}}
existing_stops = catalog.hqta_stops(geopandas_kwargs=stops_read_kwargs).read()

In [48]:
# The planned stop points were reprojected to PROJECT_CRS earlier; convert them to WGS84
# before combining them with the existing stops.
# NOTE(review): assumes existing_stops is stored in WGS84 — confirm.
mpo_planned_mts = mpo_planned_mts.to_crs(WGS84)

In [49]:
# Append the MPO planned stop points to the existing HQTA stops.
# The combined frame is later written back to the same parquet that existing_stops was
# read from, so rows added by a previous run of this notebook (tagged
# 'mpo_rtp_planned_major_stop') are dropped first. Without this, every re-run would
# append another copy of the planned stops.
if 'hqta_details' in existing_stops.columns:
    previously_added = existing_stops['hqta_details'] == 'mpo_rtp_planned_major_stop'
    stops_without_planned = existing_stops[~previously_added]
else:
    stops_without_planned = existing_stops
stops_with_planned = pd.concat([stops_without_planned, mpo_planned_mts])

In [50]:
# Normalize dtypes before writing. Only non-null stop_ids are converted to text:
# a blanket astype(str) turns missing ids (MPOs that supplied no stop_id) into the
# literal string 'nan' instead of leaving them null.
point_stop_ids = stops_with_planned['stop_id']
stops_with_planned = stops_with_planned.astype({'avg_trips_per_peak_hr': np.float64})
stops_with_planned['stop_id'] = point_stop_ids.astype(str).where(point_stop_ids.notna())

In [51]:
# Write the combined stops to the HQTA points parquet in GCS.
# Note: this is the same object that catalog.hqta_stops reads, so it is overwritten in place.
hqta_points_parquet = 'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_points.parquet'
stops_with_planned.to_parquet(hqta_points_parquet, filesystem=fs)