In [1]:
import os
# 10**12 bytes, i.e. 1 TB in decimal units; stored as a string because env vars are text.
# NOTE(review): presumably read by the calitp tooling as its BigQuery byte limit -- confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = f"{10 ** 12}"

In [2]:
import geopandas as gpd
import pandas as pd
# from siuba import *
import numpy as np
import sys
# Make modules under ./scripts/ importable. The path is relative to the kernel's
# working directory, so the notebook must be run from its own folder.
# NOTE(review): presumably this is where `update_vars` (imported below) lives -- confirm.
sys.path.append('./scripts/')

In [3]:
from update_vars import (analysis_date, AM_PEAK, PM_PEAK, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS,
SEGMENT_BUFFER_METERS, AM_PEAK, PM_PEAK, HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD, HALF_MILE_BUFFER_METERS,
                        MPO_DATA_PATH)

In [4]:
from calitp_data_analysis.tables import tbls

In [6]:
from calitp_data_analysis import get_fs
# Filesystem handle used below (`fs.ls`) to list the MPO input folder.
# NOTE: `fs` is rebound later in this notebook to a gcsfs.GCSFileSystem.
fs = get_fs()
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
# Reader object; read_standardize_mpo_input calls its `read_file` to load each MPO geojson.
gcsgp = GCSGeoPandas()

In [11]:
# Build the list of MPO names from the `.geojson` objects in the input folder.
# Only `.geojson` files are kept because read_standardize_mpo_input reads
# `{MPO_DATA_PATH}{name}.geojson`; any other object (or the folder placeholder,
# whose basename is empty) would otherwise produce a name that cannot be read.
# Only the `.geojson` suffix is stripped, so names containing dots survive intact.
mpos = [
    file_name[:-len('.geojson')]
    for file_name in (listed_path.split('/')[-1] for listed_path in fs.ls(MPO_DATA_PATH))
    if file_name.endswith('.geojson') and file_name != '.geojson'
]

In [12]:
# Display the folder prefix the MPO inputs are read from (note the trailing slash).
MPO_DATA_PATH

'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/mpo_input/'

In [24]:
def read_standardize_mpo_input(mpo_name: str, mpo_data_path = MPO_DATA_PATH, gcsgp = gcsgp) -> gpd.GeoDataFrame:
    """
    Read one MPO's planned major transit stops and enforce the expected schema.

    The file `{mpo_data_path}{mpo_name}.geojson` is read through `gcsgp`,
    checked for the required columns, and trimmed to the known columns.

    Parameters
    ----------
    mpo_name : str
        File stem of the MPO's geojson (without the `.geojson` extension).
    mpo_data_path : str
        Folder prefix. It is concatenated directly with `mpo_name`, so it
        must end with a path separator.
    gcsgp :
        Object exposing `read_file(path)` that returns the file's contents
        as a GeoDataFrame.

    Returns
    -------
    gpd.GeoDataFrame
        Columns in this order: required columns, then whichever optional
        columns are present, then `geometry` (if present).

    Raises
    ------
    AssertionError
        If any required column is missing from the input file.
    """
    # Use the caller-supplied prefix; the original ignored this parameter
    # and always read from the module-level MPO_DATA_PATH.
    mpo_gdf = gcsgp.read_file(f'{mpo_data_path}{mpo_name}.geojson')
    required_cols = ['mpo', 'hqta_type', 'plan_name']
    optional_cols = ['stop_id', 'avg_trips_per_peak_hr', 'agency_primary']
    all_cols = required_cols + optional_cols + ['geometry']
    missing_cols = [col for col in required_cols if col not in mpo_gdf.columns]
    assert not missing_cols, f'{mpo_name}: missing required columns {missing_cols}'
    # Drop any extra columns the MPO supplied; keep a stable column order.
    filter_cols = [col for col in all_cols if col in mpo_gdf.columns]
    mpo_gdf = mpo_gdf[filter_cols]
    return mpo_gdf

In [25]:
# Load every MPO submission with the schema-enforcing reader and stack them.
# Row labels are not reset, so each MPO's rows keep their own 0..n-1 labels.
mpo_planned_mts = pd.concat(list(map(read_standardize_mpo_input, mpos)))

In [28]:
# Inspect the stacked MPO submissions; optional columns an MPO did not supply show as missing.
mpo_planned_mts

Unnamed: 0,mpo,hqta_type,plan_name,agency_primary,geometry,stop_id,avg_trips_per_peak_hr
0,MTC,major_stop_brt,Plan Bay Area 2050 (2021),AC Transit,POINT (-122.29408 37.87535),,
1,MTC,major_stop_brt,Plan Bay Area 2050 (2021),SamTrans,POINT (-122.35600 37.57840),,
2,MTC,major_stop_brt,Plan Bay Area 2050 (2021),SamTrans,POINT (-122.34807 37.57595),,
3,MTC,major_stop_brt,Plan Bay Area 2050 (2021),SamTrans,POINT (-122.34752 37.57564),,
4,MTC,major_stop_brt,Plan Bay Area 2050 (2021),SamTrans,POINT (-122.34291 37.57451),,
...,...,...,...,...,...,...,...
1345,SCAG,major_stop_rail,2024 RTP,,POINT (-118.16458 33.91167),,
1346,SCAG,major_stop_rail,2024 RTP,,POINT (-118.22498 33.98541),,
1347,SCAG,major_stop_rail,2024 RTP,,POINT (-118.16146 33.90424),,
1348,SCAG,major_stop_rail,2024 RTP,,POINT (-118.08148 33.85992),,


In [34]:
# Spot-check the `mpo` value of the first row.
mpo_planned_mts.iloc[0].mpo

'MTC'

In [42]:
# Probe the Python type of `stop_id` in the first row. That row has no stop_id,
# and the recorded result is `float` (a missing value), not a string.
type(mpo_planned_mts.iloc[0].stop_id)

float

In [44]:
# Confirm that a missing `stop_id` is not a str (recorded result: False).
isinstance(mpo_planned_mts.iloc[0].stop_id, str)

False

In [27]:
# mpo_planned_mts.explore()

In [28]:
import intake

In [29]:
# Open the shared intake catalog; the path is relative to the kernel's working directory.
shared_catalog_yml = "../_shared_utils/shared_utils/shared_data_catalog.yml"
catalog = intake.open_catalog(shared_catalog_yml)

In [30]:
# Show the catalog entry for the HQTA polygons (its `urlpath` is the parquet read and rewritten below).
catalog.hqta_areas

hqta_areas:
  args:
    urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_areas.parquet
  description: CA high quality transit areas with polygon geometry (open data)
  driver: intake_geopandas.geopandas.GeoParquetSource
  metadata:
    catalog_dir: /home/jovyan/data-analyses/high_quality_transit_areas/../_shared_utils/shared_utils/


In [31]:
# Show the catalog entry for the HQTA points (its `urlpath` is the parquet read and rewritten below).
catalog.hqta_stops

hqta_stops:
  args:
    urlpath: gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_points.parquet
  description: CA high quality transit areas with point geometry (open data)
  driver: intake_geopandas.geopandas.GeoParquetSource
  metadata:
    catalog_dir: /home/jovyan/data-analyses/high_quality_transit_areas/../_shared_utils/shared_utils/


In [32]:
import google.auth

In [33]:
# Obtain Google default credentials. `credentials` is reused below for the catalog
# reads (`credentials.token`) and for the gcsfs filesystem.
# NOTE(review): `project` is not used anywhere in the visible notebook.
# NOTE(review): `credentials.token` may be unset until the credentials are refreshed -- confirm.
credentials, project = google.auth.default()

## add buffered planned stops

In [34]:
# Read the currently published HQTA polygons through the catalog,
# passing the credential token as a storage option.
areas_read_kwargs = {"storage_options": {"token": credentials.token}}
existing_areas = catalog.hqta_areas(geopandas_kwargs=areas_read_kwargs).read()

In [35]:
# Reproject the planned stops to the project CRS before buffering.
# NOTE(review): assumes PROJECT_CRS uses meters, since the buffer constant is named in meters -- confirm.
mpo_planned_mts = mpo_planned_mts.to_crs(crs=PROJECT_CRS)

In [36]:
# Tag every MPO-provided row so planned stops can be told apart from existing ones.
planned_stop_details = 'mpo_rtp_planned_major_stop'
mpo_planned_mts['hqta_details'] = planned_stop_details

In [37]:
# Build the buffered polygons first, then attach them to a copy so the
# point geometries in `mpo_planned_mts` stay untouched for the points export.
planned_stop_buffers = mpo_planned_mts.geometry.buffer(HALF_MILE_BUFFER_METERS)
mpo_buffered = mpo_planned_mts.copy()
mpo_buffered.geometry = planned_stop_buffers

In [38]:
# mpo_buffered

In [39]:
# existing_areas

In [40]:
from calitp_data_analysis.geography_utils import WGS84

In [41]:
# Convert the buffered polygons to WGS84 before combining them with the published areas.
# NOTE(review): presumably the published areas are stored in WGS84 -- confirm.
mpo_buffered = mpo_buffered.to_crs(crs=WGS84)

In [42]:
# Append the buffered planned stops to the published areas.
# This notebook writes the result back to the same parquet that `existing_areas`
# was read from, so a re-run would append the planned rows a second time.
# Rows tagged by a previous run are dropped first to keep the step idempotent.
planned_details_tag = 'mpo_rtp_planned_major_stop'
if 'hqta_details' in existing_areas.columns:
    # `!=` is True for missing values, so untagged rows are kept.
    areas_without_planned = existing_areas[existing_areas['hqta_details'] != planned_details_tag]
else:
    areas_without_planned = existing_areas
areas_with_planned = pd.concat([areas_without_planned, mpo_buffered])

In [43]:
# Scratch check of the dtype object passed to `astype` in the next cell.
np.float64

numpy.float64

In [44]:
# Enforce column types before writing.
# A plain `astype(str)` would turn a missing stop_id (float NaN) into the literal
# string 'nan', so the null mask is recorded first and re-applied after the cast.
# The mask is a plain array so no index alignment is involved (row labels may repeat after concat).
areas_stop_id_present = areas_with_planned['stop_id'].notna().to_numpy()
areas_with_planned = areas_with_planned.astype({'avg_trips_per_peak_hr': np.float64})
areas_with_planned['stop_id'] = areas_with_planned['stop_id'].astype(str).where(areas_stop_id_present)

In [45]:
import gcsfs
# NOTE: this rebinds `fs` (previously the result of get_fs()) to a gcsfs filesystem
# built from the default credentials. The to_parquet calls below pass it as `filesystem`.
fs = gcsfs.GCSFileSystem(token=credentials)

In [46]:
# Overwrite the published HQTA polygon parquet in place; this is the same
# path the catalog's `hqta_areas` entry reads from.
hqta_areas_uri = 'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_areas.parquet'
areas_with_planned.to_parquet(hqta_areas_uri, filesystem=fs)

## add planned stop points

In [47]:
# Read the currently published HQTA points through the catalog,
# passing the credential token as a storage option.
stops_read_kwargs = {"storage_options": {"token": credentials.token}}
existing_stops = catalog.hqta_stops(geopandas_kwargs=stops_read_kwargs).read()

In [48]:
# Convert the (unbuffered) planned stop points to WGS84 before combining them with the published points.
mpo_planned_mts = mpo_planned_mts.to_crs(crs=WGS84)

In [49]:
# Append the planned stop points to the published points.
# This notebook writes the result back to the same parquet that `existing_stops`
# was read from, so a re-run would append the planned rows a second time.
# Rows tagged by a previous run are dropped first to keep the step idempotent.
planned_points_tag = 'mpo_rtp_planned_major_stop'
if 'hqta_details' in existing_stops.columns:
    # `!=` is True for missing values, so untagged rows are kept.
    stops_without_planned = existing_stops[existing_stops['hqta_details'] != planned_points_tag]
else:
    stops_without_planned = existing_stops
stops_with_planned = pd.concat([stops_without_planned, mpo_planned_mts])

In [50]:
# Enforce column types before writing.
# A plain `astype(str)` would turn a missing stop_id (float NaN) into the literal
# string 'nan', so the null mask is recorded first and re-applied after the cast.
# The mask is a plain array so no index alignment is involved (row labels may repeat after concat).
stops_stop_id_present = stops_with_planned['stop_id'].notna().to_numpy()
stops_with_planned = stops_with_planned.astype({'avg_trips_per_peak_hr': np.float64})
stops_with_planned['stop_id'] = stops_with_planned['stop_id'].astype(str).where(stops_stop_id_present)

In [51]:
# Overwrite the published HQTA points parquet in place; this is the same
# path the catalog's `hqta_stops` entry reads from.
hqta_points_uri = 'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/hqta_points.parquet'
stops_with_planned.to_parquet(hqta_points_uri, filesystem=fs)