In [1]:
# !pip install -r requirements.txt

In [1]:
import os

# Byte limit handed to calitp through the environment: 900_000_000_000 bytes
# (900 GB decimal). Presumably this caps how much a single BigQuery query may
# scan and needs to be set before `calitp` is imported -- TODO confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = f"{900_000_000_000}"

In [53]:
import calitp
from calitp.tables import tbl
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd
import fiona

from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON
from ipywidgets import Text, HTML

from utilities import *
import shared_utils

[PRC 21064.3](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?sectionNum=21064.3.&lawCode=PRC)
* _Major transit stop means a site containing any of the following:
(a) An existing rail or bus rapid transit station.
(b) A ferry terminal served by either a bus or rail transit service.
(c) The intersection of two or more major bus routes with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods._
    * "Intersection" may not be sufficiently well-defined for this analysis

[PRC 21060.2](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PRC&sectionNum=21060.2.&highlight=true&keyword=bus%20rapid%20transit)
* _(a) “Bus rapid transit” means a public mass transit service provided by a public agency or by a public-private partnership that includes all of the following features:
(1) Full-time dedicated bus lanes or operation in a separate right-of-way dedicated for public transportation with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods.
(2) Transit signal priority.
(3) All-door boarding.
(4) Fare collection system that promotes efficiency.
(5) Defined stations._
    * GTFS data alone is unlikely to be sufficient to determine whether a service qualifies as BRT under this definition

### Rail Service

* Temporary workaround: this section uses the dim tables. It should use `tbl.gtfs_schedule` instead, but LA Metro rail is not in it.

In [68]:
def get_rail_routes(analysis_date):
    """Build the query of rail routes present in schedule feeds on one date.

    The daily feed/route fact view is restricted to ``analysis_date`` and
    joined to selected columns of the route dimension view on ``route_key``.
    Rows with ``calitp_itp_id`` 200 are dropped (MTC feed, avoided in favor
    of individual operator feeds) and only ``route_type`` '0', '1' or '2'
    is kept (tram/light rail, subway/metro and rail in the GTFS spec).

    Parameters
    ----------
    analysis_date
        Value compared against ``date``, ``calitp_extracted_at`` and
        ``calitp_deleted_at``; the notebook passes a ``date`` object.

    Returns
    -------
    The result of the siuba pipeline. No ``collect()`` is applied here, so
    whatever table object ``tbl.views`` yields is returned unmaterialized.
    """
    # Route attributes carried through the join.
    route_attributes = tbl.views.gtfs_schedule_dim_routes() >> select(
        _.route_id, _.route_key, _.route_short_name,
        _.route_long_name, _.route_desc, _.route_type,
        _.calitp_itp_id,
    )

    # Feed/route rows for the analysis date whose extracted/deleted window
    # contains that date.
    feed_routes_on_date = (
        tbl.views.gtfs_schedule_fact_daily_feed_routes()
        >> filter(_.date == analysis_date)
        >> filter(_.calitp_extracted_at <= analysis_date, _.calitp_deleted_at >= analysis_date)
    )

    is_rail = (_.route_type == '0') | (_.route_type == '1') | (_.route_type == '2')

    rail_routes = (
        feed_routes_on_date
        >> inner_join(_, route_attributes, on='route_key')
        >> filter(_.calitp_itp_id != 200)  # avoid MTC feed in favor of individual operator feeds
        >> filter(is_rail)
    )
    return rail_routes

In [69]:
# Import explicitly: nothing visible in this notebook imports `dt`, so the
# original line only worked if a star-import happened to provide it.
import datetime as dt

# Service date used for every query below (Wednesday, May 4, 2022).
analysis_date = dt.date(2022, 5, 4)

In [70]:
# Rail routes for analysis_date; passed to routes_to_stops() below to find their stops.
new_routes = get_rail_routes(analysis_date)

In [60]:
# California boundary read from parquet under GCS_FILE_PATH.
# NOTE: routes_to_stops() reads this module-level `ca` directly when clipping,
# so this cell must run before any call to that function.
ca = gpd.read_parquet(f'{GCS_FILE_PATH}ca_boundary.parquet')

In [79]:
def routes_to_stops(routes_tbl, analysis_date):
    """Return the stops served on ``analysis_date`` by the given routes.

    Parameters
    ----------
    routes_tbl
        Routes table/query joined to trips on ``calitp_itp_id`` and
        ``route_id``. It must also carry ``route_type``, which is kept on
        the output.
    analysis_date
        Service date; also compared against ``calitp_extracted_at`` and
        ``calitp_deleted_at`` on the daily trips view.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per (``calitp_itp_id``, ``stop_id``), with point geometry
        built from ``stop_lon``/``stop_lat`` (EPSG:4326), reprojected to
        ``shared_utils.geography_utils.CA_NAD83Albers`` and clipped to the
        module-level ``ca`` boundary.

    Notes
    -----
    Depends on the global ``ca`` GeoDataFrame being defined before the call.
    """
    # In-service trips on the analysis date, limited to the requested routes.
    daily_trips = tbl.views.gtfs_schedule_fact_daily_trips()
    service_trips = (
        daily_trips
        >> filter(_.calitp_extracted_at <= analysis_date, _.calitp_deleted_at >= analysis_date)
        >> filter(_.service_date == analysis_date)
        >> filter(_.is_in_service == True)
        >> select(_.trip_key, _.service_date, _.route_id, _.calitp_itp_id)
        >> inner_join(_, routes_tbl, on=['calitp_itp_id', 'route_id'])
    )

    # Attach the stops each trip serves via the trip/stop index view.
    trip_stops = (
        service_trips
        >> inner_join(_, tbl.views.gtfs_schedule_index_feed_trip_stops(), on='trip_key')
        >> select(-_.calitp_url_number, -_.calitp_extracted_at, -_.calitp_deleted_at)
    )
    served_stop_keys = trip_stops >> distinct(_.stop_key, _.route_type)

    stops_df = (
        tbl.views.gtfs_schedule_dim_stops()
        >> distinct(_.calitp_itp_id, _.stop_id,
                    _.stop_lat, _.stop_lon, _.stop_name, _.stop_key)
        >> inner_join(_, served_stop_keys, on='stop_key')
        >> collect()
        # Should be ok to drop duplicates here, but later joins must use
        # stop_id because stop_key is removed on the next line.
        >> distinct(_.calitp_itp_id, _.stop_id, _keep_all=True)
        >> select(-_.stop_key)
    )

    # Build lon/lat points, then move to the projected CRS before clipping.
    point_geometry = gpd.points_from_xy(stops_df.stop_lon, stops_df.stop_lat)
    stops_wgs84 = gpd.GeoDataFrame(stops_df, geometry=point_geometry, crs='EPSG:4326')
    stops_projected = stops_wgs84.to_crs(shared_utils.geography_utils.CA_NAD83Albers)

    return stops_projected.clip(ca)

In [81]:
# Stops served by the rail routes on analysis_date (projected and clipped to `ca` by routes_to_stops).
rail_stops = routes_to_stops(new_routes, analysis_date)

In [82]:
# Render the rail stops on an interactive map. `map_hqta` is not defined in this
# notebook (presumably from `utilities`); the second argument looks like the
# column used to style the points -- TODO confirm.
map_hqta(rail_stops, 'route_type')

Map(center=[32.56951494317901, -117.06696483813707], controls=(ZoomControl(options=['position', 'zoom_in_text'…

#### BRT Service likely meeting [PRC 21060.2](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PRC&sectionNum=21060.2.&highlight=true&keyword=bus%20rapid%20transit) definition:

* LA Metro Orange, Silver excluding street running (stop flags only)
* Omnitrans sbX, all stops (curbside stations are well-defined, with fare prepayment)
* AC Transit Tempo, all stops (curbside stations are well-defined, with fare prepayment)

In [83]:
# LA Metro (calitp_itp_id 182) BRT routes. The two route_ids presumably
# correspond to the Orange and Silver lines named in the markdown above -- TODO confirm.
metro_brt = (
    tbl.gtfs_schedule.routes()
    >> filter(
        _.calitp_itp_id == 182,
        (_.route_id == '901-13149') | (_.route_id == '910-13149'),
    )
)

In [12]:
# routes_to_stops() requires the service date as its second argument; the
# original one-argument call raises TypeError on a fresh kernel.
metro_brt_stops = routes_to_stops(metro_brt, analysis_date)

In [13]:
## GTFS cannot distinguish stations from street-running (non-station) stops,
## so the stop_ids to exclude are listed by hand:
metro_street_running = [
    '141012', '13805', '5397', '13803', '13804',
    '5396', '13802', '5395', '5410', '5411',
    '13817', '12304', '5408', '3821', '2603',
    '3153', '3124', '378', '65300039', '65300038',
    '15820', '13460', '4994', '1813', '2378',
    '5049', '4652', '2377', '4675', '5040',
    '65300042', '3674', '15713', '13561', '5378',
    '13560', '70500012', '5377', '15612', '12416',
    '11917', '12415', '8704',
]

In [14]:
# Drop the manually listed street-running stops. `~` is the boolean-NOT
# operator for masks; unary `-` on a boolean mask is rejected by numpy and
# only tolerated by pandas as a special case.
metro_brt_stops = metro_brt_stops >> filter(~_.stop_id.isin(metro_street_running))

In [15]:
# map_hqta(metro_brt_stops)

In [16]:
# AC Transit Tempo per the markdown above: calitp_itp_id 4, route_id '1T'.
act_brt = (
    tbl.gtfs_schedule.routes()
    >> filter(_.calitp_itp_id == 4, _.route_id == '1T')
)

In [17]:
# routes_to_stops() requires the service date as its second argument; the
# original one-argument call raises TypeError on a fresh kernel.
act_brt_stops = routes_to_stops(act_brt, analysis_date)

In [18]:
# map_hqta(act_brt_stops)

In [19]:
# Omnitrans sbX per the markdown above: calitp_itp_id 232, route_id '9648'.
omni_brt = (
    tbl.gtfs_schedule.routes()
    >> filter(_.calitp_itp_id == 232, _.route_id == '9648')
)

In [20]:
# routes_to_stops() requires the service date as its second argument; the
# original one-argument call raises TypeError on a fresh kernel.
omni_brt_stops = routes_to_stops(omni_brt, analysis_date)

In [21]:
# map_hqta(omni_brt_stops)

### Ferry

In [22]:
# All ferry routes in the schedule routes table (route_type '4').
ferry = (
    tbl.gtfs_schedule.routes()
    >> filter(_.route_type == '4')
)

In [23]:
# routes_to_stops() requires the service date as its second argument; the
# original one-argument call raises TypeError on a fresh kernel.
ferry_stops = routes_to_stops(ferry, analysis_date)

# Only ferry stops without bus service, listed by hand.
# TODO: implement an algorithm to detect bus/rail service at ferry stops.
angel_and_alcatraz = ['2483552', '2483550']

# `~` is the boolean-NOT operator for masks (unary `-` is not valid on
# boolean arrays in numpy).
ferry_stops = ferry_stops >> filter(~_.stop_id.isin(angel_and_alcatraz))

In [24]:
# map_hqta(ferry_stops)

## Combined

In [25]:
# Stack all qualifying stop sets into one frame. `DataFrame.append` was
# deprecated in pandas 1.4 and removed in 2.0; `pd.concat` is the supported
# equivalent and likewise keeps each input's original index labels.
rail_brt_ferry = pd.concat([
    rail_stops,
    metro_brt_stops,
    act_brt_stops,
    omni_brt_stops,
    ferry_stops,
])

In [26]:
# Preview the combined rail + BRT + ferry stops.
rail_brt_ferry

Unnamed: 0,calitp_itp_id,calitp_url_number,stop_id,stop_lat,stop_lon,route_type,geometry
0,10,0,3400002,37.797908,-121.263664,2,POINT (-111116.950 -23544.700)
1,10,0,3400001,37.957058,-121.278948,2,POINT (-112222.169 -5836.078)
2,10,0,3400003,37.696468,-121.433869,2,POINT (-126252.943 -34607.238)
3,10,0,3400004.3,37.657549,-121.882962,2,POINT (-165877.168 -38241.179)
4,10,0,3400004.1,37.697081,-121.717648,2,POINT (-151236.096 -34123.815)
...,...,...,...,...,...,...,...
28,41,0,2483549,37.872939,-122.455446,4,POINT (-215679.403 -13151.168)
29,41,0,2483569,37.856636,-122.478275,4,POINT (-217731.552 -14910.728)
31,338,0,818729,37.770840,-122.385791,4,POINT (-209847.993 -24653.833)
32,338,0,818733,37.864540,-122.313932,4,POINT (-203274.531 -14397.269)


In [27]:
## clip to CA since we now have Amtrak national...

In [28]:
import intake
# Open the intake catalog at ./catalog.yml (path is relative to the working directory).
catalog = intake.open_catalog('./catalog.yml')

In [29]:
# Read the `stanford_shorelines` catalog entry, project it to EPSG:6414, and
# keep only rows with STFIPS '06' (California's state FIPS code).
ca_shape = (
    catalog.stanford_shorelines.read()
    .to_crs('EPSG:6414')
    >> filter(_.STFIPS == '06')
)

In [30]:
## after this cell displays output the notebook refuses to save...
##     File Save Error for rail_ferry_brt.ipynb
##     Invalid response: 413 Request Entity Too Large
# ca_shape['geometry'].iloc[0]

In [31]:
# Clip the stops to the California shoreline. The mask is reprojected to the
# stops' own CRS first: the stops were projected with
# shared_utils.geography_utils.CA_NAD83Albers while `ca_shape` was projected
# to the literal 'EPSG:6414', and geopandas' clip does not reproject the mask
# itself.
rail_brt_ferry = rail_brt_ferry.clip(ca_shape.to_crs(rail_brt_ferry.crs))

In [32]:
# map_hqta(rail_brt_ferry)

In [33]:
# Export the clipped stops. `geoparquet_gcs_export` is not defined in this
# notebook (presumably from `utilities`); the warning printed below shows it
# calls gdf.to_parquet(f"{name}.parquet"), so this writes rail_brt_ferry.parquet.
# Presumably the file is then uploaded to GCS -- TODO confirm.
geoparquet_gcs_export(rail_brt_ferry, 'rail_brt_ferry')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  gdf.to_parquet(f"{name}.parquet")
